summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c2
-rw-r--r--fs/9p/vfs_inode.c4
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/Kconfig18
-rw-r--r--fs/Makefile1
-rw-r--r--fs/affs/inode.c4
-rw-r--r--fs/affs/super.c4
-rw-r--r--fs/afs/proc.c4
-rw-r--r--fs/afs/server.c9
-rw-r--r--fs/anon_inodes.c4
-rw-r--r--fs/attr.c4
-rw-r--r--fs/autofs/inode.c4
-rw-r--r--fs/autofs4/dev-ioctl.c3
-rw-r--r--fs/autofs4/inode.c4
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/binfmt_aout.c2
-rw-r--r--fs/binfmt_elf.c22
-rw-r--r--fs/binfmt_elf_fdpic.c19
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/block_dev.c14
-rw-r--r--fs/buffer.c122
-rw-r--r--fs/char_dev.c30
-rw-r--r--fs/cifs/CHANGES6
-rw-r--r--fs/cifs/cifs_fs_sb.h2
-rw-r--r--fs/cifs/cifs_spnego.c6
-rw-r--r--fs/cifs/cifsglob.h10
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c2
-rw-r--r--fs/cifs/connect.c8
-rw-r--r--fs/cifs/dir.c12
-rw-r--r--fs/cifs/file.c98
-rw-r--r--fs/cifs/inode.c8
-rw-r--r--fs/cifs/ioctl.c2
-rw-r--r--fs/cifs/misc.c7
-rw-r--r--fs/cifs/readdir.c5
-rw-r--r--fs/coda/cache.c6
-rw-r--r--fs/coda/file.c3
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat.c42
-rw-r--r--fs/dcache.c148
-rw-r--r--fs/devpts/inode.c4
-rw-r--r--fs/dlm/dir.c18
-rw-r--r--fs/dlm/lowcomms.c8
-rw-r--r--fs/dlm/memory.c6
-rw-r--r--fs/dlm/midcomms.c2
-rw-r--r--fs/dlm/netlink.c2
-rw-r--r--fs/dquot.c440
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h3
-rw-r--r--fs/ecryptfs/keystore.c31
-rw-r--r--fs/ecryptfs/kthread.c9
-rw-r--r--fs/ecryptfs/main.c3
-rw-r--r--fs/ecryptfs/messaging.c27
-rw-r--r--fs/ecryptfs/miscdev.c27
-rw-r--r--fs/exec.c183
-rw-r--r--fs/exportfs/expfs.c4
-rw-r--r--fs/ext2/balloc.c2
-rw-r--r--fs/ext2/ialloc.c4
-rw-r--r--fs/ext2/super.c9
-rw-r--r--fs/ext3/balloc.c2
-rw-r--r--fs/ext3/hash.c77
-rw-r--r--fs/ext3/ialloc.c4
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c36
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/ext4_sb.h1
-rw-r--r--fs/ext4/extents.c11
-rw-r--r--fs/ext4/hash.c77
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode.c76
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c48
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fcntl.c18
-rw-r--r--fs/file_table.c10
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c23
-rw-r--r--fs/gfs2/Makefile2
-rw-r--r--fs/gfs2/acl.c2
-rw-r--r--fs/gfs2/bmap.c30
-rw-r--r--fs/gfs2/daemon.c136
-rw-r--r--fs/gfs2/daemon.h17
-rw-r--r--fs/gfs2/dir.c62
-rw-r--r--fs/gfs2/dir.h1
-rw-r--r--fs/gfs2/eattr.c40
-rw-r--r--fs/gfs2/glock.c306
-rw-r--r--fs/gfs2/glock.h4
-rw-r--r--fs/gfs2/glops.c56
-rw-r--r--fs/gfs2/incore.h55
-rw-r--r--fs/gfs2/inode.c63
-rw-r--r--fs/gfs2/inode.h13
-rw-r--r--fs/gfs2/locking/dlm/mount.c12
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c16
-rw-r--r--fs/gfs2/main.c15
-rw-r--r--fs/gfs2/mount.c29
-rw-r--r--fs/gfs2/ops_address.c26
-rw-r--r--fs/gfs2/ops_dentry.c2
-rw-r--r--fs/gfs2/ops_dentry.h17
-rw-r--r--fs/gfs2/ops_export.c5
-rw-r--r--fs/gfs2/ops_file.c20
-rw-r--r--fs/gfs2/ops_fstype.c203
-rw-r--r--fs/gfs2/ops_fstype.h19
-rw-r--r--fs/gfs2/ops_inode.c75
-rw-r--r--fs/gfs2/ops_inode.h25
-rw-r--r--fs/gfs2/ops_super.c216
-rw-r--r--fs/gfs2/ops_super.h17
-rw-r--r--fs/gfs2/quota.c113
-rw-r--r--fs/gfs2/quota.h24
-rw-r--r--fs/gfs2/recovery.c48
-rw-r--r--fs/gfs2/recovery.h14
-rw-r--r--fs/gfs2/rgrp.c58
-rw-r--r--fs/gfs2/super.c280
-rw-r--r--fs/gfs2/super.h16
-rw-r--r--fs/gfs2/sys.c66
-rw-r--r--fs/gfs2/sys.h4
-rw-r--r--fs/gfs2/util.c1
-rw-r--r--fs/gfs2/util.h1
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/super.c4
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/options.c4
-rw-r--r--fs/hostfs/hostfs.h2
-rw-r--r--fs/hostfs/hostfs_kern.c4
-rw-r--r--fs/hostfs/hostfs_user.c2
-rw-r--r--fs/hpfs/namei.c24
-rw-r--r--fs/hpfs/super.c4
-rw-r--r--fs/hppfs/hppfs.c6
-rw-r--r--fs/hugetlbfs/inode.c21
-rw-r--r--fs/inode.c332
-rw-r--r--fs/inotify_user.c2
-rw-r--r--fs/internal.h6
-rw-r--r--fs/ioctl.c44
-rw-r--r--fs/ioprio.c18
-rw-r--r--fs/jfs/jfs_inode.c4
-rw-r--r--fs/lockd/host.c11
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c9
-rw-r--r--fs/locks.c2
-rw-r--r--fs/minix/bitmap.c4
-rw-r--r--fs/namei.c12
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/ncpfs/ioctl.c91
-rw-r--r--fs/nfs/nfsroot.c6
-rw-r--r--fs/nfs/super.c6
-rw-r--r--fs/nfsctl.c5
-rw-r--r--fs/nfsd/auth.c95
-rw-r--r--fs/nfsd/nfs4recover.c75
-rw-r--r--fs/nfsd/nfs4state.c12
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/nfsfh.c41
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/ocfs2/Makefile6
-rw-r--r--fs/ocfs2/acl.c479
-rw-r--r--fs/ocfs2/acl.h58
-rw-r--r--fs/ocfs2/alloc.c399
-rw-r--r--fs/ocfs2/alloc.h21
-rw-r--r--fs/ocfs2/aops.c51
-rw-r--r--fs/ocfs2/buffer_head_io.c47
-rw-r--r--fs/ocfs2/buffer_head_io.h27
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/cluster/netdebug.c8
-rw-r--r--fs/ocfs2/cluster/nodemanager.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c29
-rw-r--r--fs/ocfs2/dir.c147
-rw-r--r--fs/ocfs2/dlm/dlmfs.c12
-rw-r--r--fs/ocfs2/dlm/userdlm.h2
-rw-r--r--fs/ocfs2/dlmglue.c161
-rw-r--r--fs/ocfs2/dlmglue.h19
-rw-r--r--fs/ocfs2/extent_map.c96
-rw-r--r--fs/ocfs2/extent_map.h24
-rw-r--r--fs/ocfs2/file.c193
-rw-r--r--fs/ocfs2/file.h3
-rw-r--r--fs/ocfs2/inode.c144
-rw-r--r--fs/ocfs2/inode.h18
-rw-r--r--fs/ocfs2/journal.c176
-rw-r--r--fs/ocfs2/journal.h96
-rw-r--r--fs/ocfs2/localalloc.c8
-rw-r--r--fs/ocfs2/namei.c280
-rw-r--r--fs/ocfs2/ocfs2.h33
-rw-r--r--fs/ocfs2/ocfs2_fs.h126
-rw-r--r--fs/ocfs2/ocfs2_jbd_compat.h82
-rw-r--r--fs/ocfs2/ocfs2_lockid.h5
-rw-r--r--fs/ocfs2/quota.h117
-rw-r--r--fs/ocfs2/quota_global.c990
-rw-r--r--fs/ocfs2/quota_local.c1253
-rw-r--r--fs/ocfs2/resize.c60
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/stack_user.c3
-rw-r--r--fs/ocfs2/suballoc.c278
-rw-r--r--fs/ocfs2/suballoc.h18
-rw-r--r--fs/ocfs2/super.c310
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c2267
-rw-r--r--fs/ocfs2/xattr.h31
-rw-r--r--fs/omfs/inode.c8
-rw-r--r--fs/open.c59
-rw-r--r--fs/partitions/check.c11
-rw-r--r--fs/pipe.c4
-rw-r--r--fs/posix_acl.c4
-rw-r--r--fs/proc/array.c32
-rw-r--r--fs/proc/base.c32
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/quota.c15
-rw-r--r--fs/quota_tree.c645
-rw-r--r--fs/quota_tree.h25
-rw-r--r--fs/quota_v1.c28
-rw-r--r--fs/quota_v2.c631
-rw-r--r--fs/quotaio_v1.h33
-rw-r--r--fs/quotaio_v2.h60
-rw-r--r--fs/ramfs/inode.c4
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/super.c18
-rw-r--r--fs/seq_file.c14
-rw-r--r--fs/smbfs/dir.c3
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/smbfs/proc.c2
-rw-r--r--fs/sysv/ialloc.c4
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/commit.c4
-rw-r--r--fs/ubifs/debug.c66
-rw-r--r--fs/ubifs/dir.c9
-rw-r--r--fs/ubifs/file.c91
-rw-r--r--fs/ubifs/journal.c8
-rw-r--r--fs/ubifs/key.h4
-rw-r--r--fs/ubifs/lpt_commit.c2
-rw-r--r--fs/ubifs/orphan.c28
-rw-r--r--fs/ubifs/recovery.c17
-rw-r--r--fs/ubifs/replay.c2
-rw-r--r--fs/ubifs/sb.c9
-rw-r--r--fs/ubifs/super.c70
-rw-r--r--fs/ubifs/tnc.c12
-rw-r--r--fs/ubifs/ubifs.h12
-rw-r--r--fs/udf/balloc.c91
-rw-r--r--fs/udf/dir.c14
-rw-r--r--fs/udf/directory.c38
-rw-r--r--fs/udf/ecma_167.h416
-rw-r--r--fs/udf/ialloc.c8
-rw-r--r--fs/udf/inode.c214
-rw-r--r--fs/udf/misc.c29
-rw-r--r--fs/udf/namei.c88
-rw-r--r--fs/udf/osta_udf.h22
-rw-r--r--fs/udf/partition.c2
-rw-r--r--fs/udf/super.c139
-rw-r--r--fs/udf/truncate.c44
-rw-r--r--fs/udf/udf_i.h6
-rw-r--r--fs/udf/udf_sb.h2
-rw-r--r--fs/udf/udfdecl.h46
-rw-r--r--fs/udf/udfend.h28
-rw-r--r--fs/udf/udftime.c6
-rw-r--r--fs/udf/unicode.c41
-rw-r--r--fs/ufs/ialloc.c4
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c24
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h65
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c840
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c764
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h55
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h31
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h5
-rw-r--r--fs/xfs/quota/xfs_dquot.c39
-rw-r--r--fs/xfs/quota/xfs_dquot.h4
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c45
-rw-r--r--fs/xfs/quota/xfs_qm.c13
-rw-r--r--fs/xfs/quota/xfs_qm.h1
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c1
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c151
-rw-r--r--fs/xfs/support/debug.c2
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c8
-rw-r--r--fs/xfs/xfs_ag.h10
-rw-r--r--fs/xfs/xfs_alloc.c195
-rw-r--r--fs/xfs/xfs_alloc.h27
-rw-r--r--fs/xfs/xfs_alloc_btree.c2387
-rw-r--r--fs/xfs/xfs_alloc_btree.h107
-rw-r--r--fs/xfs/xfs_arch.h39
-rw-r--r--fs/xfs/xfs_bit.h3
-rw-r--r--fs/xfs/xfs_bmap.c306
-rw-r--r--fs/xfs/xfs_bmap.h61
-rw-r--r--fs/xfs/xfs_bmap_btree.c2614
-rw-r--r--fs/xfs/xfs_bmap_btree.h171
-rw-r--r--fs/xfs/xfs_btree.c3596
-rw-r--r--fs/xfs/xfs_btree.h392
-rw-r--r--fs/xfs/xfs_btree_trace.c249
-rw-r--r--fs/xfs/xfs_btree_trace.h116
-rw-r--r--fs/xfs/xfs_buf_item.c25
-rw-r--r--fs/xfs/xfs_clnt.h105
-rw-r--r--fs/xfs/xfs_da_btree.h24
-rw-r--r--fs/xfs/xfs_dinode.h5
-rw-r--r--fs/xfs/xfs_dmops.c5
-rw-r--r--fs/xfs/xfs_extfree_item.c45
-rw-r--r--fs/xfs/xfs_fsops.c24
-rw-r--r--fs/xfs/xfs_ialloc.c132
-rw-r--r--fs/xfs/xfs_ialloc.h21
-rw-r--r--fs/xfs/xfs_ialloc_btree.c2193
-rw-r--r--fs/xfs/xfs_ialloc_btree.h111
-rw-r--r--fs/xfs/xfs_iget.c501
-rw-r--r--fs/xfs/xfs_imap.h2
-rw-r--r--fs/xfs/xfs_inode.c300
-rw-r--r--fs/xfs/xfs_inode.h290
-rw-r--r--fs/xfs/xfs_inode_item.c30
-rw-r--r--fs/xfs/xfs_inode_item.h41
-rw-r--r--fs/xfs/xfs_itable.c33
-rw-r--r--fs/xfs/xfs_log.c49
-rw-r--r--fs/xfs/xfs_log.h4
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c97
-rw-r--r--fs/xfs/xfs_mount.c42
-rw-r--r--fs/xfs/xfs_mount.h54
-rw-r--r--fs/xfs/xfs_qmops.c5
-rw-r--r--fs/xfs/xfs_trans.c22
-rw-r--r--fs/xfs/xfs_trans.h322
-rw-r--r--fs/xfs/xfs_trans_ail.c362
-rw-r--r--fs/xfs/xfs_trans_buf.c7
-rw-r--r--fs/xfs/xfs_trans_item.c10
-rw-r--r--fs/xfs/xfs_trans_priv.h98
-rw-r--r--fs/xfs/xfs_utils.c6
-rw-r--r--fs/xfs/xfs_vfsops.c617
-rw-r--r--fs/xfs/xfs_vfsops.h2
-rw-r--r--fs/xfs/xfs_vnodeops.c147
-rw-r--r--fs/xfs/xfs_vnodeops.h12
335 files changed, 19091 insertions, 15193 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 3031e3233dd6..a43e4ab7c6c3 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -120,7 +120,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
switch (access) {
case V9FS_ACCESS_SINGLE:
case V9FS_ACCESS_USER:
- uid = current->fsuid;
+ uid = current_fsuid();
any = 0;
break;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8314d3f43b71..8fddfe86a0dc 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -215,8 +215,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
inode = new_inode(sb);
if (inode) {
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_rdev = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index d6cb1a0ca724..93212e40221a 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -113,8 +113,8 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
struct v9fs_session_info *v9ses = NULL;
struct p9_wstat *st = NULL;
int mode = S_IRWXUGO | S_ISVTX;
- uid_t uid = current->fsuid;
- gid_t gid = current->fsgid;
+ uid_t uid = current_fsuid();
+ gid_t gid = current_fsgid();
struct p9_fid *fid;
int retval = 0;
diff --git a/fs/Kconfig b/fs/Kconfig
index 522469a7eca3..107f1cda2adc 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -189,6 +189,8 @@ config OCFS2_FS
select CONFIGFS_FS
select JBD2
select CRC32
+ select QUOTA
+ select QUOTA_TREE
help
OCFS2 is a general purpose extent based shared disk cluster file
system with many similarities to ext3. It supports 64 bit inode
@@ -258,15 +260,14 @@ config OCFS2_DEBUG_FS
this option for debugging only as it is likely to decrease
performance of the filesystem.
-config OCFS2_COMPAT_JBD
- bool "Use JBD for compatibility"
+config OCFS2_FS_POSIX_ACL
+ bool "OCFS2 POSIX Access Control Lists"
depends on OCFS2_FS
+ select FS_POSIX_ACL
default n
- select JBD
help
- The ocfs2 filesystem now uses JBD2 for its journalling. JBD2
- is backwards compatible with JBD. It is safe to say N here.
- However, if you really want to use the original JBD, say Y here.
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
endif # BLOCK
@@ -340,6 +341,10 @@ config PRINT_QUOTA_WARNING
Note that this behavior is currently deprecated and may go away in
future. Please use notification via netlink socket instead.
+# Generic support for tree structured quota files. Seleted when needed.
+config QUOTA_TREE
+ tristate
+
config QFMT_V1
tristate "Old quota format support"
depends on QUOTA
@@ -351,6 +356,7 @@ config QFMT_V1
config QFMT_V2
tristate "Quota format v2 support"
depends on QUOTA
+ select QUOTA_TREE
help
This quota format allows using quotas with 32-bit UIDs/GIDs. If you
need this functionality say Y here.
diff --git a/fs/Makefile b/fs/Makefile
index d9f8afe6f0c4..cdf655640594 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
obj-$(CONFIG_QUOTA) += dquot.o
obj-$(CONFIG_QFMT_V1) += quota_v1.o
obj-$(CONFIG_QFMT_V2) += quota_v2.o
+obj-$(CONFIG_QUOTA_TREE) += quota_tree.o
obj-$(CONFIG_QUOTACTL) += quota.o
obj-$(CONFIG_DNOTIFY) += dnotify.o
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index a13b334a3910..415d9c67ac16 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -293,8 +293,8 @@ affs_new_inode(struct inode *dir)
mark_buffer_dirty_inode(bh, inode);
affs_brelse(bh);
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_ino = block;
inode->i_nlink = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 8989c93193ed..a19d64b582aa 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -163,8 +163,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
/* Fill in defaults */
- *uid = current->uid;
- *gid = current->gid;
+ *uid = current_uid();
+ *gid = current_gid();
*reserved = 2;
*root = -1;
*blocksize = -1;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9f7d1ae70269..7578c1ab9e0b 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -646,7 +646,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
}
/* display one cell per line on subsequent lines */
- seq_printf(m, "%u.%u.%u.%u\n", NIPQUAD(addr->s_addr));
+ seq_printf(m, "%pI4\n", &addr->s_addr);
return 0;
}
@@ -737,7 +737,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
}
/* display one cell per line on subsequent lines */
- sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(server->addr));
+ sprintf(ipaddr, "%pI4", &server->addr);
seq_printf(m, "%3d %-15.15s %5d\n",
atomic_read(&server->usage), ipaddr, server->fs_state);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 28f2451419e1..f49099516675 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -105,7 +105,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell,
{
struct afs_server *server, *candidate;
- _enter("%p,"NIPQUAD_FMT, cell, NIPQUAD(addr->s_addr));
+ _enter("%p,%pI4", cell, &addr->s_addr);
/* quick scan of the list to see if we already have the server */
read_lock(&cell->servers_lock);
@@ -168,9 +168,8 @@ found_server:
server_in_two_cells:
write_unlock(&cell->servers_lock);
kfree(candidate);
- printk(KERN_NOTICE "kAFS:"
- " Server "NIPQUAD_FMT" appears to be in two cells\n",
- NIPQUAD(*addr));
+ printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n",
+ addr);
_leave(" = -EEXIST");
return ERR_PTR(-EEXIST);
}
@@ -184,7 +183,7 @@ struct afs_server *afs_find_server(const struct in_addr *_addr)
struct rb_node *p;
struct in_addr addr = *_addr;
- _enter(NIPQUAD_FMT, NIPQUAD(addr.s_addr));
+ _enter("%pI4", &addr.s_addr);
read_lock(&afs_servers_lock);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3662dd44896b..c16d9be1b017 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -154,8 +154,8 @@ static struct inode *anon_inode_mkinode(void)
*/
inode->i_state = I_DIRTY;
inode->i_mode = S_IRUSR | S_IWUSR;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
return inode;
}
diff --git a/fs/attr.c b/fs/attr.c
index 7a83819f6ba2..f4360192a938 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -29,13 +29,13 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
/* Make sure a caller can chown. */
if ((ia_valid & ATTR_UID) &&
- (current->fsuid != inode->i_uid ||
+ (current_fsuid() != inode->i_uid ||
attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
goto error;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
- (current->fsuid != inode->i_uid ||
+ (current_fsuid() != inode->i_uid ||
(!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
!capable(CAP_CHOWN))
goto error;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index b70eea1e8c59..c773680d5c60 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -76,8 +76,8 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
substring_t args[MAX_OPT_ARGS];
int option;
- *uid = current->uid;
- *gid = current->gid;
+ *uid = current_uid();
+ *gid = current_gid();
*pgrp = task_pgrp_nr(current);
*minproto = *maxproto = AUTOFS_PROTO_VERSION;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 33bf8cbfd051..63b7c7afe8df 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -308,7 +308,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
goto out;
}
- filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
+ filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+ current_cred());
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
goto out;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index c7e65bb30ba0..7b19802cfef4 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -235,8 +235,8 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
substring_t args[MAX_OPT_ARGS];
int option;
- *uid = current->uid;
- *gid = current->gid;
+ *uid = current_uid();
+ *gid = current_gid();
*pgrp = task_pgrp_nr(current);
*minproto = AUTOFS_MIN_PROTO_VERSION;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 4b67c2a2d77c..e02cc8ae5eb3 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -391,8 +391,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
- wq->uid = current->uid;
- wq->gid = current->gid;
+ wq->uid = current_uid();
+ wq->gid = current_gid();
wq->pid = current->pid;
wq->tgid = current->tgid;
wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index daae463068e4..4dd1b623f937 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -106,8 +106,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
}
set_bit(ino, info->si_imap);
info->si_freei--;
- inode->i_uid = current->fsuid;
- inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid();
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_blocks = 0;
inode->i_op = &bfs_file_inops;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 204cfd1d7676..f1f3f4192a60 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -320,7 +320,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
current->mm->free_area_cache = current->mm->mmap_base;
current->mm->cached_hole_size = 0;
- compute_creds(bprm);
+ install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
#ifdef __sparc__
if (N_MAGIC(ex) == NMAGIC) {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 8fcfa398d350..c41fa2af7677 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -157,7 +157,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
int items;
elf_addr_t *elf_info;
int ei_index = 0;
- struct task_struct *tsk = current;
+ const struct cred *cred = current_cred();
struct vm_area_struct *vma;
/*
@@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
NEW_AUX_ENT(AT_BASE, interp_load_addr);
NEW_AUX_ENT(AT_FLAGS, 0);
NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
- NEW_AUX_ENT(AT_UID, tsk->uid);
- NEW_AUX_ENT(AT_EUID, tsk->euid);
- NEW_AUX_ENT(AT_GID, tsk->gid);
- NEW_AUX_ENT(AT_EGID, tsk->egid);
+ NEW_AUX_ENT(AT_UID, cred->uid);
+ NEW_AUX_ENT(AT_EUID, cred->euid);
+ NEW_AUX_ENT(AT_GID, cred->gid);
+ NEW_AUX_ENT(AT_EGID, cred->egid);
NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
NEW_AUX_ENT(AT_EXECFN, bprm->exec);
if (k_platform) {
@@ -949,14 +949,14 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
set_binfmt(&elf_format);
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
- retval = arch_setup_additional_pages(bprm, executable_stack);
+ retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
if (retval < 0) {
send_sig(SIGKILL, current, 0);
goto out;
}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
- compute_creds(bprm);
+ install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
@@ -1361,6 +1361,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
struct mm_struct *mm)
{
+ const struct cred *cred;
unsigned int i, len;
/* first copy the parameters from user space */
@@ -1388,8 +1389,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
psinfo->pr_zomb = psinfo->pr_sname == 'Z';
psinfo->pr_nice = task_nice(p);
psinfo->pr_flag = p->flags;
- SET_UID(psinfo->pr_uid, p->uid);
- SET_GID(psinfo->pr_gid, p->gid);
+ rcu_read_lock();
+ cred = __task_cred(p);
+ SET_UID(psinfo->pr_uid, cred->uid);
+ SET_GID(psinfo->pr_gid, cred->gid);
+ rcu_read_unlock();
strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
return 0;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 5b5424cb3391..aa5b43205e37 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -404,7 +404,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
current->mm->start_stack = current->mm->start_brk + stack_size;
#endif
- compute_creds(bprm);
+ install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
if (create_elf_fdpic_tables(bprm, current->mm,
&exec_params, &interp_params) < 0)
@@ -475,6 +475,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
struct elf_fdpic_params *exec_params,
struct elf_fdpic_params *interp_params)
{
+ const struct cred *cred = current_cred();
unsigned long sp, csp, nitems;
elf_caddr_t __user *argv, *envp;
size_t platform_len = 0, len;
@@ -623,10 +624,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr);
NEW_AUX_ENT(AT_FLAGS, 0);
NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr);
- NEW_AUX_ENT(AT_UID, (elf_addr_t) current->uid);
- NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid);
- NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid);
- NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid);
+ NEW_AUX_ENT(AT_UID, (elf_addr_t) cred->uid);
+ NEW_AUX_ENT(AT_EUID, (elf_addr_t) cred->euid);
+ NEW_AUX_ENT(AT_GID, (elf_addr_t) cred->gid);
+ NEW_AUX_ENT(AT_EGID, (elf_addr_t) cred->egid);
NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
NEW_AUX_ENT(AT_EXECFN, bprm->exec);
@@ -1413,6 +1414,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
struct mm_struct *mm)
{
+ const struct cred *cred;
unsigned int i, len;
/* first copy the parameters from user space */
@@ -1440,8 +1442,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
psinfo->pr_zomb = psinfo->pr_sname == 'Z';
psinfo->pr_nice = task_nice(p);
psinfo->pr_flag = p->flags;
- SET_UID(psinfo->pr_uid, p->uid);
- SET_GID(psinfo->pr_gid, p->gid);
+ rcu_read_lock();
+ cred = __task_cred(p);
+ SET_UID(psinfo->pr_uid, cred->uid);
+ SET_GID(psinfo->pr_gid, cred->gid);
+ rcu_read_unlock();
strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
return 0;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index ccb781a6a804..7bbd5c6b3725 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -880,7 +880,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
(libinfo.lib_list[j].loaded)?
libinfo.lib_list[j].start_data:UNLOADED_LIB;
- compute_creds(bprm);
+ install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
set_binfmt(&flat_format);
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 74e587a52796..08644a61616e 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -255,7 +255,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
kfree(hpuxhdr);
set_binfmt(&som_format);
- compute_creds(bprm);
+ install_exec_creds(bprm);
setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
create_som_tables(bprm);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index db831efbdbbd..30b88b96bac4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1004,6 +1004,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
lock_kernel();
+ restart:
ret = -ENXIO;
disk = get_gendisk(bdev->bd_dev, &partno);
@@ -1024,6 +1025,19 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (disk->fops->open) {
ret = disk->fops->open(bdev, mode);
+ if (ret == -ERESTARTSYS) {
+ /* Lost a race with 'disk' being
+ * deleted, try again.
+ * See md.c
+ */
+ disk_put_part(bdev->bd_part);
+ bdev->bd_part = NULL;
+ module_put(disk->fops->owner);
+ put_disk(disk);
+ bdev->bd_disk = NULL;
+ mutex_unlock(&bdev->bd_mutex);
+ goto restart;
+ }
if (ret)
goto out_clear;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 6569fda5cfed..fb0659217c67 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page)
page_cache_release(page);
}
+
+static int quiet_error(struct buffer_head *bh)
+{
+ if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
+ return 0;
+ return 1;
+}
+
+
static void buffer_io_error(struct buffer_head *bh)
{
char b[BDEVNAME_SIZE];
-
printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
bdevname(bh->b_bdev, b),
(unsigned long long)bh->b_blocknr);
@@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
- if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
+ if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
buffer_io_error(bh);
printk(KERN_WARNING "lost page write due to "
"I/O error on %s\n",
@@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
set_buffer_uptodate(bh);
} else {
clear_buffer_uptodate(bh);
- if (printk_ratelimit())
+ if (!quiet_error(bh))
buffer_io_error(bh);
SetPageError(page);
}
@@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
- if (printk_ratelimit()) {
+ if (!quiet_error(bh)) {
buffer_io_error(bh);
printk(KERN_WARNING "lost page write due to "
"I/O error on %s\n",
@@ -878,6 +886,7 @@ void invalidate_inode_buffers(struct inode *inode)
spin_unlock(&buffer_mapping->private_lock);
}
}
+EXPORT_SYMBOL(invalidate_inode_buffers);
/*
* Remove any clean buffers from the inode's buffer list. This is called
@@ -2912,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
set_bit(BH_Eopnotsupp, &bh->b_state);
}
+ if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
+ set_bit(BH_Quiet, &bh->b_state);
+
bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
bio_put(bio);
}
@@ -3318,6 +3330,107 @@ int bh_submit_read(struct buffer_head *bh)
}
EXPORT_SYMBOL(bh_submit_read);
+/*
+ * Writeback a page to clean the dirty state
+ */
+static void trigger_write(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+ int rc;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE,
+ .nr_to_write = 1,
+ .range_start = 0,
+ .range_end = LLONG_MAX,
+ .nonblocking = 1,
+ .for_reclaim = 0
+ };
+
+ if (PageWriteback(page))
+ goto unlock;
+
+ if (!mapping->a_ops->writepage)
+ /* No write method for the address space */
+ goto unlock;
+
+ if (!clear_page_dirty_for_io(page))
+ /* Someone else already triggered a write */
+ goto unlock;
+
+ rc = mapping->a_ops->writepage(page, &wbc);
+ if (rc < 0)
+ /* I/O Error writing */
+ return;
+
+ if (rc == AOP_WRITEPAGE_ACTIVATE)
+unlock: unlock_page(page);
+}
+
+/*
+ * Get references on buffers.
+ *
+ * We obtain references on the page that uses the buffer. v[i] will point to
+ * the corresponding page after get_buffers() is through.
+ *
+ * We are safe from the underlying page being removed simply by doing
+ * a get_page_unless_zero. The buffer head removal may race at will.
+ * try_to_free_buffes will later take appropriate locks to remove the
+ * buffers if they are still there.
+ */
+static void *get_buffers(struct kmem_cache *s, int nr, void **v)
+{
+ struct page *page;
+ struct buffer_head *bh;
+ int i, j;
+ int n = 0;
+
+ for (i = 0; i < nr; i++) {
+ bh = v[i];
+ v[i] = NULL;
+
+ page = bh->b_page;
+
+ if (page && PagePrivate(page)) {
+ for (j = 0; j < n; j++)
+ if (page == v[j])
+ continue;
+ }
+
+ if (get_page_unless_zero(page))
+ v[n++] = page;
+ }
+ return NULL;
+}
+
+/*
+ * Despite its name: kick_buffers operates on a list of pointers to
+ * page structs that was set up by get_buffer().
+ */
+static void kick_buffers(struct kmem_cache *s, int nr, void **v,
+ void *private)
+{
+ struct page *page;
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ page = v[i];
+
+ if (!page)
+ continue;
+
+ if (trylock_page(page)) {
+ if (PageDirty(page))
+ trigger_write(page);
+ else {
+ if (PagePrivate(page))
+ try_to_free_buffers(page);
+ unlock_page(page);
+ }
+ }
+ put_page(page);
+ }
+}
+
static void
init_buffer_head(void *data)
{
@@ -3336,6 +3449,7 @@ void __init buffer_init(void)
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),
init_buffer_head);
+ kmem_cache_setup_defrag(bh_cachep, get_buffers, kick_buffers);
/*
* Limit the bh occupancy to 10% of ZONE_NORMAL
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 700697a72618..5ce907c4364f 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -482,26 +482,22 @@ void cdev_del(struct cdev *p)
}
-static void cdev_default_release(struct kobject *kobj)
+static void cdev_release(struct kobject *kobj)
{
struct cdev *p = container_of(kobj, struct cdev, kobj);
cdev_purge(p);
+ if (p->release)
+ p->release(p);
}
-static void cdev_dynamic_release(struct kobject *kobj)
-{
- struct cdev *p = container_of(kobj, struct cdev, kobj);
- cdev_purge(p);
- kfree(p);
-}
-
-static struct kobj_type ktype_cdev_default = {
- .release = cdev_default_release,
+static struct kobj_type cdev_ktype = {
+ .release = cdev_release,
};
-static struct kobj_type ktype_cdev_dynamic = {
- .release = cdev_dynamic_release,
-};
+static void cdev_alloc_release(struct cdev *cdev)
+{
+ kfree(cdev);
+}
/**
* cdev_alloc() - allocate a cdev structure
@@ -510,10 +506,10 @@ static struct kobj_type ktype_cdev_dynamic = {
*/
struct cdev *cdev_alloc(void)
{
- struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
+ struct cdev *p = kmalloc(sizeof(struct cdev), GFP_KERNEL);
if (p) {
- INIT_LIST_HEAD(&p->list);
- kobject_init(&p->kobj, &ktype_cdev_dynamic);
+ cdev_init(p, NULL);
+ p->release = cdev_alloc_release;
}
return p;
}
@@ -530,7 +526,7 @@ void cdev_init(struct cdev *cdev, const struct file_operations *fops)
{
memset(cdev, 0, sizeof *cdev);
INIT_LIST_HEAD(&cdev->list);
- kobject_init(&cdev->kobj, &ktype_cdev_default);
+ kobject_init(&cdev->kobj, &cdev_ktype);
cdev->ops = fops;
}
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 8855331b2fba..e078b7aea143 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -8,7 +8,11 @@ handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp
sends, and also let tcp autotune the socket send and receive buffers.
This reduces the number of EAGAIN errors returned by TCP/IP in
high stress workloads (and the number of retries on socket writes
-when sending large SMBWriteX requests).
+when sending large SMBWriteX requests). Fix case in which a portion of
+data can in some cases not get written to the file on the server before the
+file is closed. Fix DFS parsing to properly handle path consumed field,
+and to handle certain codepage conversions better. Fix mount and
+umount race that can cause oops in mount or umount or reconnect.
Version 1.54
------------
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 877c85409f1f..1e7b87497f26 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -19,7 +19,7 @@
#define _CIFS_FS_SB_H
#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */
-#define CIFS_MOUNT_SET_UID 2 /* set current->euid in create etc. */
+#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */
#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */
#define CIFS_MOUNT_DIRECT_IO 8 /* do not write nor read through page cache */
#define CIFS_MOUNT_NO_XATTR 0x10 /* if set - disable xattr support */
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 0ab2fb5afef1..3fd3a9df043a 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -121,11 +121,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
/* add the server address */
if (server->addr.sockAddr.sin_family == AF_INET)
- sprintf(dp, "ip4=" NIPQUAD_FMT,
- NIPQUAD(server->addr.sockAddr.sin_addr));
+ sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr);
else if (server->addr.sockAddr.sin_family == AF_INET6)
- sprintf(dp, "ip6=" NIP6_SEQFMT,
- NIP6(server->addr.sockAddr6.sin6_addr));
+ sprintf(dp, "ip6=%pi6", &server->addr.sockAddr6.sin6_addr);
else
goto out;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f1ae1f57c30d..c57c0565547f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -606,7 +606,15 @@ GLOBAL_EXTERN struct list_head cifs_tcp_ses_list;
* changes to the tcon->tidStatus should be done while holding this lock.
*/
GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock;
-GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; /* protects list inserts on 3 above */
+
+/*
+ * This lock protects the cifs_file->llist and cifs_file->flist
+ * list operations, and updates to some flags (cifs_file->invalidHandle)
+ * It will be moved to either use the tcon->stat_lock or equivalent later.
+ * If cifs_tcp_ses_lock and the lock below are both needed to be held, then
+ * the cifs_tcp_ses_lock must be grabbed first and released last.
+ */
+GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;
GLOBAL_EXTERN struct list_head GlobalOplock_Q;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 6f21ecb85ce5..9d8b978137ad 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -39,7 +39,7 @@ extern int smb_send(struct socket *, struct smb_hdr *,
unsigned int /* length */ , struct sockaddr *, bool);
extern unsigned int _GetXid(void);
extern void _FreeXid(unsigned int);
-#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid));
+#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current_fsuid()));
#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));}
extern char *build_path_from_dentry(struct dentry *);
extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index bdda46dd435a..2af8626ced43 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -295,7 +295,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
check for tcp and smb session status done differently
for those three - in the calling routine */
if (tcon) {
- if (tcon->need_reconnect) {
+ if (tcon->tidStatus == CifsExiting) {
/* only tree disconnect, open, and write,
(and ulogoff which does not have tcon)
are allowed as we start force umount */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c7d341714586..e6a6b63f4c8c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -836,8 +836,8 @@ cifs_parse_mount_options(char *options, const char *devname,
/* null target name indicates to use *SMBSERVR default called name
if we end up sending RFC1001 session initialize */
vol->target_rfc1001_name[0] = 0;
- vol->linux_uid = current->uid; /* current->euid instead? */
- vol->linux_gid = current->gid;
+ vol->linux_uid = current_uid(); /* use current_euid() instead? */
+ vol->linux_gid = current_gid();
vol->dir_mode = S_IRWXUGO;
/* 2767 perms indicate mandatory locking support */
vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP);
@@ -2228,8 +2228,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
/* new SMB session uses our srvTcp ref */
pSesInfo->server = srvTcp;
- sprintf(pSesInfo->serverName, "%u.%u.%u.%u",
- NIPQUAD(sin_server->sin_addr.s_addr));
+ sprintf(pSesInfo->serverName, "%pI4",
+ &sin_server->sin_addr.s_addr);
write_lock(&cifs_tcp_ses_lock);
list_add(&pSesInfo->smb_ses_list, &srvTcp->smb_ses_list);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e962e75e6f7b..2f02c52db666 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -235,11 +235,11 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
};
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- args.uid = (__u64) current->fsuid;
+ args.uid = (__u64) current_fsuid();
if (inode->i_mode & S_ISGID)
args.gid = (__u64) inode->i_gid;
else
- args.gid = (__u64) current->fsgid;
+ args.gid = (__u64) current_fsgid();
} else {
args.uid = NO_CHANGE_64;
args.gid = NO_CHANGE_64;
@@ -271,13 +271,13 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
if ((oplock & CIFS_CREATE_ACTION) &&
(cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_SET_UID)) {
- newinode->i_uid = current->fsuid;
+ newinode->i_uid = current_fsuid();
if (inode->i_mode & S_ISGID)
newinode->i_gid =
inode->i_gid;
else
newinode->i_gid =
- current->fsgid;
+ current_fsgid();
}
}
}
@@ -375,8 +375,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
.device = device_number,
};
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- args.uid = (__u64) current->fsuid;
- args.gid = (__u64) current->fsgid;
+ args.uid = (__u64) current_fsuid();
+ args.gid = (__u64) current_fsgid();
} else {
args.uid = NO_CHANGE_64;
args.gid = NO_CHANGE_64;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6449e1aae621..f0a81e631ae6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -488,12 +488,13 @@ int cifs_close(struct inode *inode, struct file *file)
pTcon = cifs_sb->tcon;
if (pSMBFile) {
struct cifsLockInfo *li, *tmp;
-
+ write_lock(&GlobalSMBSeslock);
pSMBFile->closePend = true;
if (pTcon) {
/* no sense reconnecting to close a file that is
already closed */
if (!pTcon->need_reconnect) {
+ write_unlock(&GlobalSMBSeslock);
timeout = 2;
while ((atomic_read(&pSMBFile->wrtPending) != 0)
&& (timeout <= 2048)) {
@@ -510,12 +511,15 @@ int cifs_close(struct inode *inode, struct file *file)
timeout *= 4;
}
if (atomic_read(&pSMBFile->wrtPending))
- cERROR(1,
- ("close with pending writes"));
- rc = CIFSSMBClose(xid, pTcon,
+ cERROR(1, ("close with pending write"));
+ if (!pTcon->need_reconnect &&
+ !pSMBFile->invalidHandle)
+ rc = CIFSSMBClose(xid, pTcon,
pSMBFile->netfid);
- }
- }
+ } else
+ write_unlock(&GlobalSMBSeslock);
+ } else
+ write_unlock(&GlobalSMBSeslock);
/* Delete any outstanding lock records.
We'll lose them when the file is closed anyway. */
@@ -587,15 +591,18 @@ int cifs_closedir(struct inode *inode, struct file *file)
pTcon = cifs_sb->tcon;
cFYI(1, ("Freeing private data in close dir"));
+ write_lock(&GlobalSMBSeslock);
if (!pCFileStruct->srch_inf.endOfSearch &&
!pCFileStruct->invalidHandle) {
pCFileStruct->invalidHandle = true;
+ write_unlock(&GlobalSMBSeslock);
rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
cFYI(1, ("Closing uncompleted readdir with rc %d",
rc));
/* not much we can do if it fails anyway, ignore rc */
rc = 0;
- }
+ } else
+ write_unlock(&GlobalSMBSeslock);
ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
if (ptmp) {
cFYI(1, ("closedir free smb buf in srch struct"));
@@ -1468,7 +1475,11 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
page, pos, copied));
- if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
+ if (PageChecked(page)) {
+ if (copied == len)
+ SetPageUptodate(page);
+ ClearPageChecked(page);
+ } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
SetPageUptodate(page);
if (!PageUptodate(page)) {
@@ -2055,39 +2066,70 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
{
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
+ loff_t page_start = pos & PAGE_MASK;
+ loff_t i_size;
+ struct page *page;
+ int rc = 0;
cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
- *pagep = __grab_cache_page(mapping, index);
- if (!*pagep)
- return -ENOMEM;
-
- if (PageUptodate(*pagep))
- return 0;
+ page = __grab_cache_page(mapping, index);
+ if (!page) {
+ rc = -ENOMEM;
+ goto out;
+ }
- /* If we are writing a full page it will be up to date,
- no need to read from the server */
- if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE)
- return 0;
+ if (PageUptodate(page))
+ goto out;
- if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
- int rc;
+ /*
+ * If we write a full page it will be up to date, no need to read from
+ * the server. If the write is short, we'll end up doing a sync write
+ * instead.
+ */
+ if (len == PAGE_CACHE_SIZE)
+ goto out;
- /* might as well read a page, it is fast enough */
- rc = cifs_readpage_worker(file, *pagep, &offset);
+ /*
+ * optimize away the read when we have an oplock, and we're not
+ * expecting to use any of the data we'd be reading in. That
+ * is, when the page lies beyond the EOF, or straddles the EOF
+ * and the write will cover all of the existing data.
+ */
+ if (CIFS_I(mapping->host)->clientCanCacheRead) {
+ i_size = i_size_read(mapping->host);
+ if (page_start >= i_size ||
+ (offset == 0 && (pos + len) >= i_size)) {
+ zero_user_segments(page, 0, offset,
+ offset + len,
+ PAGE_CACHE_SIZE);
+ /*
+ * PageChecked means that the parts of the page
+ * to which we're not writing are considered up
+ * to date. Once the data is copied to the
+ * page, it can be set uptodate.
+ */
+ SetPageChecked(page);
+ goto out;
+ }
+ }
- /* we do not need to pass errors back
- e.g. if we do not have read access to the file
- because cifs_write_end will attempt synchronous writes
- -- shaggy */
+ if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
+ /*
+ * might as well read a page, it is fast enough. If we get
+ * an error, we don't need to return it. cifs_write_end will
+ * do a sync write instead since PG_uptodate isn't set.
+ */
+ cifs_readpage_worker(file, page, &page_start);
} else {
/* we could try using another file handle if there is one -
but how would we lock it to prevent close of that handle
racing with this read? In any case
this will be written out by write_end so is fine */
}
-
- return 0;
+out:
+ *pagep = page;
+ return rc;
}
const struct address_space_operations cifs_addr_ops = {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ff8c68de4a92..8b7305e73d7e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1143,11 +1143,11 @@ mkdir_get_info:
.device = 0,
};
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- args.uid = (__u64)current->fsuid;
+ args.uid = (__u64)current_fsuid();
if (inode->i_mode & S_ISGID)
args.gid = (__u64)inode->i_gid;
else
- args.gid = (__u64)current->fsgid;
+ args.gid = (__u64)current_fsgid();
} else {
args.uid = NO_CHANGE_64;
args.gid = NO_CHANGE_64;
@@ -1184,13 +1184,13 @@ mkdir_get_info:
if (cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_SET_UID) {
direntry->d_inode->i_uid =
- current->fsuid;
+ current_fsuid();
if (inode->i_mode & S_ISGID)
direntry->d_inode->i_gid =
inode->i_gid;
else
direntry->d_inode->i_gid =
- current->fsgid;
+ current_fsgid();
}
}
}
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 0088a5b52564..f94650683a00 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -65,7 +65,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
switch (command) {
case CIFS_IOC_CHECKUMOUNT:
cFYI(1, ("User unmount attempted"));
- if (cifs_sb->mnt_uid == current->uid)
+ if (cifs_sb->mnt_uid == current_uid())
rc = 0;
else {
rc = -EACCES;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index addd1dcc2d79..8a82d076450b 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -338,13 +338,13 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
/* BB Add support for establishing new tCon and SMB Session */
/* with userid/password pairs found on the smb session */
/* for other target tcp/ip addresses BB */
- if (current->fsuid != treeCon->ses->linux_uid) {
+ if (current_fsuid() != treeCon->ses->linux_uid) {
cFYI(1, ("Multiuser mode and UID "
"did not match tcon uid"));
read_lock(&cifs_tcp_ses_lock);
list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) {
ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list);
- if (ses->linux_uid == current->fsuid) {
+ if (ses->linux_uid == current_fsuid()) {
if (ses->server == treeCon->ses->server) {
cFYI(1, ("found matching uid substitute right smb_uid"));
buffer->Uid = ses->Suid;
@@ -555,12 +555,14 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
continue;
cifs_stats_inc(&tcon->num_oplock_brks);
+ write_lock(&GlobalSMBSeslock);
list_for_each(tmp2, &tcon->openFileList) {
netfile = list_entry(tmp2, struct cifsFileInfo,
tlist);
if (pSMB->Fid != netfile->netfid)
continue;
+ write_unlock(&GlobalSMBSeslock);
read_unlock(&cifs_tcp_ses_lock);
cFYI(1, ("file id match, oplock break"));
pCifsInode = CIFS_I(netfile->pInode);
@@ -576,6 +578,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
return true;
}
+ write_unlock(&GlobalSMBSeslock);
read_unlock(&cifs_tcp_ses_lock);
cFYI(1, ("No matching file for oplock break"));
return true;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 58d57299f2a0..9f51f9bf0292 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -741,11 +741,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
(index_to_find < first_entry_in_buffer)) {
/* close and restart search */
cFYI(1, ("search backing up - close and restart search"));
+ write_lock(&GlobalSMBSeslock);
if (!cifsFile->srch_inf.endOfSearch &&
!cifsFile->invalidHandle) {
cifsFile->invalidHandle = true;
+ write_unlock(&GlobalSMBSeslock);
CIFSFindClose(xid, pTcon, cifsFile->netfid);
- }
+ } else
+ write_unlock(&GlobalSMBSeslock);
if (cifsFile->srch_inf.ntwrk_buf_start) {
cFYI(1, ("freeing SMB ff cache buf on search rewind"));
if (cifsFile->srch_inf.smallBuf)
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 8a2370341c7a..a5bf5771a22a 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -32,8 +32,8 @@ void coda_cache_enter(struct inode *inode, int mask)
struct coda_inode_info *cii = ITOC(inode);
cii->c_cached_epoch = atomic_read(&permission_epoch);
- if (cii->c_uid != current->fsuid) {
- cii->c_uid = current->fsuid;
+ if (cii->c_uid != current_fsuid()) {
+ cii->c_uid = current_fsuid();
cii->c_cached_perm = mask;
} else
cii->c_cached_perm |= mask;
@@ -60,7 +60,7 @@ int coda_cache_check(struct inode *inode, int mask)
int hit;
hit = (mask & cii->c_cached_perm) == mask &&
- cii->c_uid == current->fsuid &&
+ cii->c_uid == current_fsuid() &&
cii->c_cached_epoch == atomic_read(&permission_epoch);
return hit;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29137ff3ca67..466303db2df6 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/stat.h>
+#include <linux/cred.h>
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/string.h>
@@ -174,7 +175,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode),
- coda_flags, coda_file->f_uid);
+ coda_flags, coda_file->f_cred->fsuid);
host_inode = cfi->cfi_container->f_path.dentry->d_inode;
cii = ITOC(coda_inode);
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index ce432bca95d1..c274d949179d 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,7 +52,7 @@ static void *alloc_upcall(int opcode, int size)
inp->ih.opcode = opcode;
inp->ih.pid = current->pid;
inp->ih.pgid = task_pgrp_nr(current);
- inp->ih.uid = current->fsuid;
+ inp->ih.uid = current_fsuid();
return (void*)inp;
}
diff --git a/fs/compat.c b/fs/compat.c
index e5f49f538502..d1ece79b6411 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1393,10 +1393,20 @@ int compat_do_execve(char * filename,
if (!bprm)
goto out_ret;
+ retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+ if (retval < 0)
+ goto out_free;
+
+ retval = -ENOMEM;
+ bprm->cred = prepare_exec_creds();
+ if (!bprm->cred)
+ goto out_unlock;
+ check_unsafe_exec(bprm);
+
file = open_exec(filename);
retval = PTR_ERR(file);
if (IS_ERR(file))
- goto out_kfree;
+ goto out_unlock;
sched_exec();
@@ -1410,14 +1420,10 @@ int compat_do_execve(char * filename,
bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
- goto out_mm;
+ goto out;
bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) < 0)
- goto out_mm;
-
- retval = security_bprm_alloc(bprm);
- if (retval)
goto out;
retval = prepare_binprm(bprm);
@@ -1438,19 +1444,16 @@ int compat_do_execve(char * filename,
goto out;
retval = search_binary_handler(bprm, regs);
- if (retval >= 0) {
- /* execve success */
- security_bprm_free(bprm);
- acct_update_integrals(current);
- free_bprm(bprm);
- return retval;
- }
+ if (retval < 0)
+ goto out;
-out:
- if (bprm->security)
- security_bprm_free(bprm);
+ /* execve succeeded */
+ mutex_unlock(&current->cred_exec_mutex);
+ acct_update_integrals(current);
+ free_bprm(bprm);
+ return retval;
-out_mm:
+out:
if (bprm->mm)
mmput(bprm->mm);
@@ -1460,7 +1463,10 @@ out_file:
fput(bprm->file);
}
-out_kfree:
+out_unlock:
+ mutex_unlock(&current->cred_exec_mutex);
+
+out_free:
free_bprm(bprm);
out_ret:
diff --git a/fs/dcache.c b/fs/dcache.c
index a1d86c7f3e66..bb653fa5a1a4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
+#include <linux/backing-dev.h>
#include "internal.h"
@@ -142,15 +143,6 @@ static void dentry_lru_add_tail(struct dentry *dentry)
static void dentry_lru_del(struct dentry *dentry)
{
if (!list_empty(&dentry->d_lru)) {
- list_del(&dentry->d_lru);
- dentry->d_sb->s_nr_dentry_unused--;
- dentry_stat.nr_unused--;
- }
-}
-
-static void dentry_lru_del_init(struct dentry *dentry)
-{
- if (likely(!list_empty(&dentry->d_lru))) {
list_del_init(&dentry->d_lru);
dentry->d_sb->s_nr_dentry_unused--;
dentry_stat.nr_unused--;
@@ -173,7 +165,10 @@ static struct dentry *d_kill(struct dentry *dentry)
list_del(&dentry->d_u.d_child);
dentry_stat.nr_dentry--; /* For d_free, below */
- /*drops the locks, at that point nobody can reach this dentry */
+ /*
+ * drops the locks, at that point nobody (aside from defrag)
+ * can reach this dentry
+ */
dentry_iput(dentry);
if (IS_ROOT(dentry))
parent = NULL;
@@ -320,7 +315,7 @@ int d_invalidate(struct dentry * dentry)
static inline struct dentry * __dget_locked(struct dentry *dentry)
{
atomic_inc(&dentry->d_count);
- dentry_lru_del_init(dentry);
+ dentry_lru_del(dentry);
return dentry;
}
@@ -436,7 +431,7 @@ static void prune_one_dentry(struct dentry * dentry)
if (dentry->d_op && dentry->d_op->d_delete)
dentry->d_op->d_delete(dentry);
- dentry_lru_del_init(dentry);
+ dentry_lru_del(dentry);
__d_drop(dentry);
dentry = d_kill(dentry);
spin_lock(&dcache_lock);
@@ -496,7 +491,7 @@ restart:
}
while (!list_empty(&tmp)) {
dentry = list_entry(tmp.prev, struct dentry, d_lru);
- dentry_lru_del_init(dentry);
+ dentry_lru_del(dentry);
spin_lock(&dentry->d_lock);
/*
* We found an inuse dentry which was not removed from
@@ -625,7 +620,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/* detach this root from the system */
spin_lock(&dcache_lock);
- dentry_lru_del_init(dentry);
+ dentry_lru_del(dentry);
__d_drop(dentry);
spin_unlock(&dcache_lock);
@@ -639,7 +634,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
spin_lock(&dcache_lock);
list_for_each_entry(loop, &dentry->d_subdirs,
d_u.d_child) {
- dentry_lru_del_init(loop);
+ dentry_lru_del(loop);
__d_drop(loop);
cond_resched_lock(&dcache_lock);
}
@@ -822,7 +817,7 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
- dentry_lru_del_init(dentry);
+ dentry_lru_del(dentry);
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
@@ -904,6 +899,16 @@ static struct shrinker dcache_shrinker = {
.seeks = DEFAULT_SEEKS,
};
+static void dcache_ctor(void *p)
+{
+ struct dentry *dentry = p;
+
+ spin_lock_init(&dentry->d_lock);
+ dentry->d_inode = NULL;
+ INIT_LIST_HEAD(&dentry->d_lru);
+ INIT_LIST_HEAD(&dentry->d_alias);
+}
+
/**
* d_alloc - allocate a dcache entry
* @parent: parent of entry to allocate
@@ -941,8 +946,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
atomic_set(&dentry->d_count, 1);
dentry->d_flags = DCACHE_UNHASHED;
- spin_lock_init(&dentry->d_lock);
- dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_op = NULL;
@@ -952,9 +955,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
dentry->d_cookie = NULL;
#endif
INIT_HLIST_NODE(&dentry->d_hash);
- INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
- INIT_LIST_HEAD(&dentry->d_alias);
if (parent) {
dentry->d_parent = dget(parent);
@@ -2278,19 +2279,110 @@ static void __init dcache_init_early(void)
INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
+/*
+ * The slab allocator is holding off frees. We can safely examine
+ * the object without the danger of it vanishing from under us.
+ */
+static void *get_dentries(struct kmem_cache *s, int nr, void **v)
+{
+ struct dentry *dentry;
+ int i;
+
+ spin_lock(&dcache_lock);
+ for (i = 0; i < nr; i++) {
+ dentry = v[i];
+
+ /*
+ * Three sorts of dentries cannot be reclaimed:
+ *
+ * 1. dentries that are in the process of being allocated
+ * or being freed. In that case the dentry is neither
+ * on the LRU nor hashed.
+ *
+ * 2. Fake hashed entries as used for anonymous dentries
+ * and pipe I/O. The fake hashed entries have d_flags
+ * set to indicate a hashed entry. However, the
+ * d_hash field indicates that the entry is not hashed.
+ *
+ * 3. dentries that have a backing store that is not
+ * writable. This is true for tmpsfs and other in
+ * memory filesystems. Removing dentries from them
+ * would loose dentries for good.
+ */
+ if ((d_unhashed(dentry) && list_empty(&dentry->d_lru)) ||
+ (!d_unhashed(dentry) && hlist_unhashed(&dentry->d_hash)) ||
+ (dentry->d_inode &&
+ !mapping_cap_writeback_dirty(dentry->d_inode->i_mapping)))
+ /* Ignore this dentry */
+ v[i] = NULL;
+ else
+ /* dget_locked will remove the dentry from the LRU */
+ dget_locked(dentry);
+ }
+ spin_unlock(&dcache_lock);
+ return NULL;
+}
+
+/*
+ * Slab has dropped all the locks. Get rid of the refcount obtained
+ * earlier and also free the object.
+ */
+static void kick_dentries(struct kmem_cache *s,
+ int nr, void **v, void *private)
+{
+ struct dentry *dentry;
+ int i;
+
+ /*
+ * First invalidate the dentries without holding the dcache lock
+ */
+ for (i = 0; i < nr; i++) {
+ dentry = v[i];
+
+ if (dentry)
+ d_invalidate(dentry);
+ }
+
+ /*
+ * If we are the last one holding a reference then the dentries can
+ * be freed. We need the dcache_lock.
+ */
+ spin_lock(&dcache_lock);
+ for (i = 0; i < nr; i++) {
+ dentry = v[i];
+ if (!dentry)
+ continue;
+
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count) > 1) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ dput(dentry);
+ spin_lock(&dcache_lock);
+ continue;
+ }
+
+ prune_one_dentry(dentry);
+ }
+ spin_unlock(&dcache_lock);
+
+ /*
+ * dentries are freed using RCU so we need to wait until RCU
+ * operations are complete.
+ */
+ synchronize_rcu();
+}
+
static void __init dcache_init(void)
{
int loop;
- /*
- * A constructor could be added for stable state like the lists,
- * but it is probably not worth it because of the cache nature
- * of the dcache.
- */
- dentry_cache = KMEM_CACHE(dentry,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
-
+ dentry_cache = kmem_cache_create("dentry_cache", sizeof(struct dentry),
+ 0, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD,
+ dcache_ctor);
+
register_shrinker(&dcache_shrinker);
+ kmem_cache_setup_defrag(dentry_cache, get_dentries, kick_dentries);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 4a714f6c1bed..5d61b7c06e13 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -222,8 +222,8 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
return -ENOMEM;
inode->i_ino = number+2;
- inode->i_uid = config.setuid ? config.uid : current->fsuid;
- inode->i_gid = config.setgid ? config.gid : current->fsgid;
+ inode->i_uid = config.setuid ? config.uid : current_fsuid();
+ inode->i_gid = config.setgid ? config.gid : current_fsgid();
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
init_special_inode(inode, S_IFCHR|config.mode, device);
inode->i_private = tty;
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 85defeb64df4..92969f879a17 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -374,7 +374,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
struct list_head *list;
struct dlm_rsb *r;
int offset = 0, dir_nodeid;
- uint16_t be_namelen;
+ __be16 be_namelen;
down_read(&ls->ls_root_sem);
@@ -410,15 +410,15 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
/* Write end-of-block record */
- be_namelen = 0;
- memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
- offset += sizeof(uint16_t);
+ be_namelen = cpu_to_be16(0);
+ memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+ offset += sizeof(__be16);
goto out;
}
be_namelen = cpu_to_be16(r->res_length);
- memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
- offset += sizeof(uint16_t);
+ memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+ offset += sizeof(__be16);
memcpy(outbuf + offset, r->res_name, r->res_length);
offset += r->res_length;
}
@@ -430,9 +430,9 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
if ((list == &ls->ls_root_list) &&
(offset + sizeof(uint16_t) <= outlen)) {
- be_namelen = 0xFFFF;
- memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
- offset += sizeof(uint16_t);
+ be_namelen = cpu_to_be16(0xFFFF);
+ memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+ offset += sizeof(__be16);
}
out:
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3962262f991a..103a5ebd1371 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -295,6 +295,7 @@ static int add_sock(struct socket *sock, struct connection *con)
con->sock->sk->sk_write_space = lowcomms_write_space;
con->sock->sk->sk_state_change = lowcomms_state_change;
con->sock->sk->sk_user_data = con;
+ con->sock->sk->sk_allocation = GFP_NOFS;
return 0;
}
@@ -823,7 +824,6 @@ static void sctp_init_assoc(struct connection *con)
len = e->len;
offset = e->offset;
spin_unlock(&con->writequeue_lock);
- kmap(e->page);
/* Send the first block off the write queue */
iov[0].iov_base = page_address(e->page)+offset;
@@ -854,7 +854,6 @@ static void sctp_init_assoc(struct connection *con)
if (e->len == 0 && e->users == 0) {
list_del(&e->list);
- kunmap(e->page);
free_entry(e);
}
spin_unlock(&con->writequeue_lock);
@@ -1203,8 +1202,6 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
if (e) {
got_one:
- if (users == 0)
- kmap(e->page);
*ppc = page_address(e->page) + offset;
return e;
}
@@ -1233,7 +1230,6 @@ void dlm_lowcomms_commit_buffer(void *mh)
if (users)
goto out;
e->len = e->end - e->offset;
- kunmap(e->page);
spin_unlock(&con->writequeue_lock);
if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
@@ -1272,7 +1268,6 @@ static void send_to_sock(struct connection *con)
offset = e->offset;
BUG_ON(len == 0 && e->users == 0);
spin_unlock(&con->writequeue_lock);
- kmap(e->page);
ret = 0;
if (len) {
@@ -1294,7 +1289,6 @@ static void send_to_sock(struct connection *con)
if (e->len == 0 && e->users == 0) {
list_del(&e->list);
- kunmap(e->page);
free_entry(e);
continue;
}
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 54c14c6d06cb..c1775b84ebab 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -39,7 +39,7 @@ char *dlm_allocate_lvb(struct dlm_ls *ls)
{
char *p;
- p = kzalloc(ls->ls_lvblen, GFP_KERNEL);
+ p = kzalloc(ls->ls_lvblen, ls->ls_allocation);
return p;
}
@@ -57,7 +57,7 @@ struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
- r = kzalloc(sizeof(*r) + namelen, GFP_KERNEL);
+ r = kzalloc(sizeof(*r) + namelen, ls->ls_allocation);
return r;
}
@@ -72,7 +72,7 @@ struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
{
struct dlm_lkb *lkb;
- lkb = kmem_cache_zalloc(lkb_cache, GFP_KERNEL);
+ lkb = kmem_cache_zalloc(lkb_cache, ls->ls_allocation);
return lkb;
}
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 07ac709f3ed7..f3396c622aec 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -112,7 +112,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
ordinary messages). */
if (msglen > sizeof(__tmp) && p == &__tmp.p) {
- p = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
+ p = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
if (p == NULL)
return ret;
}
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 18bda83cc892..aa2a5775a027 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -127,8 +127,8 @@ static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
void dlm_timeout_warn(struct dlm_lkb *lkb)
{
+ struct sk_buff *uninitialized_var(send_skb);
struct dlm_lock_data *data;
- struct sk_buff *send_skb;
size_t size;
int rv;
diff --git a/fs/dquot.c b/fs/dquot.c
index 5e95261005b2..61bfff64e5af 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -211,8 +211,6 @@ static struct hlist_head *dquot_hash;
struct dqstats dqstats;
-static void dqput(struct dquot *dquot);
-
static inline unsigned int
hashfn(const struct super_block *sb, unsigned int id, int type)
{
@@ -415,6 +413,17 @@ out_dqlock:
return ret;
}
+void dquot_destroy(struct dquot *dquot)
+{
+ kmem_cache_free(dquot_cachep, dquot);
+}
+EXPORT_SYMBOL(dquot_destroy);
+
+static inline void do_destroy_dquot(struct dquot *dquot)
+{
+ dquot->dq_sb->dq_op->destroy_dquot(dquot);
+}
+
/* Invalidate all dquots on the list. Note that this function is called after
* quota is disabled and pointers from inodes removed so there cannot be new
* quota users. There can still be some users of quotas due to inodes being
@@ -463,9 +472,44 @@ restart:
remove_dquot_hash(dquot);
remove_free_dquot(dquot);
remove_inuse(dquot);
- kmem_cache_free(dquot_cachep, dquot);
+ do_destroy_dquot(dquot);
+ }
+ spin_unlock(&dq_list_lock);
+}
+
+/* Call callback for every active dquot on given filesystem */
+int dquot_scan_active(struct super_block *sb,
+ int (*fn)(struct dquot *dquot, unsigned long priv),
+ unsigned long priv)
+{
+ struct dquot *dquot, *old_dquot = NULL;
+ int ret = 0;
+
+ mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+ spin_lock(&dq_list_lock);
+ list_for_each_entry(dquot, &inuse_list, dq_inuse) {
+ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ continue;
+ if (dquot->dq_sb != sb)
+ continue;
+ /* Now we have active dquot so we can just increase use count */
+ atomic_inc(&dquot->dq_count);
+ dqstats.lookups++;
+ spin_unlock(&dq_list_lock);
+ dqput(old_dquot);
+ old_dquot = dquot;
+ ret = fn(dquot, priv);
+ if (ret < 0)
+ goto out;
+ spin_lock(&dq_list_lock);
+ /* We are safe to continue now because our dquot could not
+ * be moved out of the inuse list while we hold the reference */
}
spin_unlock(&dq_list_lock);
+out:
+ dqput(old_dquot);
+ mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+ return ret;
}
int vfs_quota_sync(struct super_block *sb, int type)
@@ -479,7 +523,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
- if (!sb_has_quota_enabled(sb, cnt))
+ if (!sb_has_quota_active(sb, cnt))
continue;
spin_lock(&dq_list_lock);
dirty = &dqopt->info[cnt].dqi_dirty_list;
@@ -504,8 +548,8 @@ int vfs_quota_sync(struct super_block *sb, int type)
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)
- && info_dirty(&dqopt->info[cnt]))
+ if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
+ && info_dirty(&dqopt->info[cnt]))
sb->dq_op->write_info(sb, cnt);
spin_lock(&dq_list_lock);
dqstats.syncs++;
@@ -527,7 +571,7 @@ static void prune_dqcache(int count)
remove_dquot_hash(dquot);
remove_free_dquot(dquot);
remove_inuse(dquot);
- kmem_cache_free(dquot_cachep, dquot);
+ do_destroy_dquot(dquot);
count--;
head = free_dquots.prev;
}
@@ -558,7 +602,7 @@ static struct shrinker dqcache_shrinker = {
* NOTE: If you change this function please check whether dqput_blocks() works right...
* MUST be called with either dqptr_sem or dqonoff_mutex held
*/
-static void dqput(struct dquot *dquot)
+void dqput(struct dquot *dquot)
{
int ret;
@@ -584,7 +628,7 @@ we_slept:
/* We have more than one user... nothing to do */
atomic_dec(&dquot->dq_count);
/* Releasing dquot during quotaoff phase? */
- if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) &&
+ if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) &&
atomic_read(&dquot->dq_count) == 1)
wake_up(&dquot->dq_wait_unused);
spin_unlock(&dq_list_lock);
@@ -625,11 +669,17 @@ we_slept:
spin_unlock(&dq_list_lock);
}
+struct dquot *dquot_alloc(struct super_block *sb, int type)
+{
+ return kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
+}
+EXPORT_SYMBOL(dquot_alloc);
+
static struct dquot *get_empty_dquot(struct super_block *sb, int type)
{
struct dquot *dquot;
- dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
+ dquot = sb->dq_op->alloc_dquot(sb, type);
if(!dquot)
return NODQUOT;
@@ -647,15 +697,33 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
}
/*
+ * Check whether dquot is in memory.
+ * MUST be called with either dqptr_sem or dqonoff_mutex held
+ */
+int dquot_is_cached(struct super_block *sb, unsigned int id, int type)
+{
+ unsigned int hashent = hashfn(sb, id, type);
+ int ret = 0;
+
+ if (!sb_has_quota_active(sb, type))
+ return 0;
+ spin_lock(&dq_list_lock);
+ if (find_dquot(hashent, sb, id, type) != NODQUOT)
+ ret = 1;
+ spin_unlock(&dq_list_lock);
+ return ret;
+}
+
+/*
* Get reference to dquot
* MUST be called with either dqptr_sem or dqonoff_mutex held
*/
-static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
+struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
{
unsigned int hashent = hashfn(sb, id, type);
struct dquot *dquot, *empty = NODQUOT;
- if (!sb_has_quota_enabled(sb, type))
+ if (!sb_has_quota_active(sb, type))
return NODQUOT;
we_slept:
spin_lock(&dq_list_lock);
@@ -682,7 +750,7 @@ we_slept:
dqstats.lookups++;
spin_unlock(&dq_list_lock);
if (empty)
- kmem_cache_free(dquot_cachep, empty);
+ do_destroy_dquot(empty);
}
/* Wait for dq_lock - after this we know that either dquot_release() is already
* finished or it will be canceled due to dq_count > 1 test */
@@ -820,7 +888,7 @@ static void drop_dquot_ref(struct super_block *sb, int type)
}
}
-static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
+static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number)
{
dquot->dq_dqb.dqb_curinodes += number;
}
@@ -830,9 +898,10 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
dquot->dq_dqb.dqb_curspace += number;
}
-static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
+static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
{
- if (dquot->dq_dqb.dqb_curinodes > number)
+ if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+ dquot->dq_dqb.dqb_curinodes >= number)
dquot->dq_dqb.dqb_curinodes -= number;
else
dquot->dq_dqb.dqb_curinodes = 0;
@@ -843,11 +912,12 @@ static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
static inline void dquot_decr_space(struct dquot *dquot, qsize_t number)
{
- if (dquot->dq_dqb.dqb_curspace > number)
+ if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+ dquot->dq_dqb.dqb_curspace >= number)
dquot->dq_dqb.dqb_curspace -= number;
else
dquot->dq_dqb.dqb_curspace = 0;
- if (toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+ if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
dquot->dq_dqb.dqb_btime = (time_t) 0;
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}
@@ -874,7 +944,7 @@ static inline int need_print_warning(struct dquot *dquot)
switch (dquot->dq_type) {
case USRQUOTA:
- return current->fsuid == dquot->dq_id;
+ return current_fsuid() == dquot->dq_id;
case GRPQUOTA:
return in_group_p(dquot->dq_id);
}
@@ -981,7 +1051,7 @@ static void send_warning(const struct dquot *dquot, const char warntype)
MINOR(dquot->dq_sb->s_dev));
if (ret)
goto attr_err_out;
- ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
+ ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
if (ret)
goto attr_err_out;
genlmsg_end(skb, msg_head);
@@ -1023,10 +1093,11 @@ static inline char ignore_hardlimit(struct dquot *dquot)
}
/* needs dq_data_lock */
-static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
+static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
{
*warntype = QUOTA_NL_NOWARN;
- if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
+ if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+ test_bit(DQ_FAKE_B, &dquot->dq_flags))
return QUOTA_OK;
if (dquot->dq_dqb.dqb_ihardlimit &&
@@ -1058,11 +1129,12 @@ static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
{
*warntype = QUOTA_NL_NOWARN;
- if (space <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
+ if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+ test_bit(DQ_FAKE_B, &dquot->dq_flags))
return QUOTA_OK;
if (dquot->dq_dqb.dqb_bhardlimit &&
- toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bhardlimit &&
+ dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
*warntype = QUOTA_NL_BHARDWARN;
@@ -1070,7 +1142,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
- toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
+ dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
@@ -1079,7 +1151,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
- toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
+ dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime == 0) {
if (!prealloc) {
*warntype = QUOTA_NL_BSOFTWARN;
@@ -1096,10 +1168,11 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
return QUOTA_OK;
}
-static int info_idq_free(struct dquot *dquot, ulong inodes)
+static int info_idq_free(struct dquot *dquot, qsize_t inodes)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
- dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+ dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit ||
+ !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type))
return QUOTA_NL_NOWARN;
if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
@@ -1113,15 +1186,13 @@ static int info_idq_free(struct dquot *dquot, ulong inodes)
static int info_bdq_free(struct dquot *dquot, qsize_t space)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
- toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+ dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
return QUOTA_NL_NOWARN;
- if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
- dquot->dq_dqb.dqb_bsoftlimit)
+ if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit)
return QUOTA_NL_BSOFTBELOW;
- if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
- toqb(dquot->dq_dqb.dqb_curspace - space) <
- dquot->dq_dqb.dqb_bhardlimit)
+ if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit &&
+ dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit)
return QUOTA_NL_BHARDBELOW;
return QUOTA_NL_NOWARN;
}
@@ -1166,17 +1237,23 @@ out_err:
* Release all quotas referenced by inode
* Transaction must be started at an entry
*/
-int dquot_drop(struct inode *inode)
+int dquot_drop_locked(struct inode *inode)
{
int cnt;
- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] != NODQUOT) {
dqput(inode->i_dquot[cnt]);
inode->i_dquot[cnt] = NODQUOT;
}
}
+ return 0;
+}
+
+int dquot_drop(struct inode *inode)
+{
+ down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ dquot_drop_locked(inode);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
return 0;
}
@@ -1264,7 +1341,7 @@ warn_put_all:
/*
* This operation can block, but only after everything is updated
*/
-int dquot_alloc_inode(const struct inode *inode, unsigned long number)
+int dquot_alloc_inode(const struct inode *inode, qsize_t number)
{
int cnt, ret = NO_QUOTA;
char warntype[MAXQUOTAS];
@@ -1349,7 +1426,7 @@ out_sub:
/*
* This operation can block, but only after everything is updated
*/
-int dquot_free_inode(const struct inode *inode, unsigned long number)
+int dquot_free_inode(const struct inode *inode, qsize_t number)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
@@ -1495,7 +1572,7 @@ warn_put_all:
/* Wrapper for transferring ownership of an inode */
int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
{
- if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+ if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
vfs_dq_init(inode);
if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
return 1;
@@ -1533,54 +1610,27 @@ struct dquot_operations dquot_operations = {
.acquire_dquot = dquot_acquire,
.release_dquot = dquot_release,
.mark_dirty = dquot_mark_dquot_dirty,
- .write_info = dquot_commit_info
+ .write_info = dquot_commit_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
};
-static inline void set_enable_flags(struct quota_info *dqopt, int type)
-{
- switch (type) {
- case USRQUOTA:
- dqopt->flags |= DQUOT_USR_ENABLED;
- dqopt->flags &= ~DQUOT_USR_SUSPENDED;
- break;
- case GRPQUOTA:
- dqopt->flags |= DQUOT_GRP_ENABLED;
- dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
- break;
- }
-}
-
-static inline void reset_enable_flags(struct quota_info *dqopt, int type,
- int remount)
-{
- switch (type) {
- case USRQUOTA:
- dqopt->flags &= ~DQUOT_USR_ENABLED;
- if (remount)
- dqopt->flags |= DQUOT_USR_SUSPENDED;
- else
- dqopt->flags &= ~DQUOT_USR_SUSPENDED;
- break;
- case GRPQUOTA:
- dqopt->flags &= ~DQUOT_GRP_ENABLED;
- if (remount)
- dqopt->flags |= DQUOT_GRP_SUSPENDED;
- else
- dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
- break;
- }
-}
-
-
/*
* Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
*/
-int vfs_quota_off(struct super_block *sb, int type, int remount)
+int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
{
int cnt, ret = 0;
struct quota_info *dqopt = sb_dqopt(sb);
struct inode *toputinode[MAXQUOTAS];
+ /* Cannot turn off usage accounting without turning off limits, or
+ * suspend quotas and simultaneously turn quotas off. */
+ if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED))
+ || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED |
+ DQUOT_USAGE_ENABLED)))
+ return -EINVAL;
+
/* We need to serialize quota_off() for device */
mutex_lock(&dqopt->dqonoff_mutex);
@@ -1589,7 +1639,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
* sometimes we are called when fill_super() failed and calling
* sync_fs() in such cases does no good.
*/
- if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) {
+ if (!sb_any_quota_loaded(sb)) {
mutex_unlock(&dqopt->dqonoff_mutex);
return 0;
}
@@ -1597,17 +1647,28 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
toputinode[cnt] = NULL;
if (type != -1 && cnt != type)
continue;
- /* If we keep inodes of quota files after remount and quotaoff
- * is called, drop kept inodes. */
- if (!remount && sb_has_quota_suspended(sb, cnt)) {
- iput(dqopt->files[cnt]);
- dqopt->files[cnt] = NULL;
- reset_enable_flags(dqopt, cnt, 0);
+ if (!sb_has_quota_loaded(sb, cnt))
continue;
+
+ if (flags & DQUOT_SUSPENDED) {
+ dqopt->flags |=
+ dquot_state_flag(DQUOT_SUSPENDED, cnt);
+ } else {
+ dqopt->flags &= ~dquot_state_flag(flags, cnt);
+ /* Turning off suspended quotas? */
+ if (!sb_has_quota_loaded(sb, cnt) &&
+ sb_has_quota_suspended(sb, cnt)) {
+ dqopt->flags &= ~dquot_state_flag(
+ DQUOT_SUSPENDED, cnt);
+ iput(dqopt->files[cnt]);
+ dqopt->files[cnt] = NULL;
+ continue;
+ }
}
- if (!sb_has_quota_enabled(sb, cnt))
+
+ /* We still have to keep quota loaded? */
+ if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED))
continue;
- reset_enable_flags(dqopt, cnt, remount);
/* Note: these are blocking operations */
drop_dquot_ref(sb, cnt);
@@ -1623,7 +1684,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
put_quota_format(dqopt->info[cnt].dqi_format);
toputinode[cnt] = dqopt->files[cnt];
- if (!remount)
+ if (!sb_has_quota_loaded(sb, cnt))
dqopt->files[cnt] = NULL;
dqopt->info[cnt].dqi_flags = 0;
dqopt->info[cnt].dqi_igrace = 0;
@@ -1631,6 +1692,11 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
dqopt->ops[cnt] = NULL;
}
mutex_unlock(&dqopt->dqonoff_mutex);
+
+ /* Skip syncing and setting flags if quota files are hidden */
+ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+ goto put_inodes;
+
/* Sync the superblock so that buffers with quota data are written to
* disk (and so userspace sees correct data afterwards). */
if (sb->s_op->sync_fs)
@@ -1646,7 +1712,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
mutex_lock(&dqopt->dqonoff_mutex);
/* If quota was reenabled in the meantime, we have
* nothing to do */
- if (!sb_has_quota_enabled(sb, cnt)) {
+ if (!sb_has_quota_loaded(sb, cnt)) {
mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA);
toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
S_NOATIME | S_NOQUOTA);
@@ -1655,26 +1721,43 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
mark_inode_dirty(toputinode[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
+ }
+ if (sb->s_bdev)
+ invalidate_bdev(sb->s_bdev);
+put_inodes:
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (toputinode[cnt]) {
/* On remount RO, we keep the inode pointer so that we
- * can reenable quota on the subsequent remount RW.
- * But we have better not keep inode pointer when there
- * is pending delete on the quota file... */
- if (!remount)
+ * can reenable quota on the subsequent remount RW. We
+ * have to check 'flags' variable and not use sb_has_
+ * function because another quotaon / quotaoff could
+ * change global state before we got here. We refuse
+ * to suspend quotas when there is pending delete on
+ * the quota file... */
+ if (!(flags & DQUOT_SUSPENDED))
iput(toputinode[cnt]);
else if (!toputinode[cnt]->i_nlink)
ret = -EBUSY;
}
- if (sb->s_bdev)
- invalidate_bdev(sb->s_bdev);
return ret;
}
+int vfs_quota_off(struct super_block *sb, int type, int remount)
+{
+ return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED :
+ (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED));
+}
+
/*
* Turn quotas on on a device
*/
-/* Helper function when we already have the inode */
-static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
+/*
+ * Helper function to turn quotas on when we already have the inode of
+ * quota file and no quota information is loaded.
+ */
+static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
+ unsigned int flags)
{
struct quota_format_type *fmt = find_quota_format(format_id);
struct super_block *sb = inode->i_sb;
@@ -1696,27 +1779,37 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
error = -EINVAL;
goto out_fmt;
}
+ /* Usage always has to be set... */
+ if (!(flags & DQUOT_USAGE_ENABLED)) {
+ error = -EINVAL;
+ goto out_fmt;
+ }
- /* As we bypass the pagecache we must now flush the inode so that
- * we see all the changes from userspace... */
- write_inode_now(inode, 1);
- /* And now flush the block cache so that kernel sees the changes */
- invalidate_bdev(sb->s_bdev);
+ if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+ /* As we bypass the pagecache we must now flush the inode so
+ * that we see all the changes from userspace... */
+ write_inode_now(inode, 1);
+ /* And now flush the block cache so that kernel sees the
+ * changes */
+ invalidate_bdev(sb->s_bdev);
+ }
mutex_lock(&inode->i_mutex);
mutex_lock(&dqopt->dqonoff_mutex);
- if (sb_has_quota_enabled(sb, type) ||
- sb_has_quota_suspended(sb, type)) {
+ if (sb_has_quota_loaded(sb, type)) {
error = -EBUSY;
goto out_lock;
}
- /* We don't want quota and atime on quota files (deadlocks possible)
- * Also nobody should write to the file - we use special IO operations
- * which ignore the immutable bit. */
- down_write(&dqopt->dqptr_sem);
- oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
- inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
- up_write(&dqopt->dqptr_sem);
- sb->dq_op->drop(inode);
+
+ if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+ /* We don't want quota and atime on quota files (deadlocks
+ * possible) Also nobody should write to the file - we use
+ * special IO operations which ignore the immutable bit. */
+ down_write(&dqopt->dqptr_sem);
+ oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
+ inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
+ up_write(&dqopt->dqptr_sem);
+ sb->dq_op->drop(inode);
+ }
error = -EIO;
dqopt->files[type] = igrab(inode);
@@ -1737,7 +1830,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
}
mutex_unlock(&dqopt->dqio_mutex);
mutex_unlock(&inode->i_mutex);
- set_enable_flags(dqopt, type);
+ dqopt->flags |= dquot_state_flag(flags, type);
add_dquot_ref(sb, type);
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -1770,20 +1863,23 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
struct quota_info *dqopt = sb_dqopt(sb);
struct inode *inode;
int ret;
+ unsigned int flags;
mutex_lock(&dqopt->dqonoff_mutex);
if (!sb_has_quota_suspended(sb, type)) {
mutex_unlock(&dqopt->dqonoff_mutex);
return 0;
}
- BUG_ON(sb_has_quota_enabled(sb, type));
-
inode = dqopt->files[type];
dqopt->files[type] = NULL;
- reset_enable_flags(dqopt, type, 0);
+ flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED, type);
+ dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type);
mutex_unlock(&dqopt->dqonoff_mutex);
- ret = vfs_quota_on_inode(inode, type, dqopt->info[type].dqi_fmt_id);
+ flags = dquot_generic_flag(flags, type);
+ ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id,
+ flags);
iput(inode);
return ret;
@@ -1799,12 +1895,12 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
if (path->mnt->mnt_sb != sb)
error = -EXDEV;
else
- error = vfs_quota_on_inode(path->dentry->d_inode, type,
- format_id);
+ error = vfs_load_quota_inode(path->dentry->d_inode, type,
+ format_id, DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED);
return error;
}
-/* Actual function called from quotactl() */
int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
int remount)
{
@@ -1823,6 +1919,50 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
}
/*
+ * More powerful function for turning on quotas allowing setting
+ * of individual quota flags
+ */
+int vfs_quota_enable(struct inode *inode, int type, int format_id,
+ unsigned int flags)
+{
+ int ret = 0;
+ struct super_block *sb = inode->i_sb;
+ struct quota_info *dqopt = sb_dqopt(sb);
+
+ /* Just unsuspend quotas? */
+ if (flags & DQUOT_SUSPENDED)
+ return vfs_quota_on_remount(sb, type);
+ if (!flags)
+ return 0;
+ /* Just updating flags needed? */
+ if (sb_has_quota_loaded(sb, type)) {
+ mutex_lock(&dqopt->dqonoff_mutex);
+ /* Now do a reliable test... */
+ if (!sb_has_quota_loaded(sb, type)) {
+ mutex_unlock(&dqopt->dqonoff_mutex);
+ goto load_quota;
+ }
+ if (flags & DQUOT_USAGE_ENABLED &&
+ sb_has_quota_usage_enabled(sb, type)) {
+ ret = -EBUSY;
+ goto out_lock;
+ }
+ if (flags & DQUOT_LIMITS_ENABLED &&
+ sb_has_quota_limits_enabled(sb, type)) {
+ ret = -EBUSY;
+ goto out_lock;
+ }
+ sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
+out_lock:
+ mutex_unlock(&dqopt->dqonoff_mutex);
+ return ret;
+ }
+
+load_quota:
+ return vfs_load_quota_inode(inode, type, format_id, flags);
+}
+
+/*
* This function is used when filesystem needs to initialize quotas
* during mount time.
*/
@@ -1843,7 +1983,8 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
error = security_quota_on(dentry);
if (!error)
- error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
+ error = vfs_load_quota_inode(dentry->d_inode, type, format_id,
+ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
out:
dput(dentry);
@@ -1866,14 +2007,24 @@ int vfs_dq_quota_on_remount(struct super_block *sb)
return ret;
}
+static inline qsize_t qbtos(qsize_t blocks)
+{
+ return blocks << QIF_DQBLKSIZE_BITS;
+}
+
+static inline qsize_t stoqb(qsize_t space)
+{
+ return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS;
+}
+
/* Generic routine for getting common part of quota structure */
static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
{
struct mem_dqblk *dm = &dquot->dq_dqb;
spin_lock(&dq_data_lock);
- di->dqb_bhardlimit = dm->dqb_bhardlimit;
- di->dqb_bsoftlimit = dm->dqb_bsoftlimit;
+ di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
+ di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
di->dqb_curspace = dm->dqb_curspace;
di->dqb_ihardlimit = dm->dqb_ihardlimit;
di->dqb_isoftlimit = dm->dqb_isoftlimit;
@@ -1918,28 +2069,36 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
if (di->dqb_valid & QIF_SPACE) {
dm->dqb_curspace = di->dqb_curspace;
check_blim = 1;
+ __set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_BLIMITS) {
- dm->dqb_bsoftlimit = di->dqb_bsoftlimit;
- dm->dqb_bhardlimit = di->dqb_bhardlimit;
+ dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
+ dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
check_blim = 1;
+ __set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_INODES) {
dm->dqb_curinodes = di->dqb_curinodes;
check_ilim = 1;
+ __set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_ILIMITS) {
dm->dqb_isoftlimit = di->dqb_isoftlimit;
dm->dqb_ihardlimit = di->dqb_ihardlimit;
check_ilim = 1;
+ __set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
}
- if (di->dqb_valid & QIF_BTIME)
+ if (di->dqb_valid & QIF_BTIME) {
dm->dqb_btime = di->dqb_btime;
- if (di->dqb_valid & QIF_ITIME)
+ __set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+ }
+ if (di->dqb_valid & QIF_ITIME) {
dm->dqb_itime = di->dqb_itime;
+ __set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+ }
if (check_blim) {
- if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) {
+ if (!dm->dqb_bsoftlimit || dm->dqb_curspace < dm->dqb_bsoftlimit) {
dm->dqb_btime = 0;
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}
@@ -1970,12 +2129,14 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
int rc;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!(dquot = dqget(sb, id, type))) {
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- return -ESRCH;
+ dquot = dqget(sb, id, type);
+ if (!dquot) {
+ rc = -ESRCH;
+ goto out;
}
rc = do_set_dqblk(dquot, di);
dqput(dquot);
+out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return rc;
}
@@ -1986,7 +2147,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
struct mem_dqinfo *mi;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!sb_has_quota_enabled(sb, type)) {
+ if (!sb_has_quota_active(sb, type)) {
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return -ESRCH;
}
@@ -2005,11 +2166,12 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
{
struct mem_dqinfo *mi;
+ int err = 0;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!sb_has_quota_enabled(sb, type)) {
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- return -ESRCH;
+ if (!sb_has_quota_active(sb, type)) {
+ err = -ESRCH;
+ goto out;
}
mi = sb_dqopt(sb)->info + type;
spin_lock(&dq_data_lock);
@@ -2023,8 +2185,9 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
mark_info_dirty(sb, type);
/* Force write to disk */
sb->dq_op->write_info(sb, type);
+out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- return 0;
+ return err;
}
struct quotactl_ops vfs_quotactl_ops = {
@@ -2186,10 +2349,13 @@ EXPORT_SYMBOL(register_quota_format);
EXPORT_SYMBOL(unregister_quota_format);
EXPORT_SYMBOL(dqstats);
EXPORT_SYMBOL(dq_data_lock);
+EXPORT_SYMBOL(vfs_quota_enable);
EXPORT_SYMBOL(vfs_quota_on);
EXPORT_SYMBOL(vfs_quota_on_path);
EXPORT_SYMBOL(vfs_quota_on_mount);
+EXPORT_SYMBOL(vfs_quota_disable);
EXPORT_SYMBOL(vfs_quota_off);
+EXPORT_SYMBOL(dquot_scan_active);
EXPORT_SYMBOL(vfs_quota_sync);
EXPORT_SYMBOL(vfs_get_dqinfo);
EXPORT_SYMBOL(vfs_set_dqinfo);
@@ -2202,7 +2368,11 @@ EXPORT_SYMBOL(dquot_release);
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
EXPORT_SYMBOL(dquot_initialize);
EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(dquot_drop_locked);
EXPORT_SYMBOL(vfs_dq_drop);
+EXPORT_SYMBOL(dqget);
+EXPORT_SYMBOL(dqput);
+EXPORT_SYMBOL(dquot_is_cached);
EXPORT_SYMBOL(dquot_alloc_space);
EXPORT_SYMBOL(dquot_alloc_inode);
EXPORT_SYMBOL(dquot_free_space);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 3e5637fc3779..55f68b421547 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -58,7 +58,7 @@ static void drop_slab(void)
int nr_objects;
do {
- nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+ nr_objects = shrink_slab(1000, GFP_KERNEL, 1000, NULL);
} while (nr_objects > 10);
}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 3504cf9df358..a75026d35d16 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -691,7 +691,8 @@ int ecryptfs_init_kthread(void);
void ecryptfs_destroy_kthread(void);
int ecryptfs_privileged_open(struct file **lower_file,
struct dentry *lower_dentry,
- struct vfsmount *lower_mnt);
+ struct vfsmount *lower_mnt,
+ const struct cred *cred);
int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e22bc3961345..0d713b691941 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1037,17 +1037,14 @@ static int
decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
struct ecryptfs_crypt_stat *crypt_stat)
{
- struct scatterlist dst_sg;
- struct scatterlist src_sg;
+ struct scatterlist dst_sg[2];
+ struct scatterlist src_sg[2];
struct mutex *tfm_mutex;
struct blkcipher_desc desc = {
.flags = CRYPTO_TFM_REQ_MAY_SLEEP
};
int rc = 0;
- sg_init_table(&dst_sg, 1);
- sg_init_table(&src_sg, 1);
-
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(
KERN_DEBUG, "Session key encryption key (size [%d]):\n",
@@ -1066,8 +1063,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
}
rc = virt_to_scatterlist(auth_tok->session_key.encrypted_key,
auth_tok->session_key.encrypted_key_size,
- &src_sg, 1);
- if (rc != 1) {
+ src_sg, 2);
+ if (rc < 1 || rc > 2) {
printk(KERN_ERR "Internal error whilst attempting to convert "
"auth_tok->session_key.encrypted_key to scatterlist; "
"expected rc = 1; got rc = [%d]. "
@@ -1079,8 +1076,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
auth_tok->session_key.encrypted_key_size;
rc = virt_to_scatterlist(auth_tok->session_key.decrypted_key,
auth_tok->session_key.decrypted_key_size,
- &dst_sg, 1);
- if (rc != 1) {
+ dst_sg, 2);
+ if (rc < 1 || rc > 2) {
printk(KERN_ERR "Internal error whilst attempting to convert "
"auth_tok->session_key.decrypted_key to scatterlist; "
"expected rc = 1; got rc = [%d]\n", rc);
@@ -1096,7 +1093,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
rc = -EINVAL;
goto out;
}
- rc = crypto_blkcipher_decrypt(&desc, &dst_sg, &src_sg,
+ rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg,
auth_tok->session_key.encrypted_key_size);
mutex_unlock(tfm_mutex);
if (unlikely(rc)) {
@@ -1539,8 +1536,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
size_t i;
size_t encrypted_session_key_valid = 0;
char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
- struct scatterlist dst_sg;
- struct scatterlist src_sg;
+ struct scatterlist dst_sg[2];
+ struct scatterlist src_sg[2];
struct mutex *tfm_mutex = NULL;
u8 cipher_code;
size_t packet_size_length;
@@ -1619,8 +1616,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
ecryptfs_dump_hex(session_key_encryption_key, 16);
}
rc = virt_to_scatterlist(crypt_stat->key, key_rec->enc_key_size,
- &src_sg, 1);
- if (rc != 1) {
+ src_sg, 2);
+ if (rc < 1 || rc > 2) {
ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
"for crypt_stat session key; expected rc = 1; "
"got rc = [%d]. key_rec->enc_key_size = [%d]\n",
@@ -1629,8 +1626,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
goto out;
}
rc = virt_to_scatterlist(key_rec->enc_key, key_rec->enc_key_size,
- &dst_sg, 1);
- if (rc != 1) {
+ dst_sg, 2);
+ if (rc < 1 || rc > 2) {
ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
"for crypt_stat encrypted session key; "
"expected rc = 1; got rc = [%d]. "
@@ -1651,7 +1648,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
rc = 0;
ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
crypt_stat->key_size);
- rc = crypto_blkcipher_encrypt(&desc, &dst_sg, &src_sg,
+ rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
(*key_rec).enc_key_size);
mutex_unlock(tfm_mutex);
if (rc) {
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index c440c6b58b2d..c6d7a4d748a0 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -73,7 +73,7 @@ static int ecryptfs_threadfn(void *ignored)
mntget(req->lower_mnt);
(*req->lower_file) = dentry_open(
req->lower_dentry, req->lower_mnt,
- (O_RDWR | O_LARGEFILE));
+ (O_RDWR | O_LARGEFILE), current_cred());
req->flags |= ECRYPTFS_REQ_PROCESSED;
}
wake_up(&req->wait);
@@ -132,7 +132,8 @@ void ecryptfs_destroy_kthread(void)
*/
int ecryptfs_privileged_open(struct file **lower_file,
struct dentry *lower_dentry,
- struct vfsmount *lower_mnt)
+ struct vfsmount *lower_mnt,
+ const struct cred *cred)
{
struct ecryptfs_open_req *req;
int rc = 0;
@@ -143,7 +144,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
dget(lower_dentry);
mntget(lower_mnt);
(*lower_file) = dentry_open(lower_dentry, lower_mnt,
- (O_RDWR | O_LARGEFILE));
+ (O_RDWR | O_LARGEFILE), cred);
if (!IS_ERR(*lower_file))
goto out;
req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
@@ -184,7 +185,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
dget(lower_dentry);
mntget(lower_mnt);
(*lower_file) = dentry_open(lower_dentry, lower_mnt,
- (O_RDONLY | O_LARGEFILE));
+ (O_RDONLY | O_LARGEFILE), cred);
if (IS_ERR(*lower_file)) {
rc = PTR_ERR(*req->lower_file);
(*lower_file) = NULL;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 64d2ba980df4..fd630713c5c7 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -115,6 +115,7 @@ void __ecryptfs_printk(const char *fmt, ...)
*/
int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
{
+ const struct cred *cred = current_cred();
struct ecryptfs_inode_info *inode_info =
ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
int rc = 0;
@@ -127,7 +128,7 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
rc = ecryptfs_privileged_open(&inode_info->lower_file,
- lower_dentry, lower_mnt);
+ lower_dentry, lower_mnt, cred);
if (rc || IS_ERR(inode_info->lower_file)) {
printk(KERN_ERR "Error opening lower persistent file "
"for lower_dentry [0x%p] and lower_mnt [0x%p]; "
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index c6983978a31e..6913f727624d 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -360,7 +360,8 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
struct ecryptfs_msg_ctx *msg_ctx;
size_t msg_size;
struct nsproxy *nsproxy;
- struct user_namespace *current_user_ns;
+ struct user_namespace *tsk_user_ns;
+ uid_t ctx_euid;
int rc;
if (msg->index >= ecryptfs_message_buf_len) {
@@ -384,9 +385,9 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
mutex_unlock(&ecryptfs_daemon_hash_mux);
goto wake_up;
}
- current_user_ns = nsproxy->user_ns;
- rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid,
- current_user_ns);
+ tsk_user_ns = __task_cred(msg_ctx->task)->user->user_ns;
+ ctx_euid = task_euid(msg_ctx->task);
+ rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns);
rcu_read_unlock();
mutex_unlock(&ecryptfs_daemon_hash_mux);
if (rc) {
@@ -394,28 +395,28 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
printk(KERN_WARNING "%s: User [%d] received a "
"message response from process [0x%p] but does "
"not have a registered daemon\n", __func__,
- msg_ctx->task->euid, pid);
+ ctx_euid, pid);
goto wake_up;
}
- if (msg_ctx->task->euid != euid) {
+ if (ctx_euid != euid) {
rc = -EBADMSG;
printk(KERN_WARNING "%s: Received message from user "
"[%d]; expected message from user [%d]\n", __func__,
- euid, msg_ctx->task->euid);
+ euid, ctx_euid);
goto unlock;
}
- if (current_user_ns != user_ns) {
+ if (tsk_user_ns != user_ns) {
rc = -EBADMSG;
printk(KERN_WARNING "%s: Received message from user_ns "
"[0x%p]; expected message from user_ns [0x%p]\n",
- __func__, user_ns, nsproxy->user_ns);
+ __func__, user_ns, tsk_user_ns);
goto unlock;
}
if (daemon->pid != pid) {
rc = -EBADMSG;
printk(KERN_ERR "%s: User [%d] sent a message response "
"from an unrecognized process [0x%p]\n",
- __func__, msg_ctx->task->euid, pid);
+ __func__, ctx_euid, pid);
goto unlock;
}
if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
@@ -464,14 +465,14 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
struct ecryptfs_msg_ctx **msg_ctx)
{
struct ecryptfs_daemon *daemon;
+ uid_t euid = current_euid();
int rc;
- rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
- current->nsproxy->user_ns);
+ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
if (rc || !daemon) {
rc = -ENOTCONN;
printk(KERN_ERR "%s: User [%d] does not have a daemon "
- "registered\n", __func__, current->euid);
+ "registered\n", __func__, euid);
goto out;
}
mutex_lock(&ecryptfs_msg_ctx_lists_mux);
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index b484792a0996..efd95a0ed1ea 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -42,12 +42,12 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
{
struct ecryptfs_daemon *daemon;
unsigned int mask = 0;
+ uid_t euid = current_euid();
int rc;
mutex_lock(&ecryptfs_daemon_hash_mux);
/* TODO: Just use file->private_data? */
- rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
- current->nsproxy->user_ns);
+ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
BUG_ON(rc || !daemon);
mutex_lock(&daemon->mux);
mutex_unlock(&ecryptfs_daemon_hash_mux);
@@ -83,6 +83,7 @@ static int
ecryptfs_miscdev_open(struct inode *inode, struct file *file)
{
struct ecryptfs_daemon *daemon = NULL;
+ uid_t euid = current_euid();
int rc;
mutex_lock(&ecryptfs_daemon_hash_mux);
@@ -93,11 +94,9 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
"count; rc = [%d]\n", __func__, rc);
goto out_unlock_daemon_list;
}
- rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
- current->nsproxy->user_ns);
+ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
if (rc || !daemon) {
- rc = ecryptfs_spawn_daemon(&daemon, current->euid,
- current->nsproxy->user_ns,
+ rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(),
task_pid(current));
if (rc) {
printk(KERN_ERR "%s: Error attempting to spawn daemon; "
@@ -147,11 +146,11 @@ static int
ecryptfs_miscdev_release(struct inode *inode, struct file *file)
{
struct ecryptfs_daemon *daemon = NULL;
+ uid_t euid = current_euid();
int rc;
mutex_lock(&ecryptfs_daemon_hash_mux);
- rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
- current->nsproxy->user_ns);
+ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
BUG_ON(rc || !daemon);
mutex_lock(&daemon->mux);
BUG_ON(daemon->pid != task_pid(current));
@@ -246,12 +245,12 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
char packet_length[3];
size_t i;
size_t total_length;
+ uid_t euid = current_euid();
int rc;
mutex_lock(&ecryptfs_daemon_hash_mux);
/* TODO: Just use file->private_data? */
- rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
- current->nsproxy->user_ns);
+ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
BUG_ON(rc || !daemon);
mutex_lock(&daemon->mux);
if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
@@ -290,8 +289,8 @@ check_list:
* message from the queue; try again */
goto check_list;
}
- BUG_ON(current->euid != daemon->euid);
- BUG_ON(current->nsproxy->user_ns != daemon->user_ns);
+ BUG_ON(euid != daemon->euid);
+ BUG_ON(current_user_ns() != daemon->user_ns);
BUG_ON(task_pid(current) != daemon->pid);
msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
struct ecryptfs_msg_ctx, daemon_out_list);
@@ -414,6 +413,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
size_t packet_size, packet_size_length, i;
ssize_t sz = 0;
char *data;
+ uid_t euid = current_euid();
int rc;
if (count == 0)
@@ -463,8 +463,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
goto out_free;
}
rc = ecryptfs_miscdev_response(&data[i], packet_size,
- current->euid,
- current->nsproxy->user_ns,
+ euid, current_user_ns(),
task_pid(current), seq);
if (rc)
printk(KERN_WARNING "%s: Failed to deliver miscdev "
diff --git a/fs/exec.c b/fs/exec.c
index 4e834f16d9da..32f13e299417 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
+#include "internal.h"
#ifdef __alpha__
/* for /sbin/loader handling in search_binary_handler() */
@@ -980,7 +981,7 @@ int flush_old_exec(struct linux_binprm * bprm)
/* This is the point of no return */
current->sas_ss_sp = current->sas_ss_size = 0;
- if (current->euid == current->uid && current->egid == current->gid)
+ if (current_euid() == current_uid() && current_egid() == current_gid())
set_dumpable(current->mm, 1);
else
set_dumpable(current->mm, suid_dumpable);
@@ -1007,16 +1008,17 @@ int flush_old_exec(struct linux_binprm * bprm)
*/
current->mm->task_size = TASK_SIZE;
- if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
- suid_keys(current);
- set_dumpable(current->mm, suid_dumpable);
+ /* install the new credentials */
+ if (bprm->cred->uid != current_euid() ||
+ bprm->cred->gid != current_egid()) {
current->pdeath_signal = 0;
} else if (file_permission(bprm->file, MAY_READ) ||
- (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
- suid_keys(current);
+ bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
set_dumpable(current->mm, suid_dumpable);
}
+ current->personality &= ~bprm->per_clear;
+
/* An exec changes our domain. We are no longer part of the thread
group */
@@ -1033,13 +1035,50 @@ out:
EXPORT_SYMBOL(flush_old_exec);
+/*
+ * install the new credentials for this executable
+ */
+void install_exec_creds(struct linux_binprm *bprm)
+{
+ security_bprm_committing_creds(bprm);
+
+ commit_creds(bprm->cred);
+ bprm->cred = NULL;
+
+ /* cred_exec_mutex must be held at least to this point to prevent
+ * ptrace_attach() from altering our determination of the task's
+ * credentials; any time after this it may be unlocked */
+
+ security_bprm_committed_creds(bprm);
+}
+EXPORT_SYMBOL(install_exec_creds);
+
+/*
+ * determine how safe it is to execute the proposed program
+ * - the caller must hold current->cred_exec_mutex to protect against
+ * PTRACE_ATTACH
+ */
+void check_unsafe_exec(struct linux_binprm *bprm)
+{
+ struct task_struct *p = current;
+
+ bprm->unsafe = tracehook_unsafe_exec(p);
+
+ if (atomic_read(&p->fs->count) > 1 ||
+ atomic_read(&p->files->count) > 1 ||
+ atomic_read(&p->sighand->count) > 1)
+ bprm->unsafe |= LSM_UNSAFE_SHARE;
+}
+
/*
* Fill the binprm structure from the inode.
* Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
+ *
+ * This may be called multiple times for binary chains (scripts for example).
*/
int prepare_binprm(struct linux_binprm *bprm)
{
- int mode;
+ umode_t mode;
struct inode * inode = bprm->file->f_path.dentry->d_inode;
int retval;
@@ -1047,14 +1086,15 @@ int prepare_binprm(struct linux_binprm *bprm)
if (bprm->file->f_op == NULL)
return -EACCES;
- bprm->e_uid = current->euid;
- bprm->e_gid = current->egid;
+ /* clear any previous set[ug]id data from a previous binary */
+ bprm->cred->euid = current_euid();
+ bprm->cred->egid = current_egid();
- if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
+ if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
/* Set-uid? */
if (mode & S_ISUID) {
- current->personality &= ~PER_CLEAR_ON_SETID;
- bprm->e_uid = inode->i_uid;
+ bprm->per_clear |= PER_CLEAR_ON_SETID;
+ bprm->cred->euid = inode->i_uid;
}
/* Set-gid? */
@@ -1064,52 +1104,23 @@ int prepare_binprm(struct linux_binprm *bprm)
* executable.
*/
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
- current->personality &= ~PER_CLEAR_ON_SETID;
- bprm->e_gid = inode->i_gid;
+ bprm->per_clear |= PER_CLEAR_ON_SETID;
+ bprm->cred->egid = inode->i_gid;
}
}
/* fill in binprm security blob */
- retval = security_bprm_set(bprm);
+ retval = security_bprm_set_creds(bprm);
if (retval)
return retval;
+ bprm->cred_prepared = 1;
- memset(bprm->buf,0,BINPRM_BUF_SIZE);
- return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
+ memset(bprm->buf, 0, BINPRM_BUF_SIZE);
+ return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
}
EXPORT_SYMBOL(prepare_binprm);
-static int unsafe_exec(struct task_struct *p)
-{
- int unsafe = tracehook_unsafe_exec(p);
-
- if (atomic_read(&p->fs->count) > 1 ||
- atomic_read(&p->files->count) > 1 ||
- atomic_read(&p->sighand->count) > 1)
- unsafe |= LSM_UNSAFE_SHARE;
-
- return unsafe;
-}
-
-void compute_creds(struct linux_binprm *bprm)
-{
- int unsafe;
-
- if (bprm->e_uid != current->uid) {
- suid_keys(current);
- current->pdeath_signal = 0;
- }
- exec_keys(current);
-
- task_lock(current);
- unsafe = unsafe_exec(current);
- security_bprm_apply_creds(bprm, unsafe);
- task_unlock(current);
- security_bprm_post_apply_creds(bprm);
-}
-EXPORT_SYMBOL(compute_creds);
-
/*
* Arguments are '\0' separated strings found at the location bprm->p
* points to; chop off the first by relocating brpm->p to right after
@@ -1262,6 +1273,8 @@ EXPORT_SYMBOL(search_binary_handler);
void free_bprm(struct linux_binprm *bprm)
{
free_arg_pages(bprm);
+ if (bprm->cred)
+ abort_creds(bprm->cred);
kfree(bprm);
}
@@ -1287,10 +1300,20 @@ int do_execve(char * filename,
if (!bprm)
goto out_files;
+ retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+ if (retval < 0)
+ goto out_free;
+
+ retval = -ENOMEM;
+ bprm->cred = prepare_exec_creds();
+ if (!bprm->cred)
+ goto out_unlock;
+ check_unsafe_exec(bprm);
+
file = open_exec(filename);
retval = PTR_ERR(file);
if (IS_ERR(file))
- goto out_kfree;
+ goto out_unlock;
sched_exec();
@@ -1304,14 +1327,10 @@ int do_execve(char * filename,
bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
- goto out_mm;
+ goto out;
bprm->envc = count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) < 0)
- goto out_mm;
-
- retval = security_bprm_alloc(bprm);
- if (retval)
goto out;
retval = prepare_binprm(bprm);
@@ -1333,21 +1352,18 @@ int do_execve(char * filename,
current->flags &= ~PF_KTHREAD;
retval = search_binary_handler(bprm,regs);
- if (retval >= 0) {
- /* execve success */
- security_bprm_free(bprm);
- acct_update_integrals(current);
- free_bprm(bprm);
- if (displaced)
- put_files_struct(displaced);
- return retval;
- }
+ if (retval < 0)
+ goto out;
-out:
- if (bprm->security)
- security_bprm_free(bprm);
+ /* execve succeeded */
+ mutex_unlock(&current->cred_exec_mutex);
+ acct_update_integrals(current);
+ free_bprm(bprm);
+ if (displaced)
+ put_files_struct(displaced);
+ return retval;
-out_mm:
+out:
if (bprm->mm)
mmput (bprm->mm);
@@ -1356,7 +1372,11 @@ out_file:
allow_write_access(bprm->file);
fput(bprm->file);
}
-out_kfree:
+
+out_unlock:
+ mutex_unlock(&current->cred_exec_mutex);
+
+out_free:
free_bprm(bprm);
out_files:
@@ -1388,6 +1408,7 @@ EXPORT_SYMBOL(set_binfmt);
*/
static int format_corename(char *corename, long signr)
{
+ const struct cred *cred = current_cred();
const char *pat_ptr = core_pattern;
int ispipe = (*pat_ptr == '|');
char *out_ptr = corename;
@@ -1424,7 +1445,7 @@ static int format_corename(char *corename, long signr)
/* uid */
case 'u':
rc = snprintf(out_ptr, out_end - out_ptr,
- "%d", current->uid);
+ "%d", cred->uid);
if (rc > out_end - out_ptr)
goto out;
out_ptr += rc;
@@ -1432,7 +1453,7 @@ static int format_corename(char *corename, long signr)
/* gid */
case 'g':
rc = snprintf(out_ptr, out_end - out_ptr,
- "%d", current->gid);
+ "%d", cred->gid);
if (rc > out_end - out_ptr)
goto out;
out_ptr += rc;
@@ -1708,8 +1729,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
struct linux_binfmt * binfmt;
struct inode * inode;
struct file * file;
+ const struct cred *old_cred;
+ struct cred *cred;
int retval = 0;
- int fsuid = current->fsuid;
int flag = 0;
int ispipe = 0;
unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
@@ -1722,12 +1744,20 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
binfmt = current->binfmt;
if (!binfmt || !binfmt->core_dump)
goto fail;
+
+ cred = prepare_creds();
+ if (!cred) {
+ retval = -ENOMEM;
+ goto fail;
+ }
+
down_write(&mm->mmap_sem);
/*
* If another thread got here first, or we are not dumpable, bail out.
*/
if (mm->core_state || !get_dumpable(mm)) {
up_write(&mm->mmap_sem);
+ put_cred(cred);
goto fail;
}
@@ -1738,12 +1768,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
*/
if (get_dumpable(mm) == 2) { /* Setuid core dump mode */
flag = O_EXCL; /* Stop rewrite attacks */
- current->fsuid = 0; /* Dump root private */
+ cred->fsuid = 0; /* Dump root private */
}
retval = coredump_wait(exit_code, &core_state);
- if (retval < 0)
+ if (retval < 0) {
+ put_cred(cred);
goto fail;
+ }
+
+ old_cred = override_creds(cred);
/*
* Clear any false indication of pending signals that might
@@ -1815,7 +1849,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
* Dont allow local users get cute and trick others to coredump
* into their pre-created files:
*/
- if (inode->i_uid != current->fsuid)
+ if (inode->i_uid != current_fsuid())
goto close_fail;
if (!file->f_op)
goto close_fail;
@@ -1834,7 +1868,8 @@ fail_unlock:
if (helper_argv)
argv_free(helper_argv);
- current->fsuid = fsuid;
+ revert_creds(old_cred);
+ put_cred(cred);
coredump_finish(mm);
fail:
return retval;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 80246bad1b7f..ec1fb918200f 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/namei.h>
+#include <linux/sched.h>
#define dprintk(fmt, args...) do{}while(0)
@@ -249,6 +250,7 @@ static int filldir_one(void * __buf, const char * name, int len,
static int get_name(struct vfsmount *mnt, struct dentry *dentry,
char *name, struct dentry *child)
{
+ const struct cred *cred = current_cred();
struct inode *dir = dentry->d_inode;
int error;
struct file *file;
@@ -263,7 +265,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,
/*
* Open the directory ...
*/
- file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY);
+ file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred);
error = PTR_ERR(file);
if (IS_ERR(file))
goto out;
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 6dac7ba2d22d..4a29d6376081 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1193,7 +1193,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi)
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
- sbi->s_resuid != current->fsuid &&
+ sbi->s_resuid != current_fsuid() &&
(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
return 0;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index f59741346760..8d0add625870 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -550,7 +550,7 @@ got:
sb->s_dirt = 1;
mark_buffer_dirty(bh2);
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
if (test_opt (sb, GRPID))
inode->i_gid = dir->i_gid;
else if (dir->i_mode & S_ISGID) {
@@ -558,7 +558,7 @@ got:
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
inode->i_mode = mode;
inode->i_ino = ino;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 647cd888ac87..cf26ff18a795 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -171,6 +171,12 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
+static void *ext2_get_inodes(struct kmem_cache *s, int nr, void **v)
+{
+ return fs_get_inodes(s, nr, v,
+ offsetof(struct ext2_inode_info, vfs_inode));
+}
+
static int init_inodecache(void)
{
ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
@@ -180,6 +186,9 @@ static int init_inodecache(void)
init_once);
if (ext2_inode_cachep == NULL)
return -ENOMEM;
+
+ kmem_cache_setup_defrag(ext2_inode_cachep,
+ ext2_get_inodes, kick_inodes);
return 0;
}
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index f5b57a2ca35a..0dbf1c048475 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1422,7 +1422,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
- sbi->s_resuid != current->fsuid &&
+ sbi->s_resuid != current_fsuid() &&
(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
return 0;
}
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index c30e149fbd2e..7d215b4d4f2e 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -35,23 +35,71 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
/* The old legacy hash */
-static __u32 dx_hack_hash (const char *name, int len)
+static __u32 dx_hack_hash_unsigned(const char *name, int len)
{
- __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
+ __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
+ const unsigned char *ucp = (const unsigned char *) name;
+
+ while (len--) {
+ hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
+
+ if (hash & 0x80000000)
+ hash -= 0x7fffffff;
+ hash1 = hash0;
+ hash0 = hash;
+ }
+ return hash0 << 1;
+}
+
+static __u32 dx_hack_hash_signed(const char *name, int len)
+{
+ __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
+ const signed char *scp = (const signed char *) name;
+
while (len--) {
- __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
+ hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
- if (hash & 0x80000000) hash -= 0x7fffffff;
+ if (hash & 0x80000000)
+ hash -= 0x7fffffff;
hash1 = hash0;
hash0 = hash;
}
- return (hash0 << 1);
+ return hash0 << 1;
}
-static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
{
__u32 pad, val;
int i;
+ const signed char *scp = (const signed char *) msg;
+
+ pad = (__u32)len | ((__u32)len << 8);
+ pad |= pad << 16;
+
+ val = pad;
+ if (len > num*4)
+ len = num * 4;
+ for (i = 0; i < len; i++) {
+ if ((i % 4) == 0)
+ val = pad;
+ val = ((int) scp[i]) + (val << 8);
+ if ((i % 4) == 3) {
+ *buf++ = val;
+ val = pad;
+ num--;
+ }
+ }
+ if (--num >= 0)
+ *buf++ = val;
+ while (--num >= 0)
+ *buf++ = pad;
+}
+
+static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
+{
+ __u32 pad, val;
+ int i;
+ const unsigned char *ucp = (const unsigned char *) msg;
pad = (__u32)len | ((__u32)len << 8);
pad |= pad << 16;
@@ -62,7 +110,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
for (i=0; i < len; i++) {
if ((i % 4) == 0)
val = pad;
- val = msg[i] + (val << 8);
+ val = ((int) ucp[i]) + (val << 8);
if ((i % 4) == 3) {
*buf++ = val;
val = pad;
@@ -95,6 +143,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
const char *p;
int i;
__u32 in[8], buf[4];
+ void (*str2hashbuf)(const char *, int, __u32 *, int) =
+ str2hashbuf_signed;
/* Initialize the default seed for the hash checksum functions */
buf[0] = 0x67452301;
@@ -113,13 +163,18 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
}
switch (hinfo->hash_version) {
+ case DX_HASH_LEGACY_UNSIGNED:
+ hash = dx_hack_hash_unsigned(name, len);
+ break;
case DX_HASH_LEGACY:
- hash = dx_hack_hash(name, len);
+ hash = dx_hack_hash_signed(name, len);
break;
+ case DX_HASH_HALF_MD4_UNSIGNED:
+ str2hashbuf = str2hashbuf_unsigned;
case DX_HASH_HALF_MD4:
p = name;
while (len > 0) {
- str2hashbuf(p, len, in, 8);
+ (*str2hashbuf)(p, len, in, 8);
half_md4_transform(buf, in);
len -= 32;
p += 32;
@@ -127,10 +182,12 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
minor_hash = buf[2];
hash = buf[1];
break;
+ case DX_HASH_TEA_UNSIGNED:
+ str2hashbuf = str2hashbuf_unsigned;
case DX_HASH_TEA:
p = name;
while (len > 0) {
- str2hashbuf(p, len, in, 4);
+ (*str2hashbuf)(p, len, in, 4);
TEA_transform(buf, in);
len -= 16;
p += 16;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 47b678d73e7a..490bd0ed7896 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -539,7 +539,7 @@ got:
percpu_counter_inc(&sbi->s_dirs_counter);
sb->s_dirt = 1;
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
if (test_opt (sb, GRPID))
inode->i_gid = dir->i_gid;
else if (dir->i_mode & S_ISGID) {
@@ -547,7 +547,7 @@ got:
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
inode->i_mode = mode;
inode->i_ino = ino;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 3e5edc92aa0b..4fb04931e6da 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -368,6 +368,8 @@ dx_probe(struct qstr *entry, struct inode *dir,
goto fail;
}
hinfo->hash_version = root->info.hash_version;
+ if (hinfo->hash_version <= DX_HASH_TEA)
+ hinfo->hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
if (entry)
ext3fs_dirhash(entry->name, entry->len, hinfo);
@@ -636,6 +638,9 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
dir = dir_file->f_path.dentry->d_inode;
if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+ if (hinfo.hash_version <= DX_HASH_TEA)
+ hinfo.hash_version +=
+ EXT3_SB(dir->i_sb)->s_hash_unsigned;
hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
start_hash, start_minor_hash);
@@ -1398,6 +1403,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
/* Initialize as for dx_probe */
hinfo.hash_version = root->info.hash_version;
+ if (hinfo.hash_version <= DX_HASH_TEA)
+ hinfo.hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned;
hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
ext3fs_dirhash(name, namelen, &hinfo);
frame = frames;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f6c94f232ec1..f4fb51811c59 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -489,6 +489,12 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
+static void *ext3_get_inodes(struct kmem_cache *s, int nr, void **v)
+{
+ return fs_get_inodes(s, nr, v,
+ offsetof(struct ext3_inode_info, vfs_inode));
+}
+
static int init_inodecache(void)
{
ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
@@ -498,6 +504,8 @@ static int init_inodecache(void)
init_once);
if (ext3_inode_cachep == NULL)
return -ENOMEM;
+ kmem_cache_setup_defrag(ext3_inode_cachep,
+ ext3_get_inodes, kick_inodes);
return 0;
}
@@ -713,7 +721,9 @@ static struct dquot_operations ext3_quota_operations = {
.acquire_dquot = ext3_acquire_dquot,
.release_dquot = ext3_release_dquot,
.mark_dirty = ext3_mark_dquot_dirty,
- .write_info = ext3_write_info
+ .write_info = ext3_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
};
static struct quotactl_ops ext3_qctl_operations = {
@@ -1035,8 +1045,7 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
"EXT3-fs: Cannot change journaled "
@@ -1075,8 +1084,7 @@ set_qf_name:
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT3-fs: Cannot change "
"journaled quota options when "
@@ -1095,8 +1103,7 @@ clear_qf_name:
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
set_qf_format:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
printk(KERN_ERR "EXT3-fs: Cannot change "
"journaled quota options when "
@@ -1115,8 +1122,7 @@ set_qf_format:
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
- if (sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) {
+ if (sb_any_quota_loaded(sb)) {
printk(KERN_ERR "EXT3-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
@@ -1744,6 +1750,18 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
for (i=0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
+ i = le32_to_cpu(es->s_flags);
+ if (i & EXT2_FLAGS_UNSIGNED_HASH)
+ sbi->s_hash_unsigned = 3;
+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+#ifdef __CHAR_UNSIGNED__
+ es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+ sbi->s_hash_unsigned = 3;
+#else
+ es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+#endif
+ sb->s_dirt = 1;
+ }
if (sbi->s_blocks_per_group > blocksize * 8) {
printk (KERN_ERR
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d2003cdc36aa..752eddc6e0e8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -614,7 +614,7 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
if (dirty_blocks < 0) {
printk(KERN_CRIT "Dirty block accounting "
"went wrong %lld\n",
- dirty_blocks);
+ (long long)dirty_blocks);
}
}
/* Check whether we have space after
@@ -624,7 +624,7 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
return 1;
/* Hm, nope. Are (enough) root reserved blocks available? */
- if (sbi->s_resuid == current->fsuid ||
+ if (sbi->s_resuid == current_fsuid() ||
((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
capable(CAP_SYS_RESOURCE)) {
if (free_blocks >= (nblocks + dirty_blocks))
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c827024..8370ffd2d62f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -891,6 +891,9 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len)
#define DX_HASH_LEGACY 0
#define DX_HASH_HALF_MD4 1
#define DX_HASH_TEA 2
+#define DX_HASH_LEGACY_UNSIGNED 3
+#define DX_HASH_HALF_MD4_UNSIGNED 4
+#define DX_HASH_TEA_UNSIGNED 5
#ifdef __KERNEL__
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 445fde603df8..2f3b8b1ca592 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -57,6 +57,7 @@ struct ext4_sb_info {
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
+ int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ea2ce3c0ae66..8bd3409dc01c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1160,15 +1160,13 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
while (--depth >= 0) {
ix = path[depth].p_idx;
if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
- break;
+ goto got_index;
}
- if (depth < 0) {
- /* we've gone up to the root and
- * found no index to the right */
- return 0;
- }
+ /* we've gone up to the root and found no index to the right */
+ return 0;
+got_index:
/* we've found index to the right, let's
* follow it and find the closest allocated
* block to the right */
@@ -1201,7 +1199,6 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
*phys = ext_pblock(ex);
put_bh(bh);
return 0;
-
}
/*
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 556ca8eba3db..ac8f168c8ab4 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -35,23 +35,71 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
/* The old legacy hash */
-static __u32 dx_hack_hash(const char *name, int len)
+static __u32 dx_hack_hash_unsigned(const char *name, int len)
{
- __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
+ __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
+ const unsigned char *ucp = (const unsigned char *) name;
+
+ while (len--) {
+ hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
+
+ if (hash & 0x80000000)
+ hash -= 0x7fffffff;
+ hash1 = hash0;
+ hash0 = hash;
+ }
+ return hash0 << 1;
+}
+
+static __u32 dx_hack_hash_signed(const char *name, int len)
+{
+ __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
+ const signed char *scp = (const signed char *) name;
+
while (len--) {
- __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
+ hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
- if (hash & 0x80000000) hash -= 0x7fffffff;
+ if (hash & 0x80000000)
+ hash -= 0x7fffffff;
hash1 = hash0;
hash0 = hash;
}
- return (hash0 << 1);
+ return hash0 << 1;
+}
+
+static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
+{
+ __u32 pad, val;
+ int i;
+ const signed char *scp = (const signed char *) msg;
+
+ pad = (__u32)len | ((__u32)len << 8);
+ pad |= pad << 16;
+
+ val = pad;
+ if (len > num*4)
+ len = num * 4;
+ for (i = 0; i < len; i++) {
+ if ((i % 4) == 0)
+ val = pad;
+ val = ((int) scp[i]) + (val << 8);
+ if ((i % 4) == 3) {
+ *buf++ = val;
+ val = pad;
+ num--;
+ }
+ }
+ if (--num >= 0)
+ *buf++ = val;
+ while (--num >= 0)
+ *buf++ = pad;
}
-static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
{
__u32 pad, val;
int i;
+ const unsigned char *ucp = (const unsigned char *) msg;
pad = (__u32)len | ((__u32)len << 8);
pad |= pad << 16;
@@ -62,7 +110,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
for (i = 0; i < len; i++) {
if ((i % 4) == 0)
val = pad;
- val = msg[i] + (val << 8);
+ val = ((int) ucp[i]) + (val << 8);
if ((i % 4) == 3) {
*buf++ = val;
val = pad;
@@ -95,6 +143,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
const char *p;
int i;
__u32 in[8], buf[4];
+ void (*str2hashbuf)(const char *, int, __u32 *, int) =
+ str2hashbuf_signed;
/* Initialize the default seed for the hash checksum functions */
buf[0] = 0x67452301;
@@ -113,13 +163,18 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
}
switch (hinfo->hash_version) {
+ case DX_HASH_LEGACY_UNSIGNED:
+ hash = dx_hack_hash_unsigned(name, len);
+ break;
case DX_HASH_LEGACY:
- hash = dx_hack_hash(name, len);
+ hash = dx_hack_hash_signed(name, len);
break;
+ case DX_HASH_HALF_MD4_UNSIGNED:
+ str2hashbuf = str2hashbuf_unsigned;
case DX_HASH_HALF_MD4:
p = name;
while (len > 0) {
- str2hashbuf(p, len, in, 8);
+ (*str2hashbuf)(p, len, in, 8);
half_md4_transform(buf, in);
len -= 32;
p += 32;
@@ -127,10 +182,12 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
minor_hash = buf[2];
hash = buf[1];
break;
+ case DX_HASH_TEA_UNSIGNED:
+ str2hashbuf = str2hashbuf_unsigned;
case DX_HASH_TEA:
p = name;
while (len > 0) {
- str2hashbuf(p, len, in, 4);
+ (*str2hashbuf)(p, len, in, 4);
TEA_transform(buf, in);
len -= 16;
p += 16;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2a117e286e54..08cac9fcace2 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -787,7 +787,7 @@ got:
spin_unlock(sb_bgl_lock(sbi, flex_group));
}
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
if (test_opt(sb, GRPID))
inode->i_gid = dir->i_gid;
else if (dir->i_mode & S_ISGID) {
@@ -795,7 +795,7 @@ got:
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
inode->i_mode = mode;
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5ae33cb..ca88060e08b2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1644,35 +1644,39 @@ struct mpage_da_data {
*/
static int mpage_da_submit_io(struct mpage_da_data *mpd)
{
- struct address_space *mapping = mpd->inode->i_mapping;
- int ret = 0, err, nr_pages, i;
- unsigned long index, end;
- struct pagevec pvec;
long pages_skipped;
+ struct pagevec pvec;
+ unsigned long index, end;
+ int ret = 0, err, nr_pages, i;
+ struct inode *inode = mpd->inode;
+ struct address_space *mapping = inode->i_mapping;
BUG_ON(mpd->next_page <= mpd->first_page);
- pagevec_init(&pvec, 0);
+ /*
+ * We need to start from the first_page to the next_page - 1
+ * to make sure we also write the mapped dirty buffer_heads.
+ * If we look at mpd->lbh.b_blocknr we would only be looking
+ * at the currently mapped buffer_heads.
+ */
index = mpd->first_page;
end = mpd->next_page - 1;
+ pagevec_init(&pvec, 0);
while (index <= end) {
- /*
- * We can use PAGECACHE_TAG_DIRTY lookup here because
- * even though we have cleared the dirty flag on the page
- * We still keep the page in the radix tree with tag
- * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
- * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
- * which is called via the below writepage callback.
- */
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index,
- (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ index = page->index;
+ if (index > end)
+ break;
+ index++;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+
pages_skipped = mpd->wbc->pages_skipped;
err = mapping->a_ops->writepage(page, mpd->wbc);
if (!err && (pages_skipped == mpd->wbc->pages_skipped))
@@ -1830,9 +1834,9 @@ static void ext4_print_free_blocks(struct inode *inode)
ext4_count_free_blocks(inode->i_sb));
printk(KERN_EMERG "Free/Dirty block details\n");
printk(KERN_EMERG "free_blocks=%lld\n",
- percpu_counter_sum(&sbi->s_freeblocks_counter));
+ (long long)percpu_counter_sum(&sbi->s_freeblocks_counter));
printk(KERN_EMERG "dirty_blocks=%lld\n",
- percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+ (long long)percpu_counter_sum(&sbi->s_dirtyblocks_counter));
printk(KERN_EMERG "Block reservation details\n");
printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
EXT4_I(inode)->i_reserved_data_blocks);
@@ -2086,11 +2090,29 @@ static int __mpage_da_writepage(struct page *page,
bh = head;
do {
BUG_ON(buffer_locked(bh));
+ /*
+ * We need to try to allocate
+ * unmapped blocks in the same page.
+ * Otherwise we won't make progress
+ * with the page in ext4_da_writepage
+ */
if (buffer_dirty(bh) &&
(!buffer_mapped(bh) || buffer_delay(bh))) {
mpage_add_bh_to_extent(mpd, logical, bh);
if (mpd->io_done)
return MPAGE_DA_EXTENT_TAIL;
+ } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+ /*
+ * mapped dirty buffer. We need to update
+ * the b_state because we look at
+ * b_state in mpage_da_map_blocks. We don't
+ * update b_size because if we find an
+ * unmapped buffer_head later we need to
+ * use the b_state flag of that buffer_head.
+ */
+ if (mpd->lbh.b_size == 0)
+ mpd->lbh.b_state =
+ bh->b_state & BH_FLAGS;
}
logical++;
} while ((bh = bh->b_this_page) != head);
@@ -2388,6 +2410,20 @@ static int ext4_da_writepages(struct address_space *mapping,
*/
if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0;
+
+ /*
+ * If the filesystem has aborted, it is read-only, so return
+ * right away instead of dumping stack traces later on that
+ * will obscure the real source of the problem. We test
+ * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
+ * the latter could be true if the filesystem is mounted
+ * read-only, and in that case, ext4_da_writepages should
+ * *never* be called, so if that ever happens, we would want
+ * the stack trace.
+ */
+ if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
+ return -EROFS;
+
/*
* Make sure nr_to_write is >= sbi->s_mb_stream_request
* This make sure small files blocks are allocated in
@@ -2432,7 +2468,7 @@ static int ext4_da_writepages(struct address_space *mapping,
handle = ext4_journal_start(inode, needed_blocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- printk(KERN_EMERG "%s: jbd2_start: "
+ printk(KERN_CRIT "%s: jbd2_start: "
"%ld pages, ino %lu; err %d\n", __func__,
wbc->nr_to_write, inode->i_ino, ret);
dump_stack();
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 63adcb792988..56afe7dc2f0f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -372,6 +372,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
goto fail;
}
hinfo->hash_version = root->info.hash_version;
+ if (hinfo->hash_version <= DX_HASH_TEA)
+ hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
if (d_name)
ext4fs_dirhash(d_name->name, d_name->len, hinfo);
@@ -641,6 +643,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
dir = dir_file->f_path.dentry->d_inode;
if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+ if (hinfo.hash_version <= DX_HASH_TEA)
+ hinfo.hash_version +=
+ EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
start_hash, start_minor_hash);
@@ -1408,6 +1413,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
/* Initialize as for dx_probe */
hinfo.hash_version = root->info.hash_version;
+ if (hinfo.hash_version <= DX_HASH_TEA)
+ hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
ext4fs_dirhash(name, namelen, &hinfo);
frame = frames;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c65dbe..2570fa91864c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -556,6 +556,12 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
+static void *ext4_get_inodes(struct kmem_cache *s, int nr, void **v)
+{
+ return fs_get_inodes(s, nr, v,
+ offsetof(struct ext4_inode_info, vfs_inode));
+}
+
static int init_inodecache(void)
{
ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
@@ -565,6 +571,8 @@ static int init_inodecache(void)
init_once);
if (ext4_inode_cachep == NULL)
return -ENOMEM;
+ kmem_cache_setup_defrag(ext4_inode_cachep,
+ ext4_get_inodes, kick_inodes);
return 0;
}
@@ -803,7 +811,9 @@ static struct dquot_operations ext4_quota_operations = {
.acquire_dquot = ext4_acquire_dquot,
.release_dquot = ext4_release_dquot,
.mark_dirty = ext4_mark_dquot_dirty,
- .write_info = ext4_write_info
+ .write_info = ext4_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
};
static struct quotactl_ops ext4_qctl_operations = {
@@ -1142,8 +1152,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
"EXT4-fs: Cannot change journaled "
@@ -1182,8 +1191,7 @@ set_qf_name:
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT4-fs: Cannot change "
"journaled quota options when "
@@ -1202,8 +1210,7 @@ clear_qf_name:
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
set_qf_format:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
printk(KERN_ERR "EXT4-fs: Cannot change "
"journaled quota options when "
@@ -1222,7 +1229,7 @@ set_qf_format:
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
- if (sb_any_quota_enabled(sb)) {
+ if (sb_any_quota_loaded(sb)) {
printk(KERN_ERR "EXT4-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
@@ -1445,7 +1452,6 @@ static int ext4_fill_flex_info(struct super_block *sb)
ext4_group_t flex_group_count;
ext4_group_t flex_group;
int groups_per_flex = 0;
- __u64 block_bitmap = 0;
int i;
if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1468,9 +1474,6 @@ static int ext4_fill_flex_info(struct super_block *sb)
goto failed;
}
- gdp = ext4_get_group_desc(sb, 1, &bh);
- block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
-
for (i = 0; i < sbi->s_groups_count; i++) {
gdp = ext4_get_group_desc(sb, i, &bh);
@@ -2118,6 +2121,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
+ i = le32_to_cpu(es->s_flags);
+ if (i & EXT2_FLAGS_UNSIGNED_HASH)
+ sbi->s_hash_unsigned = 3;
+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+#ifdef __CHAR_UNSIGNED__
+ es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+ sbi->s_hash_unsigned = 3;
+#else
+ es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+#endif
+ sb->s_dirt = 1;
+ }
if (sbi->s_blocks_per_group > blocksize * 8) {
printk(KERN_ERR
@@ -3513,18 +3528,15 @@ static int ext4_ui_proc_open(struct inode *inode, struct file *file)
static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
size_t cnt, loff_t *ppos)
{
- unsigned int *p = PDE(file->f_path.dentry->d_inode)->data;
+ unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
char str[32];
- unsigned long value;
if (cnt >= sizeof(str))
return -EINVAL;
if (copy_from_user(str, buf, cnt))
return -EFAULT;
- value = simple_strtol(str, NULL, 0);
- if (value < 0)
- return -ERANGE;
- *p = value;
+
+ *p = simple_strtoul(str, NULL, 0);
return cnt;
}
diff --git a/fs/fat/file.c b/fs/fat/file.c
index f06a4e525ece..0a7f4a9918b3 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -304,7 +304,7 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
{
mode_t allow_utime = sbi->options.allow_utime;
- if (current->fsuid != inode->i_uid) {
+ if (current_fsuid() != inode->i_uid) {
if (in_group_p(inode->i_gid))
allow_utime >>= 3;
if (allow_utime & MAY_WRITE)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bdd8fb7be2ca..d937aaf77374 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -926,8 +926,8 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
opts->isvfat = is_vfat;
- opts->fs_uid = current->uid;
- opts->fs_gid = current->gid;
+ opts->fs_uid = current_uid();
+ opts->fs_gid = current_gid();
opts->fs_fmask = opts->fs_dmask = current->fs->umask;
opts->allow_utime = -1;
opts->codepage = fat_default_codepage;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ac4f7db9f134..87c39f1f0817 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -205,13 +205,14 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
int force)
{
+ const struct cred *cred = current_cred();
int err;
err = security_file_set_fowner(filp);
if (err)
return err;
- f_modown(filp, pid, type, current->uid, current->euid, force);
+ f_modown(filp, pid, type, cred->uid, cred->euid, force);
return 0;
}
EXPORT_SYMBOL(__f_setown);
@@ -400,10 +401,17 @@ static const long band_table[NSIGPOLL] = {
static inline int sigio_perm(struct task_struct *p,
struct fown_struct *fown, int sig)
{
- return (((fown->euid == 0) ||
- (fown->euid == p->suid) || (fown->euid == p->uid) ||
- (fown->uid == p->suid) || (fown->uid == p->uid)) &&
- !security_file_send_sigiotask(p, fown, sig));
+ const struct cred *cred;
+ int ret;
+
+ rcu_read_lock();
+ cred = __task_cred(p);
+ ret = ((fown->euid == 0 ||
+ fown->euid == cred->suid || fown->euid == cred->uid ||
+ fown->uid == cred->suid || fown->uid == cred->uid) &&
+ !security_file_send_sigiotask(p, fown, sig));
+ rcu_read_unlock();
+ return ret;
}
static void send_sigio_to_task(struct task_struct *p,
diff --git a/fs/file_table.c b/fs/file_table.c
index 5ad0eca6eea2..0fbcacc3ea75 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -36,7 +36,9 @@ static struct percpu_counter nr_files __cacheline_aligned_in_smp;
static inline void file_free_rcu(struct rcu_head *head)
{
- struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
+ struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
+
+ put_cred(f->f_cred);
kmem_cache_free(filp_cachep, f);
}
@@ -94,7 +96,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp,
*/
struct file *get_empty_filp(void)
{
- struct task_struct *tsk;
+ const struct cred *cred = current_cred();
static int old_max;
struct file * f;
@@ -118,12 +120,10 @@ struct file *get_empty_filp(void)
if (security_file_alloc(f))
goto fail_sec;
- tsk = current;
INIT_LIST_HEAD(&f->f_u.fu_list);
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
- f->f_uid = tsk->fsuid;
- f->f_gid = tsk->fsgid;
+ f->f_cred = get_cred(cred);
eventpoll_init_file(f);
/* f->f_version: 0 */
return f;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b72361479be2..fba571648a8e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -87,8 +87,8 @@ static void __fuse_put_request(struct fuse_req *req)
static void fuse_req_init_context(struct fuse_req *req)
{
- req->in.h.uid = current->fsuid;
- req->in.h.gid = current->fsgid;
+ req->in.h.uid = current_fsuid();
+ req->in.h.gid = current_fsgid();
req->in.h.pid = current->pid;
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index fd03330cadeb..95bc22bdd060 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -869,18 +869,25 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
*/
int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
{
+ const struct cred *cred;
+ int ret;
+
if (fc->flags & FUSE_ALLOW_OTHER)
return 1;
- if (task->euid == fc->user_id &&
- task->suid == fc->user_id &&
- task->uid == fc->user_id &&
- task->egid == fc->group_id &&
- task->sgid == fc->group_id &&
- task->gid == fc->group_id)
- return 1;
+ rcu_read_lock();
+ ret = 0;
+ cred = __task_cred(task);
+ if (cred->euid == fc->user_id &&
+ cred->suid == fc->user_id &&
+ cred->uid == fc->user_id &&
+ cred->egid == fc->group_id &&
+ cred->sgid == fc->group_id &&
+ cred->gid == fc->group_id)
+ ret = 1;
+ rcu_read_unlock();
- return 0;
+ return ret;
}
static int fuse_access(struct inode *inode, int mask)
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index ec65851ec80a..c1b4ec6a9650 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,5 +1,5 @@
obj-$(CONFIG_GFS2_FS) += gfs2.o
-gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
+gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
glops.o inode.o log.o lops.o locking.o main.o meta_io.o \
mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
ops_fstype.o ops_inode.o ops_super.o quota.o \
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3e9bd46f27e3..e335dceb6a4f 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -91,7 +91,7 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
struct gfs2_ea_location el_this;
int error;
- if (!ip->i_di.di_eattr)
+ if (!ip->i_eattr)
return 0;
memset(&er, 0, sizeof(struct gfs2_ea_request));
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index bec76b1c2bb0..789f28cfdc20 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -75,9 +75,9 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
void *kaddr = kmap(page);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
- ip->i_di.di_size);
- memset(kaddr + ip->i_di.di_size, 0,
- PAGE_CACHE_SIZE - ip->i_di.di_size);
+ ip->i_disksize);
+ memset(kaddr + ip->i_disksize, 0,
+ PAGE_CACHE_SIZE - ip->i_disksize);
kunmap(page);
SetPageUptodate(page);
@@ -132,7 +132,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
if (error)
goto out;
- if (ip->i_di.di_size) {
+ if (ip->i_disksize) {
/* Get a free block, fill it with the stuffed data,
and write it out to disk */
@@ -159,7 +159,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
di = (struct gfs2_dinode *)dibh->b_data;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
- if (ip->i_di.di_size) {
+ if (ip->i_disksize) {
*(__be64 *)(di + 1) = cpu_to_be64(block);
gfs2_add_inode_blocks(&ip->i_inode, 1);
di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
@@ -926,7 +926,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
}
}
- ip->i_di.di_size = size;
+ ip->i_disksize = size;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1033,7 +1033,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
goto out;
if (gfs2_is_stuffed(ip)) {
- ip->i_di.di_size = size;
+ ip->i_disksize = size;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1045,9 +1045,9 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
error = gfs2_block_truncate_page(ip->i_inode.i_mapping);
if (!error) {
- ip->i_di.di_size = size;
+ ip->i_disksize = size;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
- ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
+ ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
}
@@ -1114,13 +1114,13 @@ static int trunc_end(struct gfs2_inode *ip)
if (error)
goto out;
- if (!ip->i_di.di_size) {
+ if (!ip->i_disksize) {
ip->i_height = 0;
ip->i_goal = ip->i_no_addr;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
}
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
- ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
+ ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1205,9 +1205,9 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode)))
return -EINVAL;
- if (size > ip->i_di.di_size)
+ if (size > ip->i_disksize)
error = do_grow(ip, size);
- else if (size < ip->i_di.di_size)
+ else if (size < ip->i_disksize)
error = do_shrink(ip, size);
else
/* update time stamps */
@@ -1219,7 +1219,7 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
int gfs2_truncatei_resume(struct gfs2_inode *ip)
{
int error;
- error = trunc_dealloc(ip, ip->i_di.di_size);
+ error = trunc_dealloc(ip, ip->i_disksize);
if (!error)
error = trunc_end(ip);
return error;
@@ -1298,7 +1298,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
lblock_stop = offset + len + bsize - 1;
do_div(lblock_stop, bsize);
} else {
- u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift;
+ u64 end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift;
lblock = offset >> shift;
lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
if (lblock_stop > end_of_file)
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
deleted file mode 100644
index e51991947d2c..000000000000
--- a/fs/gfs2/daemon.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/kthread.h>
-#include <linux/delay.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
-#include <linux/freezer.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "daemon.h"
-#include "glock.h"
-#include "log.h"
-#include "quota.h"
-#include "recovery.h"
-#include "super.h"
-#include "util.h"
-
-/* This uses schedule_timeout() instead of msleep() because it's good for
- the daemons to wake up more often than the timeout when unmounting so
- the user's unmount doesn't sit there forever.
-
- The kthread functions used to start these daemons block and flush signals. */
-
-/**
- * gfs2_glockd - Reclaim unused glock structures
- * @sdp: Pointer to GFS2 superblock
- *
- * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
- * Number of daemons can be set by user, with num_glockd mount option.
- */
-
-int gfs2_glockd(void *data)
-{
- struct gfs2_sbd *sdp = data;
-
- while (!kthread_should_stop()) {
- while (atomic_read(&sdp->sd_reclaim_count))
- gfs2_reclaim_glock(sdp);
-
- wait_event_interruptible(sdp->sd_reclaim_wq,
- (atomic_read(&sdp->sd_reclaim_count) ||
- kthread_should_stop()));
- if (freezing(current))
- refrigerator();
- }
-
- return 0;
-}
-
-/**
- * gfs2_recoverd - Recover dead machine's journals
- * @sdp: Pointer to GFS2 superblock
- *
- */
-
-int gfs2_recoverd(void *data)
-{
- struct gfs2_sbd *sdp = data;
- unsigned long t;
-
- while (!kthread_should_stop()) {
- gfs2_check_journals(sdp);
- t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
- if (freezing(current))
- refrigerator();
- schedule_timeout_interruptible(t);
- }
-
- return 0;
-}
-
-/**
- * gfs2_quotad - Write cached quota changes into the quota file
- * @sdp: Pointer to GFS2 superblock
- *
- */
-
-int gfs2_quotad(void *data)
-{
- struct gfs2_sbd *sdp = data;
- unsigned long t;
- int error;
-
- while (!kthread_should_stop()) {
- /* Update the master statfs file */
-
- t = sdp->sd_statfs_sync_time +
- gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
-
- if (time_after_eq(jiffies, t)) {
- error = gfs2_statfs_sync(sdp);
- if (error &&
- error != -EROFS &&
- !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
- fs_err(sdp, "quotad: (1) error=%d\n", error);
- sdp->sd_statfs_sync_time = jiffies;
- }
-
- /* Update quota file */
-
- t = sdp->sd_quota_sync_time +
- gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
-
- if (time_after_eq(jiffies, t)) {
- error = gfs2_quota_sync(sdp);
- if (error &&
- error != -EROFS &&
- !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
- fs_err(sdp, "quotad: (2) error=%d\n", error);
- sdp->sd_quota_sync_time = jiffies;
- }
-
- gfs2_quota_scan(sdp);
-
- t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
- if (freezing(current))
- refrigerator();
- schedule_timeout_interruptible(t);
- }
-
- return 0;
-}
-
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
deleted file mode 100644
index 4be084fb6a62..000000000000
--- a/fs/gfs2/daemon.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __DAEMON_DOT_H__
-#define __DAEMON_DOT_H__
-
-int gfs2_glockd(void *data);
-int gfs2_recoverd(void *data);
-int gfs2_quotad(void *data);
-
-#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index eed040d8ba3a..b7c8e5c70791 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -36,7 +36,7 @@
* the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
* beginning of the leaf block. The dirents reside in leaves when
*
- * dip->i_di.di_flags & GFS2_DIF_EXHASH is true
+ * dip->i_diskflags & GFS2_DIF_EXHASH is true
*
* Otherwise, the dirents are "linear", within a single stuffed dinode block.
*
@@ -128,8 +128,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
- if (ip->i_di.di_size < offset + size)
- ip->i_di.di_size = offset + size;
+ if (ip->i_disksize < offset + size)
+ ip->i_disksize = offset + size;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(ip, dibh->b_data);
@@ -226,8 +226,8 @@ out:
if (error)
return error;
- if (ip->i_di.di_size < offset + copied)
- ip->i_di.di_size = offset + copied;
+ if (ip->i_disksize < offset + copied)
+ ip->i_disksize = offset + copied;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -277,11 +277,11 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
int copied = 0;
int error = 0;
- if (offset >= ip->i_di.di_size)
+ if (offset >= ip->i_disksize)
return 0;
- if (offset + size > ip->i_di.di_size)
- size = ip->i_di.di_size - offset;
+ if (offset + size > ip->i_disksize)
+ size = ip->i_disksize - offset;
if (!size)
return 0;
@@ -755,12 +755,12 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
struct gfs2_inode *ip = GFS2_I(inode);
int error;
- if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
+ if (ip->i_diskflags & GFS2_DIF_EXHASH) {
struct gfs2_leaf *leaf;
unsigned hsize = 1 << ip->i_depth;
unsigned index;
u64 ln;
- if (hsize * sizeof(u64) != ip->i_di.di_size) {
+ if (hsize * sizeof(u64) != ip->i_disksize) {
gfs2_consist_inode(ip);
return ERR_PTR(-EIO);
}
@@ -858,8 +858,8 @@ static int dir_make_exhash(struct inode *inode)
return -ENOSPC;
bn = bh->b_blocknr;
- gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
- leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
+ gfs2_assert(sdp, dip->i_entries < (1 << 16));
+ leaf->lf_entries = cpu_to_be16(dip->i_entries);
/* Copy dirents */
@@ -905,9 +905,9 @@ static int dir_make_exhash(struct inode *inode)
for (x = sdp->sd_hash_ptrs; x--; lp++)
*lp = cpu_to_be64(bn);
- dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
+ dip->i_disksize = sdp->sd_sb.sb_bsize / 2;
gfs2_add_inode_blocks(&dip->i_inode, 1);
- dip->i_di.di_flags |= GFS2_DIF_EXHASH;
+ dip->i_diskflags |= GFS2_DIF_EXHASH;
for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
dip->i_depth = y;
@@ -1082,7 +1082,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
int error = 0;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != dip->i_di.di_size) {
+ if (hsize * sizeof(u64) != dip->i_disksize) {
gfs2_consist_inode(dip);
return -EIO;
}
@@ -1091,7 +1091,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL);
- for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
+ for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) {
error = gfs2_dir_read_data(dip, (char *)buf,
block * sdp->sd_hash_bsize,
sdp->sd_hash_bsize, 1);
@@ -1370,7 +1370,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
unsigned depth = 0;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != dip->i_di.di_size) {
+ if (hsize * sizeof(u64) != dip->i_disksize) {
gfs2_consist_inode(dip);
return -EIO;
}
@@ -1426,10 +1426,10 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
int copied = 0;
int error;
- if (!dip->i_di.di_entries)
+ if (!dip->i_entries)
return 0;
- if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
+ if (dip->i_diskflags & GFS2_DIF_EXHASH)
return dir_e_read(inode, offset, opaque, filldir);
if (!gfs2_is_stuffed(dip)) {
@@ -1453,17 +1453,17 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
error = PTR_ERR(dent);
goto out;
}
- if (dip->i_di.di_entries != g.offset) {
+ if (dip->i_entries != g.offset) {
fs_warn(sdp, "Number of entries corrupt in dir %llu, "
- "ip->i_di.di_entries (%u) != g.offset (%u)\n",
+ "ip->i_entries (%u) != g.offset (%u)\n",
(unsigned long long)dip->i_no_addr,
- dip->i_di.di_entries,
+ dip->i_entries,
g.offset);
error = -EIO;
goto out;
}
error = do_filldir_main(dip, offset, opaque, filldir, darr,
- dip->i_di.di_entries, &copied);
+ dip->i_entries, &copied);
out:
kfree(darr);
}
@@ -1612,7 +1612,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
dent = gfs2_init_dirent(inode, dent, name, bh);
gfs2_inum_out(nip, dent);
dent->de_type = cpu_to_be16(type);
- if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
+ if (ip->i_diskflags & GFS2_DIF_EXHASH) {
leaf = (struct gfs2_leaf *)bh->b_data;
be16_add_cpu(&leaf->lf_entries, 1);
}
@@ -1621,14 +1621,14 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
if (error)
break;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
- ip->i_di.di_entries++;
+ ip->i_entries++;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
error = 0;
break;
}
- if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
+ if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) {
error = dir_make_exhash(inode);
if (error)
break;
@@ -1691,7 +1691,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
}
dirent_del(dip, bh, prev, dent);
- if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
+ if (dip->i_diskflags & GFS2_DIF_EXHASH) {
struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
u16 entries = be16_to_cpu(leaf->lf_entries);
if (!entries)
@@ -1704,10 +1704,10 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
if (error)
return error;
- if (!dip->i_di.di_entries)
+ if (!dip->i_entries)
gfs2_consist_inode(dip);
gfs2_trans_add_bh(dip->i_gl, bh, 1);
- dip->i_di.di_entries--;
+ dip->i_entries--;
dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
@@ -1748,7 +1748,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
gfs2_inum_out(nip, dent);
dent->de_type = cpu_to_be16(new_type);
- if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
+ if (dip->i_diskflags & GFS2_DIF_EXHASH) {
brelse(bh);
error = gfs2_meta_inode_buffer(dip, &bh);
if (error)
@@ -1784,7 +1784,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
int error = 0;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != dip->i_di.di_size) {
+ if (hsize * sizeof(u64) != dip->i_disksize) {
gfs2_consist_inode(dip);
return -EIO;
}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 8a468cac9328..4f919440c3be 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -11,6 +11,7 @@
#define __DIR_DOT_H__
#include <linux/dcache.h>
+#include <linux/crc32.h>
struct inode;
struct gfs2_inode;
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index e3f76f451b0a..0d1c76d906ae 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -114,11 +114,11 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
__be64 *eablk, *end;
int error;
- error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh);
+ error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &bh);
if (error)
return error;
- if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
+ if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) {
error = ea_foreach_i(ip, bh, ea_call, data);
goto out;
}
@@ -414,7 +414,7 @@ int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
if (error)
return error;
- if (ip->i_di.di_eattr) {
+ if (ip->i_eattr) {
struct ea_list ei = { .ei_er = er, .ei_size = 0 };
error = ea_foreach(ip, ea_list_i, &ei);
@@ -514,7 +514,7 @@ int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
struct gfs2_ea_location el;
int error;
- if (!ip->i_di.di_eattr)
+ if (!ip->i_eattr)
return -ENODATA;
error = gfs2_ea_find(ip, er, &el);
@@ -741,7 +741,7 @@ static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
return error;
- ip->i_di.di_eattr = bh->b_blocknr;
+ ip->i_eattr = bh->b_blocknr;
error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
brelse(bh);
@@ -935,10 +935,10 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
int error;
int mh_size = sizeof(struct gfs2_meta_header);
- if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
+ if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
__be64 *end;
- error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT,
+ error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT,
&indbh);
if (error)
return error;
@@ -972,9 +972,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
gfs2_buffer_clear_tail(indbh, mh_size);
eablk = (__be64 *)(indbh->b_data + mh_size);
- *eablk = cpu_to_be64(ip->i_di.di_eattr);
- ip->i_di.di_eattr = blk;
- ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
+ *eablk = cpu_to_be64(ip->i_eattr);
+ ip->i_eattr = blk;
+ ip->i_diskflags |= GFS2_DIF_EA_INDIRECT;
gfs2_add_inode_blocks(&ip->i_inode, 1);
eablk++;
@@ -1015,7 +1015,7 @@ static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
return error;
- if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
+ if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT))
blks++;
if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);
@@ -1040,7 +1040,7 @@ int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
struct gfs2_ea_location el;
int error;
- if (!ip->i_di.di_eattr) {
+ if (!ip->i_eattr) {
if (er->er_flags & XATTR_REPLACE)
return -ENODATA;
return ea_init(ip, er);
@@ -1051,7 +1051,7 @@ int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
return error;
if (el.el_ea) {
- if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
+ if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
brelse(el.el_bh);
return -EPERM;
}
@@ -1145,7 +1145,7 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
struct gfs2_ea_location el;
int error;
- if (!ip->i_di.di_eattr)
+ if (!ip->i_eattr)
return -ENODATA;
error = gfs2_ea_find(ip, er, &el);
@@ -1309,7 +1309,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
- error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &indbh);
+ error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &indbh);
if (error)
return error;
@@ -1388,7 +1388,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
if (bstart)
gfs2_free_meta(ip, bstart, blen);
- ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
+ ip->i_diskflags &= ~GFS2_DIF_EA_INDIRECT;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
@@ -1416,7 +1416,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
struct buffer_head *dibh;
int error;
- rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr);
if (!rgd) {
gfs2_consist_inode(ip);
return -EIO;
@@ -1432,9 +1432,9 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
if (error)
goto out_gunlock;
- gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
+ gfs2_free_meta(ip, ip->i_eattr, 1);
- ip->i_di.di_eattr = 0;
+ ip->i_eattr = 0;
gfs2_add_inode_blocks(&ip->i_inode, -1);
error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -1479,7 +1479,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
if (error)
goto out_rindex;
- if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
+ if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
error = ea_dealloc_indirect(ip);
if (error)
goto out_rindex;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c962283d4e7f..5eae62e7f778 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -40,6 +40,7 @@
#include "quota.h"
#include "super.h"
#include "util.h"
+#include "bmap.h"
struct gfs2_gl_hash_bucket {
struct hlist_head hb_list;
@@ -61,9 +62,10 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int
static DECLARE_RWSEM(gfs2_umount_flush_sem);
static struct dentry *gfs2_root;
-static struct task_struct *scand_process;
-static unsigned int scand_secs = 5;
static struct workqueue_struct *glock_workqueue;
+static LIST_HEAD(lru_list);
+static atomic_t lru_count = ATOMIC_INIT(0);
+static spinlock_t lru_lock = SPIN_LOCK_UNLOCKED;
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -174,6 +176,22 @@ static void gfs2_glock_hold(struct gfs2_glock *gl)
}
/**
+ * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
+ * @gl: the glock
+ *
+ */
+
+static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+{
+ spin_lock(&lru_lock);
+ if (list_empty(&gl->gl_lru) && gl->gl_state != LM_ST_UNLOCKED) {
+ list_add_tail(&gl->gl_lru, &lru_list);
+ atomic_inc(&lru_count);
+ }
+ spin_unlock(&lru_lock);
+}
+
+/**
* gfs2_glock_put() - Decrement reference count on glock
* @gl: The glock to put
*
@@ -187,14 +205,23 @@ int gfs2_glock_put(struct gfs2_glock *gl)
if (atomic_dec_and_test(&gl->gl_ref)) {
hlist_del(&gl->gl_list);
write_unlock(gl_lock_addr(gl->gl_hash));
+ spin_lock(&lru_lock);
+ if (!list_empty(&gl->gl_lru)) {
+ list_del_init(&gl->gl_lru);
+ atomic_dec(&lru_count);
+ }
+ spin_unlock(&lru_lock);
GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED);
- GLOCK_BUG_ON(gl, !list_empty(&gl->gl_reclaim));
+ GLOCK_BUG_ON(gl, !list_empty(&gl->gl_lru));
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
glock_free(gl);
rv = 1;
goto out;
}
write_unlock(gl_lock_addr(gl->gl_hash));
+ /* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */
+ if (atomic_read(&gl->gl_ref) == 2)
+ gfs2_glock_schedule_for_reclaim(gl);
out:
return rv;
}
@@ -289,10 +316,13 @@ static void gfs2_holder_wake(struct gfs2_holder *gh)
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
*
- * Returns: true if there is a blocked holder at the head of the list
+ * Returns: 1 if there is a blocked holder at the head of the list, or 2
+ * if a type specific operation is underway.
*/
static int do_promote(struct gfs2_glock *gl)
+__releases(&gl->gl_spin)
+__acquires(&gl->gl_spin)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_holder *gh, *tmp;
@@ -310,6 +340,8 @@ restart:
ret = glops->go_lock(gh);
spin_lock(&gl->gl_spin);
if (ret) {
+ if (ret == 1)
+ return 2;
gh->gh_error = ret;
list_del_init(&gh->gh_list);
gfs2_holder_wake(gh);
@@ -414,6 +446,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_holder *gh;
unsigned state = ret & LM_OUT_ST_MASK;
+ int rv;
spin_lock(&gl->gl_spin);
state_change(gl, state);
@@ -468,7 +501,6 @@ retry:
gfs2_demote_wake(gl);
if (state != LM_ST_UNLOCKED) {
if (glops->go_xmote_bh) {
- int rv;
spin_unlock(&gl->gl_spin);
rv = glops->go_xmote_bh(gl, gh);
if (rv == -EAGAIN)
@@ -479,10 +511,13 @@ retry:
goto out;
}
}
- do_promote(gl);
+ rv = do_promote(gl);
+ if (rv == 2)
+ goto out_locked;
}
out:
clear_bit(GLF_LOCK, &gl->gl_flags);
+out_locked:
spin_unlock(&gl->gl_spin);
gfs2_glock_put(gl);
}
@@ -511,6 +546,8 @@ static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
*/
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
+__releases(&gl->gl_spin)
+__acquires(&gl->gl_spin)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_sbd *sdp = gl->gl_sbd;
@@ -576,8 +613,11 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
*/
static void run_queue(struct gfs2_glock *gl, const int nonblock)
+__releases(&gl->gl_spin)
+__acquires(&gl->gl_spin)
{
struct gfs2_holder *gh = NULL;
+ int ret;
if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
return;
@@ -596,8 +636,11 @@ static void run_queue(struct gfs2_glock *gl, const int nonblock)
} else {
if (test_bit(GLF_DEMOTE, &gl->gl_flags))
gfs2_demote_wake(gl);
- if (do_promote(gl) == 0)
+ ret = do_promote(gl);
+ if (ret == 0)
goto out;
+ if (ret == 2)
+ return;
gh = find_first_waiter(gl);
gl->gl_target = gh->gh_state;
if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
@@ -820,7 +863,7 @@ static void wait_on_demote(struct gfs2_glock *gl)
*/
static void handle_callback(struct gfs2_glock *gl, unsigned int state,
- int remote, unsigned long delay)
+ unsigned long delay)
{
int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
@@ -828,9 +871,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
gl->gl_demote_state = state;
gl->gl_demote_time = jiffies;
- if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
- gl->gl_object)
- gfs2_glock_schedule_for_reclaim(gl);
} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
gl->gl_demote_state != state) {
gl->gl_demote_state = LM_ST_UNLOCKED;
@@ -877,6 +917,8 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
*/
static inline void add_to_queue(struct gfs2_holder *gh)
+__releases(&gl->gl_spin)
+__acquires(&gl->gl_spin)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
@@ -998,7 +1040,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
spin_lock(&gl->gl_spin);
if (gh->gh_flags & GL_NOCACHE)
- handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
+ handle_callback(gl, LM_ST_UNLOCKED, 0);
list_del_init(&gh->gh_list);
if (find_first_holder(gl) == NULL) {
@@ -1269,12 +1311,26 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
delay = gl->gl_ops->go_min_hold_time;
spin_lock(&gl->gl_spin);
- handle_callback(gl, state, 1, delay);
+ handle_callback(gl, state, delay);
spin_unlock(&gl->gl_spin);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
}
+static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
+{
+ struct gfs2_jdesc *jd;
+
+ spin_lock(&sdp->sd_jindex_spin);
+ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+ if (jd->jd_jid != jid)
+ continue;
+ jd->jd_dirty = 1;
+ break;
+ }
+ spin_unlock(&sdp->sd_jindex_spin);
+}
+
/**
* gfs2_glock_cb - Callback used by locking module
* @sdp: Pointer to the superblock
@@ -1338,80 +1394,83 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
* Returns: 1 if it's ok
*/
-static int demote_ok(struct gfs2_glock *gl)
+static int demote_ok(const struct gfs2_glock *gl)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
- int demote = 1;
-
- if (test_bit(GLF_STICKY, &gl->gl_flags))
- demote = 0;
- else if (glops->go_demote_ok)
- demote = glops->go_demote_ok(gl);
-
- return demote;
-}
-
-/**
- * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
- * @gl: the glock
- *
- */
-
-void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
- spin_lock(&sdp->sd_reclaim_lock);
- if (list_empty(&gl->gl_reclaim)) {
- gfs2_glock_hold(gl);
- list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
- atomic_inc(&sdp->sd_reclaim_count);
- spin_unlock(&sdp->sd_reclaim_lock);
- wake_up(&sdp->sd_reclaim_wq);
- } else
- spin_unlock(&sdp->sd_reclaim_lock);
+ if (gl->gl_state == LM_ST_UNLOCKED)
+ return 0;
+ if (!list_empty(&gl->gl_holders))
+ return 0;
+ if (glops->go_demote_ok)
+ return glops->go_demote_ok(gl);
+ return 1;
}
-/**
- * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
- * @sdp: the filesystem
- *
- * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
- * different glock and we notice that there are a lot of glocks in the
- * reclaim list.
- *
- */
-void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
+static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
{
struct gfs2_glock *gl;
- int done_callback = 0;
+ int may_demote;
+ int nr_skipped = 0;
+ int got_ref = 0;
+ LIST_HEAD(skipped);
- spin_lock(&sdp->sd_reclaim_lock);
- if (list_empty(&sdp->sd_reclaim_list)) {
- spin_unlock(&sdp->sd_reclaim_lock);
- return;
- }
- gl = list_entry(sdp->sd_reclaim_list.next,
- struct gfs2_glock, gl_reclaim);
- list_del_init(&gl->gl_reclaim);
- spin_unlock(&sdp->sd_reclaim_lock);
+ if (nr == 0)
+ goto out;
- atomic_dec(&sdp->sd_reclaim_count);
- atomic_inc(&sdp->sd_reclaimed);
+ if (!(gfp_mask & __GFP_FS))
+ return -1;
- spin_lock(&gl->gl_spin);
- if (find_first_holder(gl) == NULL &&
- gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) {
- handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
- done_callback = 1;
+ spin_lock(&lru_lock);
+ while(nr && !list_empty(&lru_list)) {
+ gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
+ list_del_init(&gl->gl_lru);
+ atomic_dec(&lru_count);
+
+ /* Test for being demotable */
+ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+ gfs2_glock_hold(gl);
+ got_ref = 1;
+ spin_unlock(&lru_lock);
+ spin_lock(&gl->gl_spin);
+ may_demote = demote_ok(gl);
+ spin_unlock(&gl->gl_spin);
+ clear_bit(GLF_LOCK, &gl->gl_flags);
+ if (may_demote) {
+ handle_callback(gl, LM_ST_UNLOCKED, 0);
+ nr--;
+ if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ gfs2_glock_put(gl);
+ }
+ spin_lock(&lru_lock);
+ if (may_demote)
+ continue;
+ }
+ if (list_empty(&gl->gl_lru) &&
+ (atomic_read(&gl->gl_ref) <= (2 + got_ref))) {
+ nr_skipped++;
+ list_add(&gl->gl_lru, &skipped);
+ }
+ if (got_ref) {
+ spin_unlock(&lru_lock);
+ gfs2_glock_put(gl);
+ spin_lock(&lru_lock);
+ got_ref = 0;
+ }
}
- spin_unlock(&gl->gl_spin);
- if (!done_callback ||
- queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gfs2_glock_put(gl);
+ list_splice(&skipped, &lru_list);
+ atomic_add(nr_skipped, &lru_count);
+ spin_unlock(&lru_lock);
+out:
+ return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
}
+static struct shrinker glock_shrinker = {
+ .shrink = gfs2_shrink_glock_memory,
+ .seeks = DEFAULT_SEEKS,
+};
+
/**
* examine_bucket - Call a function for glock in a hash bucket
* @examiner: the function
@@ -1457,26 +1516,6 @@ out:
}
/**
- * scan_glock - look at a glock and see if we can reclaim it
- * @gl: the glock to look at
- *
- */
-
-static void scan_glock(struct gfs2_glock *gl)
-{
- if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
- return;
- if (test_bit(GLF_LOCK, &gl->gl_flags))
- return;
-
- spin_lock(&gl->gl_spin);
- if (find_first_holder(gl) == NULL &&
- gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
- gfs2_glock_schedule_for_reclaim(gl);
- spin_unlock(&gl->gl_spin);
-}
-
-/**
* clear_glock - look at a glock and see if we can free it from glock cache
* @gl: the glock to look at
*
@@ -1484,23 +1523,16 @@ static void scan_glock(struct gfs2_glock *gl)
static void clear_glock(struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
- int released;
-
- spin_lock(&sdp->sd_reclaim_lock);
- if (!list_empty(&gl->gl_reclaim)) {
- list_del_init(&gl->gl_reclaim);
- atomic_dec(&sdp->sd_reclaim_count);
- spin_unlock(&sdp->sd_reclaim_lock);
- released = gfs2_glock_put(gl);
- gfs2_assert(sdp, !released);
- } else {
- spin_unlock(&sdp->sd_reclaim_lock);
+ spin_lock(&lru_lock);
+ if (!list_empty(&gl->gl_lru)) {
+ list_del_init(&gl->gl_lru);
+ atomic_dec(&lru_count);
}
+ spin_unlock(&lru_lock);
spin_lock(&gl->gl_spin);
if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED)
- handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
+ handle_callback(gl, LM_ST_UNLOCKED, 0);
spin_unlock(&gl->gl_spin);
gfs2_glock_hold(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
@@ -1515,8 +1547,9 @@ static void clear_glock(struct gfs2_glock *gl)
* Called when unmounting the filesystem.
*/
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
+void gfs2_gl_hash_clear(struct super_block *sb)
{
+ struct gfs2_sbd *sdp = sb->s_fs_info;
unsigned long t;
unsigned int x;
int cont;
@@ -1548,6 +1581,20 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
}
}
+void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
+{
+ struct gfs2_glock *gl = ip->i_gl;
+ int ret;
+
+ ret = gfs2_truncatei_resume(ip);
+ gfs2_assert_withdraw(gl->gl_sbd, ret == 0);
+
+ spin_lock(&gl->gl_spin);
+ clear_bit(GLF_LOCK, &gl->gl_flags);
+ run_queue(gl, 1);
+ spin_unlock(&gl->gl_spin);
+}
+
static const char *state2str(unsigned state)
{
switch(state) {
@@ -1623,8 +1670,6 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
char *p = buf;
if (test_bit(GLF_LOCK, gflags))
*p++ = 'l';
- if (test_bit(GLF_STICKY, gflags))
- *p++ = 's';
if (test_bit(GLF_DEMOTE, gflags))
*p++ = 'D';
if (test_bit(GLF_PENDING_DEMOTE, gflags))
@@ -1743,34 +1788,6 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
return error;
}
-/**
- * gfs2_scand - Look for cached glocks and inodes to toss from memory
- * @sdp: Pointer to GFS2 superblock
- *
- * One of these daemons runs, finding candidates to add to sd_reclaim_list.
- * See gfs2_glockd()
- */
-
-static int gfs2_scand(void *data)
-{
- unsigned x;
- unsigned delay;
-
- while (!kthread_should_stop()) {
- for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
- examine_bucket(scan_glock, NULL, x);
- if (freezing(current))
- refrigerator();
- delay = scand_secs;
- if (delay < 1)
- delay = 1;
- schedule_timeout_interruptible(delay * HZ);
- }
-
- return 0;
-}
-
-
int __init gfs2_glock_init(void)
{
@@ -1784,28 +1801,21 @@ int __init gfs2_glock_init(void)
}
#endif
- scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand");
- if (IS_ERR(scand_process))
- return PTR_ERR(scand_process);
-
glock_workqueue = create_workqueue("glock_workqueue");
- if (IS_ERR(glock_workqueue)) {
- kthread_stop(scand_process);
+ if (IS_ERR(glock_workqueue))
return PTR_ERR(glock_workqueue);
- }
+
+ register_shrinker(&glock_shrinker);
return 0;
}
void gfs2_glock_exit(void)
{
+ unregister_shrinker(&glock_shrinker);
destroy_workqueue(glock_workqueue);
- kthread_stop(scand_process);
}
-module_param(scand_secs, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs");
-
static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
struct gfs2_glock *gl;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 695c6b193611..ce54f338cff9 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -129,9 +129,9 @@ int gfs2_lvb_hold(struct gfs2_glock *gl);
void gfs2_lvb_unhold(struct gfs2_glock *gl);
void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
-void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+void gfs2_gl_hash_clear(struct super_block *sb);
+void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
int __init gfs2_glock_init(void);
void gfs2_glock_exit(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index c6c318c2a0f6..8522d3aa64fc 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -201,19 +201,12 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
* Returns: 1 if it's ok
*/
-static int inode_go_demote_ok(struct gfs2_glock *gl)
+static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
- int demote = 0;
-
- if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages)
- demote = 1;
- else if (!sdp->sd_args.ar_localcaching &&
- time_after_eq(jiffies, gl->gl_stamp +
- gfs2_tune_get(sdp, gt_demote_secs) * HZ))
- demote = 1;
-
- return demote;
+ if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
+ return 0;
+ return 1;
}
/**
@@ -227,6 +220,7 @@ static int inode_go_demote_ok(struct gfs2_glock *gl)
static int inode_go_lock(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
+ struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = gl->gl_object;
int error = 0;
@@ -239,10 +233,16 @@ static int inode_go_lock(struct gfs2_holder *gh)
return error;
}
- if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
+ if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
(gl->gl_state == LM_ST_EXCLUSIVE) &&
- (gh->gh_state == LM_ST_EXCLUSIVE))
- error = gfs2_truncatei_resume(ip);
+ (gh->gh_state == LM_ST_EXCLUSIVE)) {
+ spin_lock(&sdp->sd_trunc_lock);
+ if (list_empty(&ip->i_trunc_list))
+ list_add(&sdp->sd_trunc_list, &ip->i_trunc_list);
+ spin_unlock(&sdp->sd_trunc_lock);
+ wake_up(&sdp->sd_quota_wait);
+ return 1;
+ }
return error;
}
@@ -260,10 +260,13 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
const struct gfs2_inode *ip = gl->gl_object;
if (ip == NULL)
return 0;
- gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%08lx\n",
+ gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n",
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
- IF2DT(ip->i_inode.i_mode), ip->i_flags);
+ IF2DT(ip->i_inode.i_mode), ip->i_flags,
+ (unsigned int)ip->i_diskflags,
+ (unsigned long long)ip->i_inode.i_size,
+ (unsigned long long)ip->i_disksize);
return 0;
}
@@ -274,7 +277,7 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
* Returns: 1 if it's ok
*/
-static int rgrp_go_demote_ok(struct gfs2_glock *gl)
+static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
{
return !gl->gl_aspace->i_mapping->nrpages;
}
@@ -318,7 +321,9 @@ static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
const struct gfs2_rgrpd *rgd = gl->gl_object;
if (rgd == NULL)
return 0;
- gfs2_print_dbg(seq, " R: n:%llu\n", (unsigned long long)rgd->rd_addr);
+ gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+ (unsigned long long)rgd->rd_addr, rgd->rd_flags,
+ rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
return 0;
}
@@ -374,13 +379,25 @@ static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
}
/**
+ * trans_go_demote_ok
+ * @gl: the glock
+ *
+ * Always returns 0
+ */
+
+static int trans_go_demote_ok(const struct gfs2_glock *gl)
+{
+ return 0;
+}
+
+/**
* quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
* @gl: the glock
*
* Returns: 1 if it's ok
*/
-static int quota_go_demote_ok(struct gfs2_glock *gl)
+static int quota_go_demote_ok(const struct gfs2_glock *gl)
{
return !atomic_read(&gl->gl_lvb_count);
}
@@ -414,6 +431,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
const struct gfs2_glock_operations gfs2_trans_glops = {
.go_xmote_th = trans_go_sync,
.go_xmote_bh = trans_go_xmote_bh,
+ .go_demote_ok = trans_go_demote_ok,
.go_type = LM_TYPE_NONDISK,
};
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index f566ec1b4e8e..608849d00021 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -68,12 +68,6 @@ struct gfs2_bitmap {
u32 bi_len;
};
-struct gfs2_rgrp_host {
- u32 rg_free;
- u32 rg_dinodes;
- u64 rg_igeneration;
-};
-
struct gfs2_rgrpd {
struct list_head rd_list; /* Link with superblock */
struct list_head rd_list_mru;
@@ -83,14 +77,16 @@ struct gfs2_rgrpd {
u32 rd_length; /* length of rgrp header in fs blocks */
u32 rd_data; /* num of data blocks in rgrp */
u32 rd_bitbytes; /* number of bytes in data bitmaps */
- struct gfs2_rgrp_host rd_rg;
+ u32 rd_free;
+ u32 rd_free_clone;
+ u32 rd_dinodes;
+ u64 rd_igeneration;
struct gfs2_bitmap *rd_bits;
- unsigned int rd_bh_count;
struct mutex rd_mutex;
- u32 rd_free_clone;
struct gfs2_log_element rd_le;
- u32 rd_last_alloc;
struct gfs2_sbd *rd_sbd;
+ unsigned int rd_bh_count;
+ u32 rd_last_alloc;
unsigned char rd_flags;
#define GFS2_RDF_CHECK 0x01 /* Need to check for unlinked inodes */
#define GFS2_RDF_NOALLOC 0x02 /* rg prohibits allocation */
@@ -129,7 +125,7 @@ struct gfs2_glock_operations {
void (*go_xmote_th) (struct gfs2_glock *gl);
int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
void (*go_inval) (struct gfs2_glock *gl, int flags);
- int (*go_demote_ok) (struct gfs2_glock *gl);
+ int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
@@ -159,7 +155,6 @@ struct gfs2_holder {
enum {
GLF_LOCK = 1,
- GLF_STICKY = 2,
GLF_DEMOTE = 3,
GLF_PENDING_DEMOTE = 4,
GLF_DEMOTE_IN_PROGRESS = 5,
@@ -194,7 +189,7 @@ struct gfs2_glock {
unsigned long gl_tchange;
void *gl_object;
- struct list_head gl_reclaim;
+ struct list_head gl_lru;
struct gfs2_sbd *gl_sbd;
@@ -233,29 +228,24 @@ enum {
GIF_USER = 4, /* user inode, not metadata addr space */
};
-struct gfs2_dinode_host {
- u64 di_size; /* number of bytes in file */
- u64 di_generation; /* generation number for NFS */
- u32 di_flags; /* GFS2_DIF_... */
- /* These only apply to directories */
- u32 di_entries; /* The number of entries in the directory */
- u64 di_eattr; /* extended attribute block number */
-};
struct gfs2_inode {
struct inode i_inode;
u64 i_no_addr;
u64 i_no_formal_ino;
+ u64 i_generation;
+ u64 i_eattr;
+ loff_t i_disksize;
unsigned long i_flags; /* GIF_... */
-
- struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
-
struct gfs2_glock *i_gl; /* Move into i_gh? */
struct gfs2_holder i_iopen_gh;
struct gfs2_holder i_gh; /* for prepare/commit_write only */
struct gfs2_alloc *i_alloc;
u64 i_goal; /* goal block for allocations */
struct rw_semaphore i_rw_mutex;
+ struct list_head i_trunc_list;
+ u32 i_entries;
+ u32 i_diskflags;
u8 i_height;
u8 i_depth;
};
@@ -406,13 +396,11 @@ struct gfs2_args {
struct gfs2_tune {
spinlock_t gt_spin;
- unsigned int gt_demote_secs; /* Cache retention for unheld glock */
unsigned int gt_incore_log_blocks;
unsigned int gt_log_flush_secs;
unsigned int gt_recoverd_secs;
unsigned int gt_logd_secs;
- unsigned int gt_quotad_secs;
unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
@@ -488,10 +476,6 @@ struct gfs2_sbd {
/* Lock Stuff */
struct lm_lockstruct sd_lockstruct;
- struct list_head sd_reclaim_list;
- spinlock_t sd_reclaim_lock;
- wait_queue_head_t sd_reclaim_wq;
- atomic_t sd_reclaim_count;
struct gfs2_holder sd_live_gh;
struct gfs2_glock *sd_rename_gl;
struct gfs2_glock *sd_trans_gl;
@@ -519,7 +503,6 @@ struct gfs2_sbd {
spinlock_t sd_statfs_spin;
struct gfs2_statfs_change_host sd_statfs_master;
struct gfs2_statfs_change_host sd_statfs_local;
- unsigned long sd_statfs_sync_time;
/* Resource group stuff */
@@ -552,8 +535,6 @@ struct gfs2_sbd {
struct task_struct *sd_recoverd_process;
struct task_struct *sd_logd_process;
struct task_struct *sd_quotad_process;
- struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
- unsigned int sd_glockd_num;
/* Quota stuff */
@@ -561,13 +542,15 @@ struct gfs2_sbd {
atomic_t sd_quota_count;
spinlock_t sd_quota_spin;
struct mutex sd_quota_mutex;
+ wait_queue_head_t sd_quota_wait;
+ struct list_head sd_trunc_list;
+ spinlock_t sd_trunc_lock;
unsigned int sd_quota_slots;
unsigned int sd_quota_chunks;
unsigned char **sd_quota_bitmap;
u64 sd_quota_sync_gen;
- unsigned long sd_quota_sync_time;
/* Log stuff */
@@ -624,10 +607,6 @@ struct gfs2_sbd {
struct mutex sd_freeze_lock;
unsigned int sd_freeze_count;
- /* Counters */
-
- atomic_t sd_reclaimed;
-
char sd_fsname[GFS2_FSNAME_LEN];
char sd_table_name[GFS2_FSNAME_LEN];
char sd_proto_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7cee695fa441..3b87c188da41 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -32,7 +32,6 @@
#include "log.h"
#include "meta_io.h"
#include "ops_address.h"
-#include "ops_inode.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
@@ -248,7 +247,6 @@ fail:
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
- struct gfs2_dinode_host *di = &ip->i_di;
const struct gfs2_dinode *str = buf;
struct timespec atime;
u16 height, depth;
@@ -274,8 +272,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
* to do that.
*/
ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
- di->di_size = be64_to_cpu(str->di_size);
- i_size_write(&ip->i_inode, di->di_size);
+ ip->i_disksize = be64_to_cpu(str->di_size);
+ i_size_write(&ip->i_inode, ip->i_disksize);
gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
atime.tv_sec = be64_to_cpu(str->di_atime);
atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
@@ -287,9 +285,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
ip->i_goal = be64_to_cpu(str->di_goal_meta);
- di->di_generation = be64_to_cpu(str->di_generation);
+ ip->i_generation = be64_to_cpu(str->di_generation);
- di->di_flags = be32_to_cpu(str->di_flags);
+ ip->i_diskflags = be32_to_cpu(str->di_flags);
gfs2_set_inode_flags(&ip->i_inode);
height = be16_to_cpu(str->di_height);
if (unlikely(height > GFS2_MAX_META_HEIGHT))
@@ -300,9 +298,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
goto corrupt;
ip->i_depth = (u8)depth;
- di->di_entries = be32_to_cpu(str->di_entries);
+ ip->i_entries = be32_to_cpu(str->di_entries);
- di->di_eattr = be64_to_cpu(str->di_eattr);
+ ip->i_eattr = be64_to_cpu(str->di_eattr);
if (S_ISREG(ip->i_inode.i_mode))
gfs2_set_aops(&ip->i_inode);
@@ -388,7 +386,6 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
gfs2_free_di(rgd, ip);
gfs2_trans_end(sdp);
- clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
out_rg_gunlock:
gfs2_glock_dq_uninit(&al->al_rgd_gh);
@@ -690,7 +687,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
return error;
}
- if (dip->i_di.di_entries == (u32)-1)
+ if (dip->i_entries == (u32)-1)
return -EFBIG;
if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1)
return -EMLINK;
@@ -705,18 +702,18 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
(dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) {
if (S_ISDIR(*mode))
*mode |= S_ISUID;
- else if (dip->i_inode.i_uid != current->fsuid)
+ else if (dip->i_inode.i_uid != current_fsuid())
*mode &= ~07111;
*uid = dip->i_inode.i_uid;
} else
- *uid = current->fsuid;
+ *uid = current_fsuid();
if (dip->i_inode.i_mode & S_ISGID) {
if (S_ISDIR(*mode))
*mode |= S_ISGID;
*gid = dip->i_inode.i_gid;
} else
- *gid = current->fsgid;
+ *gid = current_fsgid();
}
static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
@@ -790,11 +787,11 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
di->di_flags = 0;
if (S_ISREG(mode)) {
- if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
+ if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
gfs2_tune_get(sdp, gt_new_files_jdata))
di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
} else if (S_ISDIR(mode)) {
- di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
+ di->di_flags |= cpu_to_be32(dip->i_diskflags &
GFS2_DIF_INHERIT_JDATA);
}
@@ -1068,7 +1065,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
struct qstr dotname;
int error;
- if (ip->i_di.di_entries != 2) {
+ if (ip->i_entries != 2) {
if (gfs2_consist_inode(ip))
gfs2_dinode_print(ip);
return -EIO;
@@ -1124,8 +1121,8 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
return -EPERM;
if ((dip->i_inode.i_mode & S_ISVTX) &&
- dip->i_inode.i_uid != current->fsuid &&
- ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER))
+ dip->i_inode.i_uid != current_fsuid() &&
+ ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
return -EPERM;
if (IS_APPEND(&dip->i_inode))
@@ -1168,7 +1165,7 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
return error;
}
- if (!ip->i_di.di_size) {
+ if (!ip->i_disksize) {
gfs2_consist_inode(ip);
error = -EIO;
goto out;
@@ -1178,7 +1175,7 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
if (error)
goto out;
- x = ip->i_di.di_size + 1;
+ x = ip->i_disksize + 1;
if (x > *len) {
*buf = kmalloc(x, GFP_NOFS);
if (!*buf) {
@@ -1242,7 +1239,6 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
- const struct gfs2_dinode_host *di = &ip->i_di;
struct gfs2_dinode *str = buf;
str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -1256,7 +1252,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
- str->di_size = cpu_to_be64(di->di_size);
+ str->di_size = cpu_to_be64(ip->i_disksize);
str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
@@ -1264,17 +1260,17 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
str->di_goal_meta = cpu_to_be64(ip->i_goal);
str->di_goal_data = cpu_to_be64(ip->i_goal);
- str->di_generation = cpu_to_be64(di->di_generation);
+ str->di_generation = cpu_to_be64(ip->i_generation);
- str->di_flags = cpu_to_be32(di->di_flags);
+ str->di_flags = cpu_to_be32(ip->i_diskflags);
str->di_height = cpu_to_be16(ip->i_height);
str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
- !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+ !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
GFS2_FORMAT_DE : 0);
str->di_depth = cpu_to_be16(ip->i_depth);
- str->di_entries = cpu_to_be32(di->di_entries);
+ str->di_entries = cpu_to_be32(ip->i_entries);
- str->di_eattr = cpu_to_be64(di->di_eattr);
+ str->di_eattr = cpu_to_be64(ip->i_eattr);
str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
@@ -1282,22 +1278,21 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
void gfs2_dinode_print(const struct gfs2_inode *ip)
{
- const struct gfs2_dinode_host *di = &ip->i_di;
-
printk(KERN_INFO " no_formal_ino = %llu\n",
(unsigned long long)ip->i_no_formal_ino);
printk(KERN_INFO " no_addr = %llu\n",
(unsigned long long)ip->i_no_addr);
- printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
+ printk(KERN_INFO " i_disksize = %llu\n",
+ (unsigned long long)ip->i_disksize);
printk(KERN_INFO " blocks = %llu\n",
(unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
printk(KERN_INFO " i_goal = %llu\n",
(unsigned long long)ip->i_goal);
- printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags);
+ printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags);
printk(KERN_INFO " i_height = %u\n", ip->i_height);
printk(KERN_INFO " i_depth = %u\n", ip->i_depth);
- printk(KERN_INFO " di_entries = %u\n", di->di_entries);
- printk(KERN_INFO " di_eattr = %llu\n",
- (unsigned long long)di->di_eattr);
+ printk(KERN_INFO " i_entries = %u\n", ip->i_entries);
+ printk(KERN_INFO " i_eattr = %llu\n",
+ (unsigned long long)ip->i_eattr);
}
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 2d43f69610a0..d5329364cdff 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -10,6 +10,7 @@
#ifndef __INODE_DOT_H__
#define __INODE_DOT_H__
+#include <linux/fs.h>
#include "util.h"
static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
@@ -19,7 +20,7 @@ static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
{
- return ip->i_di.di_flags & GFS2_DIF_JDATA;
+ return ip->i_diskflags & GFS2_DIF_JDATA;
}
static inline int gfs2_is_writeback(const struct gfs2_inode *ip)
@@ -97,5 +98,15 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
void gfs2_dinode_print(const struct gfs2_inode *ip);
+extern const struct inode_operations gfs2_file_iops;
+extern const struct inode_operations gfs2_dir_iops;
+extern const struct inode_operations gfs2_symlink_iops;
+extern const struct file_operations gfs2_file_fops;
+extern const struct file_operations gfs2_dir_fops;
+extern const struct file_operations gfs2_file_fops_nolock;
+extern const struct file_operations gfs2_dir_fops_nolock;
+
+extern void gfs2_set_inode_flags(struct inode *inode);
+
#endif /* __INODE_DOT_H__ */
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 0c4cbe6c8285..1aa7eb6a0226 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -194,17 +194,25 @@ out:
static void gdlm_recovery_done(void *lockspace, unsigned int jid,
unsigned int message)
{
+ char env_jid[20];
+ char env_status[20];
+ char *envp[] = { env_jid, env_status, NULL };
struct gdlm_ls *ls = lockspace;
ls->recover_jid_done = jid;
ls->recover_jid_status = message;
- kobject_uevent(&ls->kobj, KOBJ_CHANGE);
+ sprintf(env_jid, "JID=%d", jid);
+ sprintf(env_status, "RECOVERY=%s",
+ message == LM_RD_SUCCESS ? "Done" : "Failed");
+ kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
}
static void gdlm_others_may_mount(void *lockspace)
{
+ char *message = "FIRSTMOUNT=Done";
+ char *envp[] = { message, NULL };
struct gdlm_ls *ls = lockspace;
ls->first_done = 1;
- kobject_uevent(&ls->kobj, KOBJ_CHANGE);
+ kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
}
/* Userspace gets the offline uevent, blocks new gfs locks on
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 4ec571c3d8a9..9b7edcf7bd49 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -195,9 +195,23 @@ void gdlm_kobject_release(struct gdlm_ls *ls)
kobject_put(&ls->kobj);
}
+static int gdlm_uevent(struct kset *kset, struct kobject *kobj,
+ struct kobj_uevent_env *env)
+{
+ struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
+ add_uevent_var(env, "LOCKTABLE=%s:%s", ls->clustername, ls->fsname);
+ add_uevent_var(env, "LOCKPROTO=lock_dlm");
+ return 0;
+}
+
+static struct kset_uevent_ops gdlm_uevent_ops = {
+ .uevent = gdlm_uevent,
+};
+
+
int gdlm_sysfs_init(void)
{
- gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
+ gdlm_kset = kset_create_and_add("lock_dlm", &gdlm_uevent_ops, kernel_kobj);
if (!gdlm_kset) {
printk(KERN_WARNING "%s: can not create kset\n", __func__);
return -ENOMEM;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index bb2cc303ac29..7cacfde32194 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -19,7 +19,7 @@
#include "gfs2.h"
#include "incore.h"
-#include "ops_fstype.h"
+#include "super.h"
#include "sys.h"
#include "util.h"
#include "glock.h"
@@ -30,6 +30,7 @@ static void gfs2_init_inode_once(void *foo)
inode_init_once(&ip->i_inode);
init_rwsem(&ip->i_rw_mutex);
+ INIT_LIST_HEAD(&ip->i_trunc_list);
ip->i_alloc = NULL;
}
@@ -42,7 +43,7 @@ static void gfs2_init_glock_once(void *foo)
INIT_LIST_HEAD(&gl->gl_holders);
gl->gl_lvb = NULL;
atomic_set(&gl->gl_lvb_count, 0);
- INIT_LIST_HEAD(&gl->gl_reclaim);
+ INIT_LIST_HEAD(&gl->gl_lru);
INIT_LIST_HEAD(&gl->gl_ail_list);
atomic_set(&gl->gl_ail_count, 0);
}
@@ -93,6 +94,12 @@ static int __init init_gfs2_fs(void)
if (!gfs2_rgrpd_cachep)
goto fail;
+ gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad",
+ sizeof(struct gfs2_quota_data),
+ 0, 0, NULL);
+ if (!gfs2_quotad_cachep)
+ goto fail;
+
error = register_filesystem(&gfs2_fs_type);
if (error)
goto fail;
@@ -112,6 +119,9 @@ fail_unregister:
fail:
gfs2_glock_exit();
+ if (gfs2_quotad_cachep)
+ kmem_cache_destroy(gfs2_quotad_cachep);
+
if (gfs2_rgrpd_cachep)
kmem_cache_destroy(gfs2_rgrpd_cachep);
@@ -140,6 +150,7 @@ static void __exit exit_gfs2_fs(void)
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
+ kmem_cache_destroy(gfs2_quotad_cachep);
kmem_cache_destroy(gfs2_rgrpd_cachep);
kmem_cache_destroy(gfs2_bufdata_cachep);
kmem_cache_destroy(gfs2_inode_cachep);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index f96eb90a2cfa..3cb0a44ba023 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -32,7 +32,6 @@ enum {
Opt_debug,
Opt_nodebug,
Opt_upgrade,
- Opt_num_glockd,
Opt_acl,
Opt_noacl,
Opt_quota_off,
@@ -57,7 +56,6 @@ static const match_table_t tokens = {
{Opt_debug, "debug"},
{Opt_nodebug, "nodebug"},
{Opt_upgrade, "upgrade"},
- {Opt_num_glockd, "num_glockd=%d"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_quota_off, "quota=off"},
@@ -87,16 +85,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
int error = 0;
if (!remount) {
- /* If someone preloaded options, use those instead */
- spin_lock(&gfs2_sys_margs_lock);
- if (gfs2_sys_margs) {
- data = gfs2_sys_margs;
- gfs2_sys_margs = NULL;
- }
- spin_unlock(&gfs2_sys_margs_lock);
-
/* Set some defaults */
- args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
args->ar_quota = GFS2_QUOTA_DEFAULT;
args->ar_data = GFS2_DATA_DEFAULT;
}
@@ -105,7 +94,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
process them */
for (options = data; (o = strsep(&options, ",")); ) {
- int token, option;
+ int token;
substring_t tmp[MAX_OPT_ARGS];
if (!*o)
@@ -196,22 +185,6 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
goto cant_remount;
args->ar_upgrade = 1;
break;
- case Opt_num_glockd:
- if ((error = match_int(&tmp[0], &option))) {
- fs_info(sdp, "problem getting num_glockd\n");
- goto out_error;
- }
-
- if (remount && option != args->ar_num_glockd)
- goto cant_remount;
- if (!option || option > GFS2_GLOCKD_MAX) {
- fs_info(sdp, "0 < num_glockd <= %u (not %u)\n",
- GFS2_GLOCKD_MAX, option);
- error = -EINVAL;
- goto out_error;
- }
- args->ar_num_glockd = option;
- break;
case Opt_acl:
args->ar_posix_acl = 1;
sdp->sd_vfs->s_flags |= MS_POSIXACL;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 27563816e1c5..82a1db674257 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -210,25 +210,23 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc
{
struct inode *inode = page->mapping->host;
struct gfs2_sbd *sdp = GFS2_SB(inode);
- int error;
+ int ret;
int done_trans = 0;
- error = gfs2_writepage_common(page, wbc);
- if (error <= 0)
- return error;
-
if (PageChecked(page)) {
if (wbc->sync_mode != WB_SYNC_ALL)
goto out_ignore;
- error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
- if (error)
+ ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
+ if (ret)
goto out_ignore;
done_trans = 1;
}
- error = __gfs2_jdata_writepage(page, wbc);
+ ret = gfs2_writepage_common(page, wbc);
+ if (ret > 0)
+ ret = __gfs2_jdata_writepage(page, wbc);
if (done_trans)
gfs2_trans_end(sdp);
- return error;
+ return ret;
out_ignore:
redirty_page_for_writepage(wbc, page);
@@ -453,8 +451,8 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
kaddr = kmap_atomic(page, KM_USER0);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
- ip->i_di.di_size);
- memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
+ ip->i_disksize);
+ memset(kaddr + ip->i_disksize, 0, PAGE_CACHE_SIZE - ip->i_disksize);
kunmap_atomic(kaddr, KM_USER0);
flush_dcache_page(page);
brelse(dibh);
@@ -782,7 +780,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
if (inode->i_size < to) {
i_size_write(inode, to);
- ip->i_di.di_size = inode->i_size;
+ ip->i_disksize = inode->i_size;
di->di_size = cpu_to_be64(inode->i_size);
mark_inode_dirty(inode);
}
@@ -847,9 +845,9 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
- if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) {
+ if (likely(ret >= 0) && (inode->i_size > ip->i_disksize)) {
di = (struct gfs2_dinode *)dibh->b_data;
- ip->i_di.di_size = inode->i_size;
+ ip->i_disksize = inode->i_size;
di->di_size = cpu_to_be64(inode->i_size);
mark_inode_dirty(inode);
}
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index 4a5e676b4420..c2ad36330ca3 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -19,7 +19,7 @@
#include "incore.h"
#include "dir.h"
#include "glock.h"
-#include "ops_dentry.h"
+#include "super.h"
#include "util.h"
#include "inode.h"
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h
deleted file mode 100644
index 5caa3db4d3f5..000000000000
--- a/fs/gfs2/ops_dentry.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_DENTRY_DOT_H__
-#define __OPS_DENTRY_DOT_H__
-
-#include <linux/dcache.h>
-
-extern struct dentry_operations gfs2_dops;
-
-#endif /* __OPS_DENTRY_DOT_H__ */
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index bbb8c36403a9..7fdeb14ddd1a 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,8 +22,7 @@
#include "glock.h"
#include "glops.h"
#include "inode.h"
-#include "ops_dentry.h"
-#include "ops_fstype.h"
+#include "super.h"
#include "rgrp.h"
#include "util.h"
@@ -214,7 +213,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
}
error = -EIO;
- if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
+ if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
iput(inode);
goto fail;
}
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 3a747f8e2188..a6b7a733fd4d 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -39,7 +39,6 @@
#include "util.h"
#include "eaops.h"
#include "ops_address.h"
-#include "ops_inode.h"
/**
* gfs2_llseek - seek to a location in a file
@@ -158,8 +157,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
if (error)
return error;
- fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags);
- if (!S_ISDIR(inode->i_mode) && ip->i_di.di_flags & GFS2_DIF_JDATA)
+ fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags);
+ if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA)
fsflags |= FS_JOURNAL_DATA_FL;
if (put_user(fsflags, ptr))
error = -EFAULT;
@@ -172,17 +171,16 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
void gfs2_set_inode_flags(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_dinode_host *di = &ip->i_di;
unsigned int flags = inode->i_flags;
flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
- if (di->di_flags & GFS2_DIF_IMMUTABLE)
+ if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
flags |= S_IMMUTABLE;
- if (di->di_flags & GFS2_DIF_APPENDONLY)
+ if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
flags |= S_APPEND;
- if (di->di_flags & GFS2_DIF_NOATIME)
+ if (ip->i_diskflags & GFS2_DIF_NOATIME)
flags |= S_NOATIME;
- if (di->di_flags & GFS2_DIF_SYNC)
+ if (ip->i_diskflags & GFS2_DIF_SYNC)
flags |= S_SYNC;
inode->i_flags = flags;
}
@@ -221,7 +219,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
if (error)
goto out_drop_write;
- flags = ip->i_di.di_flags;
+ flags = ip->i_diskflags;
new_flags = (flags & ~mask) | (reqflags & mask);
if ((new_flags ^ flags) == 0)
goto out;
@@ -260,7 +258,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
if (error)
goto out_trans_end;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
- ip->i_di.di_flags = new_flags;
+ ip->i_diskflags = new_flags;
gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
gfs2_set_inode_flags(inode);
@@ -479,7 +477,7 @@ static int gfs2_open(struct inode *inode, struct file *file)
goto fail;
if (!(file->f_flags & O_LARGEFILE) &&
- ip->i_di.di_size > MAX_NON_LFS) {
+ ip->i_disksize > MAX_NON_LFS) {
error = -EOVERFLOW;
goto fail_gunlock;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b117fcf2c4f5..2e735bece6b9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -22,20 +22,18 @@
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
-#include "daemon.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "mount.h"
-#include "ops_fstype.h"
-#include "ops_dentry.h"
-#include "ops_super.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "sys.h"
#include "util.h"
#include "log.h"
+#include "quota.h"
+#include "dir.h"
#define DO 0
#define UNDO 1
@@ -58,12 +56,10 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
{
spin_lock_init(&gt->gt_spin);
- gt->gt_demote_secs = 300;
gt->gt_incore_log_blocks = 1024;
gt->gt_log_flush_secs = 60;
gt->gt_recoverd_secs = 60;
gt->gt_logd_secs = 1;
- gt->gt_quotad_secs = 5;
gt->gt_quota_simul_sync = 64;
gt->gt_quota_warn_period = 10;
gt->gt_quota_scale_num = 1;
@@ -91,10 +87,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
gfs2_tune_init(&sdp->sd_tune);
- INIT_LIST_HEAD(&sdp->sd_reclaim_list);
- spin_lock_init(&sdp->sd_reclaim_lock);
- init_waitqueue_head(&sdp->sd_reclaim_wq);
-
mutex_init(&sdp->sd_inum_mutex);
spin_lock_init(&sdp->sd_statfs_spin);
@@ -110,6 +102,9 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
INIT_LIST_HEAD(&sdp->sd_quota_list);
spin_lock_init(&sdp->sd_quota_spin);
mutex_init(&sdp->sd_quota_mutex);
+ init_waitqueue_head(&sdp->sd_quota_wait);
+ INIT_LIST_HEAD(&sdp->sd_trunc_list);
+ spin_lock_init(&sdp->sd_trunc_lock);
spin_lock_init(&sdp->sd_log_lock);
@@ -443,24 +438,11 @@ out:
static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
int undo)
{
- struct task_struct *p;
int error = 0;
if (undo)
goto fail_trans;
- for (sdp->sd_glockd_num = 0;
- sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
- sdp->sd_glockd_num++) {
- p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
- error = IS_ERR(p);
- if (error) {
- fs_err(sdp, "can't start glockd thread: %d\n", error);
- goto fail;
- }
- sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
- }
-
error = gfs2_glock_nq_num(sdp,
GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
@@ -493,7 +475,6 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
fs_err(sdp, "can't create transaction glock: %d\n", error);
goto fail_rename;
}
- set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
return 0;
@@ -506,9 +487,6 @@ fail_live:
fail_mount:
gfs2_glock_dq_uninit(mount_gh);
fail:
- while (sdp->sd_glockd_num--)
- kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
-
return error;
}
@@ -620,7 +598,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
prev_db = 0;
- for (lb = 0; lb < ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; lb++) {
+ for (lb = 0; lb < ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; lb++) {
bh.b_state = 0;
bh.b_blocknr = 0;
bh.b_size = 1 << ip->i_inode.i_blkbits;
@@ -661,6 +639,106 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
sdp->sd_lockstruct.ls_lockspace);
}
+/**
+ * gfs2_jindex_hold - Grab a lock on the jindex
+ * @sdp: The GFS2 superblock
+ * @ji_gh: the holder for the jindex glock
+ *
+ * Returns: errno
+ */
+
+static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
+{
+ struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
+ struct qstr name;
+ char buf[20];
+ struct gfs2_jdesc *jd;
+ int error;
+
+ name.name = buf;
+
+ mutex_lock(&sdp->sd_jindex_mutex);
+
+ for (;;) {
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
+ if (error)
+ break;
+
+ name.len = sprintf(buf, "journal%u", sdp->sd_journals);
+ name.hash = gfs2_disk_hash(name.name, name.len);
+
+ error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
+ if (error == -ENOENT) {
+ error = 0;
+ break;
+ }
+
+ gfs2_glock_dq_uninit(ji_gh);
+
+ if (error)
+ break;
+
+ error = -ENOMEM;
+ jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
+ if (!jd)
+ break;
+
+ INIT_LIST_HEAD(&jd->extent_list);
+ jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
+ if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
+ if (!jd->jd_inode)
+ error = -ENOENT;
+ else
+ error = PTR_ERR(jd->jd_inode);
+ kfree(jd);
+ break;
+ }
+
+ spin_lock(&sdp->sd_jindex_spin);
+ jd->jd_jid = sdp->sd_journals++;
+ list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
+ spin_unlock(&sdp->sd_jindex_spin);
+ }
+
+ mutex_unlock(&sdp->sd_jindex_mutex);
+
+ return error;
+}
+
+/**
+ * gfs2_jindex_free - Clear all the journal index information
+ * @sdp: The GFS2 superblock
+ *
+ */
+
+static void gfs2_jindex_free(struct gfs2_sbd *sdp)
+{
+ struct list_head list, *head;
+ struct gfs2_jdesc *jd;
+ struct gfs2_journal_extent *jext;
+
+ spin_lock(&sdp->sd_jindex_spin);
+ list_add(&list, &sdp->sd_jindex_list);
+ list_del_init(&sdp->sd_jindex_list);
+ sdp->sd_journals = 0;
+ spin_unlock(&sdp->sd_jindex_spin);
+
+ while (!list_empty(&list)) {
+ jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
+ head = &jd->extent_list;
+ while (!list_empty(head)) {
+ jext = list_entry(head->next,
+ struct gfs2_journal_extent,
+ extent_list);
+ list_del(&jext->extent_list);
+ kfree(jext);
+ }
+ list_del(&jd->jd_list);
+ iput(jd->jd_inode);
+ kfree(jd);
+ }
+}
+
static int init_journal(struct gfs2_sbd *sdp, int undo)
{
struct inode *master = sdp->sd_master_dir->d_inode;
@@ -681,7 +759,6 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
return PTR_ERR(sdp->sd_jindex);
}
ip = GFS2_I(sdp->sd_jindex);
- set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
/* Load in the journal index special file */
@@ -832,7 +909,6 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
goto fail_statfs;
}
ip = GFS2_I(sdp->sd_rindex);
- set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
sdp->sd_rindex_uptodate = 0;
/* Read in the quota inode */
@@ -973,9 +1049,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo)
}
sdp->sd_logd_process = p;
- sdp->sd_statfs_sync_time = jiffies;
- sdp->sd_quota_sync_time = jiffies;
-
p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
error = IS_ERR(p);
if (error) {
@@ -1164,7 +1237,7 @@ fail_sb:
fail_locking:
init_locking(sdp, &mount_gh, UNDO);
fail_lm:
- gfs2_gl_hash_clear(sdp);
+ gfs2_gl_hash_clear(sb);
gfs2_lm_unmount(sdp);
while (invalidate_inodes(sb))
yield();
@@ -1224,17 +1297,61 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
static void gfs2_kill_sb(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
- if (sdp) {
- gfs2_meta_syncfs(sdp);
- dput(sdp->sd_root_dir);
- dput(sdp->sd_master_dir);
- sdp->sd_root_dir = NULL;
- sdp->sd_master_dir = NULL;
+
+ if (sdp == NULL) {
+ kill_block_super(sb);
+ return;
+ }
+ gfs2_meta_syncfs(sdp);
+ dput(sdp->sd_root_dir);
+ dput(sdp->sd_master_dir);
+ sdp->sd_root_dir = NULL;
+ sdp->sd_master_dir = NULL;
+
+ /* Unfreeze the filesystem, if we need to */
+ mutex_lock(&sdp->sd_freeze_lock);
+ if (sdp->sd_freeze_count)
+ gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+ mutex_unlock(&sdp->sd_freeze_lock);
+
+ kthread_stop(sdp->sd_quotad_process);
+ kthread_stop(sdp->sd_logd_process);
+ kthread_stop(sdp->sd_recoverd_process);
+
+ if (!(sb->s_flags & MS_RDONLY)) {
+ int error = gfs2_make_fs_ro(sdp);
+ if (error)
+ gfs2_io_error(sdp);
}
- shrink_dcache_sb(sb);
+
+ /* At this point, we're through modifying the disk */
+ gfs2_jindex_free(sdp);
+ gfs2_clear_rgrpd(sdp);
+ iput(sdp->sd_jindex);
+ iput(sdp->sd_inum_inode);
+ iput(sdp->sd_statfs_inode);
+ iput(sdp->sd_rindex);
+ iput(sdp->sd_quota_inode);
+
+ gfs2_glock_put(sdp->sd_rename_gl);
+ gfs2_glock_put(sdp->sd_trans_gl);
+
+ if (!sdp->sd_args.ar_spectator) {
+ gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+ gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+ gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
+ gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+ gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
+ iput(sdp->sd_ir_inode);
+ iput(sdp->sd_sc_inode);
+ iput(sdp->sd_qc_inode);
+ }
+ gfs2_glock_dq_uninit(&sdp->sd_live_gh);
kill_block_super(sb);
- if (sdp)
- gfs2_delete_debugfs_file(sdp);
+ gfs2_lm_unmount(sdp);
+ gfs2_sys_fs_del(sdp);
+ gfs2_delete_debugfs_file(sdp);
+ kfree(sdp);
}
struct file_system_type gfs2_fs_type = {
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
deleted file mode 100644
index da8490511836..000000000000
--- a/fs/gfs2/ops_fstype.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_FSTYPE_DOT_H__
-#define __OPS_FSTYPE_DOT_H__
-
-#include <linux/fs.h>
-
-extern struct file_system_type gfs2_fs_type;
-extern struct file_system_type gfs2meta_fs_type;
-extern const struct export_operations gfs2_export_ops;
-
-#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d232991b9046..49877546beb9 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -19,6 +19,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
+#include <linux/fiemap.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -31,12 +32,11 @@
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
-#include "ops_dentry.h"
-#include "ops_inode.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
+#include "super.h"
/**
* gfs2_create - Create a file
@@ -185,7 +185,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (!dip->i_inode.i_nlink)
goto out_gunlock;
error = -EFBIG;
- if (dip->i_di.di_entries == (u32)-1)
+ if (dip->i_entries == (u32)-1)
goto out_gunlock;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -371,7 +371,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
ip = ghs[1].gh_gl->gl_object;
- ip->i_di.di_size = size;
+ ip->i_disksize = size;
error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -425,9 +425,9 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
ip = ghs[1].gh_gl->gl_object;
ip->i_inode.i_nlink = 2;
- ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
- ip->i_di.di_flags |= GFS2_DIF_JDATA;
- ip->i_di.di_entries = 2;
+ ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
+ ip->i_diskflags |= GFS2_DIF_JDATA;
+ ip->i_entries = 2;
error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -517,13 +517,13 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
if (error)
goto out_gunlock;
- if (ip->i_di.di_entries < 2) {
+ if (ip->i_entries < 2) {
if (gfs2_consist_inode(ip))
gfs2_dinode_print(ip);
error = -EIO;
goto out_gunlock;
}
- if (ip->i_di.di_entries > 2) {
+ if (ip->i_entries > 2) {
error = -ENOTEMPTY;
goto out_gunlock;
}
@@ -726,13 +726,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
goto out_gunlock;
if (S_ISDIR(nip->i_inode.i_mode)) {
- if (nip->i_di.di_entries < 2) {
+ if (nip->i_entries < 2) {
if (gfs2_consist_inode(nip))
gfs2_dinode_print(nip);
error = -EIO;
goto out_gunlock;
}
- if (nip->i_di.di_entries > 2) {
+ if (nip->i_entries > 2) {
error = -ENOTEMPTY;
goto out_gunlock;
}
@@ -758,7 +758,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = -EINVAL;
goto out_gunlock;
}
- if (ndip->i_di.di_entries == (u32)-1) {
+ if (ndip->i_entries == (u32)-1) {
error = -EFBIG;
goto out_gunlock;
}
@@ -990,7 +990,7 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
- if (attr->ia_size != ip->i_di.di_size) {
+ if (attr->ia_size != ip->i_disksize) {
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
return error;
@@ -1001,8 +1001,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
}
error = gfs2_truncatei(ip, attr->ia_size);
- if (error && (inode->i_size != ip->i_di.di_size))
- i_size_write(inode, ip->i_di.di_size);
+ if (error && (inode->i_size != ip->i_disksize))
+ i_size_write(inode, ip->i_disksize);
return error;
}
@@ -1212,6 +1212,48 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
}
+static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
+
+ ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
+
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ if (ret)
+ goto out;
+
+ if (gfs2_is_stuffed(ip)) {
+ u64 phys = ip->i_no_addr << inode->i_blkbits;
+ u64 size = i_size_read(inode);
+ u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
+ FIEMAP_EXTENT_DATA_INLINE;
+ phys += sizeof(struct gfs2_dinode);
+ phys += start;
+ if (start + len > size)
+ len = size - start;
+ if (start < size)
+ ret = fiemap_fill_next_extent(fieinfo, start, phys,
+ len, flags);
+ if (ret == 1)
+ ret = 0;
+ } else {
+ ret = __generic_block_fiemap(inode, fieinfo, start, len,
+ gfs2_block_map);
+ }
+
+ gfs2_glock_dq_uninit(&gh);
+out:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
const struct inode_operations gfs2_file_iops = {
.permission = gfs2_permission,
.setattr = gfs2_setattr,
@@ -1220,6 +1262,7 @@ const struct inode_operations gfs2_file_iops = {
.getxattr = gfs2_getxattr,
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
+ .fiemap = gfs2_fiemap,
};
const struct inode_operations gfs2_dir_iops = {
@@ -1239,6 +1282,7 @@ const struct inode_operations gfs2_dir_iops = {
.getxattr = gfs2_getxattr,
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
+ .fiemap = gfs2_fiemap,
};
const struct inode_operations gfs2_symlink_iops = {
@@ -1251,5 +1295,6 @@ const struct inode_operations gfs2_symlink_iops = {
.getxattr = gfs2_getxattr,
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
+ .fiemap = gfs2_fiemap,
};
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
deleted file mode 100644
index 14b4b797622a..000000000000
--- a/fs/gfs2/ops_inode.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_INODE_DOT_H__
-#define __OPS_INODE_DOT_H__
-
-#include <linux/fs.h>
-
-extern const struct inode_operations gfs2_file_iops;
-extern const struct inode_operations gfs2_dir_iops;
-extern const struct inode_operations gfs2_symlink_iops;
-extern const struct file_operations gfs2_file_fops;
-extern const struct file_operations gfs2_dir_fops;
-extern const struct file_operations gfs2_file_fops_nolock;
-extern const struct file_operations gfs2_dir_fops_nolock;
-
-extern void gfs2_set_inode_flags(struct inode *inode);
-
-#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index d5355d9b5926..bd08a0a8d9bf 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -28,7 +28,6 @@
#include "inode.h"
#include "log.h"
#include "mount.h"
-#include "ops_super.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
@@ -96,7 +95,7 @@ do_flush:
* Returns: errno
*/
-static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
struct gfs2_holder t_gh;
int error;
@@ -123,72 +122,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
}
/**
- * gfs2_put_super - Unmount the filesystem
- * @sb: The VFS superblock
- *
- */
-
-static void gfs2_put_super(struct super_block *sb)
-{
- struct gfs2_sbd *sdp = sb->s_fs_info;
- int error;
-
- /* Unfreeze the filesystem, if we need to */
-
- mutex_lock(&sdp->sd_freeze_lock);
- if (sdp->sd_freeze_count)
- gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
- mutex_unlock(&sdp->sd_freeze_lock);
-
- kthread_stop(sdp->sd_quotad_process);
- kthread_stop(sdp->sd_logd_process);
- kthread_stop(sdp->sd_recoverd_process);
- while (sdp->sd_glockd_num--)
- kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
-
- if (!(sb->s_flags & MS_RDONLY)) {
- error = gfs2_make_fs_ro(sdp);
- if (error)
- gfs2_io_error(sdp);
- }
- /* At this point, we're through modifying the disk */
-
- /* Release stuff */
-
- iput(sdp->sd_jindex);
- iput(sdp->sd_inum_inode);
- iput(sdp->sd_statfs_inode);
- iput(sdp->sd_rindex);
- iput(sdp->sd_quota_inode);
-
- gfs2_glock_put(sdp->sd_rename_gl);
- gfs2_glock_put(sdp->sd_trans_gl);
-
- if (!sdp->sd_args.ar_spectator) {
- gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
- gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
- gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
- gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
- gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
- iput(sdp->sd_ir_inode);
- iput(sdp->sd_sc_inode);
- iput(sdp->sd_qc_inode);
- }
-
- gfs2_glock_dq_uninit(&sdp->sd_live_gh);
- gfs2_clear_rgrpd(sdp);
- gfs2_jindex_free(sdp);
- /* Take apart glock structures and buffer lists */
- gfs2_gl_hash_clear(sdp);
- /* Unmount the locking protocol */
- gfs2_lm_unmount(sdp);
-
- /* At this point, we're through participating in the lockspace */
- gfs2_sys_fs_del(sdp);
- kfree(sdp);
-}
-
-/**
* gfs2_write_super
* @sb: the superblock
*
@@ -260,6 +193,137 @@ static void gfs2_unlockfs(struct super_block *sb)
}
/**
+ * statfs_fill - fill in the sg for a given RG
+ * @rgd: the RG
+ * @sc: the sc structure
+ *
+ * Returns: 0 on success, -ESTALE if the LVB is invalid
+ */
+
+static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
+ struct gfs2_statfs_change_host *sc)
+{
+ gfs2_rgrp_verify(rgd);
+ sc->sc_total += rgd->rd_data;
+ sc->sc_free += rgd->rd_free;
+ sc->sc_dinodes += rgd->rd_dinodes;
+ return 0;
+}
+
+/**
+ * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
+ * @sdp: the filesystem
+ * @sc: the sc info that will be returned
+ *
+ * Any error (other than a signal) will cause this routine to fall back
+ * to the synchronous version.
+ *
+ * FIXME: This really shouldn't busy wait like this.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
+{
+ struct gfs2_holder ri_gh;
+ struct gfs2_rgrpd *rgd_next;
+ struct gfs2_holder *gha, *gh;
+ unsigned int slots = 64;
+ unsigned int x;
+ int done;
+ int error = 0, err;
+
+ memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
+ gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
+ if (!gha)
+ return -ENOMEM;
+
+ error = gfs2_rindex_hold(sdp, &ri_gh);
+ if (error)
+ goto out;
+
+ rgd_next = gfs2_rgrpd_get_first(sdp);
+
+ for (;;) {
+ done = 1;
+
+ for (x = 0; x < slots; x++) {
+ gh = gha + x;
+
+ if (gh->gh_gl && gfs2_glock_poll(gh)) {
+ err = gfs2_glock_wait(gh);
+ if (err) {
+ gfs2_holder_uninit(gh);
+ error = err;
+ } else {
+ if (!error)
+ error = statfs_slow_fill(
+ gh->gh_gl->gl_object, sc);
+ gfs2_glock_dq_uninit(gh);
+ }
+ }
+
+ if (gh->gh_gl)
+ done = 0;
+ else if (rgd_next && !error) {
+ error = gfs2_glock_nq_init(rgd_next->rd_gl,
+ LM_ST_SHARED,
+ GL_ASYNC,
+ gh);
+ rgd_next = gfs2_rgrpd_get_next(rgd_next);
+ done = 0;
+ }
+
+ if (signal_pending(current))
+ error = -ERESTARTSYS;
+ }
+
+ if (done)
+ break;
+
+ yield();
+ }
+
+ gfs2_glock_dq_uninit(&ri_gh);
+
+out:
+ kfree(gha);
+ return error;
+}
+
+/**
+ * gfs2_statfs_i - Do a statfs
+ * @sdp: the filesystem
+ * @sg: the sg structure
+ *
+ * Returns: errno
+ */
+
+static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
+{
+ struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+ struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+
+ spin_lock(&sdp->sd_statfs_spin);
+
+ *sc = *m_sc;
+ sc->sc_total += l_sc->sc_total;
+ sc->sc_free += l_sc->sc_free;
+ sc->sc_dinodes += l_sc->sc_dinodes;
+
+ spin_unlock(&sdp->sd_statfs_spin);
+
+ if (sc->sc_free < 0)
+ sc->sc_free = 0;
+ if (sc->sc_free > sc->sc_total)
+ sc->sc_free = sc->sc_total;
+ if (sc->sc_dinodes < 0)
+ sc->sc_dinodes = 0;
+
+ return 0;
+}
+
+/**
* gfs2_statfs - Gather and return stats about the filesystem
* @sb: The superblock
* @statfsbuf: The buffer
@@ -370,7 +434,6 @@ static void gfs2_clear_inode(struct inode *inode)
*/
if (test_bit(GIF_USER, &ip->i_flags)) {
ip->i_gl->gl_object = NULL;
- gfs2_glock_schedule_for_reclaim(ip->i_gl);
gfs2_glock_put(ip->i_gl);
ip->i_gl = NULL;
if (ip->i_iopen_gh.gh_gl) {
@@ -423,8 +486,6 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
seq_printf(s, ",debug");
if (args->ar_upgrade)
seq_printf(s, ",upgrade");
- if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
- seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
if (args->ar_posix_acl)
seq_printf(s, ",acl");
if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
@@ -494,16 +555,16 @@ static void gfs2_delete_inode(struct inode *inode)
gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
error = gfs2_glock_nq(&ip->i_iopen_gh);
if (error)
- goto out_uninit;
+ goto out_truncate;
if (S_ISDIR(inode->i_mode) &&
- (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
+ (ip->i_diskflags & GFS2_DIF_EXHASH)) {
error = gfs2_dir_exhash_dealloc(ip);
if (error)
goto out_unlock;
}
- if (ip->i_di.di_eattr) {
+ if (ip->i_eattr) {
error = gfs2_ea_dealloc(ip);
if (error)
goto out_unlock;
@@ -519,6 +580,7 @@ static void gfs2_delete_inode(struct inode *inode)
if (error)
goto out_unlock;
+out_truncate:
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
goto out_unlock;
@@ -527,8 +589,8 @@ static void gfs2_delete_inode(struct inode *inode)
gfs2_trans_end(sdp);
out_unlock:
- gfs2_glock_dq(&ip->i_iopen_gh);
-out_uninit:
+ if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
+ gfs2_glock_dq(&ip->i_iopen_gh);
gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&gh);
if (error && error != GLR_TRYFAILED)
@@ -560,7 +622,7 @@ const struct super_operations gfs2_super_ops = {
.destroy_inode = gfs2_destroy_inode,
.write_inode = gfs2_write_inode,
.delete_inode = gfs2_delete_inode,
- .put_super = gfs2_put_super,
+ .put_super = gfs2_gl_hash_clear,
.write_super = gfs2_write_super,
.sync_fs = gfs2_sync_fs,
.write_super_lockfs = gfs2_write_super_lockfs,
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
deleted file mode 100644
index 442a274c6272..000000000000
--- a/fs/gfs2/ops_super.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_SUPER_DOT_H__
-#define __OPS_SUPER_DOT_H__
-
-#include <linux/fs.h>
-
-extern const struct super_operations gfs2_super_ops;
-
-#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3e073f5144fa..b08d09696b3e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -46,6 +46,8 @@
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include "gfs2.h"
#include "incore.h"
@@ -94,7 +96,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
struct gfs2_quota_data *qd;
int error;
- qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_NOFS);
+ qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS);
if (!qd)
return -ENOMEM;
@@ -119,7 +121,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
return 0;
fail:
- kfree(qd);
+ kmem_cache_free(gfs2_quotad_cachep, qd);
return error;
}
@@ -158,7 +160,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
if (qd || !create) {
if (new_qd) {
gfs2_lvb_unhold(new_qd->qd_gl);
- kfree(new_qd);
+ kmem_cache_free(gfs2_quotad_cachep, new_qd);
}
*qdp = qd;
return 0;
@@ -1013,7 +1015,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change))
return;
- if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
+ if (ip->i_diskflags & GFS2_DIF_SYSTEM)
return;
for (x = 0; x < al->al_qd_num; x++) {
@@ -1100,15 +1102,15 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *
int gfs2_quota_init(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
- unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
+ unsigned int blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
unsigned int x, slot = 0;
unsigned int found = 0;
u64 dblock;
u32 extlen = 0;
int error;
- if (!ip->i_di.di_size || ip->i_di.di_size > (64 << 20) ||
- ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
+ if (!ip->i_disksize || ip->i_disksize > (64 << 20) ||
+ ip->i_disksize & (sdp->sd_sb.sb_bsize - 1)) {
gfs2_consist_inode(ip);
return -EIO;
}
@@ -1195,7 +1197,7 @@ fail:
return error;
}
-void gfs2_quota_scan(struct gfs2_sbd *sdp)
+static void gfs2_quota_scan(struct gfs2_sbd *sdp)
{
struct gfs2_quota_data *qd, *safe;
LIST_HEAD(dead);
@@ -1222,7 +1224,7 @@ void gfs2_quota_scan(struct gfs2_sbd *sdp)
gfs2_assert_warn(sdp, !qd->qd_bh_count);
gfs2_lvb_unhold(qd->qd_gl);
- kfree(qd);
+ kmem_cache_free(gfs2_quotad_cachep, qd);
}
}
@@ -1257,7 +1259,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
gfs2_assert_warn(sdp, !qd->qd_bh_count);
gfs2_lvb_unhold(qd->qd_gl);
- kfree(qd);
+ kmem_cache_free(gfs2_quotad_cachep, qd);
spin_lock(&sdp->sd_quota_spin);
}
@@ -1272,3 +1274,94 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
}
}
+static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
+{
+ if (error == 0 || error == -EROFS)
+ return;
+ if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
+}
+
+static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
+ int (*fxn)(struct gfs2_sbd *sdp),
+ unsigned long t, unsigned long *timeo,
+ unsigned int *new_timeo)
+{
+ if (t >= *timeo) {
+ int error = fxn(sdp);
+ quotad_error(sdp, msg, error);
+ *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ;
+ } else {
+ *timeo -= t;
+ }
+}
+
+static void quotad_check_trunc_list(struct gfs2_sbd *sdp)
+{
+ struct gfs2_inode *ip;
+
+ while(1) {
+ ip = NULL;
+ spin_lock(&sdp->sd_trunc_lock);
+ if (!list_empty(&sdp->sd_trunc_list)) {
+ ip = list_entry(sdp->sd_trunc_list.next,
+ struct gfs2_inode, i_trunc_list);
+ list_del_init(&ip->i_trunc_list);
+ }
+ spin_unlock(&sdp->sd_trunc_lock);
+ if (ip == NULL)
+ return;
+ gfs2_glock_finish_truncate(ip);
+ }
+}
+
+/**
+ * gfs2_quotad - Write cached quota changes into the quota file
+ * @sdp: Pointer to GFS2 superblock
+ *
+ */
+
+int gfs2_quotad(void *data)
+{
+ struct gfs2_sbd *sdp = data;
+ struct gfs2_tune *tune = &sdp->sd_tune;
+ unsigned long statfs_timeo = 0;
+ unsigned long quotad_timeo = 0;
+ unsigned long t = 0;
+ DEFINE_WAIT(wait);
+ int empty;
+
+ while (!kthread_should_stop()) {
+
+ /* Update the master statfs file */
+ quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t,
+ &statfs_timeo, &tune->gt_statfs_quantum);
+
+ /* Update quota file */
+ quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
+ &quotad_timeo, &tune->gt_quota_quantum);
+
+ /* FIXME: This should be turned into a shrinker */
+ gfs2_quota_scan(sdp);
+
+ /* Check for & recover partially truncated inodes */
+ quotad_check_trunc_list(sdp);
+
+ if (freezing(current))
+ refrigerator();
+ t = min(quotad_timeo, statfs_timeo);
+
+ prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE);
+ spin_lock(&sdp->sd_trunc_lock);
+ empty = list_empty(&sdp->sd_trunc_list);
+ spin_unlock(&sdp->sd_trunc_lock);
+ if (empty)
+ t -= schedule_timeout(t);
+ else
+ t = 0;
+ finish_wait(&sdp->sd_quota_wait, &wait);
+ }
+
+ return 0;
+}
+
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 3b7f4b0e5dfe..cec9032be97d 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -15,22 +15,22 @@ struct gfs2_sbd;
#define NO_QUOTA_CHANGE ((u32)-1)
-int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid);
-void gfs2_quota_unhold(struct gfs2_inode *ip);
+extern int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid);
+extern void gfs2_quota_unhold(struct gfs2_inode *ip);
-int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid);
-void gfs2_quota_unlock(struct gfs2_inode *ip);
+extern int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid);
+extern void gfs2_quota_unlock(struct gfs2_inode *ip);
-int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
-void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
- u32 uid, u32 gid);
+extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
+extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
+ u32 uid, u32 gid);
-int gfs2_quota_sync(struct gfs2_sbd *sdp);
-int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
+extern int gfs2_quota_sync(struct gfs2_sbd *sdp);
+extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
-int gfs2_quota_init(struct gfs2_sbd *sdp);
-void gfs2_quota_scan(struct gfs2_sbd *sdp);
-void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
+extern int gfs2_quota_init(struct gfs2_sbd *sdp);
+extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
+extern int gfs2_quotad(void *data);
static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
{
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index d5e91f4f6a0b..efd09c3d2b26 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -14,6 +14,8 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include "gfs2.h"
#include "incore.h"
@@ -583,13 +585,35 @@ fail:
return error;
}
+static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
+{
+ struct gfs2_jdesc *jd;
+ int found = 0;
+
+ spin_lock(&sdp->sd_jindex_spin);
+
+ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+ if (jd->jd_dirty) {
+ jd->jd_dirty = 0;
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock(&sdp->sd_jindex_spin);
+
+ if (!found)
+ jd = NULL;
+
+ return jd;
+}
+
/**
* gfs2_check_journals - Recover any dirty journals
* @sdp: the filesystem
*
*/
-void gfs2_check_journals(struct gfs2_sbd *sdp)
+static void gfs2_check_journals(struct gfs2_sbd *sdp)
{
struct gfs2_jdesc *jd;
@@ -603,3 +627,25 @@ void gfs2_check_journals(struct gfs2_sbd *sdp)
}
}
+/**
+ * gfs2_recoverd - Recover dead machine's journals
+ * @sdp: Pointer to GFS2 superblock
+ *
+ */
+
+int gfs2_recoverd(void *data)
+{
+ struct gfs2_sbd *sdp = data;
+ unsigned long t;
+
+ while (!kthread_should_stop()) {
+ gfs2_check_journals(sdp);
+ t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
+ if (freezing(current))
+ refrigerator();
+ schedule_timeout_interruptible(t);
+ }
+
+ return 0;
+}
+
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index f7235e61c723..a8218ea15b57 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -18,17 +18,17 @@ static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
*blk = 0;
}
-int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
+extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh);
-int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
-int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
-void gfs2_revoke_clean(struct gfs2_sbd *sdp);
+extern int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
+extern int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
+extern void gfs2_revoke_clean(struct gfs2_sbd *sdp);
-int gfs2_find_jhead(struct gfs2_jdesc *jd,
+extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head);
-int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
-void gfs2_check_journals(struct gfs2_sbd *sdp);
+extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
+extern int gfs2_recoverd(void *data);
#endif /* __RECOVERY_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 2d90fb253505..8b01c635d925 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -269,16 +269,14 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
bi->bi_len, x);
}
- if (count[0] != rgd->rd_rg.rg_free) {
+ if (count[0] != rgd->rd_free) {
if (gfs2_consist_rgrpd(rgd))
fs_err(sdp, "free data mismatch: %u != %u\n",
- count[0], rgd->rd_rg.rg_free);
+ count[0], rgd->rd_free);
return;
}
- tmp = rgd->rd_data -
- rgd->rd_rg.rg_free -
- rgd->rd_rg.rg_dinodes;
+ tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
if (count[1] + count[2] != tmp) {
if (gfs2_consist_rgrpd(rgd))
fs_err(sdp, "used data mismatch: %u != %u\n",
@@ -286,10 +284,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
return;
}
- if (count[3] != rgd->rd_rg.rg_dinodes) {
+ if (count[3] != rgd->rd_dinodes) {
if (gfs2_consist_rgrpd(rgd))
fs_err(sdp, "used metadata mismatch: %u != %u\n",
- count[3], rgd->rd_rg.rg_dinodes);
+ count[3], rgd->rd_dinodes);
return;
}
@@ -501,7 +499,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
for (rgrps = 0;; rgrps++) {
loff_t pos = rgrps * sizeof(struct gfs2_rindex);
- if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
+ if (pos + sizeof(struct gfs2_rindex) >= ip->i_disksize)
break;
error = gfs2_internal_read(ip, &ra_state, buf, &pos,
sizeof(struct gfs2_rindex));
@@ -590,7 +588,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct inode *inode = &ip->i_inode;
struct file_ra_state ra_state;
- u64 rgrp_count = ip->i_di.di_size;
+ u64 rgrp_count = ip->i_disksize;
int error;
if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
@@ -634,7 +632,7 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
/* Ignore partials */
if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
- ip->i_di.di_size)
+ ip->i_disksize)
break;
error = read_rindex_entry(ip, &ra_state);
if (error) {
@@ -692,7 +690,6 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
{
const struct gfs2_rgrp *str = buf;
- struct gfs2_rgrp_host *rg = &rgd->rd_rg;
u32 rg_flags;
rg_flags = be32_to_cpu(str->rg_flags);
@@ -700,24 +697,23 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
rgd->rd_flags |= GFS2_RDF_NOALLOC;
else
rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
- rg->rg_free = be32_to_cpu(str->rg_free);
- rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
- rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
+ rgd->rd_free = be32_to_cpu(str->rg_free);
+ rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
+ rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
}
static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
struct gfs2_rgrp *str = buf;
- struct gfs2_rgrp_host *rg = &rgd->rd_rg;
u32 rg_flags = 0;
if (rgd->rd_flags & GFS2_RDF_NOALLOC)
rg_flags |= GFS2_RGF_NOALLOC;
str->rg_flags = cpu_to_be32(rg_flags);
- str->rg_free = cpu_to_be32(rg->rg_free);
- str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
+ str->rg_free = cpu_to_be32(rgd->rd_free);
+ str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
str->__pad = cpu_to_be32(0);
- str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
+ str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
}
@@ -776,7 +772,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
}
spin_lock(&sdp->sd_rindex_spin);
- rgd->rd_free_clone = rgd->rd_rg.rg_free;
+ rgd->rd_free_clone = rgd->rd_free;
rgd->rd_bh_count++;
spin_unlock(&sdp->sd_rindex_spin);
@@ -850,7 +846,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
}
spin_lock(&sdp->sd_rindex_spin);
- rgd->rd_free_clone = rgd->rd_rg.rg_free;
+ rgd->rd_free_clone = rgd->rd_free;
spin_unlock(&sdp->sd_rindex_spin);
}
@@ -1403,8 +1399,8 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
block = rgd->rd_data0 + blk;
ip->i_goal = block;
- gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free >= *n);
- rgd->rd_rg.rg_free -= *n;
+ gfs2_assert_withdraw(sdp, rgd->rd_free >= *n);
+ rgd->rd_free -= *n;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1445,10 +1441,10 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
block = rgd->rd_data0 + blk;
- gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
- rgd->rd_rg.rg_free--;
- rgd->rd_rg.rg_dinodes++;
- *generation = rgd->rd_rg.rg_igeneration++;
+ gfs2_assert_withdraw(sdp, rgd->rd_free);
+ rgd->rd_free--;
+ rgd->rd_dinodes++;
+ *generation = rgd->rd_igeneration++;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1481,7 +1477,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
if (!rgd)
return;
- rgd->rd_rg.rg_free += blen;
+ rgd->rd_free += blen;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1509,7 +1505,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
if (!rgd)
return;
- rgd->rd_rg.rg_free += blen;
+ rgd->rd_free += blen;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1546,10 +1542,10 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
return;
gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
- if (!rgd->rd_rg.rg_dinodes)
+ if (!rgd->rd_dinodes)
gfs2_consist_rgrpd(rgd);
- rgd->rd_rg.rg_dinodes--;
- rgd->rd_rg.rg_free++;
+ rgd->rd_dinodes--;
+ rgd->rd_free++;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c3ba3d9d0aac..f14658b20204 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -33,110 +33,6 @@
#include "trans.h"
#include "util.h"
-/**
- * gfs2_jindex_hold - Grab a lock on the jindex
- * @sdp: The GFS2 superblock
- * @ji_gh: the holder for the jindex glock
- *
- * This is very similar to the gfs2_rindex_hold() function, except that
- * in general we hold the jindex lock for longer periods of time and
- * we grab it far less frequently (in general) then the rgrp lock.
- *
- * Returns: errno
- */
-
-int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
-{
- struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
- struct qstr name;
- char buf[20];
- struct gfs2_jdesc *jd;
- int error;
-
- name.name = buf;
-
- mutex_lock(&sdp->sd_jindex_mutex);
-
- for (;;) {
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
- if (error)
- break;
-
- name.len = sprintf(buf, "journal%u", sdp->sd_journals);
- name.hash = gfs2_disk_hash(name.name, name.len);
-
- error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
- if (error == -ENOENT) {
- error = 0;
- break;
- }
-
- gfs2_glock_dq_uninit(ji_gh);
-
- if (error)
- break;
-
- error = -ENOMEM;
- jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
- if (!jd)
- break;
-
- INIT_LIST_HEAD(&jd->extent_list);
- jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
- if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
- if (!jd->jd_inode)
- error = -ENOENT;
- else
- error = PTR_ERR(jd->jd_inode);
- kfree(jd);
- break;
- }
-
- spin_lock(&sdp->sd_jindex_spin);
- jd->jd_jid = sdp->sd_journals++;
- list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
- spin_unlock(&sdp->sd_jindex_spin);
- }
-
- mutex_unlock(&sdp->sd_jindex_mutex);
-
- return error;
-}
-
-/**
- * gfs2_jindex_free - Clear all the journal index information
- * @sdp: The GFS2 superblock
- *
- */
-
-void gfs2_jindex_free(struct gfs2_sbd *sdp)
-{
- struct list_head list, *head;
- struct gfs2_jdesc *jd;
- struct gfs2_journal_extent *jext;
-
- spin_lock(&sdp->sd_jindex_spin);
- list_add(&list, &sdp->sd_jindex_list);
- list_del_init(&sdp->sd_jindex_list);
- sdp->sd_journals = 0;
- spin_unlock(&sdp->sd_jindex_spin);
-
- while (!list_empty(&list)) {
- jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
- head = &jd->extent_list;
- while (!list_empty(head)) {
- jext = list_entry(head->next,
- struct gfs2_journal_extent,
- extent_list);
- list_del(&jext->extent_list);
- kfree(jext);
- }
- list_del(&jd->jd_list);
- iput(jd->jd_inode);
- kfree(jd);
- }
-}
-
static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
{
struct gfs2_jdesc *jd;
@@ -166,39 +62,6 @@ struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
return jd;
}
-void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
-{
- struct gfs2_jdesc *jd;
-
- spin_lock(&sdp->sd_jindex_spin);
- jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
- if (jd)
- jd->jd_dirty = 1;
- spin_unlock(&sdp->sd_jindex_spin);
-}
-
-struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
-{
- struct gfs2_jdesc *jd;
- int found = 0;
-
- spin_lock(&sdp->sd_jindex_spin);
-
- list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
- if (jd->jd_dirty) {
- jd->jd_dirty = 0;
- found = 1;
- break;
- }
- }
- spin_unlock(&sdp->sd_jindex_spin);
-
- if (!found)
- jd = NULL;
-
- return jd;
-}
-
int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
@@ -206,14 +69,14 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
int ar;
int error;
- if (ip->i_di.di_size < (8 << 20) || ip->i_di.di_size > (1 << 30) ||
- (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
+ if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) ||
+ (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) {
gfs2_consist_inode(ip);
return -EIO;
}
- jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
+ jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
- error = gfs2_write_alloc_required(ip, 0, ip->i_di.di_size, &ar);
+ error = gfs2_write_alloc_required(ip, 0, ip->i_disksize, &ar);
if (!error && ar) {
gfs2_consist_inode(ip);
error = -EIO;
@@ -423,137 +286,6 @@ out:
return error;
}
-/**
- * gfs2_statfs_i - Do a statfs
- * @sdp: the filesystem
- * @sg: the sg structure
- *
- * Returns: errno
- */
-
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
-{
- struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
- struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
-
- spin_lock(&sdp->sd_statfs_spin);
-
- *sc = *m_sc;
- sc->sc_total += l_sc->sc_total;
- sc->sc_free += l_sc->sc_free;
- sc->sc_dinodes += l_sc->sc_dinodes;
-
- spin_unlock(&sdp->sd_statfs_spin);
-
- if (sc->sc_free < 0)
- sc->sc_free = 0;
- if (sc->sc_free > sc->sc_total)
- sc->sc_free = sc->sc_total;
- if (sc->sc_dinodes < 0)
- sc->sc_dinodes = 0;
-
- return 0;
-}
-
-/**
- * statfs_fill - fill in the sg for a given RG
- * @rgd: the RG
- * @sc: the sc structure
- *
- * Returns: 0 on success, -ESTALE if the LVB is invalid
- */
-
-static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
- struct gfs2_statfs_change_host *sc)
-{
- gfs2_rgrp_verify(rgd);
- sc->sc_total += rgd->rd_data;
- sc->sc_free += rgd->rd_rg.rg_free;
- sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
- return 0;
-}
-
-/**
- * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
- * @sdp: the filesystem
- * @sc: the sc info that will be returned
- *
- * Any error (other than a signal) will cause this routine to fall back
- * to the synchronous version.
- *
- * FIXME: This really shouldn't busy wait like this.
- *
- * Returns: errno
- */
-
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
-{
- struct gfs2_holder ri_gh;
- struct gfs2_rgrpd *rgd_next;
- struct gfs2_holder *gha, *gh;
- unsigned int slots = 64;
- unsigned int x;
- int done;
- int error = 0, err;
-
- memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
- gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
- if (!gha)
- return -ENOMEM;
-
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- goto out;
-
- rgd_next = gfs2_rgrpd_get_first(sdp);
-
- for (;;) {
- done = 1;
-
- for (x = 0; x < slots; x++) {
- gh = gha + x;
-
- if (gh->gh_gl && gfs2_glock_poll(gh)) {
- err = gfs2_glock_wait(gh);
- if (err) {
- gfs2_holder_uninit(gh);
- error = err;
- } else {
- if (!error)
- error = statfs_slow_fill(
- gh->gh_gl->gl_object, sc);
- gfs2_glock_dq_uninit(gh);
- }
- }
-
- if (gh->gh_gl)
- done = 0;
- else if (rgd_next && !error) {
- error = gfs2_glock_nq_init(rgd_next->rd_gl,
- LM_ST_SHARED,
- GL_ASYNC,
- gh);
- rgd_next = gfs2_rgrpd_get_next(rgd_next);
- done = 0;
- }
-
- if (signal_pending(current))
- error = -ERESTARTSYS;
- }
-
- if (done)
- break;
-
- yield();
- }
-
- gfs2_glock_dq_uninit(&ri_gh);
-
-out:
- kfree(gha);
- return error;
-}
-
struct lfcc {
struct list_head list;
struct gfs2_holder gh;
@@ -580,10 +312,6 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
struct gfs2_log_header_host lh;
int error;
- error = gfs2_jindex_hold(sdp, &ji_gh);
- if (error)
- return error;
-
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
if (!lfcc) {
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 50a4c9b1215e..4d2492b3e7ef 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -10,6 +10,8 @@
#ifndef __SUPER_DOT_H__
#define __SUPER_DOT_H__
+#include <linux/fs.h>
+#include <linux/dcache.h>
#include "incore.h"
void gfs2_lm_unmount(struct gfs2_sbd *sdp);
@@ -23,28 +25,28 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
return x;
}
-int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
-void gfs2_jindex_free(struct gfs2_sbd *sdp);
-
struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
-void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
-struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
int gfs2_jdesc_check(struct gfs2_jdesc *jd);
int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
int gfs2_statfs_init(struct gfs2_sbd *sdp);
void gfs2_statfs_change(struct gfs2_sbd *sdp,
s64 total, s64 free, s64 dinodes);
int gfs2_statfs_sync(struct gfs2_sbd *sdp);
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
int gfs2_freeze_fs(struct gfs2_sbd *sdp);
void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
+extern struct file_system_type gfs2_fs_type;
+extern struct file_system_type gfs2meta_fs_type;
+extern const struct export_operations gfs2_export_ops;
+extern const struct super_operations gfs2_super_ops;
+extern struct dentry_operations gfs2_dops;
+
#endif /* __SUPER_DOT_H__ */
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7e1879f1a02c..26c1fa777a95 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -26,9 +26,6 @@
#include "quota.h"
#include "util.h"
-char *gfs2_sys_margs;
-spinlock_t gfs2_sys_margs_lock;
-
static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u:%u\n",
@@ -263,7 +260,6 @@ ARGS_ATTR(localcaching, "%d\n");
ARGS_ATTR(localflocks, "%d\n");
ARGS_ATTR(debug, "%d\n");
ARGS_ATTR(upgrade, "%d\n");
-ARGS_ATTR(num_glockd, "%u\n");
ARGS_ATTR(posix_acl, "%d\n");
ARGS_ATTR(quota, "%u\n");
ARGS_ATTR(suiddir, "%d\n");
@@ -279,7 +275,6 @@ static struct attribute *args_attrs[] = {
&args_attr_localflocks.attr,
&args_attr_debug.attr,
&args_attr_upgrade.attr,
- &args_attr_num_glockd.attr,
&args_attr_posix_acl.attr,
&args_attr_quota.attr,
&args_attr_suiddir.attr,
@@ -288,30 +283,6 @@ static struct attribute *args_attrs[] = {
};
/*
- * display counters from superblock
- */
-
-struct counters_attr {
- struct attribute attr;
- ssize_t (*show)(struct gfs2_sbd *, char *);
-};
-
-#define COUNTERS_ATTR(name, fmt) \
-static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
-{ \
- return snprintf(buf, PAGE_SIZE, fmt, \
- (unsigned int)atomic_read(&sdp->sd_##name)); \
-} \
-static struct counters_attr counters_attr_##name = __ATTR_RO(name)
-
-COUNTERS_ATTR(reclaimed, "%u\n");
-
-static struct attribute *counters_attrs[] = {
- &counters_attr_reclaimed.attr,
- NULL,
-};
-
-/*
* get and set struct gfs2_tune fields
*/
@@ -393,7 +364,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
} \
TUNE_ATTR_2(name, name##_store)
-TUNE_ATTR(demote_secs, 0);
TUNE_ATTR(incore_log_blocks, 0);
TUNE_ATTR(log_flush_secs, 0);
TUNE_ATTR(quota_warn_period, 0);
@@ -408,11 +378,9 @@ TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
TUNE_ATTR_DAEMON(logd_secs, logd_process);
-TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
static struct attribute *tune_attrs[] = {
- &tune_attr_demote_secs.attr,
&tune_attr_incore_log_blocks.attr,
&tune_attr_log_flush_secs.attr,
&tune_attr_quota_warn_period.attr,
@@ -426,7 +394,6 @@ static struct attribute *tune_attrs[] = {
&tune_attr_statfs_quantum.attr,
&tune_attr_recoverd_secs.attr,
&tune_attr_logd_secs.attr,
- &tune_attr_quotad_secs.attr,
&tune_attr_quota_scale.attr,
&tune_attr_new_files_jdata.attr,
NULL,
@@ -437,11 +404,6 @@ static struct attribute_group lockstruct_group = {
.attrs = lockstruct_attrs,
};
-static struct attribute_group counters_group = {
- .name = "counters",
- .attrs = counters_attrs,
-};
-
static struct attribute_group args_group = {
.name = "args",
.attrs = args_attrs,
@@ -466,13 +428,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
if (error)
goto fail_reg;
- error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
- if (error)
- goto fail_lockstruct;
-
error = sysfs_create_group(&sdp->sd_kobj, &args_group);
if (error)
- goto fail_counters;
+ goto fail_lockstruct;
error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
if (error)
@@ -483,8 +441,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
fail_args:
sysfs_remove_group(&sdp->sd_kobj, &args_group);
-fail_counters:
- sysfs_remove_group(&sdp->sd_kobj, &counters_group);
fail_lockstruct:
sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
fail_reg:
@@ -498,16 +454,27 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
{
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &args_group);
- sysfs_remove_group(&sdp->sd_kobj, &counters_group);
sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
kobject_put(&sdp->sd_kobj);
}
+static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
+ struct kobj_uevent_env *env)
+{
+ struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+ add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
+ add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
+ return 0;
+}
+
+static struct kset_uevent_ops gfs2_uevent_ops = {
+ .uevent = gfs2_uevent,
+};
+
+
int gfs2_sys_init(void)
{
- gfs2_sys_margs = NULL;
- spin_lock_init(&gfs2_sys_margs_lock);
- gfs2_kset = kset_create_and_add("gfs2", NULL, fs_kobj);
+ gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj);
if (!gfs2_kset)
return -ENOMEM;
return 0;
@@ -515,7 +482,6 @@ int gfs2_sys_init(void)
void gfs2_sys_uninit(void)
{
- kfree(gfs2_sys_margs);
kset_unregister(gfs2_kset);
}
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
index 1ca8cdac5304..e94560e836d7 100644
--- a/fs/gfs2/sys.h
+++ b/fs/gfs2/sys.h
@@ -13,10 +13,6 @@
#include <linux/spinlock.h>
struct gfs2_sbd;
-/* Allow args to be passed to GFS2 when using an initial ram disk */
-extern char *gfs2_sys_margs;
-extern spinlock_t gfs2_sys_margs_lock;
-
int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index d31e355c61fb..374f50e95496 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -25,6 +25,7 @@ struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
+struct kmem_cache *gfs2_quotad_cachep __read_mostly;
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 7f48576289c9..33e96b0ce9ab 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -148,6 +148,7 @@ extern struct kmem_cache *gfs2_glock_cachep;
extern struct kmem_cache *gfs2_inode_cachep;
extern struct kmem_cache *gfs2_bufdata_cachep;
extern struct kmem_cache *gfs2_rgrpd_cachep;
+extern struct kmem_cache *gfs2_quotad_cachep;
static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
unsigned int *p)
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index c69b7ac75bf7..9435dda8f1e0 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -155,8 +155,8 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
inode->i_ino = HFS_SB(sb)->next_id++;
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_nlink = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
HFS_I(inode)->flags = 0;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 3c7c7637719c..c8b5acf4b0b7 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -210,8 +210,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
int tmp, token;
/* initialize the sb with defaults */
- hsb->s_uid = current->uid;
- hsb->s_gid = current->gid;
+ hsb->s_uid = current_uid();
+ hsb->s_gid = current_gid();
hsb->s_file_umask = 0133;
hsb->s_dir_umask = 0022;
hsb->s_type = hsb->s_creator = cpu_to_be32(0x3f3f3f3f); /* == '????' */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b207f0e6fc22..f105ee9e1cc4 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -296,8 +296,8 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
inode->i_ino = HFSPLUS_SB(sb).next_cnid++;
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_nlink = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 9699c56d323f..bab7f8d1bdfa 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -49,8 +49,8 @@ void hfsplus_fill_defaults(struct hfsplus_sb_info *opts)
opts->creator = HFSPLUS_DEF_CR_TYPE;
opts->type = HFSPLUS_DEF_CR_TYPE;
opts->umask = current->fs->umask;
- opts->uid = current->uid;
- opts->gid = current->gid;
+ opts->uid = current_uid();
+ opts->gid = current_gid();
opts->part = -1;
opts->session = -1;
}
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 6ae9011b95eb..2f34f8f2134b 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -81,7 +81,7 @@ extern int do_rmdir(const char *file);
extern int do_mknod(const char *file, int mode, unsigned int major,
unsigned int minor);
extern int link_file(const char *from, const char *to);
-extern int do_readlink(char *file, char *buf, int size);
+extern int hostfs_do_readlink(char *file, char *buf, int size);
extern int rename_file(char *from, char *to);
extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
long long *bfree_out, long long *bavail_out,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 7f34f4385de0..3a31451ac170 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -168,7 +168,7 @@ static char *follow_link(char *link)
if (name == NULL)
goto out;
- n = do_readlink(link, name, len);
+ n = hostfs_do_readlink(link, name, len);
if (n < len)
break;
len *= 2;
@@ -943,7 +943,7 @@ int hostfs_link_readpage(struct file *file, struct page *page)
name = inode_name(page->mapping->host, 0);
if (name == NULL)
return -ENOMEM;
- err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
+ err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE);
kfree(name);
if (err == PAGE_CACHE_SIZE)
err = -E2BIG;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 53fd0a67c11a..b79424f93282 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -377,7 +377,7 @@ int link_file(const char *to, const char *from)
return 0;
}
-int do_readlink(char *file, char *buf, int size)
+int hostfs_do_readlink(char *file, char *buf, int size)
{
int n;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 10783f3d265a..b649232dde97 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -92,11 +92,11 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
inc_nlink(dir);
insert_inode_hash(result);
- if (result->i_uid != current->fsuid ||
- result->i_gid != current->fsgid ||
+ if (result->i_uid != current_fsuid() ||
+ result->i_gid != current_fsgid() ||
result->i_mode != (mode | S_IFDIR)) {
- result->i_uid = current->fsuid;
- result->i_gid = current->fsgid;
+ result->i_uid = current_fsuid();
+ result->i_gid = current_fsgid();
result->i_mode = mode | S_IFDIR;
hpfs_write_inode_nolock(result);
}
@@ -184,11 +184,11 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
insert_inode_hash(result);
- if (result->i_uid != current->fsuid ||
- result->i_gid != current->fsgid ||
+ if (result->i_uid != current_fsuid() ||
+ result->i_gid != current_fsgid() ||
result->i_mode != (mode | S_IFREG)) {
- result->i_uid = current->fsuid;
- result->i_gid = current->fsgid;
+ result->i_uid = current_fsuid();
+ result->i_gid = current_fsgid();
result->i_mode = mode | S_IFREG;
hpfs_write_inode_nolock(result);
}
@@ -247,8 +247,8 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
result->i_mtime.tv_nsec = 0;
result->i_atime.tv_nsec = 0;
hpfs_i(result)->i_ea_size = 0;
- result->i_uid = current->fsuid;
- result->i_gid = current->fsgid;
+ result->i_uid = current_fsuid();
+ result->i_gid = current_fsgid();
result->i_nlink = 1;
result->i_size = 0;
result->i_blocks = 1;
@@ -325,8 +325,8 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
result->i_atime.tv_nsec = 0;
hpfs_i(result)->i_ea_size = 0;
result->i_mode = S_IFLNK | 0777;
- result->i_uid = current->fsuid;
- result->i_gid = current->fsgid;
+ result->i_uid = current_fsuid();
+ result->i_gid = current_fsgid();
result->i_blocks = 1;
result->i_nlink = 1;
result->i_size = strlen(symlink);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 29ad461d568f..0d049b8919c4 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -475,8 +475,8 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
init_MUTEX(&sbi->hpfs_creation_de);
- uid = current->uid;
- gid = current->gid;
+ uid = current_uid();
+ gid = current_gid();
umask = current->fs->umask;
lowercase = 0;
conv = CONV_BINARY;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 2b3d1828db99..b278f7f52024 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -426,6 +426,7 @@ static int file_mode(int fmode)
static int hppfs_open(struct inode *inode, struct file *file)
{
+ const struct cred *cred = file->f_cred;
struct hppfs_private *data;
struct vfsmount *proc_mnt;
struct dentry *proc_dentry;
@@ -446,7 +447,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
/* XXX This isn't closed anywhere */
data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
- file_mode(file->f_mode));
+ file_mode(file->f_mode), cred);
err = PTR_ERR(data->proc_file);
if (IS_ERR(data->proc_file))
goto out_free1;
@@ -489,6 +490,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
static int hppfs_dir_open(struct inode *inode, struct file *file)
{
+ const struct cred *cred = file->f_cred;
struct hppfs_private *data;
struct vfsmount *proc_mnt;
struct dentry *proc_dentry;
@@ -502,7 +504,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
proc_dentry = HPPFS_I(inode)->proc_dentry;
proc_mnt = inode->i_sb->s_fs_info;
data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
- file_mode(file->f_mode));
+ file_mode(file->f_mode), cred);
err = PTR_ERR(data->proc_file);
if (IS_ERR(data->proc_file))
goto out_free;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 61edc701b0e6..7d479ce3aceb 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -551,9 +551,9 @@ static int hugetlbfs_mknod(struct inode *dir,
if (S_ISDIR(mode))
mode |= S_ISGID;
} else {
- gid = current->fsgid;
+ gid = current_fsgid();
}
- inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev);
+ inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), gid, mode, dev);
if (inode) {
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
d_instantiate(dentry, inode);
@@ -586,9 +586,9 @@ static int hugetlbfs_symlink(struct inode *dir,
if (dir->i_mode & S_ISGID)
gid = dir->i_gid;
else
- gid = current->fsgid;
+ gid = current_fsgid();
- inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid,
+ inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(),
gid, S_IFLNK|S_IRWXUGO, 0);
if (inode) {
int l = strlen(symname)+1;
@@ -854,8 +854,8 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
config.nr_blocks = -1; /* No limit on size by default */
config.nr_inodes = -1; /* No limit on number of inodes by default */
- config.uid = current->fsuid;
- config.gid = current->fsgid;
+ config.uid = current_fsuid();
+ config.gid = current_fsgid();
config.mode = 0755;
config.hstate = &default_hstate;
ret = hugetlbfs_parse_options(data, &config);
@@ -951,6 +951,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
struct inode *inode;
struct dentry *dentry, *root;
struct qstr quick_string;
+ struct user_struct *user = current_user();
if (!hugetlbfs_vfsmount)
return ERR_PTR(-ENOENT);
@@ -958,7 +959,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
if (!can_do_hugetlb_shm())
return ERR_PTR(-EPERM);
- if (!user_shm_lock(size, current->user))
+ if (!user_shm_lock(size, user))
return ERR_PTR(-ENOMEM);
root = hugetlbfs_vfsmount->mnt_root;
@@ -970,8 +971,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
goto out_shm_unlock;
error = -ENOSPC;
- inode = hugetlbfs_get_inode(root->d_sb, current->fsuid,
- current->fsgid, S_IFREG | S_IRWXUGO, 0);
+ inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
+ current_fsgid(), S_IFREG | S_IRWXUGO, 0);
if (!inode)
goto out_dentry;
@@ -998,7 +999,7 @@ out_inode:
out_dentry:
dput(dentry);
out_shm_unlock:
- user_shm_unlock(size, current->user);
+ user_shm_unlock(size, user);
return ERR_PTR(error);
}
diff --git a/fs/inode.c b/fs/inode.c
index 0487ddba1397..aab7feb17ad6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode)
wake_up_bit(&inode->i_state, __I_LOCK);
}
-static struct inode *alloc_inode(struct super_block *sb)
+/**
+ * inode_init_always - perform inode structure intialisation
+ * @sb - superblock inode belongs to.
+ * @inode - inode to initialise
+ *
+ * These are initializations that need to be done on every inode
+ * allocation as the fields are not initialised by slab allocation.
+ */
+struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct address_space_operations empty_aops;
static struct inode_operations empty_iops;
static const struct file_operations empty_fops;
- struct inode *inode;
- if (sb->s_op->alloc_inode)
- inode = sb->s_op->alloc_inode(sb);
- else
- inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);
-
- if (inode) {
- struct address_space * const mapping = &inode->i_data;
-
- inode->i_sb = sb;
- inode->i_blkbits = sb->s_blocksize_bits;
- inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
- inode->i_op = &empty_iops;
- inode->i_fop = &empty_fops;
- inode->i_nlink = 1;
- atomic_set(&inode->i_writecount, 0);
- inode->i_size = 0;
- inode->i_blocks = 0;
- inode->i_bytes = 0;
- inode->i_generation = 0;
+ struct address_space * const mapping = &inode->i_data;
+
+ inode->i_sb = sb;
+ inode->i_blkbits = sb->s_blocksize_bits;
+ inode->i_flags = 0;
+ atomic_set(&inode->i_count, 1);
+ inode->i_op = &empty_iops;
+ inode->i_fop = &empty_fops;
+ inode->i_nlink = 1;
+ atomic_set(&inode->i_writecount, 0);
+ inode->i_size = 0;
+ inode->i_blocks = 0;
+ inode->i_bytes = 0;
+ inode->i_generation = 0;
#ifdef CONFIG_QUOTA
- memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
+ memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
- inode->i_pipe = NULL;
- inode->i_bdev = NULL;
- inode->i_cdev = NULL;
- inode->i_rdev = 0;
- inode->dirtied_when = 0;
- if (security_inode_alloc(inode)) {
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, (inode));
- return NULL;
- }
+ inode->i_pipe = NULL;
+ inode->i_bdev = NULL;
+ inode->i_cdev = NULL;
+ inode->i_rdev = 0;
+ inode->dirtied_when = 0;
+ if (security_inode_alloc(inode)) {
+ if (inode->i_sb->s_op->destroy_inode)
+ inode->i_sb->s_op->destroy_inode(inode);
+ else
+ kmem_cache_free(inode_cachep, (inode));
+ return NULL;
+ }
- spin_lock_init(&inode->i_lock);
- lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+ spin_lock_init(&inode->i_lock);
+ lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
- mutex_init(&inode->i_mutex);
- lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
+ mutex_init(&inode->i_mutex);
+ lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
- init_rwsem(&inode->i_alloc_sem);
- lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+ init_rwsem(&inode->i_alloc_sem);
+ lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
- mapping->a_ops = &empty_aops;
- mapping->host = inode;
- mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
- mapping->assoc_mapping = NULL;
- mapping->backing_dev_info = &default_backing_dev_info;
- mapping->writeback_index = 0;
+ mapping->a_ops = &empty_aops;
+ mapping->host = inode;
+ mapping->flags = 0;
+ mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
+ mapping->assoc_mapping = NULL;
+ mapping->backing_dev_info = &default_backing_dev_info;
+ mapping->writeback_index = 0;
- /*
- * If the block_device provides a backing_dev_info for client
- * inodes then use that. Otherwise the inode share the bdev's
- * backing_dev_info.
- */
- if (sb->s_bdev) {
- struct backing_dev_info *bdi;
+ /*
+ * If the block_device provides a backing_dev_info for client
+ * inodes then use that. Otherwise the inode share the bdev's
+ * backing_dev_info.
+ */
+ if (sb->s_bdev) {
+ struct backing_dev_info *bdi;
- bdi = sb->s_bdev->bd_inode_backing_dev_info;
- if (!bdi)
- bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
- mapping->backing_dev_info = bdi;
- }
- inode->i_private = NULL;
- inode->i_mapping = mapping;
+ bdi = sb->s_bdev->bd_inode_backing_dev_info;
+ if (!bdi)
+ bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
+ mapping->backing_dev_info = bdi;
}
+ inode->i_private = NULL;
+ inode->i_mapping = mapping;
+
return inode;
}
+EXPORT_SYMBOL(inode_init_always);
+
+static struct inode *alloc_inode(struct super_block *sb)
+{
+ struct inode *inode;
+
+ if (sb->s_op->alloc_inode)
+ inode = sb->s_op->alloc_inode(sb);
+ else
+ inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
+
+ if (inode)
+ return inode_init_always(sb, inode);
+ return NULL;
+}
void destroy_inode(struct inode *inode)
{
@@ -196,6 +212,7 @@ void destroy_inode(struct inode *inode)
else
kmem_cache_free(inode_cachep, (inode));
}
+EXPORT_SYMBOL(destroy_inode);
/*
@@ -534,6 +551,49 @@ repeat:
return node ? inode : NULL;
}
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
+{
+ unsigned long tmp;
+
+ tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+ L1_CACHE_BYTES;
+ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
+ return tmp & I_HASHMASK;
+}
+
+static inline void
+__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
+ struct inode *inode)
+{
+ inodes_stat.nr_inodes++;
+ list_add(&inode->i_list, &inode_in_use);
+ list_add(&inode->i_sb_list, &sb->s_inodes);
+ if (head)
+ hlist_add_head(&inode->i_hash, head);
+}
+
+/**
+ * inode_add_to_lists - add a new inode to relevant lists
+ * @sb - superblock inode belongs to.
+ * @inode - inode to mark in use
+ *
+ * When an inode is allocated it needs to be accounted for, added to the in use
+ * list, the owning superblock and the inode hash. This needs to be done under
+ * the inode_lock, so export a function to do this rather than the inode lock
+ * itself. We calculate the hash list to add to here so it is all internal
+ * which requires the caller to have already set up the inode number in the
+ * inode to add.
+ */
+void inode_add_to_lists(struct super_block *sb, struct inode *inode)
+{
+ struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+
+ spin_lock(&inode_lock);
+ __inode_add_to_lists(sb, head, inode);
+ spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL_GPL(inode_add_to_lists);
+
/**
* new_inode - obtain an inode
* @sb: superblock
@@ -561,9 +621,7 @@ struct inode *new_inode(struct super_block *sb)
inode = alloc_inode(sb);
if (inode) {
spin_lock(&inode_lock);
- inodes_stat.nr_inodes++;
- list_add(&inode->i_list, &inode_in_use);
- list_add(&inode->i_sb_list, &sb->s_inodes);
+ __inode_add_to_lists(sb, NULL, inode);
inode->i_ino = ++last_ino;
inode->i_state = 0;
spin_unlock(&inode_lock);
@@ -622,10 +680,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h
if (set(inode, data))
goto set_failed;
- inodes_stat.nr_inodes++;
- list_add(&inode->i_list, &inode_in_use);
- list_add(&inode->i_sb_list, &sb->s_inodes);
- hlist_add_head(&inode->i_hash, head);
+ __inode_add_to_lists(sb, head, inode);
inode->i_state = I_LOCK|I_NEW;
spin_unlock(&inode_lock);
@@ -671,10 +726,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
old = find_inode_fast(sb, head, ino);
if (!old) {
inode->i_ino = ino;
- inodes_stat.nr_inodes++;
- list_add(&inode->i_list, &inode_in_use);
- list_add(&inode->i_sb_list, &sb->s_inodes);
- hlist_add_head(&inode->i_hash, head);
+ __inode_add_to_lists(sb, head, inode);
inode->i_state = I_LOCK|I_NEW;
spin_unlock(&inode_lock);
@@ -698,16 +750,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
return inode;
}
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
-{
- unsigned long tmp;
-
- tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
- L1_CACHE_BYTES;
- tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
- return tmp & I_HASHMASK;
-}
-
/**
* iunique - get a unique inode number
* @sb: superblock
@@ -1292,6 +1334,7 @@ int inode_wait(void *word)
schedule();
return 0;
}
+EXPORT_SYMBOL(inode_wait);
/*
* If we try to find an inode in the inode hash while it is being
@@ -1364,6 +1407,128 @@ static int __init set_ihash_entries(char *str)
__setup("ihash_entries=", set_ihash_entries);
/*
+ * Obtain a refcount on a list of struct inodes pointed to by v. If the
+ * inode is in the process of being freed then zap the v[] entry so that
+ * we skip the freeing attempts later.
+ *
+ * This is a generic function for the ->get slab defrag callback.
+ */
+void *get_inodes(struct kmem_cache *s, int nr, void **v)
+{
+ int i;
+
+ spin_lock(&inode_lock);
+ for (i = 0; i < nr; i++) {
+ struct inode *inode = v[i];
+
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+ v[i] = NULL;
+ else
+ __iget(inode);
+ }
+ spin_unlock(&inode_lock);
+ return NULL;
+}
+EXPORT_SYMBOL(get_inodes);
+
+/*
+ * Function for filesystems that embedd struct inode into their own
+ * fs inode. The offset is the offset of the struct inode in the fs inode.
+ *
+ * The function adds to the pointers in v[] in order to make them point to
+ * struct inode. Then get_inodes() is used to get the refcount.
+ * The converted v[] pointers can then also be passed to the kick() callback
+ * without further processing.
+ */
+void *fs_get_inodes(struct kmem_cache *s, int nr, void **v,
+ unsigned long offset)
+{
+ int i;
+
+ for (i = 0; i < nr; i++)
+ v[i] += offset;
+
+ return get_inodes(s, nr, v);
+}
+EXPORT_SYMBOL(fs_get_inodes);
+
+/*
+ * Generic callback function slab defrag ->kick methods. Takes the
+ * array with inodes where we obtained refcounts using fs_get_inodes()
+ * or get_inodes() and tries to free them.
+ */
+void kick_inodes(struct kmem_cache *s, int nr, void **v, void *private)
+{
+ struct inode *inode;
+ int i;
+ int abort = 0;
+ LIST_HEAD(freeable);
+ int active;
+
+ for (i = 0; i < nr; i++) {
+ inode = v[i];
+ if (!inode)
+ continue;
+
+ if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+ if (remove_inode_buffers(inode))
+ /*
+ * Should we really be doing this? Or
+ * limit the writeback here to only a few pages?
+ *
+ * Possibly an expensive operation but we
+ * cannot reclaim the inode if the pages
+ * are still present.
+ */
+ invalidate_mapping_pages(&inode->i_data,
+ 0, -1);
+ }
+
+ /* Invalidate children and dentry */
+ if (S_ISDIR(inode->i_mode)) {
+ struct dentry *d = d_find_alias(inode);
+
+ if (d) {
+ d_invalidate(d);
+ dput(d);
+ }
+ }
+
+ if (inode->i_state & I_DIRTY)
+ write_inode_now(inode, 1);
+
+ d_prune_aliases(inode);
+ }
+
+ mutex_lock(&iprune_mutex);
+ for (i = 0; i < nr; i++) {
+ inode = v[i];
+
+ if (!inode)
+ /* inode is alrady being freed */
+ continue;
+
+ active = inode->i_sb->s_flags & MS_ACTIVE;
+ iput(inode);
+ if (abort || !active)
+ continue;
+
+ spin_lock(&inode_lock);
+ abort = !can_unuse(inode);
+
+ if (!abort) {
+ list_move(&inode->i_list, &freeable);
+ inode->i_state |= I_FREEING;
+ inodes_stat.nr_unused--;
+ }
+ spin_unlock(&inode_lock);
+ }
+ dispose_list(&freeable);
+ mutex_unlock(&iprune_mutex);
+}
+EXPORT_SYMBOL(kick_inodes);
+
+/*
* Initialize the waitqueues and inode hash table.
*/
void __init inode_init_early(void)
@@ -1402,6 +1567,7 @@ void __init inode_init(void)
SLAB_MEM_SPREAD),
init_once);
register_shrinker(&icache_shrinker);
+ kmem_cache_setup_defrag(inode_cachep, get_inodes, kick_inodes);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index d367e9b92862..e2425bbd871f 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -601,7 +601,7 @@ asmlinkage long sys_inotify_init1(int flags)
goto out_put_fd;
}
- user = get_uid(current->user);
+ user = get_current_user();
if (unlikely(atomic_read(&user->inotify_devs) >=
inotify_max_user_instances)) {
ret = -EMFILE;
diff --git a/fs/internal.h b/fs/internal.h
index 80aa9a023372..53af885f1732 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -10,6 +10,7 @@
*/
struct super_block;
+struct linux_binprm;
/*
* block_dev.c
@@ -40,6 +41,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
extern void __init chrdev_init(void);
/*
+ * exec.c
+ */
+extern void check_unsafe_exec(struct linux_binprm *);
+
+/*
* namespace.c
*/
extern int copy_mount_options(const void __user *, unsigned long *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d152856c371b..f9b5c2438c0e 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -231,7 +231,8 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
-/*
+/**
+ * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
* @inode - the inode to map
* @arg - the pointer to userspace where we copy everything to
* @get_block - the fs's get_block function
@@ -242,11 +243,15 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
*
* If it is possible to have data blocks beyond a hole past @inode->i_size, then
* please do not use this function, it will stop at the first unmapped block
- * beyond i_size
+ * beyond i_size.
+ *
+ * If you use this function directly, you need to do your own locking. Use
+ * generic_block_fiemap if you want the locking done for you.
*/
-int generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
+
+int __generic_block_fiemap(struct inode *inode,
+ struct fiemap_extent_info *fieinfo, u64 start,
+ u64 len, get_block_t *get_block)
{
struct buffer_head tmp;
unsigned int start_blk;
@@ -260,9 +265,6 @@ int generic_block_fiemap(struct inode *inode,
start_blk = logical_to_blk(inode, start);
- /* guard against change */
- mutex_lock(&inode->i_mutex);
-
length = (long long)min_t(u64, len, i_size_read(inode));
map_len = length;
@@ -334,14 +336,36 @@ int generic_block_fiemap(struct inode *inode,
cond_resched();
} while (1);
- mutex_unlock(&inode->i_mutex);
-
/* if ret is 1 then we just hit the end of the extent array */
if (ret == 1)
ret = 0;
return ret;
}
+EXPORT_SYMBOL(__generic_block_fiemap);
+
+/**
+ * generic_block_fiemap - FIEMAP for block based inodes
+ * @inode: The inode to map
+ * @fieinfo: The mapping information
+ * @start: The initial block to map
+ * @len: The length of the extect to attempt to map
+ * @get_block: The block mapping function for the fs
+ *
+ * Calls __generic_block_fiemap to map the inode, after taking
+ * the inode's mutex lock.
+ */
+
+int generic_block_fiemap(struct inode *inode,
+ struct fiemap_extent_info *fieinfo, u64 start,
+ u64 len, get_block_t *get_block)
+{
+ int ret;
+ mutex_lock(&inode->i_mutex);
+ ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
EXPORT_SYMBOL(generic_block_fiemap);
#endif /* CONFIG_BLOCK */
diff --git a/fs/ioprio.c b/fs/ioprio.c
index da3cc460d4df..3569e0ad86a2 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -31,10 +31,16 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
{
int err;
struct io_context *ioc;
+ const struct cred *cred = current_cred(), *tcred;
- if (task->uid != current->euid &&
- task->uid != current->uid && !capable(CAP_SYS_NICE))
+ rcu_read_lock();
+ tcred = __task_cred(task);
+ if (tcred->uid != cred->euid &&
+ tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) {
+ rcu_read_unlock();
return -EPERM;
+ }
+ rcu_read_unlock();
err = security_task_setioprio(task, ioprio);
if (err)
@@ -123,7 +129,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
break;
case IOPRIO_WHO_USER:
if (!who)
- user = current->user;
+ user = current_user();
else
user = find_user(who);
@@ -131,7 +137,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
break;
do_each_thread(g, p) {
- if (p->uid != who)
+ if (__task_cred(p)->uid != who)
continue;
ret = set_task_ioprio(p, ioprio);
if (ret)
@@ -216,7 +222,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
break;
case IOPRIO_WHO_USER:
if (!who)
- user = current->user;
+ user = current_user();
else
user = find_user(who);
@@ -224,7 +230,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
break;
do_each_thread(g, p) {
- if (p->uid != user->uid)
+ if (__task_cred(p)->uid != user->uid)
continue;
tmpio = get_task_ioprio(p);
if (tmpio < 0)
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index ed6574bee51a..70022fd1c539 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -93,13 +93,13 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
return ERR_PTR(rc);
}
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
if (parent->i_mode & S_ISGID) {
inode->i_gid = parent->i_gid;
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
/*
* New inodes need to save sane values on disk when
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 9fd8889097b7..e05d04416037 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -115,14 +115,14 @@ static void nlm_display_address(const struct sockaddr *sap,
snprintf(buf, len, "unspecified");
break;
case AF_INET:
- snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
+ snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
break;
case AF_INET6:
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- snprintf(buf, len, NIPQUAD_FMT,
- NIPQUAD(sin6->sin6_addr.s6_addr32[3]));
+ snprintf(buf, len, "%pI4",
+ &sin6->sin6_addr.s6_addr32[3]);
else
- snprintf(buf, len, NIP6_FMT, NIP6(sin6->sin6_addr));
+ snprintf(buf, len, "%pI6", &sin6->sin6_addr);
break;
default:
snprintf(buf, len, "unsupported address family");
@@ -167,7 +167,8 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
continue;
if (host->h_server != ni->server)
continue;
- if (!nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap))
+ if (ni->server &&
+ !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap))
continue;
/* Move to head of hash chain. */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 4e7e958e8f67..ffd3461f75ef 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -179,7 +179,7 @@ static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp)
if (!nsm_use_hostnames) {
snprintf(buffer, XDR_ADDRBUF_LEN,
- NIPQUAD_FMT, NIPQUAD(argp->addr));
+ "%pI4", &argp->addr);
name = buffer;
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index c631a83931ce..c312df718911 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -62,6 +62,9 @@ static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO;
static int nlm_udpport, nlm_tcpport;
int nsm_use_hostnames = 0;
+/* RLIM_NOFILE defaults to 1024. That seems like a reasonable default here. */
+static unsigned int nlm_max_connections = 1024;
+
/*
* Constants needed for the sysctl interface.
*/
@@ -143,6 +146,9 @@ lockd(void *vrqstp)
long timeout = MAX_SCHEDULE_TIMEOUT;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
+ /* update sv_maxconn if it has changed */
+ rqstp->rq_server->sv_maxconn = nlm_max_connections;
+
if (signalled()) {
flush_signals(current);
if (nlmsvc_ops) {
@@ -181,6 +187,7 @@ lockd(void *vrqstp)
}
flush_signals(current);
cancel_delayed_work_sync(&grace_period_end);
+ locks_end_grace(&lockd_manager);
if (nlmsvc_ops)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
@@ -275,6 +282,7 @@ int lockd_up(void)
}
svc_sock_update_bufs(serv);
+ serv->sv_maxconn = nlm_max_connections;
nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
if (IS_ERR(nlmsvc_task)) {
@@ -484,6 +492,7 @@ module_param_call(nlm_udpport, param_set_port, param_get_int,
module_param_call(nlm_tcpport, param_set_port, param_get_int,
&nlm_tcpport, 0644);
module_param(nsm_use_hostnames, bool, 0644);
+module_param(nlm_max_connections, uint, 0644);
/*
* Initialising and terminating the module.
diff --git a/fs/locks.c b/fs/locks.c
index 09062e3ff104..46a2e12f7d42 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1349,7 +1349,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
struct inode *inode = dentry->d_inode;
int error, rdlease_count = 0, wrlease_count = 0;
- if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE))
+ if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 703cc35e04b9..3aebe322271a 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -262,8 +262,8 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
iput(inode);
return NULL;
}
- inode->i_uid = current->fsuid;
- inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid();
inode->i_ino = j;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_blocks = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 09ce58e49e72..af3783fff1de 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -186,7 +186,7 @@ int generic_permission(struct inode *inode, int mask,
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
- if (current->fsuid == inode->i_uid)
+ if (current_fsuid() == inode->i_uid)
mode >>= 6;
else {
if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
@@ -441,7 +441,7 @@ static int exec_permission_lite(struct inode *inode)
if (inode->i_op && inode->i_op->permission)
return -EAGAIN;
- if (current->fsuid == inode->i_uid)
+ if (current_fsuid() == inode->i_uid)
mode >>= 6;
else if (in_group_p(inode->i_gid))
mode >>= 3;
@@ -1334,11 +1334,13 @@ static int user_path_parent(int dfd, const char __user *path,
*/
static inline int check_sticky(struct inode *dir, struct inode *inode)
{
+ uid_t fsuid = current_fsuid();
+
if (!(dir->i_mode & S_ISVTX))
return 0;
- if (inode->i_uid == current->fsuid)
+ if (inode->i_uid == fsuid)
return 0;
- if (dir->i_uid == current->fsuid)
+ if (dir->i_uid == fsuid)
return 0;
return !capable(CAP_FOWNER);
}
@@ -1378,7 +1380,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
if (IS_APPEND(dir))
return -EPERM;
if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
- IS_IMMUTABLE(victim->d_inode))
+ IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
return -EPERM;
if (isdir) {
if (!S_ISDIR(victim->d_inode->i_mode))
diff --git a/fs/namespace.c b/fs/namespace.c
index 65b3dc844c87..1c09cab8f7cf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1176,7 +1176,7 @@ static int mount_is_safe(struct path *path)
if (S_ISLNK(path->dentry->d_inode->i_mode))
return -EPERM;
if (path->dentry->d_inode->i_mode & S_ISVTX) {
- if (current->uid != path->dentry->d_inode->i_uid)
+ if (current_uid() != path->dentry->d_inode->i_uid)
return -EPERM;
}
if (inode_permission(path->dentry->d_inode, MAY_WRITE))
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 3a97c95e1ca2..6d04e050c74e 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -40,10 +40,10 @@ ncp_get_fs_info(struct ncp_server * server, struct file *file,
struct inode *inode = file->f_path.dentry->d_inode;
struct ncp_fs_info info;
- if ((file_permission(file, MAY_WRITE) != 0)
- && (current->uid != server->m.mounted_uid)) {
+ if (file_permission(file, MAY_WRITE) != 0
+ && current_uid() != server->m.mounted_uid)
return -EACCES;
- }
+
if (copy_from_user(&info, arg, sizeof(info)))
return -EFAULT;
@@ -70,10 +70,10 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct file *file,
struct inode *inode = file->f_path.dentry->d_inode;
struct ncp_fs_info_v2 info2;
- if ((file_permission(file, MAY_WRITE) != 0)
- && (current->uid != server->m.mounted_uid)) {
+ if (file_permission(file, MAY_WRITE) != 0
+ && current_uid() != server->m.mounted_uid)
return -EACCES;
- }
+
if (copy_from_user(&info2, arg, sizeof(info2)))
return -EFAULT;
@@ -141,10 +141,10 @@ ncp_get_compat_fs_info_v2(struct ncp_server * server, struct file *file,
struct inode *inode = file->f_path.dentry->d_inode;
struct compat_ncp_fs_info_v2 info2;
- if ((file_permission(file, MAY_WRITE) != 0)
- && (current->uid != server->m.mounted_uid)) {
+ if (file_permission(file, MAY_WRITE) != 0
+ && current_uid() != server->m.mounted_uid)
return -EACCES;
- }
+
if (copy_from_user(&info2, arg, sizeof(info2)))
return -EFAULT;
@@ -270,16 +270,17 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
struct ncp_ioctl_request request;
char* bouncebuffer;
void __user *argp = (void __user *)arg;
+ uid_t uid = current_uid();
switch (cmd) {
#ifdef CONFIG_COMPAT
case NCP_IOC_NCPREQUEST_32:
#endif
case NCP_IOC_NCPREQUEST:
- if ((file_permission(filp, MAY_WRITE) != 0)
- && (current->uid != server->m.mounted_uid)) {
+ if (file_permission(filp, MAY_WRITE) != 0
+ && uid != server->m.mounted_uid)
return -EACCES;
- }
+
#ifdef CONFIG_COMPAT
if (cmd == NCP_IOC_NCPREQUEST_32) {
struct compat_ncp_ioctl_request request32;
@@ -356,10 +357,10 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
case NCP_IOC_GETMOUNTUID16:
case NCP_IOC_GETMOUNTUID32:
case NCP_IOC_GETMOUNTUID64:
- if ((file_permission(filp, MAY_READ) != 0)
- && (current->uid != server->m.mounted_uid)) {
+ if (file_permission(filp, MAY_READ) != 0
+ && uid != server->m.mounted_uid)
return -EACCES;
- }
+
if (cmd == NCP_IOC_GETMOUNTUID16) {
u16 uid;
SET_UID(uid, server->m.mounted_uid);
@@ -380,11 +381,10 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
{
struct ncp_setroot_ioctl sr;
- if ((file_permission(filp, MAY_READ) != 0)
- && (current->uid != server->m.mounted_uid))
- {
+ if (file_permission(filp, MAY_READ) != 0
+ && uid != server->m.mounted_uid)
return -EACCES;
- }
+
if (server->m.mounted_vol[0]) {
struct dentry* dentry = inode->i_sb->s_root;
@@ -408,6 +408,7 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
return -EFAULT;
return 0;
}
+
case NCP_IOC_SETROOT:
{
struct ncp_setroot_ioctl sr;
@@ -455,11 +456,10 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
#ifdef CONFIG_NCPFS_PACKET_SIGNING
case NCP_IOC_SIGN_INIT:
- if ((file_permission(filp, MAY_WRITE) != 0)
- && (current->uid != server->m.mounted_uid))
- {
+ if (file_permission(filp, MAY_WRITE) != 0
+ && uid != server->m.mounted_uid)
return -EACCES;
- }
+
if (argp) {
if (server->sign_wanted)
{
@@ -478,24 +478,22 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
return 0;
case NCP_IOC_SIGN_WANTED:
- if ((file_permission(filp, MAY_READ) != 0)
- && (current->uid != server->m.mounted_uid))
- {
+ if (file_permission(filp, MAY_READ) != 0
+ && uid != server->m.mounted_uid)
return -EACCES;
- }
if (put_user(server->sign_wanted, (int __user *)argp))
return -EFAULT;
return 0;
+
case NCP_IOC_SET_SIGN_WANTED:
{
int newstate;
- if ((file_permission(filp, MAY_WRITE) != 0)
- && (current->uid != server->m.mounted_uid))
- {
+ if (file_permission(filp, MAY_WRITE) != 0
+ && uid != server->m.mounted_uid)
return -EACCES;
- }
+
/* get only low 8 bits... */
if (get_user(newstate, (unsigned char __user *)argp))
return -EFAULT;
@@ -512,11 +510,10 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
#ifdef CONFIG_NCPFS_IOCTL_LOCKING
case NCP_IOC_LOCKUNLOCK:
- if ((file_permission(filp, MAY_WRITE) != 0)
- && (current->uid != server->m.mounted_uid))
- {
+ if (file_permission(filp, MAY_WRITE) != 0
+ && uid != server->m.mounted_uid)
return -EACCES;
- }
+
{
struct ncp_lock_ioctl rqdata;
@@ -585,9 +582,8 @@ outrel:
#ifdef CONFIG_COMPAT
case NCP_IOC_GETOBJECTNAME_32:
- if (current->uid != server->m.mounted_uid) {
+ if (uid != server->m.mounted_uid)
return -EACCES;
- }
{
struct compat_ncp_objectname_ioctl user;
size_t outl;
@@ -609,10 +605,10 @@ outrel:
return 0;
}
#endif
+
case NCP_IOC_GETOBJECTNAME:
- if (current->uid != server->m.mounted_uid) {
+ if (uid != server->m.mounted_uid)
return -EACCES;
- }
{
struct ncp_objectname_ioctl user;
size_t outl;
@@ -633,13 +629,13 @@ outrel:
return -EFAULT;
return 0;
}
+
#ifdef CONFIG_COMPAT
case NCP_IOC_SETOBJECTNAME_32:
#endif
case NCP_IOC_SETOBJECTNAME:
- if (current->uid != server->m.mounted_uid) {
+ if (uid != server->m.mounted_uid)
return -EACCES;
- }
{
struct ncp_objectname_ioctl user;
void* newname;
@@ -691,13 +687,13 @@ outrel:
kfree(oldname);
return 0;
}
+
#ifdef CONFIG_COMPAT
case NCP_IOC_GETPRIVATEDATA_32:
#endif
case NCP_IOC_GETPRIVATEDATA:
- if (current->uid != server->m.mounted_uid) {
+ if (uid != server->m.mounted_uid)
return -EACCES;
- }
{
struct ncp_privatedata_ioctl user;
size_t outl;
@@ -736,13 +732,13 @@ outrel:
return 0;
}
+
#ifdef CONFIG_COMPAT
case NCP_IOC_SETPRIVATEDATA_32:
#endif
case NCP_IOC_SETPRIVATEDATA:
- if (current->uid != server->m.mounted_uid) {
+ if (uid != server->m.mounted_uid)
return -EACCES;
- }
{
struct ncp_privatedata_ioctl user;
void* new;
@@ -794,9 +790,10 @@ outrel:
#endif /* CONFIG_NCPFS_NLS */
case NCP_IOC_SETDENTRYTTL:
- if ((file_permission(filp, MAY_WRITE) != 0) &&
- (current->uid != server->m.mounted_uid))
+ if (file_permission(filp, MAY_WRITE) != 0 &&
+ uid != server->m.mounted_uid)
return -EACCES;
+
{
u_int32_t user;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8478fc25daee..d74d16ce0d49 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -329,7 +329,7 @@ static int __init root_nfs_addr(void)
}
snprintf(nfs_data.hostname, sizeof(nfs_data.hostname),
- "%u.%u.%u.%u", NIPQUAD(servaddr));
+ "%pI4", &servaddr);
return 0;
}
@@ -421,8 +421,8 @@ static int __init root_nfs_getport(int program, int version, int proto)
{
struct sockaddr_in sin;
- printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
- program, version, NIPQUAD(servaddr));
+ printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n",
+ program, version, &servaddr);
set_sockaddr(&sin, servaddr, 0);
return rpcb_getport_sync(&sin, program, version, proto);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f48db679a1c6..bb0313ac9e1f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -462,14 +462,12 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
switch (sap->sa_family) {
case AF_INET: {
struct sockaddr_in *sin = (struct sockaddr_in *)sap;
- seq_printf(m, ",mountaddr=" NIPQUAD_FMT,
- NIPQUAD(sin->sin_addr.s_addr));
+ seq_printf(m, ",mountaddr=%pI4", &sin->sin_addr.s_addr);
break;
}
case AF_INET6: {
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
- seq_printf(m, ",mountaddr=" NIP6_FMT,
- NIP6(sin6->sin6_addr));
+ seq_printf(m, ",mountaddr=%pI6", &sin6->sin6_addr);
break;
}
default:
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index aed8145d9087..b1acbd6ab6fb 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -10,6 +10,8 @@
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/syscall.h>
+#include <linux/cred.h>
+#include <linux/sched.h>
#include <linux/linkage.h>
#include <linux/namei.h>
#include <linux/mount.h>
@@ -41,7 +43,8 @@ static struct file *do_open(char *name, int flags)
error = may_open(&nd, MAY_WRITE, FMODE_WRITE);
if (!error)
- return dentry_open(nd.path.dentry, nd.path.mnt, flags);
+ return dentry_open(nd.path.dentry, nd.path.mnt, flags,
+ current_cred());
path_put(&nd.path);
return ERR_PTR(error);
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 294992e9bf69..0184fe9b514c 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -27,53 +27,70 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
{
- struct svc_cred cred = rqstp->rq_cred;
+ struct group_info *rqgi;
+ struct group_info *gi;
+ struct cred *new;
int i;
int flags = nfsexp_flags(rqstp, exp);
int ret;
+ /* discard any old override before preparing the new set */
+ revert_creds(get_cred(current->real_cred));
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ new->fsuid = rqstp->rq_cred.cr_uid;
+ new->fsgid = rqstp->rq_cred.cr_gid;
+
+ rqgi = rqstp->rq_cred.cr_group_info;
+
if (flags & NFSEXP_ALLSQUASH) {
- cred.cr_uid = exp->ex_anon_uid;
- cred.cr_gid = exp->ex_anon_gid;
- cred.cr_group_info = groups_alloc(0);
+ new->fsuid = exp->ex_anon_uid;
+ new->fsgid = exp->ex_anon_gid;
+ gi = groups_alloc(0);
} else if (flags & NFSEXP_ROOTSQUASH) {
- struct group_info *gi;
- if (!cred.cr_uid)
- cred.cr_uid = exp->ex_anon_uid;
- if (!cred.cr_gid)
- cred.cr_gid = exp->ex_anon_gid;
- gi = groups_alloc(cred.cr_group_info->ngroups);
- if (gi)
- for (i = 0; i < cred.cr_group_info->ngroups; i++) {
- if (!GROUP_AT(cred.cr_group_info, i))
- GROUP_AT(gi, i) = exp->ex_anon_gid;
- else
- GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i);
- }
- cred.cr_group_info = gi;
- } else
- get_group_info(cred.cr_group_info);
-
- if (cred.cr_uid != (uid_t) -1)
- current->fsuid = cred.cr_uid;
- else
- current->fsuid = exp->ex_anon_uid;
- if (cred.cr_gid != (gid_t) -1)
- current->fsgid = cred.cr_gid;
- else
- current->fsgid = exp->ex_anon_gid;
+ if (!new->fsuid)
+ new->fsuid = exp->ex_anon_uid;
+ if (!new->fsgid)
+ new->fsgid = exp->ex_anon_gid;
- if (!cred.cr_group_info)
- return -ENOMEM;
- ret = set_current_groups(cred.cr_group_info);
- put_group_info(cred.cr_group_info);
- if ((cred.cr_uid)) {
- current->cap_effective =
- cap_drop_nfsd_set(current->cap_effective);
+ gi = groups_alloc(rqgi->ngroups);
+ if (!gi)
+ goto oom;
+
+ for (i = 0; i < rqgi->ngroups; i++) {
+ if (!GROUP_AT(rqgi, i))
+ GROUP_AT(gi, i) = exp->ex_anon_gid;
+ else
+ GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
+ }
} else {
- current->cap_effective =
- cap_raise_nfsd_set(current->cap_effective,
- current->cap_permitted);
+ gi = get_group_info(rqgi);
}
+
+ if (new->fsuid == (uid_t) -1)
+ new->fsuid = exp->ex_anon_uid;
+ if (new->fsgid == (gid_t) -1)
+ new->fsgid = exp->ex_anon_gid;
+
+ ret = set_groups(new, gi);
+ put_group_info(gi);
+ if (!ret)
+ goto error;
+
+ if (new->uid)
+ new->cap_effective = cap_drop_nfsd_set(new->cap_effective);
+ else
+ new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
+ new->cap_permitted);
+ put_cred(override_creds(new));
+ return 0;
+
+oom:
+ ret = -ENOMEM;
+error:
+ abort_creds(new);
return ret;
}
+
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index bb93946ace22..d2681a12980f 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -54,20 +54,26 @@
static struct path rec_dir;
static int rec_dir_init = 0;
-static void
-nfs4_save_user(uid_t *saveuid, gid_t *savegid)
+static int
+nfs4_save_creds(const struct cred **original_creds)
{
- *saveuid = current->fsuid;
- *savegid = current->fsgid;
- current->fsuid = 0;
- current->fsgid = 0;
+ struct cred *new;
+
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ new->fsuid = 0;
+ new->fsgid = 0;
+ *original_creds = override_creds(new);
+ put_cred(new);
+ return 0;
}
static void
-nfs4_reset_user(uid_t saveuid, gid_t savegid)
+nfs4_reset_creds(const struct cred *original)
{
- current->fsuid = saveuid;
- current->fsgid = savegid;
+ revert_creds(original);
}
static void
@@ -110,9 +116,9 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
md5_to_hex(dname, cksum.data);
- kfree(cksum.data);
status = nfs_ok;
out:
+ kfree(cksum.data);
crypto_free_hash(desc.tfm);
out_no_tfm:
return status;
@@ -129,10 +135,9 @@ nfsd4_sync_rec_dir(void)
int
nfsd4_create_clid_dir(struct nfs4_client *clp)
{
+ const struct cred *original_cred;
char *dname = clp->cl_recdir;
struct dentry *dentry;
- uid_t uid;
- gid_t gid;
int status;
dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
@@ -140,7 +145,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
if (!rec_dir_init || clp->cl_firststate)
return 0;
- nfs4_save_user(&uid, &gid);
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ return status;
/* lock the parent */
mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
@@ -168,7 +175,7 @@ out_unlock:
clp->cl_firststate = 1;
nfsd4_sync_rec_dir();
}
- nfs4_reset_user(uid, gid);
+ nfs4_reset_creds(original_cred);
dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
return status;
}
@@ -211,26 +218,28 @@ nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
static int
nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
{
+ const struct cred *original_cred;
struct file *filp;
struct dentry_list_arg dla = {
.parent = dir,
};
struct list_head *dentries = &dla.dentries;
struct dentry_list *child;
- uid_t uid;
- gid_t gid;
int status;
if (!rec_dir_init)
return 0;
- nfs4_save_user(&uid, &gid);
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ return status;
+ INIT_LIST_HEAD(dentries);
- filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
+ filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY,
+ current_cred());
status = PTR_ERR(filp);
if (IS_ERR(filp))
goto out;
- INIT_LIST_HEAD(dentries);
status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
fput(filp);
while (!list_empty(dentries)) {
@@ -249,7 +258,7 @@ out:
dput(child->dentry);
kfree(child);
}
- nfs4_reset_user(uid, gid);
+ nfs4_reset_creds(original_cred);
return status;
}
@@ -311,8 +320,7 @@ out:
void
nfsd4_remove_clid_dir(struct nfs4_client *clp)
{
- uid_t uid;
- gid_t gid;
+ const struct cred *original_cred;
int status;
if (!rec_dir_init || !clp->cl_firststate)
@@ -322,9 +330,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
if (status)
goto out;
clp->cl_firststate = 0;
- nfs4_save_user(&uid, &gid);
+
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ goto out;
+
status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
- nfs4_reset_user(uid, gid);
+ nfs4_reset_creds(original_cred);
if (status == 0)
nfsd4_sync_rec_dir();
mnt_drop_write(rec_dir.mnt);
@@ -401,16 +413,21 @@ nfsd4_recdir_load(void) {
void
nfsd4_init_recdir(char *rec_dirname)
{
- uid_t uid = 0;
- gid_t gid = 0;
- int status;
+ const struct cred *original_cred;
+ int status;
printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
rec_dirname);
BUG_ON(rec_dir_init);
- nfs4_save_user(&uid, &gid);
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0) {
+ printk("NFSD: Unable to change credentials to find recovery"
+ " directory: error %d\n",
+ status);
+ return;
+ }
status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
&rec_dir);
@@ -420,7 +437,7 @@ nfsd4_init_recdir(char *rec_dirname)
if (!status)
rec_dir_init = 1;
- nfs4_reset_user(uid, gid);
+ nfs4_reset_creds(original_cred);
}
void
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b0bebc552a11..6910374c8a6a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -719,8 +719,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_clid_inuse;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)
|| conf->cl_addr != sin->sin_addr.s_addr) {
- dprintk("NFSD: setclientid: string in use by client"
- "at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr));
+ dprintk("NFSD: setclientid: string in use by clientat %pI4\n",
+ &conf->cl_addr);
goto out;
}
}
@@ -2423,13 +2423,13 @@ static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
static struct nfs4_stateid *
find_stateid(stateid_t *stid, int flags)
{
- struct nfs4_stateid *local = NULL;
+ struct nfs4_stateid *local;
u32 st_id = stid->si_stateownerid;
u32 f_id = stid->si_fileid;
unsigned int hashval;
dprintk("NFSD: find_stateid flags 0x%x\n",flags);
- if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
+ if (flags & (LOCK_STATE | RD_STATE | WR_STATE)) {
hashval = stateid_hashval(st_id, f_id);
list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
if ((local->st_stateid.si_stateownerid == st_id) &&
@@ -2437,7 +2437,8 @@ find_stateid(stateid_t *stid, int flags)
return local;
}
}
- if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
+
+ if (flags & (OPEN_STATE | RD_STATE | WR_STATE)) {
hashval = stateid_hashval(st_id, f_id);
list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
if ((local->st_stateid.si_stateownerid == st_id) &&
@@ -3261,6 +3262,7 @@ nfs4_state_shutdown(void)
{
cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
destroy_workqueue(laundry_wq);
+ locks_end_grace(&nfsd4_manager);
nfs4_lock_state();
nfs4_release_reclaim();
__nfs4_state_shutdown();
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e3f9783fdcf7..77d7b8c531a6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -330,7 +330,7 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
return -EINVAL;
/* get ipv4 address */
- if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4)
+ if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
return -EINVAL;
if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
return -EINVAL;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index cd25d91895a1..019a8a20184d 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -186,9 +186,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
* access control settings being in effect, we cannot
* fix that case easily.
*/
- current->cap_effective =
- cap_raise_nfsd_set(current->cap_effective,
- current->cap_permitted);
+ struct cred *new = prepare_creds();
+ if (!new)
+ return nfserrno(-ENOMEM);
+ new->cap_effective =
+ cap_raise_nfsd_set(new->cap_effective,
+ new->cap_permitted);
+ put_cred(override_creds(new));
+ put_cred(new);
} else {
error = nfsd_setuser_and_check_port(rqstp, exp);
if (error)
@@ -253,14 +258,32 @@ out:
return error;
}
-/*
- * Perform sanity checks on the dentry in a client's file handle.
+/**
+ * fh_verify - filehandle lookup and access checking
+ * @rqstp: pointer to current rpc request
+ * @fhp: filehandle to be verified
+ * @type: expected type of object pointed to by filehandle
+ * @access: type of access needed to object
+ *
+ * Look up a dentry from the on-the-wire filehandle, check the client's
+ * access to the export, and set the current task's credentials.
+ *
+ * Regardless of success or failure of fh_verify(), fh_put() should be
+ * called on @fhp when the caller is finished with the filehandle.
+ *
+ * fh_verify() may be called multiple times on a given filehandle, for
+ * example, when processing an NFSv4 compound. The first call will look
+ * up a dentry using the on-the-wire filehandle. Subsequent calls will
+ * skip the lookup and just perform the other checks and possibly change
+ * the current task's credentials.
*
- * Note that the file handle dentry may need to be freed even after
- * an error return.
+ * @type specifies the type of object expected using one of the S_IF*
+ * constants defined in include/linux/stat.h. The caller may use zero
+ * to indicate that it doesn't care, or a negative integer to indicate
+ * that it expects something not of the given type.
*
- * This is only called at the start of an nfsproc call, so fhp points to
- * a svc_fh which is all 0 except for the over-the-wire file handle.
+ * @access is formed from the NFSD_MAY_* constants defined in
+ * include/linux/nfsd/nfsd.h.
*/
__be32
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4433c8f00163..d1c5f787b365 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -671,6 +671,7 @@ __be32
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
int access, struct file **filp)
{
+ const struct cred *cred = current_cred();
struct dentry *dentry;
struct inode *inode;
int flags = O_RDONLY|O_LARGEFILE;
@@ -725,7 +726,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
DQUOT_INIT(inode);
}
*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
- flags);
+ flags, cred);
if (IS_ERR(*filp))
host_err = PTR_ERR(*filp);
out_nfserr:
@@ -1169,7 +1170,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
* send along the gid on create when it tries to implement
* setgid directories via NFS:
*/
- if (current->fsuid != 0)
+ if (current_fsuid() != 0)
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
@@ -2001,7 +2002,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
IS_APPEND(inode)? " append" : "",
__mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
dprintk(" owner %d/%d user %d/%d\n",
- inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
+ inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
#endif
/* Normally we reject any write/sattr etc access on a read-only file
@@ -2044,7 +2045,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
* with NFSv3.
*/
if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
- inode->i_uid == current->fsuid)
+ inode->i_uid == current_fsuid())
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 589dcdfdfe3c..7e4b361b755c 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -35,8 +35,14 @@ ocfs2-objs := \
sysfile.o \
uptodate.o \
ver.o \
+ quota_local.o \
+ quota_global.o \
xattr.o
+ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
+ocfs2-objs += acl.o
+endif
+
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
ocfs2_stack_user-objs := stack_user.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
new file mode 100644
index 000000000000..12dfb44c22e5
--- /dev/null
+++ b/fs/ocfs2/acl.c
@@ -0,0 +1,479 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * acl.c
+ *
+ * Copyright (C) 2004, 2008 Oracle. All rights reserved.
+ *
+ * CREDITS:
+ * Lots of code in this file is copy from linux/fs/ext3/acl.c.
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "alloc.h"
+#include "dlmglue.h"
+#include "file.h"
+#include "ocfs2_fs.h"
+
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Convert from xattr value to acl struct.
+ */
+static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
+{
+ int n, count;
+ struct posix_acl *acl;
+
+ if (!value)
+ return NULL;
+ if (size < sizeof(struct posix_acl_entry))
+ return ERR_PTR(-EINVAL);
+
+ count = size / sizeof(struct posix_acl_entry);
+ if (count < 0)
+ return ERR_PTR(-EINVAL);
+ if (count == 0)
+ return NULL;
+
+ acl = posix_acl_alloc(count, GFP_NOFS);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+ for (n = 0; n < count; n++) {
+ struct ocfs2_acl_entry *entry =
+ (struct ocfs2_acl_entry *)value;
+
+ acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
+ acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
+ acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
+ value += sizeof(struct posix_acl_entry);
+
+ }
+ return acl;
+}
+
+/*
+ * Convert acl struct to xattr value.
+ */
+static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
+{
+ struct ocfs2_acl_entry *entry = NULL;
+ char *ocfs2_acl;
+ size_t n;
+
+ *size = acl->a_count * sizeof(struct posix_acl_entry);
+
+ ocfs2_acl = kmalloc(*size, GFP_NOFS);
+ if (!ocfs2_acl)
+ return ERR_PTR(-ENOMEM);
+
+ entry = (struct ocfs2_acl_entry *)ocfs2_acl;
+ for (n = 0; n < acl->a_count; n++, entry++) {
+ entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
+ entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
+ entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+ }
+ return ocfs2_acl;
+}
+
+static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
+ int type,
+ struct buffer_head *di_bh)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int name_index;
+ char *value = NULL;
+ struct posix_acl *acl;
+ int retval;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return NULL;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0);
+ if (retval > 0) {
+ value = kmalloc(retval, GFP_NOFS);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
+ "", value, retval);
+ }
+
+ if (retval > 0)
+ acl = ocfs2_acl_from_xattr(value, retval);
+ else if (retval == -ENODATA || retval == 0)
+ acl = NULL;
+ else
+ acl = ERR_PTR(retval);
+
+ kfree(value);
+
+ return acl;
+}
+
+
+/*
+ * Get posix acl.
+ */
+static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct buffer_head *di_bh = NULL;
+ struct posix_acl *acl;
+ int ret;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return NULL;
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ acl = ERR_PTR(ret);
+ return acl;
+ }
+
+ acl = ocfs2_get_acl_nolock(inode, type, di_bh);
+
+ ocfs2_inode_unlock(inode, 0);
+
+ brelse(di_bh);
+
+ return acl;
+}
+
+/*
+ * Set the access or default ACL of an inode.
+ */
+static int ocfs2_set_acl(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *di_bh,
+ int type,
+ struct posix_acl *acl,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ int name_index;
+ void *value = NULL;
+ size_t size = 0;
+ int ret;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+ mode_t mode = inode->i_mode;
+ ret = posix_acl_equiv_mode(acl, &mode);
+ if (ret < 0)
+ return ret;
+ else {
+ inode->i_mode = mode;
+ if (ret == 0)
+ acl = NULL;
+ }
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (acl) {
+ value = ocfs2_acl_to_xattr(acl, &size);
+ if (IS_ERR(value))
+ return (int)PTR_ERR(value);
+ }
+
+ if (handle)
+ ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index,
+ "", value, size, 0,
+ meta_ac, data_ac);
+ else
+ ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0);
+
+ kfree(value);
+
+ return ret;
+}
+
+int ocfs2_check_acl(struct inode *inode, int mask)
+{
+ struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ int ret = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ return ret;
+ }
+
+ return -EAGAIN;
+}
+
+int ocfs2_acl_chmod(struct inode *inode)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl, *clone;
+ int ret;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return 0;
+
+ acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+ ret = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!ret)
+ ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
+ clone, NULL, NULL);
+ posix_acl_release(clone);
+ return ret;
+}
+
+/*
+ * Initialize the ACLs of a new inode. If parent directory has default ACL,
+ * then clone to new inode. Called from ocfs2_mknod.
+ */
+int ocfs2_init_acl(handle_t *handle,
+ struct inode *inode,
+ struct inode *dir,
+ struct buffer_head *di_bh,
+ struct buffer_head *dir_bh,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl = NULL;
+ int ret = 0;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+ acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
+ dir_bh);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+ if (!acl)
+ inode->i_mode &= ~current->fs->umask;
+ }
+ if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
+ struct posix_acl *clone;
+ mode_t mode;
+
+ if (S_ISDIR(inode->i_mode)) {
+ ret = ocfs2_set_acl(handle, inode, di_bh,
+ ACL_TYPE_DEFAULT, acl,
+ meta_ac, data_ac);
+ if (ret)
+ goto cleanup;
+ }
+ clone = posix_acl_clone(acl, GFP_NOFS);
+ ret = -ENOMEM;
+ if (!clone)
+ goto cleanup;
+
+ mode = inode->i_mode;
+ ret = posix_acl_create_masq(clone, &mode);
+ if (ret >= 0) {
+ inode->i_mode = mode;
+ if (ret > 0) {
+ ret = ocfs2_set_acl(handle, inode,
+ di_bh, ACL_TYPE_ACCESS,
+ clone, meta_ac, data_ac);
+ }
+ }
+ posix_acl_release(clone);
+ }
+cleanup:
+ posix_acl_release(acl);
+ return ret;
+}
+
+static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
+ char *list,
+ size_t list_len,
+ const char *name,
+ size_t name_len)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return 0;
+
+ if (list && size <= list_len)
+ memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
+ return size;
+}
+
+static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
+ char *list,
+ size_t list_len,
+ const char *name,
+ size_t name_len)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return 0;
+
+ if (list && size <= list_len)
+ memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
+ return size;
+}
+
+static int ocfs2_xattr_get_acl(struct inode *inode,
+ int type,
+ void *buffer,
+ size_t size)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl;
+ int ret;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ acl = ocfs2_get_acl(inode, type);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+ ret = posix_acl_to_xattr(acl, buffer, size);
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+static int ocfs2_xattr_get_acl_access(struct inode *inode,
+ const char *name,
+ void *buffer,
+ size_t size)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
+}
+
+static int ocfs2_xattr_get_acl_default(struct inode *inode,
+ const char *name,
+ void *buffer,
+ size_t size)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
+}
+
+static int ocfs2_xattr_set_acl(struct inode *inode,
+ int type,
+ const void *value,
+ size_t size)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl;
+ int ret = 0;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ if (!is_owner_or_cap(inode))
+ return -EPERM;
+
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ else if (acl) {
+ ret = posix_acl_valid(acl);
+ if (ret)
+ goto cleanup;
+ }
+ } else
+ acl = NULL;
+
+ ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
+
+cleanup:
+ posix_acl_release(acl);
+ return ret;
+}
+
+static int ocfs2_xattr_set_acl_access(struct inode *inode,
+ const char *name,
+ const void *value,
+ size_t size,
+ int flags)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int ocfs2_xattr_set_acl_default(struct inode *inode,
+ const char *name,
+ const void *value,
+ size_t size,
+ int flags)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+struct xattr_handler ocfs2_xattr_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .list = ocfs2_xattr_list_acl_access,
+ .get = ocfs2_xattr_get_acl_access,
+ .set = ocfs2_xattr_set_acl_access,
+};
+
+struct xattr_handler ocfs2_xattr_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .list = ocfs2_xattr_list_acl_default,
+ .get = ocfs2_xattr_get_acl_default,
+ .set = ocfs2_xattr_set_acl_default,
+};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
new file mode 100644
index 000000000000..8f6389ed4da5
--- /dev/null
+++ b/fs/ocfs2/acl.h
@@ -0,0 +1,58 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * acl.h
+ *
+ * Copyright (C) 2004, 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef OCFS2_ACL_H
+#define OCFS2_ACL_H
+
+#include <linux/posix_acl_xattr.h>
+
+struct ocfs2_acl_entry {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+};
+
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+
+extern int ocfs2_check_acl(struct inode *, int);
+extern int ocfs2_acl_chmod(struct inode *);
+extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
+ struct buffer_head *, struct buffer_head *,
+ struct ocfs2_alloc_context *,
+ struct ocfs2_alloc_context *);
+
+#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
+
+#define ocfs2_check_acl NULL
+static inline int ocfs2_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+static inline int ocfs2_init_acl(handle_t *handle,
+ struct inode *inode,
+ struct inode *dir,
+ struct buffer_head *di_bh,
+ struct buffer_head *dir_bh,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ return 0;
+}
+
+#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
+
+#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0cc2deb9394c..84a7bd4db5da 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/swap.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>
@@ -187,20 +188,12 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
static int ocfs2_dinode_sanity_check(struct inode *inode,
struct ocfs2_extent_tree *et)
{
- int ret = 0;
- struct ocfs2_dinode *di;
+ struct ocfs2_dinode *di = et->et_object;
BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
- di = et->et_object;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- ret = -EIO;
- ocfs2_error(inode->i_sb,
- "Inode %llu has invalid path root",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
- }
-
- return ret;
+ return 0;
}
static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
@@ -686,6 +679,61 @@ struct ocfs2_merge_ctxt {
int c_split_covers_rec;
};
+static int ocfs2_validate_extent_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ struct ocfs2_extent_block *eb =
+ (struct ocfs2_extent_block *)bh->b_data;
+
+ mlog(0, "Validating extent block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
+ ocfs2_error(sb,
+ "Extent block #%llu has bad signature %.*s",
+ (unsigned long long)bh->b_blocknr, 7,
+ eb->h_signature);
+ return -EINVAL;
+ }
+
+ if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
+ ocfs2_error(sb,
+ "Extent block #%llu has an invalid h_blkno "
+ "of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(eb->h_blkno));
+ return -EINVAL;
+ }
+
+ if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+ ocfs2_error(sb,
+ "Extent block #%llu has an invalid "
+ "h_fs_generation of #%u",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(eb->h_fs_generation));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+ struct buffer_head **bh)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_block(inode, eb_blkno, &tmp,
+ ocfs2_validate_extent_block);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
+
/*
* How many free extents have we got before we need more meta data?
*/
@@ -705,8 +753,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
last_eb_blk = ocfs2_et_get_last_eb_blk(et);
if (last_eb_blk) {
- retval = ocfs2_read_block(inode, last_eb_blk,
- &eb_bh);
+ retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
if (retval < 0) {
mlog_errno(retval);
goto bail;
@@ -908,11 +955,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
for(i = 0; i < new_blocks; i++) {
bh = new_eb_bhs[i];
eb = (struct ocfs2_extent_block *) bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
+ /* ocfs2_create_new_meta_bhs() should create it right! */
+ BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
eb_el = &eb->h_list;
status = ocfs2_journal_access(handle, inode, bh,
@@ -1052,11 +1096,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
}
eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
+ /* ocfs2_create_new_meta_bhs() should create it right! */
+ BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
eb_el = &eb->h_list;
root_el = et->et_root_el;
@@ -1176,18 +1217,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
brelse(bh);
bh = NULL;
- status = ocfs2_read_block(inode, blkno, &bh);
+ status = ocfs2_read_extent_block(inode, blkno, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
eb = (struct ocfs2_extent_block *) bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
el = &eb->h_list;
if (le16_to_cpu(el->l_next_free_rec) <
@@ -1540,7 +1576,7 @@ static int __ocfs2_find_path(struct inode *inode,
brelse(bh);
bh = NULL;
- ret = ocfs2_read_block(inode, blkno, &bh);
+ ret = ocfs2_read_extent_block(inode, blkno, &bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1548,11 +1584,6 @@ static int __ocfs2_find_path(struct inode *inode,
eb = (struct ocfs2_extent_block *) bh->b_data;
el = &eb->h_list;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EIO;
- goto out;
- }
if (le16_to_cpu(el->l_next_free_rec) >
le16_to_cpu(el->l_count)) {
@@ -4097,8 +4128,15 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
le16_to_cpu(new_el->l_count)) {
bh = path_leaf_bh(left_path);
eb = (struct ocfs2_extent_block *)bh->b_data;
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
- eb);
+ ocfs2_error(inode->i_sb,
+ "Extent block #%llu has an "
+ "invalid l_next_free_rec of "
+ "%d. It should have "
+ "matched the l_count of %d",
+ (unsigned long long)le64_to_cpu(eb->h_blkno),
+ le16_to_cpu(new_el->l_next_free_rec),
+ le16_to_cpu(new_el->l_count));
+ status = -EINVAL;
goto out;
}
rec = &new_el->l_recs[
@@ -4147,8 +4185,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
bh = path_leaf_bh(right_path);
eb = (struct ocfs2_extent_block *)bh->b_data;
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
- eb);
+ ocfs2_error(inode->i_sb,
+ "Extent block #%llu has an "
+ "invalid l_next_free_rec of %d",
+ (unsigned long long)le64_to_cpu(eb->h_blkno),
+ le16_to_cpu(new_el->l_next_free_rec));
+ status = -EINVAL;
goto out;
}
rec = &new_el->l_recs[1];
@@ -4294,7 +4336,9 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* ocfs2_figure_insert_type() and ocfs2_add_branch()
* may want it later.
*/
- ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh);
+ ret = ocfs2_read_extent_block(inode,
+ ocfs2_et_get_last_eb_blk(et),
+ &bh);
if (ret) {
mlog_exit(ret);
goto out;
@@ -4760,20 +4804,15 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
if (path->p_tree_depth) {
struct ocfs2_extent_block *eb;
- ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
- &last_eb_bh);
+ ret = ocfs2_read_extent_block(inode,
+ ocfs2_et_get_last_eb_blk(et),
+ &last_eb_bh);
if (ret) {
mlog_exit(ret);
goto out;
}
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EROFS;
- goto out;
- }
-
rightmost_el = &eb->h_list;
} else
rightmost_el = path_root_el(path);
@@ -4918,8 +4957,9 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
depth = path->p_tree_depth;
if (depth > 0) {
- ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
- &last_eb_bh);
+ ret = ocfs2_read_extent_block(inode,
+ ocfs2_et_get_last_eb_blk(et),
+ &last_eb_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -5255,6 +5295,78 @@ out:
return ret;
}
+int ocfs2_remove_btree_range(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ u32 cpos, u32 phys_cpos, u32 len,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+ int ret;
+ u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct inode *tl_inode = osb->osb_tl_inode;
+ handle_t *handle;
+ struct ocfs2_alloc_context *meta_ac = NULL;
+
+ ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac);
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ mutex_lock(&tl_inode->i_mutex);
+
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ ret = __ocfs2_flush_truncate_log(osb);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
+ dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ ocfs2_et_update_clusters(inode, et, -len);
+
+ ret = ocfs2_journal_dirty(handle, et->et_root_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
+ if (ret)
+ mlog_errno(ret);
+
+out_commit:
+ ocfs2_commit_trans(osb, handle);
+out:
+ mutex_unlock(&tl_inode->i_mutex);
+
+ if (meta_ac)
+ ocfs2_free_alloc_context(meta_ac);
+
+ return ret;
+}
+
int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
{
struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -5308,13 +5420,13 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
- status = -EIO;
- goto bail;
- }
+ /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
+ * by the underlying call to ocfs2_read_inode_block(), so any
+ * corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
tl_count = le16_to_cpu(tl->tl_count);
mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
tl_count == 0,
@@ -5464,13 +5576,13 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
BUG_ON(mutex_trylock(&tl_inode->i_mutex));
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
- status = -EIO;
- goto out;
- }
+ /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
+ * by the underlying call to ocfs2_read_inode_block(), so any
+ * corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
num_to_flush = le16_to_cpu(tl->tl_used);
mlog(0, "Flush %u records from truncate log #%llu\n",
num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
@@ -5586,7 +5698,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
goto bail;
}
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+ status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
iput(inode);
mlog_errno(status);
@@ -5625,13 +5737,13 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
}
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
- status = -EIO;
- goto bail;
- }
+ /* tl_bh is loaded from ocfs2_get_truncate_log_info(). It's
+ * validated by the underlying call to ocfs2_read_inode_block(),
+ * so any corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
if (le16_to_cpu(tl->tl_used)) {
mlog(0, "We'll have %u logs to recover\n",
le16_to_cpu(tl->tl_used));
@@ -5800,7 +5912,10 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
*/
/*
- * Describes a single block free from a suballocator
+ * Describe a single bit freed from a suballocator. For the block
+ * suballocators, it represents one block. For the global cluster
+ * allocator, it represents some clusters and free_bit indicates
+ * clusters number.
*/
struct ocfs2_cached_block_free {
struct ocfs2_cached_block_free *free_next;
@@ -5815,10 +5930,10 @@ struct ocfs2_per_slot_free_list {
struct ocfs2_cached_block_free *f_first;
};
-static int ocfs2_free_cached_items(struct ocfs2_super *osb,
- int sysfile_type,
- int slot,
- struct ocfs2_cached_block_free *head)
+static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
+ int sysfile_type,
+ int slot,
+ struct ocfs2_cached_block_free *head)
{
int ret;
u64 bg_blkno;
@@ -5893,6 +6008,82 @@ out:
return ret;
}
+int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+ u64 blkno, unsigned int bit)
+{
+ int ret = 0;
+ struct ocfs2_cached_block_free *item;
+
+ item = kmalloc(sizeof(*item), GFP_NOFS);
+ if (item == NULL) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ return ret;
+ }
+
+ mlog(0, "Insert clusters: (bit %u, blk %llu)\n",
+ bit, (unsigned long long)blkno);
+
+ item->free_blk = blkno;
+ item->free_bit = bit;
+ item->free_next = ctxt->c_global_allocator;
+
+ ctxt->c_global_allocator = item;
+ return ret;
+}
+
+static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
+ struct ocfs2_cached_block_free *head)
+{
+ struct ocfs2_cached_block_free *tmp;
+ struct inode *tl_inode = osb->osb_tl_inode;
+ handle_t *handle;
+ int ret = 0;
+
+ mutex_lock(&tl_inode->i_mutex);
+
+ while (head) {
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ ret = __ocfs2_flush_truncate_log(osb);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+ }
+
+ handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ break;
+ }
+
+ ret = ocfs2_truncate_log_append(osb, handle, head->free_blk,
+ head->free_bit);
+
+ ocfs2_commit_trans(osb, handle);
+ tmp = head;
+ head = head->free_next;
+ kfree(tmp);
+
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+ }
+
+ mutex_unlock(&tl_inode->i_mutex);
+
+ while (head) {
+ /* Premature exit may have left some dangling items. */
+ tmp = head;
+ head = head->free_next;
+ kfree(tmp);
+ }
+
+ return ret;
+}
+
int ocfs2_run_deallocs(struct ocfs2_super *osb,
struct ocfs2_cached_dealloc_ctxt *ctxt)
{
@@ -5908,8 +6099,10 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
if (fl->f_first) {
mlog(0, "Free items: (type %u, slot %d)\n",
fl->f_inode_type, fl->f_slot);
- ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
- fl->f_slot, fl->f_first);
+ ret2 = ocfs2_free_cached_blocks(osb,
+ fl->f_inode_type,
+ fl->f_slot,
+ fl->f_first);
if (ret2)
mlog_errno(ret2);
if (!ret)
@@ -5920,6 +6113,17 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
kfree(fl);
}
+ if (ctxt->c_global_allocator) {
+ ret2 = ocfs2_free_cached_clusters(osb,
+ ctxt->c_global_allocator);
+ if (ret2)
+ mlog_errno(ret2);
+ if (!ret)
+ ret = ret2;
+
+ ctxt->c_global_allocator = NULL;
+ }
+
return ret;
}
@@ -6075,11 +6279,10 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
eb = (struct ocfs2_extent_block *) bh->b_data;
el = &eb->h_list;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EROFS;
- goto out;
- }
+
+ /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
+ * Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
*new_last_eb = bh;
get_bh(*new_last_eb);
@@ -6350,6 +6553,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
goto bail;
}
+ vfs_dq_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
spin_lock(&OCFS2_I(inode)->ip_lock);
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
clusters_to_del;
@@ -6436,11 +6641,6 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
mlog_errno(ret);
else if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
- ret = walk_page_buffers(handle, page_buffers(page),
- from, to, &partial,
- ocfs2_journal_dirty_data);
-#endif
if (ret < 0)
mlog_errno(ret);
}
@@ -6663,6 +6863,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct page **pages = NULL;
loff_t end = osb->s_clustersize;
struct ocfs2_extent_tree et;
+ int did_quota = 0;
has_data = i_size_read(inode) ? 1 : 0;
@@ -6682,7 +6883,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
+ handle = ocfs2_start_trans(osb,
+ ocfs2_inline_to_extents_credits(osb->sb));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
@@ -6701,6 +6903,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
unsigned int page_end;
u64 phys;
+ if (vfs_dq_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1))) {
+ ret = -EDQUOT;
+ goto out_commit;
+ }
+ did_quota = 1;
+
ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
&num);
if (ret) {
@@ -6774,6 +6983,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
out_commit:
+ if (ret < 0 && did_quota)
+ vfs_dq_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1));
+
ocfs2_commit_trans(osb, handle);
out_unlock:
@@ -6984,20 +7197,14 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
if (fe->id2.i_list.l_tree_depth) {
- status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk),
- &last_eb_bh);
+ status = ocfs2_read_extent_block(inode,
+ le64_to_cpu(fe->i_last_eb_blk),
+ &last_eb_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-
- brelse(last_eb_bh);
- status = -EIO;
- goto bail;
- }
}
(*tc)->tc_last_eb_bh = last_eb_bh;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 70257c84cfbe..59d37d1b7d4c 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -73,6 +73,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct buffer_head *bh,
struct ocfs2_xattr_value_root *xv);
+/*
+ * Read an extent block into *bh. If *bh is NULL, a bh will be
+ * allocated. This is a cached read. The extent block will be validated
+ * with ocfs2_validate_extent_block().
+ */
+int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+ struct buffer_head **bh);
+
struct ocfs2_alloc_context;
int ocfs2_insert_extent(struct ocfs2_super *osb,
handle_t *handle,
@@ -110,6 +118,11 @@ int ocfs2_remove_extent(struct inode *inode,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_remove_btree_range(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ u32 cpos, u32 phys_cpos, u32 len,
+ struct ocfs2_cached_dealloc_ctxt *dealloc);
+
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_extent_tree *et);
@@ -167,10 +180,18 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
*/
struct ocfs2_cached_dealloc_ctxt {
struct ocfs2_per_slot_free_list *c_first_suballocator;
+ struct ocfs2_cached_block_free *c_global_allocator;
};
static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
{
c->c_first_suballocator = NULL;
+ c->c_global_allocator = NULL;
+}
+int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+ u64 blkno, unsigned int bit);
+static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
+{
+ return c->c_global_allocator != NULL;
}
int ocfs2_run_deallocs(struct ocfs2_super *osb,
struct ocfs2_cached_dealloc_ctxt *ctxt);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c22543b33420..6b647ec87bb3 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_FILE_IO
#include <cluster/masklog.h>
@@ -68,20 +69,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
goto bail;
}
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+ status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
fe = (struct ocfs2_dinode *) bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
- (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
- fe->i_signature);
- goto bail;
- }
-
if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
le32_to_cpu(fe->i_clusters))) {
mlog(ML_ERROR, "block offset is outside the allocated size: "
@@ -262,7 +256,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
BUG_ON(!PageLocked(page));
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
- ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -481,12 +475,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
- ret = walk_page_buffers(handle,
- page_buffers(page),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-#endif
if (ret < 0)
mlog_errno(ret);
}
@@ -1072,15 +1060,8 @@ static void ocfs2_write_failure(struct inode *inode,
tmppage = wc->w_pages[i];
if (page_has_buffers(tmppage)) {
- if (ocfs2_should_order_data(inode)) {
+ if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
- walk_page_buffers(wc->w_handle,
- page_buffers(tmppage),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-#endif
- }
block_commit_write(tmppage, from, to);
}
@@ -1750,6 +1731,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
wc->w_handle = handle;
+ if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
+ ret = -EDQUOT;
+ goto out_commit;
+ }
/*
* We don't want this to fail in ocfs2_write_end(), so do it
* here.
@@ -1758,7 +1744,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out_quota;
}
/*
@@ -1771,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
mmap_page);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out_quota;
}
ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
len);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out_quota;
}
if (data_ac)
@@ -1790,6 +1776,10 @@ success:
*pagep = wc->w_target_page;
*fsdata = wc;
return 0;
+out_quota:
+ if (clusters_to_alloc)
+ vfs_dq_free_space(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
out_commit:
ocfs2_commit_trans(osb, handle);
@@ -1919,15 +1909,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (page_has_buffers(tmppage)) {
- if (ocfs2_should_order_data(inode)) {
+ if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
- walk_page_buffers(wc->w_handle,
- page_buffers(tmppage),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-#endif
- }
block_commit_write(tmppage, from, to);
}
}
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 7e947c672469..15c8e6deee2e 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -39,6 +39,18 @@
#include "buffer_head_io.h"
+/*
+ * Bits on bh->b_state used by ocfs2.
+ *
+ * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart.
+ */
+enum ocfs2_state_bits {
+ BH_NeedsValidate = BH_JBDPrivateStart,
+};
+
+/* Expand the magic b_state functions */
+BUFFER_FNS(NeedsValidate, needs_validate);
+
int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
struct inode *inode)
{
@@ -112,7 +124,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
bh = bhs[i];
if (buffer_jbd(bh)) {
- mlog(ML_ERROR,
+ mlog(ML_BH_IO,
"trying to sync read a jbd "
"managed bh (blocknr = %llu), skipping\n",
(unsigned long long)bh->b_blocknr);
@@ -147,15 +159,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
for (i = nr; i > 0; i--) {
bh = bhs[i - 1];
- if (buffer_jbd(bh)) {
- mlog(ML_ERROR,
- "the journal got the buffer while it was "
- "locked for io! (blocknr = %llu)\n",
- (unsigned long long)bh->b_blocknr);
- BUG();
- }
+ /* No need to wait on the buffer if it's managed by JBD. */
+ if (!buffer_jbd(bh))
+ wait_on_buffer(bh);
- wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* Status won't be cleared from here on out,
* so we can safely record this and loop back
@@ -171,7 +178,9 @@ bail:
}
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
- struct buffer_head *bhs[], int flags)
+ struct buffer_head *bhs[], int flags,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh))
{
int status = 0;
int i, ignore_cache = 0;
@@ -251,8 +260,6 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
ignore_cache = 1;
}
- /* XXX: Can we ever get this and *not* have the cached
- * flag set? */
if (buffer_jbd(bh)) {
if (ignore_cache)
mlog(ML_BH_IO, "trying to sync read a jbd "
@@ -305,6 +312,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
+ if (validate)
+ set_buffer_needs_validate(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
continue;
@@ -335,6 +344,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
bhs[i] = NULL;
continue;
}
+
+ if (buffer_needs_validate(bh)) {
+ /* We never set NeedsValidate if the
+ * buffer was held by the journal, so
+ * that better not have changed */
+ BUG_ON(buffer_jbd(bh));
+ clear_buffer_needs_validate(bh);
+ status = validate(inode->i_sb, bh);
+ if (status) {
+ put_bh(bh);
+ bhs[i] = NULL;
+ continue;
+ }
+ }
}
/* Always set the buffer in the cache, even if it was
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 75e1dcb1ade7..c75d682dadd8 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -31,21 +31,24 @@
void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
int uptodate);
-static inline int ocfs2_read_block(struct inode *inode,
- u64 off,
- struct buffer_head **bh);
-
int ocfs2_write_block(struct ocfs2_super *osb,
struct buffer_head *bh,
struct inode *inode);
-int ocfs2_read_blocks(struct inode *inode,
- u64 block,
- int nr,
- struct buffer_head *bhs[],
- int flags);
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[]);
+/*
+ * If not NULL, validate() will be called on a buffer that is freshly
+ * read from disk. It will not be called if the buffer was in cache.
+ * Note that if validate() is being used for this buffer, it needs to
+ * be set even for a READAHEAD call, as it marks the buffer for later
+ * validation.
+ */
+int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
+ struct buffer_head *bhs[], int flags,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh));
+
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
struct buffer_head *bh);
@@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
#define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct inode *inode, u64 off,
- struct buffer_head **bh)
+ struct buffer_head **bh,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh))
{
int status = 0;
@@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
goto bail;
}
- status = ocfs2_read_blocks(inode, off, 1, bh, 0);
+ status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
bail:
return status;
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 57670c680471..7e72a81bc2d4 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
+#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
/* bits that are infrequently given and frequently matched in the high word */
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 52276c02f710..f8424874fa07 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -304,8 +304,8 @@ static int sc_seq_show(struct seq_file *seq, void *v)
* use of it here generates a warning with -Wbitwise */
seq_printf(seq, "%p:\n"
" krefs: %d\n"
- " sock: %u.%u.%u.%u:%u -> "
- "%u.%u.%u.%u:%u\n"
+ " sock: %pI4:%u -> "
+ "%pI4:%u\n"
" remote node: %s\n"
" page off: %zu\n"
" handshake ok: %u\n"
@@ -319,8 +319,8 @@ static int sc_seq_show(struct seq_file *seq, void *v)
" func type: %u\n",
sc,
atomic_read(&sc->sc_kref.refcount),
- NIPQUAD(saddr), inet ? ntohs(sport) : 0,
- NIPQUAD(daddr), inet ? ntohs(dport) : 0,
+ &saddr, inet ? ntohs(sport) : 0,
+ &daddr, inet ? ntohs(dport) : 0,
sc->sc_node->nd_name,
sc->sc_page_off,
sc->sc_handshake_ok,
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 816a3f61330c..70e8fa9e2539 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -250,7 +250,7 @@ static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node,
static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page)
{
- return sprintf(page, "%u.%u.%u.%u\n", NIPQUAD(node->nd_ipv4_address));
+ return sprintf(page, "%pI4\n", &node->nd_ipv4_address);
}
static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2bcf706d9dd3..9fbe849f6344 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1597,8 +1597,8 @@ static void o2net_start_connect(struct work_struct *work)
ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr,
sizeof(myaddr));
if (ret) {
- mlog(ML_ERROR, "bind failed with %d at address %u.%u.%u.%u\n",
- ret, NIPQUAD(mynode->nd_ipv4_address));
+ mlog(ML_ERROR, "bind failed with %d at address %pI4\n",
+ ret, &mynode->nd_ipv4_address);
goto out;
}
@@ -1790,17 +1790,16 @@ static int o2net_accept_one(struct socket *sock)
node = o2nm_get_node_by_ip(sin.sin_addr.s_addr);
if (node == NULL) {
- mlog(ML_NOTICE, "attempt to connect from unknown node at "
- "%u.%u.%u.%u:%d\n", NIPQUAD(sin.sin_addr.s_addr),
- ntohs(sin.sin_port));
+ mlog(ML_NOTICE, "attempt to connect from unknown node at %pI4:%d\n",
+ &sin.sin_addr.s_addr, ntohs(sin.sin_port));
ret = -EINVAL;
goto out;
}
if (o2nm_this_node() > node->nd_num) {
mlog(ML_NOTICE, "unexpected connect attempted from a lower "
- "numbered node '%s' at " "%u.%u.%u.%u:%d with num %u\n",
- node->nd_name, NIPQUAD(sin.sin_addr.s_addr),
+ "numbered node '%s' at " "%pI4:%d with num %u\n",
+ node->nd_name, &sin.sin_addr.s_addr,
ntohs(sin.sin_port), node->nd_num);
ret = -EINVAL;
goto out;
@@ -1810,8 +1809,8 @@ static int o2net_accept_one(struct socket *sock)
* and tries to connect before we see their heartbeat */
if (!o2hb_check_node_heartbeating_from_callback(node->nd_num)) {
mlog(ML_CONN, "attempt to connect from node '%s' at "
- "%u.%u.%u.%u:%d but it isn't heartbeating\n",
- node->nd_name, NIPQUAD(sin.sin_addr.s_addr),
+ "%pI4:%d but it isn't heartbeating\n",
+ node->nd_name, &sin.sin_addr.s_addr,
ntohs(sin.sin_port));
ret = -EINVAL;
goto out;
@@ -1827,8 +1826,8 @@ static int o2net_accept_one(struct socket *sock)
spin_unlock(&nn->nn_lock);
if (ret) {
mlog(ML_NOTICE, "attempt to connect from node '%s' at "
- "%u.%u.%u.%u:%d but it already has an open connection\n",
- node->nd_name, NIPQUAD(sin.sin_addr.s_addr),
+ "%pI4:%d but it already has an open connection\n",
+ node->nd_name, &sin.sin_addr.s_addr,
ntohs(sin.sin_port));
goto out;
}
@@ -1924,15 +1923,15 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
sock->sk->sk_reuse = 1;
ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
if (ret < 0) {
- mlog(ML_ERROR, "unable to bind socket at %u.%u.%u.%u:%u, "
- "ret=%d\n", NIPQUAD(addr), ntohs(port), ret);
+ mlog(ML_ERROR, "unable to bind socket at %pI4:%u, "
+ "ret=%d\n", &addr, ntohs(port), ret);
goto out;
}
ret = sock->ops->listen(sock, 64);
if (ret < 0) {
- mlog(ML_ERROR, "unable to listen on %u.%u.%u.%u:%u, ret=%d\n",
- NIPQUAD(addr), ntohs(port), ret);
+ mlog(ML_ERROR, "unable to listen on %pI4:%u, ret=%d\n",
+ &addr, ntohs(port), ret);
}
out:
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 026e6eb85187..3708fe482e3e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -40,6 +40,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_NAMEI
#include <cluster/masklog.h>
@@ -82,49 +83,6 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **new_bh);
-static struct buffer_head *ocfs2_bread(struct inode *inode,
- int block, int *err, int reada)
-{
- struct buffer_head *bh = NULL;
- int tmperr;
- u64 p_blkno;
- int readflags = 0;
-
- if (reada)
- readflags |= OCFS2_BH_READAHEAD;
-
- if (((u64)block << inode->i_sb->s_blocksize_bits) >=
- i_size_read(inode)) {
- BUG_ON(!reada);
- return NULL;
- }
-
- down_read(&OCFS2_I(inode)->ip_alloc_sem);
- tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
- NULL);
- up_read(&OCFS2_I(inode)->ip_alloc_sem);
- if (tmperr < 0) {
- mlog_errno(tmperr);
- goto fail;
- }
-
- tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags);
- if (tmperr < 0)
- goto fail;
-
- tmperr = 0;
-
- *err = 0;
- return bh;
-
-fail:
- brelse(bh);
- bh = NULL;
-
- *err = -EIO;
- return NULL;
-}
-
/*
* bh passed here can be an inode block or a dir data block, depending
* on the inode inline data flag.
@@ -231,7 +189,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
- ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(dir, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -250,6 +208,43 @@ out:
return NULL;
}
+static int ocfs2_validate_dir_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ /*
+ * Nothing yet. We don't validate dirents here, that's handled
+ * in-place when the code walks them.
+ */
+ mlog(0, "Validating dirblock %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ return 0;
+}
+
+/*
+ * This function forces all errors to -EIO for consistency with its
+ * predecessor, ocfs2_bread(). We haven't audited what returning the
+ * real error codes would do to callers. We log the real codes with
+ * mlog_errno() before we squash them.
+ */
+static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
+ struct buffer_head **bh, int flags)
+{
+ int rc = 0;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
+ ocfs2_validate_dir_block);
+ if (rc)
+ mlog_errno(rc);
+
+ /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc ? -EIO : 0;
+}
+
static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
@@ -296,15 +291,17 @@ restart:
}
num++;
- bh = ocfs2_bread(dir, b++, &err, 1);
+ bh = NULL;
+ err = ocfs2_read_dir_block(dir, b++, &bh,
+ OCFS2_BH_READAHEAD);
bh_use[ra_max] = bh;
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
goto next;
- if (ocfs2_read_block(dir, block, &bh)) {
+ if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
/* read error, skip block & hope for the best.
- * ocfs2_read_block() has released the bh. */
+ * ocfs2_read_dir_block() has released the bh. */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -458,7 +455,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
- ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(dir, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -636,7 +633,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
struct ocfs2_inline_data *data;
struct ocfs2_dir_entry *de;
- ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -724,7 +721,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
int i, stored;
struct buffer_head * bh, * tmp;
struct ocfs2_dir_entry * de;
- int err;
struct super_block * sb = inode->i_sb;
unsigned int ra_sectors = 16;
@@ -735,12 +731,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
while (!error && !stored && *f_pos < i_size_read(inode)) {
blk = (*f_pos) >> sb->s_blocksize_bits;
- bh = ocfs2_bread(inode, blk, &err, 0);
- if (!bh) {
- mlog(ML_ERROR,
- "directory #%llu contains a hole at offset %lld\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- *f_pos);
+ if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
+ /* Skip the corrupt dirblock and keep trying */
*f_pos += sb->s_blocksize - offset;
continue;
}
@@ -754,8 +746,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
|| (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
i > 0; i--) {
- tmp = ocfs2_bread(inode, ++blk, &err, 1);
- brelse(tmp);
+ tmp = NULL;
+ if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
+ OCFS2_BH_READAHEAD))
+ brelse(tmp);
}
last_ra_blk = blk;
ra_sectors = 8;
@@ -828,6 +822,7 @@ revalidate:
}
offset = 0;
brelse(bh);
+ bh = NULL;
}
stored = 0;
@@ -1216,9 +1211,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
unsigned int blocks_wanted,
struct buffer_head **first_block_bh)
{
- int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
u32 alloc, bit_off, len;
struct super_block *sb = dir->i_sb;
+ int ret, credits = ocfs2_inline_to_extents_credits(sb);
u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(dir);
@@ -1227,6 +1222,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
handle_t *handle;
struct ocfs2_extent_tree et;
+ int did_quota = 0;
ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
@@ -1264,6 +1260,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
goto out_sem;
}
+ if (vfs_dq_alloc_space_nodirty(dir,
+ ocfs2_clusters_to_bytes(osb->sb, alloc))) {
+ ret = -EDQUOT;
+ goto out_commit;
+ }
+ did_quota = 1;
/*
* Try to claim as many clusters as the bitmap can give though
* if we only get one now, that's enough to continue. The rest
@@ -1386,6 +1388,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
dirdata_bh = NULL;
out_commit:
+ if (ret < 0 && did_quota)
+ vfs_dq_free_space_nodirty(dir,
+ ocfs2_clusters_to_bytes(osb->sb, 2));
ocfs2_commit_trans(osb, handle);
out_sem:
@@ -1410,7 +1415,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct buffer_head **new_bh)
{
int status;
- int extend;
+ int extend, did_quota = 0;
u64 p_blkno, v_blkno;
spin_lock(&OCFS2_I(dir)->ip_lock);
@@ -1420,6 +1425,13 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
if (extend) {
u32 offset = OCFS2_I(dir)->ip_clusters;
+ if (vfs_dq_alloc_space_nodirty(dir,
+ ocfs2_clusters_to_bytes(sb, 1))) {
+ status = -EDQUOT;
+ goto bail;
+ }
+ did_quota = 1;
+
status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1, 0, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
@@ -1445,6 +1457,8 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
}
status = 0;
bail:
+ if (did_quota && status < 0)
+ vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
mlog_exit(status);
return status;
}
@@ -1680,8 +1694,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
struct super_block *sb = dir->i_sb;
int status;
- bh = ocfs2_bread(dir, 0, &status, 0);
- if (!bh) {
+ status = ocfs2_read_dir_block(dir, 0, &bh, 0);
+ if (status) {
mlog_errno(status);
goto bail;
}
@@ -1702,11 +1716,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
status = -ENOSPC;
goto bail;
}
- bh = ocfs2_bread(dir,
- offset >> sb->s_blocksize_bits,
- &status,
- 0);
- if (!bh) {
+ status = ocfs2_read_dir_block(dir,
+ offset >> sb->s_blocksize_bits,
+ &bh, 0);
+ if (status) {
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 533a789c3ef8..6f7a77d54020 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -339,8 +339,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
ip = DLMFS_I(inode);
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -365,8 +365,8 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
return NULL;
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -608,8 +608,10 @@ static int __init init_dlmfs_fs(void)
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
dlmfs_init_once);
- if (!dlmfs_inode_cache)
+ if (!dlmfs_inode_cache) {
+ status = -ENOMEM;
goto bail;
+ }
cleanup_inode = 1;
user_dlm_worker = create_singlethread_workqueue("user_dlm");
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 39ec27738499..0c3cc03c61fa 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -33,7 +33,7 @@
#include <linux/workqueue.h>
/* user_lock_res->l_flags flags. */
-#define USER_LOCK_ATTACHED (0x00000001) /* have we initialized
+#define USER_LOCK_ATTACHED (0x00000001) /* we have initialized
* the lvb */
#define USER_LOCK_BUSY (0x00000002) /* we are currently in
* dlm_lock */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ec684426034b..b1c75911d8ad 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -32,6 +32,7 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/time.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>
@@ -51,6 +52,7 @@
#include "slot_map.h"
#include "super.h"
#include "uptodate.h"
+#include "quota.h"
#include "buffer_head_io.h"
@@ -68,6 +70,7 @@ struct ocfs2_mask_waiter {
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
+static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
/*
* Return value from ->downconvert_worker functions.
@@ -102,6 +105,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
+static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
@@ -258,6 +262,12 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
.flags = 0,
};
+static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
+ .set_lvb = ocfs2_set_qinfo_lvb,
+ .get_osb = ocfs2_get_qinfo_osb,
+ .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
+};
+
static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
{
return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -279,6 +289,13 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res
return (struct ocfs2_dentry_lock *)lockres->l_priv;
}
+static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
+{
+ BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
+
+ return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
+}
+
static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
if (lockres->l_ops->get_osb)
@@ -507,6 +524,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
return OCFS2_SB(inode->i_sb);
}
+static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
+{
+ struct ocfs2_mem_dqinfo *info = lockres->l_priv;
+
+ return OCFS2_SB(info->dqi_gi.dqi_sb);
+}
+
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
{
struct ocfs2_file_private *fp = lockres->l_priv;
@@ -609,6 +633,17 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
lockres->l_flags |= OCFS2_LOCK_NOCACHE;
}
+void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
+ struct ocfs2_mem_dqinfo *info)
+{
+ ocfs2_lock_res_init_once(lockres);
+ ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
+ 0, lockres->l_name);
+ ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
+ OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
+ info);
+}
+
void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
{
mlog_entry_void();
@@ -2024,7 +2059,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
} else {
/* Boo, we have to go to disk. */
/* read bh, cast, ocfs2_refresh_inode */
- status = ocfs2_read_block(inode, oi->ip_blkno, bh);
+ status = ocfs2_read_inode_block(inode, bh);
if (status < 0) {
mlog_errno(status);
goto bail_refresh;
@@ -2032,18 +2067,14 @@ static int ocfs2_inode_lock_update(struct inode *inode,
fe = (struct ocfs2_dinode *) (*bh)->b_data;
/* This is a good chance to make sure we're not
- * locking an invalid object.
+ * locking an invalid object. ocfs2_read_inode_block()
+ * already checked that the inode block is sane.
*
* We bug on a stale inode here because we checked
* above whether it was wiped from disk. The wiping
* node provides a guarantee that we receive that
* message and can mark the inode before dropping any
* locks associated with it. */
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
- status = -EIO;
- goto bail_refresh;
- }
mlog_bug_on_msg(inode->i_generation !=
le32_to_cpu(fe->i_generation),
"Invalid dinode %llu disk generation: %u "
@@ -2085,7 +2116,7 @@ static int ocfs2_assign_bh(struct inode *inode,
return 0;
}
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh);
+ status = ocfs2_read_inode_block(inode, ret_bh);
if (status < 0)
mlog_errno(status);
@@ -2841,9 +2872,8 @@ static void ocfs2_unlock_ast(void *opaque, int error)
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
- spin_unlock_irqrestore(&lockres->l_lock, flags);
-
wake_up(&lockres->l_event);
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
mlog_exit_void();
}
@@ -3450,6 +3480,117 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
return UNBLOCK_CONTINUE_POST;
}
+static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
+{
+ struct ocfs2_qinfo_lvb *lvb;
+ struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
+ struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
+ oinfo->dqi_gi.dqi_type);
+
+ mlog_entry_void();
+
+ lvb = (struct ocfs2_qinfo_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
+ lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
+ lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
+ lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
+ lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
+ lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
+ lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
+ lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
+
+ mlog_exit_void();
+}
+
+void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+ struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+ struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
+ int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+
+ mlog_entry_void();
+ if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
+ ocfs2_cluster_unlock(osb, lockres, level);
+ mlog_exit_void();
+}
+
+static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
+{
+ struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
+ oinfo->dqi_gi.dqi_type);
+ struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+ struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+ struct buffer_head *bh = NULL;
+ struct ocfs2_global_disk_dqinfo *gdinfo;
+ int status = 0;
+
+ if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
+ info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
+ info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
+ oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
+ oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
+ oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
+ oinfo->dqi_gi.dqi_free_entry =
+ be32_to_cpu(lvb->lvb_free_entry);
+ } else {
+ status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ gdinfo = (struct ocfs2_global_disk_dqinfo *)
+ (bh->b_data + OCFS2_GLOBAL_INFO_OFF);
+ info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
+ info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
+ oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
+ oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
+ oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
+ oinfo->dqi_gi.dqi_free_entry =
+ le32_to_cpu(gdinfo->dqi_free_entry);
+ brelse(bh);
+ ocfs2_track_lock_refresh(lockres);
+ }
+
+bail:
+ return status;
+}
+
+/* Lock quota info, this function expects at least shared lock on the quota file
+ * so that we can safely refresh quota info from disk. */
+int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+ struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+ struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
+ int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+ int status = 0;
+
+ mlog_entry_void();
+
+ /* On RO devices, locking really isn't needed... */
+ if (ocfs2_is_hard_readonly(osb)) {
+ if (ex)
+ status = -EROFS;
+ goto bail;
+ }
+ if (ocfs2_mount_local(osb))
+ goto bail;
+
+ status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ if (!ocfs2_should_refresh_lock_res(lockres))
+ goto bail;
+ /* OK, we have the lock but we need to refresh the quota info */
+ status = ocfs2_refresh_qinfo(oinfo);
+ if (status)
+ ocfs2_qinfo_unlock(oinfo, ex);
+ ocfs2_complete_lock_res_refresh(lockres, status);
+bail:
+ mlog_exit(status);
+ return status;
+}
+
/*
* This is the filesystem locking protocol. It provides the lock handling
* hooks for the underlying DLM. It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 2bb01f09c1b1..3f8d9986b8e0 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -49,6 +49,19 @@ struct ocfs2_meta_lvb {
__be32 lvb_reserved2;
};
+#define OCFS2_QINFO_LVB_VERSION 1
+
+struct ocfs2_qinfo_lvb {
+ __u8 lvb_version;
+ __u8 lvb_reserved[3];
+ __be32 lvb_bgrace;
+ __be32 lvb_igrace;
+ __be32 lvb_syncms;
+ __be32 lvb_blocks;
+ __be32 lvb_free_blk;
+ __be32 lvb_free_entry;
+};
+
/* ocfs2_inode_lock_full() 'arg_flags' flags */
/* don't wait on recovery. */
#define OCFS2_META_LOCK_RECOVERY (0x01)
@@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
struct ocfs2_file_private;
void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_file_private *fp);
+struct ocfs2_mem_dqinfo;
+void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
+ struct ocfs2_mem_dqinfo *info);
void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
int ocfs2_create_new_inode_locks(struct inode *inode);
int ocfs2_drop_inode_locks(struct inode *inode);
@@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex);
void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
int ocfs2_file_lock(struct file *file, int ex, int trylock);
void ocfs2_file_unlock(struct file *file);
+int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
+void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
+
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2baedac58234..f2bb1a04d253 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
- ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh);
+ ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- ret = -EROFS;
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- goto out;
- }
-
if (el->l_tree_depth) {
ocfs2_error(inode->i_sb,
"Inode %lu has non zero tree depth in "
@@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
goto no_more_extents;
- ret = ocfs2_read_block(inode,
- le64_to_cpu(eb->h_next_leaf_blk),
- &next_eb_bh);
+ ret = ocfs2_read_extent_block(inode,
+ le64_to_cpu(eb->h_next_leaf_blk),
+ &next_eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
- next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
-
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) {
- ret = -EROFS;
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb);
- goto out;
- }
+ next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
el = &next_eb->h_list;
-
i = ocfs2_search_for_hole_index(el, v_cluster);
}
@@ -630,7 +617,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
if (ret == 0)
goto out;
- ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -819,3 +806,74 @@ out:
return ret;
}
+
+int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
+ struct buffer_head *bhs[], int flags,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh))
+{
+ int rc = 0;
+ u64 p_block, p_count;
+ int i, count, done = 0;
+
+ mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
+ "flags = %x, validate = %p)\n",
+ inode, (unsigned long long)v_block, nr, bhs, flags,
+ validate);
+
+ if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
+ i_size_read(inode)) {
+ BUG_ON(!(flags & OCFS2_BH_READAHEAD));
+ goto out;
+ }
+
+ while (done < nr) {
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
+ &p_block, &p_count, NULL);
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (rc) {
+ mlog_errno(rc);
+ break;
+ }
+
+ if (!p_block) {
+ rc = -EIO;
+ mlog(ML_ERROR,
+ "Inode #%llu contains a hole at offset %llu\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)(v_block + done) <<
+ inode->i_sb->s_blocksize_bits);
+ break;
+ }
+
+ count = nr - done;
+ if (p_count < count)
+ count = p_count;
+
+ /*
+ * If the caller passed us bhs, they should have come
+ * from a previous readahead call to this function. Thus,
+ * they should have the right b_blocknr.
+ */
+ for (i = 0; i < count; i++) {
+ if (!bhs[done + i])
+ continue;
+ BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
+ }
+
+ rc = ocfs2_read_blocks(inode, p_block, count, bhs + done,
+ flags, validate);
+ if (rc) {
+ mlog_errno(rc);
+ break;
+ }
+ done += count;
+ }
+
+out:
+ mlog_exit(rc);
+ return rc;
+}
+
+
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 1c4aa8b06f34..b7dd9731b462 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
struct ocfs2_extent_list *el);
+int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
+ struct buffer_head *bhs[], int flags,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh));
+static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
+ struct buffer_head **bh,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh))
+{
+ int status = 0;
+
+ if (bh == NULL) {
+ printk("ocfs2: bh == NULL\n");
+ status = -EINVAL;
+ goto bail;
+ }
+
+ status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate);
+
+bail:
+ return status;
+}
+
+
#endif /* _EXTENT_MAP_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e2570a3bc2b2..9374d374a264 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -35,6 +35,7 @@
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/falloc.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
@@ -56,6 +57,8 @@
#include "suballoc.h"
#include "super.h"
#include "xattr.h"
+#include "acl.h"
+#include "quota.h"
#include "buffer_head_io.h"
@@ -303,9 +306,9 @@ bail:
return status;
}
-static int ocfs2_simple_size_update(struct inode *inode,
- struct buffer_head *di_bh,
- u64 new_i_size)
+int ocfs2_simple_size_update(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 new_i_size)
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -401,12 +404,9 @@ static int ocfs2_truncate_file(struct inode *inode,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)new_i_size);
+ /* We trust di_bh because it comes from ocfs2_inode_lock(), which
+ * already validated it */
fe = (struct ocfs2_dinode *) di_bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
- status = -EIO;
- goto bail;
- }
mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
"Inode %llu, inode i_size = %lld != di "
@@ -536,6 +536,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
enum ocfs2_alloc_restarted why;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_extent_tree et;
+ int did_quota = 0;
mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
@@ -545,18 +546,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
*/
BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+ status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
-
fe = (struct ocfs2_dinode *) bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
- status = -EIO;
- goto leave;
- }
restart_all:
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
@@ -585,6 +580,13 @@ restart_all:
}
restarted_transaction:
+ if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb,
+ clusters_to_add))) {
+ status = -EDQUOT;
+ goto leave;
+ }
+ did_quota = 1;
+
/* reserve a write to the file entry early on - that we if we
* run out of credits in the allocation path, we can still
* update i_size. */
@@ -622,6 +624,10 @@ restarted_transaction:
spin_lock(&OCFS2_I(inode)->ip_lock);
clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
spin_unlock(&OCFS2_I(inode)->ip_lock);
+ /* Release unused quota reservation */
+ vfs_dq_free_space(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
+ did_quota = 0;
if (why != RESTART_NONE && clusters_to_add) {
if (why == RESTART_META) {
@@ -654,6 +660,9 @@ restarted_transaction:
OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
leave:
+ if (status < 0 && did_quota)
+ vfs_dq_free_space(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
if (handle) {
ocfs2_commit_trans(osb, handle);
handle = NULL;
@@ -885,6 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
struct ocfs2_super *osb = OCFS2_SB(sb);
struct buffer_head *bh = NULL;
handle_t *handle = NULL;
+ int locked[MAXQUOTAS] = {0, 0};
+ int credits, qtype;
+ struct ocfs2_mem_dqinfo *oinfo;
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
@@ -955,11 +967,47 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
}
- handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto bail_unlock;
+ if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+ (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+ credits = OCFS2_INODE_UPDATE_CREDITS;
+ if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
+ && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+ oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto bail_unlock;
+ credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) +
+ ocfs2_calc_qdel_credits(sb, USRQUOTA);
+ locked[USRQUOTA] = 1;
+ }
+ if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
+ && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+ oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto bail_unlock;
+ credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) +
+ ocfs2_calc_qdel_credits(sb, GRPQUOTA);
+ locked[GRPQUOTA] = 1;
+ }
+ handle = ocfs2_start_trans(osb, credits);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail_unlock;
+ }
+ status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+ if (status < 0)
+ goto bail_commit;
+ } else {
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail_unlock;
+ }
}
/*
@@ -982,6 +1030,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
bail_commit:
ocfs2_commit_trans(osb, handle);
bail_unlock:
+ for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
+ if (!locked[qtype])
+ continue;
+ oinfo = sb_dqinfo(sb, qtype)->dqi_priv;
+ ocfs2_unlock_global_qf(oinfo, 1);
+ }
ocfs2_inode_unlock(inode, 1);
bail_unlock_rw:
if (size_change)
@@ -989,6 +1043,12 @@ bail_unlock_rw:
bail:
brelse(bh);
+ if (!status && attr->ia_valid & ATTR_MODE) {
+ status = ocfs2_acl_chmod(inode);
+ if (status < 0)
+ mlog_errno(status);
+ }
+
mlog_exit(status);
return status;
}
@@ -1035,7 +1095,7 @@ int ocfs2_permission(struct inode *inode, int mask)
goto out;
}
- ret = generic_permission(inode, mask, NULL);
+ ret = generic_permission(inode, mask, ocfs2_check_acl);
ocfs2_inode_unlock(inode, 0);
out:
@@ -1128,9 +1188,8 @@ static int ocfs2_write_remove_suid(struct inode *inode)
{
int ret;
struct buffer_head *bh = NULL;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
- ret = ocfs2_read_block(inode, oi->ip_blkno, &bh);
+ ret = ocfs2_read_inode_block(inode, &bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -1156,8 +1215,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
struct buffer_head *di_bh = NULL;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
- ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno,
- &di_bh);
+ ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1226,83 +1284,6 @@ out:
return ret;
}
-static int __ocfs2_remove_inode_range(struct inode *inode,
- struct buffer_head *di_bh,
- u32 cpos, u32 phys_cpos, u32 len,
- struct ocfs2_cached_dealloc_ctxt *dealloc)
-{
- int ret;
- u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct inode *tl_inode = osb->osb_tl_inode;
- handle_t *handle;
- struct ocfs2_alloc_context *meta_ac = NULL;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
- struct ocfs2_extent_tree et;
-
- ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
-
- ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
- if (ret) {
- mlog_errno(ret);
- return ret;
- }
-
- mutex_lock(&tl_inode->i_mutex);
-
- if (ocfs2_truncate_log_needs_flush(osb)) {
- ret = __ocfs2_flush_truncate_log(osb);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- }
-
- handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
-
- ret = ocfs2_journal_access(handle, inode, di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
- ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
- dealloc);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- OCFS2_I(inode)->ip_clusters -= len;
- di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
-
- ret = ocfs2_journal_dirty(handle, di_bh);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
- if (ret)
- mlog_errno(ret);
-
-out_commit:
- ocfs2_commit_trans(osb, handle);
-out:
- mutex_unlock(&tl_inode->i_mutex);
-
- if (meta_ac)
- ocfs2_free_alloc_context(meta_ac);
-
- return ret;
-}
-
/*
* Truncate a byte range, avoiding pages within partial clusters. This
* preserves those pages for the zeroing code to write to.
@@ -1402,7 +1383,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_cached_dealloc_ctxt dealloc;
struct address_space *mapping = inode->i_mapping;
+ struct ocfs2_extent_tree et;
+ ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
ocfs2_init_dealloc_ctxt(&dealloc);
if (byte_len == 0)
@@ -1458,9 +1441,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
/* Only do work for non-holes */
if (phys_cpos != 0) {
- ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
- phys_cpos, alloc_size,
- &dealloc);
+ ret = ocfs2_remove_btree_range(inode, &et, cpos,
+ phys_cpos, alloc_size,
+ &dealloc);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index e92382cbca5f..172f9fbc9fc7 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
+int ocfs2_simple_size_update(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 new_i_size);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7aa00d511874..288512c9dbc2 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/quotaops.h>
#include <asm/byteorder.h>
@@ -214,12 +215,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
return 0;
}
-int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
- int create_ino)
+void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
+ int create_ino)
{
struct super_block *sb;
struct ocfs2_super *osb;
- int status = -EINVAL;
int use_plocks = 1;
mlog_entry("(0x%p, size:%llu)\n", inode,
@@ -232,25 +232,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
use_plocks = 0;
- /* this means that read_inode cannot create a superblock inode
- * today. change if needed. */
- if (!OCFS2_IS_VALID_DINODE(fe) ||
- !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
- mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
- "signature = %.*s, flags = 0x%x\n",
- inode->i_ino,
- (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
- fe->i_signature, le32_to_cpu(fe->i_flags));
- goto bail;
- }
+ /*
+ * These have all been checked by ocfs2_read_inode_block() or set
+ * by ocfs2_mknod_locked(), so a failure is a code bug.
+ */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode
+ cannot create a superblock
+ inode today. change if
+ that is needed. */
+ BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)));
+ BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation);
- if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) {
- mlog(ML_ERROR, "file entry generation does not match "
- "superblock! osb->fs_generation=%x, "
- "fe->i_fs_generation=%x\n",
- osb->fs_generation, le32_to_cpu(fe->i_fs_generation));
- goto bail;
- }
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
@@ -284,14 +276,18 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
inode->i_nlink = le16_to_cpu(fe->i_links_count);
- if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
+ if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
+ inode->i_flags |= S_NOQUOTA;
+ }
if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
} else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
+ } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
+ inode->i_flags |= S_NOQUOTA;
} else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino);
/* we can't actually hit this as read_inode can't
@@ -354,10 +350,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_set_inode_flags(inode);
- status = 0;
-bail:
- mlog_exit(status);
- return status;
+ mlog_exit_void();
}
static int ocfs2_read_locked_inode(struct inode *inode,
@@ -460,11 +453,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
}
}
- if (can_lock)
- status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh,
- OCFS2_BH_IGNORE_CACHE);
- else
+ if (can_lock) {
+ status = ocfs2_read_inode_block_full(inode, &bh,
+ OCFS2_BH_IGNORE_CACHE);
+ } else {
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
+ if (!status)
+ status = ocfs2_validate_inode_block(osb->sb, bh);
+ }
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -472,12 +468,6 @@ static int ocfs2_read_locked_inode(struct inode *inode,
status = -EINVAL;
fe = (struct ocfs2_dinode *) bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- mlog(0, "Invalid dinode #%llu: signature = %.*s\n",
- (unsigned long long)args->fi_blkno, 7,
- fe->i_signature);
- goto bail;
- }
/*
* This is a code bug. Right now the caller needs to
@@ -491,10 +481,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
S_ISBLK(le16_to_cpu(fe->i_mode)))
- inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
+ inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
- if (ocfs2_populate_inode(inode, fe, 0) < 0)
- goto bail;
+ ocfs2_populate_inode(inode, fe, 0);
BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
@@ -615,7 +604,8 @@ static int ocfs2_remove_inode(struct inode *inode,
goto bail;
}
- handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS);
+ handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
+ ocfs2_quota_trans_credits(inode->i_sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@@ -647,6 +637,7 @@ static int ocfs2_remove_inode(struct inode *inode,
}
ocfs2_remove_from_cache(inode, di_bh);
+ vfs_dq_free_inode(inode);
status = ocfs2_free_dinode(handle, inode_alloc_inode,
inode_alloc_bh, di);
@@ -929,7 +920,10 @@ void ocfs2_delete_inode(struct inode *inode)
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
- if (is_bad_inode(inode)) {
+ /* When we fail in read_inode() we mark inode as bad. The second test
+ * catches the case when inode allocation fails before allocating
+ * a block for inode. */
+ if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) {
mlog(0, "Skipping delete of bad inode\n");
goto bail;
}
@@ -1264,3 +1258,71 @@ void ocfs2_refresh_inode(struct inode *inode,
spin_unlock(&OCFS2_I(inode)->ip_lock);
}
+
+int ocfs2_validate_inode_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int rc = -EINVAL;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+ mlog(0, "Validating dinode %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ if (!OCFS2_IS_VALID_DINODE(di)) {
+ ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
+ (unsigned long long)bh->b_blocknr, 7,
+ di->i_signature);
+ goto bail;
+ }
+
+ if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+ ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ goto bail;
+ }
+
+ if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+ ocfs2_error(sb,
+ "Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
+ (unsigned long long)bh->b_blocknr);
+ goto bail;
+ }
+
+ if (le32_to_cpu(di->i_fs_generation) !=
+ OCFS2_SB(sb)->fs_generation) {
+ ocfs2_error(sb,
+ "Invalid dinode #%llu: fs_generation is %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(di->i_fs_generation));
+ goto bail;
+ }
+
+ rc = 0;
+
+bail:
+ return rc;
+}
+
+int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
+ int flags)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
+ flags, ocfs2_validate_inode_block);
+
+ /* If ocfs2_read_blocks() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
+int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
+{
+ return ocfs2_read_inode_block_full(inode, bh, 0);
+}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2f37af9bcc4a..eb3c302b38d3 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type);
int ocfs2_inode_init_private(struct inode *inode);
int ocfs2_inode_revalidate(struct dentry *dentry);
-int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
- int create_ino);
+void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
+ int create_ino);
void ocfs2_read_inode(struct inode *inode);
void ocfs2_read_inode2(struct inode *inode, void *opaque);
ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf,
@@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
struct buffer_head *bh);
int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
+struct buffer_head *ocfs2_bread(struct inode *inode,
+ int block, int *err, int reada);
void ocfs2_set_inode_flags(struct inode *inode);
void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
@@ -153,4 +155,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits);
}
+/* Validate that a bh contains a valid inode */
+int ocfs2_validate_inode_block(struct super_block *sb,
+ struct buffer_head *bh);
+/*
+ * Read an inode block into *bh. If *bh is NULL, a bh will be allocated.
+ * This is a cached read. The inode will be validated with
+ * ocfs2_validate_inode_block().
+ */
+int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
+/* The same, but can be passed OCFS2_BH_* flags */
+int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
+ int flags);
#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 99fe9d584f3c..302f1144a708 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -45,6 +45,7 @@
#include "slot_map.h"
#include "super.h"
#include "sysfile.h"
+#include "quota.h"
#include "buffer_head_io.h"
@@ -52,10 +53,10 @@ DEFINE_SPINLOCK(trans_inc_lock);
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_recover_node(struct ocfs2_super *osb,
- int node_num);
+ int node_num, int slot_num);
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
-static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
+static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
@@ -64,6 +65,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
int slot);
static int ocfs2_commit_thread(void *arg);
+static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
+{
+ return __ocfs2_wait_on_mount(osb, 0);
+}
+
+static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
+{
+ return __ocfs2_wait_on_mount(osb, 1);
+}
+
+
/*
* The recovery_list is a simple linked list of node numbers to recover.
@@ -256,11 +268,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
BUG_ON(max_buffs <= 0);
- /* JBD might support this, but our journalling code doesn't yet. */
- if (journal_current_handle()) {
- mlog(ML_ERROR, "Recursive transaction attempted!\n");
- BUG();
- }
+ /* Nested transaction? Just return the handle... */
+ if (journal_current_handle())
+ return jbd2_journal_start(journal, max_buffs);
down_read(&osb->journal->j_trans_barrier);
@@ -285,16 +295,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
int ocfs2_commit_trans(struct ocfs2_super *osb,
handle_t *handle)
{
- int ret;
+ int ret, nested;
struct ocfs2_journal *journal = osb->journal;
BUG_ON(!handle);
+ nested = handle->h_ref > 1;
ret = jbd2_journal_stop(handle);
if (ret < 0)
mlog_errno(ret);
- up_read(&journal->j_trans_barrier);
+ if (!nested)
+ up_read(&journal->j_trans_barrier);
return ret;
}
@@ -434,20 +446,6 @@ int ocfs2_journal_dirty(handle_t *handle,
return status;
}
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-int ocfs2_journal_dirty_data(handle_t *handle,
- struct buffer_head *bh)
-{
- int err = journal_dirty_data(handle, bh);
- if (err)
- mlog_errno(err);
- /* TODO: When we can handle it, abort the handle and go RO on
- * error here. */
-
- return err;
-}
-#endif
-
#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
void ocfs2_set_journal_params(struct ocfs2_super *osb)
@@ -587,17 +585,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
mlog_entry_void();
fe = (struct ocfs2_dinode *)bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- /* This is called from startup/shutdown which will
- * handle the errors in a specific manner, so no need
- * to call ocfs2_error() here. */
- mlog(ML_ERROR, "Journal dinode %llu has invalid "
- "signature: %.*s",
- (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
- fe->i_signature);
- status = -EIO;
- goto out;
- }
+
+ /* The journal bh on the osb always comes from ocfs2_journal_init()
+ * and was validated there inside ocfs2_inode_lock_full(). It's a
+ * code bug if we mess it up. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
flags = le32_to_cpu(fe->id1.journal1.ij_flags);
if (dirty)
@@ -613,7 +605,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
if (status < 0)
mlog_errno(status);
-out:
mlog_exit(status);
return status;
}
@@ -878,6 +869,7 @@ struct ocfs2_la_recovery_item {
int lri_slot;
struct ocfs2_dinode *lri_la_dinode;
struct ocfs2_dinode *lri_tl_dinode;
+ struct ocfs2_quota_recovery *lri_qrec;
};
/* Does the second half of the recovery process. By this point, the
@@ -898,6 +890,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
struct ocfs2_super *osb = journal->j_osb;
struct ocfs2_dinode *la_dinode, *tl_dinode;
struct ocfs2_la_recovery_item *item, *n;
+ struct ocfs2_quota_recovery *qrec;
LIST_HEAD(tmp_la_list);
mlog_entry_void();
@@ -913,6 +906,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
+ ocfs2_wait_on_quotas(osb);
+
la_dinode = item->lri_la_dinode;
if (la_dinode) {
mlog(0, "Clean up local alloc %llu\n",
@@ -943,6 +938,16 @@ void ocfs2_complete_recovery(struct work_struct *work)
if (ret < 0)
mlog_errno(ret);
+ qrec = item->lri_qrec;
+ if (qrec) {
+ mlog(0, "Recovering quota files");
+ ret = ocfs2_finish_quota_recovery(osb, qrec,
+ item->lri_slot);
+ if (ret < 0)
+ mlog_errno(ret);
+ /* Recovery info is already freed now */
+ }
+
kfree(item);
}
@@ -956,7 +961,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
- struct ocfs2_dinode *tl_dinode)
+ struct ocfs2_dinode *tl_dinode,
+ struct ocfs2_quota_recovery *qrec)
{
struct ocfs2_la_recovery_item *item;
@@ -971,6 +977,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
if (tl_dinode)
kfree(tl_dinode);
+ if (qrec)
+ ocfs2_free_quota_recovery(qrec);
+
mlog_errno(-ENOMEM);
return;
}
@@ -979,6 +988,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
item->lri_la_dinode = la_dinode;
item->lri_slot = slot_num;
item->lri_tl_dinode = tl_dinode;
+ item->lri_qrec = qrec;
spin_lock(&journal->j_lock);
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -998,6 +1008,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
ocfs2_queue_recovery_completion(journal,
osb->slot_num,
osb->local_alloc_copy,
+ NULL,
NULL);
ocfs2_schedule_truncate_log_flush(osb, 0);
@@ -1006,11 +1017,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
}
}
+void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
+{
+ if (osb->quota_rec) {
+ ocfs2_queue_recovery_completion(osb->journal,
+ osb->slot_num,
+ NULL,
+ NULL,
+ osb->quota_rec);
+ osb->quota_rec = NULL;
+ }
+}
+
static int __ocfs2_recovery_thread(void *arg)
{
- int status, node_num;
+ int status, node_num, slot_num;
struct ocfs2_super *osb = arg;
struct ocfs2_recovery_map *rm = osb->recovery_map;
+ int *rm_quota = NULL;
+ int rm_quota_used = 0, i;
+ struct ocfs2_quota_recovery *qrec;
mlog_entry_void();
@@ -1019,6 +1045,11 @@ static int __ocfs2_recovery_thread(void *arg)
goto bail;
}
+ rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
+ if (!rm_quota) {
+ status = -ENOMEM;
+ goto bail;
+ }
restart:
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
@@ -1032,8 +1063,28 @@ restart:
* clear it until ocfs2_recover_node() has succeeded. */
node_num = rm->rm_entries[0];
spin_unlock(&osb->osb_lock);
-
- status = ocfs2_recover_node(osb, node_num);
+ mlog(0, "checking node %d\n", node_num);
+ slot_num = ocfs2_node_num_to_slot(osb, node_num);
+ if (slot_num == -ENOENT) {
+ status = 0;
+ mlog(0, "no slot for this node, so no recovery"
+ "required.\n");
+ goto skip_recovery;
+ }
+ mlog(0, "node %d was using slot %d\n", node_num, slot_num);
+
+ /* It is a bit subtle with quota recovery. We cannot do it
+ * immediately because we have to obtain cluster locks from
+ * quota files and we also don't want to just skip it because
+ * then quota usage would be out of sync until some node takes
+ * the slot. So we remember which nodes need quota recovery
+ * and when everything else is done, we recover quotas. */
+ for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
+ if (i == rm_quota_used)
+ rm_quota[rm_quota_used++] = slot_num;
+
+ status = ocfs2_recover_node(osb, node_num, slot_num);
+skip_recovery:
if (!status) {
ocfs2_recovery_map_clear(osb, node_num);
} else {
@@ -1055,13 +1106,27 @@ restart:
if (status < 0)
mlog_errno(status);
+ /* Now it is right time to recover quotas... We have to do this under
+ * superblock lock so that noone can start using the slot (and crash)
+ * before we recover it */
+ for (i = 0; i < rm_quota_used; i++) {
+ qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+ if (IS_ERR(qrec)) {
+ status = PTR_ERR(qrec);
+ mlog_errno(status);
+ continue;
+ }
+ ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
+ NULL, NULL, qrec);
+ }
+
ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead
* node(s) may have disallowd a previos inode delete. Re-processing
* is therefore required. */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
- NULL);
+ NULL, NULL);
bail:
mutex_lock(&osb->recovery_lock);
@@ -1076,6 +1141,9 @@ bail:
mutex_unlock(&osb->recovery_lock);
+ if (rm_quota)
+ kfree(rm_quota);
+
mlog_exit(status);
/* no one is callint kthread_stop() for us so the kthread() api
* requires that we call do_exit(). And it isn't exported, but
@@ -1135,8 +1203,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
}
SET_INODE_JOURNAL(inode);
- status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
- OCFS2_BH_IGNORE_CACHE);
+ status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1304,31 +1371,19 @@ done:
* far less concerning.
*/
static int ocfs2_recover_node(struct ocfs2_super *osb,
- int node_num)
+ int node_num, int slot_num)
{
int status = 0;
- int slot_num;
struct ocfs2_dinode *la_copy = NULL;
struct ocfs2_dinode *tl_copy = NULL;
- mlog_entry("(node_num=%d, osb->node_num = %d)\n",
- node_num, osb->node_num);
-
- mlog(0, "checking node %d\n", node_num);
+ mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n",
+ node_num, slot_num, osb->node_num);
/* Should not ever be called to recover ourselves -- in that
* case we should've called ocfs2_journal_load instead. */
BUG_ON(osb->node_num == node_num);
- slot_num = ocfs2_node_num_to_slot(osb, node_num);
- if (slot_num == -ENOENT) {
- status = 0;
- mlog(0, "no slot for this node, so no recovery required.\n");
- goto done;
- }
-
- mlog(0, "node %d was using slot %d\n", node_num, slot_num);
-
status = ocfs2_replay_journal(osb, node_num, slot_num);
if (status < 0) {
if (status == -EBUSY) {
@@ -1364,7 +1419,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
/* This will kfree the memory pointed to by la_copy and tl_copy */
ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
- tl_copy);
+ tl_copy, NULL);
status = 0;
done:
@@ -1659,13 +1714,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
return ret;
}
-static int ocfs2_wait_on_mount(struct ocfs2_super *osb)
+static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
{
/* This check is good because ocfs2 will wait on our recovery
* thread before changing it to something other than MOUNTED
* or DISABLED. */
wait_event(osb->osb_mount_event,
- atomic_read(&osb->vol_state) == VOLUME_MOUNTED ||
+ (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
+ atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
atomic_read(&osb->vol_state) == VOLUME_DISABLED);
/* If there's an error on mount, then we may never get to the
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d4d14e9a3cea..37013bf9ce28 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -27,12 +27,7 @@
#define OCFS2_JOURNAL_H
#include <linux/fs.h>
-#ifndef CONFIG_OCFS2_COMPAT_JBD
-# include <linux/jbd2.h>
-#else
-# include <linux/jbd.h>
-# include "ocfs2_jbd_compat.h"
-#endif
+#include <linux/jbd2.h>
enum ocfs2_journal_state {
OCFS2_JOURNAL_FREE = 0,
@@ -173,6 +168,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb,
int node_num);
int ocfs2_mark_dead_nodes(struct ocfs2_super *osb);
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb);
+void ocfs2_complete_quota_recovery(struct ocfs2_super *osb);
static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb)
{
@@ -273,10 +269,6 @@ int ocfs2_journal_access(handle_t *handle,
*/
int ocfs2_journal_dirty(handle_t *handle,
struct buffer_head *bh);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-int ocfs2_journal_dirty_data(handle_t *handle,
- struct buffer_head *bh);
-#endif
/*
* Credit Macros:
@@ -293,6 +285,37 @@ int ocfs2_journal_dirty_data(handle_t *handle,
/* extended attribute block update */
#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
+/* global quotafile inode update, data block */
+#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+
+/*
+ * The two writes below can accidentally see global info dirty due
+ * to set_info() quotactl so make them prepared for the writes.
+ */
+/* quota data block, global info */
+/* Write to local quota file */
+#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
+
+/* global quota data block, local quota data block, global quota inode,
+ * global quota info */
+#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
+
+static inline int ocfs2_quota_trans_credits(struct super_block *sb)
+{
+ int credits = 0;
+
+ if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA))
+ credits += OCFS2_QWRITE_CREDITS;
+ if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA))
+ credits += OCFS2_QWRITE_CREDITS;
+ return credits;
+}
+
+/* Number of credits needed for removing quota structure from file */
+int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
+/* Number of credits needed for initialization of new quota structure */
+int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
+
/* group extend. inode update and last group update. */
#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
@@ -303,8 +326,11 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* prev. group desc. if we relink. */
#define OCFS2_SUBALLOC_ALLOC (3)
-#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
- + OCFS2_INODE_UPDATE_CREDITS)
+static inline int ocfs2_inline_to_extents_credits(struct super_block *sb)
+{
+ return OCFS2_SUBALLOC_ALLOC + OCFS2_INODE_UPDATE_CREDITS +
+ ocfs2_quota_trans_credits(sb);
+}
/* dinode + group descriptor update. We don't relink on free yet. */
#define OCFS2_SUBALLOC_FREE (2)
@@ -313,16 +339,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
+ OCFS2_TRUNCATE_LOG_UPDATE)
-#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)
+static inline int ocfs2_remove_extent_credits(struct super_block *sb)
+{
+ return OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS +
+ ocfs2_quota_trans_credits(sb);
+}
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
* bitmap block for the new bit) */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
/* parent fe, parent block, new file entry, inode alloc fe, inode alloc
- * group descriptor + mkdir/symlink blocks */
-#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC \
- + OCFS2_DIR_LINK_ADDITIONAL_CREDITS)
+ * group descriptor + mkdir/symlink blocks + quota update */
+static inline int ocfs2_mknod_credits(struct super_block *sb)
+{
+ return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
+ ocfs2_quota_trans_credits(sb);
+}
/* local alloc metadata change + main bitmap updates */
#define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \
@@ -332,13 +365,21 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* for the dinode, one for the new block. */
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
-/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
-#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1)
+/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
+ * update on dir */
+static inline int ocfs2_link_credits(struct super_block *sb)
+{
+ return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
+ ocfs2_quota_trans_credits(sb);
+}
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
* dir inode link */
-#define OCFS2_UNLINK_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 1 \
- + OCFS2_LINK_CREDITS)
+static inline int ocfs2_unlink_credits(struct super_block *sb)
+{
+ /* The quota update from ocfs2_link_credits is unused here... */
+ return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
+}
/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
* inode alloc group descriptor */
@@ -347,8 +388,10 @@ int ocfs2_journal_dirty_data(handle_t *handle,
/* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming
* directory + target unlink */
-#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
- + OCFS2_UNLINK_CREDITS)
+static inline int ocfs2_rename_credits(struct super_block *sb)
+{
+ return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
+}
/* global bitmap dinode, group desc., relinked group,
* suballocator dinode, group desc., relinked group,
@@ -386,18 +429,19 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
* credit for the dinode there. */
extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
- return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
+ return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
+ ocfs2_quota_trans_credits(sb);
}
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
{
- int blocks = OCFS2_MKNOD_CREDITS;
+ int blocks = ocfs2_mknod_credits(sb);
/* links can be longer than one block so we may update many
* within our single allocated extent. */
blocks += ocfs2_clusters_to_blocks(sb, 1);
- return blocks;
+ return blocks + ocfs2_quota_trans_credits(sb);
}
static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
@@ -434,6 +478,8 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
/* update to the truncate log. */
credits += OCFS2_TRUNCATE_LOG_UPDATE;
+ credits += ocfs2_quota_trans_credits(sb);
+
return credits;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 687b28713c32..19cfb1b9ce09 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -248,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
goto bail;
}
- status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
- &alloc_bh, OCFS2_BH_IGNORE_CACHE);
+ status = ocfs2_read_inode_block_full(inode, &alloc_bh,
+ OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -459,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
mutex_lock(&inode->i_mutex);
- status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
- &alloc_bh, OCFS2_BH_IGNORE_CACHE);
+ status = ocfs2_read_inode_block_full(inode, &alloc_bh,
+ OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f4967e634ffd..6173807ba23b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -40,6 +40,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_NAMEI
#include <cluster/masklog.h>
@@ -61,17 +62,18 @@
#include "sysfile.h"
#include "uptodate.h"
#include "xattr.h"
+#include "acl.h"
#include "buffer_head_io.h"
static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode *dir,
- struct dentry *dentry, int mode,
+ struct inode *inode,
+ struct dentry *dentry,
dev_t dev,
struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
handle_t *handle,
- struct inode **ret_inode,
struct ocfs2_alloc_context *inode_ac);
static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
@@ -186,6 +188,35 @@ bail:
return ret;
}
+static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
+{
+ struct inode *inode;
+
+ inode = new_inode(dir->i_sb);
+ if (!inode) {
+ mlog(ML_ERROR, "new_inode failed!\n");
+ return NULL;
+ }
+
+ /* populate as many fields early on as possible - many of
+ * these are used by the support functions here and in
+ * callers. */
+ if (S_ISDIR(mode))
+ inode->i_nlink = 2;
+ else
+ inode->i_nlink = 1;
+ inode->i_uid = current_fsuid();
+ if (dir->i_mode & S_ISGID) {
+ inode->i_gid = dir->i_gid;
+ if (S_ISDIR(mode))
+ mode |= S_ISGID;
+ } else
+ inode->i_gid = current_fsgid();
+ inode->i_mode = mode;
+ vfs_dq_init(inode);
+ return inode;
+}
+
static int ocfs2_mknod(struct inode *dir,
struct dentry *dentry,
int mode,
@@ -201,6 +232,13 @@ static int ocfs2_mknod(struct inode *dir,
struct inode *inode = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_alloc_context *data_ac = NULL;
+ struct ocfs2_alloc_context *xattr_ac = NULL;
+ int want_clusters = 0;
+ int xattr_credits = 0;
+ struct ocfs2_security_xattr_info si = {
+ .enable = 1,
+ };
+ int did_quota_inode = 0;
mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len,
@@ -250,17 +288,46 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
- /* Reserve a cluster if creating an extent based directory. */
- if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
- status = ocfs2_reserve_clusters(osb, 1, &data_ac);
- if (status < 0) {
- if (status != -ENOSPC)
- mlog_errno(status);
+ inode = ocfs2_get_init_inode(dir, mode);
+ if (!inode) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto leave;
+ }
+
+ /* get security xattr */
+ status = ocfs2_init_security_get(inode, dir, &si);
+ if (status) {
+ if (status == -EOPNOTSUPP)
+ si.enable = 0;
+ else {
+ mlog_errno(status);
goto leave;
}
}
- handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
+ /* calculate meta data/clusters for setting security and acl xattr */
+ status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
+ &si, &want_clusters,
+ &xattr_credits, &xattr_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
+ /* Reserve a cluster if creating an extent based directory. */
+ if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb))
+ want_clusters += 1;
+
+ status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
+ if (status < 0) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ goto leave;
+ }
+
+ handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
+ xattr_credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@@ -268,10 +335,19 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
+ /* We don't use standard VFS wrapper because we don't want vfs_dq_init
+ * to be called. */
+ if (sb_any_quota_active(osb->sb) &&
+ osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+ status = -EDQUOT;
+ goto leave;
+ }
+ did_quota_inode = 1;
+
/* do the real work now. */
- status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
+ status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
&new_fe_bh, parent_fe_bh, handle,
- &inode, inode_ac);
+ inode_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
@@ -300,6 +376,22 @@ static int ocfs2_mknod(struct inode *dir,
inc_nlink(dir);
}
+ status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+ xattr_ac, data_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
+ if (si.enable) {
+ status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
+ xattr_ac, data_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+ }
+
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
de_bh);
@@ -320,6 +412,8 @@ static int ocfs2_mknod(struct inode *dir,
d_instantiate(dentry, inode);
status = 0;
leave:
+ if (status < 0 && did_quota_inode)
+ vfs_dq_free_inode(inode);
if (handle)
ocfs2_commit_trans(osb, handle);
@@ -331,9 +425,13 @@ leave:
brelse(new_fe_bh);
brelse(de_bh);
brelse(parent_fe_bh);
+ kfree(si.name);
+ kfree(si.value);
- if ((status < 0) && inode)
+ if ((status < 0) && inode) {
+ clear_nlink(inode);
iput(inode);
+ }
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
@@ -341,6 +439,9 @@ leave:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
+ if (xattr_ac)
+ ocfs2_free_alloc_context(xattr_ac);
+
mlog_exit(status);
return status;
@@ -348,12 +449,12 @@ leave:
static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode *dir,
- struct dentry *dentry, int mode,
+ struct inode *inode,
+ struct dentry *dentry,
dev_t dev,
struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
handle_t *handle,
- struct inode **ret_inode,
struct ocfs2_alloc_context *inode_ac)
{
int status = 0;
@@ -361,14 +462,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct ocfs2_extent_list *fel;
u64 fe_blkno = 0;
u16 suballoc_bit;
- struct inode *inode = NULL;
- mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
- (unsigned long)dev, dentry->d_name.len,
+ mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
+ inode->i_mode, (unsigned long)dev, dentry->d_name.len,
dentry->d_name.name);
*new_fe_bh = NULL;
- *ret_inode = NULL;
status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
&fe_blkno);
@@ -377,23 +476,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
goto leave;
}
- inode = new_inode(dir->i_sb);
- if (!inode) {
- status = -ENOMEM;
- mlog(ML_ERROR, "new_inode failed!\n");
- goto leave;
- }
-
/* populate as many fields early on as possible - many of
* these are used by the support functions here and in
* callers. */
inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
OCFS2_I(inode)->ip_blkno = fe_blkno;
- if (S_ISDIR(mode))
- inode->i_nlink = 2;
- else
- inode->i_nlink = 1;
- inode->i_mode = mode;
spin_lock(&osb->osb_lock);
inode->i_generation = osb->s_next_generation++;
spin_unlock(&osb->osb_lock);
@@ -421,17 +508,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_blkno = cpu_to_le64(fe_blkno);
fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
- fe->i_uid = cpu_to_le32(current->fsuid);
- if (dir->i_mode & S_ISGID) {
- fe->i_gid = cpu_to_le32(dir->i_gid);
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else
- fe->i_gid = cpu_to_le32(current->fsgid);
- fe->i_mode = cpu_to_le16(mode);
- if (S_ISCHR(mode) || S_ISBLK(mode))
+ fe->i_uid = cpu_to_le32(inode->i_uid);
+ fe->i_gid = cpu_to_le32(inode->i_gid);
+ fe->i_mode = cpu_to_le16(inode->i_mode);
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
-
fe->i_links_count = cpu_to_le16(inode->i_nlink);
fe->i_last_eb_blk = 0;
@@ -446,7 +527,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
/*
* If supported, directories start with inline data.
*/
- if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
+ if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
u16 feat = le16_to_cpu(fe->i_dyn_features);
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
@@ -465,15 +546,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
goto leave;
}
- if (ocfs2_populate_inode(inode, fe, 1) < 0) {
- mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
- "i_blkno=%llu, i_ino=%lu\n",
- (unsigned long long)(*new_fe_bh)->b_blocknr,
- (unsigned long long)le64_to_cpu(fe->i_blkno),
- inode->i_ino);
- BUG();
- }
-
+ ocfs2_populate_inode(inode, fe, 1);
ocfs2_inode_set_new(osb, inode);
if (!ocfs2_mount_local(osb)) {
status = ocfs2_create_new_inode_locks(inode);
@@ -484,17 +557,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
status = 0; /* error in ocfs2_create_new_inode_locks is not
* critical */
- *ret_inode = inode;
leave:
if (status < 0) {
if (*new_fe_bh) {
brelse(*new_fe_bh);
*new_fe_bh = NULL;
}
- if (inode) {
- clear_nlink(inode);
- iput(inode);
- }
}
mlog_exit(status);
@@ -588,7 +656,7 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out_unlock_inode;
}
- handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
+ handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
handle = NULL;
@@ -775,7 +843,7 @@ static int ocfs2_unlink(struct inode *dir,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
+ handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@@ -1181,7 +1249,7 @@ static int ocfs2_rename(struct inode *old_dir,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
+ handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@@ -1496,6 +1564,13 @@ static int ocfs2_symlink(struct inode *dir,
handle_t *handle = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_alloc_context *data_ac = NULL;
+ struct ocfs2_alloc_context *xattr_ac = NULL;
+ int want_clusters = 0;
+ int xattr_credits = 0;
+ struct ocfs2_security_xattr_info si = {
+ .enable = 1,
+ };
+ int did_quota = 0, did_quota_inode = 0;
mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1542,17 +1617,46 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
- /* don't reserve bitmap space for fast symlinks. */
- if (l > ocfs2_fast_symlink_chars(sb)) {
- status = ocfs2_reserve_clusters(osb, 1, &data_ac);
+ inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO);
+ if (!inode) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail;
+ }
+
+ /* get security xattr */
+ status = ocfs2_init_security_get(inode, dir, &si);
+ if (status) {
+ if (status == -EOPNOTSUPP)
+ si.enable = 0;
+ else {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
+ /* calculate meta data/clusters for setting security xattr */
+ if (si.enable) {
+ status = ocfs2_calc_security_init(dir, &si, &want_clusters,
+ &xattr_credits, &xattr_ac);
if (status < 0) {
- if (status != -ENOSPC)
- mlog_errno(status);
+ mlog_errno(status);
goto bail;
}
}
- handle = ocfs2_start_trans(osb, credits);
+ /* don't reserve bitmap space for fast symlinks. */
+ if (l > ocfs2_fast_symlink_chars(sb))
+ want_clusters += 1;
+
+ status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
+ if (status < 0) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ goto bail;
+ }
+
+ handle = ocfs2_start_trans(osb, credits + xattr_credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@@ -1560,10 +1664,18 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
- status = ocfs2_mknod_locked(osb, dir, dentry,
- S_IFLNK | S_IRWXUGO, 0,
- &new_fe_bh, parent_fe_bh, handle,
- &inode, inode_ac);
+ /* We don't use standard VFS wrapper because we don't want vfs_dq_init
+ * to be called. */
+ if (sb_any_quota_active(osb->sb) &&
+ osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+ status = -EDQUOT;
+ goto bail;
+ }
+ did_quota_inode = 1;
+
+ status = ocfs2_mknod_locked(osb, dir, inode, dentry,
+ 0, &new_fe_bh, parent_fe_bh, handle,
+ inode_ac);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1576,6 +1688,12 @@ static int ocfs2_symlink(struct inode *dir,
u32 offset = 0;
inode->i_op = &ocfs2_symlink_inode_operations;
+ if (vfs_dq_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1))) {
+ status = -EDQUOT;
+ goto bail;
+ }
+ did_quota = 1;
status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
new_fe_bh,
handle, data_ac, NULL,
@@ -1614,6 +1732,15 @@ static int ocfs2_symlink(struct inode *dir,
}
}
+ if (si.enable) {
+ status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
+ xattr_ac, data_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
de_bh);
@@ -1632,6 +1759,11 @@ static int ocfs2_symlink(struct inode *dir,
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
bail:
+ if (status < 0 && did_quota)
+ vfs_dq_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1));
+ if (status < 0 && did_quota_inode)
+ vfs_dq_free_inode(inode);
if (handle)
ocfs2_commit_trans(osb, handle);
@@ -1640,12 +1772,18 @@ bail:
brelse(new_fe_bh);
brelse(parent_fe_bh);
brelse(de_bh);
+ kfree(si.name);
+ kfree(si.value);
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
if (data_ac)
ocfs2_free_alloc_context(data_ac);
- if ((status < 0) && inode)
+ if (xattr_ac)
+ ocfs2_free_alloc_context(xattr_ac);
+ if ((status < 0) && inode) {
+ clear_nlink(inode);
iput(inode);
+ }
mlog_exit(status);
@@ -1754,9 +1892,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
- status = ocfs2_read_block(orphan_dir_inode,
- OCFS2_I(orphan_dir_inode)->ip_blkno,
- &orphan_dir_bh);
+ status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index fef7ece32376..5c777988042f 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -85,7 +85,7 @@ enum ocfs2_unlock_action {
};
/* ocfs2_lock_res->l_flags flags. */
-#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized
+#define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized
* the lvb */
#define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in
* dlm_lock */
@@ -161,6 +161,7 @@ enum ocfs2_vol_state
{
VOLUME_INIT = 0,
VOLUME_MOUNTED,
+ VOLUME_MOUNTED_QUOTAS,
VOLUME_DISMOUNTED,
VOLUME_DISABLED
};
@@ -195,6 +196,9 @@ enum ocfs2_mount_options
OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
+ OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */
+ OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
+ OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -205,6 +209,7 @@ enum ocfs2_mount_options
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
+struct ocfs2_quota_recovery;
struct ocfs2_super
{
struct task_struct *commit_task;
@@ -286,10 +291,11 @@ struct ocfs2_super
char *local_alloc_debug_buf;
#endif
- /* Next two fields are for local node slot recovery during
+ /* Next three fields are for local node slot recovery during
* mount. */
int dirty;
struct ocfs2_dinode *local_alloc_copy;
+ struct ocfs2_quota_recovery *quota_rec;
struct ocfs2_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */
@@ -443,35 +449,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
#define OCFS2_IS_VALID_DINODE(ptr) \
(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
-#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di) do { \
- typeof(__di) ____di = (__di); \
- ocfs2_error((__sb), \
- "Dinode # %llu has bad signature %.*s", \
- (unsigned long long)le64_to_cpu((____di)->i_blkno), 7, \
- (____di)->i_signature); \
-} while (0)
-
#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \
(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
-#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb) do { \
- typeof(__eb) ____eb = (__eb); \
- ocfs2_error((__sb), \
- "Extent Block # %llu has bad signature %.*s", \
- (unsigned long long)le64_to_cpu((____eb)->h_blkno), 7, \
- (____eb)->h_signature); \
-} while (0)
-
#define OCFS2_IS_VALID_GROUP_DESC(ptr) \
(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
-#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \
- typeof(__gd) ____gd = (__gd); \
- ocfs2_error((__sb), \
- "Group Descriptor # %llu has bad signature %.*s", \
- (unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \
- (____gd)->bg_signature); \
-} while (0)
#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \
(!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5f180cf7abbd..4ae3984366ee 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -93,7 +93,9 @@
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
| OCFS2_FEATURE_INCOMPAT_XATTR)
-#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
+#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
+ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
+ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
/*
* Heartbeat-only devices are missing journals and other files. The
@@ -157,6 +159,12 @@
*/
#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
+/*
+ * Maintain quota information for this filesystem
+ */
+#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
+#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
+
/* The byte offset of the first backup block will be 1G.
* The following will be 4G, 16G, 64G, 256G and 1T.
*/
@@ -186,6 +194,7 @@
#define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
+#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */
/*
* Flags on ocfs2_dinode.i_dyn_features
@@ -323,13 +332,17 @@ enum {
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
-#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE
+ USER_QUOTA_SYSTEM_INODE,
+ GROUP_QUOTA_SYSTEM_INODE,
+#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE
ORPHAN_DIR_SYSTEM_INODE,
EXTENT_ALLOC_SYSTEM_INODE,
INODE_ALLOC_SYSTEM_INODE,
JOURNAL_SYSTEM_INODE,
LOCAL_ALLOC_SYSTEM_INODE,
TRUNCATE_LOG_SYSTEM_INODE,
+ LOCAL_USER_QUOTA_SYSTEM_INODE,
+ LOCAL_GROUP_QUOTA_SYSTEM_INODE,
NUM_SYSTEM_INODES
};
@@ -343,6 +356,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
[SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 },
[HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 },
[GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 },
+ [USER_QUOTA_SYSTEM_INODE] = { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 },
+ [GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 },
/* Slot-specific system inodes (one copy per slot) */
[ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 },
@@ -350,7 +365,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
[INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
[JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
[LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
- [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }
+ [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 },
+ [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
+ [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
};
/* Parameter passed from mount.ocfs2 to module */
@@ -862,6 +879,109 @@ static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
}
+/*
+ * On disk structures for global quota file
+ */
+
+/* Magic numbers and known versions for global quota files */
+#define OCFS2_GLOBAL_QMAGICS {\
+ 0x0cf52470, /* USRQUOTA */ \
+ 0x0cf52471 /* GRPQUOTA */ \
+}
+
+#define OCFS2_GLOBAL_QVERSIONS {\
+ 0, \
+ 0, \
+}
+
+
+/* Each block of each quota file has a certain fixed number of bytes reserved
+ * for OCFS2 internal use at its end. OCFS2 can use it for things like
+ * checksums, etc. */
+#define OCFS2_QBLK_RESERVED_SPACE 8
+
+/* Generic header of all quota files */
+struct ocfs2_disk_dqheader {
+ __le32 dqh_magic; /* Magic number identifying file */
+ __le32 dqh_version; /* Quota format version */
+};
+
+#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
+
+/* Information header of global quota file (immediately follows the generic
+ * header) */
+struct ocfs2_global_disk_dqinfo {
+/*00*/ __le32 dqi_bgrace; /* Grace time for space softlimit excess */
+ __le32 dqi_igrace; /* Grace time for inode softlimit excess */
+ __le32 dqi_syncms; /* Time after which we sync local changes to
+ * global quota file */
+ __le32 dqi_blocks; /* Number of blocks in quota file */
+/*10*/ __le32 dqi_free_blk; /* First free block in quota file */
+ __le32 dqi_free_entry; /* First block with free dquot entry in quota
+ * file */
+};
+
+/* Structure with global user / group information. We reserve some space
+ * for future use. */
+struct ocfs2_global_disk_dqblk {
+/*00*/ __le32 dqb_id; /* ID the structure belongs to */
+ __le32 dqb_use_count; /* Number of nodes having reference to this structure */
+ __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
+/*10*/ __le64 dqb_isoftlimit; /* preferred inode limit */
+ __le64 dqb_curinodes; /* current # allocated inodes */
+/*20*/ __le64 dqb_bhardlimit; /* absolute limit on disk space */
+ __le64 dqb_bsoftlimit; /* preferred limit on disk space */
+/*30*/ __le64 dqb_curspace; /* current space occupied */
+ __le64 dqb_btime; /* time limit for excessive disk use */
+/*40*/ __le64 dqb_itime; /* time limit for excessive inode use */
+ __le64 dqb_pad1;
+/*50*/ __le64 dqb_pad2;
+};
+
+/*
+ * On-disk structures for local quota file
+ */
+
+/* Magic numbers and known versions for local quota files */
+#define OCFS2_LOCAL_QMAGICS {\
+ 0x0cf524c0, /* USRQUOTA */ \
+ 0x0cf524c1 /* GRPQUOTA */ \
+}
+
+#define OCFS2_LOCAL_QVERSIONS {\
+ 0, \
+ 0, \
+}
+
+/* Quota flags in dqinfo header */
+#define OLQF_CLEAN 0x0001 /* Quota file is empty (this should be after\
+ * quota has been cleanly turned off) */
+
+#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
+
+/* Information header of local quota file (immediately follows the generic
+ * header) */
+struct ocfs2_local_disk_dqinfo {
+ __le32 dqi_flags; /* Flags for quota file */
+ __le32 dqi_chunks; /* Number of chunks of quota structures
+ * with a bitmap */
+ __le32 dqi_blocks; /* Number of blocks allocated for quota file */
+};
+
+/* Header of one chunk of a quota file */
+struct ocfs2_local_disk_chunk {
+ __le32 dqc_free; /* Number of free entries in the bitmap */
+ u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
+ * chunk of quota file */
+};
+
+/* One entry in local quota file */
+struct ocfs2_local_disk_dqblk {
+/*00*/ __le64 dqb_id; /* id this quota applies to */
+ __le64 dqb_spacemod; /* Change in the amount of used space */
+/*10*/ __le64 dqb_inodemod; /* Change in the amount of used inodes */
+};
+
#ifdef __KERNEL__
static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
{
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h
deleted file mode 100644
index b91c78f8f558..000000000000
--- a/fs/ocfs2/ocfs2_jbd_compat.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ocfs2_jbd_compat.h
- *
- * Compatibility defines for JBD.
- *
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-
-#ifndef OCFS2_JBD_COMPAT_H
-#define OCFS2_JBD_COMPAT_H
-
-#ifndef CONFIG_OCFS2_COMPAT_JBD
-# error Should not have been included
-#endif
-
-struct jbd2_inode {
- unsigned int dummy;
-};
-
-#define JBD2_BARRIER JFS_BARRIER
-#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
-
-#define jbd2_journal_ack_err journal_ack_err
-#define jbd2_journal_clear_err journal_clear_err
-#define jbd2_journal_destroy journal_destroy
-#define jbd2_journal_dirty_metadata journal_dirty_metadata
-#define jbd2_journal_errno journal_errno
-#define jbd2_journal_extend journal_extend
-#define jbd2_journal_flush journal_flush
-#define jbd2_journal_force_commit journal_force_commit
-#define jbd2_journal_get_write_access journal_get_write_access
-#define jbd2_journal_get_undo_access journal_get_undo_access
-#define jbd2_journal_init_inode journal_init_inode
-#define jbd2_journal_invalidatepage journal_invalidatepage
-#define jbd2_journal_load journal_load
-#define jbd2_journal_lock_updates journal_lock_updates
-#define jbd2_journal_restart journal_restart
-#define jbd2_journal_start journal_start
-#define jbd2_journal_start_commit journal_start_commit
-#define jbd2_journal_stop journal_stop
-#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
-#define jbd2_journal_unlock_updates journal_unlock_updates
-#define jbd2_journal_wipe journal_wipe
-#define jbd2_log_wait_commit log_wait_commit
-
-static inline int jbd2_journal_file_inode(handle_t *handle,
- struct jbd2_inode *inode)
-{
- return 0;
-}
-
-static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
- loff_t new_size)
-{
- return 0;
-}
-
-static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
- struct inode *inode)
-{
- return;
-}
-
-static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
- struct jbd2_inode *jinode)
-{
- return;
-}
-
-
-#endif /* OCFS2_JBD_COMPAT_H */
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 82c200f7a8f1..eb6f50c9ceca 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -46,6 +46,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_DENTRY,
OCFS2_LOCK_TYPE_OPEN,
OCFS2_LOCK_TYPE_FLOCK,
+ OCFS2_LOCK_TYPE_QINFO,
OCFS2_NUM_LOCK_TYPES
};
@@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_FLOCK:
c = 'F';
break;
+ case OCFS2_LOCK_TYPE_QINFO:
+ c = 'Q';
+ break;
default:
c = '\0';
}
@@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = {
[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
[OCFS2_LOCK_TYPE_OPEN] = "Open",
[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
+ [OCFS2_LOCK_TYPE_QINFO] = "Quota",
};
static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
new file mode 100644
index 000000000000..abf694155d21
--- /dev/null
+++ b/fs/ocfs2/quota.h
@@ -0,0 +1,117 @@
+/*
+ * quota.h for OCFS2
+ *
+ * On disk quota structures for local and global quota file, in-memory
+ * structures.
+ *
+ */
+
+#ifndef _OCFS2_QUOTA_H
+#define _OCFS2_QUOTA_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/quota.h>
+#include <linux/list.h>
+#include <linux/dqblk_qtree.h>
+#include <linux/timer.h>
+
+#include "ocfs2.h"
+
+/* Common stuff */
+/* id number of quota format */
+#define QFMT_OCFS2 3
+
+/*
+ * In-memory structures
+ */
+struct ocfs2_dquot {
+ struct dquot dq_dquot; /* Generic VFS dquot */
+ loff_t dq_local_off; /* Offset in the local quota file */
+ struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */
+ unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
+ s64 dq_origspace; /* Last globally synced space usage */
+ s64 dq_originodes; /* Last globally synced inode usage */
+};
+
+/* Description of one chunk to recover in memory */
+struct ocfs2_recovery_chunk {
+ struct list_head rc_list; /* List of chunks */
+ int rc_chunk; /* Chunk number */
+ unsigned long *rc_bitmap; /* Bitmap of entries to recover */
+};
+
+struct ocfs2_quota_recovery {
+ struct list_head r_list[MAXQUOTAS]; /* List of chunks to recover */
+};
+
+/* In-memory structure with quota header information */
+struct ocfs2_mem_dqinfo {
+ unsigned int dqi_type; /* Quota type this structure describes */
+ unsigned int dqi_chunks; /* Number of chunks in local quota file */
+ unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
+ unsigned int dqi_syncms; /* How often should we sync with other nodes */
+ unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
+ struct list_head dqi_chunk; /* List of chunks */
+ struct inode *dqi_gqinode; /* Global quota file inode */
+ struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
+ struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */
+ int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */
+ struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */
+ struct buffer_head *dqi_ibh; /* Buffer with information header */
+ struct qtree_mem_dqinfo dqi_gi; /* Info about global file */
+ struct timer_list dqi_sync_timer; /* Timer for syncing dquots */
+ struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery
+ * information, in case we
+ * enable quotas on file
+ * needing it */
+};
+
+static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot)
+{
+ return container_of(dquot, struct ocfs2_dquot, dq_dquot);
+}
+
+struct ocfs2_quota_chunk {
+ struct list_head qc_chunk; /* List of quotafile chunks */
+ int qc_num; /* Number of quota chunk */
+ struct buffer_head *qc_headerbh; /* Buffer head with chunk header */
+};
+
+extern struct kmem_cache *ocfs2_dquot_cachep;
+extern struct kmem_cache *ocfs2_qf_chunk_cachep;
+
+extern struct qtree_fmt_operations ocfs2_global_ops;
+
+struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
+ struct ocfs2_super *osb, int slot_num);
+int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
+ struct ocfs2_quota_recovery *rec,
+ int slot_num);
+void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec);
+ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
+ size_t len, loff_t off);
+ssize_t ocfs2_quota_write(struct super_block *sb, int type,
+ const char *data, size_t len, loff_t off);
+int ocfs2_global_read_info(struct super_block *sb, int type);
+int ocfs2_global_write_info(struct super_block *sb, int type);
+int ocfs2_global_read_dquot(struct dquot *dquot);
+int __ocfs2_sync_dquot(struct dquot *dquot, int freeing);
+static inline int ocfs2_sync_dquot(struct dquot *dquot)
+{
+ return __ocfs2_sync_dquot(dquot, 0);
+}
+static inline int ocfs2_global_release_dquot(struct dquot *dquot)
+{
+ return __ocfs2_sync_dquot(dquot, 1);
+}
+
+int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
+void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
+int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
+ struct buffer_head **bh);
+
+extern struct dquot_operations ocfs2_quota_operations;
+extern struct quota_format_type ocfs2_quota_format;
+
+#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
new file mode 100644
index 000000000000..4b38a2ef7aae
--- /dev/null
+++ b/fs/ocfs2/quota_global.c
@@ -0,0 +1,990 @@
+/*
+ * Implementation of operations over global quota file
+ */
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/quotaops.h>
+#include <linux/dqblk_qtree.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/writeback.h>
+
+#define MLOG_MASK_PREFIX ML_QUOTA
+#include <cluster/masklog.h>
+
+#include "ocfs2_fs.h"
+#include "ocfs2.h"
+#include "alloc.h"
+#include "inode.h"
+#include "journal.h"
+#include "file.h"
+#include "sysfile.h"
+#include "dlmglue.h"
+#include "uptodate.h"
+#include "quota.h"
+
+static void qsync_timer_fn(unsigned long oinfo_ptr);
+
+static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
+{
+ struct ocfs2_global_disk_dqblk *d = dp;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+
+ /* Update from disk only entries not set by the admin */
+ if (!test_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags)) {
+ m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
+ m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
+ }
+ if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
+ m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
+ if (!test_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags)) {
+ m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit);
+ m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit);
+ }
+ if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
+ m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
+ if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags))
+ m->dqb_btime = le64_to_cpu(d->dqb_btime);
+ if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags))
+ m->dqb_itime = le64_to_cpu(d->dqb_itime);
+ OCFS2_DQUOT(dquot)->dq_use_count = le32_to_cpu(d->dqb_use_count);
+}
+
+static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
+{
+ struct ocfs2_global_disk_dqblk *d = dp;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+
+ d->dqb_id = cpu_to_le32(dquot->dq_id);
+ d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count);
+ d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+ d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+ d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+ d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
+ d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
+ d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+ d->dqb_btime = cpu_to_le64(m->dqb_btime);
+ d->dqb_itime = cpu_to_le64(m->dqb_itime);
+}
+
+static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
+{
+ struct ocfs2_global_disk_dqblk *d = dp;
+ struct ocfs2_mem_dqinfo *oinfo =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+ if (qtree_entry_unused(&oinfo->dqi_gi, dp))
+ return 0;
+ return le32_to_cpu(d->dqb_id) == dquot->dq_id;
+}
+
+struct qtree_fmt_operations ocfs2_global_ops = {
+ .mem2disk_dqblk = ocfs2_global_mem2diskdqb,
+ .disk2mem_dqblk = ocfs2_global_disk2memdqb,
+ .is_id = ocfs2_global_is_id,
+};
+
+int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
+ struct buffer_head **bh)
+{
+ int rc = 0;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, NULL);
+ if (rc)
+ mlog_errno(rc);
+
+ /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
+static int ocfs2_get_quota_block(struct inode *inode, int block,
+ struct buffer_head **bh)
+{
+ u64 pblock, pcount;
+ int err;
+
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL);
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (err) {
+ mlog_errno(err);
+ return err;
+ }
+ *bh = sb_getblk(inode->i_sb, pblock);
+ if (!*bh) {
+ err = -EIO;
+ mlog_errno(err);
+ }
+ return err;;
+}
+
+/* Read data from global quotafile - avoid pagecache and such because we cannot
+ * afford acquiring the locks... We use quota cluster lock to serialize
+ * operations. Caller is responsible for acquiring it. */
+ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
+ size_t len, loff_t off)
+{
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ struct inode *gqinode = oinfo->dqi_gqinode;
+ loff_t i_size = i_size_read(gqinode);
+ int offset = off & (sb->s_blocksize - 1);
+ sector_t blk = off >> sb->s_blocksize_bits;
+ int err = 0;
+ struct buffer_head *bh;
+ size_t toread, tocopy;
+
+ if (off > i_size)
+ return 0;
+ if (off + len > i_size)
+ len = i_size - off;
+ toread = len;
+ while (toread > 0) {
+ tocopy = min((size_t)(sb->s_blocksize - offset), toread);
+ bh = NULL;
+ err = ocfs2_read_quota_block(gqinode, blk, &bh);
+ if (err) {
+ mlog_errno(err);
+ return err;
+ }
+ memcpy(data, bh->b_data + offset, tocopy);
+ brelse(bh);
+ offset = 0;
+ toread -= tocopy;
+ data += tocopy;
+ blk++;
+ }
+ return len;
+}
+
+/* Write to quotafile (we know the transaction is already started and has
+ * enough credits) */
+ssize_t ocfs2_quota_write(struct super_block *sb, int type,
+ const char *data, size_t len, loff_t off)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct inode *gqinode = oinfo->dqi_gqinode;
+ int offset = off & (sb->s_blocksize - 1);
+ sector_t blk = off >> sb->s_blocksize_bits;
+ int err = 0, new = 0, ja_type;
+ struct buffer_head *bh = NULL;
+ handle_t *handle = journal_current_handle();
+
+ if (!handle) {
+ mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled "
+ "because transaction was not started.\n",
+ (unsigned long long)off, (unsigned long long)len);
+ return -EIO;
+ }
+ if (len > sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset) {
+ WARN_ON(1);
+ len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
+ }
+
+ mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
+ if (gqinode->i_size < off + len) {
+ down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+ err = ocfs2_extend_no_holes(gqinode, off + len, off);
+ up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+ if (err < 0)
+ goto out;
+ err = ocfs2_simple_size_update(gqinode,
+ oinfo->dqi_gqi_bh,
+ off + len);
+ if (err < 0)
+ goto out;
+ new = 1;
+ }
+ /* Not rewriting whole block? */
+ if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
+ !new) {
+ err = ocfs2_read_quota_block(gqinode, blk, &bh);
+ ja_type = OCFS2_JOURNAL_ACCESS_WRITE;
+ } else {
+ err = ocfs2_get_quota_block(gqinode, blk, &bh);
+ ja_type = OCFS2_JOURNAL_ACCESS_CREATE;
+ }
+ if (err) {
+ mlog_errno(err);
+ return err;
+ }
+ lock_buffer(bh);
+ if (new)
+ memset(bh->b_data, 0, sb->s_blocksize);
+ memcpy(bh->b_data + offset, data, len);
+ flush_dcache_page(bh->b_page);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ ocfs2_set_buffer_uptodate(gqinode, bh);
+ err = ocfs2_journal_access(handle, gqinode, bh, ja_type);
+ if (err < 0) {
+ brelse(bh);
+ goto out;
+ }
+ err = ocfs2_journal_dirty(handle, bh);
+ brelse(bh);
+ if (err < 0)
+ goto out;
+out:
+ if (err) {
+ mutex_unlock(&gqinode->i_mutex);
+ mlog_errno(err);
+ return err;
+ }
+ gqinode->i_version++;
+ ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh);
+ mutex_unlock(&gqinode->i_mutex);
+ return len;
+}
+
+int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+ int status;
+ struct buffer_head *bh = NULL;
+
+ status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex);
+ if (status < 0)
+ return status;
+ spin_lock(&dq_data_lock);
+ if (!oinfo->dqi_gqi_count++)
+ oinfo->dqi_gqi_bh = bh;
+ else
+ WARN_ON(bh != oinfo->dqi_gqi_bh);
+ spin_unlock(&dq_data_lock);
+ return 0;
+}
+
+void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+ ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
+ brelse(oinfo->dqi_gqi_bh);
+ spin_lock(&dq_data_lock);
+ if (!--oinfo->dqi_gqi_count)
+ oinfo->dqi_gqi_bh = NULL;
+ spin_unlock(&dq_data_lock);
+}
+
+/* Read information header from global quota file */
+int ocfs2_global_read_info(struct super_block *sb, int type)
+{
+ struct inode *gqinode = NULL;
+ unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
+ GROUP_QUOTA_SYSTEM_INODE };
+ struct ocfs2_global_disk_dqinfo dinfo;
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ int status;
+
+ mlog_entry_void();
+
+ /* Read global header */
+ gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
+ OCFS2_INVALID_SLOT);
+ if (!gqinode) {
+ mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n",
+ type);
+ status = -EINVAL;
+ goto out_err;
+ }
+ oinfo->dqi_gi.dqi_sb = sb;
+ oinfo->dqi_gi.dqi_type = type;
+ ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
+ oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk);
+ oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops;
+ oinfo->dqi_gqi_bh = NULL;
+ oinfo->dqi_gqi_count = 0;
+ oinfo->dqi_gqinode = gqinode;
+ status = ocfs2_lock_global_qf(oinfo, 0);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+ status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
+ sizeof(struct ocfs2_global_disk_dqinfo),
+ OCFS2_GLOBAL_INFO_OFF);
+ ocfs2_unlock_global_qf(oinfo, 0);
+ if (status != sizeof(struct ocfs2_global_disk_dqinfo)) {
+ mlog(ML_ERROR, "Cannot read global quota info (%d).\n",
+ status);
+ if (status >= 0)
+ status = -EIO;
+ mlog_errno(status);
+ goto out_err;
+ }
+ info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
+ info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
+ oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
+ oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
+ oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+ oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+ oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+ oinfo->dqi_gi.dqi_blocksize_bits = sb->s_blocksize_bits;
+ oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize -
+ OCFS2_QBLK_RESERVED_SPACE;
+ oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
+ setup_timer(&oinfo->dqi_sync_timer, qsync_timer_fn,
+ (unsigned long)oinfo);
+ mod_timer(&oinfo->dqi_sync_timer,
+ round_jiffies(jiffies + oinfo->dqi_syncjiff));
+out_err:
+ mlog_exit(status);
+ return status;
+}
+
+/* Write information to global quota file. Expects exlusive lock on quota
+ * file inode and quota info */
+static int __ocfs2_global_write_info(struct super_block *sb, int type)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_global_disk_dqinfo dinfo;
+ ssize_t size;
+
+ spin_lock(&dq_data_lock);
+ info->dqi_flags &= ~DQF_INFO_DIRTY;
+ dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
+ dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
+ spin_unlock(&dq_data_lock);
+ dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms);
+ dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks);
+ dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk);
+ dinfo.dqi_free_entry = cpu_to_le32(oinfo->dqi_gi.dqi_free_entry);
+ size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
+ sizeof(struct ocfs2_global_disk_dqinfo),
+ OCFS2_GLOBAL_INFO_OFF);
+ if (size != sizeof(struct ocfs2_global_disk_dqinfo)) {
+ mlog(ML_ERROR, "Cannot write global quota info structure\n");
+ if (size >= 0)
+ size = -EIO;
+ return size;
+ }
+ return 0;
+}
+
+int ocfs2_global_write_info(struct super_block *sb, int type)
+{
+ int err;
+ struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+
+ err = ocfs2_qinfo_lock(info, 1);
+ if (err < 0)
+ return err;
+ err = __ocfs2_global_write_info(sb, type);
+ ocfs2_qinfo_unlock(info, 1);
+ return err;
+}
+
+/* Read in information from global quota file and acquire a reference to it.
+ * dquot_acquire() has already started the transaction and locked quota file */
+int ocfs2_global_read_dquot(struct dquot *dquot)
+{
+ int err, err2, ex = 0;
+ struct ocfs2_mem_dqinfo *info =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+ err = ocfs2_qinfo_lock(info, 0);
+ if (err < 0)
+ goto out;
+ err = qtree_read_dquot(&info->dqi_gi, dquot);
+ if (err < 0)
+ goto out_qlock;
+ OCFS2_DQUOT(dquot)->dq_use_count++;
+ OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+ OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+ if (!dquot->dq_off) { /* No real quota entry? */
+ /* Upgrade to exclusive lock for allocation */
+ err = ocfs2_qinfo_lock(info, 1);
+ if (err < 0)
+ goto out_qlock;
+ ex = 1;
+ }
+ err = qtree_write_dquot(&info->dqi_gi, dquot);
+ if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
+ err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
+ if (!err)
+ err = err2;
+ }
+out_qlock:
+ if (ex)
+ ocfs2_qinfo_unlock(info, 1);
+ ocfs2_qinfo_unlock(info, 0);
+out:
+ if (err < 0)
+ mlog_errno(err);
+ return err;
+}
+
+/* Sync local information about quota modifications with global quota file.
+ * Caller must have started the transaction and obtained exclusive lock for
+ * global quota file inode */
+int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
+{
+ int err, err2;
+ struct super_block *sb = dquot->dq_sb;
+ int type = dquot->dq_type;
+ struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+ struct ocfs2_global_disk_dqblk dqblk;
+ s64 spacechange, inodechange;
+ time_t olditime, oldbtime;
+
+ err = sb->s_op->quota_read(sb, type, (char *)&dqblk,
+ sizeof(struct ocfs2_global_disk_dqblk),
+ dquot->dq_off);
+ if (err != sizeof(struct ocfs2_global_disk_dqblk)) {
+ if (err >= 0) {
+ mlog(ML_ERROR, "Short read from global quota file "
+ "(%u read)\n", err);
+ err = -EIO;
+ }
+ goto out;
+ }
+
+ /* Update space and inode usage. Get also other information from
+ * global quota file so that we don't overwrite any changes there.
+ * We are */
+ spin_lock(&dq_data_lock);
+ spacechange = dquot->dq_dqb.dqb_curspace -
+ OCFS2_DQUOT(dquot)->dq_origspace;
+ inodechange = dquot->dq_dqb.dqb_curinodes -
+ OCFS2_DQUOT(dquot)->dq_originodes;
+ olditime = dquot->dq_dqb.dqb_itime;
+ oldbtime = dquot->dq_dqb.dqb_btime;
+ ocfs2_global_disk2memdqb(dquot, &dqblk);
+ mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n",
+ dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange,
+ dquot->dq_dqb.dqb_curinodes, (long long)inodechange);
+ if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
+ dquot->dq_dqb.dqb_curspace += spacechange;
+ if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
+ dquot->dq_dqb.dqb_curinodes += inodechange;
+ /* Set properly space grace time... */
+ if (dquot->dq_dqb.dqb_bsoftlimit &&
+ dquot->dq_dqb.dqb_curspace > dquot->dq_dqb.dqb_bsoftlimit) {
+ if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags) &&
+ oldbtime > 0) {
+ if (dquot->dq_dqb.dqb_btime > 0)
+ dquot->dq_dqb.dqb_btime =
+ min(dquot->dq_dqb.dqb_btime, oldbtime);
+ else
+ dquot->dq_dqb.dqb_btime = oldbtime;
+ }
+ } else {
+ dquot->dq_dqb.dqb_btime = 0;
+ clear_bit(DQ_BLKS_B, &dquot->dq_flags);
+ }
+ /* Set properly inode grace time... */
+ if (dquot->dq_dqb.dqb_isoftlimit &&
+ dquot->dq_dqb.dqb_curinodes > dquot->dq_dqb.dqb_isoftlimit) {
+ if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags) &&
+ olditime > 0) {
+ if (dquot->dq_dqb.dqb_itime > 0)
+ dquot->dq_dqb.dqb_itime =
+ min(dquot->dq_dqb.dqb_itime, olditime);
+ else
+ dquot->dq_dqb.dqb_itime = olditime;
+ }
+ } else {
+ dquot->dq_dqb.dqb_itime = 0;
+ clear_bit(DQ_INODES_B, &dquot->dq_flags);
+ }
+ /* All information is properly updated, clear the flags */
+ __clear_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+ OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+ OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+ spin_unlock(&dq_data_lock);
+ err = ocfs2_qinfo_lock(info, freeing);
+ if (err < 0) {
+ mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
+ " (type=%d, id=%u)\n", dquot->dq_type,
+ (unsigned)dquot->dq_id);
+ goto out;
+ }
+ if (freeing)
+ OCFS2_DQUOT(dquot)->dq_use_count--;
+ err = qtree_write_dquot(&info->dqi_gi, dquot);
+ if (err < 0)
+ goto out_qlock;
+ if (freeing && !OCFS2_DQUOT(dquot)->dq_use_count) {
+ err = qtree_release_dquot(&info->dqi_gi, dquot);
+ if (info_dirty(sb_dqinfo(sb, type))) {
+ err2 = __ocfs2_global_write_info(sb, type);
+ if (!err)
+ err = err2;
+ }
+ }
+out_qlock:
+ ocfs2_qinfo_unlock(info, freeing);
+out:
+ if (err < 0)
+ mlog_errno(err);
+ return err;
+}
+
+/*
+ * Functions for periodic syncing of dquots with global file
+ */
+static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
+{
+ handle_t *handle;
+ struct super_block *sb = dquot->dq_sb;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+ int status = 0;
+
+ mlog_entry("id=%u qtype=%u type=%lu device=%s\n", dquot->dq_id,
+ dquot->dq_type, type, sb->s_id);
+ if (type != dquot->dq_type)
+ goto out;
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+
+ handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ mutex_lock(&sb_dqopt(sb)->dqio_mutex);
+ status = ocfs2_sync_dquot(dquot);
+ mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+ if (status < 0)
+ mlog_errno(status);
+ /* We have to write local structure as well... */
+ dquot_mark_dquot_dirty(dquot);
+ status = dquot_commit(dquot);
+ if (status < 0)
+ mlog_errno(status);
+ ocfs2_commit_trans(osb, handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+static void ocfs2_do_qsync(unsigned long oinfo_ptr)
+{
+ struct ocfs2_mem_dqinfo *oinfo = (struct ocfs2_mem_dqinfo *)oinfo_ptr;
+ struct super_block *sb = oinfo->dqi_gqinode->i_sb;
+
+ dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
+}
+
+static void qsync_timer_fn(unsigned long oinfo_ptr)
+{
+ struct ocfs2_mem_dqinfo *oinfo = (struct ocfs2_mem_dqinfo *)oinfo_ptr;
+
+ pdflush_operation(ocfs2_do_qsync, oinfo_ptr);
+ mod_timer(&oinfo->dqi_sync_timer,
+ round_jiffies(jiffies + oinfo->dqi_syncjiff));
+}
+
+/*
+ * Wrappers for generic quota functions
+ */
+
+static int ocfs2_write_dquot(struct dquot *dquot)
+{
+ handle_t *handle;
+ struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+ int status = 0;
+
+ mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+
+ handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+ status = dquot_commit(dquot);
+ ocfs2_commit_trans(osb, handle);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
+{
+ struct ocfs2_mem_dqinfo *oinfo;
+ int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
+
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
+ return 0;
+
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ /* We modify tree, leaf block, global info, local chunk header,
+ * global and local inode */
+ return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
+ 2 * OCFS2_INODE_UPDATE_CREDITS;
+}
+
+static int ocfs2_release_dquot(struct dquot *dquot)
+{
+ handle_t *handle;
+ struct ocfs2_mem_dqinfo *oinfo =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+ struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+ int status = 0;
+
+ mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ handle = ocfs2_start_trans(osb,
+ ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type));
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ status = dquot_release(dquot);
+ ocfs2_commit_trans(osb, handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
+{
+ struct ocfs2_mem_dqinfo *oinfo;
+ int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
+ struct ocfs2_dinode *lfe, *gfe;
+
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
+ return 0;
+
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
+ lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
+ /* We can extend local file + global file. In local file we
+ * can modify info, chunk header block and dquot block. In
+ * global file we can modify info, tree and leaf block */
+ return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
+ ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
+ 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
+}
+
+static int ocfs2_acquire_dquot(struct dquot *dquot)
+{
+ handle_t *handle;
+ struct ocfs2_mem_dqinfo *oinfo =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+ struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+ int status = 0;
+
+ mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+ /* We need an exclusive lock, because we're going to update use count
+ * and instantiate possibly new dquot structure */
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ handle = ocfs2_start_trans(osb,
+ ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ status = dquot_acquire(dquot);
+ ocfs2_commit_trans(osb, handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
+{
+ unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) |
+ (1 << (DQ_LASTSET_B + QIF_BLIMITS_B)) |
+ (1 << (DQ_LASTSET_B + QIF_INODES_B)) |
+ (1 << (DQ_LASTSET_B + QIF_SPACE_B)) |
+ (1 << (DQ_LASTSET_B + QIF_BTIME_B)) |
+ (1 << (DQ_LASTSET_B + QIF_ITIME_B));
+ int sync = 0;
+ int status;
+ struct super_block *sb = dquot->dq_sb;
+ int type = dquot->dq_type;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ handle_t *handle;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+
+ mlog_entry("id=%u, type=%d", dquot->dq_id, type);
+ dquot_mark_dquot_dirty(dquot);
+
+ /* In case user set some limits, sync dquot immediately to global
+ * quota file so that information propagates quicker */
+ spin_lock(&dq_data_lock);
+ if (dquot->dq_flags & mask)
+ sync = 1;
+ spin_unlock(&dq_data_lock);
+ if (!sync) {
+ status = ocfs2_write_dquot(dquot);
+ goto out;
+ }
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ status = ocfs2_sync_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ /* Now write updated local dquot structure */
+ status = dquot_commit(dquot);
+out_trans:
+ ocfs2_commit_trans(osb, handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+/* This should happen only after set_dqinfo(). */
+static int ocfs2_write_info(struct super_block *sb, int type)
+{
+ handle_t *handle;
+ int status = 0;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+
+ mlog_entry_void();
+
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QINFO_WRITE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ status = dquot_commit_info(sb, type);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+/* This is difficult. We have to lock quota inode and start transaction
+ * in this function but we don't want to take the penalty of exlusive
+ * quota file lock when we are just going to use cached structures. So
+ * we just take read lock check whether we have dquot cached and if so,
+ * we don't have to take the write lock... */
+static int ocfs2_dquot_initialize(struct inode *inode, int type)
+{
+ handle_t *handle = NULL;
+ int status = 0;
+ struct super_block *sb = inode->i_sb;
+ struct ocfs2_mem_dqinfo *oinfo;
+ int exclusive = 0;
+ int cnt;
+ qid_t id;
+
+ mlog_entry_void();
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && cnt != type)
+ continue;
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 0);
+ if (status < 0)
+ goto out;
+ /* This is just a performance optimization not a reliable test.
+ * Since we hold an inode lock, noone can actually release
+ * the structure until we are finished with initialization. */
+ if (inode->i_dquot[cnt] != NODQUOT) {
+ ocfs2_unlock_global_qf(oinfo, 0);
+ continue;
+ }
+ /* When we have inode lock, we know that no dquot_release() can
+ * run and thus we can safely check whether we need to
+ * read+modify global file to get quota information or whether
+ * our node already has it. */
+ if (cnt == USRQUOTA)
+ id = inode->i_uid;
+ else if (cnt == GRPQUOTA)
+ id = inode->i_gid;
+ else
+ BUG();
+ /* Obtain exclusion from quota off... */
+ down_write(&sb_dqopt(sb)->dqptr_sem);
+ exclusive = !dquot_is_cached(sb, id, cnt);
+ up_write(&sb_dqopt(sb)->dqptr_sem);
+ if (exclusive) {
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0) {
+ exclusive = 0;
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ ocfs2_calc_qinit_credits(sb, cnt));
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ }
+ dquot_initialize(inode, cnt);
+ if (exclusive) {
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ ocfs2_unlock_global_qf(oinfo, 1);
+ }
+ ocfs2_unlock_global_qf(oinfo, 0);
+ }
+ mlog_exit(0);
+ return 0;
+out_ilock:
+ if (exclusive)
+ ocfs2_unlock_global_qf(oinfo, 1);
+ ocfs2_unlock_global_qf(oinfo, 0);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+static int ocfs2_dquot_drop_slow(struct inode *inode)
+{
+ int status = 0;
+ int cnt;
+ int got_lock[MAXQUOTAS] = {0, 0};
+ handle_t *handle;
+ struct super_block *sb = inode->i_sb;
+ struct ocfs2_mem_dqinfo *oinfo;
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ got_lock[cnt] = 1;
+ }
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ ocfs2_calc_qinit_credits(sb, USRQUOTA) +
+ ocfs2_calc_qinit_credits(sb, GRPQUOTA));
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+ dquot_drop(inode);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (got_lock[cnt]) {
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ ocfs2_unlock_global_qf(oinfo, 1);
+ }
+ return status;
+}
+
+/* See the comment before ocfs2_dquot_initialize. */
+static int ocfs2_dquot_drop(struct inode *inode)
+{
+ int status = 0;
+ struct super_block *sb = inode->i_sb;
+ struct ocfs2_mem_dqinfo *oinfo;
+ int exclusive = 0;
+ int cnt;
+ int got_lock[MAXQUOTAS] = {0, 0};
+
+ mlog_entry_void();
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 0);
+ if (status < 0)
+ goto out;
+ got_lock[cnt] = 1;
+ }
+ /* Lock against anyone releasing references so that when when we check
+ * we know we are not going to be last ones to release dquot */
+ down_write(&sb_dqopt(sb)->dqptr_sem);
+ /* Urgh, this is a terrible hack :( */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (inode->i_dquot[cnt] != NODQUOT &&
+ atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
+ exclusive = 1;
+ break;
+ }
+ }
+ if (!exclusive)
+ dquot_drop_locked(inode);
+ up_write(&sb_dqopt(sb)->dqptr_sem);
+out:
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (got_lock[cnt]) {
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ ocfs2_unlock_global_qf(oinfo, 0);
+ }
+ /* In case we bailed out because we had to do expensive locking
+ * do it now... */
+ if (exclusive)
+ status = ocfs2_dquot_drop_slow(inode);
+ mlog_exit(status);
+ return status;
+}
+
+static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
+{
+ struct ocfs2_dquot *dquot =
+ kmem_cache_zalloc(ocfs2_dquot_cachep, GFP_NOFS);
+
+ if (!dquot)
+ return NULL;
+ return &dquot->dq_dquot;
+}
+
+static void ocfs2_destroy_dquot(struct dquot *dquot)
+{
+ kmem_cache_free(ocfs2_dquot_cachep, dquot);
+}
+
+struct dquot_operations ocfs2_quota_operations = {
+ .initialize = ocfs2_dquot_initialize,
+ .drop = ocfs2_dquot_drop,
+ .alloc_space = dquot_alloc_space,
+ .alloc_inode = dquot_alloc_inode,
+ .free_space = dquot_free_space,
+ .free_inode = dquot_free_inode,
+ .transfer = dquot_transfer,
+ .write_dquot = ocfs2_write_dquot,
+ .acquire_dquot = ocfs2_acquire_dquot,
+ .release_dquot = ocfs2_release_dquot,
+ .mark_dirty = ocfs2_mark_dquot_dirty,
+ .write_info = ocfs2_write_info,
+ .alloc_dquot = ocfs2_alloc_dquot,
+ .destroy_dquot = ocfs2_destroy_dquot,
+};
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
new file mode 100644
index 000000000000..5353c423829f
--- /dev/null
+++ b/fs/ocfs2/quota_local.c
@@ -0,0 +1,1253 @@
+/*
+ * Implementation of operations over local quota file
+ */
+
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+
+#define MLOG_MASK_PREFIX ML_QUOTA
+#include <cluster/masklog.h>
+
+#include "ocfs2_fs.h"
+#include "ocfs2.h"
+#include "inode.h"
+#include "alloc.h"
+#include "file.h"
+#include "buffer_head_io.h"
+#include "journal.h"
+#include "sysfile.h"
+#include "dlmglue.h"
+#include "quota.h"
+
+/* Number of local quota structures per block */
+static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
+{
+ return ((sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) /
+ sizeof(struct ocfs2_local_disk_dqblk));
+}
+
+/* Number of blocks with entries in one chunk */
+static inline unsigned int ol_chunk_blocks(struct super_block *sb)
+{
+ return ((sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
+ OCFS2_QBLK_RESERVED_SPACE) << 3) /
+ ol_quota_entries_per_block(sb);
+}
+
+/* Number of entries in a chunk bitmap */
+static unsigned int ol_chunk_entries(struct super_block *sb)
+{
+ return ol_chunk_blocks(sb) * ol_quota_entries_per_block(sb);
+}
+
+/* Offset of the chunk in quota file */
+static unsigned int ol_quota_chunk_block(struct super_block *sb, int c)
+{
+ /* 1 block for local quota file info, 1 block per chunk for chunk info */
+ return 1 + (ol_chunk_blocks(sb) + 1) * c;
+}
+
+static unsigned int ol_dqblk_block(struct super_block *sb, int c, int off)
+{
+ int epb = ol_quota_entries_per_block(sb);
+
+ return ol_quota_chunk_block(sb, c) + 1 + off / epb;
+}
+
+static unsigned int ol_dqblk_block_off(struct super_block *sb, int c, int off)
+{
+ int epb = ol_quota_entries_per_block(sb);
+
+ return (off % epb) * sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Offset of the dquot structure in the quota file */
+static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
+{
+ return (ol_dqblk_block(sb, c, off) << sb->s_blocksize_bits) +
+ ol_dqblk_block_off(sb, c, off);
+}
+
+/* Compute block number from given offset */
+static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
+{
+ return off >> sb->s_blocksize_bits;
+}
+
+static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
+{
+ return off & ((1 << sb->s_blocksize_bits) - 1);
+}
+
+/* Compute offset in the chunk of a structure with the given offset */
+static int ol_dqblk_chunk_off(struct super_block *sb, int c, loff_t off)
+{
+ int epb = ol_quota_entries_per_block(sb);
+
+ return ((off >> sb->s_blocksize_bits) -
+ ol_quota_chunk_block(sb, c) - 1) * epb
+ + ((unsigned int)(off & ((1 << sb->s_blocksize_bits) - 1))) /
+ sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Write bufferhead into the fs */
+static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
+ void (*modify)(struct buffer_head *, void *), void *private)
+{
+ struct super_block *sb = inode->i_sb;
+ handle_t *handle;
+ int status;
+
+ handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ return status;
+ }
+ status = ocfs2_journal_access(handle, inode, bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ return status;
+ }
+ lock_buffer(bh);
+ modify(bh, private);
+ unlock_buffer(bh);
+ status = ocfs2_journal_dirty(handle, bh);
+ if (status < 0) {
+ mlog_errno(status);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ return status;
+ }
+ status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ if (status < 0) {
+ mlog_errno(status);
+ return status;
+ }
+ return 0;
+}
+
+/* Check whether we understand format of quota files */
+static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
+{
+ unsigned int lmagics[MAXQUOTAS] = OCFS2_LOCAL_QMAGICS;
+ unsigned int lversions[MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS;
+ unsigned int gmagics[MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS;
+ unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS;
+ unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
+ GROUP_QUOTA_SYSTEM_INODE };
+ struct buffer_head *bh = NULL;
+ struct inode *linode = sb_dqopt(sb)->files[type];
+ struct inode *ginode = NULL;
+ struct ocfs2_disk_dqheader *dqhead;
+ int status, ret = 0;
+
+ /* First check whether we understand local quota file */
+ status = ocfs2_read_quota_block(linode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read quota file header (type=%d)\n",
+ type);
+ goto out_err;
+ }
+ dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
+ if (le32_to_cpu(dqhead->dqh_magic) != lmagics[type]) {
+ mlog(ML_ERROR, "quota file magic does not match (%u != %u),"
+ " type=%d\n", le32_to_cpu(dqhead->dqh_magic),
+ lmagics[type], type);
+ goto out_err;
+ }
+ if (le32_to_cpu(dqhead->dqh_version) != lversions[type]) {
+ mlog(ML_ERROR, "quota file version does not match (%u != %u),"
+ " type=%d\n", le32_to_cpu(dqhead->dqh_version),
+ lversions[type], type);
+ goto out_err;
+ }
+ brelse(bh);
+ bh = NULL;
+
+ /* Next check whether we understand global quota file */
+ ginode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
+ OCFS2_INVALID_SLOT);
+ if (!ginode) {
+ mlog(ML_ERROR, "cannot get global quota file inode "
+ "(type=%d)\n", type);
+ goto out_err;
+ }
+ /* Since the header is read only, we don't care about locking */
+ status = ocfs2_read_quota_block(ginode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read global quota file header "
+ "(type=%d)\n", type);
+ goto out_err;
+ }
+ dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
+ if (le32_to_cpu(dqhead->dqh_magic) != gmagics[type]) {
+ mlog(ML_ERROR, "global quota file magic does not match "
+ "(%u != %u), type=%d\n",
+ le32_to_cpu(dqhead->dqh_magic), gmagics[type], type);
+ goto out_err;
+ }
+ if (le32_to_cpu(dqhead->dqh_version) != gversions[type]) {
+ mlog(ML_ERROR, "global quota file version does not match "
+ "(%u != %u), type=%d\n",
+ le32_to_cpu(dqhead->dqh_version), gversions[type],
+ type);
+ goto out_err;
+ }
+
+ ret = 1;
+out_err:
+ brelse(bh);
+ iput(ginode);
+ return ret;
+}
+
+/* Release given list of quota file chunks */
+static void ocfs2_release_local_quota_bitmaps(struct list_head *head)
+{
+ struct ocfs2_quota_chunk *pos, *next;
+
+ list_for_each_entry_safe(pos, next, head, qc_chunk) {
+ list_del(&pos->qc_chunk);
+ brelse(pos->qc_headerbh);
+ kmem_cache_free(ocfs2_qf_chunk_cachep, pos);
+ }
+}
+
+/* Load quota bitmaps into memory */
+static int ocfs2_load_local_quota_bitmaps(struct inode *inode,
+ struct ocfs2_local_disk_dqinfo *ldinfo,
+ struct list_head *head)
+{
+ struct ocfs2_quota_chunk *newchunk;
+ int i, status;
+
+ INIT_LIST_HEAD(head);
+ for (i = 0; i < le32_to_cpu(ldinfo->dqi_chunks); i++) {
+ newchunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
+ if (!newchunk) {
+ ocfs2_release_local_quota_bitmaps(head);
+ return -ENOMEM;
+ }
+ newchunk->qc_num = i;
+ newchunk->qc_headerbh = NULL;
+ status = ocfs2_read_quota_block(inode,
+ ol_quota_chunk_block(inode->i_sb, i),
+ &newchunk->qc_headerbh);
+ if (status) {
+ mlog_errno(status);
+ kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk);
+ ocfs2_release_local_quota_bitmaps(head);
+ return status;
+ }
+ list_add_tail(&newchunk->qc_chunk, head);
+ }
+ return 0;
+}
+
+static void olq_update_info(struct buffer_head *bh, void *private)
+{
+ struct mem_dqinfo *info = private;
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_local_disk_dqinfo *ldinfo;
+
+ ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+ OCFS2_LOCAL_INFO_OFF);
+ spin_lock(&dq_data_lock);
+ ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
+ ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
+ ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
+ spin_unlock(&dq_data_lock);
+}
+
+static int ocfs2_add_recovery_chunk(struct super_block *sb,
+ struct ocfs2_local_disk_chunk *dchunk,
+ int chunk,
+ struct list_head *head)
+{
+ struct ocfs2_recovery_chunk *rc;
+
+ rc = kmalloc(sizeof(struct ocfs2_recovery_chunk), GFP_NOFS);
+ if (!rc)
+ return -ENOMEM;
+ rc->rc_chunk = chunk;
+ rc->rc_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
+ if (!rc->rc_bitmap) {
+ kfree(rc);
+ return -ENOMEM;
+ }
+ memcpy(rc->rc_bitmap, dchunk->dqc_bitmap,
+ (ol_chunk_entries(sb) + 7) >> 3);
+ list_add_tail(&rc->rc_list, head);
+ return 0;
+}
+
+static void free_recovery_list(struct list_head *head)
+{
+ struct ocfs2_recovery_chunk *next;
+ struct ocfs2_recovery_chunk *rchunk;
+
+ list_for_each_entry_safe(rchunk, next, head, rc_list) {
+ list_del(&rchunk->rc_list);
+ kfree(rchunk->rc_bitmap);
+ kfree(rchunk);
+ }
+}
+
+void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec)
+{
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++)
+ free_recovery_list(&(rec->r_list[type]));
+ kfree(rec);
+}
+
+/* Load entries in our quota file we have to recover*/
+static int ocfs2_recovery_load_quota(struct inode *lqinode,
+ struct ocfs2_local_disk_dqinfo *ldinfo,
+ int type,
+ struct list_head *head)
+{
+ struct super_block *sb = lqinode->i_sb;
+ struct buffer_head *hbh;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int i, chunks = le32_to_cpu(ldinfo->dqi_chunks);
+ int status = 0;
+
+ for (i = 0; i < chunks; i++) {
+ hbh = NULL;
+ status = ocfs2_read_quota_block(lqinode,
+ ol_quota_chunk_block(sb, i),
+ &hbh);
+ if (status) {
+ mlog_errno(status);
+ break;
+ }
+ dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
+ if (le32_to_cpu(dchunk->dqc_free) < ol_chunk_entries(sb))
+ status = ocfs2_add_recovery_chunk(sb, dchunk, i, head);
+ brelse(hbh);
+ if (status < 0)
+ break;
+ }
+ if (status < 0)
+ free_recovery_list(head);
+ return status;
+}
+
+static struct ocfs2_quota_recovery *ocfs2_alloc_quota_recovery(void)
+{
+ int type;
+ struct ocfs2_quota_recovery *rec;
+
+ rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS);
+ if (!rec)
+ return NULL;
+ for (type = 0; type < MAXQUOTAS; type++)
+ INIT_LIST_HEAD(&(rec->r_list[type]));
+ return rec;
+}
+
+/* Load information we need for quota recovery into memory */
+struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
+ struct ocfs2_super *osb,
+ int slot_num)
+{
+ unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+ unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+ LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+ struct super_block *sb = osb->sb;
+ struct ocfs2_local_disk_dqinfo *ldinfo;
+ struct inode *lqinode;
+ struct buffer_head *bh;
+ int type;
+ int status = 0;
+ struct ocfs2_quota_recovery *rec;
+
+ mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num);
+ rec = ocfs2_alloc_quota_recovery();
+ if (!rec)
+ return ERR_PTR(-ENOMEM);
+ /* First init... */
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+ continue;
+ /* At this point, journal of the slot is already replayed so
+ * we can trust metadata and data of the quota file */
+ lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
+ if (!lqinode) {
+ status = -ENOENT;
+ goto out;
+ }
+ status = ocfs2_inode_lock_full(lqinode, NULL, 1,
+ OCFS2_META_LOCK_RECOVERY);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_put;
+ }
+ /* Now read local header */
+ bh = NULL;
+ status = ocfs2_read_quota_block(lqinode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read quota file info header "
+ "(slot=%d type=%d)\n", slot_num, type);
+ goto out_lock;
+ }
+ ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+ OCFS2_LOCAL_INFO_OFF);
+ status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
+ &rec->r_list[type]);
+ brelse(bh);
+out_lock:
+ ocfs2_inode_unlock(lqinode, 1);
+out_put:
+ iput(lqinode);
+ if (status < 0)
+ break;
+ }
+out:
+ if (status < 0) {
+ ocfs2_free_quota_recovery(rec);
+ rec = ERR_PTR(status);
+ }
+ return rec;
+}
+
+/* Sync changes in local quota file into global quota file and
+ * reinitialize local quota file.
+ * The function expects local quota file to be already locked and
+ * dqonoff_mutex locked. */
+static int ocfs2_recover_local_quota_file(struct inode *lqinode,
+ int type,
+ struct ocfs2_quota_recovery *rec)
+{
+ struct super_block *sb = lqinode->i_sb;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ struct ocfs2_local_disk_chunk *dchunk;
+ struct ocfs2_local_disk_dqblk *dqblk;
+ struct dquot *dquot;
+ handle_t *handle;
+ struct buffer_head *hbh = NULL, *qbh = NULL;
+ int status = 0;
+ int bit, chunk;
+ struct ocfs2_recovery_chunk *rchunk, *next;
+ qsize_t spacechange, inodechange;
+
+ mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type);
+
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+
+ list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
+ chunk = rchunk->rc_chunk;
+ hbh = NULL;
+ status = ocfs2_read_quota_block(lqinode,
+ ol_quota_chunk_block(sb, chunk),
+ &hbh);
+ if (status) {
+ mlog_errno(status);
+ break;
+ }
+ dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
+ for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) {
+ qbh = NULL;
+ status = ocfs2_read_quota_block(lqinode,
+ ol_dqblk_block(sb, chunk, bit),
+ &qbh);
+ if (status) {
+ mlog_errno(status);
+ break;
+ }
+ dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data +
+ ol_dqblk_block_off(sb, chunk, bit));
+ dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type);
+ if (!dquot) {
+ status = -EIO;
+ mlog(ML_ERROR, "Failed to get quota structure "
+ "for id %u, type %d. Cannot finish quota "
+ "file recovery.\n",
+ (unsigned)le64_to_cpu(dqblk->dqb_id),
+ type);
+ goto out_put_bh;
+ }
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ OCFS2_QSYNC_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_put_dquot;
+ }
+ mutex_lock(&sb_dqopt(sb)->dqio_mutex);
+ spin_lock(&dq_data_lock);
+ /* Add usage from quota entry into quota changes
+ * of our node. Auxiliary variables are important
+ * due to signedness */
+ spacechange = le64_to_cpu(dqblk->dqb_spacemod);
+ inodechange = le64_to_cpu(dqblk->dqb_inodemod);
+ dquot->dq_dqb.dqb_curspace += spacechange;
+ dquot->dq_dqb.dqb_curinodes += inodechange;
+ spin_unlock(&dq_data_lock);
+ /* We want to drop reference held by the crashed
+ * node. Since we have our own reference we know
+ * global structure actually won't be freed. */
+ status = ocfs2_global_release_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_commit;
+ }
+ /* Release local quota file entry */
+ status = ocfs2_journal_access(handle, lqinode,
+ qbh, OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_commit;
+ }
+ lock_buffer(qbh);
+ WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap));
+ ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
+ le32_add_cpu(&dchunk->dqc_free, 1);
+ unlock_buffer(qbh);
+ status = ocfs2_journal_dirty(handle, qbh);
+ if (status < 0)
+ mlog_errno(status);
+out_commit:
+ mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out_put_dquot:
+ dqput(dquot);
+out_put_bh:
+ brelse(qbh);
+ if (status < 0)
+ break;
+ }
+ brelse(hbh);
+ list_del(&rchunk->rc_list);
+ kfree(rchunk->rc_bitmap);
+ kfree(rchunk);
+ if (status < 0)
+ break;
+ }
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ if (status < 0)
+ free_recovery_list(&(rec->r_list[type]));
+ mlog_exit(status);
+ return status;
+}
+
+/* Recover local quota files for given node different from us */
+int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
+ struct ocfs2_quota_recovery *rec,
+ int slot_num)
+{
+ unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+ LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+ struct super_block *sb = osb->sb;
+ struct ocfs2_local_disk_dqinfo *ldinfo;
+ struct buffer_head *bh;
+ handle_t *handle;
+ int type;
+ int status = 0;
+ struct inode *lqinode;
+ unsigned int flags;
+
+ mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", slot_num);
+ mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (list_empty(&(rec->r_list[type])))
+ continue;
+ mlog(0, "Recovering quota in slot %d\n", slot_num);
+ lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
+ if (!lqinode) {
+ status = -ENOENT;
+ goto out;
+ }
+ status = ocfs2_inode_lock_full(lqinode, NULL, 1,
+ OCFS2_META_LOCK_NOQUEUE);
+ /* Someone else is holding the lock? Then he must be
+ * doing the recovery. Just skip the file... */
+ if (status == -EAGAIN) {
+ mlog(ML_NOTICE, "skipping quota recovery for slot %d "
+ "because quota file is locked.\n", slot_num);
+ status = 0;
+ goto out_put;
+ } else if (status < 0) {
+ mlog_errno(status);
+ goto out_put;
+ }
+ /* Now read local header */
+ bh = NULL;
+ status = ocfs2_read_quota_block(lqinode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read quota file info header "
+ "(slot=%d type=%d)\n", slot_num, type);
+ goto out_lock;
+ }
+ ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+ OCFS2_LOCAL_INFO_OFF);
+ /* Is recovery still needed? */
+ flags = le32_to_cpu(ldinfo->dqi_flags);
+ if (!(flags & OLQF_CLEAN))
+ status = ocfs2_recover_local_quota_file(lqinode,
+ type,
+ rec);
+ /* We don't want to mark file as clean when it is actually
+ * active */
+ if (slot_num == osb->slot_num)
+ goto out_bh;
+ /* Mark quota file as clean if we are recovering quota file of
+ * some other node. */
+ handle = ocfs2_start_trans(osb, 1);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_bh;
+ }
+ status = ocfs2_journal_access(handle, lqinode, bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ lock_buffer(bh);
+ ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
+ unlock_buffer(bh);
+ status = ocfs2_journal_dirty(handle, bh);
+ if (status < 0)
+ mlog_errno(status);
+out_trans:
+ ocfs2_commit_trans(osb, handle);
+out_bh:
+ brelse(bh);
+out_lock:
+ ocfs2_inode_unlock(lqinode, 1);
+out_put:
+ iput(lqinode);
+ if (status < 0)
+ break;
+ }
+out:
+ mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+ kfree(rec);
+ return status;
+}
+
+/* Read information header from quota file */
+static int ocfs2_local_read_info(struct super_block *sb, int type)
+{
+ struct ocfs2_local_disk_dqinfo *ldinfo;
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo;
+ struct inode *lqinode = sb_dqopt(sb)->files[type];
+ int status;
+ struct buffer_head *bh = NULL;
+ struct ocfs2_quota_recovery *rec;
+ int locked = 0;
+
+ info->dqi_maxblimit = 0x7fffffffffffffffLL;
+ info->dqi_maxilimit = 0x7fffffffffffffffLL;
+ oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
+ if (!oinfo) {
+ mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
+ " info.");
+ goto out_err;
+ }
+ info->dqi_priv = oinfo;
+ oinfo->dqi_type = type;
+ INIT_LIST_HEAD(&oinfo->dqi_chunk);
+ oinfo->dqi_rec = NULL;
+ oinfo->dqi_lqi_bh = NULL;
+ oinfo->dqi_ibh = NULL;
+
+ status = ocfs2_global_read_info(sb, type);
+ if (status < 0)
+ goto out_err;
+
+ status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+ locked = 1;
+
+ /* Now read local header */
+ status = ocfs2_read_quota_block(lqinode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read quota file info header "
+ "(type=%d)\n", type);
+ goto out_err;
+ }
+ ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+ OCFS2_LOCAL_INFO_OFF);
+ info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
+ oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
+ oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
+ oinfo->dqi_ibh = bh;
+
+ /* We crashed when using local quota file? */
+ if (!(info->dqi_flags & OLQF_CLEAN)) {
+ rec = OCFS2_SB(sb)->quota_rec;
+ if (!rec) {
+ rec = ocfs2_alloc_quota_recovery();
+ if (!rec) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out_err;
+ }
+ OCFS2_SB(sb)->quota_rec = rec;
+ }
+
+ status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
+ &rec->r_list[type]);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+ }
+
+ status = ocfs2_load_local_quota_bitmaps(lqinode,
+ ldinfo,
+ &oinfo->dqi_chunk);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+
+ /* Now mark quota file as used */
+ info->dqi_flags &= ~OLQF_CLEAN;
+ status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+
+ return 0;
+out_err:
+ if (oinfo) {
+ iput(oinfo->dqi_gqinode);
+ ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
+ ocfs2_lock_res_free(&oinfo->dqi_gqlock);
+ brelse(oinfo->dqi_lqi_bh);
+ if (locked)
+ ocfs2_inode_unlock(lqinode, 1);
+ ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+ kfree(oinfo);
+ }
+ brelse(bh);
+ return -1;
+}
+
+/* Write local info to quota file */
+static int ocfs2_local_write_info(struct super_block *sb, int type)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv)
+ ->dqi_ibh;
+ int status;
+
+ status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info,
+ info);
+ if (status < 0) {
+ mlog_errno(status);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Release info from memory */
+static int ocfs2_local_free_info(struct super_block *sb, int type)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_quota_chunk *chunk;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int mark_clean = 1, len;
+ int status;
+
+ /* At this point we know there are no more dquots and thus
+ * even if there's some sync in the pdflush queue, it won't
+ * find any dquots and return without doing anything */
+ del_timer_sync(&oinfo->dqi_sync_timer);
+ iput(oinfo->dqi_gqinode);
+ ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
+ ocfs2_lock_res_free(&oinfo->dqi_gqlock);
+ list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+ dchunk = (struct ocfs2_local_disk_chunk *)
+ (chunk->qc_headerbh->b_data);
+ if (chunk->qc_num < oinfo->dqi_chunks - 1) {
+ len = ol_chunk_entries(sb);
+ } else {
+ len = (oinfo->dqi_blocks -
+ ol_quota_chunk_block(sb, chunk->qc_num) - 1)
+ * ol_quota_entries_per_block(sb);
+ }
+ /* Not all entries free? Bug! */
+ if (le32_to_cpu(dchunk->dqc_free) != len) {
+ mlog(ML_ERROR, "releasing quota file with used "
+ "entries (type=%d)\n", type);
+ mark_clean = 0;
+ }
+ }
+ ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+
+ /* dqonoff_mutex protects us against racing with recovery thread... */
+ if (oinfo->dqi_rec) {
+ ocfs2_free_quota_recovery(oinfo->dqi_rec);
+ mark_clean = 0;
+ }
+
+ if (!mark_clean)
+ goto out;
+
+ /* Mark local file as clean */
+ info->dqi_flags |= OLQF_CLEAN;
+ status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
+ oinfo->dqi_ibh,
+ olq_update_info,
+ info);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+out:
+ ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1);
+ brelse(oinfo->dqi_ibh);
+ brelse(oinfo->dqi_lqi_bh);
+ kfree(oinfo);
+ return 0;
+}
+
+static void olq_set_dquot(struct buffer_head *bh, void *private)
+{
+ struct ocfs2_dquot *od = private;
+ struct ocfs2_local_disk_dqblk *dqblk;
+ struct super_block *sb = od->dq_dquot.dq_sb;
+
+ dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data
+ + ol_dqblk_block_offset(sb, od->dq_local_off));
+
+ dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
+ spin_lock(&dq_data_lock);
+ dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
+ od->dq_origspace);
+ dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
+ od->dq_originodes);
+ spin_unlock(&dq_data_lock);
+ mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
+ od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
+ (long long)le64_to_cpu(dqblk->dqb_inodemod));
+}
+
+/* Write dquot to local quota file */
+static int ocfs2_local_write_dquot(struct dquot *dquot)
+{
+ struct super_block *sb = dquot->dq_sb;
+ struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+ struct buffer_head *bh = NULL;
+ int status;
+
+ status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type],
+ ol_dqblk_file_block(sb, od->dq_local_off),
+ &bh);
+ if (status) {
+ mlog_errno(status);
+ goto out;
+ }
+ status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh,
+ olq_set_dquot, od);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+out:
+ brelse(bh);
+ return status;
+}
+
+/* Find free entry in local quota file */
+static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb,
+ int type,
+ int *offset)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_quota_chunk *chunk;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int found = 0, len;
+
+ list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+ dchunk = (struct ocfs2_local_disk_chunk *)
+ chunk->qc_headerbh->b_data;
+ if (le32_to_cpu(dchunk->dqc_free) > 0) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return NULL;
+
+ if (chunk->qc_num < oinfo->dqi_chunks - 1) {
+ len = ol_chunk_entries(sb);
+ } else {
+ len = (oinfo->dqi_blocks -
+ ol_quota_chunk_block(sb, chunk->qc_num) - 1)
+ * ol_quota_entries_per_block(sb);
+ }
+
+ found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0);
+ /* We failed? */
+ if (found == len) {
+ mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u"
+ " entries free (type=%d)\n", chunk->qc_num,
+ le32_to_cpu(dchunk->dqc_free), type);
+ return ERR_PTR(-EIO);
+ }
+ *offset = found;
+ return chunk;
+}
+
+/* Add new chunk to the local quota file */
+static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
+ struct super_block *sb,
+ int type,
+ int *offset)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct inode *lqinode = sb_dqopt(sb)->files[type];
+ struct ocfs2_quota_chunk *chunk = NULL;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int status;
+ handle_t *handle;
+ struct buffer_head *bh = NULL;
+ u64 p_blkno;
+
+ /* We are protected by dqio_sem so no locking needed */
+ status = ocfs2_extend_no_holes(lqinode,
+ lqinode->i_size + 2 * sb->s_blocksize,
+ lqinode->i_size);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
+ lqinode->i_size + 2 * sb->s_blocksize);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ chunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
+ if (!chunk) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out;
+ }
+
+ down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+ status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
+ &p_blkno, NULL, NULL);
+ up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ bh = sb_getblk(sb, p_blkno);
+ if (!bh) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out;
+ }
+ dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
+
+ handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_journal_access(handle, lqinode, bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ lock_buffer(bh);
+ dchunk->dqc_free = cpu_to_le32(ol_quota_entries_per_block(sb));
+ memset(dchunk->dqc_bitmap, 0,
+ sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
+ OCFS2_QBLK_RESERVED_SPACE);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ status = ocfs2_journal_dirty(handle, bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+
+ oinfo->dqi_blocks += 2;
+ oinfo->dqi_chunks++;
+ status = ocfs2_local_write_info(sb, type);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ list_add_tail(&chunk->qc_chunk, &oinfo->dqi_chunk);
+ chunk->qc_num = list_entry(chunk->qc_chunk.prev,
+ struct ocfs2_quota_chunk,
+ qc_chunk)->qc_num + 1;
+ chunk->qc_headerbh = bh;
+ *offset = 0;
+ return chunk;
+out_trans:
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+ brelse(bh);
+ kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
+ return ERR_PTR(status);
+}
+
+/* Find free entry in local quota file */
+static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
+ struct super_block *sb,
+ int type,
+ int *offset)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_quota_chunk *chunk;
+ struct inode *lqinode = sb_dqopt(sb)->files[type];
+ struct ocfs2_local_disk_chunk *dchunk;
+ int epb = ol_quota_entries_per_block(sb);
+ unsigned int chunk_blocks;
+ int status;
+ handle_t *handle;
+
+ if (list_empty(&oinfo->dqi_chunk))
+ return ocfs2_local_quota_add_chunk(sb, type, offset);
+ /* Is the last chunk full? */
+ chunk = list_entry(oinfo->dqi_chunk.prev,
+ struct ocfs2_quota_chunk, qc_chunk);
+ chunk_blocks = oinfo->dqi_blocks -
+ ol_quota_chunk_block(sb, chunk->qc_num) - 1;
+ if (ol_chunk_blocks(sb) == chunk_blocks)
+ return ocfs2_local_quota_add_chunk(sb, type, offset);
+
+ /* We are protected by dqio_sem so no locking needed */
+ status = ocfs2_extend_no_holes(lqinode,
+ lqinode->i_size + sb->s_blocksize,
+ lqinode->i_size);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
+ lqinode->i_size + sb->s_blocksize);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+ status = ocfs2_journal_access(handle, lqinode, chunk->qc_headerbh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+
+ dchunk = (struct ocfs2_local_disk_chunk *)chunk->qc_headerbh->b_data;
+ lock_buffer(chunk->qc_headerbh);
+ le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
+ unlock_buffer(chunk->qc_headerbh);
+ status = ocfs2_journal_dirty(handle, chunk->qc_headerbh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ oinfo->dqi_blocks++;
+ status = ocfs2_local_write_info(sb, type);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+
+ status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ *offset = chunk_blocks * epb;
+ return chunk;
+out_trans:
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+ return ERR_PTR(status);
+}
+
+static void olq_alloc_dquot(struct buffer_head *bh, void *private)
+{
+ int *offset = private;
+ struct ocfs2_local_disk_chunk *dchunk;
+
+ dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
+ ocfs2_set_bit(*offset, dchunk->dqc_bitmap);
+ le32_add_cpu(&dchunk->dqc_free, -1);
+}
+
+/* Create dquot in the local file for given id */
+static int ocfs2_create_local_dquot(struct dquot *dquot)
+{
+ struct super_block *sb = dquot->dq_sb;
+ int type = dquot->dq_type;
+ struct inode *lqinode = sb_dqopt(sb)->files[type];
+ struct ocfs2_quota_chunk *chunk;
+ struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+ int offset;
+ int status;
+
+ chunk = ocfs2_find_free_entry(sb, type, &offset);
+ if (!chunk) {
+ chunk = ocfs2_extend_local_quota_file(sb, type, &offset);
+ if (IS_ERR(chunk))
+ return PTR_ERR(chunk);
+ } else if (IS_ERR(chunk)) {
+ return PTR_ERR(chunk);
+ }
+ od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset);
+ od->dq_chunk = chunk;
+
+ /* Initialize dquot structure on disk */
+ status = ocfs2_local_write_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ /* Mark structure as allocated */
+ status = ocfs2_modify_bh(lqinode, chunk->qc_headerbh, olq_alloc_dquot,
+ &offset);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+out:
+ return status;
+}
+
+/* Create entry in local file for dquot, load data from the global file */
+static int ocfs2_local_read_dquot(struct dquot *dquot)
+{
+ int status;
+
+ mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type);
+
+ status = ocfs2_global_read_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+
+ /* Now create entry in the local quota file */
+ status = ocfs2_create_local_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+ mlog_exit(0);
+ return 0;
+out_err:
+ mlog_exit(status);
+ return status;
+}
+
+/* Release dquot structure from local quota file. ocfs2_release_dquot() has
+ * already started a transaction and obtained exclusive lock for global
+ * quota file. */
+static int ocfs2_local_release_dquot(struct dquot *dquot)
+{
+ int status;
+ int type = dquot->dq_type;
+ struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+ struct super_block *sb = dquot->dq_sb;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int offset;
+ handle_t *handle = journal_current_handle();
+
+ BUG_ON(!handle);
+ /* First write all local changes to global file */
+ status = ocfs2_global_release_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_journal_access(handle, sb_dqopt(sb)->files[type],
+ od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ offset = ol_dqblk_chunk_off(sb, od->dq_chunk->qc_num,
+ od->dq_local_off);
+ dchunk = (struct ocfs2_local_disk_chunk *)
+ (od->dq_chunk->qc_headerbh->b_data);
+ /* Mark structure as freed */
+ lock_buffer(od->dq_chunk->qc_headerbh);
+ ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
+ le32_add_cpu(&dchunk->dqc_free, 1);
+ unlock_buffer(od->dq_chunk->qc_headerbh);
+ status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ status = 0;
+out:
+ /* Clear the read bit so that next time someone uses this
+ * dquot he reads fresh info from disk and allocates local
+ * dquot structure */
+ clear_bit(DQ_READ_B, &dquot->dq_flags);
+ return status;
+}
+
+static struct quota_format_ops ocfs2_format_ops = {
+ .check_quota_file = ocfs2_local_check_quota_file,
+ .read_file_info = ocfs2_local_read_info,
+ .write_file_info = ocfs2_global_write_info,
+ .free_file_info = ocfs2_local_free_info,
+ .read_dqblk = ocfs2_local_read_dquot,
+ .commit_dqblk = ocfs2_local_write_dquot,
+ .release_dqblk = ocfs2_local_release_dquot,
+};
+
+struct quota_format_type ocfs2_quota_format = {
+ .qf_fmt_id = QFMT_OCFS2,
+ .qf_ops = &ocfs2_format_ops,
+ .qf_owner = THIS_MODULE
+};
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index ffd48db229a7..867de3ebfcaf 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
+ /* main_bm_bh is validated by inode read inside ocfs2_inode_lock(),
+ * so any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
ocfs2_group_bitmap_size(osb->sb) * 8) {
mlog(ML_ERROR, "The disk is too old and small. "
@@ -322,30 +326,18 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
goto out_unlock;
}
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe);
- ret = -EIO;
- goto out_unlock;
- }
-
first_new_cluster = le32_to_cpu(fe->i_clusters);
lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
first_new_cluster - 1);
- ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
+ ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno,
+ &group_bh);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
}
-
group = (struct ocfs2_group_desc *)group_bh->b_data;
- ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group);
- if (ret) {
- mlog_errno(ret);
- goto out_unlock;
- }
-
cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
@@ -398,41 +390,16 @@ static int ocfs2_check_new_group(struct inode *inode,
struct buffer_head *group_bh)
{
int ret;
- struct ocfs2_group_desc *gd;
+ struct ocfs2_group_desc *gd =
+ (struct ocfs2_group_desc *)group_bh->b_data;
u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
- unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) *
- le16_to_cpu(di->id2.i_chain.cl_bpc);
-
- gd = (struct ocfs2_group_desc *)group_bh->b_data;
+ ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
+ if (ret)
+ goto out;
- ret = -EIO;
- if (!OCFS2_IS_VALID_GROUP_DESC(gd))
- mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno));
- else if (di->i_blkno != gd->bg_parent_dinode)
- mlog(ML_ERROR, "Group descriptor # %llu has bad parent "
- "pointer (%llu, expected %llu)\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
- (unsigned long long)le64_to_cpu(di->i_blkno));
- else if (le16_to_cpu(gd->bg_bits) > max_bits)
- mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits));
- else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits))
- mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
- "claims that %u are free\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- le16_to_cpu(gd->bg_free_bits_count));
- else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size)))
- mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
- "max bitmap bits of %u\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- 8 * le16_to_cpu(gd->bg_size));
- else if (le16_to_cpu(gd->bg_chain) != input->chain)
+ ret = -EINVAL;
+ if (le16_to_cpu(gd->bg_chain) != input->chain)
mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
"while input has %u set.\n",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
@@ -451,6 +418,7 @@ static int ocfs2_check_new_group(struct inode *inode,
else
ret = 0;
+out:
return ret;
}
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index bdda2d8f8508..40661e7824e9 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
* this is not true, the read of -1 (UINT64_MAX) will fail.
*/
ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
- OCFS2_BH_IGNORE_CACHE);
+ OCFS2_BH_IGNORE_CACHE, NULL);
if (ret == 0) {
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
@@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
bh = NULL; /* Acquire a fresh bh */
status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
- OCFS2_BH_IGNORE_CACHE);
+ OCFS2_BH_IGNORE_CACHE, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index faec2d879357..9b76d41a8ac6 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -740,6 +740,9 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
{
+ if (!lksb->lksb_fsdlm.sb_lvbptr)
+ lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
+ sizeof(struct dlm_lksb);
return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c5ff18b46b57..226fe21f2608 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -145,62 +145,151 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
}
-/* somewhat more expensive than our other checks, so use sparingly. */
-int ocfs2_check_group_descriptor(struct super_block *sb,
- struct ocfs2_dinode *di,
- struct ocfs2_group_desc *gd)
+#define do_error(fmt, ...) \
+ do{ \
+ if (clean_error) \
+ mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \
+ else \
+ ocfs2_error(sb, fmt, ##__VA_ARGS__); \
+ } while (0)
+
+static int ocfs2_validate_gd_self(struct super_block *sb,
+ struct buffer_head *bh,
+ int clean_error)
{
- unsigned int max_bits;
+ struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
- return -EIO;
+ do_error("Group descriptor #%llu has bad signature %.*s",
+ (unsigned long long)bh->b_blocknr, 7,
+ gd->bg_signature);
+ return -EINVAL;
+ }
+
+ if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
+ do_error("Group descriptor #%llu has an invalid bg_blkno "
+ "of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(gd->bg_blkno));
+ return -EINVAL;
}
+ if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
+ do_error("Group descriptor #%llu has an invalid "
+ "fs_generation of #%u",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(gd->bg_generation));
+ return -EINVAL;
+ }
+
+ if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+ do_error("Group descriptor #%llu has bit count %u but "
+ "claims that %u are free",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(gd->bg_bits),
+ le16_to_cpu(gd->bg_free_bits_count));
+ return -EINVAL;
+ }
+
+ if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+ do_error("Group descriptor #%llu has bit count %u but "
+ "max bitmap bits of %u",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(gd->bg_bits),
+ 8 * le16_to_cpu(gd->bg_size));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ocfs2_validate_gd_parent(struct super_block *sb,
+ struct ocfs2_dinode *di,
+ struct buffer_head *bh,
+ int clean_error)
+{
+ unsigned int max_bits;
+ struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
if (di->i_blkno != gd->bg_parent_dinode) {
- ocfs2_error(sb, "Group descriptor # %llu has bad parent "
- "pointer (%llu, expected %llu)",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
- (unsigned long long)le64_to_cpu(di->i_blkno));
- return -EIO;
+ do_error("Group descriptor #%llu has bad parent "
+ "pointer (%llu, expected %llu)",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ return -EINVAL;
}
max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
if (le16_to_cpu(gd->bg_bits) > max_bits) {
- ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits));
- return -EIO;
+ do_error("Group descriptor #%llu has bit count of %u",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(gd->bg_bits));
+ return -EINVAL;
}
if (le16_to_cpu(gd->bg_chain) >=
le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
- ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_chain));
- return -EIO;
+ do_error("Group descriptor #%llu has bad chain %u",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(gd->bg_chain));
+ return -EINVAL;
}
- if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
- ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
- "claims that %u are free",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- le16_to_cpu(gd->bg_free_bits_count));
- return -EIO;
- }
+ return 0;
+}
- if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
- ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
- "max bitmap bits of %u",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- 8 * le16_to_cpu(gd->bg_size));
- return -EIO;
+#undef do_error
+
+/*
+ * This version only prints errors. It does not fail the filesystem, and
+ * exists only for resize.
+ */
+int ocfs2_check_group_descriptor(struct super_block *sb,
+ struct ocfs2_dinode *di,
+ struct buffer_head *bh)
+{
+ int rc;
+
+ rc = ocfs2_validate_gd_self(sb, bh, 1);
+ if (!rc)
+ rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
+
+ return rc;
+}
+
+static int ocfs2_validate_group_descriptor(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ mlog(0, "Validating group descriptor %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ return ocfs2_validate_gd_self(sb, bh, 0);
+}
+
+int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
+ u64 gd_blkno, struct buffer_head **bh)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_block(inode, gd_blkno, &tmp,
+ ocfs2_validate_group_descriptor);
+ if (rc)
+ goto out;
+
+ rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
+ if (rc) {
+ brelse(tmp);
+ goto out;
}
- return 0;
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!*bh)
+ *bh = tmp;
+
+out:
+ return rc;
}
static int ocfs2_block_group_fill(handle_t *handle,
@@ -441,11 +530,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
ac->ac_alloc_slot = slot;
fe = (struct ocfs2_dinode *) bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
- status = -EIO;
- goto bail;
- }
+
+ /* The bh was validated by the inode read inside
+ * ocfs2_inode_lock(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
(unsigned long long)le64_to_cpu(fe->i_blkno));
@@ -790,10 +879,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
int offset, start, found, status = 0;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
- return -EIO;
- }
+ /* Callers got this descriptor from
+ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
found = start = best_offset = best_size = 0;
bitmap = bg->bg_bitmap;
@@ -858,11 +946,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
mlog_entry_void();
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
- goto bail;
- }
+ /* All callers get the descriptor via
+ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
@@ -931,21 +1017,10 @@ static int ocfs2_relink_block_group(handle_t *handle,
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
- status = -EIO;
- goto out;
- }
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
- goto out;
- }
- if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
- status = -EIO;
- goto out;
- }
+ /* The caller got these descriptors from
+ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
(unsigned long long)le64_to_cpu(fe->i_blkno), chain,
@@ -1008,7 +1083,7 @@ out_rollback:
bg->bg_next_group = cpu_to_le64(bg_ptr);
prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
}
-out:
+
mlog_exit(status);
return status;
}
@@ -1170,21 +1245,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
u16 found;
struct buffer_head *group_bh = NULL;
struct ocfs2_group_desc *gd;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
struct inode *alloc_inode = ac->ac_inode;
- ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
+ ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
+ &group_bh);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
gd = (struct ocfs2_group_desc *) group_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
- ret = -EIO;
- goto out;
- }
-
ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
ac->ac_max_block, bit_off, &found);
if (ret < 0) {
@@ -1241,19 +1312,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
bits_wanted, chain,
(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
- status = ocfs2_read_block(alloc_inode,
- le64_to_cpu(cl->cl_recs[chain].c_blkno),
- &group_bh);
+ status = ocfs2_read_group_descriptor(alloc_inode, fe,
+ le64_to_cpu(cl->cl_recs[chain].c_blkno),
+ &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
- status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
- if (status) {
- mlog_errno(status);
- goto bail;
- }
status = -ENOSPC;
/* for now, the chain search is a bit simplistic. We just use
@@ -1271,18 +1337,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
next_group = le64_to_cpu(bg->bg_next_group);
prev_group_bh = group_bh;
group_bh = NULL;
- status = ocfs2_read_block(alloc_inode,
- next_group, &group_bh);
+ status = ocfs2_read_group_descriptor(alloc_inode, fe,
+ next_group, &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
- status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
- if (status) {
- mlog_errno(status);
- goto bail;
- }
}
if (status < 0) {
if (status != -ENOSPC)
@@ -1392,11 +1453,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
BUG_ON(!ac->ac_bh);
fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe);
- status = -EIO;
- goto bail;
- }
+
+ /* The bh was validated by the inode read during
+ * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
le32_to_cpu(fe->id1.bitmap1.i_total)) {
ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
@@ -1725,11 +1786,9 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
mlog_entry_void();
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
- goto bail;
- }
+ /* The caller got this descriptor from
+ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
@@ -1782,29 +1841,26 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
mlog_entry_void();
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
- status = -EIO;
- goto bail;
- }
+ /* The alloc_bh comes from ocfs2_free_dinode() or
+ * ocfs2_free_clusters(). The callers have all locked the
+ * allocator and gotten alloc_bh from the lock call. This
+ * validates the dinode buffer. Any corruption that has happended
+ * is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
(unsigned long long)bg_blkno, start_bit);
- status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
+ status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
+ &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
-
group = (struct ocfs2_group_desc *) group_bh->b_data;
- status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
- if (status) {
- mlog_errno(status);
- goto bail;
- }
+
BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
status = ocfs2_block_group_clear_bits(handle, alloc_inode,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 4df159d8f450..e3c13c77f9e8 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -164,10 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
* and return that block offset. */
u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
-/* somewhat more expensive than our other checks, so use sparingly. */
+/*
+ * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
+ * finds a problem. A caller that wants to check a group descriptor
+ * without going readonly should read the block with ocfs2_read_block[s]()
+ * and then checking it with this function. This is only resize, really.
+ * Everyone else should be using ocfs2_read_group_descriptor().
+ */
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
- struct ocfs2_group_desc *gd);
+ struct buffer_head *bh);
+/*
+ * Read a group descriptor block into *bh. If *bh is NULL, a bh will be
+ * allocated. This is a cached read. The descriptor will be validated with
+ * ocfs2_validate_group_descriptor().
+ */
+int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
+ u64 gd_blkno, struct buffer_head **bh);
+
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 304b63ac78cf..bc431386443e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,6 +41,7 @@
#include <linux/debugfs.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_SUPER
#include <cluster/masklog.h>
@@ -65,10 +66,13 @@
#include "uptodate.h"
#include "ver.h"
#include "xattr.h"
+#include "quota.h"
#include "buffer_head_io.h"
static struct kmem_cache *ocfs2_inode_cachep = NULL;
+struct kmem_cache *ocfs2_dquot_cachep;
+struct kmem_cache *ocfs2_qf_chunk_cachep;
/* OCFS2 needs to schedule several differnt types of work which
* require cluster locking, disk I/O, recovery waits, etc. Since these
@@ -124,6 +128,9 @@ static int ocfs2_get_sector(struct super_block *sb,
static void ocfs2_write_super(struct super_block *sb);
static struct inode *ocfs2_alloc_inode(struct super_block *sb);
static void ocfs2_destroy_inode(struct inode *inode);
+static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
+static int ocfs2_enable_quotas(struct ocfs2_super *osb);
+static void ocfs2_disable_quotas(struct ocfs2_super *osb);
static const struct super_operations ocfs2_sops = {
.statfs = ocfs2_statfs,
@@ -137,6 +144,8 @@ static const struct super_operations ocfs2_sops = {
.put_super = ocfs2_put_super,
.remount_fs = ocfs2_remount,
.show_options = ocfs2_show_options,
+ .quota_read = ocfs2_quota_read,
+ .quota_write = ocfs2_quota_write,
};
enum {
@@ -158,6 +167,10 @@ enum {
Opt_user_xattr,
Opt_nouser_xattr,
Opt_inode64,
+ Opt_acl,
+ Opt_noacl,
+ Opt_usrquota,
+ Opt_grpquota,
Opt_err,
};
@@ -180,6 +193,10 @@ static const match_table_t tokens = {
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_inode64, "inode64"},
+ {Opt_acl, "acl"},
+ {Opt_noacl, "noacl"},
+ {Opt_usrquota, "usrquota"},
+ {Opt_grpquota, "grpquota"},
{Opt_err, NULL}
};
@@ -221,6 +238,19 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
return 0;
}
+static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino)
+{
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
+ && (ino == USER_QUOTA_SYSTEM_INODE
+ || ino == LOCAL_USER_QUOTA_SYSTEM_INODE))
+ return 0;
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
+ && (ino == GROUP_QUOTA_SYSTEM_INODE
+ || ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE))
+ return 0;
+ return 1;
+}
+
static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
{
struct inode *new = NULL;
@@ -247,6 +277,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
+ if (!ocfs2_need_system_inode(osb, i))
+ continue;
new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
if (!new) {
ocfs2_release_system_inodes(osb);
@@ -277,6 +309,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
i < NUM_SYSTEM_INODES;
i++) {
+ if (!ocfs2_need_system_inode(osb, i))
+ continue;
new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
if (!new) {
ocfs2_release_system_inodes(osb);
@@ -426,6 +460,12 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
/* We're going to/from readonly mode. */
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
+ /* Disable quota accounting before remounting RO */
+ if (*flags & MS_RDONLY) {
+ ret = ocfs2_susp_quotas(osb, 0);
+ if (ret < 0)
+ goto out;
+ }
/* Lock here so the check of HARD_RO and the potential
* setting of SOFT_RO is atomic. */
spin_lock(&osb->osb_lock);
@@ -461,11 +501,28 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
}
unlock_osb:
spin_unlock(&osb->osb_lock);
+ /* Enable quota accounting after remounting RW */
+ if (!ret && !(*flags & MS_RDONLY)) {
+ if (sb_any_quota_suspended(sb))
+ ret = ocfs2_susp_quotas(osb, 1);
+ else
+ ret = ocfs2_enable_quotas(osb);
+ if (ret < 0) {
+ /* Return back changes... */
+ spin_lock(&osb->osb_lock);
+ sb->s_flags |= MS_RDONLY;
+ osb->osb_flags |= OCFS2_OSB_SOFT_RO;
+ spin_unlock(&osb->osb_lock);
+ goto out;
+ }
+ }
}
if (!ret) {
/* Only save off the new mount options in case of a successful
* remount. */
+ if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
+ parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
@@ -619,6 +676,131 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
return 0;
}
+static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
+{
+ int type;
+ struct super_block *sb = osb->sb;
+ unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+ int status = 0;
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+ continue;
+ if (unsuspend)
+ status = vfs_quota_enable(
+ sb_dqopt(sb)->files[type],
+ type, QFMT_OCFS2,
+ DQUOT_SUSPENDED);
+ else
+ status = vfs_quota_disable(sb, type,
+ DQUOT_SUSPENDED);
+ if (status < 0)
+ break;
+ }
+ if (status < 0)
+ mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on "
+ "remount (error = %d).\n", status);
+ return status;
+}
+
+static int ocfs2_enable_quotas(struct ocfs2_super *osb)
+{
+ struct inode *inode[MAXQUOTAS] = { NULL, NULL };
+ struct super_block *sb = osb->sb;
+ unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+ unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+ LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+ int status;
+ int type;
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE;
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+ continue;
+ inode[type] = ocfs2_get_system_file_inode(osb, ino[type],
+ osb->slot_num);
+ if (!inode[type]) {
+ status = -ENOENT;
+ goto out_quota_off;
+ }
+ status = vfs_quota_enable(inode[type], type, QFMT_OCFS2,
+ DQUOT_USAGE_ENABLED);
+ if (status < 0)
+ goto out_quota_off;
+ }
+
+ for (type = 0; type < MAXQUOTAS; type++)
+ iput(inode[type]);
+ return 0;
+out_quota_off:
+ ocfs2_disable_quotas(osb);
+ for (type = 0; type < MAXQUOTAS; type++)
+ iput(inode[type]);
+ mlog_errno(status);
+ return status;
+}
+
+static void ocfs2_disable_quotas(struct ocfs2_super *osb)
+{
+ int type;
+ struct inode *inode;
+ struct super_block *sb = osb->sb;
+
+ /* We mostly ignore errors in this function because there's not much
+ * we can do when we see them */
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!sb_has_quota_loaded(sb, type))
+ continue;
+ inode = igrab(sb->s_dquot.files[type]);
+ /* Turn off quotas. This will remove all dquot structures from
+ * memory and so they will be automatically synced to global
+ * quota files */
+ vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED);
+ if (!inode)
+ continue;
+ iput(inode);
+ }
+}
+
+/* Handle quota on quotactl */
+static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
+ char *path, int remount)
+{
+ unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+ return -EINVAL;
+
+ if (remount)
+ return 0; /* Just ignore it has been handled in
+ * ocfs2_remount() */
+ return vfs_quota_enable(sb_dqopt(sb)->files[type], type,
+ format_id, DQUOT_LIMITS_ENABLED);
+}
+
+/* Handle quota off quotactl */
+static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
+{
+ if (remount)
+ return 0; /* Ignore now and handle later in
+ * ocfs2_remount() */
+ return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
+}
+
+static struct quotactl_ops ocfs2_quotactl_ops = {
+ .quota_on = ocfs2_quota_on,
+ .quota_off = ocfs2_quota_off,
+ .quota_sync = vfs_quota_sync,
+ .get_info = vfs_get_dqinfo,
+ .set_info = vfs_set_dqinfo,
+ .get_dqblk = vfs_get_dqblk,
+ .set_dqblk = vfs_set_dqblk,
+};
+
static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
{
struct dentry *root;
@@ -651,12 +833,32 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
}
brelse(bh);
bh = NULL;
+
+ if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
+ parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
+
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
osb->osb_commit_interval = parsed_options.commit_interval;
osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
osb->local_alloc_bits = osb->local_alloc_default_bits;
+ if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
+ !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+ status = -EINVAL;
+ mlog(ML_ERROR, "User quotas were requested, but this "
+ "filesystem does not have the feature enabled.\n");
+ goto read_super_error;
+ }
+ if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
+ !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+ status = -EINVAL;
+ mlog(ML_ERROR, "Group quotas were requested, but this "
+ "filesystem does not have the feature enabled.\n");
+ goto read_super_error;
+ }
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
if (status)
@@ -664,6 +866,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = OCFS2_SUPER_MAGIC;
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+
/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
* heartbeat=none */
if (bdev_read_only(sb->s_bdev)) {
@@ -758,6 +963,28 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
+ /* Now we can initialize quotas because we can afford to wait
+ * for cluster locks recovery now. That also means that truncation
+ * log recovery can happen but that waits for proper quota setup */
+ if (!(sb->s_flags & MS_RDONLY)) {
+ status = ocfs2_enable_quotas(osb);
+ if (status < 0) {
+ /* We have to err-out specially here because
+ * s_root is already set */
+ mlog_errno(status);
+ atomic_set(&osb->vol_state, VOLUME_DISABLED);
+ wake_up(&osb->osb_mount_event);
+ mlog_exit(status);
+ return status;
+ }
+ }
+
+ ocfs2_complete_quota_recovery(osb);
+
+ /* Now we wake up again for processes waiting for quotas */
+ atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
+ wake_up(&osb->osb_mount_event);
+
mlog_exit(status);
return status;
@@ -945,6 +1172,41 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_inode64:
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
break;
+ case Opt_usrquota:
+ /* We check only on remount, otherwise features
+ * aren't yet initialized. */
+ if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+ mlog(ML_ERROR, "User quota requested but "
+ "filesystem feature is not set\n");
+ status = 0;
+ goto bail;
+ }
+ mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
+ break;
+ case Opt_grpquota:
+ if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+ mlog(ML_ERROR, "Group quota requested but "
+ "filesystem feature is not set\n");
+ status = 0;
+ goto bail;
+ }
+ mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
+ break;
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+ case Opt_acl:
+ mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
+ break;
+ case Opt_noacl:
+ mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
+ break;
+#else
+ case Opt_acl:
+ case Opt_noacl:
+ printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
+ break;
+#endif
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1008,6 +1270,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (osb->osb_cluster_stack[0])
seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
osb->osb_cluster_stack);
+ if (opts & OCFS2_MOUNT_USRQUOTA)
+ seq_printf(s, ",usrquota");
+ if (opts & OCFS2_MOUNT_GRPQUOTA)
+ seq_printf(s, ",grpquota");
if (opts & OCFS2_MOUNT_NOUSERXATTR)
seq_printf(s, ",nouser_xattr");
@@ -1017,6 +1283,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (opts & OCFS2_MOUNT_INODE64)
seq_printf(s, ",inode64");
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+ if (opts & OCFS2_MOUNT_POSIX_ACL)
+ seq_printf(s, ",acl");
+ else
+ seq_printf(s, ",noacl");
+#endif
+
return 0;
}
@@ -1054,6 +1327,7 @@ static int __init ocfs2_init(void)
ocfs2_set_locking_protocol();
+ status = register_quota_format(&ocfs2_quota_format);
leave:
if (status < 0) {
ocfs2_free_mem_caches();
@@ -1077,6 +1351,8 @@ static void __exit ocfs2_exit(void)
destroy_workqueue(ocfs2_wq);
}
+ unregister_quota_format(&ocfs2_quota_format);
+
debugfs_remove(ocfs2_debugfs_root);
ocfs2_free_mem_caches();
@@ -1192,8 +1468,27 @@ static int ocfs2_initialize_mem_caches(void)
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
ocfs2_inode_init_once);
- if (!ocfs2_inode_cachep)
+ ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
+ sizeof(struct ocfs2_dquot),
+ 0,
+ (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
+ NULL);
+ ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
+ sizeof(struct ocfs2_quota_chunk),
+ 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ NULL);
+ if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
+ !ocfs2_qf_chunk_cachep) {
+ if (ocfs2_inode_cachep)
+ kmem_cache_destroy(ocfs2_inode_cachep);
+ if (ocfs2_dquot_cachep)
+ kmem_cache_destroy(ocfs2_dquot_cachep);
+ if (ocfs2_qf_chunk_cachep)
+ kmem_cache_destroy(ocfs2_qf_chunk_cachep);
return -ENOMEM;
+ }
return 0;
}
@@ -1202,8 +1497,15 @@ static void ocfs2_free_mem_caches(void)
{
if (ocfs2_inode_cachep)
kmem_cache_destroy(ocfs2_inode_cachep);
-
ocfs2_inode_cachep = NULL;
+
+ if (ocfs2_dquot_cachep)
+ kmem_cache_destroy(ocfs2_dquot_cachep);
+ ocfs2_dquot_cachep = NULL;
+
+ if (ocfs2_qf_chunk_cachep)
+ kmem_cache_destroy(ocfs2_qf_chunk_cachep);
+ ocfs2_qf_chunk_cachep = NULL;
}
static int ocfs2_get_sector(struct super_block *sb,
@@ -1303,6 +1605,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
+ ocfs2_disable_quotas(osb);
+
ocfs2_shutdown_local_alloc(osb);
ocfs2_truncate_log_shutdown(osb);
@@ -1413,6 +1717,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
sb->s_fs_info = osb;
sb->s_op = &ocfs2_sops;
sb->s_export_op = &ocfs2_export_ops;
+ sb->s_qcop = &ocfs2_quotactl_ops;
+ sb->dq_op = &ocfs2_quota_operations;
sb->s_xattr = ocfs2_xattr_handlers;
sb->s_time_gran = 1;
sb->s_flags |= MS_NOATIME;
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index cbd03dfdc7b9..ed0a0cfd68d2 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
mlog_entry_void();
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh);
+ status = ocfs2_read_inode_block(inode, bh);
if (status < 0) {
mlog_errno(status);
link = ERR_PTR(status);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 054e2efb0b7e..7e0d62ac441b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -35,6 +35,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/security.h>
#define MLOG_MASK_PREFIX ML_XATTR
#include <cluster/masklog.h>
@@ -61,12 +62,32 @@ struct ocfs2_xattr_def_value_root {
};
struct ocfs2_xattr_bucket {
- struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
- struct ocfs2_xattr_header *xh;
+ /* The inode these xattrs are associated with */
+ struct inode *bu_inode;
+
+ /* The actual buffers that make up the bucket */
+ struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
+
+ /* How many blocks make up one bucket for this filesystem */
+ int bu_blocks;
+};
+
+struct ocfs2_xattr_set_ctxt {
+ handle_t *handle;
+ struct ocfs2_alloc_context *meta_ac;
+ struct ocfs2_alloc_context *data_ac;
+ struct ocfs2_cached_dealloc_ctxt dealloc;
};
#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
#define OCFS2_XATTR_INLINE_SIZE 80
+#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \
+ - sizeof(struct ocfs2_xattr_header) \
+ - sizeof(__u32))
+#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \
+ - sizeof(struct ocfs2_xattr_block) \
+ - sizeof(struct ocfs2_xattr_header) \
+ - sizeof(__u32))
static struct ocfs2_xattr_def_value_root def_xv = {
.xv.xr_list.l_count = cpu_to_le16(1),
@@ -74,13 +95,25 @@ static struct ocfs2_xattr_def_value_root def_xv = {
struct xattr_handler *ocfs2_xattr_handlers[] = {
&ocfs2_xattr_user_handler,
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+ &ocfs2_xattr_acl_access_handler,
+ &ocfs2_xattr_acl_default_handler,
+#endif
&ocfs2_xattr_trusted_handler,
+ &ocfs2_xattr_security_handler,
NULL
};
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
[OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+ [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
+ = &ocfs2_xattr_acl_access_handler,
+ [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
+ = &ocfs2_xattr_acl_default_handler,
+#endif
[OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
+ [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
};
struct ocfs2_xattr_info {
@@ -98,7 +131,7 @@ struct ocfs2_xattr_search {
*/
struct buffer_head *xattr_bh;
struct ocfs2_xattr_header *header;
- struct ocfs2_xattr_bucket bucket;
+ struct ocfs2_xattr_bucket *bucket;
void *base;
void *end;
struct ocfs2_xattr_entry *here;
@@ -127,11 +160,13 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
size_t buffer_size);
static int ocfs2_xattr_create_index_block(struct inode *inode,
- struct ocfs2_xattr_search *xs);
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt);
static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs);
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt);
static int ocfs2_delete_xattr_index_block(struct inode *inode,
struct buffer_head *xb_bh);
@@ -154,6 +189,187 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
return len / sizeof(struct ocfs2_xattr_entry);
}
+#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
+#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
+#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
+
+static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
+{
+ struct ocfs2_xattr_bucket *bucket;
+ int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+ BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
+
+ bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
+ if (bucket) {
+ bucket->bu_inode = inode;
+ bucket->bu_blocks = blks;
+ }
+
+ return bucket;
+}
+
+static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
+{
+ int i;
+
+ for (i = 0; i < bucket->bu_blocks; i++) {
+ brelse(bucket->bu_bhs[i]);
+ bucket->bu_bhs[i] = NULL;
+ }
+}
+
+static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
+{
+ if (bucket) {
+ ocfs2_xattr_bucket_relse(bucket);
+ bucket->bu_inode = NULL;
+ kfree(bucket);
+ }
+}
+
+/*
+ * A bucket that has never been written to disk doesn't need to be
+ * read. We just need the buffer_heads. Don't call this for
+ * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes
+ * them fully.
+ */
+static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
+ u64 xb_blkno)
+{
+ int i, rc = 0;
+
+ for (i = 0; i < bucket->bu_blocks; i++) {
+ bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
+ xb_blkno + i);
+ if (!bucket->bu_bhs[i]) {
+ rc = -EIO;
+ mlog_errno(rc);
+ break;
+ }
+
+ if (!ocfs2_buffer_uptodate(bucket->bu_inode,
+ bucket->bu_bhs[i]))
+ ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
+ bucket->bu_bhs[i]);
+ }
+
+ if (rc)
+ ocfs2_xattr_bucket_relse(bucket);
+ return rc;
+}
+
+/* Read the xattr bucket at xb_blkno */
+static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
+ u64 xb_blkno)
+{
+ int rc;
+
+ rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
+ bucket->bu_blocks, bucket->bu_bhs, 0,
+ NULL);
+ if (rc)
+ ocfs2_xattr_bucket_relse(bucket);
+ return rc;
+}
+
+static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
+ struct ocfs2_xattr_bucket *bucket,
+ int type)
+{
+ int i, rc = 0;
+
+ for (i = 0; i < bucket->bu_blocks; i++) {
+ rc = ocfs2_journal_access(handle, bucket->bu_inode,
+ bucket->bu_bhs[i], type);
+ if (rc) {
+ mlog_errno(rc);
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
+ struct ocfs2_xattr_bucket *bucket)
+{
+ int i;
+
+ for (i = 0; i < bucket->bu_blocks; i++)
+ ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
+}
+
+static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
+ struct ocfs2_xattr_bucket *src)
+{
+ int i;
+ int blocksize = src->bu_inode->i_sb->s_blocksize;
+
+ BUG_ON(dest->bu_blocks != src->bu_blocks);
+ BUG_ON(dest->bu_inode != src->bu_inode);
+
+ for (i = 0; i < src->bu_blocks; i++) {
+ memcpy(bucket_block(dest, i), bucket_block(src, i),
+ blocksize);
+ }
+}
+
+static int ocfs2_validate_xattr_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ struct ocfs2_xattr_block *xb =
+ (struct ocfs2_xattr_block *)bh->b_data;
+
+ mlog(0, "Validating xattr block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
+ ocfs2_error(sb,
+ "Extended attribute block #%llu has bad "
+ "signature %.*s",
+ (unsigned long long)bh->b_blocknr, 7,
+ xb->xb_signature);
+ return -EINVAL;
+ }
+
+ if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
+ ocfs2_error(sb,
+ "Extended attribute block #%llu has an "
+ "invalid xb_blkno of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(xb->xb_blkno));
+ return -EINVAL;
+ }
+
+ if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+ ocfs2_error(sb,
+ "Extended attribute block #%llu has an invalid "
+ "xb_fs_generation of #%u",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(xb->xb_fs_generation));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
+ struct buffer_head **bh)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_block(inode, xb_blkno, &tmp,
+ ocfs2_validate_xattr_block);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
static inline const char *ocfs2_xattr_prefix(int name_index)
{
struct xattr_handler *handler = NULL;
@@ -200,17 +416,135 @@ static void ocfs2_xattr_hash_entry(struct inode *inode,
return;
}
+static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
+{
+ int size = 0;
+
+ if (value_len <= OCFS2_XATTR_INLINE_SIZE)
+ size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
+ else
+ size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
+ size += sizeof(struct ocfs2_xattr_entry);
+
+ return size;
+}
+
+int ocfs2_calc_security_init(struct inode *dir,
+ struct ocfs2_security_xattr_info *si,
+ int *want_clusters,
+ int *xattr_credits,
+ struct ocfs2_alloc_context **xattr_ac)
+{
+ int ret = 0;
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+ int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
+ si->value_len);
+
+ /*
+ * The max space of security xattr taken inline is
+ * 256(name) + 80(value) + 16(entry) = 352 bytes,
+ * So reserve one metadata block for it is ok.
+ */
+ if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+ s_size > OCFS2_XATTR_FREE_IN_IBODY) {
+ ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
+ *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+ }
+
+ /* reserve clusters for xattr value which will be set in B tree*/
+ if (si->value_len > OCFS2_XATTR_INLINE_SIZE)
+ *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+ si->value_len);
+ return ret;
+}
+
+int ocfs2_calc_xattr_init(struct inode *dir,
+ struct buffer_head *dir_bh,
+ int mode,
+ struct ocfs2_security_xattr_info *si,
+ int *want_clusters,
+ int *xattr_credits,
+ struct ocfs2_alloc_context **xattr_ac)
+{
+ int ret = 0;
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+ int s_size = 0;
+ int a_size = 0;
+ int acl_len = 0;
+
+ if (si->enable)
+ s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
+ si->value_len);
+
+ if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+ acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
+ OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
+ "", NULL, 0);
+ if (acl_len > 0) {
+ a_size = ocfs2_xattr_entry_real_size(0, acl_len);
+ if (S_ISDIR(mode))
+ a_size <<= 1;
+ } else if (acl_len != 0 && acl_len != -ENODATA) {
+ mlog_errno(ret);
+ return ret;
+ }
+ }
+
+ if (!(s_size + a_size))
+ return ret;
+
+ /*
+ * The max space of security xattr taken inline is
+ * 256(name) + 80(value) + 16(entry) = 352 bytes,
+ * The max space of acl xattr taken inline is
+ * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
+ * when blocksize = 512, may reserve one more cluser for
+ * xattr bucket, otherwise reserve one metadata block
+ * for them is ok.
+ */
+ if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+ (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
+ ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
+ *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+ }
+
+ if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
+ (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
+ *want_clusters += 1;
+ *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
+ }
+
+ /* reserve clusters for xattr value which will be set in B tree*/
+ if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE)
+ *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+ si->value_len);
+ if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
+ acl_len > OCFS2_XATTR_INLINE_SIZE) {
+ *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
+ if (S_ISDIR(mode))
+ *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+ acl_len);
+ }
+
+ return ret;
+}
+
static int ocfs2_xattr_extend_allocation(struct inode *inode,
u32 clusters_to_add,
struct buffer_head *xattr_bh,
- struct ocfs2_xattr_value_root *xv)
+ struct ocfs2_xattr_value_root *xv,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int status = 0;
- int restart_func = 0;
- int credits = 0;
- handle_t *handle = NULL;
- struct ocfs2_alloc_context *data_ac = NULL;
- struct ocfs2_alloc_context *meta_ac = NULL;
+ handle_t *handle = ctxt->handle;
enum ocfs2_alloc_restarted why;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
@@ -220,26 +554,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
-restart_all:
-
- status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
- &data_ac, &meta_ac);
- if (status) {
- mlog_errno(status);
- goto leave;
- }
-
- credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
- clusters_to_add);
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- handle = NULL;
- mlog_errno(status);
- goto leave;
- }
-
-restarted_transaction:
status = ocfs2_journal_access(handle, inode, xattr_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
@@ -255,12 +569,11 @@ restarted_transaction:
0,
&et,
handle,
- data_ac,
- meta_ac,
+ ctxt->data_ac,
+ ctxt->meta_ac,
&why);
- if ((status < 0) && (status != -EAGAIN)) {
- if (status != -ENOSPC)
- mlog_errno(status);
+ if (status < 0) {
+ mlog_errno(status);
goto leave;
}
@@ -272,47 +585,13 @@ restarted_transaction:
clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
- if (why != RESTART_NONE && clusters_to_add) {
- if (why == RESTART_META) {
- mlog(0, "restarting function.\n");
- restart_func = 1;
- } else {
- BUG_ON(why != RESTART_TRANS);
-
- mlog(0, "restarting transaction.\n");
- /* TODO: This can be more intelligent. */
- credits = ocfs2_calc_extend_credits(osb->sb,
- et.et_root_el,
- clusters_to_add);
- status = ocfs2_extend_trans(handle, credits);
- if (status < 0) {
- /* handle still has to be committed at
- * this point. */
- status = -ENOMEM;
- mlog_errno(status);
- goto leave;
- }
- goto restarted_transaction;
- }
- }
+ /*
+ * We should have already allocated enough space before the transaction,
+ * so no need to restart.
+ */
+ BUG_ON(why != RESTART_NONE || clusters_to_add);
leave:
- if (handle) {
- ocfs2_commit_trans(osb, handle);
- handle = NULL;
- }
- if (data_ac) {
- ocfs2_free_alloc_context(data_ac);
- data_ac = NULL;
- }
- if (meta_ac) {
- ocfs2_free_alloc_context(meta_ac);
- meta_ac = NULL;
- }
- if ((!status) && restart_func) {
- restart_func = 0;
- goto restart_all;
- }
return status;
}
@@ -321,53 +600,27 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
struct buffer_head *root_bh,
struct ocfs2_xattr_value_root *xv,
u32 cpos, u32 phys_cpos, u32 len,
- struct ocfs2_cached_dealloc_ctxt *dealloc)
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct inode *tl_inode = osb->osb_tl_inode;
- handle_t *handle;
- struct ocfs2_alloc_context *meta_ac = NULL;
+ handle_t *handle = ctxt->handle;
struct ocfs2_extent_tree et;
ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
- ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
- if (ret) {
- mlog_errno(ret);
- return ret;
- }
-
- mutex_lock(&tl_inode->i_mutex);
-
- if (ocfs2_truncate_log_needs_flush(osb)) {
- ret = __ocfs2_flush_truncate_log(osb);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- }
-
- handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
-
ret = ocfs2_journal_access(handle, inode, root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
- ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
- dealloc);
+ ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
+ &ctxt->dealloc);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
le32_add_cpu(&xv->xr_clusters, -len);
@@ -375,21 +628,14 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
ret = ocfs2_journal_dirty(handle, root_bh);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
- ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
+ ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
if (ret)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(osb, handle);
out:
- mutex_unlock(&tl_inode->i_mutex);
-
- if (meta_ac)
- ocfs2_free_alloc_context(meta_ac);
-
return ret;
}
@@ -397,15 +643,12 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
u32 old_clusters,
u32 new_clusters,
struct buffer_head *root_bh,
- struct ocfs2_xattr_value_root *xv)
+ struct ocfs2_xattr_value_root *xv,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret = 0;
u32 trunc_len, cpos, phys_cpos, alloc_size;
u64 block;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct ocfs2_cached_dealloc_ctxt dealloc;
-
- ocfs2_init_dealloc_ctxt(&dealloc);
if (old_clusters <= new_clusters)
return 0;
@@ -425,7 +668,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
phys_cpos, alloc_size,
- &dealloc);
+ ctxt);
if (ret) {
mlog_errno(ret);
goto out;
@@ -439,16 +682,14 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
}
out:
- ocfs2_schedule_truncate_log_flush(osb, 1);
- ocfs2_run_deallocs(osb, &dealloc);
-
return ret;
}
static int ocfs2_xattr_value_truncate(struct inode *inode,
struct buffer_head *root_bh,
struct ocfs2_xattr_value_root *xv,
- int len)
+ int len,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret;
u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
@@ -460,11 +701,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
if (new_clusters > old_clusters)
ret = ocfs2_xattr_extend_allocation(inode,
new_clusters - old_clusters,
- root_bh, xv);
+ root_bh, xv, ctxt);
else
ret = ocfs2_xattr_shrink_size(inode,
old_clusters, new_clusters,
- root_bh, xv);
+ root_bh, xv, ctxt);
return ret;
}
@@ -554,18 +795,14 @@ static int ocfs2_xattr_block_list(struct inode *inode,
if (!di->i_xattr_loc)
return ret;
- ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
+ ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
+ &blk_bh);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
- if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
- ret = -EIO;
- goto cleanup;
- }
-
if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
ret = ocfs2_xattr_list_entries(inode, header,
@@ -575,7 +812,7 @@ static int ocfs2_xattr_block_list(struct inode *inode,
ret = ocfs2_xattr_tree_list_index_block(inode, xt,
buffer, buffer_size);
}
-cleanup:
+
brelse(blk_bh);
return ret;
@@ -685,7 +922,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
/* Copy ocfs2_xattr_value */
for (i = 0; i < num_clusters * bpc; i++, blkno++) {
- ret = ocfs2_read_block(inode, blkno, &bh);
+ ret = ocfs2_read_block(inode, blkno, &bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@@ -769,7 +1006,12 @@ static int ocfs2_xattr_block_get(struct inode *inode,
size_t size;
int ret = -ENODATA, name_offset, name_len, block_off, i;
- memset(&xs->bucket, 0, sizeof(xs->bucket));
+ xs->bucket = ocfs2_xattr_bucket_new(inode);
+ if (!xs->bucket) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto cleanup;
+ }
ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
if (ret) {
@@ -795,11 +1037,11 @@ static int ocfs2_xattr_block_get(struct inode *inode,
if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
ret = ocfs2_xattr_bucket_get_name_value(inode,
- xs->bucket.xh,
+ bucket_xh(xs->bucket),
i,
&block_off,
&name_offset);
- xs->base = xs->bucket.bhs[block_off]->b_data;
+ xs->base = bucket_block(xs->bucket, block_off);
}
if (ocfs2_xattr_is_local(xs->here)) {
memcpy(buffer, (void *)xs->base +
@@ -817,21 +1059,15 @@ static int ocfs2_xattr_block_get(struct inode *inode,
}
ret = size;
cleanup:
- for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
- brelse(xs->bucket.bhs[i]);
- memset(&xs->bucket, 0, sizeof(xs->bucket));
+ ocfs2_xattr_bucket_free(xs->bucket);
brelse(xs->xattr_bh);
xs->xattr_bh = NULL;
return ret;
}
-/* ocfs2_xattr_get()
- *
- * Copy an extended attribute into the buffer provided.
- * Buffer is NULL to compute the size of buffer required.
- */
-static int ocfs2_xattr_get(struct inode *inode,
+int ocfs2_xattr_get_nolock(struct inode *inode,
+ struct buffer_head *di_bh,
int name_index,
const char *name,
void *buffer,
@@ -839,7 +1075,6 @@ static int ocfs2_xattr_get(struct inode *inode,
{
int ret;
struct ocfs2_dinode *di = NULL;
- struct buffer_head *di_bh = NULL;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_xattr_search xis = {
.not_found = -ENODATA,
@@ -854,11 +1089,6 @@ static int ocfs2_xattr_get(struct inode *inode,
if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
ret = -ENODATA;
- ret = ocfs2_inode_lock(inode, &di_bh, 0);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
- }
xis.inode_bh = xbs.inode_bh = di_bh;
di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -869,6 +1099,32 @@ static int ocfs2_xattr_get(struct inode *inode,
ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
buffer_size, &xbs);
up_read(&oi->ip_xattr_sem);
+
+ return ret;
+}
+
+/* ocfs2_xattr_get()
+ *
+ * Copy an extended attribute into the buffer provided.
+ * Buffer is NULL to compute the size of buffer required.
+ */
+static int ocfs2_xattr_get(struct inode *inode,
+ int name_index,
+ const char *name,
+ void *buffer,
+ size_t buffer_size)
+{
+ int ret;
+ struct buffer_head *di_bh = NULL;
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+ ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
+ name, buffer, buffer_size);
+
ocfs2_inode_unlock(inode, 0);
brelse(di_bh);
@@ -877,6 +1133,7 @@ static int ocfs2_xattr_get(struct inode *inode,
}
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_value_root *xv,
const void *value,
int value_len)
@@ -888,14 +1145,17 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
u64 blkno;
struct buffer_head *bh = NULL;
- handle_t *handle;
BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
+ /*
+ * In __ocfs2_xattr_set_value_outside has already been dirtied,
+ * so we don't need to worry about whether ocfs2_extend_trans
+ * will create a new transactio for us or not.
+ */
credits = clusters * bpc;
- handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
+ ret = ocfs2_extend_trans(handle, credits);
+ if (ret) {
mlog_errno(ret);
goto out;
}
@@ -905,16 +1165,16 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
&num_clusters, &xv->xr_list);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
for (i = 0; i < num_clusters * bpc; i++, blkno++) {
- ret = ocfs2_read_block(inode, blkno, &bh);
+ ret = ocfs2_read_block(inode, blkno, &bh, NULL);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
ret = ocfs2_journal_access(handle,
@@ -923,7 +1183,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
cp_len = value_len > blocksize ? blocksize : value_len;
@@ -937,7 +1197,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
ret = ocfs2_journal_dirty(handle, bh);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
brelse(bh);
bh = NULL;
@@ -951,8 +1211,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
}
cpos += num_clusters;
}
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
brelse(bh);
@@ -960,28 +1218,21 @@ out:
}
static int ocfs2_xattr_cleanup(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
size_t offs)
{
- handle_t *handle = NULL;
int ret = 0;
size_t name_len = strlen(xi->name);
void *val = xs->base + offs;
size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
- OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
/* Decrease xattr count */
le16_add_cpu(&xs->header->xh_count, -1);
@@ -992,32 +1243,23 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
if (ret < 0)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
return ret;
}
static int ocfs2_xattr_update_entry(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
size_t offs)
{
- handle_t *handle = NULL;
- int ret = 0;
+ int ret;
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
- OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
xs->here->xe_name_offset = cpu_to_le16(offs);
@@ -1031,8 +1273,6 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
if (ret < 0)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
return ret;
}
@@ -1045,6 +1285,7 @@ out:
static int ocfs2_xattr_set_value_outside(struct inode *inode,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt,
size_t offs)
{
size_t name_len = strlen(xi->name);
@@ -1064,18 +1305,18 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
xv->xr_list.l_next_free_rec = 0;
ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
- xi->value_len);
+ xi->value_len, ctxt);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
- ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
- xi->value_len);
+ ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
- ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
+ ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv,
+ xi->value, xi->value_len);
if (ret < 0)
mlog_errno(ret);
@@ -1195,6 +1436,7 @@ static void ocfs2_xattr_set_entry_local(struct inode *inode,
static int ocfs2_xattr_set_entry(struct inode *inode,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt,
int flag)
{
struct ocfs2_xattr_entry *last;
@@ -1202,7 +1444,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
size_t size_l = 0;
- handle_t *handle = NULL;
+ handle_t *handle = ctxt->handle;
int free, i, ret;
struct ocfs2_xattr_info xi_l = {
.name_index = xi->name_index,
@@ -1265,7 +1507,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
/* Replace existing local xattr with tree root */
ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
- offs);
+ ctxt, offs);
if (ret < 0)
mlog_errno(ret);
goto out;
@@ -1284,25 +1526,28 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
ret = ocfs2_xattr_value_truncate(inode,
xs->xattr_bh,
xv,
- xi->value_len);
+ xi->value_len,
+ ctxt);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
- ret = __ocfs2_xattr_set_value_outside(inode,
- xv,
- xi->value,
- xi->value_len);
+ ret = ocfs2_xattr_update_entry(inode,
+ handle,
+ xi,
+ xs,
+ offs);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_xattr_update_entry(inode,
- xi,
- xs,
- offs);
+ ret = __ocfs2_xattr_set_value_outside(inode,
+ handle,
+ xv,
+ xi->value,
+ xi->value_len);
if (ret < 0)
mlog_errno(ret);
goto out;
@@ -1312,44 +1557,29 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
* just trucate old value to zero.
*/
ret = ocfs2_xattr_value_truncate(inode,
- xs->xattr_bh,
- xv,
- 0);
+ xs->xattr_bh,
+ xv,
+ 0,
+ ctxt);
if (ret < 0)
mlog_errno(ret);
}
}
}
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
- OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
-
ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
if (!(flag & OCFS2_INLINE_XATTR_FL)) {
- /* set extended attribute in external block. */
- ret = ocfs2_extend_trans(handle,
- OCFS2_INODE_UPDATE_CREDITS +
- OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
}
@@ -1363,7 +1593,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
}
@@ -1400,16 +1630,13 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
if (ret < 0)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
-
if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
/*
* Set value outside in B tree.
* This is the second step for value size > INLINE_SIZE.
*/
size_t offs = le16_to_cpu(xs->here->xe_name_offset);
- ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
+ ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs);
if (ret < 0) {
int ret2;
@@ -1418,14 +1645,14 @@ out_commit:
* If set value outside failed, we have to clean
* the junk tree root we have already set in local.
*/
- ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
+ ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
+ xi, xs, offs);
if (ret2 < 0)
mlog_errno(ret2);
}
}
out:
return ret;
-
}
static int ocfs2_remove_value_outside(struct inode*inode,
@@ -1433,6 +1660,18 @@ static int ocfs2_remove_value_outside(struct inode*inode,
struct ocfs2_xattr_header *header)
{
int ret = 0, i;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
+
+ ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
+
+ ctxt.handle = ocfs2_start_trans(osb,
+ ocfs2_remove_extent_credits(osb->sb));
+ if (IS_ERR(ctxt.handle)) {
+ ret = PTR_ERR(ctxt.handle);
+ mlog_errno(ret);
+ goto out;
+ }
for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
@@ -1445,14 +1684,19 @@ static int ocfs2_remove_value_outside(struct inode*inode,
le16_to_cpu(entry->xe_name_offset);
xv = (struct ocfs2_xattr_value_root *)
(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
- ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
+ ret = ocfs2_xattr_value_truncate(inode, bh, xv,
+ 0, &ctxt);
if (ret < 0) {
mlog_errno(ret);
- return ret;
+ break;
}
}
}
+ ocfs2_commit_trans(osb, ctxt.handle);
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &ctxt.dealloc);
+out:
return ret;
}
@@ -1502,24 +1746,19 @@ static int ocfs2_xattr_free_block(struct inode *inode,
u64 blk, bg_blkno;
u16 bit;
- ret = ocfs2_read_block(inode, block, &blk_bh);
+ ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
- xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
- if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
- ret = -EIO;
- goto out;
- }
-
ret = ocfs2_xattr_block_remove(inode, blk_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
+ xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
blk = le64_to_cpu(xb->xb_blkno);
bit = le16_to_cpu(xb->xb_suballoc_bit);
bg_blkno = ocfs2_which_suballoc_group(blk, bit);
@@ -1714,7 +1953,8 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
*/
static int ocfs2_xattr_ibody_set(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
@@ -1731,7 +1971,7 @@ static int ocfs2_xattr_ibody_set(struct inode *inode,
}
}
- ret = ocfs2_xattr_set_entry(inode, xi, xs,
+ ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
out:
up_write(&oi->ip_alloc_sem);
@@ -1758,19 +1998,15 @@ static int ocfs2_xattr_block_find(struct inode *inode,
if (!di->i_xattr_loc)
return ret;
- ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
+ ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
+ &blk_bh);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
- xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
- if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
- ret = -EIO;
- goto cleanup;
- }
-
xs->xattr_bh = blk_bh;
+ xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
xs->header = &xb->xb_attrs.xb_header;
@@ -1804,13 +2040,13 @@ cleanup:
*/
static int ocfs2_xattr_block_set(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
struct buffer_head *new_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
- struct ocfs2_alloc_context *meta_ac = NULL;
- handle_t *handle = NULL;
+ handle_t *handle = ctxt->handle;
struct ocfs2_xattr_block *xblk = NULL;
u16 suballoc_bit_start;
u32 num_got;
@@ -1818,35 +2054,19 @@ static int ocfs2_xattr_block_set(struct inode *inode,
int ret;
if (!xs->xattr_bh) {
- /*
- * Alloc one external block for extended attribute
- * outside of inode.
- */
- ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- handle = ocfs2_start_trans(osb,
- OCFS2_XATTR_BLOCK_CREATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto end;
}
- ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
+ ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
&suballoc_bit_start, &num_got,
&first_blkno);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto end;
}
new_bh = sb_getblk(inode->i_sb, first_blkno);
@@ -1856,7 +2076,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
OCFS2_JOURNAL_ACCESS_CREATE);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto end;
}
/* Initialize ocfs2_xattr_block */
@@ -1874,44 +2094,512 @@ static int ocfs2_xattr_block_set(struct inode *inode,
xs->end = (void *)xblk + inode->i_sb->s_blocksize;
xs->here = xs->header->xh_entries;
-
ret = ocfs2_journal_dirty(handle, new_bh);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto end;
}
di->i_xattr_loc = cpu_to_le64(first_blkno);
- ret = ocfs2_journal_dirty(handle, xs->inode_bh);
- if (ret < 0)
- mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(osb, handle);
-out:
- if (meta_ac)
- ocfs2_free_alloc_context(meta_ac);
- if (ret < 0)
- return ret;
+ ocfs2_journal_dirty(handle, xs->inode_bh);
} else
xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
/* Set extended attribute into external block */
- ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
+ ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
+ OCFS2_HAS_XATTR_FL);
if (!ret || ret != -ENOSPC)
goto end;
- ret = ocfs2_xattr_create_index_block(inode, xs);
+ ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
if (ret)
goto end;
}
- ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
+ ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
end:
return ret;
}
+/* Check whether the new xattr can be inserted into the inode. */
+static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
+ struct ocfs2_xattr_info *xi,
+ struct ocfs2_xattr_search *xs)
+{
+ u64 value_size;
+ struct ocfs2_xattr_entry *last;
+ int free, i;
+ size_t min_offs = xs->end - xs->base;
+
+ if (!xs->header)
+ return 0;
+
+ last = xs->header->xh_entries;
+
+ for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
+ size_t offs = le16_to_cpu(last->xe_name_offset);
+ if (offs < min_offs)
+ min_offs = offs;
+ last += 1;
+ }
+
+ free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+ if (free < 0)
+ return 0;
+
+ BUG_ON(!xs->not_found);
+
+ if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
+ value_size = OCFS2_XATTR_ROOT_SIZE;
+ else
+ value_size = OCFS2_XATTR_SIZE(xi->value_len);
+
+ if (free >= sizeof(struct ocfs2_xattr_entry) +
+ OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
+ return 1;
+
+ return 0;
+}
+
+static int ocfs2_calc_xattr_set_need(struct inode *inode,
+ struct ocfs2_dinode *di,
+ struct ocfs2_xattr_info *xi,
+ struct ocfs2_xattr_search *xis,
+ struct ocfs2_xattr_search *xbs,
+ int *clusters_need,
+ int *meta_need,
+ int *credits_need)
+{
+ int ret = 0, old_in_xb = 0;
+ int clusters_add = 0, meta_add = 0, credits = 0;
+ struct buffer_head *bh = NULL;
+ struct ocfs2_xattr_block *xb = NULL;
+ struct ocfs2_xattr_entry *xe = NULL;
+ struct ocfs2_xattr_value_root *xv = NULL;
+ char *base = NULL;
+ int name_offset, name_len = 0;
+ u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
+ xi->value_len);
+ u64 value_size;
+
+ if (xis->not_found && xbs->not_found) {
+ credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+ if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
+ clusters_add += new_clusters;
+ credits += ocfs2_calc_extend_credits(inode->i_sb,
+ &def_xv.xv.xr_list,
+ new_clusters);
+ }
+
+ goto meta_guess;
+ }
+
+ if (!xis->not_found) {
+ xe = xis->here;
+ name_offset = le16_to_cpu(xe->xe_name_offset);
+ name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+ base = xis->base;
+ credits += OCFS2_INODE_UPDATE_CREDITS;
+ } else {
+ int i, block_off = 0;
+ xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
+ xe = xbs->here;
+ name_offset = le16_to_cpu(xe->xe_name_offset);
+ name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+ i = xbs->here - xbs->header->xh_entries;
+ old_in_xb = 1;
+
+ if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
+ ret = ocfs2_xattr_bucket_get_name_value(inode,
+ bucket_xh(xbs->bucket),
+ i, &block_off,
+ &name_offset);
+ base = bucket_block(xbs->bucket, block_off);
+ credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ } else {
+ base = xbs->base;
+ credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
+ }
+ }
+
+ /*
+ * delete a xattr doesn't need metadata and cluster allocation.
+ * so just calculate the credits and return.
+ *
+ * The credits for removing the value tree will be extended
+ * by ocfs2_remove_extent itself.
+ */
+ if (!xi->value) {
+ if (!ocfs2_xattr_is_local(xe))
+ credits += ocfs2_remove_extent_credits(inode->i_sb);
+
+ goto out;
+ }
+
+ /* do cluster allocation guess first. */
+ value_size = le64_to_cpu(xe->xe_value_size);
+
+ if (old_in_xb) {
+ /*
+ * In xattr set, we always try to set the xe in inode first,
+ * so if it can be inserted into inode successfully, the old
+ * one will be removed from the xattr block, and this xattr
+ * will be inserted into inode as a new xattr in inode.
+ */
+ if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
+ clusters_add += new_clusters;
+ credits += ocfs2_remove_extent_credits(inode->i_sb) +
+ OCFS2_INODE_UPDATE_CREDITS;
+ if (!ocfs2_xattr_is_local(xe))
+ credits += ocfs2_calc_extend_credits(
+ inode->i_sb,
+ &def_xv.xv.xr_list,
+ new_clusters);
+ goto out;
+ }
+ }
+
+ if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
+ /* the new values will be stored outside. */
+ u32 old_clusters = 0;
+
+ if (!ocfs2_xattr_is_local(xe)) {
+ old_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
+ value_size);
+ xv = (struct ocfs2_xattr_value_root *)
+ (base + name_offset + name_len);
+ value_size = OCFS2_XATTR_ROOT_SIZE;
+ } else
+ xv = &def_xv.xv;
+
+ if (old_clusters >= new_clusters) {
+ credits += ocfs2_remove_extent_credits(inode->i_sb);
+ goto out;
+ } else {
+ meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
+ clusters_add += new_clusters - old_clusters;
+ credits += ocfs2_calc_extend_credits(inode->i_sb,
+ &xv->xr_list,
+ new_clusters -
+ old_clusters);
+ if (value_size >= OCFS2_XATTR_ROOT_SIZE)
+ goto out;
+ }
+ } else {
+ /*
+ * Now the new value will be stored inside. So if the new
+ * value is smaller than the size of value root or the old
+ * value, we don't need any allocation, otherwise we have
+ * to guess metadata allocation.
+ */
+ if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
+ (!ocfs2_xattr_is_local(xe) &&
+ OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
+ goto out;
+ }
+
+meta_guess:
+ /* calculate metadata allocation. */
+ if (di->i_xattr_loc) {
+ if (!xbs->xattr_bh) {
+ ret = ocfs2_read_xattr_block(inode,
+ le64_to_cpu(di->i_xattr_loc),
+ &bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ xb = (struct ocfs2_xattr_block *)bh->b_data;
+ } else
+ xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
+
+ if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
+ struct ocfs2_extent_list *el =
+ &xb->xb_attrs.xb_root.xt_list;
+ meta_add += ocfs2_extend_meta_needed(el);
+ credits += ocfs2_calc_extend_credits(inode->i_sb,
+ el, 1);
+ }
+
+ /*
+ * This cluster will be used either for new bucket or for
+ * new xattr block.
+ * If the cluster size is the same as the bucket size, one
+ * more is needed since we may need to extend the bucket
+ * also.
+ */
+ clusters_add += 1;
+ credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ if (OCFS2_XATTR_BUCKET_SIZE ==
+ OCFS2_SB(inode->i_sb)->s_clustersize) {
+ credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ clusters_add += 1;
+ }
+ } else {
+ meta_add += 1;
+ credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+ }
+out:
+ if (clusters_need)
+ *clusters_need = clusters_add;
+ if (meta_need)
+ *meta_need = meta_add;
+ if (credits_need)
+ *credits_need = credits;
+ brelse(bh);
+ return ret;
+}
+
+static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
+ struct ocfs2_dinode *di,
+ struct ocfs2_xattr_info *xi,
+ struct ocfs2_xattr_search *xis,
+ struct ocfs2_xattr_search *xbs,
+ struct ocfs2_xattr_set_ctxt *ctxt,
+ int *credits)
+{
+ int clusters_add, meta_add, ret;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
+
+ ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
+
+ ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
+ &clusters_add, &meta_add, credits);
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
+ "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
+
+ if (meta_add) {
+ ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
+ &ctxt->meta_ac);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ if (clusters_add) {
+ ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
+ if (ret)
+ mlog_errno(ret);
+ }
+out:
+ if (ret) {
+ if (ctxt->meta_ac) {
+ ocfs2_free_alloc_context(ctxt->meta_ac);
+ ctxt->meta_ac = NULL;
+ }
+
+ /*
+ * We cannot have an error and a non null ctxt->data_ac.
+ */
+ }
+
+ return ret;
+}
+
+static int __ocfs2_xattr_set_handle(struct inode *inode,
+ struct ocfs2_dinode *di,
+ struct ocfs2_xattr_info *xi,
+ struct ocfs2_xattr_search *xis,
+ struct ocfs2_xattr_search *xbs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
+{
+ int ret = 0, credits, old_found;
+
+ if (!xi->value) {
+ /* Remove existing extended attribute */
+ if (!xis->not_found)
+ ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
+ else if (!xbs->not_found)
+ ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+ } else {
+ /* We always try to set extended attribute into inode first*/
+ ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
+ if (!ret && !xbs->not_found) {
+ /*
+ * If succeed and that extended attribute existing in
+ * external block, then we will remove it.
+ */
+ xi->value = NULL;
+ xi->value_len = 0;
+
+ old_found = xis->not_found;
+ xis->not_found = -ENODATA;
+ ret = ocfs2_calc_xattr_set_need(inode,
+ di,
+ xi,
+ xis,
+ xbs,
+ NULL,
+ NULL,
+ &credits);
+ xis->not_found = old_found;
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_extend_trans(ctxt->handle, credits +
+ ctxt->handle->h_buffer_credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+ } else if (ret == -ENOSPC) {
+ if (di->i_xattr_loc && !xbs->xattr_bh) {
+ ret = ocfs2_xattr_block_find(inode,
+ xi->name_index,
+ xi->name, xbs);
+ if (ret)
+ goto out;
+
+ old_found = xis->not_found;
+ xis->not_found = -ENODATA;
+ ret = ocfs2_calc_xattr_set_need(inode,
+ di,
+ xi,
+ xis,
+ xbs,
+ NULL,
+ NULL,
+ &credits);
+ xis->not_found = old_found;
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_extend_trans(ctxt->handle, credits +
+ ctxt->handle->h_buffer_credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+ /*
+ * If no space in inode, we will set extended attribute
+ * into external block.
+ */
+ ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+ if (ret)
+ goto out;
+ if (!xis->not_found) {
+ /*
+ * If succeed and that extended attribute
+ * existing in inode, we will remove it.
+ */
+ xi->value = NULL;
+ xi->value_len = 0;
+ xbs->not_found = -ENODATA;
+ ret = ocfs2_calc_xattr_set_need(inode,
+ di,
+ xi,
+ xis,
+ xbs,
+ NULL,
+ NULL,
+ &credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_extend_trans(ctxt->handle, credits +
+ ctxt->handle->h_buffer_credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ ret = ocfs2_xattr_ibody_set(inode, xi,
+ xis, ctxt);
+ }
+ }
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * This function only called duing creating inode
+ * for init security/acl xattrs of the new inode.
+ * The xattrs could be put into ibody or extent block,
+ * xattr bucket would not be use in this case.
+ * transanction credits also be reserved in here.
+ */
+int ocfs2_xattr_set_handle(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *di_bh,
+ int name_index,
+ const char *name,
+ const void *value,
+ size_t value_len,
+ int flags,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ struct ocfs2_dinode *di;
+ int ret;
+
+ struct ocfs2_xattr_info xi = {
+ .name_index = name_index,
+ .name = name,
+ .value = value,
+ .value_len = value_len,
+ };
+
+ struct ocfs2_xattr_search xis = {
+ .not_found = -ENODATA,
+ };
+
+ struct ocfs2_xattr_search xbs = {
+ .not_found = -ENODATA,
+ };
+
+ struct ocfs2_xattr_set_ctxt ctxt = {
+ .handle = handle,
+ .meta_ac = meta_ac,
+ .data_ac = data_ac,
+ };
+
+ if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
+ return -EOPNOTSUPP;
+
+ xis.inode_bh = xbs.inode_bh = di_bh;
+ di = (struct ocfs2_dinode *)di_bh->b_data;
+
+ down_write(&OCFS2_I(inode)->ip_xattr_sem);
+
+ ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
+ if (ret)
+ goto cleanup;
+ if (xis.not_found) {
+ ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
+ if (ret)
+ goto cleanup;
+ }
+
+ ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
+
+cleanup:
+ up_write(&OCFS2_I(inode)->ip_xattr_sem);
+ brelse(xbs.xattr_bh);
+
+ return ret;
+}
+
/*
* ocfs2_xattr_set()
*
@@ -1928,8 +2616,10 @@ int ocfs2_xattr_set(struct inode *inode,
{
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
- int ret;
- u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ int ret, credits;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct inode *tl_inode = osb->osb_tl_inode;
+ struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
struct ocfs2_xattr_info xi = {
.name_index = name_index,
@@ -1949,10 +2639,20 @@ int ocfs2_xattr_set(struct inode *inode,
if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
+ /*
+ * Only xbs will be used on indexed trees. xis doesn't need a
+ * bucket.
+ */
+ xbs.bucket = ocfs2_xattr_bucket_new(inode);
+ if (!xbs.bucket) {
+ mlog_errno(-ENOMEM);
+ return -ENOMEM;
+ }
+
ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret < 0) {
mlog_errno(ret);
- return ret;
+ goto cleanup_nolock;
}
xis.inode_bh = xbs.inode_bh = di_bh;
di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -1984,55 +2684,51 @@ int ocfs2_xattr_set(struct inode *inode,
goto cleanup;
}
- if (!value) {
- /* Remove existing extended attribute */
- if (!xis.not_found)
- ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
- else if (!xbs.not_found)
- ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
- } else {
- /* We always try to set extended attribute into inode first*/
- ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
- if (!ret && !xbs.not_found) {
- /*
- * If succeed and that extended attribute existing in
- * external block, then we will remove it.
- */
- xi.value = NULL;
- xi.value_len = 0;
- ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
- } else if (ret == -ENOSPC) {
- if (di->i_xattr_loc && !xbs.xattr_bh) {
- ret = ocfs2_xattr_block_find(inode, name_index,
- name, &xbs);
- if (ret)
- goto cleanup;
- }
- /*
- * If no space in inode, we will set extended attribute
- * into external block.
- */
- ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
- if (ret)
- goto cleanup;
- if (!xis.not_found) {
- /*
- * If succeed and that extended attribute
- * existing in inode, we will remove it.
- */
- xi.value = NULL;
- xi.value_len = 0;
- ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
- }
+
+ mutex_lock(&tl_inode->i_mutex);
+
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ ret = __ocfs2_flush_truncate_log(osb);
+ if (ret < 0) {
+ mutex_unlock(&tl_inode->i_mutex);
+ mlog_errno(ret);
+ goto cleanup;
}
}
+ mutex_unlock(&tl_inode->i_mutex);
+
+ ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
+ &xbs, &ctxt, &credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto cleanup;
+ }
+
+ ctxt.handle = ocfs2_start_trans(osb, credits);
+ if (IS_ERR(ctxt.handle)) {
+ ret = PTR_ERR(ctxt.handle);
+ mlog_errno(ret);
+ goto cleanup;
+ }
+
+ ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
+
+ ocfs2_commit_trans(osb, ctxt.handle);
+
+ if (ctxt.data_ac)
+ ocfs2_free_alloc_context(ctxt.data_ac);
+ if (ctxt.meta_ac)
+ ocfs2_free_alloc_context(ctxt.meta_ac);
+ if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &ctxt.dealloc);
cleanup:
up_write(&OCFS2_I(inode)->ip_xattr_sem);
ocfs2_inode_unlock(inode, 1);
+cleanup_nolock:
brelse(di_bh);
brelse(xbs.xattr_bh);
- for (i = 0; i < blk_per_bucket; i++)
- brelse(xbs.bucket.bhs[i]);
+ ocfs2_xattr_bucket_free(xbs.bucket);
return ret;
}
@@ -2107,7 +2803,7 @@ typedef int (xattr_bucket_func)(struct inode *inode,
void *para);
static int ocfs2_find_xe_in_bucket(struct inode *inode,
- struct buffer_head *header_bh,
+ struct ocfs2_xattr_bucket *bucket,
int name_index,
const char *name,
u32 name_hash,
@@ -2115,11 +2811,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
int *found)
{
int i, ret = 0, cmp = 1, block_off, new_offset;
- struct ocfs2_xattr_header *xh =
- (struct ocfs2_xattr_header *)header_bh->b_data;
+ struct ocfs2_xattr_header *xh = bucket_xh(bucket);
size_t name_len = strlen(name);
struct ocfs2_xattr_entry *xe = NULL;
- struct buffer_head *name_bh = NULL;
char *xe_name;
/*
@@ -2150,19 +2844,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
break;
}
- ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
- &name_bh);
- if (ret) {
- mlog_errno(ret);
- break;
- }
- xe_name = name_bh->b_data + new_offset;
-
- cmp = memcmp(name, xe_name, name_len);
- brelse(name_bh);
- name_bh = NULL;
- if (cmp == 0) {
+ xe_name = bucket_block(bucket, block_off) + new_offset;
+ if (!memcmp(name, xe_name, name_len)) {
*xe_index = i;
*found = 1;
ret = 0;
@@ -2192,39 +2876,42 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
struct ocfs2_xattr_search *xs)
{
int ret, found = 0;
- struct buffer_head *bh = NULL;
- struct buffer_head *lower_bh = NULL;
struct ocfs2_xattr_header *xh = NULL;
struct ocfs2_xattr_entry *xe = NULL;
u16 index = 0;
u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
int low_bucket = 0, bucket, high_bucket;
+ struct ocfs2_xattr_bucket *search;
u32 last_hash;
- u64 blkno;
+ u64 blkno, lower_blkno = 0;
+
+ search = ocfs2_xattr_bucket_new(inode);
+ if (!search) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
- ret = ocfs2_read_block(inode, p_blkno, &bh);
+ ret = ocfs2_read_xattr_bucket(search, p_blkno);
if (ret) {
mlog_errno(ret);
goto out;
}
- xh = (struct ocfs2_xattr_header *)bh->b_data;
+ xh = bucket_xh(search);
high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
-
while (low_bucket <= high_bucket) {
- brelse(bh);
- bh = NULL;
- bucket = (low_bucket + high_bucket) / 2;
+ ocfs2_xattr_bucket_relse(search);
+ bucket = (low_bucket + high_bucket) / 2;
blkno = p_blkno + bucket * blk_per_bucket;
-
- ret = ocfs2_read_block(inode, blkno, &bh);
+ ret = ocfs2_read_xattr_bucket(search, blkno);
if (ret) {
mlog_errno(ret);
goto out;
}
- xh = (struct ocfs2_xattr_header *)bh->b_data;
+ xh = bucket_xh(search);
xe = &xh->xh_entries[0];
if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
high_bucket = bucket - 1;
@@ -2241,10 +2928,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
last_hash = le32_to_cpu(xe->xe_name_hash);
- /* record lower_bh which may be the insert place. */
- brelse(lower_bh);
- lower_bh = bh;
- bh = NULL;
+ /* record lower_blkno which may be the insert place. */
+ lower_blkno = blkno;
if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
low_bucket = bucket + 1;
@@ -2252,7 +2937,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
}
/* the searched xattr should reside in this bucket if exists. */
- ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
+ ret = ocfs2_find_xe_in_bucket(inode, search,
name_index, name, name_hash,
&index, &found);
if (ret) {
@@ -2267,46 +2952,29 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
* When the xattr's hash value is in the gap of 2 buckets, we will
* always set it to the previous bucket.
*/
- if (!lower_bh) {
- /*
- * We can't find any bucket whose first name_hash is less
- * than the find name_hash.
- */
- BUG_ON(bh->b_blocknr != p_blkno);
- lower_bh = bh;
- bh = NULL;
+ if (!lower_blkno)
+ lower_blkno = p_blkno;
+
+ /* This should be in cache - we just read it during the search */
+ ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
}
- xs->bucket.bhs[0] = lower_bh;
- xs->bucket.xh = (struct ocfs2_xattr_header *)
- xs->bucket.bhs[0]->b_data;
- lower_bh = NULL;
- xs->header = xs->bucket.xh;
- xs->base = xs->bucket.bhs[0]->b_data;
+ xs->header = bucket_xh(xs->bucket);
+ xs->base = bucket_block(xs->bucket, 0);
xs->end = xs->base + inode->i_sb->s_blocksize;
if (found) {
- /*
- * If we have found the xattr enty, read all the blocks in
- * this bucket.
- */
- ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
- blk_per_bucket - 1, &xs->bucket.bhs[1],
- 0);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
xs->here = &xs->header->xh_entries[index];
mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
- (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
+ (unsigned long long)bucket_blkno(xs->bucket), index);
} else
ret = -ENODATA;
out:
- brelse(bh);
- brelse(lower_bh);
+ ocfs2_xattr_bucket_free(search);
return ret;
}
@@ -2357,53 +3025,50 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
xattr_bucket_func *func,
void *para)
{
- int i, j, ret = 0;
- int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ int i, ret = 0;
u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
u32 num_buckets = clusters * bpc;
- struct ocfs2_xattr_bucket bucket;
+ struct ocfs2_xattr_bucket *bucket;
- memset(&bucket, 0, sizeof(bucket));
+ bucket = ocfs2_xattr_bucket_new(inode);
+ if (!bucket) {
+ mlog_errno(-ENOMEM);
+ return -ENOMEM;
+ }
mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
clusters, (unsigned long long)blkno);
- for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
- ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
- bucket.bhs, 0);
+ for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
+ ret = ocfs2_read_xattr_bucket(bucket, blkno);
if (ret) {
mlog_errno(ret);
- goto out;
+ break;
}
- bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
/*
* The real bucket num in this series of blocks is stored
* in the 1st bucket.
*/
if (i == 0)
- num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
+ num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
mlog(0, "iterating xattr bucket %llu, first hash %u\n",
(unsigned long long)blkno,
- le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash));
+ le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
if (func) {
- ret = func(inode, &bucket, para);
- if (ret) {
- mlog_errno(ret);
- break;
- }
+ ret = func(inode, bucket, para);
+ if (ret)
+ mlog_errno(ret);
+ /* Fall through to bucket_relse() */
}
- for (j = 0; j < blk_per_bucket; j++)
- brelse(bucket.bhs[j]);
- memset(&bucket, 0, sizeof(bucket));
+ ocfs2_xattr_bucket_relse(bucket);
+ if (ret)
+ break;
}
-out:
- for (j = 0; j < blk_per_bucket; j++)
- brelse(bucket.bhs[j]);
-
+ ocfs2_xattr_bucket_free(bucket);
return ret;
}
@@ -2441,21 +3106,21 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
int i, block_off, new_offset;
const char *prefix, *name;
- for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
- struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
+ for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
+ struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
type = ocfs2_xattr_get_type(entry);
prefix = ocfs2_xattr_prefix(type);
if (prefix) {
ret = ocfs2_xattr_bucket_get_name_value(inode,
- bucket->xh,
+ bucket_xh(bucket),
i,
&block_off,
&new_offset);
if (ret)
break;
- name = (const char *)bucket->bhs[block_off]->b_data +
+ name = (const char *)bucket_block(bucket, block_off) +
new_offset;
ret = ocfs2_xattr_list_entry(xl->buffer,
xl->buffer_size,
@@ -2540,32 +3205,34 @@ static void swap_xe(void *a, void *b, int size)
/*
* When the ocfs2_xattr_block is filled up, new bucket will be created
* and all the xattr entries will be moved to the new bucket.
+ * The header goes at the start of the bucket, and the names+values are
+ * filled from the end. This is why *target starts as the last buffer.
* Note: we need to sort the entries since they are not saved in order
* in the ocfs2_xattr_block.
*/
static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
struct buffer_head *xb_bh,
- struct buffer_head *xh_bh,
- struct buffer_head *data_bh)
+ struct ocfs2_xattr_bucket *bucket)
{
int i, blocksize = inode->i_sb->s_blocksize;
+ int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
u16 offset, size, off_change;
struct ocfs2_xattr_entry *xe;
struct ocfs2_xattr_block *xb =
(struct ocfs2_xattr_block *)xb_bh->b_data;
struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
- struct ocfs2_xattr_header *xh =
- (struct ocfs2_xattr_header *)xh_bh->b_data;
+ struct ocfs2_xattr_header *xh = bucket_xh(bucket);
u16 count = le16_to_cpu(xb_xh->xh_count);
- char *target = xh_bh->b_data, *src = xb_bh->b_data;
+ char *src = xb_bh->b_data;
+ char *target = bucket_block(bucket, blks - 1);
mlog(0, "cp xattr from block %llu to bucket %llu\n",
(unsigned long long)xb_bh->b_blocknr,
- (unsigned long long)xh_bh->b_blocknr);
+ (unsigned long long)bucket_blkno(bucket));
+
+ for (i = 0; i < blks; i++)
+ memset(bucket_block(bucket, i), 0, blocksize);
- memset(xh_bh->b_data, 0, blocksize);
- if (data_bh)
- memset(data_bh->b_data, 0, blocksize);
/*
* Since the xe_name_offset is based on ocfs2_xattr_header,
* there is a offset change corresponding to the change of
@@ -2577,8 +3244,6 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
size = blocksize - offset;
/* copy all the names and values. */
- if (data_bh)
- target = data_bh->b_data;
memcpy(target + offset, src + offset, size);
/* Init new header now. */
@@ -2588,7 +3253,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
/* copy all the entries. */
- target = xh_bh->b_data;
+ target = bucket_block(bucket, 0);
offset = offsetof(struct ocfs2_xattr_header, xh_entries);
size = count * sizeof(struct ocfs2_xattr_entry);
memcpy(target + offset, (char *)xb_xh + offset, size);
@@ -2614,73 +3279,53 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
* While if the entry is in index b-tree, "bucket" indicates the
* real place of the xattr.
*/
-static int ocfs2_xattr_update_xattr_search(struct inode *inode,
- struct ocfs2_xattr_search *xs,
- struct buffer_head *old_bh,
- struct buffer_head *new_bh)
+static void ocfs2_xattr_update_xattr_search(struct inode *inode,
+ struct ocfs2_xattr_search *xs,
+ struct buffer_head *old_bh)
{
- int ret = 0;
char *buf = old_bh->b_data;
struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
- int i, blocksize = inode->i_sb->s_blocksize;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-
- xs->bucket.bhs[0] = new_bh;
- get_bh(new_bh);
- xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
- xs->header = xs->bucket.xh;
+ int i;
- xs->base = new_bh->b_data;
+ xs->header = bucket_xh(xs->bucket);
+ xs->base = bucket_block(xs->bucket, 0);
xs->end = xs->base + inode->i_sb->s_blocksize;
- if (!xs->not_found) {
- if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
- ret = ocfs2_read_blocks(inode,
- xs->bucket.bhs[0]->b_blocknr + 1,
- blk_per_bucket - 1, &xs->bucket.bhs[1],
- 0);
- if (ret) {
- mlog_errno(ret);
- return ret;
- }
+ if (xs->not_found)
+ return;
- i = xs->here - old_xh->xh_entries;
- xs->here = &xs->header->xh_entries[i];
- }
+ /*
+ * If a bucket is more than one block, the name+value moved when
+ * we went to a bucket.
+ */
+ if (xs->bucket->bu_blocks > 1) {
+ i = xs->here - old_xh->xh_entries;
+ xs->here = &xs->header->xh_entries[i];
}
-
- return ret;
}
static int ocfs2_xattr_create_index_block(struct inode *inode,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
- int ret, credits = OCFS2_SUBALLOC_ALLOC;
+ int ret;
u32 bit_off, len;
u64 blkno;
- handle_t *handle;
+ handle_t *handle = ctxt->handle;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- struct ocfs2_alloc_context *data_ac;
- struct buffer_head *xh_bh = NULL, *data_bh = NULL;
struct buffer_head *xb_bh = xs->xattr_bh;
struct ocfs2_xattr_block *xb =
(struct ocfs2_xattr_block *)xb_bh->b_data;
struct ocfs2_xattr_tree_root *xr;
u16 xb_flags = le16_to_cpu(xb->xb_flags);
- u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
mlog(0, "create xattr index block for %llu\n",
(unsigned long long)xb_bh->b_blocknr);
BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
-
- ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
+ BUG_ON(!xs->bucket);
/*
* XXX:
@@ -2689,29 +3334,18 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
*/
down_write(&oi->ip_alloc_sem);
- /*
- * 3 more credits, one for xattr block update, one for the 1st block
- * of the new xattr bucket and one for the value/data.
- */
- credits += 3;
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out_sem;
- }
-
ret = ocfs2_journal_access(handle, inode, xb_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
- ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
+ ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
+ 1, 1, &bit_off, &len);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
/*
@@ -2724,51 +3358,23 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
mlog(0, "allocate 1 cluster from %llu to xattr block\n",
(unsigned long long)blkno);
- xh_bh = sb_getblk(inode->i_sb, blkno);
- if (!xh_bh) {
- ret = -EIO;
+ ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
+ if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
- ocfs2_set_new_buffer_uptodate(inode, xh_bh);
-
- ret = ocfs2_journal_access(handle, inode, xh_bh,
- OCFS2_JOURNAL_ACCESS_CREATE);
+ ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
+ OCFS2_JOURNAL_ACCESS_CREATE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
- }
-
- if (bpb > 1) {
- data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
- if (!data_bh) {
- ret = -EIO;
- mlog_errno(ret);
- goto out_commit;
- }
-
- ocfs2_set_new_buffer_uptodate(inode, data_bh);
-
- ret = ocfs2_journal_access(handle, inode, data_bh,
- OCFS2_JOURNAL_ACCESS_CREATE);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
+ goto out;
}
- ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
+ ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
+ ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
- ocfs2_journal_dirty(handle, xh_bh);
- if (data_bh)
- ocfs2_journal_dirty(handle, data_bh);
-
- ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
+ ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
@@ -2787,24 +3393,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
- ret = ocfs2_journal_dirty(handle, xb_bh);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
-out_commit:
- ocfs2_commit_trans(osb, handle);
-
-out_sem:
- up_write(&oi->ip_alloc_sem);
+ ocfs2_journal_dirty(handle, xb_bh);
out:
- if (data_ac)
- ocfs2_free_alloc_context(data_ac);
-
- brelse(xh_bh);
- brelse(data_bh);
+ up_write(&oi->ip_alloc_sem);
return ret;
}
@@ -2829,29 +3421,18 @@ static int cmp_xe_offset(const void *a, const void *b)
* so that we can spare some space for insertion.
*/
static int ocfs2_defrag_xattr_bucket(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_bucket *bucket)
{
int ret, i;
size_t end, offset, len, value_len;
struct ocfs2_xattr_header *xh;
char *entries, *buf, *bucket_buf = NULL;
- u64 blkno = bucket->bhs[0]->b_blocknr;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ u64 blkno = bucket_blkno(bucket);
u16 xh_free_start;
size_t blocksize = inode->i_sb->s_blocksize;
- handle_t *handle;
- struct buffer_head **bhs;
struct ocfs2_xattr_entry *xe;
- bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
- GFP_NOFS);
- if (!bhs)
- return -ENOMEM;
-
- ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
- if (ret)
- goto out;
-
/*
* In order to make the operation more efficient and generic,
* we copy all the blocks into a contiguous memory and do the
@@ -2865,26 +3446,16 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
}
buf = bucket_buf;
- for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
- memcpy(buf, bhs[i]->b_data, blocksize);
+ for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
+ memcpy(buf, bucket_block(bucket, i), blocksize);
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret < 0) {
mlog_errno(ret);
goto out;
}
- for (i = 0; i < blk_per_bucket; i++) {
- ret = ocfs2_journal_access(handle, inode, bhs[i],
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret < 0) {
- mlog_errno(ret);
- goto commit;
- }
- }
-
xh = (struct ocfs2_xattr_header *)bucket_buf;
entries = (char *)xh->xh_entries;
xh_free_start = le16_to_cpu(xh->xh_free_start);
@@ -2940,7 +3511,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
"bucket %llu\n", (unsigned long long)blkno);
if (xh_free_start == end)
- goto commit;
+ goto out;
memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
xh->xh_free_start = cpu_to_le16(end);
@@ -2951,21 +3522,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
cmp_xe, swap_xe);
buf = bucket_buf;
- for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
- memcpy(bhs[i]->b_data, buf, blocksize);
- ocfs2_journal_dirty(handle, bhs[i]);
- }
+ for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
+ memcpy(bucket_block(bucket, i), buf, blocksize);
+ ocfs2_xattr_bucket_journal_dirty(handle, bucket);
-commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
-
- if (bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(bhs[i]);
- }
- kfree(bhs);
-
kfree(bucket_buf);
return ret;
}
@@ -3015,7 +3576,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
* 1 more for the update of the 1st bucket of the previous
* extent record.
*/
- credits = bpc / 2 + 1;
+ credits = bpc / 2 + 1 + handle->h_buffer_credits;
ret = ocfs2_extend_trans(handle, credits);
if (ret) {
mlog_errno(ret);
@@ -3048,7 +3609,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
goto out;
}
- ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
+ ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL);
if (ret < 0) {
mlog_errno(ret);
brelse(new_bh);
@@ -3092,31 +3653,6 @@ out:
return ret;
}
-static int ocfs2_read_xattr_bucket(struct inode *inode,
- u64 blkno,
- struct buffer_head **bhs,
- int new)
-{
- int ret = 0;
- u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-
- if (!new)
- return ocfs2_read_blocks(inode, blkno,
- blk_per_bucket, bhs, 0);
-
- for (i = 0; i < blk_per_bucket; i++) {
- bhs[i] = sb_getblk(inode->i_sb, blkno + i);
- if (bhs[i] == NULL) {
- ret = -EIO;
- mlog_errno(ret);
- break;
- }
- ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
- }
-
- return ret;
-}
-
/*
* Find the suitable pos when we divide a bucket into 2.
* We have to make sure the xattrs with the same hash value exist
@@ -3178,8 +3714,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
{
int ret, i;
int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
- struct buffer_head **s_bhs, **t_bhs = NULL;
+ struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
struct ocfs2_xattr_header *xh;
struct ocfs2_xattr_entry *xe;
int blocksize = inode->i_sb->s_blocksize;
@@ -3187,47 +3722,47 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
mlog(0, "move some of xattrs from bucket %llu to %llu\n",
(unsigned long long)blk, (unsigned long long)new_blk);
- s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
- if (!s_bhs)
- return -ENOMEM;
-
- ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
- if (ret) {
+ s_bucket = ocfs2_xattr_bucket_new(inode);
+ t_bucket = ocfs2_xattr_bucket_new(inode);
+ if (!s_bucket || !t_bucket) {
+ ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- ret = ocfs2_journal_access(handle, inode, s_bhs[0],
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_read_xattr_bucket(s_bucket, blk);
if (ret) {
mlog_errno(ret);
goto out;
}
- t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
- if (!t_bhs) {
- ret = -ENOMEM;
+ ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
goto out;
}
- ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
+ /*
+ * Even if !new_bucket_head, we're overwriting t_bucket. Thus,
+ * there's no need to read it.
+ */
+ ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
if (ret) {
mlog_errno(ret);
goto out;
}
- for (i = 0; i < blk_per_bucket; i++) {
- ret = ocfs2_journal_access(handle, inode, t_bhs[i],
- new_bucket_head ?
- OCFS2_JOURNAL_ACCESS_CREATE :
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
+ ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
+ new_bucket_head ?
+ OCFS2_JOURNAL_ACCESS_CREATE :
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
}
- xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
+ xh = bucket_xh(s_bucket);
count = le16_to_cpu(xh->xh_count);
start = ocfs2_xattr_find_divide_pos(xh);
@@ -3239,10 +3774,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
* The hash value is set as one larger than
* that of the last entry in the previous bucket.
*/
- for (i = 0; i < blk_per_bucket; i++)
- memset(t_bhs[i]->b_data, 0, blocksize);
+ for (i = 0; i < t_bucket->bu_blocks; i++)
+ memset(bucket_block(t_bucket, i), 0, blocksize);
- xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
+ xh = bucket_xh(t_bucket);
xh->xh_free_start = cpu_to_le16(blocksize);
xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
@@ -3251,11 +3786,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
}
/* copy the whole bucket to the new first. */
- for (i = 0; i < blk_per_bucket; i++)
- memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
+ ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
/* update the new bucket. */
- xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
+ xh = bucket_xh(t_bucket);
/*
* Calculate the total name/value len and xh_free_start for
@@ -3319,11 +3853,7 @@ set_num_buckets:
else
xh->xh_num_buckets = 0;
- for (i = 0; i < blk_per_bucket; i++) {
- ocfs2_journal_dirty(handle, t_bhs[i]);
- if (ret)
- mlog_errno(ret);
- }
+ ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
/* store the first_hash of the new bucket. */
if (first_hash)
@@ -3337,29 +3867,18 @@ set_num_buckets:
if (start == count)
goto out;
- xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
+ xh = bucket_xh(s_bucket);
memset(&xh->xh_entries[start], 0,
sizeof(struct ocfs2_xattr_entry) * (count - start));
xh->xh_count = cpu_to_le16(start);
xh->xh_free_start = cpu_to_le16(name_offset);
xh->xh_name_value_len = cpu_to_le16(name_value_len);
- ocfs2_journal_dirty(handle, s_bhs[0]);
- if (ret)
- mlog_errno(ret);
+ ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
out:
- if (s_bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(s_bhs[i]);
- }
- kfree(s_bhs);
-
- if (t_bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(t_bhs[i]);
- }
- kfree(t_bhs);
+ ocfs2_xattr_bucket_free(s_bucket);
+ ocfs2_xattr_bucket_free(t_bucket);
return ret;
}
@@ -3376,10 +3895,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
u64 t_blkno,
int t_is_new)
{
- int ret, i;
- int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
- int blocksize = inode->i_sb->s_blocksize;
- struct buffer_head **s_bhs, **t_bhs = NULL;
+ int ret;
+ struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
BUG_ON(s_blkno == t_blkno);
@@ -3387,52 +3904,39 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
(unsigned long long)s_blkno, (unsigned long long)t_blkno,
t_is_new);
- s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
- GFP_NOFS);
- if (!s_bhs)
- return -ENOMEM;
-
- ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
- if (ret)
- goto out;
-
- t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
- GFP_NOFS);
- if (!t_bhs) {
+ s_bucket = ocfs2_xattr_bucket_new(inode);
+ t_bucket = ocfs2_xattr_bucket_new(inode);
+ if (!s_bucket || !t_bucket) {
ret = -ENOMEM;
+ mlog_errno(ret);
goto out;
}
+
+ ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
+ if (ret)
+ goto out;
- ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
+ /*
+ * Even if !t_is_new, we're overwriting t_bucket. Thus,
+ * there's no need to read it.
+ */
+ ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
if (ret)
goto out;
- for (i = 0; i < blk_per_bucket; i++) {
- ret = ocfs2_journal_access(handle, inode, t_bhs[i],
- t_is_new ?
- OCFS2_JOURNAL_ACCESS_CREATE :
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret)
- goto out;
- }
+ ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
+ t_is_new ?
+ OCFS2_JOURNAL_ACCESS_CREATE :
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret)
+ goto out;
- for (i = 0; i < blk_per_bucket; i++) {
- memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
- ocfs2_journal_dirty(handle, t_bhs[i]);
- }
+ ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
+ ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
out:
- if (s_bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(s_bhs[i]);
- }
- kfree(s_bhs);
-
- if (t_bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(t_bhs[i]);
- }
- kfree(t_bhs);
+ ocfs2_xattr_bucket_free(t_bucket);
+ ocfs2_xattr_bucket_free(s_bucket);
return ret;
}
@@ -3464,7 +3968,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
* We need to update the new cluster and 1 more for the update of
* the 1st bucket of the previous extent rec.
*/
- credits = bpc + 1;
+ credits = bpc + 1 + handle->h_buffer_credits;
ret = ocfs2_extend_trans(handle, credits);
if (ret) {
mlog_errno(ret);
@@ -3497,7 +4001,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
ocfs2_journal_dirty(handle, first_bh);
/* update the new bucket header. */
- ret = ocfs2_read_block(inode, to_blk_start, &bh);
+ ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -3534,7 +4038,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
u32 *first_hash)
{
u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
- int ret, credits = 2 * blk_per_bucket;
+ int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
@@ -3644,16 +4148,15 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
u32 *num_clusters,
u32 prev_cpos,
u64 prev_blkno,
- int *extend)
+ int *extend,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
- int ret, credits;
+ int ret;
u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
u32 prev_clusters = *num_clusters;
u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
u64 block;
- handle_t *handle = NULL;
- struct ocfs2_alloc_context *data_ac = NULL;
- struct ocfs2_alloc_context *meta_ac = NULL;
+ handle_t *handle = ctxt->handle;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_extent_tree et;
@@ -3664,23 +4167,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
- ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
- &data_ac, &meta_ac);
- if (ret) {
- mlog_errno(ret);
- goto leave;
- }
-
- credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
- clusters_to_add);
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
- mlog_errno(ret);
- goto leave;
- }
-
ret = ocfs2_journal_access(handle, inode, root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
@@ -3688,7 +4174,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
goto leave;
}
- ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
+ ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
clusters_to_add, &bit_off, &num_bits);
if (ret < 0) {
if (ret != -ENOSPC)
@@ -3734,41 +4220,20 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
}
}
- if (handle->h_buffer_credits < credits) {
- /*
- * The journal has been restarted before, and don't
- * have enough space for the insertion, so extend it
- * here.
- */
- ret = ocfs2_extend_trans(handle, credits);
- if (ret) {
- mlog_errno(ret);
- goto leave;
- }
- }
mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
num_bits, (unsigned long long)block, v_start);
ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
- num_bits, 0, meta_ac);
+ num_bits, 0, ctxt->meta_ac);
if (ret < 0) {
mlog_errno(ret);
goto leave;
}
ret = ocfs2_journal_dirty(handle, root_bh);
- if (ret < 0) {
+ if (ret < 0)
mlog_errno(ret);
- goto leave;
- }
leave:
- if (handle)
- ocfs2_commit_trans(osb, handle);
- if (data_ac)
- ocfs2_free_alloc_context(data_ac);
- if (meta_ac)
- ocfs2_free_alloc_context(meta_ac);
-
return ret;
}
@@ -3777,6 +4242,7 @@ leave:
* We meet with start_bh. Only move half of the xattrs to the bucket after it.
*/
static int ocfs2_extend_xattr_bucket(struct inode *inode,
+ handle_t *handle,
struct buffer_head *first_bh,
struct buffer_head *start_bh,
u32 num_clusters)
@@ -3786,7 +4252,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
u64 start_blk = start_bh->b_blocknr, end_blk;
u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
- handle_t *handle;
struct ocfs2_xattr_header *first_xh =
(struct ocfs2_xattr_header *)first_bh->b_data;
u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
@@ -3801,13 +4266,12 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
/*
* We will touch all the buckets after the start_bh(include it).
- * Add one more bucket and modify the first_bh.
+ * Then we add one more bucket.
*/
- credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ credits = end_blk - start_blk + 3 * blk_per_bucket + 1 +
+ handle->h_buffer_credits;
+ ret = ocfs2_extend_trans(handle, credits);
+ if (ret) {
mlog_errno(ret);
goto out;
}
@@ -3816,14 +4280,14 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto commit;
+ goto out;
}
while (end_blk != start_blk) {
ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
end_blk + blk_per_bucket, 0);
if (ret)
- goto commit;
+ goto out;
end_blk -= blk_per_bucket;
}
@@ -3834,8 +4298,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
le16_add_cpu(&first_xh->xh_num_buckets, 1);
ocfs2_journal_dirty(handle, first_bh);
-commit:
- ocfs2_commit_trans(osb, handle);
out:
return ret;
}
@@ -3851,7 +4313,8 @@ out:
*/
static int ocfs2_add_new_xattr_bucket(struct inode *inode,
struct buffer_head *xb_bh,
- struct buffer_head *header_bh)
+ struct buffer_head *header_bh,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
struct ocfs2_xattr_header *first_xh = NULL;
struct buffer_head *first_bh = NULL;
@@ -3885,7 +4348,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
goto out;
}
- ret = ocfs2_read_block(inode, p_blkno, &first_bh);
+ ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3902,7 +4365,8 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
&num_clusters,
e_cpos,
p_blkno,
- &extend);
+ &extend,
+ ctxt);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3911,6 +4375,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
if (extend)
ret = ocfs2_extend_xattr_bucket(inode,
+ ctxt->handle,
first_bh,
header_bh,
num_clusters);
@@ -3929,7 +4394,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
int block_off = offs >> inode->i_sb->s_blocksize_bits;
offs = offs % inode->i_sb->s_blocksize;
- return bucket->bhs[block_off]->b_data + offs;
+ return bucket_block(bucket, block_off) + offs;
}
/*
@@ -3984,7 +4449,7 @@ static void ocfs2_xattr_set_entry_normal(struct inode *inode,
xe->xe_value_size = 0;
val = ocfs2_xattr_bucket_get_val(inode,
- &xs->bucket, offs);
+ xs->bucket, offs);
memset(val + OCFS2_XATTR_SIZE(name_len), 0,
size - OCFS2_XATTR_SIZE(name_len));
if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
@@ -4062,8 +4527,7 @@ set_new_name_value:
xh->xh_free_start = cpu_to_le16(offs);
}
- val = ocfs2_xattr_bucket_get_val(inode,
- &xs->bucket, offs - size);
+ val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
xe->xe_name_offset = cpu_to_le16(offs - size);
memset(val, 0, size);
@@ -4079,115 +4543,62 @@ set_new_name_value:
return;
}
-static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
- handle_t *handle,
- struct ocfs2_xattr_search *xs,
- struct buffer_head **bhs,
- u16 bh_num)
-{
- int ret = 0, off, block_off;
- struct ocfs2_xattr_entry *xe = xs->here;
-
- /*
- * First calculate all the blocks we should journal_access
- * and journal_dirty. The first block should always be touched.
- */
- ret = ocfs2_journal_dirty(handle, bhs[0]);
- if (ret)
- mlog_errno(ret);
-
- /* calc the data. */
- off = le16_to_cpu(xe->xe_name_offset);
- block_off = off >> inode->i_sb->s_blocksize_bits;
- ret = ocfs2_journal_dirty(handle, bhs[block_off]);
- if (ret)
- mlog_errno(ret);
-
- return ret;
-}
-
/*
* Set the xattr entry in the specified bucket.
* The bucket is indicated by xs->bucket and it should have the enough
* space for the xattr insertion.
*/
static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
u32 name_hash,
int local)
{
- int i, ret;
- handle_t *handle = NULL;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int ret;
+ u64 blkno;
mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
(unsigned long)xi->value_len, xi->name_index,
- (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
+ (unsigned long long)bucket_blkno(xs->bucket));
- if (!xs->bucket.bhs[1]) {
- ret = ocfs2_read_blocks(inode,
- xs->bucket.bhs[0]->b_blocknr + 1,
- blk_per_bucket - 1, &xs->bucket.bhs[1],
- 0);
+ if (!xs->bucket->bu_bhs[1]) {
+ blkno = bucket_blkno(xs->bucket);
+ ocfs2_xattr_bucket_relse(xs->bucket);
+ ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
if (ret) {
mlog_errno(ret);
goto out;
}
}
- handle = ocfs2_start_trans(osb, blk_per_bucket);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret < 0) {
mlog_errno(ret);
goto out;
}
- for (i = 0; i < blk_per_bucket; i++) {
- ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- }
-
ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
+ ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
- /*Only dirty the blocks we have touched in set xattr. */
- ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
- xs->bucket.bhs, blk_per_bucket);
- if (ret)
- mlog_errno(ret);
out:
- ocfs2_commit_trans(osb, handle);
-
return ret;
}
static int ocfs2_xattr_value_update_size(struct inode *inode,
+ handle_t *handle,
struct buffer_head *xe_bh,
struct ocfs2_xattr_entry *xe,
u64 new_size)
{
int ret;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- handle_t *handle = NULL;
-
- handle = ocfs2_start_trans(osb, 1);
- if (IS_ERR(handle)) {
- ret = -ENOMEM;
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access(handle, inode, xe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
xe->xe_value_size = cpu_to_le64(new_size);
@@ -4196,8 +4607,6 @@ static int ocfs2_xattr_value_update_size(struct inode *inode,
if (ret < 0)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(osb, handle);
out:
return ret;
}
@@ -4212,7 +4621,8 @@ out:
static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
struct buffer_head *header_bh,
int xe_off,
- int len)
+ int len,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret, offset;
u64 value_blk;
@@ -4236,7 +4646,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
value_blk += header_bh->b_blocknr;
- ret = ocfs2_read_block(inode, value_blk, &value_bh);
+ ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4247,13 +4657,14 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
xe_off, (unsigned long long)header_bh->b_blocknr, len);
- ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
+ ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
+ ret = ocfs2_xattr_value_update_size(inode, ctxt->handle,
+ header_bh, xe, len);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4265,18 +4676,19 @@ out:
}
static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
- struct ocfs2_xattr_search *xs,
- int len)
+ struct ocfs2_xattr_search *xs,
+ int len,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret, offset;
struct ocfs2_xattr_entry *xe = xs->here;
struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
- BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
+ BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
offset = xe - xh->xh_entries;
- ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
- offset, len);
+ ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
+ offset, len, ctxt);
if (ret)
mlog_errno(ret);
@@ -4284,6 +4696,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
}
static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_search *xs,
char *val,
int value_len)
@@ -4299,7 +4712,8 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
- return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
+ return __ocfs2_xattr_set_value_outside(inode, handle,
+ xv, val, value_len);
}
static int ocfs2_rm_xattr_cluster(struct inode *inode,
@@ -4343,7 +4757,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+ handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
if (IS_ERR(handle)) {
ret = -ENOMEM;
mlog_errno(ret);
@@ -4392,26 +4806,19 @@ out:
}
static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_search *xs)
{
- handle_t *handle = NULL;
- struct ocfs2_xattr_header *xh = xs->bucket.xh;
+ struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
struct ocfs2_xattr_entry *last = &xh->xh_entries[
le16_to_cpu(xh->xh_count) - 1];
int ret = 0;
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- return;
- }
-
- ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ return;
}
/* Remove the old entry. */
@@ -4420,11 +4827,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
memset(last, 0, sizeof(struct ocfs2_xattr_entry));
le16_add_cpu(&xh->xh_count, -1);
- ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
- if (ret < 0)
- mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+ ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
}
/*
@@ -4440,7 +4843,8 @@ out_commit:
*/
static int ocfs2_xattr_set_in_bucket(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret, local = 1;
size_t value_len;
@@ -4468,7 +4872,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
value_len = 0;
ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
- value_len);
+ value_len,
+ ctxt);
if (ret)
goto out;
@@ -4488,7 +4893,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
xi->value_len = OCFS2_XATTR_ROOT_SIZE;
}
- ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local);
+ ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
+ name_hash, local);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4499,7 +4905,7 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
/* allocate the space now for the outside block storage. */
ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
- value_len);
+ value_len, ctxt);
if (ret) {
mlog_errno(ret);
@@ -4509,13 +4915,14 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
* storage and we have allocated xattr already,
* so need to remove it.
*/
- ocfs2_xattr_bucket_remove_xs(inode, xs);
+ ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
}
goto out;
}
set_value_outside:
- ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
+ ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
+ xs, val, value_len);
out:
return ret;
}
@@ -4530,7 +4937,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
struct ocfs2_xattr_bucket *bucket,
const char *name)
{
- struct ocfs2_xattr_header *xh = bucket->xh;
+ struct ocfs2_xattr_header *xh = bucket_xh(bucket);
u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
@@ -4540,7 +4947,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
xh->xh_entries[0].xe_name_hash) {
mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
"hash = %u\n",
- (unsigned long long)bucket->bhs[0]->b_blocknr,
+ (unsigned long long)bucket_blkno(bucket),
le32_to_cpu(xh->xh_entries[0].xe_name_hash));
return -ENOSPC;
}
@@ -4550,16 +4957,16 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
struct ocfs2_xattr_header *xh;
struct ocfs2_xattr_entry *xe;
u16 count, header_size, xh_free_start;
- int i, free, max_free, need, old;
+ int free, max_free, need, old;
size_t value_size = 0, name_len = strlen(xi->name);
size_t blocksize = inode->i_sb->s_blocksize;
int ret, allocation = 0;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
mlog_entry("Set xattr %s in xattr index block\n", xi->name);
@@ -4574,7 +4981,7 @@ try_again:
mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
"of %u which exceed block size\n",
- (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
+ (unsigned long long)bucket_blkno(xs->bucket),
header_size);
if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
@@ -4614,11 +5021,13 @@ try_again:
mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
"need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
" %u\n", xs->not_found,
- (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
+ (unsigned long long)bucket_blkno(xs->bucket),
free, need, max_free, le16_to_cpu(xh->xh_free_start),
le16_to_cpu(xh->xh_name_value_len));
- if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
+ if (free < need ||
+ (xs->not_found &&
+ count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
if (need <= max_free &&
count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
/*
@@ -4626,7 +5035,8 @@ try_again:
* name/value will be moved, the xe shouldn't be changed
* in xs.
*/
- ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
+ ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
+ xs->bucket);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4658,7 +5068,7 @@ try_again:
* add a new bucket for the insert.
*/
ret = ocfs2_check_xattr_bucket_collision(inode,
- &xs->bucket,
+ xs->bucket,
xi->name);
if (ret) {
mlog_errno(ret);
@@ -4667,16 +5077,14 @@ try_again:
ret = ocfs2_add_new_xattr_bucket(inode,
xs->xattr_bh,
- xs->bucket.bhs[0]);
+ xs->bucket->bu_bhs[0],
+ ctxt);
if (ret) {
mlog_errno(ret);
goto out;
}
- for (i = 0; i < blk_per_bucket; i++)
- brelse(xs->bucket.bhs[i]);
-
- memset(&xs->bucket, 0, sizeof(xs->bucket));
+ ocfs2_xattr_bucket_relse(xs->bucket);
ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
xi->name_index,
@@ -4689,7 +5097,7 @@ try_again:
}
xattr_set:
- ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
+ ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
out:
mlog_exit(ret);
return ret;
@@ -4700,9 +5108,21 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
void *para)
{
int ret = 0;
- struct ocfs2_xattr_header *xh = bucket->xh;
+ struct ocfs2_xattr_header *xh = bucket_xh(bucket);
u16 i;
struct ocfs2_xattr_entry *xe;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
+
+ ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
+
+ ctxt.handle = ocfs2_start_trans(osb,
+ ocfs2_remove_extent_credits(osb->sb));
+ if (IS_ERR(ctxt.handle)) {
+ ret = PTR_ERR(ctxt.handle);
+ mlog_errno(ret);
+ goto out;
+ }
for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
xe = &xh->xh_entries[i];
@@ -4710,14 +5130,18 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
continue;
ret = ocfs2_xattr_bucket_value_truncate(inode,
- bucket->bhs[0],
- i, 0);
+ bucket->bu_bhs[0],
+ i, 0, &ctxt);
if (ret) {
mlog_errno(ret);
break;
}
}
+ ret = ocfs2_commit_trans(osb, ctxt.handle);
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &ctxt.dealloc);
+out:
return ret;
}
@@ -4768,6 +5192,71 @@ out:
}
/*
+ * 'security' attributes support
+ */
+static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
+ size_t list_size, const char *name,
+ size_t name_len)
+{
+ const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
+ const size_t total_len = prefix_len + name_len + 1;
+
+ if (list && total_len <= list_size) {
+ memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
+ memcpy(list + prefix_len, name, name_len);
+ list[prefix_len + name_len] = '\0';
+ }
+ return total_len;
+}
+
+static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+ return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
+ buffer, size);
+}
+
+static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
+ size, flags);
+}
+
+int ocfs2_init_security_get(struct inode *inode,
+ struct inode *dir,
+ struct ocfs2_security_xattr_info *si)
+{
+ return security_inode_init_security(inode, dir, &si->name, &si->value,
+ &si->value_len);
+}
+
+int ocfs2_init_security_set(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *di_bh,
+ struct ocfs2_security_xattr_info *si,
+ struct ocfs2_alloc_context *xattr_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ return ocfs2_xattr_set_handle(handle, inode, di_bh,
+ OCFS2_XATTR_INDEX_SECURITY,
+ si->name, si->value, si->value_len, 0,
+ xattr_ac, data_ac);
+}
+
+struct xattr_handler ocfs2_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .list = ocfs2_xattr_security_list,
+ .get = ocfs2_xattr_security_get,
+ .set = ocfs2_xattr_security_set,
+};
+
+/*
* 'trusted' attributes support
*/
static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1d8314c7656d..9a67e7d8f812 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -30,13 +30,44 @@ enum ocfs2_xattr_type {
OCFS2_XATTR_MAX
};
+struct ocfs2_security_xattr_info {
+ int enable;
+ char *name;
+ void *value;
+ size_t value_len;
+};
+
extern struct xattr_handler ocfs2_xattr_user_handler;
extern struct xattr_handler ocfs2_xattr_trusted_handler;
+extern struct xattr_handler ocfs2_xattr_security_handler;
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+extern struct xattr_handler ocfs2_xattr_acl_access_handler;
+extern struct xattr_handler ocfs2_xattr_acl_default_handler;
+#endif
extern struct xattr_handler *ocfs2_xattr_handlers[];
ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
+int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int,
+ const char *, void *, size_t);
int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
size_t, int);
+int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
+ int, const char *, const void *, size_t, int,
+ struct ocfs2_alloc_context *,
+ struct ocfs2_alloc_context *);
int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
+int ocfs2_init_security_get(struct inode *, struct inode *,
+ struct ocfs2_security_xattr_info *);
+int ocfs2_init_security_set(handle_t *, struct inode *,
+ struct buffer_head *,
+ struct ocfs2_security_xattr_info *,
+ struct ocfs2_alloc_context *,
+ struct ocfs2_alloc_context *);
+int ocfs2_calc_security_init(struct inode *,
+ struct ocfs2_security_xattr_info *,
+ int *, int *, struct ocfs2_alloc_context **);
+int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
+ int, struct ocfs2_security_xattr_info *,
+ int *, int *, struct ocfs2_alloc_context **);
#endif /* OCFS2_XATTR_H */
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index cbf047a847c5..6afe57c84f84 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -37,8 +37,8 @@ struct inode *omfs_new_inode(struct inode *dir, int mode)
inode->i_ino = new_block;
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_mapping->a_ops = &omfs_aops;
@@ -420,8 +420,8 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = sbi;
- sbi->s_uid = current->uid;
- sbi->s_gid = current->gid;
+ sbi->s_uid = current_uid();
+ sbi->s_gid = current_gid();
sbi->s_dmask = sbi->s_fmask = current->fs->umask;
if (!parse_options((char *) data, sbi))
diff --git a/fs/open.c b/fs/open.c
index 83cdb9dee0c1..c0a426d5766c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -425,39 +425,33 @@ out:
*/
asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
{
+ const struct cred *old_cred;
+ struct cred *override_cred;
struct path path;
struct inode *inode;
- int old_fsuid, old_fsgid;
- kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */
int res;
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
- old_fsuid = current->fsuid;
- old_fsgid = current->fsgid;
+ override_cred = prepare_creds();
+ if (!override_cred)
+ return -ENOMEM;
- current->fsuid = current->uid;
- current->fsgid = current->gid;
+ override_cred->fsuid = override_cred->uid;
+ override_cred->fsgid = override_cred->gid;
if (!issecure(SECURE_NO_SETUID_FIXUP)) {
- /*
- * Clear the capabilities if we switch to a non-root user
- */
-#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
- /*
- * FIXME: There is a race here against sys_capset. The
- * capabilities can change yet we will restore the old
- * value below. We should hold task_capabilities_lock,
- * but we cannot because user_path_at can sleep.
- */
-#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
- if (current->uid)
- old_cap = cap_set_effective(__cap_empty_set);
+ /* Clear the capabilities if we switch to a non-root user */
+ if (override_cred->uid)
+ cap_clear(override_cred->cap_effective);
else
- old_cap = cap_set_effective(current->cap_permitted);
+ override_cred->cap_effective =
+ override_cred->cap_permitted;
}
+ old_cred = override_creds(override_cred);
+
res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
if (res)
goto out;
@@ -494,12 +488,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
out_path_release:
path_put(&path);
out:
- current->fsuid = old_fsuid;
- current->fsgid = old_fsgid;
-
- if (!issecure(SECURE_NO_SETUID_FIXUP))
- cap_set_effective(old_cap);
-
+ revert_creds(old_cred);
+ put_cred(override_cred);
return res;
}
@@ -792,7 +782,8 @@ static inline int __get_file_write_access(struct inode *inode,
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int flags, struct file *f,
- int (*open)(struct inode *, struct file *))
+ int (*open)(struct inode *, struct file *),
+ const struct cred *cred)
{
struct inode *inode;
int error;
@@ -816,7 +807,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_op = fops_get(inode->i_fop);
file_move(f, &inode->i_sb->s_files);
- error = security_dentry_open(f);
+ error = security_dentry_open(f, cred);
if (error)
goto cleanup_all;
@@ -891,6 +882,8 @@ cleanup_file:
struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
int (*open)(struct inode *, struct file *))
{
+ const struct cred *cred = current_cred();
+
if (IS_ERR(nd->intent.open.file))
goto out;
if (IS_ERR(dentry))
@@ -898,7 +891,7 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
nd->intent.open.flags - 1,
nd->intent.open.file,
- open);
+ open, cred);
out:
return nd->intent.open.file;
out_err:
@@ -917,6 +910,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
*/
struct file *nameidata_to_filp(struct nameidata *nd, int flags)
{
+ const struct cred *cred = current_cred();
struct file *filp;
/* Pick up the filp from the open intent */
@@ -924,7 +918,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
/* Has the filesystem initialised the file for us? */
if (filp->f_path.dentry == NULL)
filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
- NULL);
+ NULL, cred);
else
path_put(&nd->path);
return filp;
@@ -934,7 +928,8 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
* dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
* error.
*/
-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
+ const struct cred *cred)
{
int error;
struct file *f;
@@ -959,7 +954,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
return ERR_PTR(error);
}
- return __dentry_open(dentry, mnt, flags, f, NULL);
+ return __dentry_open(dentry, mnt, flags, f, NULL, cred);
}
EXPORT_SYMBOL(dentry_open);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d5b213b8a9b..5198ada67398 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -384,9 +384,9 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
dname = dev_name(ddev);
if (isdigit(dname[strlen(dname) - 1]))
- snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno);
+ dev_set_name(pdev, "%sp%d", dname, partno);
else
- snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno);
+ dev_set_name(pdev, "%s%d", dname, partno);
device_initialize(pdev);
pdev->class = &block_class;
@@ -447,16 +447,11 @@ void register_disk(struct gendisk *disk)
struct block_device *bdev;
struct disk_part_iter piter;
struct hd_struct *part;
- char *s;
int err;
ddev->parent = disk->driverfs_dev;
- strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE);
- /* ewww... some of these buggers have / in the name... */
- s = strchr(ddev->bus_id, '/');
- if (s)
- *s = '!';
+ dev_set_name(ddev, disk->disk_name);
/* delay uevents, until we scanned partition table */
ddev->uevent_suppress = 1;
diff --git a/fs/pipe.c b/fs/pipe.c
index 7aea8b89baac..aaf797bd57b9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -899,8 +899,8 @@ static struct inode * get_pipe_inode(void)
*/
inode->i_state = I_DIRTY;
inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
return inode;
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index aec931e09973..39df95a0ec25 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -217,11 +217,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
switch(pa->e_tag) {
case ACL_USER_OBJ:
/* (May have been checked already) */
- if (inode->i_uid == current->fsuid)
+ if (inode->i_uid == current_fsuid())
goto check_perm;
break;
case ACL_USER:
- if (pa->e_id == current->fsuid)
+ if (pa->e_id == current_fsuid())
goto mask;
break;
case ACL_GROUP_OBJ:
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6af7fba7abb1..7e4877d9dcb5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
+ const struct cred *cred;
pid_t ppid, tpid;
rcu_read_lock();
@@ -170,6 +171,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
if (tracer)
tpid = task_pid_nr_ns(tracer, ns);
}
+ cred = get_cred((struct cred *) __task_cred(p));
seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
@@ -182,8 +184,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
task_tgid_nr_ns(p, ns),
pid_nr_ns(pid, ns),
ppid, tpid,
- p->uid, p->euid, p->suid, p->fsuid,
- p->gid, p->egid, p->sgid, p->fsgid);
+ cred->uid, cred->euid, cred->suid, cred->fsuid,
+ cred->gid, cred->egid, cred->sgid, cred->fsgid);
task_lock(p);
if (p->files)
@@ -194,13 +196,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
fdt ? fdt->max_fds : 0);
rcu_read_unlock();
- group_info = p->group_info;
- get_group_info(group_info);
+ group_info = cred->group_info;
task_unlock(p);
for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
seq_printf(m, "%d ", GROUP_AT(group_info, g));
- put_group_info(group_info);
+ put_cred(cred);
seq_printf(m, "\n");
}
@@ -262,7 +263,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
blocked = p->blocked;
collect_sigign_sigcatch(p, &ignored, &caught);
num_threads = atomic_read(&p->signal->count);
- qsize = atomic_read(&p->user->sigpending);
+ qsize = atomic_read(&__task_cred(p)->user->sigpending);
qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
unlock_task_sighand(p, &flags);
}
@@ -293,10 +294,21 @@ static void render_cap_t(struct seq_file *m, const char *header,
static inline void task_cap(struct seq_file *m, struct task_struct *p)
{
- render_cap_t(m, "CapInh:\t", &p->cap_inheritable);
- render_cap_t(m, "CapPrm:\t", &p->cap_permitted);
- render_cap_t(m, "CapEff:\t", &p->cap_effective);
- render_cap_t(m, "CapBnd:\t", &p->cap_bset);
+ const struct cred *cred;
+ kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset;
+
+ rcu_read_lock();
+ cred = __task_cred(p);
+ cap_inheritable = cred->cap_inheritable;
+ cap_permitted = cred->cap_permitted;
+ cap_effective = cred->cap_effective;
+ cap_bset = cred->cap_bset;
+ rcu_read_unlock();
+
+ render_cap_t(m, "CapInh:\t", &cap_inheritable);
+ render_cap_t(m, "CapPrm:\t", &cap_permitted);
+ render_cap_t(m, "CapEff:\t", &cap_effective);
+ render_cap_t(m, "CapBnd:\t", &cap_bset);
}
static inline void task_context_switch_counts(struct seq_file *m,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 486cf3fe7139..cf42c42cbfbb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1406,6 +1406,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
{
struct inode * inode;
struct proc_inode *ei;
+ const struct cred *cred;
/* We need a new inode */
@@ -1428,8 +1429,11 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
inode->i_uid = 0;
inode->i_gid = 0;
if (task_dumpable(task)) {
- inode->i_uid = task->euid;
- inode->i_gid = task->egid;
+ rcu_read_lock();
+ cred = __task_cred(task);
+ inode->i_uid = cred->euid;
+ inode->i_gid = cred->egid;
+ rcu_read_unlock();
}
security_task_to_inode(task, inode);
@@ -1445,6 +1449,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
{
struct inode *inode = dentry->d_inode;
struct task_struct *task;
+ const struct cred *cred;
+
generic_fillattr(inode, stat);
rcu_read_lock();
@@ -1454,8 +1460,9 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
if (task) {
if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
task_dumpable(task)) {
- stat->uid = task->euid;
- stat->gid = task->egid;
+ cred = __task_cred(task);
+ stat->uid = cred->euid;
+ stat->gid = cred->egid;
}
}
rcu_read_unlock();
@@ -1483,11 +1490,16 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct task_struct *task = get_proc_task(inode);
+ const struct cred *cred;
+
if (task) {
if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
task_dumpable(task)) {
- inode->i_uid = task->euid;
- inode->i_gid = task->egid;
+ rcu_read_lock();
+ cred = __task_cred(task);
+ inode->i_uid = cred->euid;
+ inode->i_gid = cred->egid;
+ rcu_read_unlock();
} else {
inode->i_uid = 0;
inode->i_gid = 0;
@@ -1649,6 +1661,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
struct task_struct *task = get_proc_task(inode);
int fd = proc_fd(inode);
struct files_struct *files;
+ const struct cred *cred;
if (task) {
files = get_files_struct(task);
@@ -1658,8 +1671,11 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
rcu_read_unlock();
put_files_struct(files);
if (task_dumpable(task)) {
- inode->i_uid = task->euid;
- inode->i_gid = task->egid;
+ rcu_read_lock();
+ cred = __task_cred(task);
+ inode->i_uid = cred->euid;
+ inode->i_gid = cred->egid;
+ rcu_read_unlock();
} else {
inode->i_uid = 0;
inode->i_gid = 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 2543fd00c658..bcb674275348 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -106,6 +106,12 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
+static void *proc_get_inodes(struct kmem_cache *s, int nr, void **v)
+{
+ return fs_get_inodes(s, nr, v,
+ offsetof(struct proc_inode, vfs_inode));
+}
+
void __init proc_init_inodecache(void)
{
proc_inode_cachep = kmem_cache_create("proc_inode_cache",
@@ -113,6 +119,8 @@ void __init proc_init_inodecache(void)
0, (SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD|SLAB_PANIC),
init_once);
+ kmem_cache_setup_defrag(proc_inode_cachep,
+ proc_get_inodes, kick_inodes);
}
static const struct super_operations proc_sops = {
diff --git a/fs/quota.c b/fs/quota.c
index 7f4386ebc23a..4a8c94f05f76 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -73,13 +73,13 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
case Q_SETQUOTA:
case Q_GETQUOTA:
/* This is just informative test so we are satisfied without a lock */
- if (!sb_has_quota_enabled(sb, type))
+ if (!sb_has_quota_active(sb, type))
return -ESRCH;
}
/* Check privileges */
if (cmd == Q_GETQUOTA) {
- if (((type == USRQUOTA && current->euid != id) ||
+ if (((type == USRQUOTA && current_euid() != id) ||
(type == GRPQUOTA && !in_egroup_p(id))) &&
!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -130,7 +130,7 @@ static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t i
/* Check privileges */
if (cmd == Q_XGETQUOTA) {
- if (((type == XQM_USRQUOTA && current->euid != id) ||
+ if (((type == XQM_USRQUOTA && current_euid() != id) ||
(type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -160,6 +160,9 @@ static void quota_sync_sb(struct super_block *sb, int type)
int cnt;
sb->s_qcop->quota_sync(sb, type);
+
+ if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
+ return;
/* This is not very clever (and fast) but currently I don't know about
* any other simple way of getting quota data to disk and we must get
* them there for userspace to be visible... */
@@ -175,7 +178,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
- if (!sb_has_quota_enabled(sb, cnt))
+ if (!sb_has_quota_active(sb, cnt))
continue;
mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA);
truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
@@ -201,7 +204,7 @@ restart:
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && type != cnt)
continue;
- if (!sb_has_quota_enabled(sb, cnt))
+ if (!sb_has_quota_active(sb, cnt))
continue;
if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
@@ -245,7 +248,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
__u32 fmt;
down_read(&sb_dqopt(sb)->dqptr_sem);
- if (!sb_has_quota_enabled(sb, type)) {
+ if (!sb_has_quota_active(sb, type)) {
up_read(&sb_dqopt(sb)->dqptr_sem);
return -ESRCH;
}
diff --git a/fs/quota_tree.c b/fs/quota_tree.c
new file mode 100644
index 000000000000..953404c95b17
--- /dev/null
+++ b/fs/quota_tree.c
@@ -0,0 +1,645 @@
+/*
+ * vfsv0 quota IO operations on file
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/dqblk_v2.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/quotaops.h>
+
+#include <asm/byteorder.h>
+
+#include "quota_tree.h"
+
+MODULE_AUTHOR("Jan Kara");
+MODULE_DESCRIPTION("Quota trie support");
+MODULE_LICENSE("GPL");
+
+#define __QUOTA_QT_PARANOIA
+
+typedef char *dqbuf_t;
+
+static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
+{
+ unsigned int epb = info->dqi_usable_bs >> 2;
+
+ depth = info->dqi_qtree_depth - depth - 1;
+ while (depth--)
+ id /= epb;
+ return id % epb;
+}
+
+/* Number of entries in one blocks */
+static inline int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info)
+{
+ return (info->dqi_usable_bs - sizeof(struct qt_disk_dqdbheader))
+ / info->dqi_entry_size;
+}
+
+static dqbuf_t getdqbuf(size_t size)
+{
+ dqbuf_t buf = kmalloc(size, GFP_NOFS);
+ if (!buf)
+ printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
+ return buf;
+}
+
+static inline void freedqbuf(dqbuf_t buf)
+{
+ kfree(buf);
+}
+
+static inline ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
+{
+ struct super_block *sb = info->dqi_sb;
+
+ memset(buf, 0, info->dqi_usable_bs);
+ return sb->s_op->quota_read(sb, info->dqi_type, (char *)buf,
+ info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+}
+
+static inline ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
+{
+ struct super_block *sb = info->dqi_sb;
+
+ return sb->s_op->quota_write(sb, info->dqi_type, (char *)buf,
+ info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+}
+
+/* Remove empty block from list and return it */
+static int get_free_dqblk(struct qtree_mem_dqinfo *info)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+ int ret, blk;
+
+ if (!buf)
+ return -ENOMEM;
+ if (info->dqi_free_blk) {
+ blk = info->dqi_free_blk;
+ ret = read_blk(info, blk, buf);
+ if (ret < 0)
+ goto out_buf;
+ info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
+ }
+ else {
+ memset(buf, 0, info->dqi_usable_bs);
+ /* Assure block allocation... */
+ ret = write_blk(info, info->dqi_blocks, buf);
+ if (ret < 0)
+ goto out_buf;
+ blk = info->dqi_blocks++;
+ }
+ mark_info_dirty(info->dqi_sb, info->dqi_type);
+ ret = blk;
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Insert empty block to the list */
+static int put_free_dqblk(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+ struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+ int err;
+
+ dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
+ dh->dqdh_prev_free = cpu_to_le32(0);
+ dh->dqdh_entries = cpu_to_le16(0);
+ err = write_blk(info, blk, buf);
+ if (err < 0)
+ return err;
+ info->dqi_free_blk = blk;
+ mark_info_dirty(info->dqi_sb, info->dqi_type);
+ return 0;
+}
+
+/* Remove given block from the list of blocks with free entries */
+static int remove_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+ dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
+ struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+ uint nextblk = le32_to_cpu(dh->dqdh_next_free);
+ uint prevblk = le32_to_cpu(dh->dqdh_prev_free);
+ int err;
+
+ if (!tmpbuf)
+ return -ENOMEM;
+ if (nextblk) {
+ err = read_blk(info, nextblk, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
+ dh->dqdh_prev_free;
+ err = write_blk(info, nextblk, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ }
+ if (prevblk) {
+ err = read_blk(info, prevblk, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
+ dh->dqdh_next_free;
+ err = write_blk(info, prevblk, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ } else {
+ info->dqi_free_entry = nextblk;
+ mark_info_dirty(info->dqi_sb, info->dqi_type);
+ }
+ freedqbuf(tmpbuf);
+ dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
+ /* No matter whether write succeeds block is out of list */
+ if (write_blk(info, blk, buf) < 0)
+ printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
+ return 0;
+out_buf:
+ freedqbuf(tmpbuf);
+ return err;
+}
+
+/* Insert given block to the beginning of list with free entries */
+static int insert_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+ dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
+ struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+ int err;
+
+ if (!tmpbuf)
+ return -ENOMEM;
+ dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
+ dh->dqdh_prev_free = cpu_to_le32(0);
+ err = write_blk(info, blk, buf);
+ if (err < 0)
+ goto out_buf;
+ if (info->dqi_free_entry) {
+ err = read_blk(info, info->dqi_free_entry, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
+ cpu_to_le32(blk);
+ err = write_blk(info, info->dqi_free_entry, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ }
+ freedqbuf(tmpbuf);
+ info->dqi_free_entry = blk;
+ mark_info_dirty(info->dqi_sb, info->dqi_type);
+ return 0;
+out_buf:
+ freedqbuf(tmpbuf);
+ return err;
+}
+
+/* Is the entry in the block free? */
+int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk)
+{
+ int i;
+
+ for (i = 0; i < info->dqi_entry_size; i++)
+ if (disk[i])
+ return 0;
+ return 1;
+}
+EXPORT_SYMBOL(qtree_entry_unused);
+
+/* Find space for dquot */
+static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot, int *err)
+{
+ uint blk, i;
+ struct qt_disk_dqdbheader *dh;
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ char *ddquot;
+
+ *err = 0;
+ if (!buf) {
+ *err = -ENOMEM;
+ return 0;
+ }
+ dh = (struct qt_disk_dqdbheader *)buf;
+ if (info->dqi_free_entry) {
+ blk = info->dqi_free_entry;
+ *err = read_blk(info, blk, buf);
+ if (*err < 0)
+ goto out_buf;
+ } else {
+ blk = get_free_dqblk(info);
+ if ((int)blk < 0) {
+ *err = blk;
+ freedqbuf(buf);
+ return 0;
+ }
+ memset(buf, 0, info->dqi_usable_bs);
+ /* This is enough as block is already zeroed and entry list is empty... */
+ info->dqi_free_entry = blk;
+ mark_info_dirty(dquot->dq_sb, dquot->dq_type);
+ }
+ /* Block will be full? */
+ if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
+ *err = remove_free_dqentry(info, buf, blk);
+ if (*err < 0) {
+ printk(KERN_ERR "VFS: find_free_dqentry(): Can't "
+ "remove block (%u) from entry free list.\n",
+ blk);
+ goto out_buf;
+ }
+ }
+ le16_add_cpu(&dh->dqdh_entries, 1);
+ /* Find free structure in block */
+ for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
+ i < qtree_dqstr_in_blk(info) && !qtree_entry_unused(info, ddquot);
+ i++, ddquot += info->dqi_entry_size);
+#ifdef __QUOTA_QT_PARANOIA
+ if (i == qtree_dqstr_in_blk(info)) {
+ printk(KERN_ERR "VFS: find_free_dqentry(): Data block full "
+ "but it shouldn't.\n");
+ *err = -EIO;
+ goto out_buf;
+ }
+#endif
+ *err = write_blk(info, blk, buf);
+ if (*err < 0) {
+ printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota "
+ "data block %u.\n", blk);
+ goto out_buf;
+ }
+ dquot->dq_off = (blk << info->dqi_blocksize_bits) +
+ sizeof(struct qt_disk_dqdbheader) +
+ i * info->dqi_entry_size;
+ freedqbuf(buf);
+ return blk;
+out_buf:
+ freedqbuf(buf);
+ return 0;
+}
+
+/* Insert reference to structure into the trie */
+static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ uint *treeblk, int depth)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ int ret = 0, newson = 0, newact = 0;
+ __le32 *ref;
+ uint newblk;
+
+ if (!buf)
+ return -ENOMEM;
+ if (!*treeblk) {
+ ret = get_free_dqblk(info);
+ if (ret < 0)
+ goto out_buf;
+ *treeblk = ret;
+ memset(buf, 0, info->dqi_usable_bs);
+ newact = 1;
+ } else {
+ ret = read_blk(info, *treeblk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read tree quota block "
+ "%u.\n", *treeblk);
+ goto out_buf;
+ }
+ }
+ ref = (__le32 *)buf;
+ newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (!newblk)
+ newson = 1;
+ if (depth == info->dqi_qtree_depth - 1) {
+#ifdef __QUOTA_QT_PARANOIA
+ if (newblk) {
+ printk(KERN_ERR "VFS: Inserting already present quota "
+ "entry (block %u).\n",
+ le32_to_cpu(ref[get_index(info,
+ dquot->dq_id, depth)]));
+ ret = -EIO;
+ goto out_buf;
+ }
+#endif
+ newblk = find_free_dqentry(info, dquot, &ret);
+ } else {
+ ret = do_insert_tree(info, dquot, &newblk, depth+1);
+ }
+ if (newson && ret >= 0) {
+ ref[get_index(info, dquot->dq_id, depth)] =
+ cpu_to_le32(newblk);
+ ret = write_blk(info, *treeblk, buf);
+ } else if (newact && ret < 0) {
+ put_free_dqblk(info, buf, *treeblk);
+ }
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Wrapper for inserting quota structure into tree */
+static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot)
+{
+ int tmp = QT_TREEOFF;
+ return do_insert_tree(info, dquot, &tmp, 0);
+}
+
+/*
+ * We don't have to be afraid of deadlocks as we never have quotas on quota files...
+ */
+int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+ int type = dquot->dq_type;
+ struct super_block *sb = dquot->dq_sb;
+ ssize_t ret;
+ dqbuf_t ddquot = getdqbuf(info->dqi_entry_size);
+
+ if (!ddquot)
+ return -ENOMEM;
+
+ /* dq_off is guarded by dqio_mutex */
+ if (!dquot->dq_off) {
+ ret = dq_insert_tree(info, dquot);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Error %zd occurred while "
+ "creating quota.\n", ret);
+ freedqbuf(ddquot);
+ return ret;
+ }
+ }
+ spin_lock(&dq_data_lock);
+ info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
+ spin_unlock(&dq_data_lock);
+ ret = sb->s_op->quota_write(sb, type, (char *)ddquot,
+ info->dqi_entry_size, dquot->dq_off);
+ if (ret != info->dqi_entry_size) {
+ printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+ sb->s_id);
+ if (ret >= 0)
+ ret = -ENOSPC;
+ } else {
+ ret = 0;
+ }
+ dqstats.writes++;
+ freedqbuf(ddquot);
+
+ return ret;
+}
+EXPORT_SYMBOL(qtree_write_dquot);
+
+/* Free dquot entry in data block */
+static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ uint blk)
+{
+ struct qt_disk_dqdbheader *dh;
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ int ret = 0;
+
+ if (!buf)
+ return -ENOMEM;
+ if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
+ printk(KERN_ERR "VFS: Quota structure has offset to other "
+ "block (%u) than it should (%u).\n", blk,
+ (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
+ goto out_buf;
+ }
+ ret = read_blk(info, blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
+ goto out_buf;
+ }
+ dh = (struct qt_disk_dqdbheader *)buf;
+ le16_add_cpu(&dh->dqdh_entries, -1);
+ if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
+ ret = remove_free_dqentry(info, buf, blk);
+ if (ret >= 0)
+ ret = put_free_dqblk(info, buf, blk);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't move quota data block (%u) "
+ "to free list.\n", blk);
+ goto out_buf;
+ }
+ } else {
+ memset(buf +
+ (dquot->dq_off & ((1 << info->dqi_blocksize_bits) - 1)),
+ 0, info->dqi_entry_size);
+ if (le16_to_cpu(dh->dqdh_entries) ==
+ qtree_dqstr_in_blk(info) - 1) {
+ /* Insert will write block itself */
+ ret = insert_free_dqentry(info, buf, blk);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't insert quota data "
+ "block (%u) to free entry list.\n", blk);
+ goto out_buf;
+ }
+ } else {
+ ret = write_blk(info, blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't write quota data "
+ "block %u\n", blk);
+ goto out_buf;
+ }
+ }
+ }
+ dquot->dq_off = 0; /* Quota is now unattached */
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Remove reference to dquot from tree */
+static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ uint *blk, int depth)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ int ret = 0;
+ uint newblk;
+ __le32 *ref = (__le32 *)buf;
+
+ if (!buf)
+ return -ENOMEM;
+ ret = read_blk(info, *blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
+ goto out_buf;
+ }
+ newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (depth == info->dqi_qtree_depth - 1) {
+ ret = free_dqentry(info, dquot, newblk);
+ newblk = 0;
+ } else {
+ ret = remove_tree(info, dquot, &newblk, depth+1);
+ }
+ if (ret >= 0 && !newblk) {
+ int i;
+ ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0);
+ /* Block got empty? */
+ for (i = 0;
+ i < (info->dqi_usable_bs >> 2) && !ref[i];
+ i++);
+ /* Don't put the root block into the free block list */
+ if (i == (info->dqi_usable_bs >> 2)
+ && *blk != QT_TREEOFF) {
+ put_free_dqblk(info, buf, *blk);
+ *blk = 0;
+ } else {
+ ret = write_blk(info, *blk, buf);
+ if (ret < 0)
+ printk(KERN_ERR "VFS: Can't write quota tree "
+ "block %u.\n", *blk);
+ }
+ }
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Delete dquot from tree */
+int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+ uint tmp = QT_TREEOFF;
+
+ if (!dquot->dq_off) /* Even not allocated? */
+ return 0;
+ return remove_tree(info, dquot, &tmp, 0);
+}
+EXPORT_SYMBOL(qtree_delete_dquot);
+
+/* Find entry in block */
+static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot, uint blk)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ loff_t ret = 0;
+ int i;
+ char *ddquot;
+
+ if (!buf)
+ return -ENOMEM;
+ ret = read_blk(info, blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+ goto out_buf;
+ }
+ for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
+ i < qtree_dqstr_in_blk(info) && !info->dqi_ops->is_id(ddquot, dquot);
+ i++, ddquot += info->dqi_entry_size);
+ if (i == qtree_dqstr_in_blk(info)) {
+ printk(KERN_ERR "VFS: Quota for id %u referenced "
+ "but not present.\n", dquot->dq_id);
+ ret = -EIO;
+ goto out_buf;
+ } else {
+ ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
+ qt_disk_dqdbheader) + i * info->dqi_entry_size;
+ }
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Find entry for given id in the tree */
+static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot, uint blk, int depth)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ loff_t ret = 0;
+ __le32 *ref = (__le32 *)buf;
+
+ if (!buf)
+ return -ENOMEM;
+ ret = read_blk(info, blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+ goto out_buf;
+ }
+ ret = 0;
+ blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (!blk) /* No reference? */
+ goto out_buf;
+ if (depth < info->dqi_qtree_depth - 1)
+ ret = find_tree_dqentry(info, dquot, blk, depth+1);
+ else
+ ret = find_block_dqentry(info, dquot, blk);
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Find entry for given id in the tree - wrapper function */
+static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot)
+{
+ return find_tree_dqentry(info, dquot, QT_TREEOFF, 0);
+}
+
+int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+ int type = dquot->dq_type;
+ struct super_block *sb = dquot->dq_sb;
+ loff_t offset;
+ dqbuf_t ddquot;
+ int ret = 0;
+
+#ifdef __QUOTA_QT_PARANOIA
+ /* Invalidated quota? */
+ if (!sb_dqopt(dquot->dq_sb)->files[type]) {
+ printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
+ return -EIO;
+ }
+#endif
+ /* Do we know offset of the dquot entry in the quota file? */
+ if (!dquot->dq_off) {
+ offset = find_dqentry(info, dquot);
+ if (offset <= 0) { /* Entry not present? */
+ if (offset < 0)
+ printk(KERN_ERR "VFS: Can't read quota "
+ "structure for id %u.\n", dquot->dq_id);
+ dquot->dq_off = 0;
+ set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+ ret = offset;
+ goto out;
+ }
+ dquot->dq_off = offset;
+ }
+ ddquot = getdqbuf(info->dqi_entry_size);
+ if (!ddquot)
+ return -ENOMEM;
+ ret = sb->s_op->quota_read(sb, type, (char *)ddquot,
+ info->dqi_entry_size, dquot->dq_off);
+ if (ret != info->dqi_entry_size) {
+ if (ret >= 0)
+ ret = -EIO;
+ printk(KERN_ERR "VFS: Error while reading quota "
+ "structure for id %u.\n", dquot->dq_id);
+ set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+ freedqbuf(ddquot);
+ goto out;
+ }
+ spin_lock(&dq_data_lock);
+ info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
+ if (!dquot->dq_dqb.dqb_bhardlimit &&
+ !dquot->dq_dqb.dqb_bsoftlimit &&
+ !dquot->dq_dqb.dqb_ihardlimit &&
+ !dquot->dq_dqb.dqb_isoftlimit)
+ set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ spin_unlock(&dq_data_lock);
+ freedqbuf(ddquot);
+out:
+ dqstats.reads++;
+ return ret;
+}
+EXPORT_SYMBOL(qtree_read_dquot);
+
+/* Check whether dquot should not be deleted. We know we are
+ * the only one operating on dquot (thanks to dq_lock) */
+int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
+ return qtree_delete_dquot(info, dquot);
+ return 0;
+}
+EXPORT_SYMBOL(qtree_release_dquot);
diff --git a/fs/quota_tree.h b/fs/quota_tree.h
new file mode 100644
index 000000000000..a1ab8db81a51
--- /dev/null
+++ b/fs/quota_tree.h
@@ -0,0 +1,25 @@
+/*
+ * Definitions of structures for vfsv0 quota format
+ */
+
+#ifndef _LINUX_QUOTA_TREE_H
+#define _LINUX_QUOTA_TREE_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ * Structure of header of block with quota structures. It is padded to 16 bytes so
+ * there will be space for exactly 21 quota-entries in a block
+ */
+struct qt_disk_dqdbheader {
+ __le32 dqdh_next_free; /* Number of next block with free entry */
+ __le32 dqdh_prev_free; /* Number of previous block with free entry */
+ __le16 dqdh_entries; /* Number of valid entries in block */
+ __le16 dqdh_pad1;
+ __le32 dqdh_pad2;
+};
+
+#define QT_TREEOFF 1 /* Offset of tree in file in blocks */
+
+#endif /* _LINUX_QUOTAIO_TREE_H */
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index 5ae15b13eeb0..b4af1c69ad16 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -3,25 +3,39 @@
#include <linux/quota.h>
#include <linux/quotaops.h>
#include <linux/dqblk_v1.h>
-#include <linux/quotaio_v1.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <asm/byteorder.h>
+#include "quotaio_v1.h"
+
MODULE_AUTHOR("Jan Kara");
MODULE_DESCRIPTION("Old quota format support");
MODULE_LICENSE("GPL");
+#define QUOTABLOCK_BITS 10
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+
+static inline qsize_t v1_stoqb(qsize_t space)
+{
+ return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
+}
+
+static inline qsize_t v1_qbtos(qsize_t blocks)
+{
+ return blocks << QUOTABLOCK_BITS;
+}
+
static void v1_disk2mem_dqblk(struct mem_dqblk *m, struct v1_disk_dqblk *d)
{
m->dqb_ihardlimit = d->dqb_ihardlimit;
m->dqb_isoftlimit = d->dqb_isoftlimit;
m->dqb_curinodes = d->dqb_curinodes;
- m->dqb_bhardlimit = d->dqb_bhardlimit;
- m->dqb_bsoftlimit = d->dqb_bsoftlimit;
- m->dqb_curspace = ((qsize_t)d->dqb_curblocks) << QUOTABLOCK_BITS;
+ m->dqb_bhardlimit = v1_qbtos(d->dqb_bhardlimit);
+ m->dqb_bsoftlimit = v1_qbtos(d->dqb_bsoftlimit);
+ m->dqb_curspace = v1_qbtos(d->dqb_curblocks);
m->dqb_itime = d->dqb_itime;
m->dqb_btime = d->dqb_btime;
}
@@ -31,9 +45,9 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
d->dqb_ihardlimit = m->dqb_ihardlimit;
d->dqb_isoftlimit = m->dqb_isoftlimit;
d->dqb_curinodes = m->dqb_curinodes;
- d->dqb_bhardlimit = m->dqb_bhardlimit;
- d->dqb_bsoftlimit = m->dqb_bsoftlimit;
- d->dqb_curblocks = toqb(m->dqb_curspace);
+ d->dqb_bhardlimit = v1_stoqb(m->dqb_bhardlimit);
+ d->dqb_bsoftlimit = v1_stoqb(m->dqb_bsoftlimit);
+ d->dqb_curblocks = v1_stoqb(m->dqb_curspace);
d->dqb_itime = m->dqb_itime;
d->dqb_btime = m->dqb_btime;
}
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index b53827dc02d9..b618b563635c 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -6,7 +6,6 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/dqblk_v2.h>
-#include <linux/quotaio_v2.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -15,16 +14,37 @@
#include <asm/byteorder.h>
+#include "quota_tree.h"
+#include "quotaio_v2.h"
+
MODULE_AUTHOR("Jan Kara");
MODULE_DESCRIPTION("Quota format v2 support");
MODULE_LICENSE("GPL");
#define __QUOTA_V2_PARANOIA
-typedef char *dqbuf_t;
+static void v2_mem2diskdqb(void *dp, struct dquot *dquot);
+static void v2_disk2memdqb(struct dquot *dquot, void *dp);
+static int v2_is_id(void *dp, struct dquot *dquot);
+
+static struct qtree_fmt_operations v2_qtree_ops = {
+ .mem2disk_dqblk = v2_mem2diskdqb,
+ .disk2mem_dqblk = v2_disk2memdqb,
+ .is_id = v2_is_id,
+};
+
+#define QUOTABLOCK_BITS 10
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
-#define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+static inline qsize_t v2_stoqb(qsize_t space)
+{
+ return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
+}
+
+static inline qsize_t v2_qbtos(qsize_t blocks)
+{
+ return blocks << QUOTABLOCK_BITS;
+}
/* Check whether given file is really vfsv0 quotafile */
static int v2_check_quota_file(struct super_block *sb, int type)
@@ -50,7 +70,8 @@ static int v2_check_quota_file(struct super_block *sb, int type)
static int v2_read_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
- struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct qtree_mem_dqinfo *qinfo;
ssize_t size;
size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
@@ -60,15 +81,29 @@ static int v2_read_file_info(struct super_block *sb, int type)
sb->s_id);
return -1;
}
+ info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
+ if (!info->dqi_priv) {
+ printk(KERN_WARNING
+ "Not enough memory for quota information structure.\n");
+ return -1;
+ }
+ qinfo = info->dqi_priv;
/* limits are stored as unsigned 32-bit data */
info->dqi_maxblimit = 0xffffffff;
info->dqi_maxilimit = 0xffffffff;
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
- info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
- info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
- info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+ qinfo->dqi_sb = sb;
+ qinfo->dqi_type = type;
+ qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+ qinfo->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+ qinfo->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+ qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
+ qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
+ qinfo->dqi_qtree_depth = qtree_depth(qinfo);
+ qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk);
+ qinfo->dqi_ops = &v2_qtree_ops;
return 0;
}
@@ -76,7 +111,8 @@ static int v2_read_file_info(struct super_block *sb, int type)
static int v2_write_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
- struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
ssize_t size;
spin_lock(&dq_data_lock);
@@ -85,9 +121,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
spin_unlock(&dq_data_lock);
- dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
- dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
- dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
+ dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks);
+ dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk);
+ dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
if (size != sizeof(struct v2_disk_dqinfo)) {
@@ -98,574 +134,75 @@ static int v2_write_file_info(struct super_block *sb, int type)
return 0;
}
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
+static void v2_disk2memdqb(struct dquot *dquot, void *dp)
{
+ struct v2_disk_dqblk *d = dp, empty;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+
m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
m->dqb_itime = le64_to_cpu(d->dqb_itime);
- m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
- m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
+ m->dqb_bhardlimit = v2_qbtos(le32_to_cpu(d->dqb_bhardlimit));
+ m->dqb_bsoftlimit = v2_qbtos(le32_to_cpu(d->dqb_bsoftlimit));
m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
m->dqb_btime = le64_to_cpu(d->dqb_btime);
+ /* We need to escape back all-zero structure */
+ memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+ empty.dqb_itime = cpu_to_le64(1);
+ if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk)))
+ m->dqb_itime = 0;
}
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
+static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
{
+ struct v2_disk_dqblk *d = dp;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+ struct qtree_mem_dqinfo *info =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
d->dqb_itime = cpu_to_le64(m->dqb_itime);
- d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
- d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
+ d->dqb_bhardlimit = cpu_to_le32(v2_stoqb(m->dqb_bhardlimit));
+ d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
d->dqb_btime = cpu_to_le64(m->dqb_btime);
- d->dqb_id = cpu_to_le32(id);
-}
-
-static dqbuf_t getdqbuf(void)
-{
- dqbuf_t buf = kmalloc(V2_DQBLKSIZE, GFP_NOFS);
- if (!buf)
- printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
- return buf;
-}
-
-static inline void freedqbuf(dqbuf_t buf)
-{
- kfree(buf);
-}
-
-static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
-{
- memset(buf, 0, V2_DQBLKSIZE);
- return sb->s_op->quota_read(sb, type, (char *)buf,
- V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
-}
-
-static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
-{
- return sb->s_op->quota_write(sb, type, (char *)buf,
- V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
-}
-
-/* Remove empty block from list and return it */
-static int get_free_dqblk(struct super_block *sb, int type)
-{
- dqbuf_t buf = getdqbuf();
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
- struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
- int ret, blk;
-
- if (!buf)
- return -ENOMEM;
- if (info->u.v2_i.dqi_free_blk) {
- blk = info->u.v2_i.dqi_free_blk;
- if ((ret = read_blk(sb, type, blk, buf)) < 0)
- goto out_buf;
- info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
- }
- else {
- memset(buf, 0, V2_DQBLKSIZE);
- /* Assure block allocation... */
- if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
- goto out_buf;
- blk = info->u.v2_i.dqi_blocks++;
- }
- mark_info_dirty(sb, type);
- ret = blk;
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Insert empty block to the list */
-static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
-{
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
- struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
- int err;
-
- dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_blk);
- dh->dqdh_prev_free = cpu_to_le32(0);
- dh->dqdh_entries = cpu_to_le16(0);
- info->u.v2_i.dqi_free_blk = blk;
- mark_info_dirty(sb, type);
- /* Some strange block. We had better leave it... */
- if ((err = write_blk(sb, type, blk, buf)) < 0)
- return err;
- return 0;
+ d->dqb_id = cpu_to_le32(dquot->dq_id);
+ if (qtree_entry_unused(info, dp))
+ d->dqb_itime = cpu_to_le64(1);
}
-/* Remove given block from the list of blocks with free entries */
-static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
+static int v2_is_id(void *dp, struct dquot *dquot)
{
- dqbuf_t tmpbuf = getdqbuf();
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
- struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
- uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
- int err;
+ struct v2_disk_dqblk *d = dp;
+ struct qtree_mem_dqinfo *info =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
- if (!tmpbuf)
- return -ENOMEM;
- if (nextblk) {
- if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
- goto out_buf;
- ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
- if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
- goto out_buf;
- }
- if (prevblk) {
- if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
- goto out_buf;
- ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
- if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
- goto out_buf;
- }
- else {
- info->u.v2_i.dqi_free_entry = nextblk;
- mark_info_dirty(sb, type);
- }
- freedqbuf(tmpbuf);
- dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
- /* No matter whether write succeeds block is out of list */
- if (write_blk(sb, type, blk, buf) < 0)
- printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
- return 0;
-out_buf:
- freedqbuf(tmpbuf);
- return err;
-}
-
-/* Insert given block to the beginning of list with free entries */
-static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
-{
- dqbuf_t tmpbuf = getdqbuf();
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
- struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
- int err;
-
- if (!tmpbuf)
- return -ENOMEM;
- dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
- dh->dqdh_prev_free = cpu_to_le32(0);
- if ((err = write_blk(sb, type, blk, buf)) < 0)
- goto out_buf;
- if (info->u.v2_i.dqi_free_entry) {
- if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
- goto out_buf;
- ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
- if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
- goto out_buf;
- }
- freedqbuf(tmpbuf);
- info->u.v2_i.dqi_free_entry = blk;
- mark_info_dirty(sb, type);
- return 0;
-out_buf:
- freedqbuf(tmpbuf);
- return err;
-}
-
-/* Find space for dquot */
-static uint find_free_dqentry(struct dquot *dquot, int *err)
-{
- struct super_block *sb = dquot->dq_sb;
- struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
- uint blk, i;
- struct v2_disk_dqdbheader *dh;
- struct v2_disk_dqblk *ddquot;
- struct v2_disk_dqblk fakedquot;
- dqbuf_t buf;
-
- *err = 0;
- if (!(buf = getdqbuf())) {
- *err = -ENOMEM;
+ if (qtree_entry_unused(info, dp))
return 0;
- }
- dh = (struct v2_disk_dqdbheader *)buf;
- ddquot = GETENTRIES(buf);
- if (info->u.v2_i.dqi_free_entry) {
- blk = info->u.v2_i.dqi_free_entry;
- if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
- goto out_buf;
- }
- else {
- blk = get_free_dqblk(sb, dquot->dq_type);
- if ((int)blk < 0) {
- *err = blk;
- freedqbuf(buf);
- return 0;
- }
- memset(buf, 0, V2_DQBLKSIZE);
- /* This is enough as block is already zeroed and entry list is empty... */
- info->u.v2_i.dqi_free_entry = blk;
- mark_info_dirty(sb, dquot->dq_type);
- }
- if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */
- if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
- printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
- goto out_buf;
- }
- le16_add_cpu(&dh->dqdh_entries, 1);
- memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
- /* Find free structure in block */
- for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
-#ifdef __QUOTA_V2_PARANOIA
- if (i == V2_DQSTRINBLK) {
- printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
- *err = -EIO;
- goto out_buf;
- }
-#endif
- if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
- goto out_buf;
- }
- dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
- freedqbuf(buf);
- return blk;
-out_buf:
- freedqbuf(buf);
- return 0;
-}
-
-/* Insert reference to structure into the trie */
-static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
-{
- struct super_block *sb = dquot->dq_sb;
- dqbuf_t buf;
- int ret = 0, newson = 0, newact = 0;
- __le32 *ref;
- uint newblk;
-
- if (!(buf = getdqbuf()))
- return -ENOMEM;
- if (!*treeblk) {
- ret = get_free_dqblk(sb, dquot->dq_type);
- if (ret < 0)
- goto out_buf;
- *treeblk = ret;
- memset(buf, 0, V2_DQBLKSIZE);
- newact = 1;
- }
- else {
- if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
- goto out_buf;
- }
- }
- ref = (__le32 *)buf;
- newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
- if (!newblk)
- newson = 1;
- if (depth == V2_DQTREEDEPTH-1) {
-#ifdef __QUOTA_V2_PARANOIA
- if (newblk) {
- printk(KERN_ERR "VFS: Inserting already present quota entry (block %u).\n", le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]));
- ret = -EIO;
- goto out_buf;
- }
-#endif
- newblk = find_free_dqentry(dquot, &ret);
- }
- else
- ret = do_insert_tree(dquot, &newblk, depth+1);
- if (newson && ret >= 0) {
- ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
- ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
- }
- else if (newact && ret < 0)
- put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
-out_buf:
- freedqbuf(buf);
- return ret;
+ return le32_to_cpu(d->dqb_id) == dquot->dq_id;
}
-/* Wrapper for inserting quota structure into tree */
-static inline int dq_insert_tree(struct dquot *dquot)
+static int v2_read_dquot(struct dquot *dquot)
{
- int tmp = V2_DQTREEOFF;
- return do_insert_tree(dquot, &tmp, 0);
+ return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
-/*
- * We don't have to be afraid of deadlocks as we never have quotas on quota files...
- */
static int v2_write_dquot(struct dquot *dquot)
{
- int type = dquot->dq_type;
- ssize_t ret;
- struct v2_disk_dqblk ddquot, empty;
-
- /* dq_off is guarded by dqio_mutex */
- if (!dquot->dq_off)
- if ((ret = dq_insert_tree(dquot)) < 0) {
- printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
- return ret;
- }
- spin_lock(&dq_data_lock);
- mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
- /* Argh... We may need to write structure full of zeroes but that would be
- * treated as an empty place by the rest of the code. Format change would
- * be definitely cleaner but the problems probably are not worth it */
- memset(&empty, 0, sizeof(struct v2_disk_dqblk));
- if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
- ddquot.dqb_itime = cpu_to_le64(1);
- spin_unlock(&dq_data_lock);
- ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
- (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
- if (ret != sizeof(struct v2_disk_dqblk)) {
- printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
- if (ret >= 0)
- ret = -ENOSPC;
- }
- else
- ret = 0;
- dqstats.writes++;
-
- return ret;
+ return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
-/* Free dquot entry in data block */
-static int free_dqentry(struct dquot *dquot, uint blk)
-{
- struct super_block *sb = dquot->dq_sb;
- int type = dquot->dq_type;
- struct v2_disk_dqdbheader *dh;
- dqbuf_t buf = getdqbuf();
- int ret = 0;
-
- if (!buf)
- return -ENOMEM;
- if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) {
- printk(KERN_ERR "VFS: Quota structure has offset to other "
- "block (%u) than it should (%u).\n", blk,
- (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
- goto out_buf;
- }
- if ((ret = read_blk(sb, type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
- goto out_buf;
- }
- dh = (struct v2_disk_dqdbheader *)buf;
- le16_add_cpu(&dh->dqdh_entries, -1);
- if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
- if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
- (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
- printk(KERN_ERR "VFS: Can't move quota data block (%u) "
- "to free list.\n", blk);
- goto out_buf;
- }
- }
- else {
- memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
- sizeof(struct v2_disk_dqblk));
- if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
- /* Insert will write block itself */
- if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
- printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
- goto out_buf;
- }
- }
- else
- if ((ret = write_blk(sb, type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't write quota data "
- "block %u\n", blk);
- goto out_buf;
- }
- }
- dquot->dq_off = 0; /* Quota is now unattached */
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Remove reference to dquot from tree */
-static int remove_tree(struct dquot *dquot, uint *blk, int depth)
-{
- struct super_block *sb = dquot->dq_sb;
- int type = dquot->dq_type;
- dqbuf_t buf = getdqbuf();
- int ret = 0;
- uint newblk;
- __le32 *ref = (__le32 *)buf;
-
- if (!buf)
- return -ENOMEM;
- if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
- goto out_buf;
- }
- newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
- if (depth == V2_DQTREEDEPTH-1) {
- ret = free_dqentry(dquot, newblk);
- newblk = 0;
- }
- else
- ret = remove_tree(dquot, &newblk, depth+1);
- if (ret >= 0 && !newblk) {
- int i;
- ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
- for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? */
- /* Don't put the root block into the free block list */
- if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) {
- put_free_dqblk(sb, type, buf, *blk);
- *blk = 0;
- }
- else
- if ((ret = write_blk(sb, type, *blk, buf)) < 0)
- printk(KERN_ERR "VFS: Can't write quota tree "
- "block %u.\n", *blk);
- }
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Delete dquot from tree */
-static int v2_delete_dquot(struct dquot *dquot)
-{
- uint tmp = V2_DQTREEOFF;
-
- if (!dquot->dq_off) /* Even not allocated? */
- return 0;
- return remove_tree(dquot, &tmp, 0);
-}
-
-/* Find entry in block */
-static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
-{
- dqbuf_t buf = getdqbuf();
- loff_t ret = 0;
- int i;
- struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
-
- if (!buf)
- return -ENOMEM;
- if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
- goto out_buf;
- }
- if (dquot->dq_id)
- for (i = 0; i < V2_DQSTRINBLK &&
- le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
- else { /* ID 0 as a bit more complicated searching... */
- struct v2_disk_dqblk fakedquot;
-
- memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
- for (i = 0; i < V2_DQSTRINBLK; i++)
- if (!le32_to_cpu(ddquot[i].dqb_id) &&
- memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
- break;
- }
- if (i == V2_DQSTRINBLK) {
- printk(KERN_ERR "VFS: Quota for id %u referenced "
- "but not present.\n", dquot->dq_id);
- ret = -EIO;
- goto out_buf;
- }
- else
- ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
- v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Find entry for given id in the tree */
-static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
-{
- dqbuf_t buf = getdqbuf();
- loff_t ret = 0;
- __le32 *ref = (__le32 *)buf;
-
- if (!buf)
- return -ENOMEM;
- if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
- goto out_buf;
- }
- ret = 0;
- blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
- if (!blk) /* No reference? */
- goto out_buf;
- if (depth < V2_DQTREEDEPTH-1)
- ret = find_tree_dqentry(dquot, blk, depth+1);
- else
- ret = find_block_dqentry(dquot, blk);
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Find entry for given id in the tree - wrapper function */
-static inline loff_t find_dqentry(struct dquot *dquot)
-{
- return find_tree_dqentry(dquot, V2_DQTREEOFF, 0);
-}
-
-static int v2_read_dquot(struct dquot *dquot)
+static int v2_release_dquot(struct dquot *dquot)
{
- int type = dquot->dq_type;
- loff_t offset;
- struct v2_disk_dqblk ddquot, empty;
- int ret = 0;
-
-#ifdef __QUOTA_V2_PARANOIA
- /* Invalidated quota? */
- if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
- printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
- return -EIO;
- }
-#endif
- offset = find_dqentry(dquot);
- if (offset <= 0) { /* Entry not present? */
- if (offset < 0)
- printk(KERN_ERR "VFS: Can't read quota "
- "structure for id %u.\n", dquot->dq_id);
- dquot->dq_off = 0;
- set_bit(DQ_FAKE_B, &dquot->dq_flags);
- memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
- ret = offset;
- }
- else {
- dquot->dq_off = offset;
- if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
- (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
- != sizeof(struct v2_disk_dqblk)) {
- if (ret >= 0)
- ret = -EIO;
- printk(KERN_ERR "VFS: Error while reading quota "
- "structure for id %u.\n", dquot->dq_id);
- memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
- }
- else {
- ret = 0;
- /* We need to escape back all-zero structure */
- memset(&empty, 0, sizeof(struct v2_disk_dqblk));
- empty.dqb_itime = cpu_to_le64(1);
- if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
- ddquot.dqb_itime = 0;
- }
- disk2memdqb(&dquot->dq_dqb, &ddquot);
- if (!dquot->dq_dqb.dqb_bhardlimit &&
- !dquot->dq_dqb.dqb_bsoftlimit &&
- !dquot->dq_dqb.dqb_ihardlimit &&
- !dquot->dq_dqb.dqb_isoftlimit)
- set_bit(DQ_FAKE_B, &dquot->dq_flags);
- }
- dqstats.reads++;
-
- return ret;
+ return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
-/* Check whether dquot should not be deleted. We know we are
- * the only one operating on dquot (thanks to dq_lock) */
-static int v2_release_dquot(struct dquot *dquot)
+static int v2_free_file_info(struct super_block *sb, int type)
{
- if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
- return v2_delete_dquot(dquot);
+ kfree(sb_dqinfo(sb, type)->dqi_priv);
return 0;
}
@@ -673,7 +210,7 @@ static struct quota_format_ops v2_format_ops = {
.check_quota_file = v2_check_quota_file,
.read_file_info = v2_read_file_info,
.write_file_info = v2_write_file_info,
- .free_file_info = NULL,
+ .free_file_info = v2_free_file_info,
.read_dqblk = v2_read_dquot,
.commit_dqblk = v2_write_dquot,
.release_dqblk = v2_release_dquot,
diff --git a/fs/quotaio_v1.h b/fs/quotaio_v1.h
new file mode 100644
index 000000000000..746654b5de70
--- /dev/null
+++ b/fs/quotaio_v1.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_QUOTAIO_V1_H
+#define _LINUX_QUOTAIO_V1_H
+
+#include <linux/types.h>
+
+/*
+ * The following constants define the amount of time given a user
+ * before the soft limits are treated as hard limits (usually resulting
+ * in an allocation failure). The timer is started when the user crosses
+ * their soft limit, it is reset when they go below their soft limit.
+ */
+#define MAX_IQ_TIME 604800 /* (7*24*60*60) 1 week */
+#define MAX_DQ_TIME 604800 /* (7*24*60*60) 1 week */
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is an array of these structures
+ * indexed by user or group number.
+ */
+struct v1_disk_dqblk {
+ __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */
+ __u32 dqb_bsoftlimit; /* preferred limit on disk blks */
+ __u32 dqb_curblocks; /* current block count */
+ __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */
+ __u32 dqb_isoftlimit; /* preferred inode limit */
+ __u32 dqb_curinodes; /* current # allocated inodes */
+ time_t dqb_btime; /* time limit for excessive disk use */
+ time_t dqb_itime; /* time limit for excessive inode use */
+};
+
+#define v1_dqoff(UID) ((loff_t)((UID) * sizeof (struct v1_disk_dqblk)))
+
+#endif /* _LINUX_QUOTAIO_V1_H */
diff --git a/fs/quotaio_v2.h b/fs/quotaio_v2.h
new file mode 100644
index 000000000000..530fe580685c
--- /dev/null
+++ b/fs/quotaio_v2.h
@@ -0,0 +1,60 @@
+/*
+ * Definitions of structures for vfsv0 quota format
+ */
+
+#ifndef _LINUX_QUOTAIO_V2_H
+#define _LINUX_QUOTAIO_V2_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ * Definitions of magics and versions of current quota files
+ */
+#define V2_INITQMAGICS {\
+ 0xd9c01f11, /* USRQUOTA */\
+ 0xd9c01927 /* GRPQUOTA */\
+}
+
+#define V2_INITQVERSIONS {\
+ 0, /* USRQUOTA */\
+ 0 /* GRPQUOTA */\
+}
+
+/* First generic header */
+struct v2_disk_dqheader {
+ __le32 dqh_magic; /* Magic number identifying file */
+ __le32 dqh_version; /* File version */
+};
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is a radix tree whose leaves point
+ * to blocks of these structures.
+ */
+struct v2_disk_dqblk {
+ __le32 dqb_id; /* id this quota applies to */
+ __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */
+ __le32 dqb_isoftlimit; /* preferred inode limit */
+ __le32 dqb_curinodes; /* current # allocated inodes */
+ __le32 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
+ __le32 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+ __le64 dqb_curspace; /* current space occupied (in bytes) */
+ __le64 dqb_btime; /* time limit for excessive disk use */
+ __le64 dqb_itime; /* time limit for excessive inode use */
+};
+
+/* Header with type and version specific information */
+struct v2_disk_dqinfo {
+ __le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */
+ __le32 dqi_igrace; /* Time before inode soft limit becomes hard limit */
+ __le32 dqi_flags; /* Flags for quotafile (DQF_*) */
+ __le32 dqi_blocks; /* Number of blocks in file */
+ __le32 dqi_free_blk; /* Number of first free block in the list */
+ __le32 dqi_free_entry; /* Number of block with at least one free entry */
+};
+
+#define V2_DQINFOOFF sizeof(struct v2_disk_dqheader) /* Offset of info header in file */
+#define V2_DQBLKSIZE_BITS 10 /* Size of leaf block in tree */
+
+#endif /* _LINUX_QUOTAIO_V2_H */
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index f031d1c925f0..a83a3518ae33 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -55,8 +55,8 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
if (inode) {
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index f89ebb943f3f..4f322e5ed840 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -573,7 +573,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
/* the quota init calls have to know who to charge the quota to, so
** we have to set uid and gid here
*/
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
inode->i_mode = mode;
/* Make inode invalid - just in case we are going to drop it before
* the initialization happens */
@@ -584,7 +584,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
if (S_ISDIR(mode))
inode->i_mode |= S_ISGID;
} else {
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
}
DQUOT_INIT(inode);
return 0;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 663a91f5dce8..f863769fa8aa 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -532,6 +532,12 @@ static void init_once(void *foo)
#endif
}
+static void *reiserfs_get_inodes(struct kmem_cache *s, int nr, void **v)
+{
+ return fs_get_inodes(s, nr, v,
+ offsetof(struct reiserfs_inode_info, vfs_inode));
+}
+
static int init_inodecache(void)
{
reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache",
@@ -542,6 +548,8 @@ static int init_inodecache(void)
init_once);
if (reiserfs_inode_cachep == NULL)
return -ENOMEM;
+ kmem_cache_setup_defrag(reiserfs_inode_cachep,
+ reiserfs_get_inodes, kick_inodes);
return 0;
}
@@ -649,6 +657,8 @@ static struct dquot_operations reiserfs_quota_operations = {
.release_dquot = reiserfs_release_dquot,
.mark_dirty = reiserfs_mark_dquot_dirty,
.write_info = reiserfs_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
};
static struct quotactl_ops reiserfs_qctl_operations = {
@@ -994,8 +1004,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
if (c == 'u' || c == 'g') {
int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
- if ((sb_any_quota_enabled(s) ||
- sb_any_quota_suspended(s)) &&
+ if (sb_any_quota_loaded(s) &&
(!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
reiserfs_warning(s,
"reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
@@ -1041,8 +1050,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
"reiserfs_parse_options: unknown quota format specified.");
return 0;
}
- if ((sb_any_quota_enabled(s) ||
- sb_any_quota_suspended(s)) &&
+ if (sb_any_quota_loaded(s) &&
*qfmt != REISERFS_SB(s)->s_jquota_fmt) {
reiserfs_warning(s,
"reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
@@ -1067,7 +1075,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
}
/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
if (!(*mount_options & (1 << REISERFS_QUOTA))
- && sb_any_quota_enabled(s)) {
+ && sb_any_quota_loaded(s)) {
reiserfs_warning(s,
"reiserfs_parse_options: quota options must be present when quota is turned on.");
return 0;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eba2eabcd2b8..f03220d7891b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -357,7 +357,18 @@ int seq_printf(struct seq_file *m, const char *f, ...)
}
EXPORT_SYMBOL(seq_printf);
-static char *mangle_path(char *s, char *p, char *esc)
+/**
+ * mangle_path - mangle and copy path to buffer beginning
+ * @s: buffer start
+ * @p: beginning of path in above buffer
+ * @esc: set of characters that need escaping
+ *
+ * Copy the path from @p to @s, replacing each occurrence of character from
+ * @esc with usual octal escape.
+ * Returns pointer past last written character in @s, or NULL in case of
+ * failure.
+ */
+char *mangle_path(char *s, char *p, char *esc)
{
while (s <= p) {
char c = *p++;
@@ -376,6 +387,7 @@ static char *mangle_path(char *s, char *p, char *esc)
}
return NULL;
}
+EXPORT_SYMBOL_GPL(mangle_path);
/*
* return the absolute path of 'dentry' residing in mount 'mnt'.
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 48da4fa6b7d4..e7ddd0328ddc 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -667,8 +667,7 @@ smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
attr.ia_mode = mode;
- attr.ia_uid = current->euid;
- attr.ia_gid = current->egid;
+ current_euid_egid(&attr.ia_uid, &attr.ia_gid);
if (!new_valid_dev(dev))
return -EINVAL;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 3528f40ffb0f..fc27fbfc5397 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -586,7 +586,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
if (parse_options(mnt, raw_data))
goto out_bad_option;
}
- mnt->mounted_uid = current->uid;
+ mnt->mounted_uid = current_uid();
smb_setcodepage(server, &mnt->codepage);
/*
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index ee536e8a649a..9468168b9af5 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -864,7 +864,7 @@ smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
goto out;
error = -EACCES;
- if (current->uid != server->mnt->mounted_uid &&
+ if (current_uid() != server->mnt->mounted_uid &&
!capable(CAP_SYS_ADMIN))
goto out;
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 115ab0d6f4bc..241e9765cfad 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -165,9 +165,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
inode->i_ino = fs16_to_cpu(sbi, ino);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_blocks = 0;
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 1a4973e10664..4a18f084cc42 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -363,7 +363,7 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
*/
static int can_use_rp(struct ubifs_info *c)
{
- if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
+ if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
(c->rp_gid != 0 && in_group_p(c->rp_gid)))
return 1;
return 0;
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 0a6aa2cc78f0..b49884c8c10e 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -234,8 +234,8 @@ int ubifs_bg_thread(void *info)
int err;
struct ubifs_info *c = info;
- ubifs_msg("background thread \"%s\" started, PID %d",
- c->bgt_name, current->pid);
+ dbg_msg("background thread \"%s\" started, PID %d",
+ c->bgt_name, current->pid);
set_freezable();
while (1) {
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 7186400750e7..510ffa0bbda4 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -101,21 +101,24 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) {
switch (type) {
case UBIFS_INO_KEY:
- sprintf(p, "(%lu, %s)", key_inum(c, key),
+ sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key),
get_key_type(type));
break;
case UBIFS_DENT_KEY:
case UBIFS_XENT_KEY:
- sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key),
+ sprintf(p, "(%lu, %s, %#08x)",
+ (unsigned long)key_inum(c, key),
get_key_type(type), key_hash(c, key));
break;
case UBIFS_DATA_KEY:
- sprintf(p, "(%lu, %s, %u)", key_inum(c, key),
+ sprintf(p, "(%lu, %s, %u)",
+ (unsigned long)key_inum(c, key),
get_key_type(type), key_block(c, key));
break;
case UBIFS_TRUN_KEY:
sprintf(p, "(%lu, %s)",
- key_inum(c, key), get_key_type(type));
+ (unsigned long)key_inum(c, key),
+ get_key_type(type));
break;
default:
sprintf(p, "(bad key type: %#08x, %#08x)",
@@ -364,8 +367,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
le32_to_cpu(mst->ihead_lnum));
printk(KERN_DEBUG "\tihead_offs %u\n",
le32_to_cpu(mst->ihead_offs));
- printk(KERN_DEBUG "\tindex_size %u\n",
- le32_to_cpu(mst->index_size));
+ printk(KERN_DEBUG "\tindex_size %llu\n",
+ (unsigned long long)le64_to_cpu(mst->index_size));
printk(KERN_DEBUG "\tlpt_lnum %u\n",
le32_to_cpu(mst->lpt_lnum));
printk(KERN_DEBUG "\tlpt_offs %u\n",
@@ -1589,7 +1592,7 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
if (inum > c->highest_inum) {
ubifs_err("too high inode number, max. is %lu",
- c->highest_inum);
+ (unsigned long)c->highest_inum);
return ERR_PTR(-EINVAL);
}
@@ -1668,16 +1671,18 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
ino_key_init(c, &key, inum);
err = ubifs_lookup_level0(c, &key, &znode, &n);
if (!err) {
- ubifs_err("inode %lu not found in index", inum);
+ ubifs_err("inode %lu not found in index", (unsigned long)inum);
return ERR_PTR(-ENOENT);
} else if (err < 0) {
- ubifs_err("error %d while looking up inode %lu", err, inum);
+ ubifs_err("error %d while looking up inode %lu",
+ err, (unsigned long)inum);
return ERR_PTR(err);
}
zbr = &znode->zbranch[n];
if (zbr->len < UBIFS_INO_NODE_SZ) {
- ubifs_err("bad node %lu node length %d", inum, zbr->len);
+ ubifs_err("bad node %lu node length %d",
+ (unsigned long)inum, zbr->len);
return ERR_PTR(-EINVAL);
}
@@ -1697,7 +1702,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
kfree(ino);
if (IS_ERR(fscki)) {
ubifs_err("error %ld while adding inode %lu node",
- PTR_ERR(fscki), inum);
+ PTR_ERR(fscki), (unsigned long)inum);
return fscki;
}
@@ -1786,7 +1791,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
if (IS_ERR(fscki)) {
err = PTR_ERR(fscki);
ubifs_err("error %d while processing data node and "
- "trying to find inode node %lu", err, inum);
+ "trying to find inode node %lu",
+ err, (unsigned long)inum);
goto out_dump;
}
@@ -1819,7 +1825,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
if (IS_ERR(fscki)) {
err = PTR_ERR(fscki);
ubifs_err("error %d while processing entry node and "
- "trying to find inode node %lu", err, inum);
+ "trying to find inode node %lu",
+ err, (unsigned long)inum);
goto out_dump;
}
@@ -1832,7 +1839,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
err = PTR_ERR(fscki);
ubifs_err("error %d while processing entry node and "
"trying to find parent inode node %lu",
- err, inum);
+ err, (unsigned long)inum);
goto out_dump;
}
@@ -1923,7 +1930,8 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
fscki->references != 1) {
ubifs_err("directory inode %lu has %d "
"direntries which refer it, but "
- "should be 1", fscki->inum,
+ "should be 1",
+ (unsigned long)fscki->inum,
fscki->references);
goto out_dump;
}
@@ -1931,27 +1939,29 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
fscki->references != 0) {
ubifs_err("root inode %lu has non-zero (%d) "
"direntries which refer it",
- fscki->inum, fscki->references);
+ (unsigned long)fscki->inum,
+ fscki->references);
goto out_dump;
}
if (fscki->calc_sz != fscki->size) {
ubifs_err("directory inode %lu size is %lld, "
"but calculated size is %lld",
- fscki->inum, fscki->size,
- fscki->calc_sz);
+ (unsigned long)fscki->inum,
+ fscki->size, fscki->calc_sz);
goto out_dump;
}
if (fscki->calc_cnt != fscki->nlink) {
ubifs_err("directory inode %lu nlink is %d, "
"but calculated nlink is %d",
- fscki->inum, fscki->nlink,
- fscki->calc_cnt);
+ (unsigned long)fscki->inum,
+ fscki->nlink, fscki->calc_cnt);
goto out_dump;
}
} else {
if (fscki->references != fscki->nlink) {
ubifs_err("inode %lu nlink is %d, but "
- "calculated nlink is %d", fscki->inum,
+ "calculated nlink is %d",
+ (unsigned long)fscki->inum,
fscki->nlink, fscki->references);
goto out_dump;
}
@@ -1959,20 +1969,21 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
if (fscki->xattr_sz != fscki->calc_xsz) {
ubifs_err("inode %lu has xattr size %u, but "
"calculated size is %lld",
- fscki->inum, fscki->xattr_sz,
+ (unsigned long)fscki->inum, fscki->xattr_sz,
fscki->calc_xsz);
goto out_dump;
}
if (fscki->xattr_cnt != fscki->calc_xcnt) {
ubifs_err("inode %lu has %u xattrs, but "
- "calculated count is %lld", fscki->inum,
+ "calculated count is %lld",
+ (unsigned long)fscki->inum,
fscki->xattr_cnt, fscki->calc_xcnt);
goto out_dump;
}
if (fscki->xattr_nms != fscki->calc_xnms) {
ubifs_err("inode %lu has xattr names' size %u, but "
"calculated names' size is %lld",
- fscki->inum, fscki->xattr_nms,
+ (unsigned long)fscki->inum, fscki->xattr_nms,
fscki->calc_xnms);
goto out_dump;
}
@@ -1985,11 +1996,12 @@ out_dump:
ino_key_init(c, &key, fscki->inum);
err = ubifs_lookup_level0(c, &key, &znode, &n);
if (!err) {
- ubifs_err("inode %lu not found in index", fscki->inum);
+ ubifs_err("inode %lu not found in index",
+ (unsigned long)fscki->inum);
return -ENOENT;
} else if (err < 0) {
ubifs_err("error %d while looking up inode %lu",
- err, fscki->inum);
+ err, (unsigned long)fscki->inum);
return err;
}
@@ -2007,7 +2019,7 @@ out_dump:
}
ubifs_msg("dump of the inode %lu sitting in LEB %d:%d",
- fscki->inum, zbr->lnum, zbr->offs);
+ (unsigned long)fscki->inum, zbr->lnum, zbr->offs);
dbg_dump_node(c, ino);
kfree(ino);
return -EINVAL;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 526c01ec8003..f448ab1f9c38 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -104,13 +104,13 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
*/
inode->i_flags |= (S_NOCMTIME);
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
if (dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
inode->i_mode = mode;
inode->i_mtime = inode->i_atime = inode->i_ctime =
ubifs_current_time(inode);
@@ -161,7 +161,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
return ERR_PTR(-EINVAL);
}
ubifs_warn("running out of inode numbers (current %lu, max %d)",
- c->highest_inum, INUM_WATERMARK);
+ (unsigned long)c->highest_inum, INUM_WATERMARK);
}
inode->i_ino = ++c->highest_inum;
@@ -428,7 +428,8 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
dbg_gen("feed '%s', ino %llu, new f_pos %#x",
dent->name, (unsigned long long)le64_to_cpu(dent->inum),
key_hash_flash(c, &dent->key));
- ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);
+ ubifs_assert(le64_to_cpu(dent->ch.sqnum) >
+ ubifs_inode(dir)->creat_sqnum);
nm.len = le16_to_cpu(dent->nlen);
over = filldir(dirent, dent->name, nm.len, file->f_pos,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 51cf511d44d9..2624411d9758 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -72,7 +72,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
return err;
}
- ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum);
+ ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum);
len = le32_to_cpu(dn->size);
if (len <= 0 || len > UBIFS_BLOCK_SIZE)
@@ -626,7 +626,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
dn = bu->buf + (bu->zbranch[nn].offs - offs);
- ubifs_assert(dn->ch.sqnum >
+ ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
ubifs_inode(inode)->creat_sqnum);
len = le32_to_cpu(dn->size);
@@ -691,32 +691,22 @@ out_err:
/**
* ubifs_do_bulk_read - do bulk-read.
* @c: UBIFS file-system description object
- * @page1: first page
+ * @bu: bulk-read information
+ * @page1: first page to read
*
* This function returns %1 if the bulk-read is done, otherwise %0 is returned.
*/
-static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
+static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
+ struct page *page1)
{
pgoff_t offset = page1->index, end_index;
struct address_space *mapping = page1->mapping;
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
- struct bu_info *bu;
int err, page_idx, page_cnt, ret = 0, n = 0;
+ int allocate = bu->buf ? 0 : 1;
loff_t isize;
- bu = kmalloc(sizeof(struct bu_info), GFP_NOFS);
- if (!bu)
- return 0;
-
- bu->buf_len = c->bulk_read_buf_size;
- bu->buf = kmalloc(bu->buf_len, GFP_NOFS);
- if (!bu->buf)
- goto out_free;
-
- data_key_init(c, &bu->key, inode->i_ino,
- offset << UBIFS_BLOCKS_PER_PAGE_SHIFT);
-
err = ubifs_tnc_get_bu_keys(c, bu);
if (err)
goto out_warn;
@@ -735,12 +725,25 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
* together. If all the pages were like this, bulk-read would
* reduce performance, so we turn it off for a while.
*/
- ui->read_in_a_row = 0;
- ui->bulk_read = 0;
- goto out_free;
+ goto out_bu_off;
}
if (bu->cnt) {
+ if (allocate) {
+ /*
+ * Allocate bulk-read buffer depending on how many data
+ * nodes we are going to read.
+ */
+ bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
+ bu->zbranch[bu->cnt - 1].len -
+ bu->zbranch[0].offs;
+ ubifs_assert(bu->buf_len > 0);
+ ubifs_assert(bu->buf_len <= c->leb_size);
+ bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
+ if (!bu->buf)
+ goto out_bu_off;
+ }
+
err = ubifs_tnc_bulk_read(c, bu);
if (err)
goto out_warn;
@@ -779,13 +782,17 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
ui->last_page_read = offset + page_idx - 1;
out_free:
- kfree(bu->buf);
- kfree(bu);
+ if (allocate)
+ kfree(bu->buf);
return ret;
out_warn:
ubifs_warn("ignoring error %d and skipping bulk-read", err);
goto out_free;
+
+out_bu_off:
+ ui->read_in_a_row = ui->bulk_read = 0;
+ goto out_free;
}
/**
@@ -803,18 +810,20 @@ static int ubifs_bulk_read(struct page *page)
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
pgoff_t index = page->index, last_page_read = ui->last_page_read;
- int ret = 0;
+ struct bu_info *bu;
+ int err = 0, allocated = 0;
ui->last_page_read = index;
-
if (!c->bulk_read)
return 0;
+
/*
- * Bulk-read is protected by ui_mutex, but it is an optimization, so
- * don't bother if we cannot lock the mutex.
+ * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
+ * so don't bother if we cannot lock the mutex.
*/
if (!mutex_trylock(&ui->ui_mutex))
return 0;
+
if (index != last_page_read + 1) {
/* Turn off bulk-read if we stop reading sequentially */
ui->read_in_a_row = 1;
@@ -822,6 +831,7 @@ static int ubifs_bulk_read(struct page *page)
ui->bulk_read = 0;
goto out_unlock;
}
+
if (!ui->bulk_read) {
ui->read_in_a_row += 1;
if (ui->read_in_a_row < 3)
@@ -829,10 +839,35 @@ static int ubifs_bulk_read(struct page *page)
/* Three reads in a row, so switch on bulk-read */
ui->bulk_read = 1;
}
- ret = ubifs_do_bulk_read(c, page);
+
+ /*
+ * If possible, try to use pre-allocated bulk-read information, which
+ * is protected by @c->bu_mutex.
+ */
+ if (mutex_trylock(&c->bu_mutex))
+ bu = &c->bu;
+ else {
+ bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
+ if (!bu)
+ goto out_unlock;
+
+ bu->buf = NULL;
+ allocated = 1;
+ }
+
+ bu->buf_len = c->max_bu_buf_len;
+ data_key_init(c, &bu->key, inode->i_ino,
+ page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
+ err = ubifs_do_bulk_read(c, bu, page);
+
+ if (!allocated)
+ mutex_unlock(&c->bu_mutex);
+ else
+ kfree(bu);
+
out_unlock:
mutex_unlock(&ui->ui_mutex);
- return ret;
+ return err;
}
static int ubifs_readpage(struct file *file, struct page *page)
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 22993f867d19..f91b745908ea 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -690,8 +690,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
struct ubifs_inode *ui = ubifs_inode(inode);
- dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key),
- key_block(c, key), len, DBGKEY(key));
+ dbg_jnl("ino %lu, blk %u, len %d, key %s",
+ (unsigned long)key_inum(c, key), key_block(c, key), len,
+ DBGKEY(key));
ubifs_assert(len <= UBIFS_BLOCK_SIZE);
data = kmalloc(dlen, GFP_NOFS);
@@ -1128,7 +1129,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
ino_t inum = inode->i_ino;
unsigned int blk;
- dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size);
+ dbg_jnl("ino %lu, size %lld -> %lld",
+ (unsigned long)inum, old_size, new_size);
ubifs_assert(!ui->data_len);
ubifs_assert(S_ISREG(inode->i_mode));
ubifs_assert(mutex_is_locked(&ui->ui_mutex));
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 9ee65086f627..3f1f16bc25c9 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -345,7 +345,7 @@ static inline int key_type_flash(const struct ubifs_info *c, const void *k)
{
const union ubifs_key *key = k;
- return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS;
+ return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS;
}
/**
@@ -416,7 +416,7 @@ static inline unsigned int key_block_flash(const struct ubifs_info *c,
{
const union ubifs_key *key = k;
- return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK;
+ return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK;
}
/**
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index eed5a0025d63..a41434b42785 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -571,8 +571,6 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
/* We assume here that LEB zero is never an LPT LEB */
if (nnode->nbranch[iip].lnum)
return ubifs_get_pnode(c, nnode, iip);
- else
- return NULL;
}
/* Go up while can't go right */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 02d3462f4d3e..9bd5a43d4526 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -105,7 +105,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
list_add_tail(&orphan->list, &c->orph_list);
list_add_tail(&orphan->new_list, &c->orph_new);
spin_unlock(&c->orphan_lock);
- dbg_gen("ino %lu", inum);
+ dbg_gen("ino %lu", (unsigned long)inum);
return 0;
}
@@ -132,14 +132,16 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
else {
if (o->dnext) {
spin_unlock(&c->orphan_lock);
- dbg_gen("deleted twice ino %lu", inum);
+ dbg_gen("deleted twice ino %lu",
+ (unsigned long)inum);
return;
}
if (o->cnext) {
o->dnext = c->orph_dnext;
c->orph_dnext = o;
spin_unlock(&c->orphan_lock);
- dbg_gen("delete later ino %lu", inum);
+ dbg_gen("delete later ino %lu",
+ (unsigned long)inum);
return;
}
rb_erase(p, &c->orph_tree);
@@ -151,12 +153,12 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
}
spin_unlock(&c->orphan_lock);
kfree(o);
- dbg_gen("inum %lu", inum);
+ dbg_gen("inum %lu", (unsigned long)inum);
return;
}
}
spin_unlock(&c->orphan_lock);
- dbg_err("missing orphan ino %lu", inum);
+ dbg_err("missing orphan ino %lu", (unsigned long)inum);
dbg_dump_stack();
}
@@ -448,7 +450,7 @@ static void erase_deleted(struct ubifs_info *c)
rb_erase(&orphan->rb, &c->orph_tree);
list_del(&orphan->list);
c->tot_orphans -= 1;
- dbg_gen("deleting orphan ino %lu", orphan->inum);
+ dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum);
kfree(orphan);
}
c->orph_dnext = NULL;
@@ -536,8 +538,8 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
list_add_tail(&orphan->list, &c->orph_list);
orphan->dnext = c->orph_dnext;
c->orph_dnext = orphan;
- dbg_mnt("ino %lu, new %d, tot %d",
- inum, c->new_orphans, c->tot_orphans);
+ dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
+ c->new_orphans, c->tot_orphans);
return 0;
}
@@ -609,7 +611,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
for (i = 0; i < n; i++) {
inum = le64_to_cpu(orph->inos[i]);
- dbg_rcvry("deleting orphaned inode %lu", inum);
+ dbg_rcvry("deleting orphaned inode %lu",
+ (unsigned long)inum);
err = ubifs_tnc_remove_ino(c, inum);
if (err)
return err;
@@ -840,8 +843,8 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
if (inum != ci->last_ino) {
/* Lowest node type is the inode node, so it comes first */
if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
- ubifs_err("found orphan node ino %lu, type %d", inum,
- key_type(c, &zbr->key));
+ ubifs_err("found orphan node ino %lu, type %d",
+ (unsigned long)inum, key_type(c, &zbr->key));
ci->last_ino = inum;
ci->tot_inos += 1;
err = ubifs_tnc_read_node(c, zbr, ci->node);
@@ -853,7 +856,8 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
/* Must be recorded as an orphan */
if (!dbg_find_check_orphan(&ci->root, inum) &&
!dbg_find_orphan(c, inum)) {
- ubifs_err("missing orphan, ino %lu", inum);
+ ubifs_err("missing orphan, ino %lu",
+ (unsigned long)inum);
ci->missing += 1;
}
}
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 77d26c141cf6..90acac603e63 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -168,12 +168,12 @@ static int write_rcvrd_mst_node(struct ubifs_info *c,
struct ubifs_mst_node *mst)
{
int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
- uint32_t save_flags;
+ __le32 save_flags;
dbg_rcvry("recovery");
save_flags = mst->flags;
- mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY);
+ mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
@@ -1435,13 +1435,13 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
if (err)
goto out;
- dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs,
- i_size, e->d_size);
+ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
+ (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
return 0;
out:
ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
- e->inum, e->i_size, e->d_size, err);
+ (unsigned long)e->inum, e->i_size, e->d_size, err);
return err;
}
@@ -1472,7 +1472,8 @@ int ubifs_recover_size(struct ubifs_info *c)
return err;
if (err == -ENOENT) {
/* Remove data nodes that have no inode */
- dbg_rcvry("removing ino %lu", e->inum);
+ dbg_rcvry("removing ino %lu",
+ (unsigned long)e->inum);
err = ubifs_tnc_remove_ino(c, e->inum);
if (err)
return err;
@@ -1493,8 +1494,8 @@ int ubifs_recover_size(struct ubifs_info *c)
return PTR_ERR(inode);
if (inode->i_size < e->d_size) {
dbg_rcvry("ino %lu size %lld -> %lld",
- e->inum, e->d_size,
- inode->i_size);
+ (unsigned long)e->inum,
+ e->d_size, inode->i_size);
inode->i_size = e->d_size;
ubifs_inode(inode)->ui_size = e->d_size;
e->inode = inode;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 7399692af859..21f7d047c306 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -1065,7 +1065,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
- c->highest_inum);
+ (unsigned long)c->highest_inum);
out:
destroy_replay_tree(c);
destroy_bud_list(c);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 2bf753b38889..0f392351dc5a 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -81,6 +81,7 @@ static int create_default_filesystem(struct ubifs_info *c)
int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
int min_leb_cnt = UBIFS_MIN_LEB_CNT;
uint64_t tmp64, main_bytes;
+ __le64 tmp_le64;
/* Some functions called from here depend on the @c->key_len filed */
c->key_len = UBIFS_SK_LEN;
@@ -295,10 +296,10 @@ static int create_default_filesystem(struct ubifs_info *c)
ino->ch.node_type = UBIFS_INO_NODE;
ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
ino->nlink = cpu_to_le32(2);
- tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
- ino->atime_sec = tmp;
- ino->ctime_sec = tmp;
- ino->mtime_sec = tmp;
+ tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
+ ino->atime_sec = tmp_le64;
+ ino->ctime_sec = tmp_le64;
+ ino->mtime_sec = tmp_le64;
ino->atime_nsec = 0;
ino->ctime_nsec = 0;
ino->mtime_nsec = 0;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 8780efbf40ac..d80b2aef42b6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -36,6 +36,12 @@
#include <linux/mount.h>
#include "ubifs.h"
+/*
+ * Maximum amount of memory we may 'kmalloc()' without worrying that we are
+ * allocating too much.
+ */
+#define UBIFS_KMALLOC_OK (128*1024)
+
/* Slab cache for UBIFS inodes */
struct kmem_cache *ubifs_inode_slab;
@@ -561,18 +567,11 @@ static int init_constants_early(struct ubifs_info *c)
* calculations when reporting free space.
*/
c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
- /* Buffer size for bulk-reads */
- c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
- if (c->bulk_read_buf_size > c->leb_size)
- c->bulk_read_buf_size = c->leb_size;
- if (c->bulk_read_buf_size > 128 * 1024) {
- /* Check if we can kmalloc more than 128KiB */
- void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL);
- kfree(try);
- if (!try)
- c->bulk_read_buf_size = 128 * 1024;
- }
+ /* Buffer size for bulk-reads */
+ c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
+ if (c->max_bu_buf_len > c->leb_size)
+ c->max_bu_buf_len = c->leb_size;
return 0;
}
@@ -992,6 +991,34 @@ static void destroy_journal(struct ubifs_info *c)
}
/**
+ * bu_init - initialize bulk-read information.
+ * @c: UBIFS file-system description object
+ */
+static void bu_init(struct ubifs_info *c)
+{
+ ubifs_assert(c->bulk_read == 1);
+
+ if (c->bu.buf)
+ return; /* Already initialized */
+
+again:
+ c->bu.buf = kmalloc(c->max_bu_buf_len, GFP_KERNEL | __GFP_NOWARN);
+ if (!c->bu.buf) {
+ if (c->max_bu_buf_len > UBIFS_KMALLOC_OK) {
+ c->max_bu_buf_len = UBIFS_KMALLOC_OK;
+ goto again;
+ }
+
+ /* Just disable bulk-read */
+ ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, "
+ "disabling it", c->max_bu_buf_len);
+ c->mount_opts.bulk_read = 1;
+ c->bulk_read = 0;
+ return;
+ }
+}
+
+/**
* mount_ubifs - mount UBIFS file-system.
* @c: UBIFS file-system description object
*
@@ -1059,6 +1086,13 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_free;
}
+ if (c->bulk_read == 1)
+ bu_init(c);
+
+ /*
+ * We have to check all CRCs, even for data nodes, when we mount the FS
+ * (specifically, when we are replaying).
+ */
c->always_chk_crc = 1;
err = ubifs_read_superblock(c);
@@ -1289,6 +1323,7 @@ out_cbuf:
out_dereg:
dbg_failure_mode_deregistration(c);
out_free:
+ kfree(c->bu.buf);
vfree(c->ileb_buf);
vfree(c->sbuf);
kfree(c->bottom_up_buf);
@@ -1325,10 +1360,11 @@ static void ubifs_umount(struct ubifs_info *c)
kfree(c->cbuf);
kfree(c->rcvrd_mst_node);
kfree(c->mst_node);
+ kfree(c->bu.buf);
+ vfree(c->ileb_buf);
vfree(c->sbuf);
kfree(c->bottom_up_buf);
UBIFS_DBG(vfree(c->dbg_buf));
- vfree(c->ileb_buf);
dbg_failure_mode_deregistration(c);
}
@@ -1626,6 +1662,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
ubifs_err("invalid or unknown remount parameter");
return err;
}
+
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
err = ubifs_remount_rw(c);
if (err)
@@ -1633,6 +1670,14 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
ubifs_remount_ro(c);
+ if (c->bulk_read == 1)
+ bu_init(c);
+ else {
+ dbg_gen("disable bulk-read");
+ kfree(c->bu.buf);
+ c->bu.buf = NULL;
+ }
+
return 0;
}
@@ -1723,6 +1768,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&c->log_mutex);
mutex_init(&c->mst_mutex);
mutex_init(&c->umount_mutex);
+ mutex_init(&c->bu_mutex);
init_waitqueue_head(&c->cmt_wq);
c->buds = RB_ROOT;
c->old_idx = RB_ROOT;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index d27fd918b9c9..6eef5344a145 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1501,7 +1501,12 @@ out:
* @bu: bulk-read parameters and results
*
* Lookup consecutive data node keys for the same inode that reside
- * consecutively in the same LEB.
+ * consecutively in the same LEB. This function returns zero in case of success
+ * and a negative error code in case of failure.
+ *
+ * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function
+ * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares
+ * maxumum possible amount of nodes for bulk-read.
*/
int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
{
@@ -2677,7 +2682,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
struct ubifs_dent_node *xent, *pxent = NULL;
struct qstr nm = { .name = NULL };
- dbg_tnc("ino %lu", inum);
+ dbg_tnc("ino %lu", (unsigned long)inum);
/*
* Walk all extended attribute entries and remove them together with
@@ -2697,7 +2702,8 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
}
xattr_inum = le64_to_cpu(xent->inum);
- dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum);
+ dbg_tnc("xent '%s', ino %lu", xent->name,
+ (unsigned long)xattr_inum);
nm.name = xent->name;
nm.len = le16_to_cpu(xent->nlen);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a7bd32fa15b9..46b172560a06 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -753,7 +753,7 @@ struct ubifs_znode {
};
/**
- * struct bu_info - bulk-read information
+ * struct bu_info - bulk-read information.
* @key: first data node key
* @zbranch: zbranches of data nodes to bulk read
* @buf: buffer to read into
@@ -969,7 +969,10 @@ struct ubifs_mount_opts {
* @mst_node: master node
* @mst_offs: offset of valid master node
* @mst_mutex: protects the master node area, @mst_node, and @mst_offs
- * @bulk_read_buf_size: buffer size for bulk-reads
+ *
+ * @max_bu_buf_len: maximum bulk-read buffer length
+ * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
+ * @bu: pre-allocated bulk-read information
*
* @log_lebs: number of logical eraseblocks in the log
* @log_bytes: log size in bytes
@@ -1217,7 +1220,10 @@ struct ubifs_info {
struct ubifs_mst_node *mst_node;
int mst_offs;
struct mutex mst_mutex;
- int bulk_read_buf_size;
+
+ int max_bu_buf_len;
+ struct mutex bu_mutex;
+ struct bu_info bu;
int log_lebs;
long long log_bytes;
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1b809bd494bd..c0e87db7703c 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -87,12 +87,12 @@ static int read_block_bitmap(struct super_block *sb,
{
struct buffer_head *bh = NULL;
int retval = 0;
- kernel_lb_addr loc;
+ struct kernel_lb_addr loc;
loc.logicalBlockNum = bitmap->s_extPosition;
loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
- bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block));
+ bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block));
if (!bh)
retval = -EIO;
@@ -156,11 +156,13 @@ static bool udf_add_free_space(struct udf_sb_info *sbi,
static void udf_bitmap_free_blocks(struct super_block *sb,
struct inode *inode,
struct udf_bitmap *bitmap,
- kernel_lb_addr bloc, uint32_t offset,
+ struct kernel_lb_addr *bloc,
+ uint32_t offset,
uint32_t count)
{
struct udf_sb_info *sbi = UDF_SB(sb);
struct buffer_head *bh = NULL;
+ struct udf_part_map *partmap;
unsigned long block;
unsigned long block_group;
unsigned long bit;
@@ -169,17 +171,17 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
unsigned long overflow;
mutex_lock(&sbi->s_alloc_mutex);
- if (bloc.logicalBlockNum < 0 ||
- (bloc.logicalBlockNum + count) >
- sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) {
+ partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
+ if (bloc->logicalBlockNum < 0 ||
+ (bloc->logicalBlockNum + count) >
+ partmap->s_partition_len) {
udf_debug("%d < %d || %d + %d > %d\n",
- bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count,
- sbi->s_partmaps[bloc.partitionReferenceNum].
- s_partition_len);
+ bloc->logicalBlockNum, 0, bloc->logicalBlockNum,
+ count, partmap->s_partition_len);
goto error_return;
}
- block = bloc.logicalBlockNum + offset +
+ block = bloc->logicalBlockNum + offset +
(sizeof(struct spaceBitmapDesc) << 3);
do {
@@ -425,26 +427,28 @@ error_return:
static void udf_table_free_blocks(struct super_block *sb,
struct inode *inode,
struct inode *table,
- kernel_lb_addr bloc, uint32_t offset,
+ struct kernel_lb_addr *bloc,
+ uint32_t offset,
uint32_t count)
{
struct udf_sb_info *sbi = UDF_SB(sb);
+ struct udf_part_map *partmap;
uint32_t start, end;
uint32_t elen;
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
struct extent_position oepos, epos;
int8_t etype;
int i;
struct udf_inode_info *iinfo;
mutex_lock(&sbi->s_alloc_mutex);
- if (bloc.logicalBlockNum < 0 ||
- (bloc.logicalBlockNum + count) >
- sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) {
+ partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
+ if (bloc->logicalBlockNum < 0 ||
+ (bloc->logicalBlockNum + count) >
+ partmap->s_partition_len) {
udf_debug("%d < %d || %d + %d > %d\n",
bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count,
- sbi->s_partmaps[bloc.partitionReferenceNum].
- s_partition_len);
+ partmap->s_partition_len);
goto error_return;
}
@@ -456,8 +460,8 @@ static void udf_table_free_blocks(struct super_block *sb,
if (udf_add_free_space(sbi, sbi->s_partition, count))
mark_buffer_dirty(sbi->s_lvid_bh);
- start = bloc.logicalBlockNum + offset;
- end = bloc.logicalBlockNum + offset + count - 1;
+ start = bloc->logicalBlockNum + offset;
+ end = bloc->logicalBlockNum + offset + count - 1;
epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry);
elen = 0;
@@ -483,7 +487,7 @@ static void udf_table_free_blocks(struct super_block *sb,
start += count;
count = 0;
}
- udf_write_aext(table, &oepos, eloc, elen, 1);
+ udf_write_aext(table, &oepos, &eloc, elen, 1);
} else if (eloc.logicalBlockNum == (end + 1)) {
if ((0x3FFFFFFF - elen) <
(count << sb->s_blocksize_bits)) {
@@ -502,7 +506,7 @@ static void udf_table_free_blocks(struct super_block *sb,
end -= count;
count = 0;
}
- udf_write_aext(table, &oepos, eloc, elen, 1);
+ udf_write_aext(table, &oepos, &eloc, elen, 1);
}
if (epos.bh != oepos.bh) {
@@ -532,8 +536,8 @@ static void udf_table_free_blocks(struct super_block *sb,
*/
int adsize;
- short_ad *sad = NULL;
- long_ad *lad = NULL;
+ struct short_ad *sad = NULL;
+ struct long_ad *lad = NULL;
struct allocExtDesc *aed;
eloc.logicalBlockNum = start;
@@ -541,9 +545,9 @@ static void udf_table_free_blocks(struct super_block *sb,
(count << sb->s_blocksize_bits);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(short_ad);
+ adsize = sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
else {
brelse(oepos.bh);
brelse(epos.bh);
@@ -563,7 +567,7 @@ static void udf_table_free_blocks(struct super_block *sb,
elen -= sb->s_blocksize;
epos.bh = udf_tread(sb,
- udf_get_lb_pblock(sb, epos.block, 0));
+ udf_get_lb_pblock(sb, &epos.block, 0));
if (!epos.bh) {
brelse(oepos.bh);
goto error_return;
@@ -601,15 +605,15 @@ static void udf_table_free_blocks(struct super_block *sb,
if (sbi->s_udfrev >= 0x0200)
udf_new_tag(epos.bh->b_data, TAG_IDENT_AED,
3, 1, epos.block.logicalBlockNum,
- sizeof(tag));
+ sizeof(struct tag));
else
udf_new_tag(epos.bh->b_data, TAG_IDENT_AED,
2, 1, epos.block.logicalBlockNum,
- sizeof(tag));
+ sizeof(struct tag));
switch (iinfo->i_alloc_type) {
case ICBTAG_FLAG_AD_SHORT:
- sad = (short_ad *)sptr;
+ sad = (struct short_ad *)sptr;
sad->extLength = cpu_to_le32(
EXT_NEXT_EXTENT_ALLOCDECS |
sb->s_blocksize);
@@ -617,7 +621,7 @@ static void udf_table_free_blocks(struct super_block *sb,
cpu_to_le32(epos.block.logicalBlockNum);
break;
case ICBTAG_FLAG_AD_LONG:
- lad = (long_ad *)sptr;
+ lad = (struct long_ad *)sptr;
lad->extLength = cpu_to_le32(
EXT_NEXT_EXTENT_ALLOCDECS |
sb->s_blocksize);
@@ -635,7 +639,7 @@ static void udf_table_free_blocks(struct super_block *sb,
/* It's possible that stealing the block emptied the extent */
if (elen) {
- udf_write_aext(table, &epos, eloc, elen, 1);
+ udf_write_aext(table, &epos, &eloc, elen, 1);
if (!epos.bh) {
iinfo->i_lenAlloc += adsize;
@@ -666,7 +670,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
struct udf_sb_info *sbi = UDF_SB(sb);
int alloc_count = 0;
uint32_t elen, adsize;
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
struct extent_position epos;
int8_t etype = -1;
struct udf_inode_info *iinfo;
@@ -677,9 +681,9 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
iinfo = UDF_I(table);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(short_ad);
+ adsize = sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
else
return 0;
@@ -707,7 +711,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
alloc_count = block_count;
eloc.logicalBlockNum += alloc_count;
elen -= (alloc_count << sb->s_blocksize_bits);
- udf_write_aext(table, &epos, eloc,
+ udf_write_aext(table, &epos, &eloc,
(etype << 30) | elen, 1);
} else
udf_delete_aext(table, epos, eloc,
@@ -735,7 +739,7 @@ static int udf_table_new_block(struct super_block *sb,
uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF;
uint32_t newblock = 0, adsize;
uint32_t elen, goal_elen = 0;
- kernel_lb_addr eloc, uninitialized_var(goal_eloc);
+ struct kernel_lb_addr eloc, uninitialized_var(goal_eloc);
struct extent_position epos, goal_epos;
int8_t etype;
struct udf_inode_info *iinfo = UDF_I(table);
@@ -743,9 +747,9 @@ static int udf_table_new_block(struct super_block *sb,
*err = -ENOSPC;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(short_ad);
+ adsize = sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
else
return newblock;
@@ -814,7 +818,7 @@ static int udf_table_new_block(struct super_block *sb,
}
if (goal_elen)
- udf_write_aext(table, &goal_epos, goal_eloc, goal_elen, 1);
+ udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1);
else
udf_delete_aext(table, goal_epos, goal_eloc, goal_elen);
brelse(goal_epos.bh);
@@ -828,12 +832,11 @@ static int udf_table_new_block(struct super_block *sb,
return newblock;
}
-inline void udf_free_blocks(struct super_block *sb,
- struct inode *inode,
- kernel_lb_addr bloc, uint32_t offset,
- uint32_t count)
+void udf_free_blocks(struct super_block *sb, struct inode *inode,
+ struct kernel_lb_addr *bloc, uint32_t offset,
+ uint32_t count)
{
- uint16_t partition = bloc.partitionReferenceNum;
+ uint16_t partition = bloc->partitionReferenceNum;
struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 62dc270c69d1..2efd4d5291b6 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -51,7 +51,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
uint8_t lfi;
loff_t size = udf_ext0_offset(dir) + dir->i_size;
struct buffer_head *tmp, *bha[16];
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen;
sector_t offset;
int i, num, ret = 0;
@@ -80,13 +80,13 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
ret = -ENOENT;
goto out;
}
- block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
+ block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(short_ad);
+ epos.offset -= sizeof(struct short_ad);
else if (iinfo->i_alloc_type ==
ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(long_ad);
+ epos.offset -= sizeof(struct long_ad);
} else {
offset = 0;
}
@@ -101,7 +101,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
if (i + offset > (elen >> dir->i_sb->s_blocksize_bits))
i = (elen >> dir->i_sb->s_blocksize_bits) - offset;
for (num = 0; i > 0; i--) {
- block = udf_get_lb_pblock(dir->i_sb, eloc, offset + i);
+ block = udf_get_lb_pblock(dir->i_sb, &eloc, offset + i);
tmp = udf_tgetblk(dir->i_sb, block);
if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp))
bha[num++] = tmp;
@@ -161,9 +161,9 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
memcpy(fname, "..", flen);
dt_type = DT_DIR;
} else {
- kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation);
+ struct kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation);
- iblock = udf_get_lb_pblock(dir->i_sb, tloc, 0);
+ iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0);
flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
dt_type = DT_UNKNOWN;
}
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 2820f8fcf4cc..1d2c570704c8 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -20,7 +20,7 @@
#if 0
static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad,
- uint8_t ad_size, kernel_lb_addr fe_loc,
+ uint8_t ad_size, struct kernel_lb_addr fe_loc,
int *pos, int *offset, struct buffer_head **bh,
int *error)
{
@@ -75,7 +75,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
struct udf_fileident_bh *fibh,
struct fileIdentDesc *cfi,
struct extent_position *epos,
- kernel_lb_addr *eloc, uint32_t *elen,
+ struct kernel_lb_addr *eloc, uint32_t *elen,
sector_t *offset)
{
struct fileIdentDesc *fi;
@@ -111,7 +111,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
(EXT_RECORDED_ALLOCATED >> 30))
return NULL;
- block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset);
+ block = udf_get_lb_pblock(dir->i_sb, eloc, *offset);
(*offset)++;
@@ -131,7 +131,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
if (i + *offset > (*elen >> blocksize_bits))
i = (*elen >> blocksize_bits)-*offset;
for (num = 0; i > 0; i--) {
- block = udf_get_lb_pblock(dir->i_sb, *eloc,
+ block = udf_get_lb_pblock(dir->i_sb, eloc,
*offset + i);
tmp = udf_tgetblk(dir->i_sb, block);
if (tmp && !buffer_uptodate(tmp) &&
@@ -169,7 +169,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
(EXT_RECORDED_ALLOCATED >> 30))
return NULL;
- block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset);
+ block = udf_get_lb_pblock(dir->i_sb, eloc, *offset);
(*offset)++;
@@ -249,9 +249,9 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
}
#if 0
-static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
+static struct extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
{
- extent_ad *ext;
+ struct extent_ad *ext;
struct fileEntry *fe;
uint8_t *ptr;
@@ -274,54 +274,54 @@ static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs)))
ptr += *offset;
- ext = (extent_ad *)ptr;
+ ext = (struct extent_ad *)ptr;
- *offset = *offset + sizeof(extent_ad);
+ *offset = *offset + sizeof(struct extent_ad);
return ext;
}
#endif
-short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
+struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
int inc)
{
- short_ad *sa;
+ struct short_ad *sa;
if ((!ptr) || (!offset)) {
printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n");
return NULL;
}
- if ((*offset + sizeof(short_ad)) > maxoffset)
+ if ((*offset + sizeof(struct short_ad)) > maxoffset)
return NULL;
else {
- sa = (short_ad *)ptr;
+ sa = (struct short_ad *)ptr;
if (sa->extLength == 0)
return NULL;
}
if (inc)
- *offset += sizeof(short_ad);
+ *offset += sizeof(struct short_ad);
return sa;
}
-long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc)
+struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc)
{
- long_ad *la;
+ struct long_ad *la;
if ((!ptr) || (!offset)) {
printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n");
return NULL;
}
- if ((*offset + sizeof(long_ad)) > maxoffset)
+ if ((*offset + sizeof(struct long_ad)) > maxoffset)
return NULL;
else {
- la = (long_ad *)ptr;
+ la = (struct long_ad *)ptr;
if (la->extLength == 0)
return NULL;
}
if (inc)
- *offset += sizeof(long_ad);
+ *offset += sizeof(struct long_ad);
return la;
}
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h
index a0974df82b31..4792b771aa80 100644
--- a/fs/udf/ecma_167.h
+++ b/fs/udf/ecma_167.h
@@ -38,10 +38,10 @@
#define _ECMA_167_H 1
/* Character set specification (ECMA 167r3 1/7.2.1) */
-typedef struct {
+struct charspec {
uint8_t charSetType;
uint8_t charSetInfo[63];
-} __attribute__ ((packed)) charspec;
+} __attribute__ ((packed));
/* Character Set Type (ECMA 167r3 1/7.2.1.1) */
#define CHARSPEC_TYPE_CS0 0x00 /* (1/7.2.2) */
@@ -57,7 +57,7 @@ typedef struct {
typedef uint8_t dstring;
/* Timestamp (ECMA 167r3 1/7.3) */
-typedef struct {
+struct timestamp {
__le16 typeAndTimezone;
__le16 year;
uint8_t month;
@@ -68,7 +68,7 @@ typedef struct {
uint8_t centiseconds;
uint8_t hundredsOfMicroseconds;
uint8_t microseconds;
-} __attribute__ ((packed)) timestamp;
+} __attribute__ ((packed));
/* Type and Time Zone (ECMA 167r3 1/7.3.1) */
#define TIMESTAMP_TYPE_MASK 0xF000
@@ -78,11 +78,11 @@ typedef struct {
#define TIMESTAMP_TIMEZONE_MASK 0x0FFF
/* Entity identifier (ECMA 167r3 1/7.4) */
-typedef struct {
+struct regid {
uint8_t flags;
uint8_t ident[23];
uint8_t identSuffix[8];
-} __attribute__ ((packed)) regid;
+} __attribute__ ((packed));
/* Flags (ECMA 167r3 1/7.4.1) */
#define ENTITYID_FLAGS_DIRTY 0x00
@@ -126,38 +126,38 @@ struct terminatingExtendedAreaDesc {
/* Boot Descriptor (ECMA 167r3 2/9.4) */
struct bootDesc {
- uint8_t structType;
- uint8_t stdIdent[VSD_STD_ID_LEN];
- uint8_t structVersion;
- uint8_t reserved1;
- regid archType;
- regid bootIdent;
- __le32 bootExtLocation;
- __le32 bootExtLength;
- __le64 loadAddress;
- __le64 startAddress;
- timestamp descCreationDateAndTime;
- __le16 flags;
- uint8_t reserved2[32];
- uint8_t bootUse[1906];
+ uint8_t structType;
+ uint8_t stdIdent[VSD_STD_ID_LEN];
+ uint8_t structVersion;
+ uint8_t reserved1;
+ struct regid archType;
+ struct regid bootIdent;
+ __le32 bootExtLocation;
+ __le32 bootExtLength;
+ __le64 loadAddress;
+ __le64 startAddress;
+ struct timestamp descCreationDateAndTime;
+ __le16 flags;
+ uint8_t reserved2[32];
+ uint8_t bootUse[1906];
} __attribute__ ((packed));
/* Flags (ECMA 167r3 2/9.4.12) */
#define BOOT_FLAGS_ERASE 0x01
/* Extent Descriptor (ECMA 167r3 3/7.1) */
-typedef struct {
+struct extent_ad {
__le32 extLength;
__le32 extLocation;
-} __attribute__ ((packed)) extent_ad;
+} __attribute__ ((packed));
-typedef struct {
+struct kernel_extent_ad {
uint32_t extLength;
uint32_t extLocation;
-} kernel_extent_ad;
+};
/* Descriptor Tag (ECMA 167r3 3/7.2) */
-typedef struct {
+struct tag {
__le16 tagIdent;
__le16 descVersion;
uint8_t tagChecksum;
@@ -166,7 +166,7 @@ typedef struct {
__le16 descCRC;
__le16 descCRCLength;
__le32 tagLocation;
-} __attribute__ ((packed)) tag;
+} __attribute__ ((packed));
/* Tag Identifier (ECMA 167r3 3/7.2.1) */
#define TAG_IDENT_PVD 0x0001
@@ -190,28 +190,28 @@ struct NSRDesc {
/* Primary Volume Descriptor (ECMA 167r3 3/10.1) */
struct primaryVolDesc {
- tag descTag;
- __le32 volDescSeqNum;
- __le32 primaryVolDescNum;
- dstring volIdent[32];
- __le16 volSeqNum;
- __le16 maxVolSeqNum;
- __le16 interchangeLvl;
- __le16 maxInterchangeLvl;
- __le32 charSetList;
- __le32 maxCharSetList;
- dstring volSetIdent[128];
- charspec descCharSet;
- charspec explanatoryCharSet;
- extent_ad volAbstract;
- extent_ad volCopyright;
- regid appIdent;
- timestamp recordingDateAndTime;
- regid impIdent;
- uint8_t impUse[64];
- __le32 predecessorVolDescSeqLocation;
- __le16 flags;
- uint8_t reserved[22];
+ struct tag descTag;
+ __le32 volDescSeqNum;
+ __le32 primaryVolDescNum;
+ dstring volIdent[32];
+ __le16 volSeqNum;
+ __le16 maxVolSeqNum;
+ __le16 interchangeLvl;
+ __le16 maxInterchangeLvl;
+ __le32 charSetList;
+ __le32 maxCharSetList;
+ dstring volSetIdent[128];
+ struct charspec descCharSet;
+ struct charspec explanatoryCharSet;
+ struct extent_ad volAbstract;
+ struct extent_ad volCopyright;
+ struct regid appIdent;
+ struct timestamp recordingDateAndTime;
+ struct regid impIdent;
+ uint8_t impUse[64];
+ __le32 predecessorVolDescSeqLocation;
+ __le16 flags;
+ uint8_t reserved[22];
} __attribute__ ((packed));
/* Flags (ECMA 167r3 3/10.1.21) */
@@ -219,40 +219,40 @@ struct primaryVolDesc {
/* Anchor Volume Descriptor Pointer (ECMA 167r3 3/10.2) */
struct anchorVolDescPtr {
- tag descTag;
- extent_ad mainVolDescSeqExt;
- extent_ad reserveVolDescSeqExt;
- uint8_t reserved[480];
+ struct tag descTag;
+ struct extent_ad mainVolDescSeqExt;
+ struct extent_ad reserveVolDescSeqExt;
+ uint8_t reserved[480];
} __attribute__ ((packed));
/* Volume Descriptor Pointer (ECMA 167r3 3/10.3) */
struct volDescPtr {
- tag descTag;
- __le32 volDescSeqNum;
- extent_ad nextVolDescSeqExt;
- uint8_t reserved[484];
+ struct tag descTag;
+ __le32 volDescSeqNum;
+ struct extent_ad nextVolDescSeqExt;
+ uint8_t reserved[484];
} __attribute__ ((packed));
/* Implementation Use Volume Descriptor (ECMA 167r3 3/10.4) */
struct impUseVolDesc {
- tag descTag;
+ struct tag descTag;
__le32 volDescSeqNum;
- regid impIdent;
+ struct regid impIdent;
uint8_t impUse[460];
} __attribute__ ((packed));
/* Partition Descriptor (ECMA 167r3 3/10.5) */
struct partitionDesc {
- tag descTag;
+ struct tag descTag;
__le32 volDescSeqNum;
__le16 partitionFlags;
__le16 partitionNumber;
- regid partitionContents;
+ struct regid partitionContents;
uint8_t partitionContentsUse[128];
__le32 accessType;
__le32 partitionStartingLocation;
__le32 partitionLength;
- regid impIdent;
+ struct regid impIdent;
uint8_t impUse[128];
uint8_t reserved[156];
} __attribute__ ((packed));
@@ -278,19 +278,19 @@ struct partitionDesc {
/* Logical Volume Descriptor (ECMA 167r3 3/10.6) */
struct logicalVolDesc {
- tag descTag;
- __le32 volDescSeqNum;
- charspec descCharSet;
- dstring logicalVolIdent[128];
- __le32 logicalBlockSize;
- regid domainIdent;
- uint8_t logicalVolContentsUse[16];
- __le32 mapTableLength;
- __le32 numPartitionMaps;
- regid impIdent;
- uint8_t impUse[128];
- extent_ad integritySeqExt;
- uint8_t partitionMaps[0];
+ struct tag descTag;
+ __le32 volDescSeqNum;
+ struct charspec descCharSet;
+ dstring logicalVolIdent[128];
+ __le32 logicalBlockSize;
+ struct regid domainIdent;
+ uint8_t logicalVolContentsUse[16];
+ __le32 mapTableLength;
+ __le32 numPartitionMaps;
+ struct regid impIdent;
+ uint8_t impUse[128];
+ struct extent_ad integritySeqExt;
+ uint8_t partitionMaps[0];
} __attribute__ ((packed));
/* Generic Partition Map (ECMA 167r3 3/10.7.1) */
@@ -322,30 +322,30 @@ struct genericPartitionMap2 {
/* Unallocated Space Descriptor (ECMA 167r3 3/10.8) */
struct unallocSpaceDesc {
- tag descTag;
- __le32 volDescSeqNum;
- __le32 numAllocDescs;
- extent_ad allocDescs[0];
+ struct tag descTag;
+ __le32 volDescSeqNum;
+ __le32 numAllocDescs;
+ struct extent_ad allocDescs[0];
} __attribute__ ((packed));
/* Terminating Descriptor (ECMA 167r3 3/10.9) */
struct terminatingDesc {
- tag descTag;
+ struct tag descTag;
uint8_t reserved[496];
} __attribute__ ((packed));
/* Logical Volume Integrity Descriptor (ECMA 167r3 3/10.10) */
struct logicalVolIntegrityDesc {
- tag descTag;
- timestamp recordingDateAndTime;
- __le32 integrityType;
- extent_ad nextIntegrityExt;
- uint8_t logicalVolContentsUse[32];
- __le32 numOfPartitions;
- __le32 lengthOfImpUse;
- __le32 freeSpaceTable[0];
- __le32 sizeTable[0];
- uint8_t impUse[0];
+ struct tag descTag;
+ struct timestamp recordingDateAndTime;
+ __le32 integrityType;
+ struct extent_ad nextIntegrityExt;
+ uint8_t logicalVolContentsUse[32];
+ __le32 numOfPartitions;
+ __le32 lengthOfImpUse;
+ __le32 freeSpaceTable[0];
+ __le32 sizeTable[0];
+ uint8_t impUse[0];
} __attribute__ ((packed));
/* Integrity Type (ECMA 167r3 3/10.10.3) */
@@ -353,50 +353,50 @@ struct logicalVolIntegrityDesc {
#define LVID_INTEGRITY_TYPE_CLOSE 0x00000001
/* Recorded Address (ECMA 167r3 4/7.1) */
-typedef struct {
+struct lb_addr {
__le32 logicalBlockNum;
__le16 partitionReferenceNum;
-} __attribute__ ((packed)) lb_addr;
+} __attribute__ ((packed));
/* ... and its in-core analog */
-typedef struct {
+struct kernel_lb_addr {
uint32_t logicalBlockNum;
uint16_t partitionReferenceNum;
-} kernel_lb_addr;
+};
/* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */
-typedef struct {
+struct short_ad {
__le32 extLength;
__le32 extPosition;
-} __attribute__ ((packed)) short_ad;
+} __attribute__ ((packed));
/* Long Allocation Descriptor (ECMA 167r3 4/14.14.2) */
-typedef struct {
+struct long_ad {
__le32 extLength;
- lb_addr extLocation;
+ struct lb_addr extLocation;
uint8_t impUse[6];
-} __attribute__ ((packed)) long_ad;
+} __attribute__ ((packed));
-typedef struct {
- uint32_t extLength;
- kernel_lb_addr extLocation;
- uint8_t impUse[6];
-} kernel_long_ad;
+struct kernel_long_ad {
+ uint32_t extLength;
+ struct kernel_lb_addr extLocation;
+ uint8_t impUse[6];
+};
/* Extended Allocation Descriptor (ECMA 167r3 4/14.14.3) */
-typedef struct {
+struct ext_ad {
__le32 extLength;
__le32 recordedLength;
__le32 informationLength;
- lb_addr extLocation;
-} __attribute__ ((packed)) ext_ad;
+ struct lb_addr extLocation;
+} __attribute__ ((packed));
-typedef struct {
- uint32_t extLength;
- uint32_t recordedLength;
- uint32_t informationLength;
- kernel_lb_addr extLocation;
-} kernel_ext_ad;
+struct kernel_ext_ad {
+ uint32_t extLength;
+ uint32_t recordedLength;
+ uint32_t informationLength;
+ struct kernel_lb_addr extLocation;
+};
/* Descriptor Tag (ECMA 167r3 4/7.2 - See 3/7.2) */
@@ -415,44 +415,44 @@ typedef struct {
/* File Set Descriptor (ECMA 167r3 4/14.1) */
struct fileSetDesc {
- tag descTag;
- timestamp recordingDateAndTime;
- __le16 interchangeLvl;
- __le16 maxInterchangeLvl;
- __le32 charSetList;
- __le32 maxCharSetList;
- __le32 fileSetNum;
- __le32 fileSetDescNum;
- charspec logicalVolIdentCharSet;
- dstring logicalVolIdent[128];
- charspec fileSetCharSet;
- dstring fileSetIdent[32];
- dstring copyrightFileIdent[32];
- dstring abstractFileIdent[32];
- long_ad rootDirectoryICB;
- regid domainIdent;
- long_ad nextExt;
- long_ad streamDirectoryICB;
- uint8_t reserved[32];
+ struct tag descTag;
+ struct timestamp recordingDateAndTime;
+ __le16 interchangeLvl;
+ __le16 maxInterchangeLvl;
+ __le32 charSetList;
+ __le32 maxCharSetList;
+ __le32 fileSetNum;
+ __le32 fileSetDescNum;
+ struct charspec logicalVolIdentCharSet;
+ dstring logicalVolIdent[128];
+ struct charspec fileSetCharSet;
+ dstring fileSetIdent[32];
+ dstring copyrightFileIdent[32];
+ dstring abstractFileIdent[32];
+ struct long_ad rootDirectoryICB;
+ struct regid domainIdent;
+ struct long_ad nextExt;
+ struct long_ad streamDirectoryICB;
+ uint8_t reserved[32];
} __attribute__ ((packed));
/* Partition Header Descriptor (ECMA 167r3 4/14.3) */
struct partitionHeaderDesc {
- short_ad unallocSpaceTable;
- short_ad unallocSpaceBitmap;
- short_ad partitionIntegrityTable;
- short_ad freedSpaceTable;
- short_ad freedSpaceBitmap;
+ struct short_ad unallocSpaceTable;
+ struct short_ad unallocSpaceBitmap;
+ struct short_ad partitionIntegrityTable;
+ struct short_ad freedSpaceTable;
+ struct short_ad freedSpaceBitmap;
uint8_t reserved[88];
} __attribute__ ((packed));
/* File Identifier Descriptor (ECMA 167r3 4/14.4) */
struct fileIdentDesc {
- tag descTag;
+ struct tag descTag;
__le16 fileVersionNum;
uint8_t fileCharacteristics;
uint8_t lengthFileIdent;
- long_ad icb;
+ struct long_ad icb;
__le16 lengthOfImpUse;
uint8_t impUse[0];
uint8_t fileIdent[0];
@@ -468,22 +468,22 @@ struct fileIdentDesc {
/* Allocation Ext Descriptor (ECMA 167r3 4/14.5) */
struct allocExtDesc {
- tag descTag;
+ struct tag descTag;
__le32 previousAllocExtLocation;
__le32 lengthAllocDescs;
} __attribute__ ((packed));
/* ICB Tag (ECMA 167r3 4/14.6) */
-typedef struct {
+struct icbtag {
__le32 priorRecordedNumDirectEntries;
__le16 strategyType;
__le16 strategyParameter;
__le16 numEntries;
uint8_t reserved;
uint8_t fileType;
- lb_addr parentICBLocation;
+ struct lb_addr parentICBLocation;
__le16 flags;
-} __attribute__ ((packed)) icbtag;
+} __attribute__ ((packed));
/* Strategy Type (ECMA 167r3 4/14.6.2) */
#define ICBTAG_STRATEGY_TYPE_UNDEF 0x0000
@@ -528,41 +528,41 @@ typedef struct {
/* Indirect Entry (ECMA 167r3 4/14.7) */
struct indirectEntry {
- tag descTag;
- icbtag icbTag;
- long_ad indirectICB;
+ struct tag descTag;
+ struct icbtag icbTag;
+ struct long_ad indirectICB;
} __attribute__ ((packed));
/* Terminal Entry (ECMA 167r3 4/14.8) */
struct terminalEntry {
- tag descTag;
- icbtag icbTag;
+ struct tag descTag;
+ struct icbtag icbTag;
} __attribute__ ((packed));
/* File Entry (ECMA 167r3 4/14.9) */
struct fileEntry {
- tag descTag;
- icbtag icbTag;
- __le32 uid;
- __le32 gid;
- __le32 permissions;
- __le16 fileLinkCount;
- uint8_t recordFormat;
- uint8_t recordDisplayAttr;
- __le32 recordLength;
- __le64 informationLength;
- __le64 logicalBlocksRecorded;
- timestamp accessTime;
- timestamp modificationTime;
- timestamp attrTime;
- __le32 checkpoint;
- long_ad extendedAttrICB;
- regid impIdent;
- __le64 uniqueID;
- __le32 lengthExtendedAttr;
- __le32 lengthAllocDescs;
- uint8_t extendedAttr[0];
- uint8_t allocDescs[0];
+ struct tag descTag;
+ struct icbtag icbTag;
+ __le32 uid;
+ __le32 gid;
+ __le32 permissions;
+ __le16 fileLinkCount;
+ uint8_t recordFormat;
+ uint8_t recordDisplayAttr;
+ __le32 recordLength;
+ __le64 informationLength;
+ __le64 logicalBlocksRecorded;
+ struct timestamp accessTime;
+ struct timestamp modificationTime;
+ struct timestamp attrTime;
+ __le32 checkpoint;
+ struct long_ad extendedAttrICB;
+ struct regid impIdent;
+ __le64 uniqueID;
+ __le32 lengthExtendedAttr;
+ __le32 lengthAllocDescs;
+ uint8_t extendedAttr[0];
+ uint8_t allocDescs[0];
} __attribute__ ((packed));
/* Permissions (ECMA 167r3 4/14.9.5) */
@@ -604,7 +604,7 @@ struct fileEntry {
/* Extended Attribute Header Descriptor (ECMA 167r3 4/14.10.1) */
struct extendedAttrHeaderDesc {
- tag descTag;
+ struct tag descTag;
__le32 impAttrLocation;
__le32 appAttrLocation;
} __attribute__ ((packed));
@@ -687,7 +687,7 @@ struct impUseExtAttr {
uint8_t reserved[3];
__le32 attrLength;
__le32 impUseLength;
- regid impIdent;
+ struct regid impIdent;
uint8_t impUse[0];
} __attribute__ ((packed));
@@ -698,7 +698,7 @@ struct appUseExtAttr {
uint8_t reserved[3];
__le32 attrLength;
__le32 appUseLength;
- regid appIdent;
+ struct regid appIdent;
uint8_t appUse[0];
} __attribute__ ((packed));
@@ -712,15 +712,15 @@ struct appUseExtAttr {
/* Unallocated Space Entry (ECMA 167r3 4/14.11) */
struct unallocSpaceEntry {
- tag descTag;
- icbtag icbTag;
+ struct tag descTag;
+ struct icbtag icbTag;
__le32 lengthAllocDescs;
uint8_t allocDescs[0];
} __attribute__ ((packed));
/* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */
struct spaceBitmapDesc {
- tag descTag;
+ struct tag descTag;
__le32 numOfBits;
__le32 numOfBytes;
uint8_t bitmap[0];
@@ -728,13 +728,13 @@ struct spaceBitmapDesc {
/* Partition Integrity Entry (ECMA 167r3 4/14.13) */
struct partitionIntegrityEntry {
- tag descTag;
- icbtag icbTag;
- timestamp recordingDateAndTime;
- uint8_t integrityType;
- uint8_t reserved[175];
- regid impIdent;
- uint8_t impUse[256];
+ struct tag descTag;
+ struct icbtag icbTag;
+ struct timestamp recordingDateAndTime;
+ uint8_t integrityType;
+ uint8_t reserved[175];
+ struct regid impIdent;
+ uint8_t impUse[256];
} __attribute__ ((packed));
/* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */
@@ -765,32 +765,32 @@ struct pathComponent {
/* File Entry (ECMA 167r3 4/14.17) */
struct extendedFileEntry {
- tag descTag;
- icbtag icbTag;
- __le32 uid;
- __le32 gid;
- __le32 permissions;
- __le16 fileLinkCount;
- uint8_t recordFormat;
- uint8_t recordDisplayAttr;
- __le32 recordLength;
- __le64 informationLength;
- __le64 objectSize;
- __le64 logicalBlocksRecorded;
- timestamp accessTime;
- timestamp modificationTime;
- timestamp createTime;
- timestamp attrTime;
- __le32 checkpoint;
- __le32 reserved;
- long_ad extendedAttrICB;
- long_ad streamDirectoryICB;
- regid impIdent;
- __le64 uniqueID;
- __le32 lengthExtendedAttr;
- __le32 lengthAllocDescs;
- uint8_t extendedAttr[0];
- uint8_t allocDescs[0];
+ struct tag descTag;
+ struct icbtag icbTag;
+ __le32 uid;
+ __le32 gid;
+ __le32 permissions;
+ __le16 fileLinkCount;
+ uint8_t recordFormat;
+ uint8_t recordDisplayAttr;
+ __le32 recordLength;
+ __le64 informationLength;
+ __le64 objectSize;
+ __le64 logicalBlocksRecorded;
+ struct timestamp accessTime;
+ struct timestamp modificationTime;
+ struct timestamp createTime;
+ struct timestamp attrTime;
+ __le32 checkpoint;
+ __le32 reserved;
+ struct long_ad extendedAttrICB;
+ struct long_ad streamDirectoryICB;
+ struct regid impIdent;
+ __le64 uniqueID;
+ __le32 lengthExtendedAttr;
+ __le32 lengthAllocDescs;
+ uint8_t extendedAttr[0];
+ uint8_t allocDescs[0];
} __attribute__ ((packed));
#endif /* _ECMA_167_H */
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index a4f2b3ce45b0..76f9b3635a23 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -54,7 +54,7 @@ void udf_free_inode(struct inode *inode)
}
mutex_unlock(&sbi->s_alloc_mutex);
- udf_free_blocks(sb, NULL, UDF_I(inode)->i_location, 0, 1);
+ udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
}
struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
@@ -126,19 +126,19 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
}
mutex_unlock(&sbi->s_alloc_mutex);
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
if (dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
if (S_ISDIR(mode))
mode |= S_ISGID;
} else {
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
}
iinfo->i_location.logicalBlockNum = block;
iinfo->i_location.partitionReferenceNum =
dinfo->i_location.partitionReferenceNum;
- inode->i_ino = udf_get_lb_pblock(sb, iinfo->i_location, 0);
+ inode->i_ino = udf_get_lb_pblock(sb, &iinfo->i_location, 0);
inode->i_blocks = 0;
iinfo->i_lenEAttr = 0;
iinfo->i_lenAlloc = 0;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 6e74b117aaf0..1456d238f8f8 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -55,15 +55,15 @@ static int udf_alloc_i_data(struct inode *inode, size_t size);
static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
sector_t *, int *);
static int8_t udf_insert_aext(struct inode *, struct extent_position,
- kernel_lb_addr, uint32_t);
+ struct kernel_lb_addr, uint32_t);
static void udf_split_extents(struct inode *, int *, int, int,
- kernel_long_ad[EXTENT_MERGE_SIZE], int *);
+ struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
static void udf_prealloc_extents(struct inode *, int, int,
- kernel_long_ad[EXTENT_MERGE_SIZE], int *);
+ struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
static void udf_merge_extents(struct inode *,
- kernel_long_ad[EXTENT_MERGE_SIZE], int *);
+ struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
static void udf_update_extents(struct inode *,
- kernel_long_ad[EXTENT_MERGE_SIZE], int, int,
+ struct kernel_long_ad[EXTENT_MERGE_SIZE], int, int,
struct extent_position *);
static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
@@ -106,6 +106,7 @@ void udf_clear_inode(struct inode *inode)
udf_truncate_tail_extent(inode);
unlock_kernel();
write_inode_now(inode, 0);
+ invalidate_inode_buffers(inode);
}
iinfo = UDF_I(inode);
kfree(iinfo->i_ext.i_data);
@@ -199,7 +200,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
{
int newblock;
struct buffer_head *dbh = NULL;
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen;
uint8_t alloctype;
struct extent_position epos;
@@ -280,7 +281,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
epos.bh = NULL;
epos.block = iinfo->i_location;
epos.offset = udf_file_entry_alloc_offset(inode);
- udf_add_aext(inode, &epos, eloc, elen, 0);
+ udf_add_aext(inode, &epos, &eloc, elen, 0);
/* UniqueID stuff */
brelse(epos.bh);
@@ -358,12 +359,12 @@ static struct buffer_head *udf_getblk(struct inode *inode, long block,
/* Extend the file by 'blocks' blocks, return the number of extents added */
int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
- kernel_long_ad *last_ext, sector_t blocks)
+ struct kernel_long_ad *last_ext, sector_t blocks)
{
sector_t add;
int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
struct super_block *sb = inode->i_sb;
- kernel_lb_addr prealloc_loc = {};
+ struct kernel_lb_addr prealloc_loc = {};
int prealloc_len = 0;
struct udf_inode_info *iinfo;
@@ -410,11 +411,11 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
}
if (fake) {
- udf_add_aext(inode, last_pos, last_ext->extLocation,
+ udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
count++;
} else
- udf_write_aext(inode, last_pos, last_ext->extLocation,
+ udf_write_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
/* Managed to do everything necessary? */
@@ -431,7 +432,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
/* Create enough extents to cover the whole hole */
while (blocks > add) {
blocks -= add;
- if (udf_add_aext(inode, last_pos, last_ext->extLocation,
+ if (udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1) == -1)
return -1;
count++;
@@ -439,7 +440,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
if (blocks) {
last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
(blocks << sb->s_blocksize_bits);
- if (udf_add_aext(inode, last_pos, last_ext->extLocation,
+ if (udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1) == -1)
return -1;
count++;
@@ -448,7 +449,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
out:
/* Do we have some preallocated blocks saved? */
if (prealloc_len) {
- if (udf_add_aext(inode, last_pos, prealloc_loc,
+ if (udf_add_aext(inode, last_pos, &prealloc_loc,
prealloc_len, 1) == -1)
return -1;
last_ext->extLocation = prealloc_loc;
@@ -458,9 +459,9 @@ out:
/* last_pos should point to the last written extent... */
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- last_pos->offset -= sizeof(short_ad);
+ last_pos->offset -= sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- last_pos->offset -= sizeof(long_ad);
+ last_pos->offset -= sizeof(struct long_ad);
else
return -1;
@@ -472,11 +473,11 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
{
static sector_t last_block;
struct buffer_head *result = NULL;
- kernel_long_ad laarr[EXTENT_MERGE_SIZE];
+ struct kernel_long_ad laarr[EXTENT_MERGE_SIZE];
struct extent_position prev_epos, cur_epos, next_epos;
int count = 0, startnum = 0, endnum = 0;
uint32_t elen = 0, tmpelen;
- kernel_lb_addr eloc, tmpeloc;
+ struct kernel_lb_addr eloc, tmpeloc;
int c = 1;
loff_t lbcount = 0, b_off = 0;
uint32_t newblocknum, newblock;
@@ -549,12 +550,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
elen = EXT_RECORDED_ALLOCATED |
((elen + inode->i_sb->s_blocksize - 1) &
~(inode->i_sb->s_blocksize - 1));
- etype = udf_write_aext(inode, &cur_epos, eloc, elen, 1);
+ etype = udf_write_aext(inode, &cur_epos, &eloc, elen, 1);
}
brelse(prev_epos.bh);
brelse(cur_epos.bh);
brelse(next_epos.bh);
- newblock = udf_get_lb_pblock(inode->i_sb, eloc, offset);
+ newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
*phys = newblock;
return NULL;
}
@@ -571,7 +572,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
} else {
/* Create a fake extent when there's not one */
memset(&laarr[0].extLocation, 0x00,
- sizeof(kernel_lb_addr));
+ sizeof(struct kernel_lb_addr));
laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
/* Will udf_extend_file() create real extent from
a fake one? */
@@ -601,7 +602,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
inode->i_sb->s_blocksize;
memset(&laarr[c].extLocation, 0x00,
- sizeof(kernel_lb_addr));
+ sizeof(struct kernel_lb_addr));
count++;
endnum++;
}
@@ -698,7 +699,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
static void udf_split_extents(struct inode *inode, int *c, int offset,
int newblocknum,
- kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+ struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
int *endnum)
{
unsigned long blocksize = inode->i_sb->s_blocksize;
@@ -725,7 +726,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset,
if (offset) {
if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
udf_free_blocks(inode->i_sb, inode,
- laarr[curr].extLocation,
+ &laarr[curr].extLocation,
0, offset);
laarr[curr].extLength =
EXT_NOT_RECORDED_NOT_ALLOCATED |
@@ -762,7 +763,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset,
}
static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
- kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+ struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
int *endnum)
{
int start, length = 0, currlength = 0, i;
@@ -816,7 +817,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
inode->i_sb->s_blocksize_bits);
else {
memmove(&laarr[c + 2], &laarr[c + 1],
- sizeof(long_ad) * (*endnum - (c + 1)));
+ sizeof(struct long_ad) * (*endnum - (c + 1)));
(*endnum)++;
laarr[c + 1].extLocation.logicalBlockNum = next;
laarr[c + 1].extLocation.partitionReferenceNum =
@@ -845,7 +846,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
if (*endnum > (i + 1))
memmove(&laarr[i],
&laarr[i + 1],
- sizeof(long_ad) *
+ sizeof(struct long_ad) *
(*endnum - (i + 1)));
i--;
(*endnum)--;
@@ -858,7 +859,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
}
static void udf_merge_extents(struct inode *inode,
- kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+ struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
int *endnum)
{
int i;
@@ -866,8 +867,8 @@ static void udf_merge_extents(struct inode *inode,
unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
for (i = 0; i < (*endnum - 1); i++) {
- kernel_long_ad *li /*l[i]*/ = &laarr[i];
- kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1];
+ struct kernel_long_ad *li /*l[i]*/ = &laarr[i];
+ struct kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1];
if (((li->extLength >> 30) == (lip1->extLength >> 30)) &&
(((li->extLength >> 30) ==
@@ -901,7 +902,7 @@ static void udf_merge_extents(struct inode *inode,
blocksize - 1) & ~(blocksize - 1));
if (*endnum > (i + 2))
memmove(&laarr[i + 1], &laarr[i + 2],
- sizeof(long_ad) *
+ sizeof(struct long_ad) *
(*endnum - (i + 2)));
i--;
(*endnum)--;
@@ -910,7 +911,7 @@ static void udf_merge_extents(struct inode *inode,
(EXT_NOT_RECORDED_ALLOCATED >> 30)) &&
((lip1->extLength >> 30) ==
(EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) {
- udf_free_blocks(inode->i_sb, inode, li->extLocation, 0,
+ udf_free_blocks(inode->i_sb, inode, &li->extLocation, 0,
((li->extLength &
UDF_EXTENT_LENGTH_MASK) +
blocksize - 1) >> blocksize_bits);
@@ -936,7 +937,7 @@ static void udf_merge_extents(struct inode *inode,
blocksize - 1) & ~(blocksize - 1));
if (*endnum > (i + 2))
memmove(&laarr[i + 1], &laarr[i + 2],
- sizeof(long_ad) *
+ sizeof(struct long_ad) *
(*endnum - (i + 2)));
i--;
(*endnum)--;
@@ -944,7 +945,7 @@ static void udf_merge_extents(struct inode *inode,
} else if ((li->extLength >> 30) ==
(EXT_NOT_RECORDED_ALLOCATED >> 30)) {
udf_free_blocks(inode->i_sb, inode,
- li->extLocation, 0,
+ &li->extLocation, 0,
((li->extLength &
UDF_EXTENT_LENGTH_MASK) +
blocksize - 1) >> blocksize_bits);
@@ -958,12 +959,12 @@ static void udf_merge_extents(struct inode *inode,
}
static void udf_update_extents(struct inode *inode,
- kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+ struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
int startnum, int endnum,
struct extent_position *epos)
{
int start = 0, i;
- kernel_lb_addr tmploc;
+ struct kernel_lb_addr tmploc;
uint32_t tmplen;
if (startnum > endnum) {
@@ -982,7 +983,7 @@ static void udf_update_extents(struct inode *inode,
for (i = start; i < endnum; i++) {
udf_next_aext(inode, epos, &tmploc, &tmplen, 0);
- udf_write_aext(inode, epos, laarr[i].extLocation,
+ udf_write_aext(inode, epos, &laarr[i].extLocation,
laarr[i].extLength, 1);
}
}
@@ -1075,7 +1076,7 @@ static void __udf_read_inode(struct inode *inode)
* i_nlink = 1
* i_op = NULL;
*/
- bh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 0, &ident);
+ bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
if (!bh) {
printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n",
inode->i_ino);
@@ -1097,24 +1098,24 @@ static void __udf_read_inode(struct inode *inode)
if (fe->icbTag.strategyType == cpu_to_le16(4096)) {
struct buffer_head *ibh;
- ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1,
+ ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1,
&ident);
if (ident == TAG_IDENT_IE && ibh) {
struct buffer_head *nbh = NULL;
- kernel_lb_addr loc;
+ struct kernel_lb_addr loc;
struct indirectEntry *ie;
ie = (struct indirectEntry *)ibh->b_data;
loc = lelb_to_cpu(ie->indirectICB.extLocation);
if (ie->indirectICB.extLength &&
- (nbh = udf_read_ptagged(inode->i_sb, loc, 0,
+ (nbh = udf_read_ptagged(inode->i_sb, &loc, 0,
&ident))) {
if (ident == TAG_IDENT_FE ||
ident == TAG_IDENT_EFE) {
memcpy(&iinfo->i_location,
&loc,
- sizeof(kernel_lb_addr));
+ sizeof(struct kernel_lb_addr));
brelse(bh);
brelse(ibh);
brelse(nbh);
@@ -1221,8 +1222,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
inode->i_size = le64_to_cpu(fe->informationLength);
iinfo->i_lenExtents = inode->i_size;
- inode->i_mode = udf_convert_permissions(fe);
- inode->i_mode &= ~UDF_SB(inode->i_sb)->s_umask;
+ if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY &&
+ sbi->s_fmode != -1)
+ inode->i_mode = sbi->s_fmode;
+ else if (fe->icbTag.fileType == ICBTAG_FILE_TYPE_DIRECTORY &&
+ sbi->s_dmode != -1)
+ inode->i_mode = sbi->s_dmode;
+ else
+ inode->i_mode = udf_convert_permissions(fe);
+ inode->i_mode &= ~sbi->s_umask;
if (iinfo->i_efe == 0) {
inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
@@ -1395,7 +1403,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
bh = udf_tread(inode->i_sb,
udf_get_lb_pblock(inode->i_sb,
- iinfo->i_location, 0));
+ &iinfo->i_location, 0));
if (!bh) {
udf_debug("bread failure\n");
return -EIO;
@@ -1415,13 +1423,13 @@ static int udf_update_inode(struct inode *inode, int do_sync)
iinfo->i_ext.i_data, inode->i_sb->s_blocksize -
sizeof(struct unallocSpaceEntry));
crclen = sizeof(struct unallocSpaceEntry) +
- iinfo->i_lenAlloc - sizeof(tag);
+ iinfo->i_lenAlloc - sizeof(struct tag);
use->descTag.tagLocation = cpu_to_le32(
iinfo->i_location.
logicalBlockNum);
use->descTag.descCRCLength = cpu_to_le16(crclen);
use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use +
- sizeof(tag),
+ sizeof(struct tag),
crclen));
use->descTag.tagChecksum = udf_tag_checksum(&use->descTag);
@@ -1458,23 +1466,23 @@ static int udf_update_inode(struct inode *inode, int do_sync)
fe->informationLength = cpu_to_le64(inode->i_size);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
- regid *eid;
+ struct regid *eid;
struct deviceSpec *dsea =
(struct deviceSpec *)udf_get_extendedattr(inode, 12, 1);
if (!dsea) {
dsea = (struct deviceSpec *)
udf_add_extendedattr(inode,
sizeof(struct deviceSpec) +
- sizeof(regid), 12, 0x3);
+ sizeof(struct regid), 12, 0x3);
dsea->attrType = cpu_to_le32(12);
dsea->attrSubtype = 1;
dsea->attrLength = cpu_to_le32(
sizeof(struct deviceSpec) +
- sizeof(regid));
- dsea->impUseLength = cpu_to_le32(sizeof(regid));
+ sizeof(struct regid));
+ dsea->impUseLength = cpu_to_le32(sizeof(struct regid));
}
- eid = (regid *)dsea->impUse;
- memset(eid, 0, sizeof(regid));
+ eid = (struct regid *)dsea->impUse;
+ memset(eid, 0, sizeof(struct regid));
strcpy(eid->ident, UDF_ID_DEVELOPER);
eid->identSuffix[0] = UDF_OS_CLASS_UNIX;
eid->identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1493,7 +1501,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime);
- memset(&(fe->impIdent), 0, sizeof(regid));
+ memset(&(fe->impIdent), 0, sizeof(struct regid));
strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER);
fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
fe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1532,7 +1540,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime);
udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime);
- memset(&(efe->impIdent), 0, sizeof(regid));
+ memset(&(efe->impIdent), 0, sizeof(struct regid));
strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER);
efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1583,9 +1591,9 @@ static int udf_update_inode(struct inode *inode, int do_sync)
fe->descTag.tagLocation = cpu_to_le32(
iinfo->i_location.logicalBlockNum);
crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc -
- sizeof(tag);
+ sizeof(struct tag);
fe->descTag.descCRCLength = cpu_to_le16(crclen);
- fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(tag),
+ fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(struct tag),
crclen));
fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag);
@@ -1605,7 +1613,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
return err;
}
-struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
+struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino)
{
unsigned long block = udf_get_lb_pblock(sb, ino, 0);
struct inode *inode = iget_locked(sb, block);
@@ -1614,7 +1622,7 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
return NULL;
if (inode->i_state & I_NEW) {
- memcpy(&UDF_I(inode)->i_location, &ino, sizeof(kernel_lb_addr));
+ memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
__udf_read_inode(inode);
unlock_new_inode(inode);
}
@@ -1622,10 +1630,10 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
if (is_bad_inode(inode))
goto out_iput;
- if (ino.logicalBlockNum >= UDF_SB(sb)->
- s_partmaps[ino.partitionReferenceNum].s_partition_len) {
+ if (ino->logicalBlockNum >= UDF_SB(sb)->
+ s_partmaps[ino->partitionReferenceNum].s_partition_len) {
udf_debug("block=%d, partition=%d out of range\n",
- ino.logicalBlockNum, ino.partitionReferenceNum);
+ ino->logicalBlockNum, ino->partitionReferenceNum);
make_bad_inode(inode);
goto out_iput;
}
@@ -1638,11 +1646,11 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
}
int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
- kernel_lb_addr eloc, uint32_t elen, int inc)
+ struct kernel_lb_addr *eloc, uint32_t elen, int inc)
{
int adsize;
- short_ad *sad = NULL;
- long_ad *lad = NULL;
+ struct short_ad *sad = NULL;
+ struct long_ad *lad = NULL;
struct allocExtDesc *aed;
int8_t etype;
uint8_t *ptr;
@@ -1656,9 +1664,9 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
ptr = epos->bh->b_data + epos->offset;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(short_ad);
+ adsize = sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
else
return -1;
@@ -1666,7 +1674,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
char *sptr, *dptr;
struct buffer_head *nbh;
int err, loffset;
- kernel_lb_addr obloc = epos->block;
+ struct kernel_lb_addr obloc = epos->block;
epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL,
obloc.partitionReferenceNum,
@@ -1674,7 +1682,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
if (!epos->block.logicalBlockNum)
return -1;
nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb,
- epos->block,
+ &epos->block,
0));
if (!nbh)
return -1;
@@ -1711,20 +1719,20 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
}
if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200)
udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1,
- epos->block.logicalBlockNum, sizeof(tag));
+ epos->block.logicalBlockNum, sizeof(struct tag));
else
udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1,
- epos->block.logicalBlockNum, sizeof(tag));
+ epos->block.logicalBlockNum, sizeof(struct tag));
switch (iinfo->i_alloc_type) {
case ICBTAG_FLAG_AD_SHORT:
- sad = (short_ad *)sptr;
+ sad = (struct short_ad *)sptr;
sad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS |
inode->i_sb->s_blocksize);
sad->extPosition =
cpu_to_le32(epos->block.logicalBlockNum);
break;
case ICBTAG_FLAG_AD_LONG:
- lad = (long_ad *)sptr;
+ lad = (struct long_ad *)sptr;
lad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS |
inode->i_sb->s_blocksize);
lad->extLocation = cpu_to_lelb(epos->block);
@@ -1768,12 +1776,12 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
}
int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
- kernel_lb_addr eloc, uint32_t elen, int inc)
+ struct kernel_lb_addr *eloc, uint32_t elen, int inc)
{
int adsize;
uint8_t *ptr;
- short_ad *sad;
- long_ad *lad;
+ struct short_ad *sad;
+ struct long_ad *lad;
struct udf_inode_info *iinfo = UDF_I(inode);
if (!epos->bh)
@@ -1785,17 +1793,17 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
switch (iinfo->i_alloc_type) {
case ICBTAG_FLAG_AD_SHORT:
- sad = (short_ad *)ptr;
+ sad = (struct short_ad *)ptr;
sad->extLength = cpu_to_le32(elen);
- sad->extPosition = cpu_to_le32(eloc.logicalBlockNum);
- adsize = sizeof(short_ad);
+ sad->extPosition = cpu_to_le32(eloc->logicalBlockNum);
+ adsize = sizeof(struct short_ad);
break;
case ICBTAG_FLAG_AD_LONG:
- lad = (long_ad *)ptr;
+ lad = (struct long_ad *)ptr;
lad->extLength = cpu_to_le32(elen);
- lad->extLocation = cpu_to_lelb(eloc);
+ lad->extLocation = cpu_to_lelb(*eloc);
memset(lad->impUse, 0x00, sizeof(lad->impUse));
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
break;
default:
return -1;
@@ -1822,7 +1830,7 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
}
int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
- kernel_lb_addr *eloc, uint32_t *elen, int inc)
+ struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
{
int8_t etype;
@@ -1832,7 +1840,7 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
epos->block = *eloc;
epos->offset = sizeof(struct allocExtDesc);
brelse(epos->bh);
- block = udf_get_lb_pblock(inode->i_sb, epos->block, 0);
+ block = udf_get_lb_pblock(inode->i_sb, &epos->block, 0);
epos->bh = udf_tread(inode->i_sb, block);
if (!epos->bh) {
udf_debug("reading block %d failed!\n", block);
@@ -1844,13 +1852,13 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
}
int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
- kernel_lb_addr *eloc, uint32_t *elen, int inc)
+ struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
{
int alen;
int8_t etype;
uint8_t *ptr;
- short_ad *sad;
- long_ad *lad;
+ struct short_ad *sad;
+ struct long_ad *lad;
struct udf_inode_info *iinfo = UDF_I(inode);
if (!epos->bh) {
@@ -1899,9 +1907,9 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
}
static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
- kernel_lb_addr neloc, uint32_t nelen)
+ struct kernel_lb_addr neloc, uint32_t nelen)
{
- kernel_lb_addr oeloc;
+ struct kernel_lb_addr oeloc;
uint32_t oelen;
int8_t etype;
@@ -1909,18 +1917,18 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
get_bh(epos.bh);
while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) {
- udf_write_aext(inode, &epos, neloc, nelen, 1);
+ udf_write_aext(inode, &epos, &neloc, nelen, 1);
neloc = oeloc;
nelen = (etype << 30) | oelen;
}
- udf_add_aext(inode, &epos, neloc, nelen, 1);
+ udf_add_aext(inode, &epos, &neloc, nelen, 1);
brelse(epos.bh);
return (nelen >> 30);
}
int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
- kernel_lb_addr eloc, uint32_t elen)
+ struct kernel_lb_addr eloc, uint32_t elen)
{
struct extent_position oepos;
int adsize;
@@ -1935,9 +1943,9 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
iinfo = UDF_I(inode);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(short_ad);
+ adsize = sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
else
adsize = 0;
@@ -1946,7 +1954,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
return -1;
while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
- udf_write_aext(inode, &oepos, eloc, (etype << 30) | elen, 1);
+ udf_write_aext(inode, &oepos, &eloc, (etype << 30) | elen, 1);
if (oepos.bh != epos.bh) {
oepos.block = epos.block;
brelse(oepos.bh);
@@ -1955,13 +1963,13 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
oepos.offset = epos.offset - adsize;
}
}
- memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+ memset(&eloc, 0x00, sizeof(struct kernel_lb_addr));
elen = 0;
if (epos.bh != oepos.bh) {
- udf_free_blocks(inode->i_sb, inode, epos.block, 0, 1);
- udf_write_aext(inode, &oepos, eloc, elen, 1);
- udf_write_aext(inode, &oepos, eloc, elen, 1);
+ udf_free_blocks(inode->i_sb, inode, &epos.block, 0, 1);
+ udf_write_aext(inode, &oepos, &eloc, elen, 1);
+ udf_write_aext(inode, &oepos, &eloc, elen, 1);
if (!oepos.bh) {
iinfo->i_lenAlloc -= (adsize * 2);
mark_inode_dirty(inode);
@@ -1978,7 +1986,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
mark_buffer_dirty_inode(oepos.bh, inode);
}
} else {
- udf_write_aext(inode, &oepos, eloc, elen, 1);
+ udf_write_aext(inode, &oepos, &eloc, elen, 1);
if (!oepos.bh) {
iinfo->i_lenAlloc -= adsize;
mark_inode_dirty(inode);
@@ -2003,7 +2011,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
}
int8_t inode_bmap(struct inode *inode, sector_t block,
- struct extent_position *pos, kernel_lb_addr *eloc,
+ struct extent_position *pos, struct kernel_lb_addr *eloc,
uint32_t *elen, sector_t *offset)
{
unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
@@ -2035,7 +2043,7 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
long udf_block_map(struct inode *inode, sector_t block)
{
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen;
sector_t offset;
struct extent_position epos = {};
@@ -2045,7 +2053,7 @@ long udf_block_map(struct inode *inode, sector_t block)
if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) ==
(EXT_RECORDED_ALLOCATED >> 30))
- ret = udf_get_lb_pblock(inode->i_sb, eloc, offset);
+ ret = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
else
ret = 0;
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 84bf0fd4a4f1..9215700c00a4 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -134,10 +134,10 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
}
}
/* rewrite CRC + checksum of eahd */
- crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag);
+ crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(struct tag);
eahd->descTag.descCRCLength = cpu_to_le16(crclen);
eahd->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)eahd +
- sizeof(tag), crclen));
+ sizeof(struct tag), crclen));
eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag);
iinfo->i_lenEAttr += size;
return (struct genericFormat *)&ea[offset];
@@ -202,7 +202,7 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
uint32_t location, uint16_t *ident)
{
- tag *tag_p;
+ struct tag *tag_p;
struct buffer_head *bh = NULL;
/* Read the block */
@@ -216,7 +216,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
return NULL;
}
- tag_p = (tag *)(bh->b_data);
+ tag_p = (struct tag *)(bh->b_data);
*ident = le16_to_cpu(tag_p->tagIdent);
@@ -241,9 +241,9 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
}
/* Verify the descriptor CRC */
- if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize ||
+ if (le16_to_cpu(tag_p->descCRCLength) + sizeof(struct tag) > sb->s_blocksize ||
le16_to_cpu(tag_p->descCRC) == crc_itu_t(0,
- bh->b_data + sizeof(tag),
+ bh->b_data + sizeof(struct tag),
le16_to_cpu(tag_p->descCRCLength)))
return bh;
@@ -255,27 +255,28 @@ error_out:
return NULL;
}
-struct buffer_head *udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc,
+struct buffer_head *udf_read_ptagged(struct super_block *sb,
+ struct kernel_lb_addr *loc,
uint32_t offset, uint16_t *ident)
{
return udf_read_tagged(sb, udf_get_lb_pblock(sb, loc, offset),
- loc.logicalBlockNum + offset, ident);
+ loc->logicalBlockNum + offset, ident);
}
void udf_update_tag(char *data, int length)
{
- tag *tptr = (tag *)data;
- length -= sizeof(tag);
+ struct tag *tptr = (struct tag *)data;
+ length -= sizeof(struct tag);
tptr->descCRCLength = cpu_to_le16(length);
- tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(tag), length));
+ tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(struct tag), length));
tptr->tagChecksum = udf_tag_checksum(tptr);
}
void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum,
uint32_t loc, int length)
{
- tag *tptr = (tag *)data;
+ struct tag *tptr = (struct tag *)data;
tptr->tagIdent = cpu_to_le16(ident);
tptr->descVersion = cpu_to_le16(version);
tptr->tagSerialNum = cpu_to_le16(snum);
@@ -283,12 +284,12 @@ void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum,
udf_update_tag(data, length);
}
-u8 udf_tag_checksum(const tag *t)
+u8 udf_tag_checksum(const struct tag *t)
{
u8 *data = (u8 *)t;
u8 checksum = 0;
int i;
- for (i = 0; i < sizeof(tag); ++i)
+ for (i = 0; i < sizeof(struct tag); ++i)
if (i != 4) /* position of checksum */
checksum += data[i];
return checksum;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 082409cd4b8a..6a29fa34c478 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -47,7 +47,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh,
uint8_t *impuse, uint8_t *fileident)
{
- uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(tag);
+ uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(struct tag);
uint16_t crc;
int offset;
uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse);
@@ -99,18 +99,18 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
memset(fibh->ebh->b_data, 0x00, padlen + offset);
}
- crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(tag),
- sizeof(struct fileIdentDesc) - sizeof(tag));
+ crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(struct tag),
+ sizeof(struct fileIdentDesc) - sizeof(struct tag));
if (fibh->sbh == fibh->ebh) {
crc = crc_itu_t(crc, (uint8_t *)sfi->impUse,
- crclen + sizeof(tag) -
+ crclen + sizeof(struct tag) -
sizeof(struct fileIdentDesc));
} else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) {
crc = crc_itu_t(crc, fibh->ebh->b_data +
sizeof(struct fileIdentDesc) +
fibh->soffset,
- crclen + sizeof(tag) -
+ crclen + sizeof(struct tag) -
sizeof(struct fileIdentDesc));
} else {
crc = crc_itu_t(crc, (uint8_t *)sfi->impUse,
@@ -154,7 +154,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
uint8_t lfi;
uint16_t liu;
loff_t size;
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen;
sector_t offset;
struct extent_position epos = {};
@@ -171,12 +171,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
&eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30))
goto out_err;
- block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
+ block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(short_ad);
+ epos.offset -= sizeof(struct short_ad);
else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(long_ad);
+ epos.offset -= sizeof(struct long_ad);
} else
offset = 0;
@@ -268,7 +268,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
#ifdef UDF_RECOVERY
/* temporary shorthand for specifying files by inode number */
if (!strncmp(dentry->d_name.name, ".B=", 3)) {
- kernel_lb_addr lb = {
+ struct kernel_lb_addr lb = {
.logicalBlockNum = 0,
.partitionReferenceNum =
simple_strtoul(dentry->d_name.name + 3,
@@ -283,11 +283,14 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
#endif /* UDF_RECOVERY */
if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) {
+ struct kernel_lb_addr loc;
+
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
- inode = udf_iget(dir->i_sb, lelb_to_cpu(cfi.icb.extLocation));
+ loc = lelb_to_cpu(cfi.icb.extLocation);
+ inode = udf_iget(dir->i_sb, &loc);
if (!inode) {
unlock_kernel();
return ERR_PTR(-EACCES);
@@ -313,7 +316,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
uint8_t lfi;
uint16_t liu;
int block;
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen = 0;
sector_t offset;
struct extent_position epos = {};
@@ -351,16 +354,16 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
&eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) {
block = udf_get_lb_pblock(dir->i_sb,
- dinfo->i_location, 0);
+ &dinfo->i_location, 0);
fibh->soffset = fibh->eoffset = sb->s_blocksize;
goto add;
}
- block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
+ block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(short_ad);
+ epos.offset -= sizeof(struct short_ad);
else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(long_ad);
+ epos.offset -= sizeof(struct long_ad);
} else
offset = 0;
@@ -409,10 +412,10 @@ add:
if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) {
elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(short_ad);
+ epos.offset -= sizeof(struct short_ad);
else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(long_ad);
- udf_write_aext(dir, &epos, eloc, elen, 1);
+ epos.offset -= sizeof(struct long_ad);
+ udf_write_aext(dir, &epos, &eloc, elen, 1);
}
f_pos += nfidlen;
@@ -494,10 +497,10 @@ add:
memset(cfi, 0, sizeof(struct fileIdentDesc));
if (UDF_SB(sb)->s_udfrev >= 0x0200)
udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block,
- sizeof(tag));
+ sizeof(struct tag));
else
udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block,
- sizeof(tag));
+ sizeof(struct tag));
cfi->fileVersionNum = cpu_to_le16(1);
cfi->lengthFileIdent = namelen;
cfi->lengthOfImpUse = cpu_to_le16(0);
@@ -530,7 +533,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
cfi->fileCharacteristics |= FID_FILE_CHAR_DELETED;
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT))
- memset(&(cfi->icb), 0x00, sizeof(long_ad));
+ memset(&(cfi->icb), 0x00, sizeof(struct long_ad));
return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
}
@@ -604,7 +607,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
goto out;
iinfo = UDF_I(inode);
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
init_special_inode(inode, mode, rdev);
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi) {
@@ -710,7 +713,7 @@ static int empty_dir(struct inode *dir)
loff_t f_pos;
loff_t size = udf_ext0_offset(dir) + dir->i_size;
int block;
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen;
sector_t offset;
struct extent_position epos = {};
@@ -724,12 +727,12 @@ static int empty_dir(struct inode *dir)
else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits,
&epos, &eloc, &elen, &offset) ==
(EXT_RECORDED_ALLOCATED >> 30)) {
- block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
+ block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(short_ad);
+ epos.offset -= sizeof(struct short_ad);
else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(long_ad);
+ epos.offset -= sizeof(struct long_ad);
} else
offset = 0;
@@ -778,7 +781,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct udf_fileident_bh fibh;
struct fileIdentDesc *fi, cfi;
- kernel_lb_addr tloc;
+ struct kernel_lb_addr tloc;
retval = -ENOENT;
lock_kernel();
@@ -788,7 +791,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
retval = -EIO;
tloc = lelb_to_cpu(cfi.icb.extLocation);
- if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino)
+ if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino)
goto end_rmdir;
retval = -ENOTEMPTY;
if (!empty_dir(inode))
@@ -824,7 +827,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
struct udf_fileident_bh fibh;
struct fileIdentDesc *fi;
struct fileIdentDesc cfi;
- kernel_lb_addr tloc;
+ struct kernel_lb_addr tloc;
retval = -ENOENT;
lock_kernel();
@@ -834,7 +837,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
retval = -EIO;
tloc = lelb_to_cpu(cfi.icb.extLocation);
- if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino)
+ if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino)
goto end_unlink;
if (!inode->i_nlink) {
@@ -897,7 +900,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &page_symlink_inode_operations;
if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t bsize;
block = udf_new_block(inode->i_sb, inode,
@@ -913,7 +916,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
iinfo->i_location.partitionReferenceNum;
bsize = inode->i_sb->s_blocksize;
iinfo->i_lenExtents = bsize;
- udf_add_aext(inode, &epos, eloc, bsize, 0);
+ udf_add_aext(inode, &epos, &eloc, bsize, 0);
brelse(epos.bh);
block = udf_get_pblock(inode->i_sb, block,
@@ -1108,7 +1111,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
struct fileIdentDesc ocfi, ncfi;
struct buffer_head *dir_bh = NULL;
int retval = -ENOENT;
- kernel_lb_addr tloc;
+ struct kernel_lb_addr tloc;
struct udf_inode_info *old_iinfo = UDF_I(old_inode);
lock_kernel();
@@ -1119,7 +1122,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
brelse(ofibh.sbh);
}
tloc = lelb_to_cpu(ocfi.icb.extLocation);
- if (!ofi || udf_get_lb_pblock(old_dir->i_sb, tloc, 0)
+ if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0)
!= old_inode->i_ino)
goto end_rename;
@@ -1158,7 +1161,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!dir_fi)
goto end_rename;
tloc = lelb_to_cpu(dir_fi->icb.extLocation);
- if (udf_get_lb_pblock(old_inode->i_sb, tloc, 0) !=
+ if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
old_dir->i_ino)
goto end_rename;
@@ -1187,7 +1190,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
ncfi.fileVersionNum = ocfi.fileVersionNum;
ncfi.fileCharacteristics = ocfi.fileCharacteristics;
- memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(long_ad));
+ memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(struct long_ad));
udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL);
/* The old fid may have moved - find it again */
@@ -1242,6 +1245,7 @@ end_rename:
static struct dentry *udf_get_parent(struct dentry *child)
{
+ struct kernel_lb_addr tloc;
struct inode *inode = NULL;
struct qstr dotdot = {.name = "..", .len = 2};
struct fileIdentDesc cfi;
@@ -1255,8 +1259,8 @@ static struct dentry *udf_get_parent(struct dentry *child)
brelse(fibh.ebh);
brelse(fibh.sbh);
- inode = udf_iget(child->d_inode->i_sb,
- lelb_to_cpu(cfi.icb.extLocation));
+ tloc = lelb_to_cpu(cfi.icb.extLocation);
+ inode = udf_iget(child->d_inode->i_sb, &tloc);
if (!inode)
goto out_unlock;
unlock_kernel();
@@ -1272,14 +1276,14 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
u16 partref, __u32 generation)
{
struct inode *inode;
- kernel_lb_addr loc;
+ struct kernel_lb_addr loc;
if (block == 0)
return ERR_PTR(-ESTALE);
loc.logicalBlockNum = block;
loc.partitionReferenceNum = partref;
- inode = udf_iget(sb, loc);
+ inode = udf_iget(sb, &loc);
if (inode == NULL)
return ERR_PTR(-ENOMEM);
@@ -1318,7 +1322,7 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
{
int len = *lenp;
struct inode *inode = de->d_inode;
- kernel_lb_addr location = UDF_I(inode)->i_location;
+ struct kernel_lb_addr location = UDF_I(inode)->i_location;
struct fid *fid = (struct fid *)fh;
int type = FILEID_UDF_WITHOUT_PARENT;
diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h
index 65ff47902bd2..fbff74654df2 100644
--- a/fs/udf/osta_udf.h
+++ b/fs/udf/osta_udf.h
@@ -85,7 +85,7 @@ struct appIdentSuffix {
/* Logical Volume Integrity Descriptor (UDF 2.50 2.2.6) */
/* Implementation Use (UDF 2.50 2.2.6.4) */
struct logicalVolIntegrityDescImpUse {
- regid impIdent;
+ struct regid impIdent;
__le32 numFiles;
__le32 numDirs;
__le16 minUDFReadRev;
@@ -97,12 +97,12 @@ struct logicalVolIntegrityDescImpUse {
/* Implementation Use Volume Descriptor (UDF 2.50 2.2.7) */
/* Implementation Use (UDF 2.50 2.2.7.2) */
struct impUseVolDescImpUse {
- charspec LVICharset;
+ struct charspec LVICharset;
dstring logicalVolIdent[128];
dstring LVInfo1[36];
dstring LVInfo2[36];
dstring LVInfo3[36];
- regid impIdent;
+ struct regid impIdent;
uint8_t impUse[128];
} __attribute__ ((packed));
@@ -110,7 +110,7 @@ struct udfPartitionMap2 {
uint8_t partitionMapType;
uint8_t partitionMapLength;
uint8_t reserved1[2];
- regid partIdent;
+ struct regid partIdent;
__le16 volSeqNum;
__le16 partitionNum;
} __attribute__ ((packed));
@@ -120,7 +120,7 @@ struct virtualPartitionMap {
uint8_t partitionMapType;
uint8_t partitionMapLength;
uint8_t reserved1[2];
- regid partIdent;
+ struct regid partIdent;
__le16 volSeqNum;
__le16 partitionNum;
uint8_t reserved2[24];
@@ -131,7 +131,7 @@ struct sparablePartitionMap {
uint8_t partitionMapType;
uint8_t partitionMapLength;
uint8_t reserved1[2];
- regid partIdent;
+ struct regid partIdent;
__le16 volSeqNum;
__le16 partitionNum;
__le16 packetLength;
@@ -146,7 +146,7 @@ struct metadataPartitionMap {
uint8_t partitionMapType;
uint8_t partitionMapLength;
uint8_t reserved1[2];
- regid partIdent;
+ struct regid partIdent;
__le16 volSeqNum;
__le16 partitionNum;
__le32 metadataFileLoc;
@@ -161,7 +161,7 @@ struct metadataPartitionMap {
/* Virtual Allocation Table (UDF 1.5 2.2.10) */
struct virtualAllocationTable15 {
__le32 VirtualSector[0];
- regid vatIdent;
+ struct regid vatIdent;
__le32 previousVATICBLoc;
} __attribute__ ((packed));
@@ -192,8 +192,8 @@ struct sparingEntry {
} __attribute__ ((packed));
struct sparingTable {
- tag descTag;
- regid sparingIdent;
+ struct tag descTag;
+ struct regid sparingIdent;
__le16 reallocationTableLen;
__le16 reserved;
__le32 sequenceNum;
@@ -206,7 +206,7 @@ struct sparingTable {
#define ICBTAG_FILE_TYPE_MIRROR 0xFB
#define ICBTAG_FILE_TYPE_BITMAP 0xFC
-/* struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */
+/* struct struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */
struct allocDescImpUse {
__le16 flags;
uint8_t impUse[4];
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 96dfd207c3d6..4b540ee632d5 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -273,7 +273,7 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
{
struct super_block *sb = inode->i_sb;
struct udf_part_map *map;
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen;
sector_t ext_offset;
struct extent_position epos = {};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e25e7010627b..0368bf60f42b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -85,12 +85,12 @@ static void udf_write_super(struct super_block *);
static int udf_remount_fs(struct super_block *, int *, char *);
static int udf_check_valid(struct super_block *, int, int);
static int udf_vrs(struct super_block *sb, int silent);
-static void udf_load_logicalvolint(struct super_block *, kernel_extent_ad);
+static void udf_load_logicalvolint(struct super_block *, struct kernel_extent_ad);
static void udf_find_anchor(struct super_block *);
-static int udf_find_fileset(struct super_block *, kernel_lb_addr *,
- kernel_lb_addr *);
+static int udf_find_fileset(struct super_block *, struct kernel_lb_addr *,
+ struct kernel_lb_addr *);
static void udf_load_fileset(struct super_block *, struct buffer_head *,
- kernel_lb_addr *);
+ struct kernel_lb_addr *);
static void udf_open_lvid(struct super_block *);
static void udf_close_lvid(struct super_block *);
static unsigned int udf_count_free(struct super_block *);
@@ -201,6 +201,8 @@ struct udf_options {
mode_t umask;
gid_t gid;
uid_t uid;
+ mode_t fmode;
+ mode_t dmode;
struct nls_table *nls_map;
};
@@ -282,6 +284,10 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
seq_printf(seq, ",gid=%u", sbi->s_gid);
if (sbi->s_umask != 0)
seq_printf(seq, ",umask=%o", sbi->s_umask);
+ if (sbi->s_fmode != -1)
+ seq_printf(seq, ",mode=%o", sbi->s_fmode);
+ if (sbi->s_dmode != -1)
+ seq_printf(seq, ",dmode=%o", sbi->s_dmode);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET))
seq_printf(seq, ",session=%u", sbi->s_session);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET))
@@ -317,6 +323,8 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
*
* gid= Set the default group.
* umask= Set the default umask.
+ * mode= Set the default file permissions.
+ * dmode= Set the default directory permissions.
* uid= Set the default user.
* bs= Set the block size.
* unhide Show otherwise hidden files.
@@ -366,7 +374,8 @@ enum {
Opt_gid, Opt_uid, Opt_umask, Opt_session, Opt_lastblock,
Opt_anchor, Opt_volume, Opt_partition, Opt_fileset,
Opt_rootdir, Opt_utf8, Opt_iocharset,
- Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore
+ Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore,
+ Opt_fmode, Opt_dmode
};
static const match_table_t tokens = {
@@ -395,6 +404,8 @@ static const match_table_t tokens = {
{Opt_rootdir, "rootdir=%u"},
{Opt_utf8, "utf8"},
{Opt_iocharset, "iocharset=%s"},
+ {Opt_fmode, "mode=%o"},
+ {Opt_dmode, "dmode=%o"},
{Opt_err, NULL}
};
@@ -531,6 +542,16 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
case Opt_gforget:
uopt->flags |= (1 << UDF_FLAG_GID_FORGET);
break;
+ case Opt_fmode:
+ if (match_octal(args, &option))
+ return 0;
+ uopt->fmode = option & 0777;
+ break;
+ case Opt_dmode:
+ if (match_octal(args, &option))
+ return 0;
+ uopt->dmode = option & 0777;
+ break;
default:
printk(KERN_ERR "udf: bad mount option \"%s\" "
"or missing value\n", p);
@@ -560,6 +581,8 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
uopt.uid = sbi->s_uid;
uopt.gid = sbi->s_gid;
uopt.umask = sbi->s_umask;
+ uopt.fmode = sbi->s_fmode;
+ uopt.dmode = sbi->s_dmode;
if (!udf_parse_options(options, &uopt, true))
return -EINVAL;
@@ -568,6 +591,8 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
sbi->s_uid = uopt.uid;
sbi->s_gid = uopt.gid;
sbi->s_umask = uopt.umask;
+ sbi->s_fmode = uopt.fmode;
+ sbi->s_dmode = uopt.dmode;
if (sbi->s_lvid_bh) {
int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
@@ -721,8 +746,6 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock)
* however, if the disc isn't closed, it could be 512 */
for (i = 0; i < ARRAY_SIZE(last); i++) {
- if (last[i] < 0)
- continue;
if (last[i] >= sb->s_bdev->bd_inode->i_size >>
sb->s_blocksize_bits)
continue;
@@ -810,8 +833,8 @@ check_anchor:
}
static int udf_find_fileset(struct super_block *sb,
- kernel_lb_addr *fileset,
- kernel_lb_addr *root)
+ struct kernel_lb_addr *fileset,
+ struct kernel_lb_addr *root)
{
struct buffer_head *bh = NULL;
long lastblock;
@@ -820,7 +843,7 @@ static int udf_find_fileset(struct super_block *sb,
if (fileset->logicalBlockNum != 0xFFFFFFFF ||
fileset->partitionReferenceNum != 0xFFFF) {
- bh = udf_read_ptagged(sb, *fileset, 0, &ident);
+ bh = udf_read_ptagged(sb, fileset, 0, &ident);
if (!bh) {
return 1;
@@ -834,7 +857,7 @@ static int udf_find_fileset(struct super_block *sb,
sbi = UDF_SB(sb);
if (!bh) {
/* Search backwards through the partitions */
- kernel_lb_addr newfileset;
+ struct kernel_lb_addr newfileset;
/* --> cvg: FIXME - is it reasonable? */
return 1;
@@ -850,7 +873,7 @@ static int udf_find_fileset(struct super_block *sb,
newfileset.logicalBlockNum = 0;
do {
- bh = udf_read_ptagged(sb, newfileset, 0,
+ bh = udf_read_ptagged(sb, &newfileset, 0,
&ident);
if (!bh) {
newfileset.logicalBlockNum++;
@@ -902,14 +925,23 @@ static int udf_find_fileset(struct super_block *sb,
static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
{
struct primaryVolDesc *pvoldesc;
- struct ustr instr;
- struct ustr outstr;
+ struct ustr *instr, *outstr;
struct buffer_head *bh;
uint16_t ident;
+ int ret = 1;
+
+ instr = kmalloc(sizeof(struct ustr), GFP_NOFS);
+ if (!instr)
+ return 1;
+
+ outstr = kmalloc(sizeof(struct ustr), GFP_NOFS);
+ if (!outstr)
+ goto out1;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 1;
+ goto out2;
+
BUG_ON(ident != TAG_IDENT_PVD);
pvoldesc = (struct primaryVolDesc *)bh->b_data;
@@ -917,7 +949,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time,
pvoldesc->recordingDateAndTime)) {
#ifdef UDFFS_DEBUG
- timestamp *ts = &pvoldesc->recordingDateAndTime;
+ struct timestamp *ts = &pvoldesc->recordingDateAndTime;
udf_debug("recording time %04u/%02u/%02u"
" %02u:%02u (%x)\n",
le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
@@ -925,20 +957,25 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
#endif
}
- if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32))
- if (udf_CS0toUTF8(&outstr, &instr)) {
- strncpy(UDF_SB(sb)->s_volume_ident, outstr.u_name,
- outstr.u_len > 31 ? 31 : outstr.u_len);
+ if (!udf_build_ustr(instr, pvoldesc->volIdent, 32))
+ if (udf_CS0toUTF8(outstr, instr)) {
+ strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
+ outstr->u_len > 31 ? 31 : outstr->u_len);
udf_debug("volIdent[] = '%s'\n",
UDF_SB(sb)->s_volume_ident);
}
- if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128))
- if (udf_CS0toUTF8(&outstr, &instr))
- udf_debug("volSetIdent[] = '%s'\n", outstr.u_name);
+ if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
+ if (udf_CS0toUTF8(outstr, instr))
+ udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
brelse(bh);
- return 0;
+ ret = 0;
+out2:
+ kfree(outstr);
+out1:
+ kfree(instr);
+ return ret;
}
static int udf_load_metadata_files(struct super_block *sb, int partition)
@@ -946,7 +983,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
struct udf_sb_info *sbi = UDF_SB(sb);
struct udf_part_map *map;
struct udf_meta_data *mdata;
- kernel_lb_addr addr;
+ struct kernel_lb_addr addr;
int fe_error = 0;
map = &sbi->s_partmaps[partition];
@@ -959,7 +996,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
udf_debug("Metadata file location: block = %d part = %d\n",
addr.logicalBlockNum, addr.partitionReferenceNum);
- mdata->s_metadata_fe = udf_iget(sb, addr);
+ mdata->s_metadata_fe = udf_iget(sb, &addr);
if (mdata->s_metadata_fe == NULL) {
udf_warning(sb, __func__, "metadata inode efe not found, "
@@ -981,7 +1018,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
udf_debug("Mirror metadata file location: block = %d part = %d\n",
addr.logicalBlockNum, addr.partitionReferenceNum);
- mdata->s_mirror_fe = udf_iget(sb, addr);
+ mdata->s_mirror_fe = udf_iget(sb, &addr);
if (mdata->s_mirror_fe == NULL) {
if (fe_error) {
@@ -1013,7 +1050,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
udf_debug("Bitmap file location: block = %d part = %d\n",
addr.logicalBlockNum, addr.partitionReferenceNum);
- mdata->s_bitmap_fe = udf_iget(sb, addr);
+ mdata->s_bitmap_fe = udf_iget(sb, &addr);
if (mdata->s_bitmap_fe == NULL) {
if (sb->s_flags & MS_RDONLY)
@@ -1037,7 +1074,7 @@ error_exit:
}
static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
- kernel_lb_addr *root)
+ struct kernel_lb_addr *root)
{
struct fileSetDesc *fset;
@@ -1119,13 +1156,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
phd = (struct partitionHeaderDesc *)p->partitionContentsUse;
if (phd->unallocSpaceTable.extLength) {
- kernel_lb_addr loc = {
+ struct kernel_lb_addr loc = {
.logicalBlockNum = le32_to_cpu(
phd->unallocSpaceTable.extPosition),
.partitionReferenceNum = p_index,
};
- map->s_uspace.s_table = udf_iget(sb, loc);
+ map->s_uspace.s_table = udf_iget(sb, &loc);
if (!map->s_uspace.s_table) {
udf_debug("cannot load unallocSpaceTable (part %d)\n",
p_index);
@@ -1154,13 +1191,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
udf_debug("partitionIntegrityTable (part %d)\n", p_index);
if (phd->freedSpaceTable.extLength) {
- kernel_lb_addr loc = {
+ struct kernel_lb_addr loc = {
.logicalBlockNum = le32_to_cpu(
phd->freedSpaceTable.extPosition),
.partitionReferenceNum = p_index,
};
- map->s_fspace.s_table = udf_iget(sb, loc);
+ map->s_fspace.s_table = udf_iget(sb, &loc);
if (!map->s_fspace.s_table) {
udf_debug("cannot load freedSpaceTable (part %d)\n",
p_index);
@@ -1192,7 +1229,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
{
struct udf_sb_info *sbi = UDF_SB(sb);
struct udf_part_map *map = &sbi->s_partmaps[p_index];
- kernel_lb_addr ino;
+ struct kernel_lb_addr ino;
struct buffer_head *bh = NULL;
struct udf_inode_info *vati;
uint32_t pos;
@@ -1201,7 +1238,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
/* VAT file entry is in the last recorded block */
ino.partitionReferenceNum = type1_index;
ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root;
- sbi->s_vat_inode = udf_iget(sb, ino);
+ sbi->s_vat_inode = udf_iget(sb, &ino);
if (!sbi->s_vat_inode)
return 1;
@@ -1322,7 +1359,7 @@ out_bh:
}
static int udf_load_logicalvol(struct super_block *sb, sector_t block,
- kernel_lb_addr *fileset)
+ struct kernel_lb_addr *fileset)
{
struct logicalVolDesc *lvd;
int i, j, offset;
@@ -1471,7 +1508,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
}
if (fileset) {
- long_ad *la = (long_ad *)&(lvd->logicalVolContentsUse[0]);
+ struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]);
*fileset = lelb_to_cpu(la->extLocation);
udf_debug("FileSet found in LogicalVolDesc at block=%d, "
@@ -1490,7 +1527,7 @@ out_bh:
* udf_load_logicalvolint
*
*/
-static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc)
+static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ad loc)
{
struct buffer_head *bh = NULL;
uint16_t ident;
@@ -1533,7 +1570,7 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc)
* Written, tested, and released.
*/
static noinline int udf_process_sequence(struct super_block *sb, long block,
- long lastblock, kernel_lb_addr *fileset)
+ long lastblock, struct kernel_lb_addr *fileset)
{
struct buffer_head *bh = NULL;
struct udf_vds_record vds[VDS_POS_LENGTH];
@@ -1678,7 +1715,7 @@ static int udf_check_valid(struct super_block *sb, int novrs, int silent)
return !block;
}
-static int udf_load_sequence(struct super_block *sb, kernel_lb_addr *fileset)
+static int udf_load_sequence(struct super_block *sb, struct kernel_lb_addr *fileset)
{
struct anchorVolDescPtr *anchor;
uint16_t ident;
@@ -1755,7 +1792,7 @@ static void udf_open_lvid(struct super_block *sb)
lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN;
lvid->descTag.descCRC = cpu_to_le16(
- crc_itu_t(0, (char *)lvid + sizeof(tag),
+ crc_itu_t(0, (char *)lvid + sizeof(struct tag),
le16_to_cpu(lvid->descTag.descCRCLength)));
lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
@@ -1790,7 +1827,7 @@ static void udf_close_lvid(struct super_block *sb)
lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
lvid->descTag.descCRC = cpu_to_le16(
- crc_itu_t(0, (char *)lvid + sizeof(tag),
+ crc_itu_t(0, (char *)lvid + sizeof(struct tag),
le16_to_cpu(lvid->descTag.descCRCLength)));
lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
@@ -1848,13 +1885,15 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
int i;
struct inode *inode = NULL;
struct udf_options uopt;
- kernel_lb_addr rootdir, fileset;
+ struct kernel_lb_addr rootdir, fileset;
struct udf_sb_info *sbi;
uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
uopt.uid = -1;
uopt.gid = -1;
uopt.umask = 0;
+ uopt.fmode = -1;
+ uopt.dmode = -1;
sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
if (!sbi)
@@ -1892,6 +1931,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
sbi->s_uid = uopt.uid;
sbi->s_gid = uopt.gid;
sbi->s_umask = uopt.umask;
+ sbi->s_fmode = uopt.fmode;
+ sbi->s_dmode = uopt.dmode;
sbi->s_nls_map = uopt.nls_map;
/* Set the block size for all transfers */
@@ -1978,7 +2019,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
}
if (!silent) {
- timestamp ts;
+ struct timestamp ts;
udf_time_to_disk_stamp(&ts, sbi->s_record_time);
udf_info("UDF: Mounting volume '%s', "
"timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
@@ -1991,7 +2032,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
/* Assign the root inode */
/* assign inodes by physical block number */
/* perhaps it's not extensible enough, but for now ... */
- inode = udf_iget(sb, rootdir);
+ inode = udf_iget(sb, &rootdir);
if (!inode) {
printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, "
"partition=%d\n",
@@ -2114,7 +2155,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
unsigned int accum = 0;
int index;
int block = 0, newblock;
- kernel_lb_addr loc;
+ struct kernel_lb_addr loc;
uint32_t bytes;
uint8_t *ptr;
uint16_t ident;
@@ -2124,7 +2165,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
loc.logicalBlockNum = bitmap->s_extPosition;
loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
- bh = udf_read_ptagged(sb, loc, 0, &ident);
+ bh = udf_read_ptagged(sb, &loc, 0, &ident);
if (!bh) {
printk(KERN_ERR "udf: udf_count_free failed\n");
@@ -2147,7 +2188,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
bytes -= cur_bytes;
if (bytes) {
brelse(bh);
- newblock = udf_get_lb_pblock(sb, loc, ++block);
+ newblock = udf_get_lb_pblock(sb, &loc, ++block);
bh = udf_tread(sb, newblock);
if (!bh) {
udf_debug("read failed\n");
@@ -2170,7 +2211,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
{
unsigned int accum = 0;
uint32_t elen;
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
int8_t etype;
struct extent_position epos;
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 65e19b4f9424..225527cdc885 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -28,10 +28,10 @@
#include "udf_sb.h"
static void extent_trunc(struct inode *inode, struct extent_position *epos,
- kernel_lb_addr eloc, int8_t etype, uint32_t elen,
+ struct kernel_lb_addr *eloc, int8_t etype, uint32_t elen,
uint32_t nelen)
{
- kernel_lb_addr neloc = {};
+ struct kernel_lb_addr neloc = {};
int last_block = (elen + inode->i_sb->s_blocksize - 1) >>
inode->i_sb->s_blocksize_bits;
int first_block = (nelen + inode->i_sb->s_blocksize - 1) >>
@@ -43,12 +43,12 @@ static void extent_trunc(struct inode *inode, struct extent_position *epos,
last_block);
etype = (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30);
} else
- neloc = eloc;
+ neloc = *eloc;
nelen = (etype << 30) | nelen;
}
if (elen != nelen) {
- udf_write_aext(inode, epos, neloc, nelen, 0);
+ udf_write_aext(inode, epos, &neloc, nelen, 0);
if (last_block - first_block > 0) {
if (etype == (EXT_RECORDED_ALLOCATED >> 30))
mark_inode_dirty(inode);
@@ -68,7 +68,7 @@ static void extent_trunc(struct inode *inode, struct extent_position *epos,
void udf_truncate_tail_extent(struct inode *inode)
{
struct extent_position epos = {};
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen, nelen;
uint64_t lbcount = 0;
int8_t etype = -1, netype;
@@ -83,9 +83,9 @@ void udf_truncate_tail_extent(struct inode *inode)
return;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(short_ad);
+ adsize = sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
else
BUG();
@@ -106,7 +106,7 @@ void udf_truncate_tail_extent(struct inode *inode)
(unsigned)elen);
nelen = elen - (lbcount - inode->i_size);
epos.offset -= adsize;
- extent_trunc(inode, &epos, eloc, etype, elen, nelen);
+ extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
epos.offset += adsize;
if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
printk(KERN_ERR "udf_truncate_tail_extent(): "
@@ -124,7 +124,7 @@ void udf_truncate_tail_extent(struct inode *inode)
void udf_discard_prealloc(struct inode *inode)
{
struct extent_position epos = { NULL, 0, {0, 0} };
- kernel_lb_addr eloc;
+ struct kernel_lb_addr eloc;
uint32_t elen;
uint64_t lbcount = 0;
int8_t etype = -1, netype;
@@ -136,9 +136,9 @@ void udf_discard_prealloc(struct inode *inode)
return;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(short_ad);
+ adsize = sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
else
adsize = 0;
@@ -152,7 +152,7 @@ void udf_discard_prealloc(struct inode *inode)
if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
epos.offset -= adsize;
lbcount -= elen;
- extent_trunc(inode, &epos, eloc, etype, elen, 0);
+ extent_trunc(inode, &epos, &eloc, etype, elen, 0);
if (!epos.bh) {
iinfo->i_lenAlloc =
epos.offset -
@@ -200,7 +200,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode,
void udf_truncate_extents(struct inode *inode)
{
struct extent_position epos;
- kernel_lb_addr eloc, neloc = {};
+ struct kernel_lb_addr eloc, neloc = {};
uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc;
int8_t etype;
struct super_block *sb = inode->i_sb;
@@ -210,9 +210,9 @@ void udf_truncate_extents(struct inode *inode)
struct udf_inode_info *iinfo = UDF_I(inode);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(short_ad);
+ adsize = sizeof(struct short_ad);
else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(long_ad);
+ adsize = sizeof(struct long_ad);
else
BUG();
@@ -221,7 +221,7 @@ void udf_truncate_extents(struct inode *inode)
(inode->i_size & (sb->s_blocksize - 1));
if (etype != -1) {
epos.offset -= adsize;
- extent_trunc(inode, &epos, eloc, etype, elen, byte_offset);
+ extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset);
epos.offset += adsize;
if (byte_offset)
lenalloc = epos.offset;
@@ -236,12 +236,12 @@ void udf_truncate_extents(struct inode *inode)
while ((etype = udf_current_aext(inode, &epos, &eloc,
&elen, 0)) != -1) {
if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) {
- udf_write_aext(inode, &epos, neloc, nelen, 0);
+ udf_write_aext(inode, &epos, &neloc, nelen, 0);
if (indirect_ext_len) {
/* We managed to free all extents in the
* indirect extent - free it too */
BUG_ON(!epos.bh);
- udf_free_blocks(sb, inode, epos.block,
+ udf_free_blocks(sb, inode, &epos.block,
0, indirect_ext_len);
} else if (!epos.bh) {
iinfo->i_lenAlloc = lenalloc;
@@ -253,7 +253,7 @@ void udf_truncate_extents(struct inode *inode)
epos.offset = sizeof(struct allocExtDesc);
epos.block = eloc;
epos.bh = udf_tread(sb,
- udf_get_lb_pblock(sb, eloc, 0));
+ udf_get_lb_pblock(sb, &eloc, 0));
if (elen)
indirect_ext_len =
(elen + sb->s_blocksize - 1) >>
@@ -261,7 +261,7 @@ void udf_truncate_extents(struct inode *inode)
else
indirect_ext_len = 1;
} else {
- extent_trunc(inode, &epos, eloc, etype,
+ extent_trunc(inode, &epos, &eloc, etype,
elen, 0);
epos.offset += adsize;
}
@@ -269,7 +269,7 @@ void udf_truncate_extents(struct inode *inode)
if (indirect_ext_len) {
BUG_ON(!epos.bh);
- udf_free_blocks(sb, inode, epos.block, 0,
+ udf_free_blocks(sb, inode, &epos.block, 0,
indirect_ext_len);
} else if (!epos.bh) {
iinfo->i_lenAlloc = lenalloc;
@@ -278,7 +278,7 @@ void udf_truncate_extents(struct inode *inode)
udf_update_alloc_ext_desc(inode, &epos, lenalloc);
} else if (inode->i_size) {
if (byte_offset) {
- kernel_long_ad extent;
+ struct kernel_long_ad extent;
/*
* OK, there is not extent covering inode->i_size and
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index 4f86b1d98a5d..e58d1de41073 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -4,7 +4,7 @@
struct udf_inode_info {
struct timespec i_crtime;
/* Physical address of inode */
- kernel_lb_addr i_location;
+ struct kernel_lb_addr i_location;
__u64 i_unique;
__u32 i_lenEAttr;
__u32 i_lenAlloc;
@@ -17,8 +17,8 @@ struct udf_inode_info {
unsigned i_strat4096 : 1;
unsigned reserved : 26;
union {
- short_ad *i_sad;
- long_ad *i_lad;
+ struct short_ad *i_sad;
+ struct long_ad *i_lad;
__u8 *i_data;
} i_ext;
struct inode vfs_inode;
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 1c1c514a9725..5d32c609fceb 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -123,6 +123,8 @@ struct udf_sb_info {
mode_t s_umask;
gid_t s_gid;
uid_t s_uid;
+ mode_t s_fmode;
+ mode_t s_dmode;
/* Root Info */
struct timespec s_record_time;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 8ec865de5f13..9a2a9b61413e 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -62,10 +62,8 @@ static inline size_t udf_ext0_offset(struct inode *inode)
return 0;
}
-#define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset))
-
/* computes tag checksum */
-u8 udf_tag_checksum(const tag *t);
+u8 udf_tag_checksum(const struct tag *t);
struct dentry;
struct inode;
@@ -95,7 +93,7 @@ struct udf_vds_record {
};
struct generic_desc {
- tag descTag;
+ struct tag descTag;
__le32 volDescSeqNum;
};
@@ -108,7 +106,7 @@ struct ustr {
struct extent_position {
struct buffer_head *bh;
uint32_t offset;
- kernel_lb_addr block;
+ struct kernel_lb_addr block;
};
/* super.c */
@@ -124,7 +122,7 @@ extern int udf_ioctl(struct inode *, struct file *, unsigned int,
unsigned long);
/* inode.c */
-extern struct inode *udf_iget(struct super_block *, kernel_lb_addr);
+extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
extern int udf_sync_inode(struct inode *);
extern void udf_expand_file_adinicb(struct inode *, int, int *);
extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
@@ -136,19 +134,19 @@ extern void udf_clear_inode(struct inode *);
extern int udf_write_inode(struct inode *, int);
extern long udf_block_map(struct inode *, sector_t);
extern int udf_extend_file(struct inode *, struct extent_position *,
- kernel_long_ad *, sector_t);
+ struct kernel_long_ad *, sector_t);
extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *,
- kernel_lb_addr *, uint32_t *, sector_t *);
+ struct kernel_lb_addr *, uint32_t *, sector_t *);
extern int8_t udf_add_aext(struct inode *, struct extent_position *,
- kernel_lb_addr, uint32_t, int);
+ struct kernel_lb_addr *, uint32_t, int);
extern int8_t udf_write_aext(struct inode *, struct extent_position *,
- kernel_lb_addr, uint32_t, int);
+ struct kernel_lb_addr *, uint32_t, int);
extern int8_t udf_delete_aext(struct inode *, struct extent_position,
- kernel_lb_addr, uint32_t);
+ struct kernel_lb_addr, uint32_t);
extern int8_t udf_next_aext(struct inode *, struct extent_position *,
- kernel_lb_addr *, uint32_t *, int);
+ struct kernel_lb_addr *, uint32_t *, int);
extern int8_t udf_current_aext(struct inode *, struct extent_position *,
- kernel_lb_addr *, uint32_t *, int);
+ struct kernel_lb_addr *, uint32_t *, int);
/* misc.c */
extern struct buffer_head *udf_tgetblk(struct super_block *, int);
@@ -160,7 +158,7 @@ extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t,
extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t,
uint32_t, uint16_t *);
extern struct buffer_head *udf_read_ptagged(struct super_block *,
- kernel_lb_addr, uint32_t,
+ struct kernel_lb_addr *, uint32_t,
uint16_t *);
extern void udf_update_tag(char *, int);
extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int);
@@ -182,6 +180,14 @@ extern uint32_t udf_get_pblock_meta25(struct super_block *, uint32_t, uint16_t,
uint32_t);
extern int udf_relocate_blocks(struct super_block *, long, long *);
+static inline uint32_t
+udf_get_lb_pblock(struct super_block *sb, struct kernel_lb_addr *loc,
+ uint32_t offset)
+{
+ return udf_get_pblock(sb, loc->logicalBlockNum,
+ loc->partitionReferenceNum, offset);
+}
+
/* unicode.c */
extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int);
extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *,
@@ -200,7 +206,7 @@ extern void udf_truncate_extents(struct inode *);
/* balloc.c */
extern void udf_free_blocks(struct super_block *, struct inode *,
- kernel_lb_addr, uint32_t, uint32_t);
+ struct kernel_lb_addr *, uint32_t, uint32_t);
extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t,
uint32_t, uint32_t);
extern int udf_new_block(struct super_block *, struct inode *, uint16_t,
@@ -214,16 +220,16 @@ extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *,
struct udf_fileident_bh *,
struct fileIdentDesc *,
struct extent_position *,
- kernel_lb_addr *, uint32_t *,
+ struct kernel_lb_addr *, uint32_t *,
sector_t *);
extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize,
int *offset);
-extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int);
-extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int);
+extern struct long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int);
+extern struct short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int);
/* udftime.c */
extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest,
- timestamp src);
-extern timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec src);
+ struct timestamp src);
+extern struct timestamp *udf_time_to_disk_stamp(struct timestamp *dest, struct timespec src);
#endif /* __UDF_DECL_H */
diff --git a/fs/udf/udfend.h b/fs/udf/udfend.h
index 489f52fb428c..6a9f3a9cc428 100644
--- a/fs/udf/udfend.h
+++ b/fs/udf/udfend.h
@@ -4,9 +4,9 @@
#include <asm/byteorder.h>
#include <linux/string.h>
-static inline kernel_lb_addr lelb_to_cpu(lb_addr in)
+static inline struct kernel_lb_addr lelb_to_cpu(struct lb_addr in)
{
- kernel_lb_addr out;
+ struct kernel_lb_addr out;
out.logicalBlockNum = le32_to_cpu(in.logicalBlockNum);
out.partitionReferenceNum = le16_to_cpu(in.partitionReferenceNum);
@@ -14,9 +14,9 @@ static inline kernel_lb_addr lelb_to_cpu(lb_addr in)
return out;
}
-static inline lb_addr cpu_to_lelb(kernel_lb_addr in)
+static inline struct lb_addr cpu_to_lelb(struct kernel_lb_addr in)
{
- lb_addr out;
+ struct lb_addr out;
out.logicalBlockNum = cpu_to_le32(in.logicalBlockNum);
out.partitionReferenceNum = cpu_to_le16(in.partitionReferenceNum);
@@ -24,9 +24,9 @@ static inline lb_addr cpu_to_lelb(kernel_lb_addr in)
return out;
}
-static inline short_ad lesa_to_cpu(short_ad in)
+static inline struct short_ad lesa_to_cpu(struct short_ad in)
{
- short_ad out;
+ struct short_ad out;
out.extLength = le32_to_cpu(in.extLength);
out.extPosition = le32_to_cpu(in.extPosition);
@@ -34,9 +34,9 @@ static inline short_ad lesa_to_cpu(short_ad in)
return out;
}
-static inline short_ad cpu_to_lesa(short_ad in)
+static inline struct short_ad cpu_to_lesa(struct short_ad in)
{
- short_ad out;
+ struct short_ad out;
out.extLength = cpu_to_le32(in.extLength);
out.extPosition = cpu_to_le32(in.extPosition);
@@ -44,9 +44,9 @@ static inline short_ad cpu_to_lesa(short_ad in)
return out;
}
-static inline kernel_long_ad lela_to_cpu(long_ad in)
+static inline struct kernel_long_ad lela_to_cpu(struct long_ad in)
{
- kernel_long_ad out;
+ struct kernel_long_ad out;
out.extLength = le32_to_cpu(in.extLength);
out.extLocation = lelb_to_cpu(in.extLocation);
@@ -54,9 +54,9 @@ static inline kernel_long_ad lela_to_cpu(long_ad in)
return out;
}
-static inline long_ad cpu_to_lela(kernel_long_ad in)
+static inline struct long_ad cpu_to_lela(struct kernel_long_ad in)
{
- long_ad out;
+ struct long_ad out;
out.extLength = cpu_to_le32(in.extLength);
out.extLocation = cpu_to_lelb(in.extLocation);
@@ -64,9 +64,9 @@ static inline long_ad cpu_to_lela(kernel_long_ad in)
return out;
}
-static inline kernel_extent_ad leea_to_cpu(extent_ad in)
+static inline struct kernel_extent_ad leea_to_cpu(struct extent_ad in)
{
- kernel_extent_ad out;
+ struct kernel_extent_ad out;
out.extLength = le32_to_cpu(in.extLength);
out.extLocation = le32_to_cpu(in.extLocation);
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index 5f811655c9b5..b8c828c4d200 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -85,7 +85,8 @@ extern struct timezone sys_tz;
#define SECS_PER_HOUR (60 * 60)
#define SECS_PER_DAY (SECS_PER_HOUR * 24)
-struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src)
+struct timespec *
+udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
{
int yday;
u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone);
@@ -116,7 +117,8 @@ struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src)
return dest;
}
-timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec ts)
+struct timestamp *
+udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts)
{
long int days, rem, y;
const unsigned short int *ip;
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 9fdf8c93c58e..a3bbdbde9f4b 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -324,34 +324,43 @@ try_again:
int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname,
int flen)
{
- struct ustr filename, unifilename;
- int len;
+ struct ustr *filename, *unifilename;
+ int len = 0;
- if (udf_build_ustr_exact(&unifilename, sname, flen))
+ filename = kmalloc(sizeof(struct ustr), GFP_NOFS);
+ if (!filename)
return 0;
+ unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS);
+ if (!unifilename)
+ goto out1;
+
+ if (udf_build_ustr_exact(unifilename, sname, flen))
+ goto out2;
+
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
- if (!udf_CS0toUTF8(&filename, &unifilename)) {
+ if (!udf_CS0toUTF8(filename, unifilename)) {
udf_debug("Failed in udf_get_filename: sname = %s\n",
sname);
- return 0;
+ goto out2;
}
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
- if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename,
- &unifilename)) {
+ if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
+ unifilename)) {
udf_debug("Failed in udf_get_filename: sname = %s\n",
sname);
- return 0;
+ goto out2;
}
} else
- return 0;
-
- len = udf_translate_to_linux(dname, filename.u_name, filename.u_len,
- unifilename.u_name, unifilename.u_len);
- if (len)
- return len;
-
- return 0;
+ goto out2;
+
+ len = udf_translate_to_linux(dname, filename->u_name, filename->u_len,
+ unifilename->u_name, unifilename->u_len);
+out2:
+ kfree(unifilename);
+out1:
+ kfree(filename);
+ return len;
}
int udf_put_filename(struct super_block *sb, const uint8_t *sname,
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index ac181f6806a3..6f5dcf006096 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -304,13 +304,13 @@ cg_found:
inode->i_ino = cg * uspi->s_ipg + bit;
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
if (dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
if (S_ISDIR(mode))
inode->i_mode |= S_ISGID;
} else
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_generation = 0;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 737c9a425361..51b87de97f87 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -91,7 +91,8 @@ xfs-y += xfs_alloc.o \
xfs_dmops.o \
xfs_qmops.o
-xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
+xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
+ xfs_dir2_trace.o
# Objects in linux/
xfs-y += $(addprefix $(XFS_LINUX)/, \
@@ -106,6 +107,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
xfs_iops.o \
xfs_lrw.o \
xfs_super.o \
+ xfs_sync.o \
xfs_vnode.o \
xfs_xattr.o)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a44d68eb50b5..8fbc97df3609 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -191,7 +191,7 @@ xfs_setfilesize(
ip->i_d.di_size = isize;
ip->i_update_core = 1;
ip->i_update_size = 1;
- mark_inode_dirty_sync(ioend->io_inode);
+ xfs_mark_inode_dirty_sync(ip);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 652721ce0ea5..8c022cd0ad67 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -23,11 +23,9 @@
/*
* Credentials
*/
-typedef struct cred {
- /* EMPTY */
-} cred_t;
+typedef const struct cred cred_t;
-extern struct cred *sys_cred;
+extern cred_t *sys_cred;
/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
static inline int capable_cred(cred_t *cr, int cid)
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ef90e64641e6..2ae8b1ccb02e 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -26,7 +26,6 @@
*/
xfs_param_t xfs_params = {
/* MIN DFLT MAX */
- .restrict_chown = { 0, 1, 1 },
.sgid_inherit = { 0, 0, 1 },
.symlink_mode = { 0, 0, 1 },
.panic_mask = { 0, 0, 255 },
@@ -43,10 +42,3 @@ xfs_param_t xfs_params = {
.inherit_nodfrg = { 0, 1, 1 },
.fstrm_timer = { 1, 30*100, 3600*100},
};
-
-/*
- * Global system credential structure.
- */
-static cred_t sys_cred_val;
-cred_t *sys_cred = &sys_cred_val;
-
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
index 2770b0085ee8..69f71caf061c 100644
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -19,6 +19,5 @@
#define __XFS_GLOBALS_H__
extern uint64_t xfs_panic_mask; /* set to cause more panics */
-extern struct cred *sys_cred;
#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d3438c72dcaf..010b35a9cf06 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -256,6 +256,7 @@ xfs_open_by_handle(
struct file *parfilp,
struct inode *parinode)
{
+ const struct cred *cred = current_cred();
int error;
int new_fd;
int permflag;
@@ -321,7 +322,7 @@ xfs_open_by_handle(
mntget(parfilp->f_path.mnt);
/* Create file pointer. */
- filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags);
+ filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags, cred);
if (IS_ERR(filp)) {
put_unused_fd(new_fd);
return -XFS_ERROR(-PTR_ERR(filp));
@@ -691,8 +692,7 @@ xfs_ioc_space(
if (ioflags & IO_INVIS)
attr_flags |= XFS_ATTR_DMI;
- error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
- NULL, attr_flags);
+ error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags);
return -error;
}
@@ -1007,7 +1007,7 @@ xfs_ioctl_setattr(
* to the file owner ID, except in cases where the
* CAP_FSETID capability is applicable.
*/
- if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+ if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
code = XFS_ERROR(EPERM);
goto error_return;
}
@@ -1104,10 +1104,6 @@ xfs_ioctl_setattr(
/*
* Change file ownership. Must be the owner or privileged.
- * If the system was configured with the "restricted_chown"
- * option, the owner is not permitted to give away the file,
- * and can change the group id only to a group of which he
- * or she is a member.
*/
if (mask & FSX_PROJID) {
/*
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 095d271f3434..69c98cf32336 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -64,14 +64,14 @@ xfs_synchronize_atime(
{
struct inode *inode = VFS_I(ip);
- if (inode) {
+ if (!(inode->i_state & I_CLEAR)) {
ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
}
}
/*
- * If the linux inode exists, mark it dirty.
+ * If the linux inode is valid, mark it dirty.
* Used when commiting a dirty inode into a transaction so that
* the inode will get written back by the linux code
*/
@@ -81,7 +81,7 @@ xfs_mark_inode_dirty_sync(
{
struct inode *inode = VFS_I(ip);
- if (inode)
+ if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
mark_inode_dirty_sync(inode);
}
@@ -128,7 +128,7 @@ xfs_ichgtime(
if (sync_it) {
SYNCHRONIZE();
ip->i_update_core = 1;
- mark_inode_dirty_sync(inode);
+ xfs_mark_inode_dirty_sync(ip);
}
}
@@ -601,7 +601,7 @@ xfs_vn_setattr(
struct dentry *dentry,
struct iattr *iattr)
{
- return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
+ return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
}
/*
@@ -642,7 +642,7 @@ xfs_vn_fallocate(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
- 0, NULL, XFS_ATTR_NOLOCK);
+ 0, XFS_ATTR_NOLOCK);
if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode))
new_size = offset + len;
@@ -653,7 +653,7 @@ xfs_vn_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
+ error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -766,12 +766,20 @@ xfs_diflags_to_iflags(
* When reading existing inodes from disk this is called directly
* from xfs_iget, when creating a new inode it is called from
* xfs_ialloc after setting up the inode.
+ *
+ * We are always called with an uninitialised linux inode here.
+ * We need to initialise the necessary fields and take a reference
+ * on it.
*/
void
xfs_setup_inode(
struct xfs_inode *ip)
{
- struct inode *inode = ip->i_vnode;
+ struct inode *inode = &ip->i_vnode;
+
+ inode->i_ino = ip->i_ino;
+ inode->i_state = I_NEW|I_LOCK;
+ inode_add_to_lists(ip->i_mount->m_super, inode);
inode->i_mode = ip->i_d.di_mode;
inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index cc0f7b3a9795..77d6ddcaf547 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -77,6 +77,7 @@
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/ctype.h>
+#include <linux/writeback.h>
#include <asm/page.h>
#include <asm/div64.h>
@@ -107,7 +108,6 @@
#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
#endif
-#define restricted_chown xfs_params.restrict_chown.val
#define irix_sgid_inherit xfs_params.sgid_inherit.val
#define irix_symlink_mode xfs_params.symlink_mode.val
#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 3d5b67c075c7..64f4ec90b8b2 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -53,6 +53,10 @@ xfs_read_xfsstats(
{ "icluster", XFSSTAT_END_INODE_CLUSTER },
{ "vnodes", XFSSTAT_END_VNODE_OPS },
{ "buf", XFSSTAT_END_BUF },
+ { "abtb2", XFSSTAT_END_ABTB_V2 },
+ { "abtc2", XFSSTAT_END_ABTC_V2 },
+ { "bmbt2", XFSSTAT_END_BMBT_V2 },
+ { "ibt2", XFSSTAT_END_IBT_V2 },
};
/* Loop over all stats groups */
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index e83820febc9f..736854b1ca1a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -118,6 +118,71 @@ struct xfsstats {
__uint32_t xb_page_retries;
__uint32_t xb_page_found;
__uint32_t xb_get_read;
+/* Version 2 btree counters */
+#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15)
+ __uint32_t xs_abtb_2_lookup;
+ __uint32_t xs_abtb_2_compare;
+ __uint32_t xs_abtb_2_insrec;
+ __uint32_t xs_abtb_2_delrec;
+ __uint32_t xs_abtb_2_newroot;
+ __uint32_t xs_abtb_2_killroot;
+ __uint32_t xs_abtb_2_increment;
+ __uint32_t xs_abtb_2_decrement;
+ __uint32_t xs_abtb_2_lshift;
+ __uint32_t xs_abtb_2_rshift;
+ __uint32_t xs_abtb_2_split;
+ __uint32_t xs_abtb_2_join;
+ __uint32_t xs_abtb_2_alloc;
+ __uint32_t xs_abtb_2_free;
+ __uint32_t xs_abtb_2_moves;
+#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15)
+ __uint32_t xs_abtc_2_lookup;
+ __uint32_t xs_abtc_2_compare;
+ __uint32_t xs_abtc_2_insrec;
+ __uint32_t xs_abtc_2_delrec;
+ __uint32_t xs_abtc_2_newroot;
+ __uint32_t xs_abtc_2_killroot;
+ __uint32_t xs_abtc_2_increment;
+ __uint32_t xs_abtc_2_decrement;
+ __uint32_t xs_abtc_2_lshift;
+ __uint32_t xs_abtc_2_rshift;
+ __uint32_t xs_abtc_2_split;
+ __uint32_t xs_abtc_2_join;
+ __uint32_t xs_abtc_2_alloc;
+ __uint32_t xs_abtc_2_free;
+ __uint32_t xs_abtc_2_moves;
+#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15)
+ __uint32_t xs_bmbt_2_lookup;
+ __uint32_t xs_bmbt_2_compare;
+ __uint32_t xs_bmbt_2_insrec;
+ __uint32_t xs_bmbt_2_delrec;
+ __uint32_t xs_bmbt_2_newroot;
+ __uint32_t xs_bmbt_2_killroot;
+ __uint32_t xs_bmbt_2_increment;
+ __uint32_t xs_bmbt_2_decrement;
+ __uint32_t xs_bmbt_2_lshift;
+ __uint32_t xs_bmbt_2_rshift;
+ __uint32_t xs_bmbt_2_split;
+ __uint32_t xs_bmbt_2_join;
+ __uint32_t xs_bmbt_2_alloc;
+ __uint32_t xs_bmbt_2_free;
+ __uint32_t xs_bmbt_2_moves;
+#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15)
+ __uint32_t xs_ibt_2_lookup;
+ __uint32_t xs_ibt_2_compare;
+ __uint32_t xs_ibt_2_insrec;
+ __uint32_t xs_ibt_2_delrec;
+ __uint32_t xs_ibt_2_newroot;
+ __uint32_t xs_ibt_2_killroot;
+ __uint32_t xs_ibt_2_increment;
+ __uint32_t xs_ibt_2_decrement;
+ __uint32_t xs_ibt_2_lshift;
+ __uint32_t xs_ibt_2_rshift;
+ __uint32_t xs_ibt_2_split;
+ __uint32_t xs_ibt_2_join;
+ __uint32_t xs_ibt_2_alloc;
+ __uint32_t xs_ibt_2_free;
+ __uint32_t xs_ibt_2_moves;
/* Extra precision counters */
__uint64_t xs_xstrat_bytes;
__uint64_t xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 37ebe36056eb..c3d004bc4621 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -18,7 +18,6 @@
#include "xfs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
-#include "xfs_clnt.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
@@ -36,6 +35,7 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
@@ -58,6 +58,7 @@
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
+#include "xfs_sync.h"
#include <linux/namei.h>
#include <linux/init.h>
@@ -70,36 +71,9 @@
static struct quotactl_ops xfs_quotactl_operations;
static struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_vnode_zone;
static kmem_zone_t *xfs_ioend_zone;
mempool_t *xfs_ioend_pool;
-STATIC struct xfs_mount_args *
-xfs_args_allocate(
- struct super_block *sb,
- int silent)
-{
- struct xfs_mount_args *args;
-
- args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
- if (!args)
- return NULL;
-
- args->logbufs = args->logbufsize = -1;
- strncpy(args->fsname, sb->s_id, MAXNAMELEN);
-
- /* Copy the already-parsed mount(2) flags we're interested in */
- if (sb->s_flags & MS_DIRSYNC)
- args->flags |= XFSMNT_DIRSYNC;
- if (sb->s_flags & MS_SYNCHRONOUS)
- args->flags |= XFSMNT_WSYNC;
- if (silent)
- args->flags |= XFSMNT_QUIET;
- args->flags |= XFSMNT_32BITINODES;
-
- return args;
-}
-
#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
#define MNTOPT_LOGDEV "logdev" /* log device */
@@ -188,26 +162,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
}
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure. The caller takes care of them.
+ */
STATIC int
xfs_parseargs(
struct xfs_mount *mp,
char *options,
- struct xfs_mount_args *args,
- int update)
+ char **mtpt)
{
+ struct super_block *sb = mp->m_super;
char *this_char, *value, *eov;
- int dsunit, dswidth, vol_dsunit, vol_dswidth;
- int iosize;
+ int dsunit = 0;
+ int dswidth = 0;
+ int iosize = 0;
int dmapi_implies_ikeep = 1;
+ uchar_t iosizelog = 0;
- args->flags |= XFSMNT_BARRIER;
- args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+ /*
+ * Copy binary VFS mount flags we are interested in.
+ */
+ if (sb->s_flags & MS_RDONLY)
+ mp->m_flags |= XFS_MOUNT_RDONLY;
+ if (sb->s_flags & MS_DIRSYNC)
+ mp->m_flags |= XFS_MOUNT_DIRSYNC;
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ mp->m_flags |= XFS_MOUNT_WSYNC;
+
+ /*
+ * Set some default flags that could be cleared by the mount option
+ * parsing.
+ */
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+ mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+ mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+
+ /*
+ * These can be overridden by the mount option parsing.
+ */
+ mp->m_logbufs = -1;
+ mp->m_logbsize = -1;
if (!options)
goto done;
- iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
-
while ((this_char = strsep(&options, ",")) != NULL) {
if (!*this_char)
continue;
@@ -221,7 +223,7 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- args->logbufs = simple_strtoul(value, &eov, 10);
+ mp->m_logbufs = simple_strtoul(value, &eov, 10);
} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -229,7 +231,7 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- args->logbufsize = suffix_strtoul(value, &eov, 10);
+ mp->m_logbsize = suffix_strtoul(value, &eov, 10);
} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -237,7 +239,9 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- strncpy(args->logname, value, MAXNAMELEN);
+ mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+ if (!mp->m_logname)
+ return ENOMEM;
} else if (!strcmp(this_char, MNTOPT_MTPT)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -245,7 +249,9 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- strncpy(args->mtpt, value, MAXNAMELEN);
+ *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+ if (!*mtpt)
+ return ENOMEM;
} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -253,7 +259,9 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- strncpy(args->rtname, value, MAXNAMELEN);
+ mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+ if (!mp->m_rtname)
+ return ENOMEM;
} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -262,8 +270,7 @@ xfs_parseargs(
return EINVAL;
}
iosize = simple_strtoul(value, &eov, 10);
- args->flags |= XFSMNT_IOSIZE;
- args->iosizelog = (uint8_t) iosize;
+ iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -272,8 +279,7 @@ xfs_parseargs(
return EINVAL;
}
iosize = suffix_strtoul(value, &eov, 10);
- args->flags |= XFSMNT_IOSIZE;
- args->iosizelog = ffs(iosize) - 1;
+ iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_GRPID) ||
!strcmp(this_char, MNTOPT_BSDGROUPS)) {
mp->m_flags |= XFS_MOUNT_GRPID;
@@ -281,23 +287,25 @@ xfs_parseargs(
!strcmp(this_char, MNTOPT_SYSVGROUPS)) {
mp->m_flags &= ~XFS_MOUNT_GRPID;
} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
- args->flags |= XFSMNT_WSYNC;
+ mp->m_flags |= XFS_MOUNT_WSYNC;
} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
- args->flags |= XFSMNT_OSYNCISOSYNC;
+ mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
- args->flags |= XFSMNT_NORECOVERY;
+ mp->m_flags |= XFS_MOUNT_NORECOVERY;
} else if (!strcmp(this_char, MNTOPT_INO64)) {
- args->flags |= XFSMNT_INO64;
-#if !XFS_BIG_INUMS
+#if XFS_BIG_INUMS
+ mp->m_flags |= XFS_MOUNT_INO64;
+ mp->m_inoadd = XFS_INO64_OFFSET;
+#else
cmn_err(CE_WARN,
"XFS: %s option not allowed on this system",
this_char);
return EINVAL;
#endif
} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
- args->flags |= XFSMNT_NOALIGN;
+ mp->m_flags |= XFS_MOUNT_NOALIGN;
} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
- args->flags |= XFSMNT_SWALLOC;
+ mp->m_flags |= XFS_MOUNT_SWALLOC;
} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -315,7 +323,7 @@ xfs_parseargs(
}
dswidth = simple_strtoul(value, &eov, 10);
} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
- args->flags &= ~XFSMNT_32BITINODES;
+ mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
#if !XFS_BIG_INUMS
cmn_err(CE_WARN,
"XFS: %s option not allowed on this system",
@@ -323,56 +331,61 @@ xfs_parseargs(
return EINVAL;
#endif
} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
- args->flags |= XFSMNT_NOUUID;
+ mp->m_flags |= XFS_MOUNT_NOUUID;
} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
- args->flags |= XFSMNT_BARRIER;
+ mp->m_flags |= XFS_MOUNT_BARRIER;
} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
- args->flags &= ~XFSMNT_BARRIER;
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
- args->flags |= XFSMNT_IKEEP;
+ mp->m_flags |= XFS_MOUNT_IKEEP;
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
dmapi_implies_ikeep = 0;
- args->flags &= ~XFSMNT_IKEEP;
+ mp->m_flags &= ~XFS_MOUNT_IKEEP;
} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
- args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
+ mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
- args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+ mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
- args->flags |= XFSMNT_ATTR2;
+ mp->m_flags |= XFS_MOUNT_ATTR2;
} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
- args->flags &= ~XFSMNT_ATTR2;
- args->flags |= XFSMNT_NOATTR2;
+ mp->m_flags &= ~XFS_MOUNT_ATTR2;
+ mp->m_flags |= XFS_MOUNT_NOATTR2;
} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
- args->flags2 |= XFSMNT2_FILESTREAMS;
+ mp->m_flags |= XFS_MOUNT_FILESTREAMS;
} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
- args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
- args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
+ mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+ XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+ XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+ XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
!strcmp(this_char, MNTOPT_UQUOTA) ||
!strcmp(this_char, MNTOPT_USRQUOTA)) {
- args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+ XFS_UQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
!strcmp(this_char, MNTOPT_UQUOTANOENF)) {
- args->flags |= XFSMNT_UQUOTA;
- args->flags &= ~XFSMNT_UQUOTAENF;
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_UQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
!strcmp(this_char, MNTOPT_PRJQUOTA)) {
- args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+ mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+ XFS_OQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
- args->flags |= XFSMNT_PQUOTA;
- args->flags &= ~XFSMNT_PQUOTAENF;
+ mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_OQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
!strcmp(this_char, MNTOPT_GRPQUOTA)) {
- args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+ XFS_OQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
- args->flags |= XFSMNT_GQUOTA;
- args->flags &= ~XFSMNT_GQUOTAENF;
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_OQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_DMAPI)) {
- args->flags |= XFSMNT_DMAPI;
+ mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, MNTOPT_XDSM)) {
- args->flags |= XFSMNT_DMAPI;
+ mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, MNTOPT_DMI)) {
- args->flags |= XFSMNT_DMAPI;
+ mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, "ihashsize")) {
cmn_err(CE_WARN,
"XFS: ihashsize no longer used, option is deprecated.");
@@ -390,27 +403,29 @@ xfs_parseargs(
}
}
- if (args->flags & XFSMNT_NORECOVERY) {
- if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) {
- cmn_err(CE_WARN,
- "XFS: no-recovery mounts must be read-only.");
- return EINVAL;
- }
+ /*
+ * no recovery flag requires a read-only mount
+ */
+ if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+ !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+ cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only.");
+ return EINVAL;
}
- if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
+ if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
cmn_err(CE_WARN,
"XFS: sunit and swidth options incompatible with the noalign option");
return EINVAL;
}
- if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+ if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+ (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
cmn_err(CE_WARN,
"XFS: cannot mount with both project and group quota");
return EINVAL;
}
- if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') {
+ if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
printk("XFS: %s option needs the mount point option as well\n",
MNTOPT_DMAPI);
return EINVAL;
@@ -438,27 +453,66 @@ xfs_parseargs(
* Note that if "ikeep" or "noikeep" mount options are
* supplied, then they are honored.
*/
- if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep)
- args->flags |= XFSMNT_IKEEP;
+ if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
+ mp->m_flags |= XFS_MOUNT_IKEEP;
- if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+done:
+ if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+ /*
+ * At this point the superblock has not been read
+ * in, therefore we do not know the block size.
+ * Before the mount call ends we will convert
+ * these to FSBs.
+ */
if (dsunit) {
- args->sunit = dsunit;
- args->flags |= XFSMNT_RETERR;
- } else {
- args->sunit = vol_dsunit;
+ mp->m_dalign = dsunit;
+ mp->m_flags |= XFS_MOUNT_RETERR;
}
- dswidth ? (args->swidth = dswidth) :
- (args->swidth = vol_dswidth);
- } else {
- args->sunit = args->swidth = 0;
+
+ if (dswidth)
+ mp->m_swidth = dswidth;
+ }
+
+ if (mp->m_logbufs != -1 &&
+ mp->m_logbufs != 0 &&
+ (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+ mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+ cmn_err(CE_WARN,
+ "XFS: invalid logbufs value: %d [not %d-%d]",
+ mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+ return XFS_ERROR(EINVAL);
+ }
+ if (mp->m_logbsize != -1 &&
+ mp->m_logbsize != 0 &&
+ (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+ mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+ !is_power_of_2(mp->m_logbsize))) {
+ cmn_err(CE_WARN,
+ "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+ mp->m_logbsize);
+ return XFS_ERROR(EINVAL);
+ }
+
+ mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+ if (!mp->m_fsname)
+ return ENOMEM;
+ mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+ if (iosizelog) {
+ if (iosizelog > XFS_MAX_IO_LOG ||
+ iosizelog < XFS_MIN_IO_LOG) {
+ cmn_err(CE_WARN,
+ "XFS: invalid log iosize: %d [not %d-%d]",
+ iosizelog, XFS_MIN_IO_LOG,
+ XFS_MAX_IO_LOG);
+ return XFS_ERROR(EINVAL);
+ }
+
+ mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+ mp->m_readio_log = iosizelog;
+ mp->m_writeio_log = iosizelog;
}
-done:
- if (args->flags & XFSMNT_32BITINODES)
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- if (args->flags2)
- args->flags |= XFSMNT_FLAGS2;
return 0;
}
@@ -704,8 +758,7 @@ xfs_close_devices(
*/
STATIC int
xfs_open_devices(
- struct xfs_mount *mp,
- struct xfs_mount_args *args)
+ struct xfs_mount *mp)
{
struct block_device *ddev = mp->m_super->s_bdev;
struct block_device *logdev = NULL, *rtdev = NULL;
@@ -714,14 +767,14 @@ xfs_open_devices(
/*
* Open real time and log devices - order is important.
*/
- if (args->logname[0]) {
- error = xfs_blkdev_get(mp, args->logname, &logdev);
+ if (mp->m_logname) {
+ error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
if (error)
goto out;
}
- if (args->rtname[0]) {
- error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+ if (mp->m_rtname) {
+ error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
if (error)
goto out_close_logdev;
@@ -813,18 +866,18 @@ xfs_setup_devices(
*/
void
xfsaild_wakeup(
- xfs_mount_t *mp,
+ struct xfs_ail *ailp,
xfs_lsn_t threshold_lsn)
{
- mp->m_ail.xa_target = threshold_lsn;
- wake_up_process(mp->m_ail.xa_task);
+ ailp->xa_target = threshold_lsn;
+ wake_up_process(ailp->xa_task);
}
int
xfsaild(
void *data)
{
- xfs_mount_t *mp = (xfs_mount_t *)data;
+ struct xfs_ail *ailp = data;
xfs_lsn_t last_pushed_lsn = 0;
long tout = 0;
@@ -836,11 +889,11 @@ xfsaild(
/* swsusp */
try_to_freeze();
- ASSERT(mp->m_log);
- if (XFS_FORCED_SHUTDOWN(mp))
+ ASSERT(ailp->xa_mount->m_log);
+ if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
continue;
- tout = xfsaild_push(mp, &last_pushed_lsn);
+ tout = xfsaild_push(ailp, &last_pushed_lsn);
}
return 0;
@@ -848,43 +901,82 @@ xfsaild(
int
xfsaild_start(
- xfs_mount_t *mp)
+ struct xfs_ail *ailp)
{
- mp->m_ail.xa_target = 0;
- mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild");
- if (IS_ERR(mp->m_ail.xa_task))
- return -PTR_ERR(mp->m_ail.xa_task);
+ ailp->xa_target = 0;
+ ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
+ if (IS_ERR(ailp->xa_task))
+ return -PTR_ERR(ailp->xa_task);
return 0;
}
void
xfsaild_stop(
- xfs_mount_t *mp)
+ struct xfs_ail *ailp)
{
- kthread_stop(mp->m_ail.xa_task);
+ kthread_stop(ailp->xa_task);
}
-
+/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
struct super_block *sb)
{
- return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
+ BUG();
+ return NULL;
}
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
STATIC void
xfs_fs_destroy_inode(
- struct inode *inode)
+ struct inode *inode)
{
- kmem_zone_free(xfs_vnode_zone, inode);
+ xfs_inode_t *ip = XFS_I(inode);
+
+ XFS_STATS_INC(vn_reclaim);
+ if (xfs_reclaim(ip))
+ panic("%s: cannot reclaim 0x%p\n", __func__, inode);
}
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly intialise
+ * fields in the xfs inode that left in the initialise state
+ * when freeing the inode.
+ */
STATIC void
xfs_fs_inode_init_once(
- void *vnode)
+ void *inode)
{
- inode_init_once((struct inode *)vnode);
+ struct xfs_inode *ip = inode;
+
+ memset(ip, 0, sizeof(struct xfs_inode));
+
+ /* vfs inode */
+ inode_init_once(VFS_I(ip));
+
+ /* xfs inode */
+ atomic_set(&ip->i_iocount, 0);
+ atomic_set(&ip->i_pincount, 0);
+ spin_lock_init(&ip->i_flags_lock);
+ init_waitqueue_head(&ip->i_ipin_wait);
+ /*
+ * Because we want to use a counting completion, complete
+ * the flush completion once to allow a single access to
+ * the flush completion without blocking.
+ */
+ init_completion(&ip->i_flush);
+ complete(&ip->i_flush);
+
+ mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+ "xfsino", ip->i_ino);
+ mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
}
/*
@@ -912,7 +1004,7 @@ xfs_fs_write_inode(
* it dirty again so we'll try again later.
*/
if (error)
- mark_inode_dirty_sync(inode);
+ xfs_mark_inode_dirty_sync(XFS_I(inode));
return -error;
}
@@ -923,164 +1015,13 @@ xfs_fs_clear_inode(
{
xfs_inode_t *ip = XFS_I(inode);
- /*
- * ip can be null when xfs_iget_core calls xfs_idestroy if we
- * find an inode with di_mode == 0 but without IGET_CREATE set.
- */
- if (ip) {
- xfs_itrace_entry(ip);
- XFS_STATS_INC(vn_rele);
- XFS_STATS_INC(vn_remove);
- XFS_STATS_INC(vn_reclaim);
- XFS_STATS_DEC(vn_active);
-
- xfs_inactive(ip);
- xfs_iflags_clear(ip, XFS_IMODIFIED);
- if (xfs_reclaim(ip))
- panic("%s: cannot reclaim 0x%p\n", __func__, inode);
- }
+ xfs_itrace_entry(ip);
+ XFS_STATS_INC(vn_rele);
+ XFS_STATS_INC(vn_remove);
+ XFS_STATS_DEC(vn_active);
- ASSERT(XFS_I(inode) == NULL);
-}
-
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
- struct xfs_mount *mp,
- void *data,
- void (*syncer)(struct xfs_mount *, void *))
-{
- struct bhv_vfs_sync_work *work;
-
- work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
- INIT_LIST_HEAD(&work->w_list);
- work->w_syncer = syncer;
- work->w_data = data;
- work->w_mount = mp;
- spin_lock(&mp->m_sync_lock);
- list_add_tail(&work->w_list, &mp->m_sync_list);
- spin_unlock(&mp->m_sync_lock);
- wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inode_work(
- struct xfs_mount *mp,
- void *arg)
-{
- struct inode *inode = arg;
- filemap_flush(inode->i_mapping);
- iput(inode);
-}
-
-void
-xfs_flush_inode(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- igrab(inode);
- xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
- delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
- struct xfs_mount *mp,
- void *arg)
-{
- struct inode *inode = arg;
- sync_blockdev(mp->m_super->s_bdev);
- iput(inode);
-}
-
-void
-xfs_flush_device(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- igrab(inode);
- xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
- delay(msecs_to_jiffies(500));
- xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
-}
-
-STATIC void
-xfs_sync_worker(
- struct xfs_mount *mp,
- void *unused)
-{
- int error;
-
- if (!(mp->m_flags & XFS_MOUNT_RDONLY))
- error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
- mp->m_sync_seq++;
- wake_up(&mp->m_wait_single_sync_task);
-}
-
-STATIC int
-xfssyncd(
- void *arg)
-{
- struct xfs_mount *mp = arg;
- long timeleft;
- bhv_vfs_sync_work_t *work, *n;
- LIST_HEAD (tmp);
-
- set_freezable();
- timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
- for (;;) {
- timeleft = schedule_timeout_interruptible(timeleft);
- /* swsusp */
- try_to_freeze();
- if (kthread_should_stop() && list_empty(&mp->m_sync_list))
- break;
-
- spin_lock(&mp->m_sync_lock);
- /*
- * We can get woken by laptop mode, to do a sync -
- * that's the (only!) case where the list would be
- * empty with time remaining.
- */
- if (!timeleft || list_empty(&mp->m_sync_list)) {
- if (!timeleft)
- timeleft = xfs_syncd_centisecs *
- msecs_to_jiffies(10);
- INIT_LIST_HEAD(&mp->m_sync_work.w_list);
- list_add_tail(&mp->m_sync_work.w_list,
- &mp->m_sync_list);
- }
- list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
- list_move(&work->w_list, &tmp);
- spin_unlock(&mp->m_sync_lock);
-
- list_for_each_entry_safe(work, n, &tmp, w_list) {
- (*work->w_syncer)(mp, work->w_data);
- list_del(&work->w_list);
- if (work == &mp->m_sync_work)
- continue;
- kmem_free(work);
- }
- }
-
- return 0;
+ xfs_inactive(ip);
+ xfs_iflags_clear(ip, XFS_IMODIFIED);
}
STATIC void
@@ -1101,9 +1042,8 @@ xfs_fs_put_super(
int unmount_event_flags = 0;
int error;
- kthread_stop(mp->m_sync_task);
-
- xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
+ xfs_syncd_stop(mp);
+ xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
#ifdef HAVE_DMAPI
if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1131,16 +1071,6 @@ xfs_fs_put_super(
error = xfs_unmount_flush(mp, 0);
WARN_ON(error);
- /*
- * If we're forcing a shutdown, typically because of a media error,
- * we want to make sure we invalidate dirty pages that belong to
- * referenced vnodes as well.
- */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
- ASSERT(error != EFSCORRUPTED);
- }
-
if (mp->m_flags & XFS_MOUNT_DMAPI) {
XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
unmount_event_flags);
@@ -1161,7 +1091,7 @@ xfs_fs_write_super(
struct super_block *sb)
{
if (!(sb->s_flags & MS_RDONLY))
- xfs_sync(XFS_M(sb), SYNC_FSDATA);
+ xfs_sync_fsdata(XFS_M(sb), 0);
sb->s_dirt = 0;
}
@@ -1172,7 +1102,6 @@ xfs_fs_sync_super(
{
struct xfs_mount *mp = XFS_M(sb);
int error;
- int flags;
/*
* Treat a sync operation like a freeze. This is to work
@@ -1186,20 +1115,10 @@ xfs_fs_sync_super(
* dirty the Linux inode until after the transaction I/O
* completes.
*/
- if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
- /*
- * First stage of freeze - no more writers will make progress
- * now we are here, so we flush delwri and delalloc buffers
- * here, then wait for all I/O to complete. Data is frozen at
- * that point. Metadata is not frozen, transactions can still
- * occur here so don't bother flushing the buftarg (i.e
- * SYNC_QUIESCE) because it'll just get dirty again.
- */
- flags = SYNC_DATA_QUIESCE;
- } else
- flags = SYNC_FSDATA;
-
- error = xfs_sync(mp, flags);
+ if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE))
+ error = xfs_quiesce_data(mp);
+ else
+ error = xfs_sync_fsdata(mp, 0);
sb->s_dirt = 0;
if (unlikely(laptop_mode)) {
@@ -1337,9 +1256,8 @@ xfs_fs_remount(
/* rw -> ro */
if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
- xfs_filestream_flush(mp);
- xfs_sync(mp, SYNC_DATA_QUIESCE);
- xfs_attr_quiesce(mp);
+ xfs_quiesce_data(mp);
+ xfs_quiesce_attr(mp);
mp->m_flags |= XFS_MOUNT_RDONLY;
}
@@ -1348,7 +1266,7 @@ xfs_fs_remount(
/*
* Second stage of a freeze. The data is already frozen so we only
- * need to take care of themetadata. Once that's done write a dummy
+ * need to take care of the metadata. Once that's done write a dummy
* record to dirty the log in case of a crash while frozen.
*/
STATIC void
@@ -1357,7 +1275,7 @@ xfs_fs_lockfs(
{
struct xfs_mount *mp = XFS_M(sb);
- xfs_attr_quiesce(mp);
+ xfs_quiesce_attr(mp);
xfs_fs_log_dummy(mp);
}
@@ -1422,175 +1340,28 @@ xfs_fs_setxquota(
/*
* This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
- struct xfs_mount_args *ap,
- struct xfs_mount *mp)
-{
- int error;
-
- /* Values are in BBs */
- if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
- /*
- * At this point the superblock has not been read
- * in, therefore we do not know the block size.
- * Before the mount call ends we will convert
- * these to FSBs.
- */
- mp->m_dalign = ap->sunit;
- mp->m_swidth = ap->swidth;
- }
-
- if (ap->logbufs != -1 &&
- ap->logbufs != 0 &&
- (ap->logbufs < XLOG_MIN_ICLOGS ||
- ap->logbufs > XLOG_MAX_ICLOGS)) {
- cmn_err(CE_WARN,
- "XFS: invalid logbufs value: %d [not %d-%d]",
- ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return XFS_ERROR(EINVAL);
- }
- mp->m_logbufs = ap->logbufs;
- if (ap->logbufsize != -1 &&
- ap->logbufsize != 0 &&
- (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
- ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
- !is_power_of_2(ap->logbufsize))) {
- cmn_err(CE_WARN,
- "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
- ap->logbufsize);
- return XFS_ERROR(EINVAL);
- }
-
- error = ENOMEM;
-
- mp->m_logbsize = ap->logbufsize;
- mp->m_fsname_len = strlen(ap->fsname) + 1;
-
- mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
- if (!mp->m_fsname)
- goto out;
-
- if (ap->rtname[0]) {
- mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
- if (!mp->m_rtname)
- goto out_free_fsname;
-
- }
-
- if (ap->logname[0]) {
- mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
- if (!mp->m_logname)
- goto out_free_rtname;
- }
-
- if (ap->flags & XFSMNT_WSYNC)
- mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
- if (ap->flags & XFSMNT_INO64) {
- mp->m_flags |= XFS_MOUNT_INO64;
- mp->m_inoadd = XFS_INO64_OFFSET;
- }
-#endif
- if (ap->flags & XFSMNT_RETERR)
- mp->m_flags |= XFS_MOUNT_RETERR;
- if (ap->flags & XFSMNT_NOALIGN)
- mp->m_flags |= XFS_MOUNT_NOALIGN;
- if (ap->flags & XFSMNT_SWALLOC)
- mp->m_flags |= XFS_MOUNT_SWALLOC;
- if (ap->flags & XFSMNT_OSYNCISOSYNC)
- mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
- if (ap->flags & XFSMNT_32BITINODES)
- mp->m_flags |= XFS_MOUNT_32BITINODES;
-
- if (ap->flags & XFSMNT_IOSIZE) {
- if (ap->iosizelog > XFS_MAX_IO_LOG ||
- ap->iosizelog < XFS_MIN_IO_LOG) {
- cmn_err(CE_WARN,
- "XFS: invalid log iosize: %d [not %d-%d]",
- ap->iosizelog, XFS_MIN_IO_LOG,
- XFS_MAX_IO_LOG);
- return XFS_ERROR(EINVAL);
- }
-
- mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
- mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
- }
-
- if (ap->flags & XFSMNT_IKEEP)
- mp->m_flags |= XFS_MOUNT_IKEEP;
- if (ap->flags & XFSMNT_DIRSYNC)
- mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (ap->flags & XFSMNT_ATTR2)
- mp->m_flags |= XFS_MOUNT_ATTR2;
- if (ap->flags & XFSMNT_NOATTR2)
- mp->m_flags |= XFS_MOUNT_NOATTR2;
-
- if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
- /*
- * no recovery flag requires a read-only mount
- */
- if (ap->flags & XFSMNT_NORECOVERY) {
- if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
- cmn_err(CE_WARN,
- "XFS: tried to mount a FS read-write without recovery!");
- return XFS_ERROR(EINVAL);
- }
- mp->m_flags |= XFS_MOUNT_NORECOVERY;
- }
-
- if (ap->flags & XFSMNT_NOUUID)
- mp->m_flags |= XFS_MOUNT_NOUUID;
- if (ap->flags & XFSMNT_BARRIER)
- mp->m_flags |= XFS_MOUNT_BARRIER;
- else
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
-
- if (ap->flags2 & XFSMNT2_FILESTREAMS)
- mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-
- if (ap->flags & XFSMNT_DMAPI)
- mp->m_flags |= XFS_MOUNT_DMAPI;
- return 0;
-
-
- out_free_rtname:
- kfree(mp->m_rtname);
- out_free_fsname:
- kfree(mp->m_fsname);
- out:
- return error;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
* Note: the superblock _has_ now been read in.
*/
STATIC int
xfs_finish_flags(
- struct xfs_mount_args *ap,
struct xfs_mount *mp)
{
int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
/* Fail a mount where the logbuf is smaller then the log stripe */
if (xfs_sb_version_haslogv2(&mp->m_sb)) {
- if ((ap->logbufsize <= 0) &&
- (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+ if (mp->m_logbsize <= 0 &&
+ mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
mp->m_logbsize = mp->m_sb.sb_logsunit;
- } else if (ap->logbufsize > 0 &&
- ap->logbufsize < mp->m_sb.sb_logsunit) {
+ } else if (mp->m_logbsize > 0 &&
+ mp->m_logbsize < mp->m_sb.sb_logsunit) {
cmn_err(CE_WARN,
"XFS: logbuf size must be greater than or equal to log stripe size");
return XFS_ERROR(EINVAL);
}
} else {
/* Fail a mount if the logbuf is larger than 32K */
- if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+ if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
cmn_err(CE_WARN,
"XFS: logbuf size for version 1 logs must be 16K or 32K");
return XFS_ERROR(EINVAL);
@@ -1602,7 +1373,7 @@ xfs_finish_flags(
* told by noattr2 to turn it off
*/
if (xfs_sb_version_hasattr2(&mp->m_sb) &&
- !(ap->flags & XFSMNT_NOATTR2))
+ !(mp->m_flags & XFS_MOUNT_NOATTR2))
mp->m_flags |= XFS_MOUNT_ATTR2;
/*
@@ -1614,6 +1385,7 @@ xfs_finish_flags(
return XFS_ERROR(EROFS);
}
+#if 0 /* shared mounts were never supported on Linux */
/*
* check for shared mount.
*/
@@ -1636,25 +1408,11 @@ xfs_finish_flags(
/*
* Shared XFS V0 can't deal with DMI. Return EINVAL.
*/
- if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
+ if (mp->m_sb.sb_shared_vn == 0 &&
+ (mp->m_flags & XFS_MOUNT_DMAPI))
return XFS_ERROR(EINVAL);
}
-
- if (ap->flags & XFSMNT_UQUOTA) {
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_UQUOTAENF)
- mp->m_qflags |= XFS_UQUOTA_ENFD;
- }
-
- if (ap->flags & XFSMNT_GQUOTA) {
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_GQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- } else if (ap->flags & XFSMNT_PQUOTA) {
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_PQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- }
+#endif
return 0;
}
@@ -1667,19 +1425,14 @@ xfs_fs_fill_super(
{
struct inode *root;
struct xfs_mount *mp = NULL;
- struct xfs_mount_args *args;
int flags = 0, error = ENOMEM;
-
- args = xfs_args_allocate(sb, silent);
- if (!args)
- return -ENOMEM;
+ char *mtpt = NULL;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
- goto out_free_args;
+ goto out;
spin_lock_init(&mp->m_sb_lock);
- mutex_init(&mp->m_ilock);
mutex_init(&mp->m_growlock);
atomic_set(&mp->m_active_trans, 0);
INIT_LIST_HEAD(&mp->m_sync_list);
@@ -1689,12 +1442,9 @@ xfs_fs_fill_super(
mp->m_super = sb;
sb->s_fs_info = mp;
- if (sb->s_flags & MS_RDONLY)
- mp->m_flags |= XFS_MOUNT_RDONLY;
-
- error = xfs_parseargs(mp, (char *)data, args, 0);
+ error = xfs_parseargs(mp, (char *)data, &mtpt);
if (error)
- goto out_free_mp;
+ goto out_free_fsname;
sb_min_blocksize(sb, BBSIZE);
sb->s_xattr = xfs_xattr_handlers;
@@ -1702,33 +1452,28 @@ xfs_fs_fill_super(
sb->s_qcop = &xfs_quotactl_operations;
sb->s_op = &xfs_super_operations;
- error = xfs_dmops_get(mp, args);
+ error = xfs_dmops_get(mp);
if (error)
- goto out_free_mp;
- error = xfs_qmops_get(mp, args);
+ goto out_free_fsname;
+ error = xfs_qmops_get(mp);
if (error)
goto out_put_dmops;
- if (args->flags & XFSMNT_QUIET)
+ if (silent)
flags |= XFS_MFSI_QUIET;
- error = xfs_open_devices(mp, args);
+ error = xfs_open_devices(mp);
if (error)
goto out_put_qmops;
if (xfs_icsb_init_counters(mp))
mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
- /*
- * Setup flags based on mount(2) options and then the superblock
- */
- error = xfs_start_flags(args, mp);
- if (error)
- goto out_free_fsname;
error = xfs_readsb(mp, flags);
if (error)
- goto out_free_fsname;
- error = xfs_finish_flags(args, mp);
+ goto out_destroy_counters;
+
+ error = xfs_finish_flags(mp);
if (error)
goto out_free_sb;
@@ -1747,7 +1492,7 @@ xfs_fs_fill_super(
if (error)
goto out_filestream_unmount;
- XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
+ XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
sb->s_dirt = 1;
sb->s_magic = XFS_SB_MAGIC;
@@ -1772,35 +1517,31 @@ xfs_fs_fill_super(
goto fail_vnrele;
}
- mp->m_sync_work.w_syncer = xfs_sync_worker;
- mp->m_sync_work.w_mount = mp;
- mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
- if (IS_ERR(mp->m_sync_task)) {
- error = -PTR_ERR(mp->m_sync_task);
+ error = xfs_syncd_init(mp);
+ if (error)
goto fail_vnrele;
- }
- xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
+ kfree(mtpt);
- kfree(args);
+ xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
return 0;
out_filestream_unmount:
xfs_filestream_unmount(mp);
out_free_sb:
xfs_freesb(mp);
- out_free_fsname:
- xfs_free_fsname(mp);
+ out_destroy_counters:
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
out_put_qmops:
xfs_qmops_put(mp);
out_put_dmops:
xfs_dmops_put(mp);
- out_free_mp:
+ out_free_fsname:
+ xfs_free_fsname(mp);
+ kfree(mtpt);
kfree(mp);
- out_free_args:
- kfree(args);
+ out:
return -error;
fail_vnrele:
@@ -1882,10 +1623,19 @@ xfs_alloc_trace_bufs(void)
if (!xfs_bmap_trace_buf)
goto out_free_alloc_trace;
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
+ xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
+ KM_MAYFAIL);
+ if (!xfs_allocbt_trace_buf)
+ goto out_free_bmap_trace;
+
+ xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_inobt_trace_buf)
+ goto out_free_allocbt_trace;
+
xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
if (!xfs_bmbt_trace_buf)
- goto out_free_bmap_trace;
+ goto out_free_inobt_trace;
#endif
#ifdef XFS_ATTR_TRACE
xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
@@ -1907,8 +1657,12 @@ xfs_alloc_trace_bufs(void)
ktrace_free(xfs_attr_trace_buf);
out_free_bmbt_trace:
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
ktrace_free(xfs_bmbt_trace_buf);
+ out_free_inobt_trace:
+ ktrace_free(xfs_inobt_trace_buf);
+ out_free_allocbt_trace:
+ ktrace_free(xfs_allocbt_trace_buf);
out_free_bmap_trace:
#endif
#ifdef XFS_BMAP_TRACE
@@ -1931,8 +1685,10 @@ xfs_free_trace_bufs(void)
#ifdef XFS_ATTR_TRACE
ktrace_free(xfs_attr_trace_buf);
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
ktrace_free(xfs_bmbt_trace_buf);
+ ktrace_free(xfs_inobt_trace_buf);
+ ktrace_free(xfs_allocbt_trace_buf);
#endif
#ifdef XFS_BMAP_TRACE
ktrace_free(xfs_bmap_trace_buf);
@@ -1945,16 +1701,10 @@ xfs_free_trace_bufs(void)
STATIC int __init
xfs_init_zones(void)
{
- xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
- KM_ZONE_SPREAD,
- xfs_fs_inode_init_once);
- if (!xfs_vnode_zone)
- goto out;
xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
if (!xfs_ioend_zone)
- goto out_destroy_vnode_zone;
+ goto out;
xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
xfs_ioend_zone);
@@ -1970,6 +1720,7 @@ xfs_init_zones(void)
"xfs_bmap_free_item");
if (!xfs_bmap_free_item_zone)
goto out_destroy_log_ticket_zone;
+
xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
"xfs_btree_cur");
if (!xfs_btree_cur_zone)
@@ -2017,8 +1768,8 @@ xfs_init_zones(void)
xfs_inode_zone =
kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
- KM_ZONE_SPREAD, NULL);
+ KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
+ xfs_fs_inode_init_once);
if (!xfs_inode_zone)
goto out_destroy_efi_zone;
@@ -2066,8 +1817,6 @@ xfs_init_zones(void)
mempool_destroy(xfs_ioend_pool);
out_destroy_ioend_zone:
kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
- kmem_zone_destroy(xfs_vnode_zone);
out:
return -ENOMEM;
}
@@ -2092,7 +1841,6 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_log_ticket_zone);
mempool_destroy(xfs_ioend_pool);
kmem_zone_destroy(xfs_ioend_zone);
- kmem_zone_destroy(xfs_vnode_zone);
}
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index fe2ef4e6a0f9..56dc48a76fab 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -101,9 +101,6 @@ struct block_device;
extern __uint64_t xfs_max_file_offset(unsigned int);
-extern void xfs_flush_inode(struct xfs_inode *);
-extern void xfs_flush_device(struct xfs_inode *);
-
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
extern const struct export_operations xfs_export_operations;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
new file mode 100644
index 000000000000..d12d31b86fa0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -0,0 +1,764 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_inode.h"
+#include "xfs_dinode.h"
+#include "xfs_error.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+#include "xfs_utils.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
+#include "xfs_rw.h"
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+/*
+ * Sync all the inodes in the given AG according to the
+ * direction given by the flags.
+ */
+STATIC int
+xfs_sync_inodes_ag(
+ xfs_mount_t *mp,
+ int ag,
+ int flags)
+{
+ xfs_perag_t *pag = &mp->m_perag[ag];
+ int nr_found;
+ uint32_t first_index = 0;
+ int error = 0;
+ int last_error = 0;
+ int fflag = XFS_B_ASYNC;
+
+ if (flags & SYNC_DELWRI)
+ fflag = XFS_B_DELWRI;
+ if (flags & SYNC_WAIT)
+ fflag = 0; /* synchronous overrides all */
+
+ do {
+ struct inode *inode;
+ xfs_inode_t *ip = NULL;
+ int lock_flags = XFS_ILOCK_SHARED;
+
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+ (void**)&ip, first_index, 1);
+
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /* nothing to sync during shutdown */
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ read_unlock(&pag->pag_ici_lock);
+ return 0;
+ }
+
+ /*
+ * If we can't get a reference on the inode, it must be
+ * in reclaim. Leave it for the reclaim code to flush.
+ */
+ inode = VFS_I(ip);
+ if (!igrab(inode)) {
+ read_unlock(&pag->pag_ici_lock);
+ continue;
+ }
+ read_unlock(&pag->pag_ici_lock);
+
+ /* avoid new or bad inodes */
+ if (is_bad_inode(inode) ||
+ xfs_iflags_test(ip, XFS_INEW)) {
+ IRELE(ip);
+ continue;
+ }
+
+ /*
+ * If we have to flush data or wait for I/O completion
+ * we need to hold the iolock.
+ */
+ if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ lock_flags |= XFS_IOLOCK_SHARED;
+ error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
+ if (flags & SYNC_IOWAIT)
+ vn_iowait(ip);
+ }
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+ if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
+ if (flags & SYNC_WAIT) {
+ xfs_iflock(ip);
+ if (!xfs_inode_clean(ip))
+ error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
+ else
+ xfs_ifunlock(ip);
+ } else if (xfs_iflock_nowait(ip)) {
+ if (!xfs_inode_clean(ip))
+ error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
+ else
+ xfs_ifunlock(ip);
+ }
+ }
+ xfs_iput(ip, lock_flags);
+
+ if (error)
+ last_error = error;
+ /*
+ * bail out if the filesystem is corrupted.
+ */
+ if (error == EFSCORRUPTED)
+ return XFS_ERROR(error);
+
+ } while (nr_found);
+
+ return last_error;
+}
+
+int
+xfs_sync_inodes(
+ xfs_mount_t *mp,
+ int flags)
+{
+ int error;
+ int last_error;
+ int i;
+ int lflags = XFS_LOG_FORCE;
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return 0;
+ error = 0;
+ last_error = 0;
+
+ if (flags & SYNC_WAIT)
+ lflags |= XFS_LOG_SYNC;
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ if (!mp->m_perag[i].pag_ici_init)
+ continue;
+ error = xfs_sync_inodes_ag(mp, i, flags);
+ if (error)
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ if (flags & SYNC_DELWRI)
+ xfs_log_force(mp, 0, lflags);
+
+ return XFS_ERROR(last_error);
+}
+
+STATIC int
+xfs_commit_dummy_trans(
+ struct xfs_mount *mp,
+ uint log_flags)
+{
+ struct xfs_inode *ip = mp->m_rootip;
+ struct xfs_trans *tp;
+ int error;
+
+ /*
+ * Put a dummy transaction in the log to tell recovery
+ * that all others are OK.
+ */
+ tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
+ error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ihold(tp, ip);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ /* XXX(hch): ignoring the error here.. */
+ error = xfs_trans_commit(tp, 0);
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ xfs_log_force(mp, 0, log_flags);
+ return 0;
+}
+
+int
+xfs_sync_fsdata(
+ struct xfs_mount *mp,
+ int flags)
+{
+ struct xfs_buf *bp;
+ struct xfs_buf_log_item *bip;
+ int error = 0;
+
+ /*
+ * If this is xfssyncd() then only sync the superblock if we can
+ * lock it without sleeping and it is not pinned.
+ */
+ if (flags & SYNC_BDFLUSH) {
+ ASSERT(!(flags & SYNC_WAIT));
+
+ bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
+ if (!bp)
+ goto out;
+
+ bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
+ if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
+ goto out_brelse;
+ } else {
+ bp = xfs_getsb(mp, 0);
+
+ /*
+ * If the buffer is pinned then push on the log so we won't
+ * get stuck waiting in the write for someone, maybe
+ * ourselves, to flush the log.
+ *
+ * Even though we just pushed the log above, we did not have
+ * the superblock buffer locked at that point so it can
+ * become pinned in between there and here.
+ */
+ if (XFS_BUF_ISPINNED(bp))
+ xfs_log_force(mp, 0, XFS_LOG_FORCE);
+ }
+
+
+ if (flags & SYNC_WAIT)
+ XFS_BUF_UNASYNC(bp);
+ else
+ XFS_BUF_ASYNC(bp);
+
+ return xfs_bwrite(mp, bp);
+
+ out_brelse:
+ xfs_buf_relse(bp);
+ out:
+ return error;
+}
+
+/*
+ * When remounting a filesystem read-only or freezing the filesystem, we have
+ * two phases to execute. This first phase is syncing the data before we
+ * quiesce the filesystem, and the second is flushing all the inodes out after
+ * we've waited for all the transactions created by the first phase to
+ * complete. The second phase ensures that the inodes are written to their
+ * location on disk rather than just existing in transactions in the log. This
+ * means after a quiesce there is no log replay required to write the inodes to
+ * disk (this is the main difference between a sync and a quiesce).
+ */
+/*
+ * First stage of freeze - no writers will make progress now we are here,
+ * so we flush delwri and delalloc buffers here, then wait for all I/O to
+ * complete. Data is frozen at that point. Metadata is not frozen,
+ * transactions can still occur here so don't bother flushing the buftarg
+ * because it'll just get dirty again.
+ */
+int
+xfs_quiesce_data(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ /* push non-blocking */
+ xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
+ XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+ xfs_filestream_flush(mp);
+
+ /* push and block */
+ xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
+ XFS_QM_DQSYNC(mp, SYNC_WAIT);
+
+ /* write superblock and hoover up shutdown errors */
+ error = xfs_sync_fsdata(mp, 0);
+
+ /* flush data-only devices */
+ if (mp->m_rtdev_targp)
+ XFS_bflush(mp->m_rtdev_targp);
+
+ return error;
+}
+
+STATIC void
+xfs_quiesce_fs(
+ struct xfs_mount *mp)
+{
+ int count = 0, pincount;
+
+ xfs_flush_buftarg(mp->m_ddev_targp, 0);
+ xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+
+ /*
+ * This loop must run at least twice. The first instance of the loop
+ * will flush most meta data but that will generate more meta data
+ * (typically directory updates). Which then must be flushed and
+ * logged before we can write the unmount record.
+ */
+ do {
+ xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
+ pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+ if (!pincount) {
+ delay(50);
+ count++;
+ }
+ } while (count < 2);
+}
+
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceding.
+ */
+void
+xfs_quiesce_attr(
+ struct xfs_mount *mp)
+{
+ int error = 0;
+
+ /* wait for all modifications to complete */
+ while (atomic_read(&mp->m_active_trans) > 0)
+ delay(100);
+
+ /* flush inodes and push all remaining buffers out to disk */
+ xfs_quiesce_fs(mp);
+
+ ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+
+ /* Push the superblock and write an unmount record */
+ error = xfs_log_sbcount(mp, 1);
+ if (error)
+ xfs_fs_cmn_err(CE_WARN, mp,
+ "xfs_attr_quiesce: failed to log sb changes. "
+ "Frozen image may not be consistent.");
+ xfs_log_unmount_write(mp);
+ xfs_unmountfs_writesb(mp);
+}
+
+/*
+ * Enqueue a work item to be picked up by the vfs xfssyncd thread.
+ * Doing this has two advantages:
+ * - It saves on stack space, which is tight in certain situations
+ * - It can be used (with care) as a mechanism to avoid deadlocks.
+ * Flushing while allocating in a full filesystem requires both.
+ */
+STATIC void
+xfs_syncd_queue_work(
+ struct xfs_mount *mp,
+ void *data,
+ void (*syncer)(struct xfs_mount *, void *))
+{
+ struct bhv_vfs_sync_work *work;
+
+ work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
+ INIT_LIST_HEAD(&work->w_list);
+ work->w_syncer = syncer;
+ work->w_data = data;
+ work->w_mount = mp;
+ spin_lock(&mp->m_sync_lock);
+ list_add_tail(&work->w_list, &mp->m_sync_list);
+ spin_unlock(&mp->m_sync_lock);
+ wake_up_process(mp->m_sync_task);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room...
+ */
+STATIC void
+xfs_flush_inode_work(
+ struct xfs_mount *mp,
+ void *arg)
+{
+ struct inode *inode = arg;
+ filemap_flush(inode->i_mapping);
+ iput(inode);
+}
+
+void
+xfs_flush_inode(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ igrab(inode);
+ xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
+ delay(msecs_to_jiffies(500));
+}
+
+/*
+ * This is the "bigger hammer" version of xfs_flush_inode_work...
+ * (IOW, "If at first you don't succeed, use a Bigger Hammer").
+ */
+STATIC void
+xfs_flush_device_work(
+ struct xfs_mount *mp,
+ void *arg)
+{
+ struct inode *inode = arg;
+ sync_blockdev(mp->m_super->s_bdev);
+ iput(inode);
+}
+
+void
+xfs_flush_device(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ igrab(inode);
+ xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
+ delay(msecs_to_jiffies(500));
+ xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+}
+
+/*
+ * Every sync period we need to unpin all items, reclaim inodes, sync
+ * quota and write out the superblock. We might need to cover the log
+ * to indicate it is idle.
+ */
+STATIC void
+xfs_sync_worker(
+ struct xfs_mount *mp,
+ void *unused)
+{
+ int error;
+
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
+ xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+ /* dgc: errors ignored here */
+ error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+ error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
+ if (xfs_log_need_covered(mp))
+ error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
+ }
+ mp->m_sync_seq++;
+ wake_up(&mp->m_wait_single_sync_task);
+}
+
+STATIC int
+xfssyncd(
+ void *arg)
+{
+ struct xfs_mount *mp = arg;
+ long timeleft;
+ bhv_vfs_sync_work_t *work, *n;
+ LIST_HEAD (tmp);
+
+ set_freezable();
+ timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
+ for (;;) {
+ timeleft = schedule_timeout_interruptible(timeleft);
+ /* swsusp */
+ try_to_freeze();
+ if (kthread_should_stop() && list_empty(&mp->m_sync_list))
+ break;
+
+ spin_lock(&mp->m_sync_lock);
+ /*
+ * We can get woken by laptop mode, to do a sync -
+ * that's the (only!) case where the list would be
+ * empty with time remaining.
+ */
+ if (!timeleft || list_empty(&mp->m_sync_list)) {
+ if (!timeleft)
+ timeleft = xfs_syncd_centisecs *
+ msecs_to_jiffies(10);
+ INIT_LIST_HEAD(&mp->m_sync_work.w_list);
+ list_add_tail(&mp->m_sync_work.w_list,
+ &mp->m_sync_list);
+ }
+ list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
+ list_move(&work->w_list, &tmp);
+ spin_unlock(&mp->m_sync_lock);
+
+ list_for_each_entry_safe(work, n, &tmp, w_list) {
+ (*work->w_syncer)(mp, work->w_data);
+ list_del(&work->w_list);
+ if (work == &mp->m_sync_work)
+ continue;
+ kmem_free(work);
+ }
+ }
+
+ return 0;
+}
+
+int
+xfs_syncd_init(
+ struct xfs_mount *mp)
+{
+ mp->m_sync_work.w_syncer = xfs_sync_worker;
+ mp->m_sync_work.w_mount = mp;
+ mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+ if (IS_ERR(mp->m_sync_task))
+ return -PTR_ERR(mp->m_sync_task);
+ return 0;
+}
+
+void
+xfs_syncd_stop(
+ struct xfs_mount *mp)
+{
+ kthread_stop(mp->m_sync_task);
+}
+
+int
+xfs_reclaim_inode(
+ xfs_inode_t *ip,
+ int locked,
+ int sync_mode)
+{
+ xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
+
+ /* The hash lock here protects a thread in xfs_iget_core from
+ * racing with us on linking the inode back with a vnode.
+ * Once we have the XFS_IRECLAIM flag set it will not touch
+ * us.
+ */
+ write_lock(&pag->pag_ici_lock);
+ spin_lock(&ip->i_flags_lock);
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
+ !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+ spin_unlock(&ip->i_flags_lock);
+ write_unlock(&pag->pag_ici_lock);
+ if (locked) {
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+ return 1;
+ }
+ __xfs_iflags_set(ip, XFS_IRECLAIM);
+ spin_unlock(&ip->i_flags_lock);
+ write_unlock(&pag->pag_ici_lock);
+ xfs_put_perag(ip->i_mount, pag);
+
+ /*
+ * If the inode is still dirty, then flush it out. If the inode
+ * is not in the AIL, then it will be OK to flush it delwri as
+ * long as xfs_iflush() does not keep any references to the inode.
+ * We leave that decision up to xfs_iflush() since it has the
+ * knowledge of whether it's OK to simply do a delwri flush of
+ * the inode or whether we need to wait until the inode is
+ * pulled from the AIL.
+ * We get the flush lock regardless, though, just to make sure
+ * we don't free it while it is being flushed.
+ */
+ if (!locked) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_iflock(ip);
+ }
+
+ /*
+ * In the case of a forced shutdown we rely on xfs_iflush() to
+ * wait for the inode to be unpinned before returning an error.
+ */
+ if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
+ /* synchronize with xfs_iflush_done */
+ xfs_iflock(ip);
+ xfs_ifunlock(ip);
+ }
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_ireclaim(ip);
+ return 0;
+}
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+ xfs_inode_t *ip)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+
+ read_lock(&pag->pag_ici_lock);
+ spin_lock(&ip->i_flags_lock);
+ radix_tree_tag_set(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+ __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+ spin_unlock(&ip->i_flags_lock);
+ read_unlock(&pag->pag_ici_lock);
+ xfs_put_perag(mp, pag);
+}
+
+void
+__xfs_inode_clear_reclaim_tag(
+ xfs_mount_t *mp,
+ xfs_perag_t *pag,
+ xfs_inode_t *ip)
+{
+ radix_tree_tag_clear(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+}
+
+void
+xfs_inode_clear_reclaim_tag(
+ xfs_inode_t *ip)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+
+ read_lock(&pag->pag_ici_lock);
+ spin_lock(&ip->i_flags_lock);
+ __xfs_inode_clear_reclaim_tag(mp, pag, ip);
+ spin_unlock(&ip->i_flags_lock);
+ read_unlock(&pag->pag_ici_lock);
+ xfs_put_perag(mp, pag);
+}
+
+
+STATIC void
+xfs_reclaim_inodes_ag(
+ xfs_mount_t *mp,
+ int ag,
+ int noblock,
+ int mode)
+{
+ xfs_inode_t *ip = NULL;
+ xfs_perag_t *pag = &mp->m_perag[ag];
+ int nr_found;
+ uint32_t first_index;
+ int skipped;
+
+restart:
+ first_index = 0;
+ skipped = 0;
+ do {
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+ (void**)&ip, first_index, 1,
+ XFS_ICI_RECLAIM_TAG);
+
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE|XFS_IRECLAIM)));
+
+ /* ignore if already under reclaim */
+ if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
+ read_unlock(&pag->pag_ici_lock);
+ continue;
+ }
+
+ if (noblock) {
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+ read_unlock(&pag->pag_ici_lock);
+ continue;
+ }
+ if (xfs_ipincount(ip) ||
+ !xfs_iflock_nowait(ip)) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ read_unlock(&pag->pag_ici_lock);
+ continue;
+ }
+ }
+ read_unlock(&pag->pag_ici_lock);
+
+ /*
+ * hmmm - this is an inode already in reclaim. Do
+ * we even bother catching it here?
+ */
+ if (xfs_reclaim_inode(ip, noblock, mode))
+ skipped++;
+ } while (nr_found);
+
+ if (skipped) {
+ delay(1);
+ goto restart;
+ }
+ return;
+
+}
+
+int
+xfs_reclaim_inodes(
+ xfs_mount_t *mp,
+ int noblock,
+ int mode)
+{
+ int i;
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ if (!mp->m_perag[i].pag_ici_init)
+ continue;
+ xfs_reclaim_inodes_ag(mp, i, noblock, mode);
+ }
+ return 0;
+}
+
+
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
new file mode 100644
index 000000000000..5f6de1efe1f6
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef XFS_SYNC_H
+#define XFS_SYNC_H 1
+
+struct xfs_mount;
+
+typedef struct bhv_vfs_sync_work {
+ struct list_head w_list;
+ struct xfs_mount *w_mount;
+ void *w_data; /* syncer routine argument */
+ void (*w_syncer)(struct xfs_mount *, void *);
+} bhv_vfs_sync_work_t;
+
+#define SYNC_ATTR 0x0001 /* sync attributes */
+#define SYNC_DELWRI 0x0002 /* look at delayed writes */
+#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
+#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
+#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
+
+int xfs_syncd_init(struct xfs_mount *mp);
+void xfs_syncd_stop(struct xfs_mount *mp);
+
+int xfs_sync_inodes(struct xfs_mount *mp, int flags);
+int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
+
+int xfs_quiesce_data(struct xfs_mount *mp);
+void xfs_quiesce_attr(struct xfs_mount *mp);
+
+void xfs_flush_inode(struct xfs_inode *ip);
+void xfs_flush_device(struct xfs_inode *ip);
+
+int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
+int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
+
+void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
+ struct xfs_inode *ip);
+#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7dacb5bbde3f..916c0ffb6083 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -56,17 +56,6 @@ xfs_stats_clear_proc_handler(
static ctl_table xfs_table[] = {
{
- .ctl_name = XFS_RESTRICT_CHOWN,
- .procname = "restrict_chown",
- .data = &xfs_params.restrict_chown.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &xfs_params.restrict_chown.min,
- .extra2 = &xfs_params.restrict_chown.max
- },
- {
.ctl_name = XFS_SGID_INHERIT,
.procname = "irix_sgid_inherit",
.data = &xfs_params.sgid_inherit.val,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 4aadb8056c37..b9937d450f8e 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val {
} xfs_sysctl_val_t;
typedef struct xfs_param {
- xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
* not a member of parent dir GID. */
xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
@@ -68,7 +67,7 @@ typedef struct xfs_param {
enum {
/* XFS_REFCACHE_SIZE = 1 */
/* XFS_REFCACHE_PURGE = 2 */
- XFS_RESTRICT_CHOWN = 3,
+ /* XFS_RESTRICT_CHOWN = 3 */
XFS_SGID_INHERIT = 4,
XFS_SYMLINK_MODE = 5,
XFS_PANIC_MASK = 6,
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 7e60c7776b1c..0ab60bc2e761 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -33,37 +33,6 @@ struct xfs_mount_args;
typedef struct kstatfs bhv_statvfs_t;
-typedef struct bhv_vfs_sync_work {
- struct list_head w_list;
- struct xfs_mount *w_mount;
- void *w_data; /* syncer routine argument */
- void (*w_syncer)(struct xfs_mount *, void *);
-} bhv_vfs_sync_work_t;
-
-#define SYNC_ATTR 0x0001 /* sync attributes */
-#define SYNC_CLOSE 0x0002 /* close file system down */
-#define SYNC_DELWRI 0x0004 /* look at delayed writes */
-#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
-#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
-#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
-#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
-#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
-#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
-
-/*
- * When remounting a filesystem read-only or freezing the filesystem,
- * we have two phases to execute. This first phase is syncing the data
- * before we quiesce the fielsystem, and the second is flushing all the
- * inodes out after we've waited for all the transactions created by
- * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
- * to ensure that the inodes are written to their location on disk
- * rather than just existing in transactions in the log. This means
- * after a quiesce there is no log replay required to write the inodes
- * to disk (this is the main difference between a sync and a quiesce).
- */
-#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
-#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
-
#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index b52528bbbfff..ad18262d651b 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -84,25 +84,12 @@ vn_ioerror(
#ifdef XFS_INODE_TRACE
-/*
- * Reference count of Linux inode if present, -1 if the xfs_inode
- * has no associated Linux inode.
- */
-static inline int xfs_icount(struct xfs_inode *ip)
-{
- struct inode *vp = VFS_I(ip);
-
- if (vp)
- return vn_count(vp);
- return -1;
-}
-
#define KTRACE_ENTER(ip, vk, s, line, ra) \
ktrace_enter( (ip)->i_trace, \
/* 0 */ (void *)(__psint_t)(vk), \
/* 1 */ (void *)(s), \
/* 2 */ (void *)(__psint_t) line, \
-/* 3 */ (void *)(__psint_t)xfs_icount(ip), \
+/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
/* 4 */ (void *)(ra), \
/* 5 */ NULL, \
/* 6 */ (void *)(__psint_t)current_cpu(), \
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 683ce16210ff..bf89e41c3b8d 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -80,11 +80,6 @@ do { \
iput(VFS_I(ip)); \
} while (0)
-static inline struct inode *vn_grab(struct inode *vp)
-{
- return igrab(vp);
-}
-
/*
* Dealing with bad inodes
*/
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index f2705f2fd43c..591ca6602bfb 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
if (brandnewdquot) {
dqp->dq_flnext = dqp->dq_flprev = dqp;
mutex_init(&dqp->q_qlock);
- sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
+ init_waitqueue_head(&dqp->q_pinwait);
/*
* Because we want to use a counting completion, complete
@@ -131,7 +131,7 @@ xfs_qm_dqinit(
dqp->q_res_bcount = 0;
dqp->q_res_icount = 0;
dqp->q_res_rtbcount = 0;
- dqp->q_pincount = 0;
+ atomic_set(&dqp->q_pincount, 0);
dqp->q_hash = NULL;
ASSERT(dqp->dq_flnext == dqp->dq_flprev);
@@ -1221,16 +1221,14 @@ xfs_qm_dqflush(
xfs_dqtrace_entry(dqp, "DQFLUSH");
/*
- * If not dirty, nada.
+ * If not dirty, or it's pinned and we are not supposed to
+ * block, nada.
*/
- if (!XFS_DQ_IS_DIRTY(dqp)) {
+ if (!XFS_DQ_IS_DIRTY(dqp) ||
+ (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) {
xfs_dqfunlock(dqp);
- return (0);
+ return 0;
}
-
- /*
- * Cant flush a pinned dquot. Wait for it.
- */
xfs_qm_dqunpin_wait(dqp);
/*
@@ -1274,10 +1272,8 @@ xfs_qm_dqflush(
dqp->dq_flags &= ~(XFS_DQ_DIRTY);
mp = dqp->q_mount;
- /* lsn is 64 bits */
- spin_lock(&mp->m_ail_lock);
- dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
+ &dqp->q_logitem.qli_item.li_lsn);
/*
* Attach an iodone routine so that we can remove this dquot from the
@@ -1323,8 +1319,10 @@ xfs_qm_dqflush_done(
xfs_dq_logitem_t *qip)
{
xfs_dquot_t *dqp;
+ struct xfs_ail *ailp;
dqp = qip->qli_dquot;
+ ailp = qip->qli_item.li_ailp;
/*
* We only want to pull the item from the AIL if its
@@ -1337,15 +1335,12 @@ xfs_qm_dqflush_done(
if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
qip->qli_item.li_lsn == qip->qli_flush_lsn) {
- spin_lock(&dqp->q_mount->m_ail_lock);
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ spin_lock(&ailp->xa_lock);
if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
- xfs_trans_delete_ail(dqp->q_mount,
- (xfs_log_item_t*)qip);
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
else
- spin_unlock(&dqp->q_mount->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
/*
@@ -1375,7 +1370,7 @@ xfs_dqunlock(
mutex_unlock(&(dqp->q_qlock));
if (dqp->q_logitem.qli_dquot == dqp) {
/* Once was dqp->q_mount, but might just have been cleared */
- xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp,
+ xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
(xfs_log_item_t*)&(dqp->q_logitem));
}
}
@@ -1489,7 +1484,7 @@ xfs_qm_dqpurge(
"xfs_qm_dqpurge: dquot %p flush failed", dqp);
xfs_dqflock(dqp);
}
- ASSERT(dqp->q_pincount == 0);
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
!(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 8958d0faf8d3..7e455337e2ba 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -83,8 +83,8 @@ typedef struct xfs_dquot {
xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
mutex_t q_qlock; /* quota lock */
struct completion q_flush; /* flush completion queue */
- uint q_pincount; /* pin count for this dquot */
- sv_t q_pinwait; /* sync var for pinning */
+ atomic_t q_pincount; /* dquot pin count */
+ wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
#ifdef XFS_DQUOT_TRACE
struct ktrace *q_trace; /* trace header structure */
#endif
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index f028644caa5e..1728f6a7c4f5 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -88,25 +88,22 @@ xfs_qm_dquot_logitem_format(
/*
* Increment the pin count of the given dquot.
- * This value is protected by pinlock spinlock in the xQM structure.
*/
STATIC void
xfs_qm_dquot_logitem_pin(
xfs_dq_logitem_t *logitem)
{
- xfs_dquot_t *dqp;
+ xfs_dquot_t *dqp = logitem->qli_dquot;
- dqp = logitem->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- dqp->q_pincount++;
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
+ atomic_inc(&dqp->q_pincount);
}
/*
* Decrement the pin count of the given dquot, and wake up
* anyone in xfs_dqwait_unpin() if the count goes to 0. The
- * dquot must have been previously pinned with a call to xfs_dqpin().
+ * dquot must have been previously pinned with a call to
+ * xfs_qm_dquot_logitem_pin().
*/
/* ARGSUSED */
STATIC void
@@ -114,16 +111,11 @@ xfs_qm_dquot_logitem_unpin(
xfs_dq_logitem_t *logitem,
int stale)
{
- xfs_dquot_t *dqp;
+ xfs_dquot_t *dqp = logitem->qli_dquot;
- dqp = logitem->qli_dquot;
- ASSERT(dqp->q_pincount > 0);
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- dqp->q_pincount--;
- if (dqp->q_pincount == 0) {
- sv_broadcast(&dqp->q_pinwait);
- }
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
+ ASSERT(atomic_read(&dqp->q_pincount) > 0);
+ if (atomic_dec_and_test(&dqp->q_pincount))
+ wake_up(&dqp->q_pinwait);
}
/* ARGSUSED */
@@ -193,21 +185,14 @@ xfs_qm_dqunpin_wait(
xfs_dquot_t *dqp)
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- if (dqp->q_pincount == 0) {
+ if (atomic_read(&dqp->q_pincount) == 0)
return;
- }
/*
* Give the log a push so we don't wait here too long.
*/
xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- if (dqp->q_pincount == 0) {
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- return;
- }
- sv_wait(&(dqp->q_pinwait), PINOD,
- &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
+ wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}
/*
@@ -310,7 +295,7 @@ xfs_qm_dquot_logitem_trylock(
uint retval;
dqp = qip->qli_dquot;
- if (dqp->q_pincount > 0)
+ if (atomic_read(&dqp->q_pincount) > 0)
return (XFS_ITEM_PINNED);
if (! xfs_qm_dqlock_nowait(dqp))
@@ -568,14 +553,16 @@ xfs_qm_qoffend_logitem_committed(
xfs_lsn_t lsn)
{
xfs_qoff_logitem_t *qfs;
+ struct xfs_ail *ailp;
qfs = qfe->qql_start_lip;
- spin_lock(&qfs->qql_item.li_mountp->m_ail_lock);
+ ailp = qfs->qql_item.li_ailp;
+ spin_lock(&ailp->xa_lock);
/*
* Delete the qoff-start logitem from the AIL.
- * xfs_trans_delete_ail() drops the AIL lock.
+ * xfs_trans_ail_delete() drops the AIL lock.
*/
- xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
kmem_free(qfs);
kmem_free(qfe);
return (xfs_lsn_t)-1;
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index df0ffef9775a..5b198d15e76b 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -20,7 +20,6 @@
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
-#include "xfs_clnt.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
@@ -987,14 +986,10 @@ xfs_qm_dqdetach(
}
/*
- * This is called by VFS_SYNC and flags arg determines the caller,
- * and its motives, as done in xfs_sync.
- *
- * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
- * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
- * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
+ * This is called to sync quotas. We can be told to use non-blocking
+ * semantics by either the SYNC_BDFLUSH flag or the absence of the
+ * SYNC_WAIT flag.
*/
-
int
xfs_qm_sync(
xfs_mount_t *mp,
@@ -1137,7 +1132,6 @@ xfs_qm_init_quotainfo(
return error;
}
- spin_lock_init(&qinf->qi_pinlock);
xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
qinf->qi_dqreclaims = 0;
@@ -1234,7 +1228,6 @@ xfs_qm_destroy_quotainfo(
*/
xfs_qm_rele_quotafs_ref(mp);
- spinlock_destroy(&qi->qi_pinlock);
xfs_qm_list_destroy(&qi->qi_dqlist);
if (qi->qi_uquotaip) {
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 44f25349e478..4f2de9771728 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -106,7 +106,6 @@ typedef struct xfs_qm {
typedef struct xfs_quotainfo {
xfs_inode_t *qi_uquotaip; /* user quota inode */
xfs_inode_t *qi_gquotaip; /* group quota inode */
- spinlock_t qi_pinlock; /* dquot pinning lock */
xfs_dqlist_t qi_dqlist; /* all dquots in filesys */
int qi_dqreclaims; /* a change here indicates
a removal in the dqlist */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index eea2e60b456b..9556df9f7dab 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -20,7 +20,6 @@
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
-#include "xfs_clnt.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 1a3b803dfa55..68139b38aede 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -127,7 +127,7 @@ xfs_qm_quotactl(
break;
case Q_XQUOTASYNC:
- return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL));
+ return xfs_sync_inodes(mp, SYNC_DELWRI);
default:
break;
@@ -1022,101 +1022,104 @@ xfs_qm_export_flags(
/*
- * Go thru all the inodes in the file system, releasing their dquots.
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
+ * Release all the dquots on the inodes in an AG.
*/
-void
-xfs_qm_dqrele_all_inodes(
- struct xfs_mount *mp,
- uint flags)
+STATIC void
+xfs_qm_dqrele_inodes_ag(
+ xfs_mount_t *mp,
+ int ag,
+ uint flags)
{
- xfs_inode_t *ip, *topino;
- uint ireclaims;
- struct inode *vp;
- boolean_t vnode_refd;
+ xfs_inode_t *ip = NULL;
+ xfs_perag_t *pag = &mp->m_perag[ag];
+ int first_index = 0;
+ int nr_found;
- ASSERT(mp->m_quotainfo);
-
- XFS_MOUNT_ILOCK(mp);
-again:
- ip = mp->m_inodes;
- if (ip == NULL) {
- XFS_MOUNT_IUNLOCK(mp);
- return;
- }
do {
- /* Skip markers inserted by xfs_sync */
- if (ip->i_mount == NULL) {
- ip = ip->i_mnext;
- continue;
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+ (void**)&ip, first_index, 1);
+
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
}
- /* Root inode, rbmip and rsumip have associated blocks */
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /* skip quota inodes */
if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
ASSERT(ip->i_udquot == NULL);
ASSERT(ip->i_gdquot == NULL);
- ip = ip->i_mnext;
+ read_unlock(&pag->pag_ici_lock);
continue;
}
- vp = VFS_I(ip);
- if (!vp) {
- ASSERT(ip->i_udquot == NULL);
- ASSERT(ip->i_gdquot == NULL);
- ip = ip->i_mnext;
+
+ /*
+ * If we can't get a reference on the inode, it must be
+ * in reclaim. Leave it for the reclaim code to flush.
+ */
+ if (!igrab(VFS_I(ip))) {
+ read_unlock(&pag->pag_ici_lock);
continue;
}
- vnode_refd = B_FALSE;
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
- ireclaims = mp->m_ireclaims;
- topino = mp->m_inodes;
- vp = vn_grab(vp);
- if (!vp)
- goto again;
-
- XFS_MOUNT_IUNLOCK(mp);
- /* XXX restart limit ? */
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- vnode_refd = B_TRUE;
- } else {
- ireclaims = mp->m_ireclaims;
- topino = mp->m_inodes;
- XFS_MOUNT_IUNLOCK(mp);
+ read_unlock(&pag->pag_ici_lock);
+
+ /* avoid new inodes though we shouldn't find any here */
+ if (xfs_iflags_test(ip, XFS_INEW)) {
+ IRELE(ip);
+ continue;
}
- /*
- * We don't keep the mountlock across the dqrele() call,
- * since it can take a while..
- */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
xfs_qm_dqrele(ip->i_udquot);
ip->i_udquot = NULL;
}
- if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+ if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
+ ip->i_gdquot) {
xfs_qm_dqrele(ip->i_gdquot);
ip->i_gdquot = NULL;
}
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /*
- * Wait until we've dropped the ilock and mountlock to
- * do the vn_rele. Or be condemned to an eternity in the
- * inactive code in hell.
- */
- if (vnode_refd)
- IRELE(ip);
- XFS_MOUNT_ILOCK(mp);
- /*
- * If an inode was inserted or removed, we gotta
- * start over again.
- */
- if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
- /* XXX use a sentinel */
- goto again;
- }
- ip = ip->i_mnext;
- } while (ip != mp->m_inodes);
+ xfs_iput(ip, XFS_ILOCK_EXCL);
+
+ } while (nr_found);
+}
+
+/*
+ * Go thru all the inodes in the file system, releasing their dquots.
+ * Note that the mount structure gets modified to indicate that quotas are off
+ * AFTER this, in the case of quotaoff. This also gets called from
+ * xfs_rootumount.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+ struct xfs_mount *mp,
+ uint flags)
+{
+ int i;
- XFS_MOUNT_IUNLOCK(mp);
+ ASSERT(mp->m_quotainfo);
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ if (!mp->m_perag[i].pag_ici_init)
+ continue;
+ xfs_qm_dqrele_inodes_ag(mp, i, flags);
+ }
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index c27abef7b84f..636104254cfd 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -84,5 +84,5 @@ assfail(char *expr, char *file, int line)
void
xfs_hex_dump(void *p, int length)
{
- print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
+ print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
}
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 540e4c989825..17254b529c54 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -30,7 +30,7 @@
#define XFS_ATTR_TRACE 1
#define XFS_BLI_TRACE 1
#define XFS_BMAP_TRACE 1
-#define XFS_BMBT_TRACE 1
+#define XFS_BTREE_TRACE 1
#define XFS_DIR2_TRACE 1
#define XFS_DQUOT_TRACE 1
#define XFS_ILOCK_TRACE 1
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b2f639a1416f..a8cdd73999a4 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -366,7 +366,7 @@ xfs_acl_allow_set(
return ENOTDIR;
if (vp->i_sb->s_flags & MS_RDONLY)
return EROFS;
- if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
+ if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
return EPERM;
return 0;
}
@@ -413,13 +413,13 @@ xfs_acl_access(
switch (fap->acl_entry[i].ae_tag) {
case ACL_USER_OBJ:
seen_userobj = 1;
- if (fuid != current->fsuid)
+ if (fuid != current_fsuid())
continue;
matched.ae_tag = ACL_USER_OBJ;
matched.ae_perm = allows;
break;
case ACL_USER:
- if (fap->acl_entry[i].ae_id != current->fsuid)
+ if (fap->acl_entry[i].ae_id != current_fsuid())
continue;
matched.ae_tag = ACL_USER;
matched.ae_perm = allows;
@@ -758,7 +758,7 @@ xfs_acl_setmode(
if (gap && nomask)
iattr.ia_mode |= gap->ae_perm << 3;
- return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred);
+ return xfs_setattr(XFS_I(vp), &iattr, 0);
}
/*
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 61b292a9fb41..2bfd86329141 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -192,17 +192,23 @@ typedef struct xfs_perag
xfs_agino_t pagi_freecount; /* number of free inodes */
xfs_agino_t pagi_count; /* number of allocated inodes */
int pagb_count; /* pagb slots in use */
+ xfs_perag_busy_t *pagb_list; /* unstable blocks */
#ifdef __KERNEL__
spinlock_t pagb_lock; /* lock for pagb_list */
-#endif
- xfs_perag_busy_t *pagb_list; /* unstable blocks */
+
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
int pag_ici_init; /* incore inode cache initialised */
rwlock_t pag_ici_lock; /* incore inode lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
+#endif
} xfs_perag_t;
+/*
+ * tags for inode radix tree
+ */
+#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
+
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
(MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 1956f83489f1..c47ce9075728 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -90,6 +90,92 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
*/
/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_eq(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int /* error */
+xfs_alloc_update(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len) /* length of extent */
+{
+ union xfs_btree_rec rec;
+
+ rec.alloc.ar_startblock = cpu_to_be32(bno);
+ rec.alloc.ar_blockcount = cpu_to_be32(len);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+STATIC int /* error */
+xfs_alloc_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t *bno, /* output: starting block of extent */
+ xfs_extlen_t *len, /* output: length of extent */
+ int *stat) /* output: success/failure */
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (!error && *stat == 1) {
+ *bno = be32_to_cpu(rec->alloc.ar_startblock);
+ *len = be32_to_cpu(rec->alloc.ar_blockcount);
+ }
+ return error;
+}
+
+/*
* Compute aligned version of the found extent.
* Takes alignment and min length into account.
*/
@@ -294,21 +380,20 @@ xfs_alloc_fixup_trees(
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
+
#ifdef DEBUG
- {
- xfs_alloc_block_t *bnoblock;
- xfs_alloc_block_t *cntblock;
-
- if (bno_cur->bc_nlevels == 1 &&
- cnt_cur->bc_nlevels == 1) {
- bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
- cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
- XFS_WANT_CORRUPTED_RETURN(
- be16_to_cpu(bnoblock->bb_numrecs) ==
- be16_to_cpu(cntblock->bb_numrecs));
- }
+ if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
+ struct xfs_btree_block *bnoblock;
+ struct xfs_btree_block *cntblock;
+
+ bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
+ cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
+
+ XFS_WANT_CORRUPTED_RETURN(
+ bnoblock->bb_numrecs == cntblock->bb_numrecs);
}
#endif
+
/*
* Deal with all four cases: the allocated record is contained
* within the freespace record, so we can have new freespace
@@ -333,7 +418,7 @@ xfs_alloc_fixup_trees(
/*
* Delete the entry from the by-size btree.
*/
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
/*
@@ -343,7 +428,7 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
@@ -351,7 +436,7 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
@@ -362,7 +447,7 @@ xfs_alloc_fixup_trees(
/*
* No remaining freespace, just delete the by-block tree entry.
*/
- if ((error = xfs_alloc_delete(bno_cur, &i)))
+ if ((error = xfs_btree_delete(bno_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
} else {
@@ -379,7 +464,7 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(bno_cur, &i)))
+ if ((error = xfs_btree_insert(bno_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
@@ -640,8 +725,8 @@ xfs_alloc_ag_vextent_exact(
/*
* Allocate/initialize a cursor for the by-number freespace btree.
*/
- bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
/*
* Lookup bno and minlen in the btree (minlen is irrelevant, really).
* Look for the closest free block <= bno, it must contain bno
@@ -696,8 +781,8 @@ xfs_alloc_ag_vextent_exact(
* We are allocating agbno for rlen [agbno .. end]
* Allocate/initialize a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
ASSERT(args->agbno + args->len <=
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
@@ -759,8 +844,8 @@ xfs_alloc_ag_vextent_near(
/*
* Get a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
ltlen = 0;
bno_cur_lt = bno_cur_gt = NULL;
/*
@@ -818,7 +903,7 @@ xfs_alloc_ag_vextent_near(
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (ltlen >= args->minlen)
break;
- if ((error = xfs_alloc_increment(cnt_cur, 0, &i)))
+ if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
goto error0;
} while (i);
ASSERT(ltlen >= args->minlen);
@@ -828,7 +913,7 @@ xfs_alloc_ag_vextent_near(
i = cnt_cur->bc_ptrs[0];
for (j = 1, blen = 0, bdiff = 0;
!error && j && (blen < args->maxlen || bdiff > 0);
- error = xfs_alloc_increment(cnt_cur, 0, &j)) {
+ error = xfs_btree_increment(cnt_cur, 0, &j)) {
/*
* For each entry, decide if it's better than
* the previous best entry.
@@ -886,8 +971,8 @@ xfs_alloc_ag_vextent_near(
/*
* Set up a cursor for the by-bno tree.
*/
- bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
- args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
+ args->agbp, args->agno, XFS_BTNUM_BNO);
/*
* Fix up the btree entries.
*/
@@ -914,8 +999,8 @@ xfs_alloc_ag_vextent_near(
/*
* Allocate and initialize the cursor for the leftward search.
*/
- bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
/*
* Lookup <= bno to find the leftward search's starting point.
*/
@@ -938,7 +1023,7 @@ xfs_alloc_ag_vextent_near(
* Increment the cursor, so we will point at the entry just right
* of the leftward entry if any, or to the leftmost entry.
*/
- if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+ if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
/*
@@ -961,7 +1046,7 @@ xfs_alloc_ag_vextent_near(
args->minlen, &ltbnoa, &ltlena);
if (ltlena >= args->minlen)
break;
- if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
goto error0;
if (!i) {
xfs_btree_del_cursor(bno_cur_lt,
@@ -977,7 +1062,7 @@ xfs_alloc_ag_vextent_near(
args->minlen, &gtbnoa, &gtlena);
if (gtlena >= args->minlen)
break;
- if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+ if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
xfs_btree_del_cursor(bno_cur_gt,
@@ -1066,7 +1151,7 @@ xfs_alloc_ag_vextent_near(
/*
* Fell off the right end.
*/
- if ((error = xfs_alloc_increment(
+ if ((error = xfs_btree_increment(
bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
@@ -1162,7 +1247,7 @@ xfs_alloc_ag_vextent_near(
/*
* Fell off the left end.
*/
- if ((error = xfs_alloc_decrement(
+ if ((error = xfs_btree_decrement(
bno_cur_lt, 0, &i)))
goto error0;
if (!i) {
@@ -1267,8 +1352,8 @@ xfs_alloc_ag_vextent_size(
/*
* Allocate and initialize a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
bno_cur = NULL;
/*
* Look for an entry >= maxlen+alignment-1 blocks.
@@ -1321,7 +1406,7 @@ xfs_alloc_ag_vextent_size(
bestflen = flen;
bestfbno = fbno;
for (;;) {
- if ((error = xfs_alloc_decrement(cnt_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
goto error0;
if (i == 0)
break;
@@ -1372,8 +1457,8 @@ xfs_alloc_ag_vextent_size(
/*
* Allocate and initialize a cursor for the by-block tree.
*/
- bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
rbno, rlen, XFSA_FIXUP_CNT_OK)))
goto error0;
@@ -1416,7 +1501,7 @@ xfs_alloc_ag_vextent_small(
xfs_extlen_t flen;
int i;
- if ((error = xfs_alloc_decrement(ccur, 0, &i)))
+ if ((error = xfs_btree_decrement(ccur, 0, &i)))
goto error0;
if (i) {
if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
@@ -1515,8 +1600,7 @@ xfs_free_ag_extent(
/*
* Allocate and initialize a cursor for the by-block btree.
*/
- bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL,
- 0);
+ bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
cnt_cur = NULL;
/*
* Look for a neighboring block on the left (lower block numbers)
@@ -1549,7 +1633,7 @@ xfs_free_ag_extent(
* Look for a neighboring block on the right (higher block numbers)
* that is contiguous with this space.
*/
- if ((error = xfs_alloc_increment(bno_cur, 0, &haveright)))
+ if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
goto error0;
if (haveright) {
/*
@@ -1575,8 +1659,7 @@ xfs_free_ag_extent(
/*
* Now allocate and initialize a cursor for the by-size tree.
*/
- cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL,
- 0);
+ cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
/*
* Have both left and right contiguous neighbors.
* Merge all three into a single free block.
@@ -1588,7 +1671,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
@@ -1597,19 +1680,19 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Delete the old by-block entry for the right block.
*/
- if ((error = xfs_alloc_delete(bno_cur, &i)))
+ if ((error = xfs_btree_delete(bno_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Move the by-block cursor back to the left neighbor.
*/
- if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
#ifdef DEBUG
@@ -1648,14 +1731,14 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Back up the by-block cursor to the left neighbor, and
* update its length.
*/
- if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
nbno = ltbno;
@@ -1674,7 +1757,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
@@ -1693,7 +1776,7 @@ xfs_free_ag_extent(
else {
nbno = bno;
nlen = len;
- if ((error = xfs_alloc_insert(bno_cur, &i)))
+ if ((error = xfs_btree_insert(bno_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
}
@@ -1705,7 +1788,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -2188,6 +2271,9 @@ xfs_alloc_read_agf(
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
+ if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+ agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
+ be32_to_cpu(agf->agf_length);
if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
XFS_RANDOM_ALLOC_READ_AGF))) {
XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
@@ -2213,6 +2299,7 @@ xfs_alloc_read_agf(
#ifdef DEBUG
else if (!XFS_FORCED_SHUTDOWN(mp)) {
ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
+ ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5aec15d0651e..588172796f7b 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -121,6 +121,19 @@ extern ktrace_t *xfs_alloc_trace_buf;
#define XFS_ALLOC_KTRACE_BUSYSEARCH 6
#endif
+void
+xfs_alloc_mark_busy(xfs_trans_t *tp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len);
+
+void
+xfs_alloc_clear_busy(xfs_trans_t *tp,
+ xfs_agnumber_t ag,
+ int idx);
+
+#endif /* __KERNEL__ */
+
/*
* Compute and fill in value of m_ag_maxlevels.
*/
@@ -196,18 +209,4 @@ xfs_free_extent(
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len); /* length of extent */
-void
-xfs_alloc_mark_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len);
-
-void
-xfs_alloc_clear_busy(xfs_trans_t *tp,
- xfs_agnumber_t ag,
- int idx);
-
-
-#endif /* __KERNEL__ */
-
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3ce2645508ae..733cb75a8c5d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -35,2177 +35,464 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
-/*
- * Prototypes for internal functions.
- */
-STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int);
-STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *);
-STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
- xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
-STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
+STATIC struct xfs_btree_cur *
+xfs_allocbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.agbp, cur->bc_private.a.agno,
+ cur->bc_btnum);
+}
-/*
- * Internal functions.
- */
+STATIC void
+xfs_allocbt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int inc)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ int btnum = cur->bc_btnum;
-/*
- * Single level of the xfs_alloc_delete record deletion routine.
- * Delete record pointed to by cur/level.
- * Remove the record from its block then rebalance the tree.
- * Return 0 for error, 1 for done, 2 to go on to the next level.
- */
-STATIC int /* error */
-xfs_alloc_delrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level removing record from */
- int *stat) /* fail/done/go-on */
+ ASSERT(ptr->s != 0);
+
+ agf->agf_roots[btnum] = ptr->s;
+ be32_add_cpu(&agf->agf_levels[btnum], inc);
+ cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
+
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+STATIC int
+xfs_allocbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int length,
+ int *stat)
{
- xfs_agf_t *agf; /* allocation group freelist header */
- xfs_alloc_block_t *block; /* btree block record/key lives in */
- xfs_agblock_t bno; /* btree block number */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* kp points here if block is level 0 */
- xfs_agblock_t lbno; /* left block's block number */
- xfs_buf_t *lbp; /* left block's buffer pointer */
- xfs_alloc_block_t *left; /* left btree block */
- xfs_alloc_key_t *lkp=NULL; /* left block key pointer */
- xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */
- int lrecs=0; /* number of records in left block */
- xfs_alloc_rec_t *lrp; /* left block record pointer */
- xfs_mount_t *mp; /* mount structure */
- int ptr; /* index in btree block for this rec */
- xfs_agblock_t rbno; /* right block's block number */
- xfs_buf_t *rbp; /* right block's buffer pointer */
- xfs_alloc_block_t *right; /* right btree block */
- xfs_alloc_key_t *rkp; /* right block key pointer */
- xfs_alloc_ptr_t *rpp; /* right block address pointer */
- int rrecs=0; /* number of records in right block */
- int numrecs;
- xfs_alloc_rec_t *rrp; /* right block record pointer */
- xfs_btree_cur_t *tcur; /* temporary btree cursor */
+ int error;
+ xfs_agblock_t bno;
- /*
- * Get the index of the entry being deleted, check for nothing there.
- */
- ptr = cur->bc_ptrs[level];
- if (ptr == 0) {
- *stat = 0;
- return 0;
- }
- /*
- * Get the buffer & block containing the record or key/ptr.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ /* Allocate the new block from the freelist. If we can't, give up. */
+ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+ &bno, 1);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
-#endif
- /*
- * Fail if we're off the end of the block.
- */
- numrecs = be16_to_cpu(block->bb_numrecs);
- if (ptr > numrecs) {
+ }
+
+ if (bno == NULLAGBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
- XFS_STATS_INC(xs_abt_delrec);
- /*
- * It's a nonleaf. Excise the key and ptr being deleted, by
- * sliding the entries past them down one.
- * Log the changed areas of the block.
- */
- if (level > 0) {
- lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = ptr; i < numrecs; i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
- return error;
- }
-#endif
- if (ptr < numrecs) {
- memmove(&lkp[ptr - 1], &lkp[ptr],
- (numrecs - ptr) * sizeof(*lkp));
- memmove(&lpp[ptr - 1], &lpp[ptr],
- (numrecs - ptr) * sizeof(*lpp));
- xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1);
- xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1);
- }
- }
- /*
- * It's a leaf. Excise the record being deleted, by sliding the
- * entries past it down one. Log the changed areas of the block.
- */
- else {
- lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
- if (ptr < numrecs) {
- memmove(&lrp[ptr - 1], &lrp[ptr],
- (numrecs - ptr) * sizeof(*lrp));
- xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1);
- }
- /*
- * If it's the first record in the block, we'll need a key
- * structure to pass up to the next level (updkey).
- */
- if (ptr == 1) {
- key.ar_startblock = lrp->ar_startblock;
- key.ar_blockcount = lrp->ar_blockcount;
- lkp = &key;
- }
- }
- /*
- * Decrement and log the number of entries in the block.
- */
- numrecs--;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
- /*
- * See if the longest free extent in the allocation group was
- * changed by this operation. True if it's the by-size btree, and
- * this is the leaf level, and there is no right sibling block,
- * and this was the last record.
- */
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- mp = cur->bc_mp;
- if (level == 0 &&
- cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- ptr > numrecs) {
- ASSERT(ptr == numrecs + 1);
- /*
- * There are still records in the block. Grab the size
- * from the last one.
- */
- if (numrecs) {
- rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur);
- agf->agf_longest = rrp->ar_blockcount;
- }
- /*
- * No free extents left.
- */
- else
- agf->agf_longest = 0;
- mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
- be32_to_cpu(agf->agf_longest);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
- }
- /*
- * Is this the root level? If so, we're almost done.
- */
- if (level == cur->bc_nlevels - 1) {
- /*
- * If this is the root level,
- * and there's only one entry left,
- * and it's NOT the leaf level,
- * then we can get rid of this level.
- */
- if (numrecs == 1 && level > 0) {
- /*
- * lpp is still set to the first pointer in the block.
- * Make it the new root of the btree.
- */
- bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
- agf->agf_roots[cur->bc_btnum] = *lpp;
- be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1);
- mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
- /*
- * Put this buffer/block on the ag's freelist.
- */
- error = xfs_alloc_put_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, NULL, bno, 1);
- if (error)
- return error;
- /*
- * Since blocks move to the free list without the
- * coordination used in xfs_bmap_finish, we can't allow
- * block to be available for reallocation and
- * non-transaction writing (user data) until we know
- * that the transaction that moved it to the free list
- * is permanently on disk. We track the blocks by
- * declaring these blocks as "busy"; the busy list is
- * maintained on a per-ag basis and each transaction
- * records which entries should be removed when the
- * iclog commits to disk. If a busy block is
- * allocated, the iclog is pushed up to the LSN
- * that freed the block.
- */
- xfs_alloc_mark_busy(cur->bc_tp,
- be32_to_cpu(agf->agf_seqno), bno, 1);
+ xfs_trans_agbtree_delta(cur->bc_tp, 1);
+ new->s = cpu_to_be32(bno);
- xfs_trans_agbtree_delta(cur->bc_tp, -1);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_ROOTS | XFS_AGF_LEVELS);
- /*
- * Update the cursor so there's one fewer level.
- */
- xfs_btree_setbuf(cur, level, NULL);
- cur->bc_nlevels--;
- } else if (level > 0 &&
- (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * If we deleted the leftmost entry in the block, update the
- * key values above us in the tree.
- */
- if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
- return error;
- /*
- * If the number of records remaining in the block is at least
- * the minimum, we're done.
- */
- if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * Otherwise, we have to move some records around to keep the
- * tree balanced. Look at the left and right sibling blocks to
- * see if we can re-balance by moving only one record.
- */
- rbno = be32_to_cpu(block->bb_rightsib);
- lbno = be32_to_cpu(block->bb_leftsib);
- bno = NULLAGBLOCK;
- ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
- /*
- * Duplicate the cursor so our btree manipulations here won't
- * disrupt the next level up.
- */
- if ((error = xfs_btree_dup_cursor(cur, &tcur)))
- return error;
- /*
- * If there's a right sibling, see if it's ok to shift an entry
- * out of it.
- */
- if (rbno != NULLAGBLOCK) {
- /*
- * Move the temp cursor to the last entry in the next block.
- * Actually any entry but the first would suffice.
- */
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_increment(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- /*
- * Grab a pointer to the block.
- */
- rbp = tcur->bc_bufs[level];
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- goto error0;
-#endif
- /*
- * Grab the current block number, for future use.
- */
- bno = be32_to_cpu(right->bb_leftsib);
- /*
- * If right block is full enough so that removing one entry
- * won't make it too empty, and left-shifting an entry out
- * of right to us works, we're done.
- */
- if (be16_to_cpu(right->bb_numrecs) - 1 >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if ((error = xfs_alloc_lshift(tcur, level, &i)))
- goto error0;
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur));
- xfs_btree_del_cursor(tcur,
- XFS_BTREE_NOERROR);
- if (level > 0 &&
- (error = xfs_alloc_decrement(cur, level,
- &i)))
- return error;
- *stat = 1;
- return 0;
- }
- }
- /*
- * Otherwise, grab the number of records in right for
- * future reference, and fix up the temp cursor to point
- * to our block again (last record).
- */
- rrecs = be16_to_cpu(right->bb_numrecs);
- if (lbno != NULLAGBLOCK) {
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_decrement(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- }
- }
- /*
- * If there's a left sibling, see if it's ok to shift an entry
- * out of it.
- */
- if (lbno != NULLAGBLOCK) {
- /*
- * Move the temp cursor to the first entry in the
- * previous block.
- */
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_decrement(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_btree_firstrec(tcur, level);
- /*
- * Grab a pointer to the block.
- */
- lbp = tcur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- goto error0;
-#endif
- /*
- * Grab the current block number, for future use.
- */
- bno = be32_to_cpu(left->bb_rightsib);
- /*
- * If left block is full enough so that removing one entry
- * won't make it too empty, and right-shifting an entry out
- * of left to us works, we're done.
- */
- if (be16_to_cpu(left->bb_numrecs) - 1 >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if ((error = xfs_alloc_rshift(tcur, level, &i)))
- goto error0;
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur));
- xfs_btree_del_cursor(tcur,
- XFS_BTREE_NOERROR);
- if (level == 0)
- cur->bc_ptrs[0]++;
- *stat = 1;
- return 0;
- }
- }
- /*
- * Otherwise, grab the number of records in right for
- * future reference.
- */
- lrecs = be16_to_cpu(left->bb_numrecs);
- }
- /*
- * Delete the temp cursor, we're done with it.
- */
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- /*
- * If here, we need to do a join to keep the tree balanced.
- */
- ASSERT(bno != NULLAGBLOCK);
- /*
- * See if we can join with the left neighbor block.
- */
- if (lbno != NULLAGBLOCK &&
- lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * Set "right" to be the starting block,
- * "left" to be the left neighbor.
- */
- rbno = bno;
- right = block;
- rrecs = be16_to_cpu(right->bb_numrecs);
- rbp = bp;
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, lbno, 0, &lbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- lrecs = be16_to_cpu(left->bb_numrecs);
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
- }
- /*
- * If that won't work, see if we can join with the right neighbor block.
- */
- else if (rbno != NULLAGBLOCK &&
- rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * Set "left" to be the starting block,
- * "right" to be the right neighbor.
- */
- lbno = bno;
- left = block;
- lrecs = be16_to_cpu(left->bb_numrecs);
- lbp = bp;
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, rbno, 0, &rbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- rrecs = be16_to_cpu(right->bb_numrecs);
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
- }
- /*
- * Otherwise, we can't fix the imbalance.
- * Just return. This is probably a logic error, but it's not fatal.
- */
- else {
- if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * We're now going to join "left" and "right" by moving all the stuff
- * in "right" to "left" and deleting "right".
- */
- if (level > 0) {
- /*
- * It's a non-leaf. Move keys and pointers.
- */
- lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < rrecs; i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
- return error;
- }
-#endif
- memcpy(lkp, rkp, rrecs * sizeof(*lkp));
- memcpy(lpp, rpp, rrecs * sizeof(*lpp));
- xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
- xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
- } else {
- /*
- * It's a leaf. Move records.
- */
- lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memcpy(lrp, rrp, rrecs * sizeof(*lrp));
- xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
- }
- /*
- * If we joined with the left neighbor, set the buffer in the
- * cursor to the left block, and fix up the index.
- */
- if (bp != lbp) {
- xfs_btree_setbuf(cur, level, lbp);
- cur->bc_ptrs[level] += lrecs;
- }
- /*
- * If we joined with the right neighbor and there's a level above
- * us, increment the cursor at that level.
- */
- else if (level + 1 < cur->bc_nlevels &&
- (error = xfs_alloc_increment(cur, level + 1, &i)))
- return error;
- /*
- * Fix up the number of records in the surviving block.
- */
- lrecs += rrecs;
- left->bb_numrecs = cpu_to_be16(lrecs);
- /*
- * Fix up the right block pointer in the surviving block, and log it.
- */
- left->bb_rightsib = right->bb_rightsib;
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- /*
- * If there is a right sibling now, make it point to the
- * remaining block.
- */
- if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
- xfs_alloc_block_t *rrblock;
- xfs_buf_t *rrbp;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+}
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
- &rrbp, XFS_ALLOC_BTREE_REF)))
- return error;
- rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
- if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
- return error;
- rrblock->bb_leftsib = cpu_to_be32(lbno);
- xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
- }
- /*
- * Free the deleting block by putting it on the freelist.
- */
- error = xfs_alloc_put_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, NULL, rbno, 1);
+STATIC int
+xfs_allocbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agblock_t bno;
+ int error;
+
+ bno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(bp));
+ error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
if (error)
return error;
+
/*
- * Since blocks move to the free list without the coordination
- * used in xfs_bmap_finish, we can't allow block to be available
- * for reallocation and non-transaction writing (user data)
- * until we know that the transaction that moved it to the free
- * list is permanently on disk. We track the blocks by declaring
- * these blocks as "busy"; the busy list is maintained on a
- * per-ag basis and each transaction records which entries
- * should be removed when the iclog commits to disk. If a
- * busy block is allocated, the iclog is pushed up to the
+ * Since blocks move to the free list without the coordination used in
+ * xfs_bmap_finish, we can't allow block to be available for
+ * reallocation and non-transaction writing (user data) until we know
+ * that the transaction that moved it to the free list is permanently
+ * on disk. We track the blocks by declaring these blocks as "busy";
+ * the busy list is maintained on a per-ag basis and each transaction
+ * records which entries should be removed when the iclog commits to
+ * disk. If a busy block is allocated, the iclog is pushed up to the
* LSN that freed the block.
*/
xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
-
- /*
- * Adjust the current level's cursor so that we're left referring
- * to the right node, after we're done.
- * If this leaves the ptr value 0 our caller will fix it up.
- */
- if (level > 0)
- cur->bc_ptrs[level]--;
- /*
- * Return value means the next level up has something to do.
- */
- *stat = 2;
return 0;
-
-error0:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
}
/*
- * Insert one record/level. Return information to the caller
- * allowing the next level up to proceed if necessary.
+ * Update the longest extent in the AGF
*/
-STATIC int /* error */
-xfs_alloc_insrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to insert record at */
- xfs_agblock_t *bnop, /* i/o: block number inserted */
- xfs_alloc_rec_t *recp, /* i/o: record data inserted */
- xfs_btree_cur_t **curp, /* output: new cursor replacing cur */
- int *stat) /* output: success/failure */
+STATIC void
+xfs_allocbt_update_lastrec(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_rec *rec,
+ int ptr,
+ int reason)
{
- xfs_agf_t *agf; /* allocation group freelist header */
- xfs_alloc_block_t *block; /* btree block record/key lives in */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* key value being inserted */
- xfs_alloc_key_t *kp; /* pointer to btree keys */
- xfs_agblock_t nbno; /* block number of allocated block */
- xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
- xfs_alloc_key_t nkey; /* new key value, from split */
- xfs_alloc_rec_t nrec; /* new record value, for caller */
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ __be32 len;
int numrecs;
- int optr; /* old ptr value */
- xfs_alloc_ptr_t *pp; /* pointer to btree addresses */
- int ptr; /* index in btree block for this rec */
- xfs_alloc_rec_t *rp; /* pointer to btree records */
- ASSERT(be32_to_cpu(recp->ar_blockcount) > 0);
+ ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
+
+ switch (reason) {
+ case LASTREC_UPDATE:
+ /*
+ * If this is the last leaf block and it's the last record,
+ * then update the size of the longest extent in the AG.
+ */
+ if (ptr != xfs_btree_get_numrecs(block))
+ return;
+ len = rec->alloc.ar_blockcount;
+ break;
+ case LASTREC_INSREC:
+ if (be32_to_cpu(rec->alloc.ar_blockcount) <=
+ be32_to_cpu(agf->agf_longest))
+ return;
+ len = rec->alloc.ar_blockcount;
+ break;
+ case LASTREC_DELREC:
+ numrecs = xfs_btree_get_numrecs(block);
+ if (ptr <= numrecs)
+ return;
+ ASSERT(ptr == numrecs + 1);
- /*
- * GCC doesn't understand the (arguably complex) control flow in
- * this function and complains about uninitialized structure fields
- * without this.
- */
- memset(&nrec, 0, sizeof(nrec));
+ if (numrecs) {
+ xfs_alloc_rec_t *rrp;
- /*
- * If we made it to the root level, allocate a new root block
- * and we're done.
- */
- if (level >= cur->bc_nlevels) {
- XFS_STATS_INC(xs_abt_insrec);
- if ((error = xfs_alloc_newroot(cur, &i)))
- return error;
- *bnop = NULLAGBLOCK;
- *stat = i;
- return 0;
- }
- /*
- * Make a key out of the record data to be inserted, and save it.
- */
- key.ar_startblock = recp->ar_startblock;
- key.ar_blockcount = recp->ar_blockcount;
- optr = ptr = cur->bc_ptrs[level];
- /*
- * If we're off the left edge, return failure.
- */
- if (ptr == 0) {
- *stat = 0;
- return 0;
- }
- XFS_STATS_INC(xs_abt_insrec);
- /*
- * Get pointers to the btree buffer and block.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- numrecs = be16_to_cpu(block->bb_numrecs);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
- /*
- * Check that the new entry is being inserted in the right place.
- */
- if (ptr <= numrecs) {
- if (level == 0) {
- rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- xfs_btree_check_rec(cur->bc_btnum, recp, rp);
+ rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
+ len = rrp->ar_blockcount;
} else {
- kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
- xfs_btree_check_key(cur->bc_btnum, &key, kp);
- }
- }
-#endif
- nbno = NULLAGBLOCK;
- ncur = NULL;
- /*
- * If the block is full, we can't insert the new entry until we
- * make the block un-full.
- */
- if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * First, try shifting an entry to the right neighbor.
- */
- if ((error = xfs_alloc_rshift(cur, level, &i)))
- return error;
- if (i) {
- /* nothing */
- }
- /*
- * Next, try shifting an entry to the left neighbor.
- */
- else {
- if ((error = xfs_alloc_lshift(cur, level, &i)))
- return error;
- if (i)
- optr = ptr = cur->bc_ptrs[level];
- else {
- /*
- * Next, try splitting the current block in
- * half. If this works we have to re-set our
- * variables because we could be in a
- * different block now.
- */
- if ((error = xfs_alloc_split(cur, level, &nbno,
- &nkey, &ncur, &i)))
- return error;
- if (i) {
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error =
- xfs_btree_check_sblock(cur,
- block, level, bp)))
- return error;
-#endif
- ptr = cur->bc_ptrs[level];
- nrec.ar_startblock = nkey.ar_startblock;
- nrec.ar_blockcount = nkey.ar_blockcount;
- }
- /*
- * Otherwise the insert fails.
- */
- else {
- *stat = 0;
- return 0;
- }
- }
- }
- }
- /*
- * At this point we know there's room for our new entry in the block
- * we're pointing at.
- */
- numrecs = be16_to_cpu(block->bb_numrecs);
- if (level > 0) {
- /*
- * It's a non-leaf entry. Make a hole for the new data
- * in the key and ptr regions of the block.
- */
- kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = numrecs; i >= ptr; i--) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
- return error;
+ len = 0;
}
-#endif
- memmove(&kp[ptr], &kp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*kp));
- memmove(&pp[ptr], &pp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*pp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
- return error;
-#endif
- /*
- * Now stuff the new data in, bump numrecs and log the new data.
- */
- kp[ptr - 1] = key;
- pp[ptr - 1] = cpu_to_be32(*bnop);
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_alloc_log_keys(cur, bp, ptr, numrecs);
- xfs_alloc_log_ptrs(cur, bp, ptr, numrecs);
-#ifdef DEBUG
- if (ptr < numrecs)
- xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
- kp + ptr);
-#endif
- } else {
- /*
- * It's a leaf entry. Make a hole for the new record.
- */
- rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
- memmove(&rp[ptr], &rp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*rp));
- /*
- * Now stuff the new record in, bump numrecs
- * and log the new data.
- */
- rp[ptr - 1] = *recp;
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_alloc_log_recs(cur, bp, ptr, numrecs);
-#ifdef DEBUG
- if (ptr < numrecs)
- xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
- rp + ptr);
-#endif
- }
- /*
- * Log the new number of records in the btree header.
- */
- xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
- /*
- * If we inserted at the start of a block, update the parents' keys.
- */
- if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
- return error;
- /*
- * Look to see if the longest extent in the allocation group
- * needs to be updated.
- */
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- if (level == 0 &&
- cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
- /*
- * If this is a leaf in the by-size btree and there
- * is no right sibling block and this block is bigger
- * than the previous longest block, update it.
- */
- agf->agf_longest = recp->ar_blockcount;
- cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
- = be32_to_cpu(recp->ar_blockcount);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
+ break;
+ default:
+ ASSERT(0);
+ return;
}
- /*
- * Return the new block number, if any.
- * If there is one, give back a record value and a cursor too.
- */
- *bnop = nbno;
- if (nbno != NULLAGBLOCK) {
- *recp = nrec;
- *curp = ncur;
- }
- *stat = 1;
- return 0;
+
+ agf->agf_longest = len;
+ cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
+ xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
}
-/*
- * Log header fields from a btree block.
- */
-STATIC void
-xfs_alloc_log_block(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *bp, /* buffer containing btree block */
- int fields) /* mask of fields: XFS_BB_... */
+STATIC int
+xfs_allocbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
{
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- static const short offsets[] = { /* table of offsets */
- offsetof(xfs_alloc_block_t, bb_magic),
- offsetof(xfs_alloc_block_t, bb_level),
- offsetof(xfs_alloc_block_t, bb_numrecs),
- offsetof(xfs_alloc_block_t, bb_leftsib),
- offsetof(xfs_alloc_block_t, bb_rightsib),
- sizeof(xfs_alloc_block_t)
- };
+ return cur->bc_mp->m_alloc_mnr[level != 0];
+}
- xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
- xfs_trans_log_buf(tp, bp, first, last);
+STATIC int
+xfs_allocbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_alloc_mxr[level != 0];
}
-/*
- * Log keys from a btree block (nonleaf).
- */
STATIC void
-xfs_alloc_log_keys(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int kfirst, /* index of first key to log */
- int klast) /* index of last key to log */
+xfs_allocbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- xfs_alloc_key_t *kp; /* key pointer in btree block */
- int last; /* last byte offset logged */
+ ASSERT(rec->alloc.ar_startblock != 0);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ key->alloc.ar_startblock = rec->alloc.ar_startblock;
+ key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}
-/*
- * Log block pointer fields from a btree block (nonleaf).
- */
STATIC void
-xfs_alloc_log_ptrs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int pfirst, /* index of first pointer to log */
- int plast) /* index of last pointer to log */
+xfs_allocbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- xfs_alloc_ptr_t *pp; /* block-pointer pointer in btree blk */
+ ASSERT(key->alloc.ar_startblock != 0);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ rec->alloc.ar_startblock = key->alloc.ar_startblock;
+ rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
}
-/*
- * Log records from a btree block (leaf).
- */
STATIC void
-xfs_alloc_log_recs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int rfirst, /* index of first record to log */
- int rlast) /* index of last record to log */
+xfs_allocbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- xfs_alloc_rec_t *rp; /* record pointer for btree block */
-
+ ASSERT(cur->bc_rec.a.ar_startblock != 0);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
-#ifdef DEBUG
- {
- xfs_agf_t *agf;
- xfs_alloc_rec_t *p;
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
- ASSERT(be32_to_cpu(p->ar_startblock) +
- be32_to_cpu(p->ar_blockcount) <=
- be32_to_cpu(agf->agf_length));
- }
-#endif
- first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
+ rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}
-/*
- * Lookup the record. The cursor is made to point to it, based on dir.
- * Return 0 if can't find any such record, 1 for success.
- */
-STATIC int /* error */
-xfs_alloc_lookup(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_lookup_t dir, /* <=, ==, or >= */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
{
- xfs_agblock_t agbno; /* a.g. relative btree block number */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_alloc_block_t *block=NULL; /* current btree block */
- int diff; /* difference for the current key */
- int error; /* error return value */
- int keyno=0; /* current key number */
- int level; /* level in the btree */
- xfs_mount_t *mp; /* file system mount point */
-
- XFS_STATS_INC(xs_abt_lookup);
- /*
- * Get the allocation group header, and the root block number.
- */
- mp = cur->bc_mp;
-
- {
- xfs_agf_t *agf; /* a.g. freespace header */
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- agno = be32_to_cpu(agf->agf_seqno);
- agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
- }
- /*
- * Iterate over each level in the btree, starting at the root.
- * For each level above the leaves, find the key we need, based
- * on the lookup record, then follow the corresponding block
- * pointer down to the next level.
- */
- for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
- xfs_buf_t *bp; /* buffer pointer for btree block */
- xfs_daddr_t d; /* disk address of btree block */
-
- /*
- * Get the disk address we're looking for.
- */
- d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- /*
- * If the old buffer at this level is for a different block,
- * throw it away, otherwise just use it.
- */
- bp = cur->bc_bufs[level];
- if (bp && XFS_BUF_ADDR(bp) != d)
- bp = NULL;
- if (!bp) {
- /*
- * Need to get a new buffer. Read it, then
- * set it in the cursor, releasing the old one.
- */
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
- agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
- return error;
- xfs_btree_setbuf(cur, level, bp);
- /*
- * Point to the btree block, now that we have the buffer
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, level,
- bp)))
- return error;
- } else
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- /*
- * If we already had a key match at a higher level, we know
- * we need to use the first entry in this block.
- */
- if (diff == 0)
- keyno = 1;
- /*
- * Otherwise we need to search this block. Do a binary search.
- */
- else {
- int high; /* high entry number */
- xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */
- xfs_alloc_rec_t *krbase=NULL;/* base of records in block */
- int low; /* low entry number */
-
- /*
- * Get a pointer to keys or records.
- */
- if (level > 0)
- kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- else
- krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
- /*
- * Set low and high entry numbers, 1-based.
- */
- low = 1;
- if (!(high = be16_to_cpu(block->bb_numrecs))) {
- /*
- * If the block is empty, the tree must
- * be an empty leaf.
- */
- ASSERT(level == 0 && cur->bc_nlevels == 1);
- cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
- *stat = 0;
- return 0;
- }
- /*
- * Binary search the block.
- */
- while (low <= high) {
- xfs_extlen_t blockcount; /* key value */
- xfs_agblock_t startblock; /* key value */
-
- XFS_STATS_INC(xs_abt_compare);
- /*
- * keyno is average of low and high.
- */
- keyno = (low + high) >> 1;
- /*
- * Get startblock & blockcount.
- */
- if (level > 0) {
- xfs_alloc_key_t *kkp;
-
- kkp = kkbase + keyno - 1;
- startblock = be32_to_cpu(kkp->ar_startblock);
- blockcount = be32_to_cpu(kkp->ar_blockcount);
- } else {
- xfs_alloc_rec_t *krp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- krp = krbase + keyno - 1;
- startblock = be32_to_cpu(krp->ar_startblock);
- blockcount = be32_to_cpu(krp->ar_blockcount);
- }
- /*
- * Compute difference to get next direction.
- */
- if (cur->bc_btnum == XFS_BTNUM_BNO)
- diff = (int)startblock -
- (int)cur->bc_rec.a.ar_startblock;
- else if (!(diff = (int)blockcount -
- (int)cur->bc_rec.a.ar_blockcount))
- diff = (int)startblock -
- (int)cur->bc_rec.a.ar_startblock;
- /*
- * Less than, move right.
- */
- if (diff < 0)
- low = keyno + 1;
- /*
- * Greater than, move left.
- */
- else if (diff > 0)
- high = keyno - 1;
- /*
- * Equal, we're done.
- */
- else
- break;
- }
- }
- /*
- * If there are more levels, set up for the next level
- * by getting the block number and filling in the cursor.
- */
- if (level > 0) {
- /*
- * If we moved left, need the previous key number,
- * unless there isn't one.
- */
- if (diff > 0 && --keyno < 1)
- keyno = 1;
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, agbno, level)))
- return error;
-#endif
- cur->bc_ptrs[level] = keyno;
- }
- }
- /*
- * Done with the search.
- * See if we need to adjust the results.
- */
- if (dir != XFS_LOOKUP_LE && diff < 0) {
- keyno++;
- /*
- * If ge search and we went off the end of the block, but it's
- * not the last block, we're in the wrong block.
- */
- if (dir == XFS_LOOKUP_GE &&
- keyno > be16_to_cpu(block->bb_numrecs) &&
- be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
- int i;
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
- cur->bc_ptrs[0] = keyno;
- if ((error = xfs_alloc_increment(cur, 0, &i)))
- return error;
- XFS_WANT_CORRUPTED_RETURN(i == 1);
- *stat = 1;
- return 0;
- }
- }
- else if (dir == XFS_LOOKUP_LE && diff > 0)
- keyno--;
- cur->bc_ptrs[0] = keyno;
- /*
- * Return if we succeeded or not.
- */
- if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
- *stat = 0;
- else
- *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
- return 0;
+ ptr->s = agf->agf_roots[cur->bc_btnum];
}
-/*
- * Move 1 record left from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_alloc_lshift(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to shift record on */
- int *stat) /* success/failure */
+STATIC __int64_t
+xfs_allocbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
{
- int error; /* error return value */
-#ifdef DEBUG
- int i; /* loop index */
-#endif
- xfs_alloc_key_t key; /* key value for leaf level upward */
- xfs_buf_t *lbp; /* buffer for left neighbor block */
- xfs_alloc_block_t *left; /* left neighbor btree block */
- int nrec; /* new number of left block entries */
- xfs_buf_t *rbp; /* buffer for right (current) block */
- xfs_alloc_block_t *right; /* right (current) btree block */
- xfs_alloc_key_t *rkp=NULL; /* key pointer for right block */
- xfs_alloc_ptr_t *rpp=NULL; /* address pointer for right block */
- xfs_alloc_rec_t *rrp=NULL; /* record pointer for right block */
+ xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
+ xfs_alloc_key_t *kp = &key->alloc;
+ __int64_t diff;
- /*
- * Set up variables for this block as "right".
- */
- rbp = cur->bc_bufs[level];
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
-#endif
- /*
- * If we've got no left sibling then we can't shift an entry left.
- */
- if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * If the cursor entry is the one that would be moved, don't
- * do it... it's too complicated.
- */
- if (cur->bc_ptrs[level] <= 1) {
- *stat = 0;
- return 0;
- }
- /*
- * Set up the left neighbor as "left".
- */
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
- 0, &lbp, XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
- /*
- * If it's full, it can't take another entry.
- */
- if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- *stat = 0;
- return 0;
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) -
+ rec->ar_startblock;
}
- nrec = be16_to_cpu(left->bb_numrecs) + 1;
- /*
- * If non-leaf, copy a key and a ptr to the left block.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* key pointer for left block */
- xfs_alloc_ptr_t *lpp; /* address pointer for left block */
- lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- *lkp = *rkp;
- xfs_alloc_log_keys(cur, lbp, nrec, nrec);
- lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
- return error;
-#endif
- *lpp = *rpp;
- xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
- xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
- }
- /*
- * If leaf, copy a record to the left block.
- */
- else {
- xfs_alloc_rec_t *lrp; /* record pointer for left block */
+ diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+ if (diff)
+ return diff;
- lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- *lrp = *rrp;
- xfs_alloc_log_recs(cur, lbp, nrec, nrec);
- xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
- }
- /*
- * Bump and log left's numrecs, decrement and log right's numrecs.
- */
- be16_add_cpu(&left->bb_numrecs, 1);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
- be16_add_cpu(&right->bb_numrecs, -1);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
- /*
- * Slide the contents of right down one entry.
- */
- if (level > 0) {
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
- level)))
- return error;
- }
-#endif
- memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- } else {
- memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- key.ar_startblock = rrp->ar_startblock;
- key.ar_blockcount = rrp->ar_blockcount;
- rkp = &key;
- }
- /*
- * Update the parent key values of right.
- */
- if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
- return error;
- /*
- * Slide the cursor value left one.
- */
- cur->bc_ptrs[level]--;
- *stat = 1;
- return 0;
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
-/*
- * Allocate a new root block, fill it in.
- */
-STATIC int /* error */
-xfs_alloc_newroot(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+STATIC int
+xfs_allocbt_kill_root(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int level,
+ union xfs_btree_ptr *newroot)
{
- int error; /* error return value */
- xfs_agblock_t lbno; /* left block number */
- xfs_buf_t *lbp; /* left btree buffer */
- xfs_alloc_block_t *left; /* left btree block */
- xfs_mount_t *mp; /* mount structure */
- xfs_agblock_t nbno; /* new block number */
- xfs_buf_t *nbp; /* new (root) buffer */
- xfs_alloc_block_t *new; /* new (root) btree block */
- int nptr; /* new value for key index, 1 or 2 */
- xfs_agblock_t rbno; /* right block number */
- xfs_buf_t *rbp; /* right btree buffer */
- xfs_alloc_block_t *right; /* right btree block */
-
- mp = cur->bc_mp;
+ int error;
- ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
- /*
- * Get a buffer from the freelist blocks, for the new root.
- */
- error = xfs_alloc_get_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, &nbno, 1);
- if (error)
- return error;
- /*
- * None available, we fail.
- */
- if (nbno == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- xfs_trans_agbtree_delta(cur->bc_tp, 1);
- nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
- 0);
- new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
- /*
- * Set the root data in the a.g. freespace structure.
- */
- {
- xfs_agf_t *agf; /* a.g. freespace header */
- xfs_agnumber_t seqno;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_STATS_INC(cur, killroot);
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
- be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1);
- seqno = be32_to_cpu(agf->agf_seqno);
- mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_ROOTS | XFS_AGF_LEVELS);
- }
/*
- * At the previous root level there are now two blocks: the old
- * root, and the new block generated when it was split.
- * We don't know which one the cursor is pointing at, so we
- * set up variables "left" and "right" for each case.
+ * Update the root pointer, decreasing the level by 1 and then
+ * free the old root.
*/
- lbp = cur->bc_bufs[cur->bc_nlevels - 1];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
+ xfs_allocbt_set_root(cur, newroot, -1);
+ error = xfs_allocbt_free_block(cur, bp);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
-#endif
- if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
- /*
- * Our block is left, pick up the right block.
- */
- lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
- rbno = be32_to_cpu(left->bb_rightsib);
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, rbno, 0, &rbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right,
- cur->bc_nlevels - 1, rbp)))
- return error;
- nptr = 1;
- } else {
- /*
- * Our block is right, pick up the left block.
- */
- rbp = lbp;
- right = left;
- rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
- lbno = be32_to_cpu(right->bb_leftsib);
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, lbno, 0, &lbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left,
- cur->bc_nlevels - 1, lbp)))
- return error;
- nptr = 2;
}
- /*
- * Fill in the new block's btree header and log it.
- */
- new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
- new->bb_level = cpu_to_be16(cur->bc_nlevels);
- new->bb_numrecs = cpu_to_be16(2);
- new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
- ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
- /*
- * Fill in the key data in the new root.
- */
- {
- xfs_alloc_key_t *kp; /* btree key pointer */
- kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
- if (be16_to_cpu(left->bb_level) > 0) {
- kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur);
- kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);
- } else {
- xfs_alloc_rec_t *rp; /* btree record pointer */
+ XFS_BTREE_STATS_INC(cur, free);
- rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
- kp[0].ar_startblock = rp->ar_startblock;
- kp[0].ar_blockcount = rp->ar_blockcount;
- rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- kp[1].ar_startblock = rp->ar_startblock;
- kp[1].ar_blockcount = rp->ar_blockcount;
- }
- }
- xfs_alloc_log_keys(cur, nbp, 1, 2);
- /*
- * Fill in the pointer data in the new root.
- */
- {
- xfs_alloc_ptr_t *pp; /* btree address pointer */
+ xfs_btree_setbuf(cur, level, NULL);
+ cur->bc_nlevels--;
- pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
- pp[0] = cpu_to_be32(lbno);
- pp[1] = cpu_to_be32(rbno);
- }
- xfs_alloc_log_ptrs(cur, nbp, 1, 2);
- /*
- * Fix up the cursor.
- */
- xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
- cur->bc_ptrs[cur->bc_nlevels] = nptr;
- cur->bc_nlevels++;
- *stat = 1;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
-/*
- * Move 1 record right from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_alloc_rshift(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to shift record on */
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* key value for leaf level upward */
- xfs_buf_t *lbp; /* buffer for left (current) block */
- xfs_alloc_block_t *left; /* left (current) btree block */
- xfs_buf_t *rbp; /* buffer for right neighbor block */
- xfs_alloc_block_t *right; /* right neighbor btree block */
- xfs_alloc_key_t *rkp; /* key pointer for right block */
- xfs_btree_cur_t *tcur; /* temporary cursor */
-
- /*
- * Set up variables for this block as "left".
- */
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
-#endif
- /*
- * If we've got no right sibling then we can't shift an entry right.
- */
- if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * If the cursor entry is the one that would be moved, don't
- * do it... it's too complicated.
- */
- if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
- *stat = 0;
- return 0;
- }
- /*
- * Set up the right neighbor as "right".
- */
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
- 0, &rbp, XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
- /*
- * If it's full, it can't take another entry.
- */
- if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- *stat = 0;
- return 0;
- }
- /*
- * Make a hole at the start of the right neighbor block, then
- * copy the last left block entry to the hole.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* key pointer for left block */
- xfs_alloc_ptr_t *lpp; /* address pointer for left block */
- xfs_alloc_ptr_t *rpp; /* address pointer for right block */
-
- lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
#ifdef DEBUG
- for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
- return error;
- }
-#endif
- memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
- return error;
-#endif
- *rkp = *lkp;
- *rpp = *lpp;
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
+STATIC int
+xfs_allocbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock);
} else {
- xfs_alloc_rec_t *lrp; /* record pointer for left block */
- xfs_alloc_rec_t *rrp; /* record pointer for right block */
-
- lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- *rrp = *lrp;
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- key.ar_startblock = rrp->ar_startblock;
- key.ar_blockcount = rrp->ar_blockcount;
- rkp = &key;
- xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
+ return be32_to_cpu(k1->alloc.ar_blockcount) <
+ be32_to_cpu(k2->alloc.ar_blockcount) ||
+ (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
+ be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock));
}
- /*
- * Decrement and log left's numrecs, bump and log right's numrecs.
- */
- be16_add_cpu(&left->bb_numrecs, -1);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
- be16_add_cpu(&right->bb_numrecs, 1);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
- /*
- * Using a temporary cursor, update the parent key values of the
- * block on the right.
- */
- if ((error = xfs_btree_dup_cursor(cur, &tcur)))
- return error;
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_increment(tcur, level, &i)) ||
- (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
- goto error0;
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- *stat = 1;
- return 0;
-error0:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
}
-/*
- * Split cur/level block in half.
- * Return new block number and its first record (to be inserted into parent).
- */
-STATIC int /* error */
-xfs_alloc_split(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to split */
- xfs_agblock_t *bnop, /* output: block number allocated */
- xfs_alloc_key_t *keyp, /* output: first key of new block */
- xfs_btree_cur_t **curp, /* output: new cursor */
- int *stat) /* success/failure */
+STATIC int
+xfs_allocbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
{
- int error; /* error return value */
- int i; /* loop index/record number */
- xfs_agblock_t lbno; /* left (current) block number */
- xfs_buf_t *lbp; /* buffer for left block */
- xfs_alloc_block_t *left; /* left (current) btree block */
- xfs_agblock_t rbno; /* right (new) block number */
- xfs_buf_t *rbp; /* buffer for right block */
- xfs_alloc_block_t *right; /* right (new) btree block */
-
- /*
- * Allocate the new block from the freelist.
- * If we can't do it, we're toast. Give up.
- */
- error = xfs_alloc_get_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, &rbno, 1);
- if (error)
- return error;
- if (rbno == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- xfs_trans_agbtree_delta(cur->bc_tp, 1);
- rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
- rbno, 0);
- /*
- * Set up the new block as "right".
- */
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- /*
- * "Left" is the current (according to the cursor) block.
- */
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
-#endif
- /*
- * Fill in the btree header for the new block.
- */
- right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
- right->bb_level = left->bb_level;
- right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
- /*
- * Make sure that if there's an odd number of entries now, that
- * each new block will have the same number of entries.
- */
- if ((be16_to_cpu(left->bb_numrecs) & 1) &&
- cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
- be16_add_cpu(&right->bb_numrecs, 1);
- i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
- /*
- * For non-leaf blocks, copy keys and addresses over to the new block.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* left btree key pointer */
- xfs_alloc_ptr_t *lpp; /* left btree address pointer */
- xfs_alloc_key_t *rkp; /* right btree key pointer */
- xfs_alloc_ptr_t *rpp; /* right btree address pointer */
-
- lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
- return error;
- }
-#endif
- memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- *keyp = *rkp;
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return be32_to_cpu(r1->alloc.ar_startblock) +
+ be32_to_cpu(r1->alloc.ar_blockcount) <=
+ be32_to_cpu(r2->alloc.ar_startblock);
+ } else {
+ return be32_to_cpu(r1->alloc.ar_blockcount) <
+ be32_to_cpu(r2->alloc.ar_blockcount) ||
+ (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
+ be32_to_cpu(r1->alloc.ar_startblock) <
+ be32_to_cpu(r2->alloc.ar_startblock));
}
- /*
- * For leaf blocks, copy records over to the new block.
- */
- else {
- xfs_alloc_rec_t *lrp; /* left btree record pointer */
- xfs_alloc_rec_t *rrp; /* right btree record pointer */
+}
+#endif /* DEBUG */
- lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- keyp->ar_startblock = rrp->ar_startblock;
- keyp->ar_blockcount = rrp->ar_blockcount;
- }
- /*
- * Find the left block number by looking in the buffer.
- * Adjust numrecs, sibling pointers.
- */
- lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
- be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
- right->bb_rightsib = left->bb_rightsib;
- left->bb_rightsib = cpu_to_be32(rbno);
- right->bb_leftsib = cpu_to_be32(lbno);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- /*
- * If there's a block to the new block's right, make that block
- * point back to right instead of to left.
- */
- if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
- xfs_alloc_block_t *rrblock; /* rr btree block */
- xfs_buf_t *rrbp; /* buffer for rrblock */
+#ifdef XFS_BTREE_TRACE
+ktrace_t *xfs_allocbt_trace_buf;
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0,
- &rrbp, XFS_ALLOC_BTREE_REF)))
- return error;
- rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
- if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
- return error;
- rrblock->bb_leftsib = cpu_to_be32(rbno);
- xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
- }
- /*
- * If the cursor is really in the right block, move it there.
- * If it's just pointing past the last entry in left, then we'll
- * insert there, so don't change anything in that case.
- */
- if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
- xfs_btree_setbuf(cur, level, rbp);
- cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
- }
- /*
- * If there are more levels, we'll need another cursor which refers to
- * the right block, no matter where this cursor was.
- */
- if (level + 1 < cur->bc_nlevels) {
- if ((error = xfs_btree_dup_cursor(cur, curp)))
- return error;
- (*curp)->bc_ptrs[level + 1]++;
- }
- *bnop = rbno;
- *stat = 1;
- return 0;
+STATIC void
+xfs_allocbt_trace_enter(
+ struct xfs_btree_cur *cur,
+ const char *func,
+ char *s,
+ int type,
+ int line,
+ __psunsigned_t a0,
+ __psunsigned_t a1,
+ __psunsigned_t a2,
+ __psunsigned_t a3,
+ __psunsigned_t a4,
+ __psunsigned_t a5,
+ __psunsigned_t a6,
+ __psunsigned_t a7,
+ __psunsigned_t a8,
+ __psunsigned_t a9,
+ __psunsigned_t a10)
+{
+ ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
+ (void *)func, (void *)s, NULL, (void *)cur,
+ (void *)a0, (void *)a1, (void *)a2, (void *)a3,
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7,
+ (void *)a8, (void *)a9, (void *)a10);
}
-/*
- * Update keys at all levels from here to the root along the cursor's path.
- */
-STATIC int /* error */
-xfs_alloc_updkey(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_alloc_key_t *keyp, /* new key value to update to */
- int level) /* starting level for update */
+STATIC void
+xfs_allocbt_trace_cursor(
+ struct xfs_btree_cur *cur,
+ __uint32_t *s0,
+ __uint64_t *l0,
+ __uint64_t *l1)
{
- int ptr; /* index of key in block */
-
- /*
- * Go up the tree from this level toward the root.
- * At each level, update the key value to the value input.
- * Stop when we reach a level where the cursor isn't pointing
- * at the first entry in the block.
- */
- for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
- xfs_alloc_block_t *block; /* btree block */
- xfs_buf_t *bp; /* buffer for block */
-#ifdef DEBUG
- int error; /* error return value */
-#endif
- xfs_alloc_key_t *kp; /* ptr to btree block keys */
-
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- ptr = cur->bc_ptrs[level];
- kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
- *kp = *keyp;
- xfs_alloc_log_keys(cur, bp, ptr, ptr);
- }
- return 0;
+ *s0 = cur->bc_private.a.agno;
+ *l0 = cur->bc_rec.a.ar_startblock;
+ *l1 = cur->bc_rec.a.ar_blockcount;
}
-/*
- * Externally visible routines.
- */
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_alloc_decrement(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree, 0 is leaf */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_trace_key(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ __uint64_t *l0,
+ __uint64_t *l1)
{
- xfs_alloc_block_t *block; /* btree block */
- int error; /* error return value */
- int lev; /* btree level */
-
- ASSERT(level < cur->bc_nlevels);
- /*
- * Read-ahead to the left at this level.
- */
- xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
- /*
- * Decrement the ptr at this level. If we're still in the block
- * then we're done.
- */
- if (--cur->bc_ptrs[level] > 0) {
- *stat = 1;
- return 0;
- }
- /*
- * Get a pointer to the btree block.
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level,
- cur->bc_bufs[level])))
- return error;
-#endif
- /*
- * If we just went off the left edge of the tree, return failure.
- */
- if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * March up the tree decrementing pointers.
- * Stop when we don't go off the left edge of a block.
- */
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- if (--cur->bc_ptrs[lev] > 0)
- break;
- /*
- * Read-ahead the left block, we're going to read it
- * in the next loop.
- */
- xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
- }
- /*
- * If we went off the root then we are seriously confused.
- */
- ASSERT(lev < cur->bc_nlevels);
- /*
- * Now walk back down the tree, fixing up the cursor's buffer
- * pointers and key numbers.
- */
- for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
- xfs_agblock_t agbno; /* block number of btree block */
- xfs_buf_t *bp; /* buffer pointer for block */
-
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, agbno, 0, &bp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
- cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
- }
- *stat = 1;
- return 0;
+ *l0 = be32_to_cpu(key->alloc.ar_startblock);
+ *l1 = be32_to_cpu(key->alloc.ar_blockcount);
}
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-int /* error */
-xfs_alloc_delete(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_trace_record(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ __uint64_t *l0,
+ __uint64_t *l1,
+ __uint64_t *l2)
{
- int error; /* error return value */
- int i; /* result code */
- int level; /* btree level */
-
- /*
- * Go up the tree, starting at leaf level.
- * If 2 is returned then a join was done; go to the next level.
- * Otherwise we are done.
- */
- for (level = 0, i = 2; i == 2; level++) {
- if ((error = xfs_alloc_delrec(cur, level, &i)))
- return error;
- }
- if (i == 0) {
- for (level = 1; level < cur->bc_nlevels; level++) {
- if (cur->bc_ptrs[level] == 0) {
- if ((error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- break;
- }
- }
- }
- *stat = i;
- return 0;
+ *l0 = be32_to_cpu(rec->alloc.ar_startblock);
+ *l1 = be32_to_cpu(rec->alloc.ar_blockcount);
+ *l2 = 0;
}
+#endif /* XFS_BTREE_TRACE */
+
+static const struct xfs_btree_ops xfs_allocbt_ops = {
+ .rec_len = sizeof(xfs_alloc_rec_t),
+ .key_len = sizeof(xfs_alloc_key_t),
+
+ .dup_cursor = xfs_allocbt_dup_cursor,
+ .set_root = xfs_allocbt_set_root,
+ .kill_root = xfs_allocbt_kill_root,
+ .alloc_block = xfs_allocbt_alloc_block,
+ .free_block = xfs_allocbt_free_block,
+ .update_lastrec = xfs_allocbt_update_lastrec,
+ .get_minrecs = xfs_allocbt_get_minrecs,
+ .get_maxrecs = xfs_allocbt_get_maxrecs,
+ .init_key_from_rec = xfs_allocbt_init_key_from_rec,
+ .init_rec_from_key = xfs_allocbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
+ .key_diff = xfs_allocbt_key_diff,
-/*
- * Get the data from the pointed-to record.
- */
-int /* error */
-xfs_alloc_get_rec(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t *bno, /* output: starting block of extent */
- xfs_extlen_t *len, /* output: length of extent */
- int *stat) /* output: success/failure */
-{
- xfs_alloc_block_t *block; /* btree block */
#ifdef DEBUG
- int error; /* error return value */
+ .keys_inorder = xfs_allocbt_keys_inorder,
+ .recs_inorder = xfs_allocbt_recs_inorder,
#endif
- int ptr; /* record number */
- ptr = cur->bc_ptrs[0];
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
- return error;
+#ifdef XFS_BTREE_TRACE
+ .trace_enter = xfs_allocbt_trace_enter,
+ .trace_cursor = xfs_allocbt_trace_cursor,
+ .trace_key = xfs_allocbt_trace_key,
+ .trace_record = xfs_allocbt_trace_record,
#endif
- /*
- * Off the right end or left end, return failure.
- */
- if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
- *stat = 0;
- return 0;
- }
- /*
- * Point to the record and extract its data.
- */
- {
- xfs_alloc_rec_t *rec; /* record data */
-
- rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- *bno = be32_to_cpu(rec->ar_startblock);
- *len = be32_to_cpu(rec->ar_blockcount);
- }
- *stat = 1;
- return 0;
-}
+};
/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
+ * Allocate a new allocation btree cursor.
*/
-int /* error */
-xfs_alloc_increment(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree, 0 is leaf */
- int *stat) /* success/failure */
+struct xfs_btree_cur * /* new alloc btree cursor */
+xfs_allocbt_init_cursor(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_buf *agbp, /* buffer for agf structure */
+ xfs_agnumber_t agno, /* allocation group number */
+ xfs_btnum_t btnum) /* btree identifier */
{
- xfs_alloc_block_t *block; /* btree block */
- xfs_buf_t *bp; /* tree block buffer */
- int error; /* error return value */
- int lev; /* btree level */
-
- ASSERT(level < cur->bc_nlevels);
- /*
- * Read-ahead to the right at this level.
- */
- xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
- /*
- * Get a pointer to the btree block.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- /*
- * Increment the ptr at this level. If we're still in the block
- * then we're done.
- */
- if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
- *stat = 1;
- return 0;
- }
- /*
- * If we just went off the right edge of the tree, return failure.
- */
- if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * March up the tree incrementing pointers.
- * Stop when we don't go off the right edge of a block.
- */
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- bp = cur->bc_bufs[lev];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
-#endif
- if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
- break;
- /*
- * Read-ahead the right block, we're going to read it
- * in the next loop.
- */
- xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
- }
- /*
- * If we went off the root then we are seriously confused.
- */
- ASSERT(lev < cur->bc_nlevels);
- /*
- * Now walk back down the tree, fixing up the cursor's buffer
- * pointers and key numbers.
- */
- for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- lev > level; ) {
- xfs_agblock_t agbno; /* block number of btree block */
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_btree_cur *cur;
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, agbno, 0, &bp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
- cur->bc_ptrs[lev] = 1;
- }
- *stat = 1;
- return 0;
-}
+ ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-int /* error */
-xfs_alloc_insert(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i; /* result value, 0 for failure */
- int level; /* current level number in btree */
- xfs_agblock_t nbno; /* new block number (split result) */
- xfs_btree_cur_t *ncur; /* new cursor (split result) */
- xfs_alloc_rec_t nrec; /* record being inserted this level */
- xfs_btree_cur_t *pcur; /* previous level's cursor */
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
- level = 0;
- nbno = NULLAGBLOCK;
- nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
- nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
- ncur = NULL;
- pcur = cur;
- /*
- * Loop going up the tree, starting at the leaf level.
- * Stop when we don't get a split block, that must mean that
- * the insert is finished with this level.
- */
- do {
- /*
- * Insert nrec/nbno into this level of the tree.
- * Note if we fail, nbno will be null.
- */
- if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
- &i))) {
- if (pcur != cur)
- xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
- return error;
- }
- /*
- * See if the cursor we just used is trash.
- * Can't trash the caller's cursor, but otherwise we should
- * if ncur is a new cursor or we're about to be done.
- */
- if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
- cur->bc_nlevels = pcur->bc_nlevels;
- xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
- }
- /*
- * If we got a new cursor, switch to it.
- */
- if (ncur) {
- pcur = ncur;
- ncur = NULL;
- }
- } while (nbno != NULLAGBLOCK);
- *stat = i;
- return 0;
-}
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
+ cur->bc_btnum = btnum;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_eq(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
+ cur->bc_ops = &xfs_allocbt_ops;
+ if (btnum == XFS_BTNUM_CNT)
+ cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_ge(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
-}
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_le(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
+ return cur;
}
/*
- * Update the record referred to by cur, to the value given by [bno, len].
- * This either works (return 0) or gets an EFSCORRUPTED error.
+ * Calculate number of records in an alloc btree block.
*/
-int /* error */
-xfs_alloc_update(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len) /* length of extent */
+int
+xfs_allocbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
{
- xfs_alloc_block_t *block; /* btree block to update */
- int error; /* error return value */
- int ptr; /* current record number (updating) */
+ blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
- ASSERT(len > 0);
- /*
- * Pick up the a.g. freelist struct and the current block.
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
- return error;
-#endif
- /*
- * Get the address of the rec to be updated.
- */
- ptr = cur->bc_ptrs[0];
- {
- xfs_alloc_rec_t *rp; /* pointer to updated record */
-
- rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- /*
- * Fill in the new contents and log them.
- */
- rp->ar_startblock = cpu_to_be32(bno);
- rp->ar_blockcount = cpu_to_be32(len);
- xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
- }
- /*
- * If it's the by-size btree and it's the last leaf block and
- * it's the last record... then update the size of the longest
- * extent in the a.g., which we cache in the a.g. freelist header.
- */
- if (cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- ptr == be16_to_cpu(block->bb_numrecs)) {
- xfs_agf_t *agf; /* a.g. freespace header */
- xfs_agnumber_t seqno;
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- seqno = be32_to_cpu(agf->agf_seqno);
- cur->bc_mp->m_perag[seqno].pagf_longest = len;
- agf->agf_longest = cpu_to_be32(len);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
- }
- /*
- * Updating first record in leaf. Pass new key value up to our parent.
- */
- if (ptr == 1) {
- xfs_alloc_key_t key; /* key containing [bno, len] */
-
- key.ar_startblock = cpu_to_be32(bno);
- key.ar_blockcount = cpu_to_be32(len);
- if ((error = xfs_alloc_updkey(cur, &key, 1)))
- return error;
- }
- return 0;
+ if (leaf)
+ return blocklen / sizeof(xfs_alloc_rec_t);
+ return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 5bd1a2c8bd07..a6caa0022c9b 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -24,7 +24,6 @@
struct xfs_buf;
struct xfs_btree_cur;
-struct xfs_btree_sblock;
struct xfs_mount;
/*
@@ -50,16 +49,6 @@ typedef struct xfs_alloc_rec_incore {
/* btree pointer type */
typedef __be32 xfs_alloc_ptr_t;
-/* btree block header type */
-typedef struct xfs_btree_sblock xfs_alloc_block_t;
-
-#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp))
-
-/*
- * Real block structures have a size equal to the disk block size.
- */
-#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
-#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
/*
* Minimum and maximum blocksize and sectorsize.
@@ -83,73 +72,39 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
/*
- * Record, key, and pointer address macros for btree blocks.
- */
-#define XFS_ALLOC_REC_ADDR(bb,i,cur) \
- XFS_BTREE_REC_ADDR(xfs_alloc, bb, i)
-
-#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \
- XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i)
-
-#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \
- XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Get the data from the pointed-to record.
- */
-extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno,
- xfs_extlen_t *len, int *stat);
-
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
-
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
-
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
+ * Btree block header size depends on a superblock flag.
+ *
+ * (not quite yet, but soon)
*/
-extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
+#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
/*
- * Update the record referred to by cur, to the value given by [bno, len].
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len);
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_ALLOC_REC_ADDR(mp, block, index) \
+ ((xfs_alloc_rec_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ (((index) - 1) * sizeof(xfs_alloc_rec_t))))
+
+#define XFS_ALLOC_KEY_ADDR(mp, block, index) \
+ ((xfs_alloc_key_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ ((index) - 1) * sizeof(xfs_alloc_key_t)))
+
+#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \
+ ((xfs_alloc_ptr_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ (maxrecs) * sizeof(xfs_alloc_key_t) + \
+ ((index) - 1) * sizeof(xfs_alloc_ptr_t)))
+
+extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
+ struct xfs_trans *, struct xfs_buf *,
+ xfs_agnumber_t, xfs_btnum_t);
+extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 0b3b5efe848c..53d5e70d1360 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -41,21 +41,36 @@
#endif
#ifdef XFS_NATIVE_HOST
-#define cpu_to_be16(val) ((__be16)(val))
-#define cpu_to_be32(val) ((__be32)(val))
-#define cpu_to_be64(val) ((__be64)(val))
-#define be16_to_cpu(val) ((__uint16_t)(val))
-#define be32_to_cpu(val) ((__uint32_t)(val))
-#define be64_to_cpu(val) ((__uint64_t)(val))
+#define cpu_to_be16(val) ((__force __be16)(__u16)(val))
+#define cpu_to_be32(val) ((__force __be32)(__u32)(val))
+#define cpu_to_be64(val) ((__force __be64)(__u64)(val))
+#define be16_to_cpu(val) ((__force __u16)(__be16)(val))
+#define be32_to_cpu(val) ((__force __u32)(__be32)(val))
+#define be64_to_cpu(val) ((__force __u64)(__be64)(val))
#else
-#define cpu_to_be16(val) (__swab16((__uint16_t)(val)))
-#define cpu_to_be32(val) (__swab32((__uint32_t)(val)))
-#define cpu_to_be64(val) (__swab64((__uint64_t)(val)))
-#define be16_to_cpu(val) (__swab16((__be16)(val)))
-#define be32_to_cpu(val) (__swab32((__be32)(val)))
-#define be64_to_cpu(val) (__swab64((__be64)(val)))
+#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val)))
+#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val)))
+#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val)))
+#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val)))
+#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val)))
+#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val)))
#endif
+static inline void be16_add_cpu(__be16 *a, __s16 b)
+{
+ *a = cpu_to_be16(be16_to_cpu(*a) + b);
+}
+
+static inline void be32_add_cpu(__be32 *a, __s32 b)
+{
+ *a = cpu_to_be32(be32_to_cpu(*a) + b);
+}
+
+static inline void be64_add_cpu(__be64 *a, __s64 b)
+{
+ *a = cpu_to_be64(be64_to_cpu(*a) + b);
+}
+
#endif /* __KERNEL__ */
/* do we need conversion? */
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 8e0e463dae2d..bca7b243c319 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -61,8 +61,7 @@ static inline int xfs_highbit64(__uint64_t v)
/* Get low bit set out of 32-bit argument, -1 if none set */
static inline int xfs_lowbit32(__uint32_t v)
{
- unsigned long t = v;
- return (v) ? find_first_bit(&t, 32) : -1;
+ return ffs(v) - 1;
}
/* Get low bit set out of 64-bit argument, -1 if none set */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a1aab9275d5a..c3912213645c 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -393,8 +393,8 @@ xfs_bmap_count_leaves(
STATIC void
xfs_bmap_disk_count_leaves(
- xfs_extnum_t idx,
- xfs_bmbt_block_t *block,
+ struct xfs_mount *mp,
+ struct xfs_btree_block *block,
int numrecs,
int *count);
@@ -402,6 +402,53 @@ xfs_bmap_disk_count_leaves(
* Bmap internal routines.
*/
+STATIC int /* error */
+xfs_bmbt_lookup_eq(
+ struct xfs_btree_cur *cur,
+ xfs_fileoff_t off,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.b.br_startoff = off;
+ cur->bc_rec.b.br_startblock = bno;
+ cur->bc_rec.b.br_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+STATIC int /* error */
+xfs_bmbt_lookup_ge(
+ struct xfs_btree_cur *cur,
+ xfs_fileoff_t off,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.b.br_startoff = off;
+ cur->bc_rec.b.br_startblock = bno;
+ cur->bc_rec.b.br_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+* Update the record referred to by cur to the value given
+ * by [off, bno, len, state].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int
+xfs_bmbt_update(
+ struct xfs_btree_cur *cur,
+ xfs_fileoff_t off,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ xfs_exntst_t state)
+{
+ union xfs_btree_rec rec;
+
+ xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
+ return xfs_btree_update(cur, &rec);
+}
+
/*
* Called from xfs_bmap_add_attrfork to handle btree format files.
*/
@@ -422,15 +469,14 @@ xfs_bmap_add_attrfork_btree(
if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
*flags |= XFS_ILOG_DBROOT;
else {
- cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
- XFS_DATA_FORK);
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
cur->bc_private.b.flist = flist;
cur->bc_private.b.firstblock = *firstblock;
if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
goto error0;
/* must be at least one entry */
XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
- if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
+ if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
goto error0;
if (stat == 0) {
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
@@ -818,10 +864,10 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_blockcount, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -931,7 +977,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1007,7 +1053,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1097,7 +1143,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1152,7 +1198,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1379,16 +1425,16 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_blockcount, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1428,10 +1474,10 @@ xfs_bmap_add_extent_unwritten_real(
&i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1471,10 +1517,10 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_blockcount, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
@@ -1557,7 +1603,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_blockcount - new->br_blockcount,
oldext)))
goto done;
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
if (xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
@@ -1605,7 +1651,7 @@ xfs_bmap_add_extent_unwritten_real(
oldext)))
goto done;
cur->bc_rec.b = *new;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1647,7 +1693,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_blockcount - new->br_blockcount,
oldext)))
goto done;
- if ((error = xfs_bmbt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto done;
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
@@ -1695,7 +1741,7 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1743,7 +1789,7 @@ xfs_bmap_add_extent_unwritten_real(
cur->bc_rec.b = PREV;
cur->bc_rec.b.br_blockcount =
new->br_startoff - PREV.br_startoff;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
/*
@@ -1758,7 +1804,7 @@ xfs_bmap_add_extent_unwritten_real(
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
/* new middle extent - newext */
cur->bc_rec.b.br_state = new->br_state;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -2106,10 +2152,10 @@ xfs_bmap_add_extent_hole_real(
right.br_blockcount, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, left.br_startoff,
@@ -2218,7 +2264,7 @@ xfs_bmap_add_extent_hole_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = new->br_state;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -2996,24 +3042,24 @@ xfs_bmap_btree_to_extents(
int whichfork) /* data or attr fork */
{
/* REFERENCED */
- xfs_bmbt_block_t *cblock;/* child btree block */
+ struct xfs_btree_block *cblock;/* child btree block */
xfs_fsblock_t cbno; /* child block number */
xfs_buf_t *cbp; /* child block's buffer */
int error; /* error return value */
xfs_ifork_t *ifp; /* inode fork data */
xfs_mount_t *mp; /* mount point structure */
__be64 *pp; /* ptr to block address */
- xfs_bmbt_block_t *rblock;/* root btree block */
+ struct xfs_btree_block *rblock;/* root btree block */
+ mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(ifp->if_flags & XFS_IFEXTENTS);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
rblock = ifp->if_broot;
ASSERT(be16_to_cpu(rblock->bb_level) == 1);
ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
- ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
- mp = ip->i_mount;
- pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
+ ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
cbno = be64_to_cpu(*pp);
*logflagsp = 0;
#ifdef DEBUG
@@ -3023,8 +3069,8 @@ xfs_bmap_btree_to_extents(
if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
XFS_BMAP_BTREE_REF)))
return error;
- cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
- if ((error = xfs_btree_check_lblock(cur, cblock, 0, cbp)))
+ cblock = XFS_BUF_TO_BLOCK(cbp);
+ if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
return error;
xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
ip->i_d.di_nblocks--;
@@ -3170,7 +3216,7 @@ xfs_bmap_del_extent(
flags |= XFS_ILOG_FEXT(whichfork);
break;
}
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
break;
@@ -3254,10 +3300,10 @@ xfs_bmap_del_extent(
got.br_startblock, temp,
got.br_state)))
goto done;
- if ((error = xfs_bmbt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto done;
cur->bc_rec.b = new;
- error = xfs_bmbt_insert(cur, &i);
+ error = xfs_btree_insert(cur, &i);
if (error && error != ENOSPC)
goto done;
/*
@@ -3404,11 +3450,11 @@ xfs_bmap_extents_to_btree(
int *logflagsp, /* inode logging flags */
int whichfork) /* data or attr fork */
{
- xfs_bmbt_block_t *ablock; /* allocated (child) bt block */
+ struct xfs_btree_block *ablock; /* allocated (child) bt block */
xfs_buf_t *abp; /* buffer for ablock */
xfs_alloc_arg_t args; /* allocation arguments */
xfs_bmbt_rec_t *arp; /* child record pointer */
- xfs_bmbt_block_t *block; /* btree root block */
+ struct xfs_btree_block *block; /* btree root block */
xfs_btree_cur_t *cur; /* bmap btree cursor */
xfs_bmbt_rec_host_t *ep; /* extent record pointer */
int error; /* error return value */
@@ -3428,6 +3474,7 @@ xfs_bmap_extents_to_btree(
*/
xfs_iroot_realloc(ip, 1, whichfork);
ifp->if_flags |= XFS_IFBROOT;
+
/*
* Fill in the root.
*/
@@ -3435,14 +3482,14 @@ xfs_bmap_extents_to_btree(
block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
block->bb_level = cpu_to_be16(1);
block->bb_numrecs = cpu_to_be16(1);
- block->bb_leftsib = cpu_to_be64(NULLDFSBNO);
- block->bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+
/*
* Need a cursor. Can't allocate until bb_level is filled in.
*/
mp = ip->i_mount;
- cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
- whichfork);
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
@@ -3489,12 +3536,12 @@ xfs_bmap_extents_to_btree(
/*
* Fill in the child block.
*/
- ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
+ ablock = XFS_BUF_TO_BLOCK(abp);
ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
ablock->bb_level = 0;
- ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
- ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
- arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+ ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
for (cnt = i = 0; i < nextents; i++) {
ep = xfs_iext_get_ext(ifp, i);
@@ -3505,21 +3552,24 @@ xfs_bmap_extents_to_btree(
}
}
ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
- ablock->bb_numrecs = cpu_to_be16(cnt);
+ xfs_btree_set_numrecs(ablock, cnt);
+
/*
* Fill in the root key and pointer.
*/
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+ kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
+ arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
+ be16_to_cpu(block->bb_level)));
*pp = cpu_to_be64(args.fsbno);
+
/*
* Do all this logging at the end so that
* the root is at the right level.
*/
- xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS);
- xfs_bmbt_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
+ xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
+ xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
ASSERT(*curp == NULL);
*curp = cur;
*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
@@ -4176,7 +4226,7 @@ xfs_bmap_compute_maxlevels(
maxleafents = MAXAEXTNUM;
sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
}
- maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
+ maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
minleafrecs = mp->m_bmap_dmnr[0];
minnoderecs = mp->m_bmap_dmnr[1];
maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -4242,9 +4292,15 @@ xfs_bmap_finish(
* We have a new transaction, so we should return committed=1,
* even though we're returning an error.
*/
- if (error) {
+ if (error)
return error;
- }
+
+ /*
+ * transaction commit worked ok so we can drop the extra ticket
+ * reference that we gained in xfs_trans_dup()
+ */
+ xfs_log_ticket_put(ntp->t_ticket);
+
if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
logcount)))
return error;
@@ -4474,6 +4530,22 @@ xfs_bmap_one_block(
return rval;
}
+STATIC int
+xfs_bmap_sanity_check(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ int level)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+
+ if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
+ be16_to_cpu(block->bb_level) != level ||
+ be16_to_cpu(block->bb_numrecs) == 0 ||
+ be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
+ return 0;
+ return 1;
+}
+
/*
* Read in the extents to if_extents.
* All inode fields are set up by caller, we just traverse the btree
@@ -4486,7 +4558,7 @@ xfs_bmap_read_extents(
xfs_inode_t *ip, /* incore inode */
int whichfork) /* data or attr fork */
{
- xfs_bmbt_block_t *block; /* current btree block */
+ struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
xfs_buf_t *bp; /* buffer for "block" */
int error; /* error return value */
@@ -4510,7 +4582,7 @@ xfs_bmap_read_extents(
*/
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
- pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
ASSERT(bno != NULLDFSBNO);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -4523,13 +4595,13 @@ xfs_bmap_read_extents(
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
return error;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
XFS_WANT_CORRUPTED_GOTO(
- XFS_BMAP_SANITY_CHECK(mp, block, level),
+ xfs_bmap_sanity_check(mp, bp, level),
error0);
if (level == 0)
break;
- pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
xfs_trans_brelse(tp, bp);
@@ -4549,7 +4621,7 @@ xfs_bmap_read_extents(
xfs_extnum_t start;
- num_recs = be16_to_cpu(block->bb_numrecs);
+ num_recs = xfs_btree_get_numrecs(block);
if (unlikely(i + num_recs > room)) {
ASSERT(i + num_recs <= room);
xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
@@ -4561,18 +4633,18 @@ xfs_bmap_read_extents(
goto error0;
}
XFS_WANT_CORRUPTED_GOTO(
- XFS_BMAP_SANITY_CHECK(mp, block, 0),
+ xfs_bmap_sanity_check(mp, bp, 0),
error0);
/*
* Read-ahead the next leaf block, if any.
*/
- nextbno = be64_to_cpu(block->bb_rightsib);
+ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
if (nextbno != NULLFSBLOCK)
xfs_btree_reada_bufl(mp, nextbno, 1);
/*
* Copy records into the extent records.
*/
- frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
+ frp = XFS_BMBT_REC_ADDR(mp, block, 1);
start = i;
for (j = 0; j < num_recs; j++, i++, frp++) {
xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
@@ -4603,7 +4675,7 @@ xfs_bmap_read_extents(
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
return error;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
}
ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
@@ -5029,8 +5101,7 @@ xfs_bmapi(
if (abno == NULLFSBLOCK)
break;
if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
- cur = xfs_btree_init_cursor(mp,
- tp, NULL, 0, XFS_BTNUM_BMAP,
+ cur = xfs_bmbt_init_cursor(mp, tp,
ip, whichfork);
cur->bc_private.b.firstblock =
*firstblock;
@@ -5147,9 +5218,8 @@ xfs_bmapi(
*/
ASSERT(mval->br_blockcount <= len);
if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
- cur = xfs_btree_init_cursor(mp,
- tp, NULL, 0, XFS_BTNUM_BMAP,
- ip, whichfork);
+ cur = xfs_bmbt_init_cursor(mp,
+ tp, ip, whichfork);
cur->bc_private.b.firstblock =
*firstblock;
cur->bc_private.b.flist = flist;
@@ -5440,8 +5510,7 @@ xfs_bunmapi(
logflags = 0;
if (ifp->if_flags & XFS_IFBROOT) {
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
- cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
- whichfork);
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
cur->bc_private.b.flags = 0;
@@ -6131,7 +6200,7 @@ xfs_bmap_get_bp(
void
xfs_check_block(
- xfs_bmbt_block_t *block,
+ struct xfs_btree_block *block,
xfs_mount_t *mp,
int root,
short sz)
@@ -6143,36 +6212,29 @@ xfs_check_block(
ASSERT(be16_to_cpu(block->bb_level) > 0);
prevp = NULL;
- for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) {
+ for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
dmxr = mp->m_bmap_dmxr[0];
-
- if (root) {
- keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz);
- } else {
- keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i);
- }
+ keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
if (prevp) {
- xfs_btree_check_key(XFS_BTNUM_BMAP, prevp, keyp);
+ ASSERT(be64_to_cpu(prevp->br_startoff) <
+ be64_to_cpu(keyp->br_startoff));
}
prevp = keyp;
/*
* Compare the block numbers to see if there are dups.
*/
+ if (root)
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
+ else
+ pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
- if (root) {
- pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz);
- } else {
- pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr);
- }
for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
- if (root) {
- thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz);
- } else {
- thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j,
- dmxr);
- }
+ if (root)
+ thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
+ else
+ thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
if (*thispa == *pp) {
cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
__func__, j, i,
@@ -6195,7 +6257,7 @@ xfs_bmap_check_leaf_extents(
xfs_inode_t *ip, /* incore inode pointer */
int whichfork) /* data or attr fork */
{
- xfs_bmbt_block_t *block; /* current btree block */
+ struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
xfs_buf_t *bp; /* buffer for "block" */
int error; /* error return value */
@@ -6223,7 +6285,7 @@ xfs_bmap_check_leaf_extents(
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
- pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
ASSERT(bno != NULLDFSBNO);
@@ -6245,9 +6307,9 @@ xfs_bmap_check_leaf_extents(
if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
goto error_norelse;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
XFS_WANT_CORRUPTED_GOTO(
- XFS_BMAP_SANITY_CHECK(mp, block, level),
+ xfs_bmap_sanity_check(mp, bp, level),
error0);
if (level == 0)
break;
@@ -6258,7 +6320,7 @@ xfs_bmap_check_leaf_extents(
*/
xfs_check_block(block, mp, 0, 0);
- pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
if (bp_release) {
@@ -6280,13 +6342,13 @@ xfs_bmap_check_leaf_extents(
xfs_extnum_t num_recs;
- num_recs = be16_to_cpu(block->bb_numrecs);
+ num_recs = xfs_btree_get_numrecs(block);
/*
* Read-ahead the next leaf block, if any.
*/
- nextbno = be64_to_cpu(block->bb_rightsib);
+ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
/*
* Check all the extents to make sure they are OK.
@@ -6294,13 +6356,17 @@ xfs_bmap_check_leaf_extents(
* conform with the first entry in this one.
*/
- ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
+ ep = XFS_BMBT_REC_ADDR(mp, block, 1);
if (i) {
- xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep);
+ ASSERT(xfs_bmbt_disk_get_startoff(&last) +
+ xfs_bmbt_disk_get_blockcount(&last) <=
+ xfs_bmbt_disk_get_startoff(ep));
}
for (j = 1; j < num_recs; j++) {
- nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1);
- xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp);
+ nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
+ ASSERT(xfs_bmbt_disk_get_startoff(ep) +
+ xfs_bmbt_disk_get_blockcount(ep) <=
+ xfs_bmbt_disk_get_startoff(nextp));
ep = nextp;
}
@@ -6326,7 +6392,7 @@ xfs_bmap_check_leaf_extents(
if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
goto error_norelse;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
}
if (bp_release) {
bp_release = 0;
@@ -6356,7 +6422,7 @@ xfs_bmap_count_blocks(
int whichfork, /* data or attr fork */
int *count) /* out: count of blocks */
{
- xfs_bmbt_block_t *block; /* current btree block */
+ struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
xfs_ifork_t *ifp; /* fork structure */
int level; /* btree level, for checking */
@@ -6379,7 +6445,7 @@ xfs_bmap_count_blocks(
block = ifp->if_broot;
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
- pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
ASSERT(bno != NULLDFSBNO);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -6413,29 +6479,29 @@ xfs_bmap_count_tree(
__be64 *pp;
xfs_fsblock_t bno = blockno;
xfs_fsblock_t nextbno;
- xfs_bmbt_block_t *block, *nextblock;
+ struct xfs_btree_block *block, *nextblock;
int numrecs;
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
return error;
*count += 1;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
if (--level) {
/* Not at node above leafs, count this level of nodes */
- nextbno = be64_to_cpu(block->bb_rightsib);
+ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
while (nextbno != NULLFSBLOCK) {
if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
0, &nbp, XFS_BMAP_BTREE_REF)))
return error;
*count += 1;
- nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp);
- nextbno = be64_to_cpu(nextblock->bb_rightsib);
+ nextblock = XFS_BUF_TO_BLOCK(nbp);
+ nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
xfs_trans_brelse(tp, nbp);
}
/* Dive to the next level */
- pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
if (unlikely((error =
xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
@@ -6448,9 +6514,9 @@ xfs_bmap_count_tree(
} else {
/* count all level 1 nodes and their leaves */
for (;;) {
- nextbno = be64_to_cpu(block->bb_rightsib);
+ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
numrecs = be16_to_cpu(block->bb_numrecs);
- xfs_bmap_disk_count_leaves(0, block, numrecs, count);
+ xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
xfs_trans_brelse(tp, bp);
if (nextbno == NULLFSBLOCK)
break;
@@ -6459,7 +6525,7 @@ xfs_bmap_count_tree(
XFS_BMAP_BTREE_REF)))
return error;
*count += 1;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
}
}
return 0;
@@ -6489,8 +6555,8 @@ xfs_bmap_count_leaves(
*/
STATIC void
xfs_bmap_disk_count_leaves(
- xfs_extnum_t idx,
- xfs_bmbt_block_t *block,
+ struct xfs_mount *mp,
+ struct xfs_btree_block *block,
int numrecs,
int *count)
{
@@ -6498,7 +6564,7 @@ xfs_bmap_disk_count_leaves(
xfs_bmbt_rec_t *frp;
for (b = 1; b <= numrecs; b++) {
- frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);
+ frp = XFS_BMBT_REC_ADDR(mp, block, b);
*count += xfs_bmbt_disk_get_blockcount(frp);
}
}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 9f3e3a836d15..7c9d12cd7a47 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -137,9 +137,7 @@ typedef struct xfs_bmalloca {
char conv; /* overwriting unwritten extents */
} xfs_bmalloca_t;
-#ifdef __KERNEL__
-
-#if defined(XFS_BMAP_TRACE)
+#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
/*
* Trace operations for bmap extent tracing
*/
@@ -163,9 +161,12 @@ xfs_bmap_trace_exlist(
int whichfork); /* data or attr fork */
#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
xfs_bmap_trace_exlist(__func__,ip,c,w)
-#else
+
+#else /* __KERNEL__ && XFS_BMAP_TRACE */
+
#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
-#endif
+
+#endif /* __KERNEL__ && XFS_BMAP_TRACE */
/*
* Convert inode from non-attributed to attributed.
@@ -206,20 +207,6 @@ xfs_bmap_compute_maxlevels(
int whichfork); /* data or attr fork */
/*
- * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
- * caller. Frees all the extents that need freeing, which must be done
- * last due to locking considerations.
- *
- * Return 1 if the given transaction was committed and a new one allocated,
- * and 0 otherwise.
- */
-int /* error */
-xfs_bmap_finish(
- struct xfs_trans **tp, /* transaction pointer addr */
- xfs_bmap_free_t *flist, /* i/o: list extents to free */
- int *committed); /* xact committed or not */
-
-/*
* Returns the file-relative block number of the first unused block in the file.
* This is the lowest-address hole if the file has holes, else the first block
* past the end of file.
@@ -344,6 +331,32 @@ xfs_bunmapi(
int *done); /* set if not done yet */
/*
+ * Check an extent list, which has just been read, for
+ * any bit in the extent flag field.
+ */
+int
+xfs_check_nostate_extents(
+ struct xfs_ifork *ifp,
+ xfs_extnum_t idx,
+ xfs_extnum_t num);
+
+#ifdef __KERNEL__
+
+/*
+ * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
+ * caller. Frees all the extents that need freeing, which must be done
+ * last due to locking considerations.
+ *
+ * Return 1 if the given transaction was committed and a new one allocated,
+ * and 0 otherwise.
+ */
+int /* error */
+xfs_bmap_finish(
+ struct xfs_trans **tp, /* transaction pointer addr */
+ xfs_bmap_free_t *flist, /* i/o: list extents to free */
+ int *committed); /* xact committed or not */
+
+/*
* Fcntl interface to xfs_bmapi.
*/
int /* error code */
@@ -375,16 +388,6 @@ xfs_bmap_count_blocks(
int *count);
/*
- * Check an extent list, which has just been read, for
- * any bit in the extent flag field.
- */
-int
-xfs_check_nostate_extents(
- struct xfs_ifork *ifp,
- xfs_extnum_t idx,
- xfs_extnum_t num);
-
-/*
* Search the extent records for the entry containing block bno.
* If bno lies in a hole, point to the next entry. If bno lies
* past eof, *eofp will be set, and *prevp will contain the last
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 23efad29a5cd..e46e02b8e277 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,1406 +37,13 @@
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_quota.h"
-#if defined(XFS_BMBT_TRACE)
-ktrace_t *xfs_bmbt_trace_buf;
-#endif
-
-/*
- * Prototypes for internal btree functions.
- */
-
-
-STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *);
-STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *,
- __uint64_t *, xfs_btree_cur_t **, int *);
-STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
-
-
-#if defined(XFS_BMBT_TRACE)
-
-static char ARGS[] = "args";
-static char ENTRY[] = "entry";
-static char ERROR[] = "error";
-#undef EXIT
-static char EXIT[] = "exit";
-
-/*
- * Add a trace buffer entry for the arguments given to the routine,
- * generic form.
- */
-STATIC void
-xfs_bmbt_trace_enter(
- const char *func,
- xfs_btree_cur_t *cur,
- char *s,
- int type,
- int line,
- __psunsigned_t a0,
- __psunsigned_t a1,
- __psunsigned_t a2,
- __psunsigned_t a3,
- __psunsigned_t a4,
- __psunsigned_t a5,
- __psunsigned_t a6,
- __psunsigned_t a7,
- __psunsigned_t a8,
- __psunsigned_t a9,
- __psunsigned_t a10)
-{
- xfs_inode_t *ip;
- int whichfork;
-
- ip = cur->bc_private.b.ip;
- whichfork = cur->bc_private.b.whichfork;
- ktrace_enter(xfs_bmbt_trace_buf,
- (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
- (void *)func, (void *)s, (void *)ip, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
- ASSERT(ip->i_btrace);
- ktrace_enter(ip->i_btrace,
- (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
- (void *)func, (void *)s, (void *)ip, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
-}
-/*
- * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
- */
-STATIC void
-xfs_bmbt_trace_argbi(
- const char *func,
- xfs_btree_cur_t *cur,
- xfs_buf_t *b,
- int i,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBI, line,
- (__psunsigned_t)b, i, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
- */
-STATIC void
-xfs_bmbt_trace_argbii(
- const char *func,
- xfs_btree_cur_t *cur,
- xfs_buf_t *b,
- int i0,
- int i1,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBII, line,
- (__psunsigned_t)b, i0, i1, 0,
- 0, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for 3 block-length args
- * and an integer arg.
- */
-STATIC void
-xfs_bmbt_trace_argfffi(
- const char *func,
- xfs_btree_cur_t *cur,
- xfs_dfiloff_t o,
- xfs_dfsbno_t b,
- xfs_dfilblks_t i,
- int j,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGFFFI, line,
- o >> 32, (int)o, b >> 32, (int)b,
- i >> 32, (int)i, (int)j, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for one integer arg.
- */
-STATIC void
-xfs_bmbt_trace_argi(
- const char *func,
- xfs_btree_cur_t *cur,
- int i,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGI, line,
- i, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, fsblock, key.
- */
-STATIC void
-xfs_bmbt_trace_argifk(
- const char *func,
- xfs_btree_cur_t *cur,
- int i,
- xfs_fsblock_t f,
- xfs_dfiloff_t o,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
- i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32,
- (int)o, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, fsblock, rec.
- */
-STATIC void
-xfs_bmbt_trace_argifr(
- const char *func,
- xfs_btree_cur_t *cur,
- int i,
- xfs_fsblock_t f,
- xfs_bmbt_rec_t *r,
- int line)
-{
- xfs_dfsbno_t b;
- xfs_dfilblks_t c;
- xfs_dfsbno_t d;
- xfs_dfiloff_t o;
- xfs_bmbt_irec_t s;
-
- d = (xfs_dfsbno_t)f;
- xfs_bmbt_disk_get_all(r, &s);
- o = (xfs_dfiloff_t)s.br_startoff;
- b = (xfs_dfsbno_t)s.br_startblock;
- c = s.br_blockcount;
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFR, line,
- i, d >> 32, (int)d, o >> 32,
- (int)o, b >> 32, (int)b, c >> 32,
- (int)c, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, key.
- */
-STATIC void
-xfs_bmbt_trace_argik(
- const char *func,
- xfs_btree_cur_t *cur,
- int i,
- xfs_bmbt_key_t *k,
- int line)
-{
- xfs_dfiloff_t o;
-
- o = be64_to_cpu(k->br_startoff);
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
- i, o >> 32, (int)o, 0,
- 0, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for the cursor/operation.
- */
-STATIC void
-xfs_bmbt_trace_cursor(
- const char *func,
- xfs_btree_cur_t *cur,
- char *s,
- int line)
-{
- xfs_bmbt_rec_host_t r;
-
- xfs_bmbt_set_all(&r, &cur->bc_rec.b);
- xfs_bmbt_trace_enter(func, cur, s, XFS_BMBT_KTRACE_CUR, line,
- (cur->bc_nlevels << 24) | (cur->bc_private.b.flags << 16) |
- cur->bc_private.b.allocated,
- r.l0 >> 32, (int)r.l0,
- r.l1 >> 32, (int)r.l1,
- (unsigned long)cur->bc_bufs[0], (unsigned long)cur->bc_bufs[1],
- (unsigned long)cur->bc_bufs[2], (unsigned long)cur->bc_bufs[3],
- (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
- (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
-}
-
-#define XFS_BMBT_TRACE_ARGBI(c,b,i) \
- xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__)
-#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \
- xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__)
-#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \
- xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
-#define XFS_BMBT_TRACE_ARGI(c,i) \
- xfs_bmbt_trace_argi(__func__, c, i, __LINE__)
-#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
- xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__)
-#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
- xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__)
-#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
- xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__)
-#define XFS_BMBT_TRACE_CURSOR(c,s) \
- xfs_bmbt_trace_cursor(__func__, c, s, __LINE__)
-#else
-#define XFS_BMBT_TRACE_ARGBI(c,b,i)
-#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
-#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)
-#define XFS_BMBT_TRACE_ARGI(c,i)
-#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s)
-#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)
-#define XFS_BMBT_TRACE_ARGIK(c,i,k)
-#define XFS_BMBT_TRACE_CURSOR(c,s)
-#endif /* XFS_BMBT_TRACE */
-
-
-/*
- * Internal functions.
- */
-
-/*
- * Delete record pointed to by cur/level.
- */
-STATIC int /* error */
-xfs_bmbt_delrec(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- xfs_bmbt_block_t *block; /* bmap btree block */
- xfs_fsblock_t bno; /* fs-relative block number */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop counter */
- int j; /* temp state */
- xfs_bmbt_key_t key; /* bmap btree key */
- xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
- xfs_fsblock_t lbno; /* left sibling block number */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_bmbt_block_t *left; /* left btree block */
- xfs_bmbt_key_t *lkp; /* left btree key */
- xfs_bmbt_ptr_t *lpp; /* left address pointer */
- int lrecs=0; /* left record count */
- xfs_bmbt_rec_t *lrp; /* left record pointer */
- xfs_mount_t *mp; /* file system mount point */
- xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
- int ptr; /* key/record index */
- xfs_fsblock_t rbno; /* right sibling block number */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_bmbt_block_t *right; /* right btree block */
- xfs_bmbt_key_t *rkp; /* right btree key */
- xfs_bmbt_rec_t *rp; /* pointer to bmap btree rec */
- xfs_bmbt_ptr_t *rpp; /* right address pointer */
- xfs_bmbt_block_t *rrblock; /* right-right btree block */
- xfs_buf_t *rrbp; /* right-right buffer pointer */
- int rrecs=0; /* right record count */
- xfs_bmbt_rec_t *rrp; /* right record pointer */
- xfs_btree_cur_t *tcur; /* temporary btree cursor */
- int numrecs; /* temporary numrec count */
- int numlrecs, numrrecs;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- ptr = cur->bc_ptrs[level];
- tcur = NULL;
- if (ptr == 0) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- block = xfs_bmbt_get_block(cur, level, &bp);
- numrecs = be16_to_cpu(block->bb_numrecs);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
-#endif
- if (ptr > numrecs) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- XFS_STATS_INC(xs_bmbt_delrec);
- if (level > 0) {
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = ptr; i < numrecs; i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- }
-#endif
- if (ptr < numrecs) {
- memmove(&kp[ptr - 1], &kp[ptr],
- (numrecs - ptr) * sizeof(*kp));
- memmove(&pp[ptr - 1], &pp[ptr],
- (numrecs - ptr) * sizeof(*pp));
- xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1);
- xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1);
- }
- } else {
- rp = XFS_BMAP_REC_IADDR(block, 1, cur);
- if (ptr < numrecs) {
- memmove(&rp[ptr - 1], &rp[ptr],
- (numrecs - ptr) * sizeof(*rp));
- xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
- }
- if (ptr == 1) {
- key.br_startoff =
- cpu_to_be64(xfs_bmbt_disk_get_startoff(rp));
- kp = &key;
- }
- }
- numrecs--;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
- /*
- * We're at the root level.
- * First, shrink the root block in-memory.
- * Try to get rid of the next level down.
- * If we can't then there's nothing left to do.
- */
- if (level == cur->bc_nlevels - 1) {
- xfs_iroot_realloc(cur->bc_private.b.ip, -1,
- cur->bc_private.b.whichfork);
- if ((error = xfs_bmbt_killroot(cur))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
- if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- rbno = be64_to_cpu(block->bb_rightsib);
- lbno = be64_to_cpu(block->bb_leftsib);
- /*
- * One child of root, need to get a chance to copy its contents
- * into the root and delete it. Can't go up to next level,
- * there's nothing to delete there.
- */
- if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK &&
- level == cur->bc_nlevels - 2) {
- if ((error = xfs_bmbt_killroot(cur))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK);
- if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- bno = NULLFSBLOCK;
- if (rbno != NULLFSBLOCK) {
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_bmbt_increment(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- rbp = tcur->bc_bufs[level];
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
-#endif
- bno = be64_to_cpu(right->bb_leftsib);
- if (be16_to_cpu(right->bb_numrecs) - 1 >=
- XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
- if ((error = xfs_bmbt_lshift(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_BMAP_BLOCK_IMINRECS(level, tcur));
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- tcur = NULL;
- if (level > 0) {
- if ((error = xfs_bmbt_decrement(cur,
- level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur,
- ERROR);
- goto error0;
- }
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- }
- rrecs = be16_to_cpu(right->bb_numrecs);
- if (lbno != NULLFSBLOCK) {
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- }
- }
- if (lbno != NULLFSBLOCK) {
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- /*
- * decrement to last in block
- */
- if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- lbp = tcur->bc_bufs[level];
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
-#endif
- bno = be64_to_cpu(left->bb_rightsib);
- if (be16_to_cpu(left->bb_numrecs) - 1 >=
- XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
- if ((error = xfs_bmbt_rshift(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_BMAP_BLOCK_IMINRECS(level, tcur));
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- tcur = NULL;
- if (level == 0)
- cur->bc_ptrs[0]++;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- }
- lrecs = be16_to_cpu(left->bb_numrecs);
- }
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- tcur = NULL;
- mp = cur->bc_mp;
- ASSERT(bno != NULLFSBLOCK);
- if (lbno != NULLFSBLOCK &&
- lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- rbno = bno;
- right = block;
- rbp = bp;
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
- if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- } else if (rbno != NULLFSBLOCK &&
- rrecs + be16_to_cpu(block->bb_numrecs) <=
- XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- lbno = bno;
- left = block;
- lbp = bp;
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
- if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- lrecs = be16_to_cpu(left->bb_numrecs);
- } else {
- if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- numlrecs = be16_to_cpu(left->bb_numrecs);
- numrrecs = be16_to_cpu(right->bb_numrecs);
- if (level > 0) {
- lkp = XFS_BMAP_KEY_IADDR(left, numlrecs + 1, cur);
- lpp = XFS_BMAP_PTR_IADDR(left, numlrecs + 1, cur);
- rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
- rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < numrrecs; i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- }
-#endif
- memcpy(lkp, rkp, numrrecs * sizeof(*lkp));
- memcpy(lpp, rpp, numrrecs * sizeof(*lpp));
- xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
- xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
- } else {
- lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur);
- rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
- memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
- xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
- }
- be16_add_cpu(&left->bb_numrecs, numrrecs);
- left->bb_rightsib = right->bb_rightsib;
- xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
- if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) {
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp,
- be64_to_cpu(left->bb_rightsib),
- 0, &rrbp, XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
- if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- rrblock->bb_leftsib = cpu_to_be64(lbno);
- xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
- }
- xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1,
- cur->bc_private.b.flist, mp);
- cur->bc_private.b.ip->i_d.di_nblocks--;
- xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(mp, cur->bc_tp, cur->bc_private.b.ip,
- XFS_TRANS_DQ_BCOUNT, -1L);
- xfs_trans_binval(cur->bc_tp, rbp);
- if (bp != lbp) {
- cur->bc_bufs[level] = lbp;
- cur->bc_ptrs[level] += lrecs;
- cur->bc_ra[level] = 0;
- } else if ((error = xfs_bmbt_increment(cur, level + 1, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (level > 0)
- cur->bc_ptrs[level]--;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 2;
- return 0;
-
-error0:
- if (tcur)
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
-}
-
-/*
- * Insert one record/level. Return information to the caller
- * allowing the next level up to proceed if necessary.
- */
-STATIC int /* error */
-xfs_bmbt_insrec(
- xfs_btree_cur_t *cur,
- int level,
- xfs_fsblock_t *bnop,
- xfs_bmbt_rec_t *recp,
- xfs_btree_cur_t **curp,
- int *stat) /* no-go/done/continue */
-{
- xfs_bmbt_block_t *block; /* bmap btree block */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_bmbt_key_t key; /* bmap btree key */
- xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
- int logflags; /* inode logging flags */
- xfs_fsblock_t nbno; /* new block number */
- struct xfs_btree_cur *ncur; /* new btree cursor */
- __uint64_t startoff; /* new btree key value */
- xfs_bmbt_rec_t nrec; /* new record count */
- int optr; /* old key/record index */
- xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
- int ptr; /* key/record index */
- xfs_bmbt_rec_t *rp=NULL; /* pointer to bmap btree rec */
- int numrecs;
-
- ASSERT(level < cur->bc_nlevels);
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
- ncur = NULL;
- key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp));
- optr = ptr = cur->bc_ptrs[level];
- if (ptr == 0) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- XFS_STATS_INC(xs_bmbt_insrec);
- block = xfs_bmbt_get_block(cur, level, &bp);
- numrecs = be16_to_cpu(block->bb_numrecs);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (ptr <= numrecs) {
- if (level == 0) {
- rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
- xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp);
- } else {
- kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
- xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp);
- }
- }
-#endif
- nbno = NULLFSBLOCK;
- if (numrecs == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- if (numrecs < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
- /*
- * A root block, that can be made bigger.
- */
- xfs_iroot_realloc(cur->bc_private.b.ip, 1,
- cur->bc_private.b.whichfork);
- block = xfs_bmbt_get_block(cur, level, &bp);
- } else if (level == cur->bc_nlevels - 1) {
- if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) ||
- *stat == 0) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
- logflags);
- block = xfs_bmbt_get_block(cur, level, &bp);
- } else {
- if ((error = xfs_bmbt_rshift(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (i) {
- /* nothing */
- } else {
- if ((error = xfs_bmbt_lshift(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (i) {
- optr = ptr = cur->bc_ptrs[level];
- } else {
- if ((error = xfs_bmbt_split(cur, level,
- &nbno, &startoff, &ncur,
- &i))) {
- XFS_BMBT_TRACE_CURSOR(cur,
- ERROR);
- return error;
- }
- if (i) {
- block = xfs_bmbt_get_block(
- cur, level, &bp);
-#ifdef DEBUG
- if ((error =
- xfs_btree_check_lblock(cur,
- block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(
- cur, ERROR);
- return error;
- }
-#endif
- ptr = cur->bc_ptrs[level];
- xfs_bmbt_disk_set_allf(&nrec,
- startoff, 0, 0,
- XFS_EXT_NORM);
- } else {
- XFS_BMBT_TRACE_CURSOR(cur,
- EXIT);
- *stat = 0;
- return 0;
- }
- }
- }
- }
- }
- numrecs = be16_to_cpu(block->bb_numrecs);
- if (level > 0) {
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = numrecs; i >= ptr; i--) {
- if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1],
- level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memmove(&kp[ptr], &kp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*kp));
- memmove(&pp[ptr], &pp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*pp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, *bnop, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- kp[ptr - 1] = key;
- pp[ptr - 1] = cpu_to_be64(*bnop);
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_bmbt_log_keys(cur, bp, ptr, numrecs);
- xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs);
- } else {
- rp = XFS_BMAP_REC_IADDR(block, 1, cur);
- memmove(&rp[ptr], &rp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*rp));
- rp[ptr - 1] = *recp;
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_bmbt_log_recs(cur, bp, ptr, numrecs);
- }
- xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
-#ifdef DEBUG
- if (ptr < numrecs) {
- if (level == 0)
- xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1,
- rp + ptr);
- else
- xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1,
- kp + ptr);
- }
-#endif
- if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- *bnop = nbno;
- if (nbno != NULLFSBLOCK) {
- *recp = nrec;
- *curp = ncur;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-STATIC int
-xfs_bmbt_killroot(
- xfs_btree_cur_t *cur)
-{
- xfs_bmbt_block_t *block;
- xfs_bmbt_block_t *cblock;
- xfs_buf_t *cbp;
- xfs_bmbt_key_t *ckp;
- xfs_bmbt_ptr_t *cpp;
-#ifdef DEBUG
- int error;
-#endif
- int i;
- xfs_bmbt_key_t *kp;
- xfs_inode_t *ip;
- xfs_ifork_t *ifp;
- int level;
- xfs_bmbt_ptr_t *pp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- level = cur->bc_nlevels - 1;
- ASSERT(level >= 1);
- /*
- * Don't deal with the root block needs to be a leaf case.
- * We're just going to turn the thing back into extents anyway.
- */
- if (level == 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
- }
- block = xfs_bmbt_get_block(cur, level, &cbp);
- /*
- * Give up if the root has multiple children.
- */
- if (be16_to_cpu(block->bb_numrecs) != 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
- }
- /*
- * Only do this if the next level will fit.
- * Then the data must be copied up to the inode,
- * instead of freeing the root you free the next level.
- */
- cbp = cur->bc_bufs[level - 1];
- cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
- if (be16_to_cpu(cblock->bb_numrecs) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
- }
- ASSERT(be64_to_cpu(cblock->bb_leftsib) == NULLDFSBNO);
- ASSERT(be64_to_cpu(cblock->bb_rightsib) == NULLDFSBNO);
- ip = cur->bc_private.b.ip;
- ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
- ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
- XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
- i = (int)(be16_to_cpu(cblock->bb_numrecs) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
- if (i) {
- xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
- block = ifp->if_broot;
- }
- be16_add_cpu(&block->bb_numrecs, i);
- ASSERT(block->bb_numrecs == cblock->bb_numrecs);
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
- memcpy(kp, ckp, be16_to_cpu(block->bb_numrecs) * sizeof(*kp));
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
- cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memcpy(pp, cpp, be16_to_cpu(block->bb_numrecs) * sizeof(*pp));
- xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
- cur->bc_private.b.flist, cur->bc_mp);
- ip->i_d.di_nblocks--;
- XFS_TRANS_MOD_DQUOT_BYINO(cur->bc_mp, cur->bc_tp, ip,
- XFS_TRANS_DQ_BCOUNT, -1L);
- xfs_trans_binval(cur->bc_tp, cbp);
- cur->bc_bufs[level - 1] = NULL;
- be16_add_cpu(&block->bb_level, -1);
- xfs_trans_log_inode(cur->bc_tp, ip,
- XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
- cur->bc_nlevels--;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
-}
-
-/*
- * Log key values from the btree block.
- */
-STATIC void
-xfs_bmbt_log_keys(
- xfs_btree_cur_t *cur,
- xfs_buf_t *bp,
- int kfirst,
- int klast)
-{
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast);
- tp = cur->bc_tp;
- if (bp) {
- xfs_bmbt_block_t *block;
- int first;
- xfs_bmbt_key_t *kp;
- int last;
-
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- kp = XFS_BMAP_KEY_DADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(tp, bp, first, last);
- } else {
- xfs_inode_t *ip;
-
- ip = cur->bc_private.b.ip;
- xfs_trans_log_inode(tp, ip,
- XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
-}
-
-/*
- * Log pointer values from the btree block.
- */
-STATIC void
-xfs_bmbt_log_ptrs(
- xfs_btree_cur_t *cur,
- xfs_buf_t *bp,
- int pfirst,
- int plast)
-{
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast);
- tp = cur->bc_tp;
- if (bp) {
- xfs_bmbt_block_t *block;
- int first;
- int last;
- xfs_bmbt_ptr_t *pp;
-
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- pp = XFS_BMAP_PTR_DADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(tp, bp, first, last);
- } else {
- xfs_inode_t *ip;
-
- ip = cur->bc_private.b.ip;
- xfs_trans_log_inode(tp, ip,
- XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
-}
-
-/*
- * Lookup the record. The cursor is made to point to it, based on dir.
- */
-STATIC int /* error */
-xfs_bmbt_lookup(
- xfs_btree_cur_t *cur,
- xfs_lookup_t dir,
- int *stat) /* success/failure */
-{
- xfs_bmbt_block_t *block=NULL;
- xfs_buf_t *bp;
- xfs_daddr_t d;
- xfs_sfiloff_t diff;
- int error; /* error return value */
- xfs_fsblock_t fsbno=0;
- int high;
- int i;
- int keyno=0;
- xfs_bmbt_key_t *kkbase=NULL;
- xfs_bmbt_key_t *kkp;
- xfs_bmbt_rec_t *krbase=NULL;
- xfs_bmbt_rec_t *krp;
- int level;
- int low;
- xfs_mount_t *mp;
- xfs_bmbt_ptr_t *pp;
- xfs_bmbt_irec_t *rp;
- xfs_fileoff_t startoff;
- xfs_trans_t *tp;
-
- XFS_STATS_INC(xs_bmbt_lookup);
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, (int)dir);
- tp = cur->bc_tp;
- mp = cur->bc_mp;
- rp = &cur->bc_rec.b;
- for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
- if (level < cur->bc_nlevels - 1) {
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- bp = cur->bc_bufs[level];
- if (bp && XFS_BUF_ADDR(bp) != d)
- bp = NULL;
- if (!bp) {
- if ((error = xfs_btree_read_bufl(mp, tp, fsbno,
- 0, &bp, XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- xfs_btree_setbuf(cur, level, bp);
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- if ((error = xfs_btree_check_lblock(cur, block,
- level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- } else
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- } else
- block = xfs_bmbt_get_block(cur, level, &bp);
- if (diff == 0)
- keyno = 1;
- else {
- if (level > 0)
- kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur);
- else
- krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
- low = 1;
- if (!(high = be16_to_cpu(block->bb_numrecs))) {
- ASSERT(level == 0);
- cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- while (low <= high) {
- XFS_STATS_INC(xs_bmbt_compare);
- keyno = (low + high) >> 1;
- if (level > 0) {
- kkp = kkbase + keyno - 1;
- startoff = be64_to_cpu(kkp->br_startoff);
- } else {
- krp = krbase + keyno - 1;
- startoff = xfs_bmbt_disk_get_startoff(krp);
- }
- diff = (xfs_sfiloff_t)
- (startoff - rp->br_startoff);
- if (diff < 0)
- low = keyno + 1;
- else if (diff > 0)
- high = keyno - 1;
- else
- break;
- }
- }
- if (level > 0) {
- if (diff > 0 && --keyno < 1)
- keyno = 1;
- pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
- fsbno = be64_to_cpu(*pp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, fsbno, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- cur->bc_ptrs[level] = keyno;
- }
- }
- if (dir != XFS_LOOKUP_LE && diff < 0) {
- keyno++;
- /*
- * If ge search and we went off the end of the block, but it's
- * not the last block, we're in the wrong block.
- */
- if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) &&
- be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) {
- cur->bc_ptrs[0] = keyno;
- if ((error = xfs_bmbt_increment(cur, 0, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- XFS_WANT_CORRUPTED_RETURN(i == 1);
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- }
- else if (dir == XFS_LOOKUP_LE && diff > 0)
- keyno--;
- cur->bc_ptrs[0] = keyno;
- if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- } else {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
- }
- return 0;
-}
-
-/*
- * Move 1 record left from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_bmbt_lshift(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- int error; /* error return value */
-#ifdef DEBUG
- int i; /* loop counter */
-#endif
- xfs_bmbt_key_t key; /* bmap btree key */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_bmbt_block_t *left; /* left btree block */
- xfs_bmbt_key_t *lkp=NULL; /* left btree key */
- xfs_bmbt_ptr_t *lpp; /* left address pointer */
- int lrecs; /* left record count */
- xfs_bmbt_rec_t *lrp=NULL; /* left record pointer */
- xfs_mount_t *mp; /* file system mount point */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_bmbt_block_t *right; /* right btree block */
- xfs_bmbt_key_t *rkp=NULL; /* right btree key */
- xfs_bmbt_ptr_t *rpp=NULL; /* right address pointer */
- xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */
- int rrecs; /* right record count */
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- if (level == cur->bc_nlevels - 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- rbp = cur->bc_bufs[level];
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (be64_to_cpu(right->bb_leftsib) == NULLDFSBNO) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- if (cur->bc_ptrs[level] <= 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- mp = cur->bc_mp;
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(right->bb_leftsib), 0,
- &lbp, XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
- if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (be16_to_cpu(left->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- lrecs = be16_to_cpu(left->bb_numrecs) + 1;
- if (level > 0) {
- lkp = XFS_BMAP_KEY_IADDR(left, lrecs, cur);
- rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
- *lkp = *rkp;
- xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs);
- lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur);
- rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- *lpp = *rpp;
- xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs);
- } else {
- lrp = XFS_BMAP_REC_IADDR(left, lrecs, cur);
- rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
- *lrp = *rrp;
- xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
- }
- left->bb_numrecs = cpu_to_be16(lrecs);
- xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
-#ifdef DEBUG
- if (level > 0)
- xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp);
- else
- xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
-#endif
- rrecs = be16_to_cpu(right->bb_numrecs) - 1;
- right->bb_numrecs = cpu_to_be16(rrecs);
- xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
- if (level > 0) {
-#ifdef DEBUG
- for (i = 0; i < rrecs; i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1],
- level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memmove(rkp, rkp + 1, rrecs * sizeof(*rkp));
- memmove(rpp, rpp + 1, rrecs * sizeof(*rpp));
- xfs_bmbt_log_keys(cur, rbp, 1, rrecs);
- xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs);
- } else {
- memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
- xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
- key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
- rkp = &key;
- }
- if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- cur->bc_ptrs[level]--;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-/*
- * Move 1 record right from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_bmbt_rshift(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i; /* loop counter */
- xfs_bmbt_key_t key; /* bmap btree key */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_bmbt_block_t *left; /* left btree block */
- xfs_bmbt_key_t *lkp; /* left btree key */
- xfs_bmbt_ptr_t *lpp; /* left address pointer */
- xfs_bmbt_rec_t *lrp; /* left record pointer */
- xfs_mount_t *mp; /* file system mount point */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_bmbt_block_t *right; /* right btree block */
- xfs_bmbt_key_t *rkp; /* right btree key */
- xfs_bmbt_ptr_t *rpp; /* right address pointer */
- xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */
- struct xfs_btree_cur *tcur; /* temporary btree cursor */
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- if (level == cur->bc_nlevels - 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (be64_to_cpu(left->bb_rightsib) == NULLDFSBNO) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- mp = cur->bc_mp;
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(left->bb_rightsib), 0,
- &rbp, XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
- if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (be16_to_cpu(right->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- if (level > 0) {
- lkp = XFS_BMAP_KEY_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- lpp = XFS_BMAP_PTR_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
- rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
- if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- *rkp = *lkp;
- *rpp = *lpp;
- xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- } else {
- lrp = XFS_BMAP_REC_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
- memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- *rrp = *lrp;
- xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
- rkp = &key;
- }
- be16_add_cpu(&left->bb_numrecs, -1);
- xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
- be16_add_cpu(&right->bb_numrecs, 1);
-#ifdef DEBUG
- if (level > 0)
- xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
- else
- xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1);
-#endif
- xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
- if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_bmbt_increment(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
- goto error1;
- }
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_bmbt_updkey(tcur, rkp, level + 1))) {
- XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
- goto error1;
- }
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-error0:
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
-error1:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
-}
-
/*
* Determine the extent state.
*/
@@ -1453,229 +60,15 @@ xfs_extent_state(
return XFS_EXT_NORM;
}
-
-/*
- * Split cur/level block in half.
- * Return new block number and its first record (to be inserted into parent).
- */
-STATIC int /* error */
-xfs_bmbt_split(
- xfs_btree_cur_t *cur,
- int level,
- xfs_fsblock_t *bnop,
- __uint64_t *startoff,
- xfs_btree_cur_t **curp,
- int *stat) /* success/failure */
-{
- xfs_alloc_arg_t args; /* block allocation args */
- int error; /* error return value */
- int i; /* loop counter */
- xfs_fsblock_t lbno; /* left sibling block number */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_bmbt_block_t *left; /* left btree block */
- xfs_bmbt_key_t *lkp; /* left btree key */
- xfs_bmbt_ptr_t *lpp; /* left address pointer */
- xfs_bmbt_rec_t *lrp; /* left record pointer */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_bmbt_block_t *right; /* right btree block */
- xfs_bmbt_key_t *rkp; /* right btree key */
- xfs_bmbt_ptr_t *rpp; /* right address pointer */
- xfs_bmbt_block_t *rrblock; /* right-right btree block */
- xfs_buf_t *rrbp; /* right-right buffer pointer */
- xfs_bmbt_rec_t *rrp; /* right record pointer */
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff);
- args.tp = cur->bc_tp;
- args.mp = cur->bc_mp;
- lbp = cur->bc_bufs[level];
- lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
- args.fsbno = cur->bc_private.b.firstblock;
- args.firstblock = args.fsbno;
- args.minleft = 0;
- if (args.fsbno == NULLFSBLOCK) {
- args.fsbno = lbno;
- args.type = XFS_ALLOCTYPE_START_BNO;
- /*
- * Make sure there is sufficient room left in the AG to
- * complete a full tree split for an extent insert. If
- * we are converting the middle part of an extent then
- * we may need space for two tree splits.
- *
- * We are relying on the caller to make the correct block
- * reservation for this operation to succeed. If the
- * reservation amount is insufficient then we may fail a
- * block allocation here and corrupt the filesystem.
- */
- args.minleft = xfs_trans_get_block_res(args.tp);
- } else if (cur->bc_private.b.flist->xbf_low)
- args.type = XFS_ALLOCTYPE_START_BNO;
- else
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- args.mod = args.alignment = args.total = args.isfl =
- args.userdata = args.minalignslop = 0;
- args.minlen = args.maxlen = args.prod = 1;
- args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
- if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return XFS_ERROR(ENOSPC);
- }
- if ((error = xfs_alloc_vextent(&args))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (args.fsbno == NULLFSBLOCK && args.minleft) {
- /*
- * Could not find an AG with enough free space to satisfy
- * a full btree split. Try again without minleft and if
- * successful activate the lowspace algorithm.
- */
- args.fsbno = 0;
- args.type = XFS_ALLOCTYPE_FIRST_AG;
- args.minleft = 0;
- if ((error = xfs_alloc_vextent(&args))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- cur->bc_private.b.flist->xbf_low = 1;
- }
- if (args.fsbno == NULLFSBLOCK) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- ASSERT(args.len == 1);
- cur->bc_private.b.firstblock = args.fsbno;
- cur->bc_private.b.allocated++;
- cur->bc_private.b.ip->i_d.di_nblocks++;
- xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
- XFS_TRANS_DQ_BCOUNT, 1L);
- rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, left, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- right->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
- right->bb_level = left->bb_level;
- right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
- if ((be16_to_cpu(left->bb_numrecs) & 1) &&
- cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
- be16_add_cpu(&right->bb_numrecs, 1);
- i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
- if (level > 0) {
- lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
- lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
- rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
- rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- *startoff = be64_to_cpu(rkp->br_startoff);
- } else {
- lrp = XFS_BMAP_REC_IADDR(left, i, cur);
- rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
- memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- *startoff = xfs_bmbt_disk_get_startoff(rrp);
- }
- be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
- right->bb_rightsib = left->bb_rightsib;
- left->bb_rightsib = cpu_to_be64(args.fsbno);
- right->bb_leftsib = cpu_to_be64(lbno);
- xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
- xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- if (be64_to_cpu(right->bb_rightsib) != NULLDFSBNO) {
- if ((error = xfs_btree_read_bufl(args.mp, args.tp,
- be64_to_cpu(right->bb_rightsib), 0, &rrbp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
- if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- rrblock->bb_leftsib = cpu_to_be64(args.fsbno);
- xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
- }
- if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
- xfs_btree_setbuf(cur, level, rbp);
- cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
- }
- if (level + 1 < cur->bc_nlevels) {
- if ((error = xfs_btree_dup_cursor(cur, curp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- (*curp)->bc_ptrs[level + 1]++;
- }
- *bnop = args.fsbno;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-
-/*
- * Update keys for the record.
- */
-STATIC int
-xfs_bmbt_updkey(
- xfs_btree_cur_t *cur,
- xfs_bmbt_key_t *keyp, /* on-disk format */
- int level)
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
-#ifdef DEBUG
- int error;
-#endif
- xfs_bmbt_key_t *kp;
- int ptr;
-
- ASSERT(level >= 1);
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGIK(cur, level, keyp);
- for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
- block = xfs_bmbt_get_block(cur, level, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- ptr = cur->bc_ptrs[level];
- kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
- *kp = *keyp;
- xfs_bmbt_log_keys(cur, bp, ptr, ptr);
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
-}
-
/*
* Convert on-disk form of btree root to in-memory form.
*/
void
xfs_bmdr_to_bmbt(
+ struct xfs_mount *mp,
xfs_bmdr_block_t *dblock,
int dblocklen,
- xfs_bmbt_block_t *rblock,
+ struct xfs_btree_block *rblock,
int rblocklen)
{
int dmxr;
@@ -1688,129 +81,19 @@ xfs_bmdr_to_bmbt(
rblock->bb_level = dblock->bb_level;
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
- rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
- rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
- dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
- fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1);
- tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
- fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr);
- tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
+ rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+ fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
+ tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
+ fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
+ tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_bmbt_decrement(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
- int error; /* error return value */
- xfs_fsblock_t fsbno;
- int lev;
- xfs_mount_t *mp;
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- ASSERT(level < cur->bc_nlevels);
- if (level < cur->bc_nlevels - 1)
- xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
- if (--cur->bc_ptrs[level] > 0) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- block = xfs_bmbt_get_block(cur, level, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- if (--cur->bc_ptrs[lev] > 0)
- break;
- if (lev < cur->bc_nlevels - 1)
- xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
- }
- if (lev == cur->bc_nlevels) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- tp = cur->bc_tp;
- mp = cur->bc_mp;
- for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
- fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-/*
- * Delete the record pointed to by cur.
- */
-int /* error */
-xfs_bmbt_delete(
- xfs_btree_cur_t *cur,
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i;
- int level;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- for (level = 0, i = 2; i == 2; level++) {
- if ((error = xfs_bmbt_delrec(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
- if (i == 0) {
- for (level = 1; level < cur->bc_nlevels; level++) {
- if (cur->bc_ptrs[level] == 0) {
- if ((error = xfs_bmbt_decrement(cur, level,
- &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- break;
- }
- }
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = i;
- return 0;
-}
-
-/*
* Convert a compressed bmap extent record to an uncompressed form.
* This code must be in sync with the routines xfs_bmbt_get_startoff,
* xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
@@ -1864,31 +147,6 @@ xfs_bmbt_get_all(
}
/*
- * Get the block pointer for the given level of the cursor.
- * Fill in the buffer pointer, if applicable.
- */
-xfs_bmbt_block_t *
-xfs_bmbt_get_block(
- xfs_btree_cur_t *cur,
- int level,
- xfs_buf_t **bpp)
-{
- xfs_ifork_t *ifp;
- xfs_bmbt_block_t *rval;
-
- if (level < cur->bc_nlevels - 1) {
- *bpp = cur->bc_bufs[level];
- rval = XFS_BUF_TO_BMBT_BLOCK(*bpp);
- } else {
- *bpp = NULL;
- ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
- cur->bc_private.b.whichfork);
- rval = ifp->if_broot;
- }
- return rval;
-}
-
-/*
* Extract the blockcount field from an in memory bmap extent record.
*/
xfs_filblks_t
@@ -1974,348 +232,6 @@ xfs_bmbt_disk_get_startoff(
XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
}
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_bmbt_increment(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
- int error; /* error return value */
- xfs_fsblock_t fsbno;
- int lev;
- xfs_mount_t *mp;
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- ASSERT(level < cur->bc_nlevels);
- if (level < cur->bc_nlevels - 1)
- xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
- block = xfs_bmbt_get_block(cur, level, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- if (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- block = xfs_bmbt_get_block(cur, lev, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
- break;
- if (lev < cur->bc_nlevels - 1)
- xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
- }
- if (lev == cur->bc_nlevels) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- tp = cur->bc_tp;
- mp = cur->bc_mp;
- for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
- fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- cur->bc_ptrs[lev] = 1;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-/*
- * Insert the current record at the point referenced by cur.
- *
- * A multi-level split of the tree on insert will invalidate the original
- * cursor. All callers of this function should assume that the cursor is
- * no longer valid and revalidate it.
- */
-int /* error */
-xfs_bmbt_insert(
- xfs_btree_cur_t *cur,
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i;
- int level;
- xfs_fsblock_t nbno;
- xfs_btree_cur_t *ncur;
- xfs_bmbt_rec_t nrec;
- xfs_btree_cur_t *pcur;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- level = 0;
- nbno = NULLFSBLOCK;
- xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
- ncur = NULL;
- pcur = cur;
- do {
- if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
- &i))) {
- if (pcur != cur)
- xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
- cur->bc_nlevels = pcur->bc_nlevels;
- cur->bc_private.b.allocated +=
- pcur->bc_private.b.allocated;
- pcur->bc_private.b.allocated = 0;
- ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
- XFS_IS_REALTIME_INODE(cur->bc_private.b.ip));
- cur->bc_private.b.firstblock =
- pcur->bc_private.b.firstblock;
- ASSERT(cur->bc_private.b.flist ==
- pcur->bc_private.b.flist);
- xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
- }
- if (ncur) {
- pcur = ncur;
- ncur = NULL;
- }
- } while (nbno != NULLFSBLOCK);
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = i;
- return 0;
-error0:
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
-}
-
-/*
- * Log fields from the btree block header.
- */
-void
-xfs_bmbt_log_block(
- xfs_btree_cur_t *cur,
- xfs_buf_t *bp,
- int fields)
-{
- int first;
- int last;
- xfs_trans_t *tp;
- static const short offsets[] = {
- offsetof(xfs_bmbt_block_t, bb_magic),
- offsetof(xfs_bmbt_block_t, bb_level),
- offsetof(xfs_bmbt_block_t, bb_numrecs),
- offsetof(xfs_bmbt_block_t, bb_leftsib),
- offsetof(xfs_bmbt_block_t, bb_rightsib),
- sizeof(xfs_bmbt_block_t)
- };
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGBI(cur, bp, fields);
- tp = cur->bc_tp;
- if (bp) {
- xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first,
- &last);
- xfs_trans_log_buf(tp, bp, first, last);
- } else
- xfs_trans_log_inode(tp, cur->bc_private.b.ip,
- XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
-}
-
-/*
- * Log record values from the btree block.
- */
-void
-xfs_bmbt_log_recs(
- xfs_btree_cur_t *cur,
- xfs_buf_t *bp,
- int rfirst,
- int rlast)
-{
- xfs_bmbt_block_t *block;
- int first;
- int last;
- xfs_bmbt_rec_t *rp;
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast);
- ASSERT(bp);
- tp = cur->bc_tp;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- rp = XFS_BMAP_REC_DADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(tp, bp, first, last);
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
-}
-
-int /* error */
-xfs_bmbt_lookup_eq(
- xfs_btree_cur_t *cur,
- xfs_fileoff_t off,
- xfs_fsblock_t bno,
- xfs_filblks_t len,
- int *stat) /* success/failure */
-{
- cur->bc_rec.b.br_startoff = off;
- cur->bc_rec.b.br_startblock = bno;
- cur->bc_rec.b.br_blockcount = len;
- return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
-
-int /* error */
-xfs_bmbt_lookup_ge(
- xfs_btree_cur_t *cur,
- xfs_fileoff_t off,
- xfs_fsblock_t bno,
- xfs_filblks_t len,
- int *stat) /* success/failure */
-{
- cur->bc_rec.b.br_startoff = off;
- cur->bc_rec.b.br_startblock = bno;
- cur->bc_rec.b.br_blockcount = len;
- return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
-}
-
-/*
- * Give the bmap btree a new root block. Copy the old broot contents
- * down into a real block and make the broot point to it.
- */
-int /* error */
-xfs_bmbt_newroot(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *logflags, /* logging flags for inode */
- int *stat) /* return status - 0 fail */
-{
- xfs_alloc_arg_t args; /* allocation arguments */
- xfs_bmbt_block_t *block; /* bmap btree block */
- xfs_buf_t *bp; /* buffer for block */
- xfs_bmbt_block_t *cblock; /* child btree block */
- xfs_bmbt_key_t *ckp; /* child key pointer */
- xfs_bmbt_ptr_t *cpp; /* child ptr pointer */
- int error; /* error return code */
-#ifdef DEBUG
- int i; /* loop counter */
-#endif
- xfs_bmbt_key_t *kp; /* pointer to bmap btree key */
- int level; /* btree level */
- xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- level = cur->bc_nlevels - 1;
- block = xfs_bmbt_get_block(cur, level, &bp);
- /*
- * Copy the root into a real block.
- */
- args.mp = cur->bc_mp;
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
- args.tp = cur->bc_tp;
- args.fsbno = cur->bc_private.b.firstblock;
- args.mod = args.minleft = args.alignment = args.total = args.isfl =
- args.userdata = args.minalignslop = 0;
- args.minlen = args.maxlen = args.prod = 1;
- args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
- args.firstblock = args.fsbno;
- if (args.fsbno == NULLFSBLOCK) {
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- args.fsbno = be64_to_cpu(*pp);
- args.type = XFS_ALLOCTYPE_START_BNO;
- } else if (cur->bc_private.b.flist->xbf_low)
- args.type = XFS_ALLOCTYPE_START_BNO;
- else
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- if ((error = xfs_alloc_vextent(&args))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (args.fsbno == NULLFSBLOCK) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- ASSERT(args.len == 1);
- cur->bc_private.b.firstblock = args.fsbno;
- cur->bc_private.b.allocated++;
- cur->bc_private.b.ip->i_d.di_nblocks++;
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
- XFS_TRANS_DQ_BCOUNT, 1L);
- bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
- cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
- *cblock = *block;
- be16_add_cpu(&block->bb_level, 1);
- block->bb_numrecs = cpu_to_be16(1);
- cur->bc_nlevels++;
- cur->bc_ptrs[level + 1] = 1;
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
- memcpy(ckp, kp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*kp));
- cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- *pp = cpu_to_be64(args.fsbno);
- xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs),
- cur->bc_private.b.whichfork);
- xfs_btree_setbuf(cur, level, bp);
- /*
- * Do all this logging at the end so that
- * the root is at the right level.
- */
- xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
- xfs_bmbt_log_keys(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
- xfs_bmbt_log_ptrs(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *logflags |=
- XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
- *stat = 1;
- return 0;
-}
/*
* Set all the fields in a bmap extent record from the arguments.
@@ -2512,7 +428,8 @@ xfs_bmbt_set_state(
*/
void
xfs_bmbt_to_bmdr(
- xfs_bmbt_block_t *rblock,
+ struct xfs_mount *mp,
+ struct xfs_btree_block *rblock,
int rblocklen,
xfs_bmdr_block_t *dblock,
int dblocklen)
@@ -2524,67 +441,22 @@ xfs_bmbt_to_bmdr(
__be64 *tpp;
ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
- ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO);
- ASSERT(be64_to_cpu(rblock->bb_rightsib) == NULLDFSBNO);
+ ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
+ ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
dblock->bb_level = rblock->bb_level;
dblock->bb_numrecs = rblock->bb_numrecs;
- dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
- fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
- tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1);
- fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
- tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr);
+ dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+ fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
+ tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
+ fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
+ tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
/*
- * Update the record to the passed values.
- */
-int
-xfs_bmbt_update(
- xfs_btree_cur_t *cur,
- xfs_fileoff_t off,
- xfs_fsblock_t bno,
- xfs_filblks_t len,
- xfs_exntst_t state)
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
- int error;
- xfs_bmbt_key_t key;
- int ptr;
- xfs_bmbt_rec_t *rp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno,
- (xfs_dfilblks_t)len, (int)state);
- block = xfs_bmbt_get_block(cur, 0, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, 0, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- ptr = cur->bc_ptrs[0];
- rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
- xfs_bmbt_disk_set_allf(rp, off, bno, len, state);
- xfs_bmbt_log_recs(cur, bp, ptr, ptr);
- if (ptr > 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
- }
- key.br_startoff = cpu_to_be64(off);
- if ((error = xfs_bmbt_updkey(cur, &key, 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
-}
-
-/*
* Check extent records, which have just been read, for
* any bit in the extent flag field. ASSERT on debug
* kernels, as this condition should not occur.
@@ -2608,3 +480,451 @@ xfs_check_nostate_extents(
}
return 0;
}
+
+
+STATIC struct xfs_btree_cur *
+xfs_bmbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ struct xfs_btree_cur *new;
+
+ new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.b.ip, cur->bc_private.b.whichfork);
+
+ /*
+ * Copy the firstblock, flist, and flags values,
+ * since init cursor doesn't get them.
+ */
+ new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
+ new->bc_private.b.flist = cur->bc_private.b.flist;
+ new->bc_private.b.flags = cur->bc_private.b.flags;
+
+ return new;
+}
+
+STATIC void
+xfs_bmbt_update_cursor(
+ struct xfs_btree_cur *src,
+ struct xfs_btree_cur *dst)
+{
+ ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
+ (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
+ ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
+
+ dst->bc_private.b.allocated += src->bc_private.b.allocated;
+ dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
+
+ src->bc_private.b.allocated = 0;
+}
+
+STATIC int
+xfs_bmbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int length,
+ int *stat)
+{
+ xfs_alloc_arg_t args; /* block allocation args */
+ int error; /* error return value */
+
+ memset(&args, 0, sizeof(args));
+ args.tp = cur->bc_tp;
+ args.mp = cur->bc_mp;
+ args.fsbno = cur->bc_private.b.firstblock;
+ args.firstblock = args.fsbno;
+
+ if (args.fsbno == NULLFSBLOCK) {
+ args.fsbno = be64_to_cpu(start->l);
+ args.type = XFS_ALLOCTYPE_START_BNO;
+ /*
+ * Make sure there is sufficient room left in the AG to
+ * complete a full tree split for an extent insert. If
+ * we are converting the middle part of an extent then
+ * we may need space for two tree splits.
+ *
+ * We are relying on the caller to make the correct block
+ * reservation for this operation to succeed. If the
+ * reservation amount is insufficient then we may fail a
+ * block allocation here and corrupt the filesystem.
+ */
+ args.minleft = xfs_trans_get_block_res(args.tp);
+ } else if (cur->bc_private.b.flist->xbf_low) {
+ args.type = XFS_ALLOCTYPE_START_BNO;
+ } else {
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ }
+
+ args.minlen = args.maxlen = args.prod = 1;
+ args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+ if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+ error = XFS_ERROR(ENOSPC);
+ goto error0;
+ }
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ goto error0;
+
+ if (args.fsbno == NULLFSBLOCK && args.minleft) {
+ /*
+ * Could not find an AG with enough free space to satisfy
+ * a full btree split. Try again without minleft and if
+ * successful activate the lowspace algorithm.
+ */
+ args.fsbno = 0;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
+ args.minleft = 0;
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ goto error0;
+ cur->bc_private.b.flist->xbf_low = 1;
+ }
+ if (args.fsbno == NULLFSBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+ ASSERT(args.len == 1);
+ cur->bc_private.b.firstblock = args.fsbno;
+ cur->bc_private.b.allocated++;
+ cur->bc_private.b.ip->i_d.di_nblocks++;
+ xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
+ XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
+ XFS_TRANS_DQ_BCOUNT, 1L);
+
+ new->l = cpu_to_be64(args.fsbno);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+ error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+STATIC int
+xfs_bmbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+ struct xfs_trans *tp = cur->bc_tp;
+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+
+ xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
+ ip->i_d.di_nblocks--;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_binval(tp, bp);
+ return 0;
+}
+
+STATIC int
+xfs_bmbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level == cur->bc_nlevels - 1) {
+ struct xfs_ifork *ifp;
+
+ ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+ cur->bc_private.b.whichfork);
+
+ return xfs_bmbt_maxrecs(cur->bc_mp,
+ ifp->if_broot_bytes, level == 0) / 2;
+ }
+
+ return cur->bc_mp->m_bmap_dmnr[level != 0];
+}
+
+int
+xfs_bmbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level == cur->bc_nlevels - 1) {
+ struct xfs_ifork *ifp;
+
+ ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+ cur->bc_private.b.whichfork);
+
+ return xfs_bmbt_maxrecs(cur->bc_mp,
+ ifp->if_broot_bytes, level == 0);
+ }
+
+ return cur->bc_mp->m_bmap_dmxr[level != 0];
+
+}
+
+/*
+ * Get the maximum records we could store in the on-disk format.
+ *
+ * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
+ * for the root node this checks the available space in the dinode fork
+ * so that we can resize the in-memory buffer to match it. After a
+ * resize to the maximum size this function returns the same value
+ * as xfs_bmbt_get_maxrecs for the root node, too.
+ */
+STATIC int
+xfs_bmbt_get_dmaxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level != cur->bc_nlevels - 1)
+ return cur->bc_mp->m_bmap_dmxr[level != 0];
+ return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize,
+ level == 0);
+}
+
+STATIC void
+xfs_bmbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ key->bmbt.br_startoff =
+ cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
+}
+
+STATIC void
+xfs_bmbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(key->bmbt.br_startoff != 0);
+
+ xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
+ 0, 0, XFS_EXT_NORM);
+}
+
+STATIC void
+xfs_bmbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
+}
+
+STATIC void
+xfs_bmbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ ptr->l = 0;
+}
+
+STATIC __int64_t
+xfs_bmbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
+ cur->bc_rec.b.br_startoff;
+}
+
+#ifdef DEBUG
+STATIC int
+xfs_bmbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be64_to_cpu(k1->bmbt.br_startoff) <
+ be64_to_cpu(k2->bmbt.br_startoff);
+}
+
+STATIC int
+xfs_bmbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
+ xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
+ xfs_bmbt_disk_get_startoff(&r2->bmbt);
+}
+#endif /* DEBUG */
+
+#ifdef XFS_BTREE_TRACE
+ktrace_t *xfs_bmbt_trace_buf;
+
+STATIC void
+xfs_bmbt_trace_enter(
+ struct xfs_btree_cur *cur,
+ const char *func,
+ char *s,
+ int type,
+ int line,
+ __psunsigned_t a0,
+ __psunsigned_t a1,
+ __psunsigned_t a2,
+ __psunsigned_t a3,
+ __psunsigned_t a4,
+ __psunsigned_t a5,
+ __psunsigned_t a6,
+ __psunsigned_t a7,
+ __psunsigned_t a8,
+ __psunsigned_t a9,
+ __psunsigned_t a10)
+{
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+ int whichfork = cur->bc_private.b.whichfork;
+
+ ktrace_enter(xfs_bmbt_trace_buf,
+ (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
+ (void *)func, (void *)s, (void *)ip, (void *)cur,
+ (void *)a0, (void *)a1, (void *)a2, (void *)a3,
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7,
+ (void *)a8, (void *)a9, (void *)a10);
+ ktrace_enter(ip->i_btrace,
+ (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
+ (void *)func, (void *)s, (void *)ip, (void *)cur,
+ (void *)a0, (void *)a1, (void *)a2, (void *)a3,
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7,
+ (void *)a8, (void *)a9, (void *)a10);
+}
+
+STATIC void
+xfs_bmbt_trace_cursor(
+ struct xfs_btree_cur *cur,
+ __uint32_t *s0,
+ __uint64_t *l0,
+ __uint64_t *l1)
+{
+ struct xfs_bmbt_rec_host r;
+
+ xfs_bmbt_set_all(&r, &cur->bc_rec.b);
+
+ *s0 = (cur->bc_nlevels << 24) |
+ (cur->bc_private.b.flags << 16) |
+ cur->bc_private.b.allocated;
+ *l0 = r.l0;
+ *l1 = r.l1;
+}
+
+STATIC void
+xfs_bmbt_trace_key(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ __uint64_t *l0,
+ __uint64_t *l1)
+{
+ *l0 = be64_to_cpu(key->bmbt.br_startoff);
+ *l1 = 0;
+}
+
+STATIC void
+xfs_bmbt_trace_record(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ __uint64_t *l0,
+ __uint64_t *l1,
+ __uint64_t *l2)
+{
+ struct xfs_bmbt_irec irec;
+
+ xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
+ *l0 = irec.br_startoff;
+ *l1 = irec.br_startblock;
+ *l2 = irec.br_blockcount;
+}
+#endif /* XFS_BTREE_TRACE */
+
+static const struct xfs_btree_ops xfs_bmbt_ops = {
+ .rec_len = sizeof(xfs_bmbt_rec_t),
+ .key_len = sizeof(xfs_bmbt_key_t),
+
+ .dup_cursor = xfs_bmbt_dup_cursor,
+ .update_cursor = xfs_bmbt_update_cursor,
+ .alloc_block = xfs_bmbt_alloc_block,
+ .free_block = xfs_bmbt_free_block,
+ .get_maxrecs = xfs_bmbt_get_maxrecs,
+ .get_minrecs = xfs_bmbt_get_minrecs,
+ .get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
+ .init_key_from_rec = xfs_bmbt_init_key_from_rec,
+ .init_rec_from_key = xfs_bmbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
+ .key_diff = xfs_bmbt_key_diff,
+
+#ifdef DEBUG
+ .keys_inorder = xfs_bmbt_keys_inorder,
+ .recs_inorder = xfs_bmbt_recs_inorder,
+#endif
+
+#ifdef XFS_BTREE_TRACE
+ .trace_enter = xfs_bmbt_trace_enter,
+ .trace_cursor = xfs_bmbt_trace_cursor,
+ .trace_key = xfs_bmbt_trace_key,
+ .trace_record = xfs_bmbt_trace_record,
+#endif
+};
+
+/*
+ * Allocate a new bmap btree cursor.
+ */
+struct xfs_btree_cur * /* new bmap btree cursor */
+xfs_bmbt_init_cursor(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_inode *ip, /* inode owning the btree */
+ int whichfork) /* data or attr fork */
+{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_btree_cur *cur;
+
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
+ cur->bc_btnum = XFS_BTNUM_BMAP;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
+
+ cur->bc_ops = &xfs_bmbt_ops;
+ cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
+
+ cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
+ cur->bc_private.b.ip = ip;
+ cur->bc_private.b.firstblock = NULLFSBLOCK;
+ cur->bc_private.b.flist = NULL;
+ cur->bc_private.b.allocated = 0;
+ cur->bc_private.b.flags = 0;
+ cur->bc_private.b.whichfork = whichfork;
+
+ return cur;
+}
+
+/*
+ * Calculate number of records in a bmap btree block.
+ */
+int
+xfs_bmbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
+{
+ blocklen -= XFS_BMBT_BLOCK_LEN(mp);
+
+ if (leaf)
+ return blocklen / sizeof(xfs_bmbt_rec_t);
+ return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
+}
+
+/*
+ * Calculate number of records in a bmap btree inode root.
+ */
+int
+xfs_bmdr_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
+{
+ blocklen -= sizeof(xfs_bmdr_block_t);
+
+ if (leaf)
+ return blocklen / sizeof(xfs_bmdr_rec_t);
+ return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
+}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index cd0d4b4bb816..a4555abb6622 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -21,9 +21,10 @@
#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
struct xfs_btree_cur;
-struct xfs_btree_lblock;
+struct xfs_btree_block;
struct xfs_mount;
struct xfs_inode;
+struct xfs_trans;
/*
* Bmap root header, on-disk form only.
@@ -145,71 +146,60 @@ typedef struct xfs_bmbt_key {
/* btree pointer type */
typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
-/* btree block header type */
-typedef struct xfs_btree_lblock xfs_bmbt_block_t;
-
-#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp))
-
-#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize)
-#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \
- ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \
- (cur)->bc_private.b.whichfork)->if_broot_bytes)
-
-#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
- xfs_bmdr, (lev) == 0) : \
- ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0])))
-#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\
- xfs_bmbt, (lev) == 0) : \
- ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0])))
-
-#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur),\
- xfs_bmdr, (lev) == 0) : \
- ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])))
-#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\
- xfs_bmbt, (lev) == 0) : \
- ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])))
-
-#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i))
-
-#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i))
-
-#define XFS_BMAP_KEY_DADDR(bb,i,cur) \
- (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i))
-
-#define XFS_BMAP_KEY_IADDR(bb,i,cur) \
- (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i))
-
-#define XFS_BMAP_PTR_DADDR(bb,i,cur) \
- (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
- be16_to_cpu((bb)->bb_level), cur)))
-#define XFS_BMAP_PTR_IADDR(bb,i,cur) \
- (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
- be16_to_cpu((bb)->bb_level), cur)))
+/*
+ * Btree block header size depends on a superblock flag.
+ *
+ * (not quite yet, but soon)
+ */
+#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN
+
+#define XFS_BMBT_REC_ADDR(mp, block, index) \
+ ((xfs_bmbt_rec_t *) \
+ ((char *)(block) + \
+ XFS_BMBT_BLOCK_LEN(mp) + \
+ ((index) - 1) * sizeof(xfs_bmbt_rec_t)))
+
+#define XFS_BMBT_KEY_ADDR(mp, block, index) \
+ ((xfs_bmbt_key_t *) \
+ ((char *)(block) + \
+ XFS_BMBT_BLOCK_LEN(mp) + \
+ ((index) - 1) * sizeof(xfs_bmbt_key_t)))
+
+#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \
+ ((xfs_bmbt_ptr_t *) \
+ ((char *)(block) + \
+ XFS_BMBT_BLOCK_LEN(mp) + \
+ (maxrecs) * sizeof(xfs_bmbt_key_t) + \
+ ((index) - 1) * sizeof(xfs_bmbt_ptr_t)))
+
+#define XFS_BMDR_REC_ADDR(block, index) \
+ ((xfs_bmdr_rec_t *) \
+ ((char *)(block) + \
+ sizeof(struct xfs_bmdr_block) + \
+ ((index) - 1) * sizeof(xfs_bmdr_rec_t)))
+
+#define XFS_BMDR_KEY_ADDR(block, index) \
+ ((xfs_bmdr_key_t *) \
+ ((char *)(block) + \
+ sizeof(struct xfs_bmdr_block) + \
+ ((index) - 1) * sizeof(xfs_bmdr_key_t)))
+
+#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \
+ ((xfs_bmdr_ptr_t *) \
+ ((char *)(block) + \
+ sizeof(struct xfs_bmdr_block) + \
+ (maxrecs) * sizeof(xfs_bmdr_key_t) + \
+ ((index) - 1) * sizeof(xfs_bmdr_ptr_t)))
/*
* These are to be used when we know the size of the block and
* we don't have a cursor.
*/
-#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \
- (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i))
-#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
- (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i))
-#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
- (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
-
-#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs)
-#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
+#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
+ XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
- (int)(sizeof(xfs_bmbt_block_t) + \
+ (int)(XFS_BTREE_LBLOCK_LEN + \
((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
#define XFS_BMAP_BROOT_SPACE(bb) \
@@ -223,42 +213,12 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
*/
#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)])
-#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \
- (be32_to_cpu((bb)->bb_magic) == XFS_BMAP_MAGIC && \
- be16_to_cpu((bb)->bb_level) == level && \
- be16_to_cpu((bb)->bb_numrecs) > 0 && \
- be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0])
-
-
-#ifdef __KERNEL__
-
-#if defined(XFS_BMBT_TRACE)
-/*
- * Trace buffer entry types.
- */
-#define XFS_BMBT_KTRACE_ARGBI 1
-#define XFS_BMBT_KTRACE_ARGBII 2
-#define XFS_BMBT_KTRACE_ARGFFFI 3
-#define XFS_BMBT_KTRACE_ARGI 4
-#define XFS_BMBT_KTRACE_ARGIFK 5
-#define XFS_BMBT_KTRACE_ARGIFR 6
-#define XFS_BMBT_KTRACE_ARGIK 7
-#define XFS_BMBT_KTRACE_CUR 8
-
-#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
-#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
-extern ktrace_t *xfs_bmbt_trace_buf;
-#endif
-
/*
* Prototypes for xfs_bmap.c to call.
*/
-extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int);
-extern int xfs_bmbt_decrement(struct xfs_btree_cur *, int, int *);
-extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *);
+extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int,
+ struct xfs_btree_block *, int);
extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
-extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur,
- int, struct xfs_buf **bpp);
extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
@@ -268,22 +228,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
-extern int xfs_bmbt_increment(struct xfs_btree_cur *, int, int *);
-extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *);
-extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
-extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int,
- int);
-extern int xfs_bmbt_lookup_eq(struct xfs_btree_cur *, xfs_fileoff_t,
- xfs_fsblock_t, xfs_filblks_t, int *);
-extern int xfs_bmbt_lookup_ge(struct xfs_btree_cur *, xfs_fileoff_t,
- xfs_fsblock_t, xfs_filblks_t, int *);
-
-/*
- * Give the bmap btree a new root block. Copy the old broot contents
- * down into a real block and make the broot point to it.
- */
-extern int xfs_bmbt_newroot(struct xfs_btree_cur *cur, int *lflags, int *stat);
-
extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
@@ -296,10 +240,15 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
-extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int);
-extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t,
- xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t);
+extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
+ xfs_bmdr_block_t *, int);
+
+extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
+extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
+extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
+
+extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
+ struct xfs_trans *, struct xfs_inode *, int);
-#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index cc593a84c345..7ed59267420d 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -34,7 +34,9 @@
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_inode_item.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_error.h"
@@ -50,135 +52,33 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
};
-/*
- * Checking routine: return maxrecs for the block.
- */
-STATIC int /* number of records fitting in block */
-xfs_btree_maxrecs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_block_t *block) /* generic btree block pointer */
-{
- switch (cur->bc_btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- return (int)XFS_ALLOC_BLOCK_MAXRECS(
- be16_to_cpu(block->bb_h.bb_level), cur);
- case XFS_BTNUM_BMAP:
- return (int)XFS_BMAP_BLOCK_IMAXRECS(
- be16_to_cpu(block->bb_h.bb_level), cur);
- case XFS_BTNUM_INO:
- return (int)XFS_INOBT_BLOCK_MAXRECS(
- be16_to_cpu(block->bb_h.bb_level), cur);
- default:
- ASSERT(0);
- return 0;
- }
-}
-
-/*
- * External routines.
- */
-
-#ifdef DEBUG
-/*
- * Debug routine: check that block header is ok.
- */
-void
-xfs_btree_check_block(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_block_t *block, /* generic btree block pointer */
- int level, /* level of the btree block */
- xfs_buf_t *bp) /* buffer containing block, if any */
-{
- if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
- xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
- bp);
- else
- xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
- bp);
-}
-
-/*
- * Debug routine: check that keys are in the right order.
- */
-void
-xfs_btree_check_key(
- xfs_btnum_t btnum, /* btree identifier */
- void *ak1, /* pointer to left (lower) key */
- void *ak2) /* pointer to right (higher) key */
-{
- switch (btnum) {
- case XFS_BTNUM_BNO: {
- xfs_alloc_key_t *k1;
- xfs_alloc_key_t *k2;
-
- k1 = ak1;
- k2 = ak2;
- ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
- break;
- }
- case XFS_BTNUM_CNT: {
- xfs_alloc_key_t *k1;
- xfs_alloc_key_t *k2;
-
- k1 = ak1;
- k2 = ak2;
- ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
- (k1->ar_blockcount == k2->ar_blockcount &&
- be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
- break;
- }
- case XFS_BTNUM_BMAP: {
- xfs_bmbt_key_t *k1;
- xfs_bmbt_key_t *k2;
-
- k1 = ak1;
- k2 = ak2;
- ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
- break;
- }
- case XFS_BTNUM_INO: {
- xfs_inobt_key_t *k1;
- xfs_inobt_key_t *k2;
-
- k1 = ak1;
- k2 = ak2;
- ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
- break;
- }
- default:
- ASSERT(0);
- }
-}
-#endif /* DEBUG */
-/*
- * Checking routine: check that long form block header is ok.
- */
-/* ARGSUSED */
-int /* error (0 or EFSCORRUPTED) */
+STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_lblock_t *block, /* btree long form block pointer */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* btree long form block pointer */
int level, /* level of the btree block */
- xfs_buf_t *bp) /* buffer for block, if any */
+ struct xfs_buf *bp) /* buffer for block, if any */
{
int lblock_ok; /* block passes checks */
- xfs_mount_t *mp; /* file system mount point */
+ struct xfs_mount *mp; /* file system mount point */
mp = cur->bc_mp;
lblock_ok =
be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
- xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
- block->bb_leftsib &&
- (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
- XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
- block->bb_rightsib &&
- (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
- XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
- if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
+ cur->bc_ops->get_maxrecs(cur, level) &&
+ block->bb_u.l.bb_leftsib &&
+ (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
+ XFS_FSB_SANITY_CHECK(mp,
+ be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
+ block->bb_u.l.bb_rightsib &&
+ (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
+ XFS_FSB_SANITY_CHECK(mp,
+ be64_to_cpu(block->bb_u.l.bb_rightsib)));
+ if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
+ XFS_ERRTAG_BTREE_CHECK_LBLOCK,
XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
if (bp)
xfs_buftrace("LBTREE ERROR", bp);
@@ -189,98 +89,15 @@ xfs_btree_check_lblock(
return 0;
}
-/*
- * Checking routine: check that (long) pointer is ok.
- */
-int /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lptr(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_dfsbno_t ptr, /* btree block disk address */
- int level) /* btree block level */
-{
- xfs_mount_t *mp; /* file system mount point */
-
- mp = cur->bc_mp;
- XFS_WANT_CORRUPTED_RETURN(
- level > 0 &&
- ptr != NULLDFSBNO &&
- XFS_FSB_SANITY_CHECK(mp, ptr));
- return 0;
-}
-
-#ifdef DEBUG
-/*
- * Debug routine: check that records are in the right order.
- */
-void
-xfs_btree_check_rec(
- xfs_btnum_t btnum, /* btree identifier */
- void *ar1, /* pointer to left (lower) record */
- void *ar2) /* pointer to right (higher) record */
-{
- switch (btnum) {
- case XFS_BTNUM_BNO: {
- xfs_alloc_rec_t *r1;
- xfs_alloc_rec_t *r2;
-
- r1 = ar1;
- r2 = ar2;
- ASSERT(be32_to_cpu(r1->ar_startblock) +
- be32_to_cpu(r1->ar_blockcount) <=
- be32_to_cpu(r2->ar_startblock));
- break;
- }
- case XFS_BTNUM_CNT: {
- xfs_alloc_rec_t *r1;
- xfs_alloc_rec_t *r2;
-
- r1 = ar1;
- r2 = ar2;
- ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
- (r1->ar_blockcount == r2->ar_blockcount &&
- be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
- break;
- }
- case XFS_BTNUM_BMAP: {
- xfs_bmbt_rec_t *r1;
- xfs_bmbt_rec_t *r2;
-
- r1 = ar1;
- r2 = ar2;
- ASSERT(xfs_bmbt_disk_get_startoff(r1) +
- xfs_bmbt_disk_get_blockcount(r1) <=
- xfs_bmbt_disk_get_startoff(r2));
- break;
- }
- case XFS_BTNUM_INO: {
- xfs_inobt_rec_t *r1;
- xfs_inobt_rec_t *r2;
-
- r1 = ar1;
- r2 = ar2;
- ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
- be32_to_cpu(r2->ir_startino));
- break;
- }
- default:
- ASSERT(0);
- }
-}
-#endif /* DEBUG */
-
-/*
- * Checking routine: check that block header is ok.
- */
-/* ARGSUSED */
-int /* error (0 or EFSCORRUPTED) */
+STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_sblock_t *block, /* btree short form block pointer */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* btree short form block pointer */
int level, /* level of the btree block */
- xfs_buf_t *bp) /* buffer containing block */
+ struct xfs_buf *bp) /* buffer containing block */
{
- xfs_buf_t *agbp; /* buffer for ag. freespace struct */
- xfs_agf_t *agf; /* ag. freespace structure */
+ struct xfs_buf *agbp; /* buffer for ag. freespace struct */
+ struct xfs_agf *agf; /* ag. freespace structure */
xfs_agblock_t agflen; /* native ag. freespace length */
int sblock_ok; /* block passes checks */
@@ -291,13 +108,13 @@ xfs_btree_check_sblock(
be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
- xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
- (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
- be32_to_cpu(block->bb_leftsib) < agflen) &&
- block->bb_leftsib &&
- (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
- be32_to_cpu(block->bb_rightsib) < agflen) &&
- block->bb_rightsib;
+ cur->bc_ops->get_maxrecs(cur, level) &&
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
+ be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
+ block->bb_u.s.bb_leftsib &&
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
+ be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
+ block->bb_u.s.bb_rightsib;
if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
XFS_ERRTAG_BTREE_CHECK_SBLOCK,
XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
@@ -311,27 +128,78 @@ xfs_btree_check_sblock(
}
/*
- * Checking routine: check that (short) pointer is ok.
+ * Debug routine: check that block header is ok.
+ */
+int
+xfs_btree_check_block(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* generic btree block pointer */
+ int level, /* level of the btree block */
+ struct xfs_buf *bp) /* buffer containing block, if any */
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return xfs_btree_check_lblock(cur, block, level, bp);
+ else
+ return xfs_btree_check_sblock(cur, block, level, bp);
+}
+
+/*
+ * Check that (long) pointer is ok.
*/
int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_dfsbno_t bno, /* btree block disk address */
+ int level) /* btree block level */
+{
+ XFS_WANT_CORRUPTED_RETURN(
+ level > 0 &&
+ bno != NULLDFSBNO &&
+ XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
+ return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check that (short) pointer is ok.
+ */
+STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t ptr, /* btree block disk address */
- int level) /* btree block level */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* btree block disk address */
+ int level) /* btree block level */
{
- xfs_buf_t *agbp; /* buffer for ag. freespace struct */
- xfs_agf_t *agf; /* ag. freespace structure */
+ xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
- agbp = cur->bc_private.a.agbp;
- agf = XFS_BUF_TO_AGF(agbp);
XFS_WANT_CORRUPTED_RETURN(
level > 0 &&
- ptr != NULLAGBLOCK && ptr != 0 &&
- ptr < be32_to_cpu(agf->agf_length));
+ bno != NULLAGBLOCK &&
+ bno != 0 &&
+ bno < agblocks);
return 0;
}
/*
+ * Check that block ptr is ok.
+ */
+STATIC int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_ptr(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ union xfs_btree_ptr *ptr, /* btree block disk address */
+ int index, /* offset from ptr to check */
+ int level) /* btree block level */
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ return xfs_btree_check_lptr(cur,
+ be64_to_cpu((&ptr->l)[index]), level);
+ } else {
+ return xfs_btree_check_sptr(cur,
+ be32_to_cpu((&ptr->s)[index]), level);
+ }
+}
+#endif
+
+/*
* Delete the btree cursor.
*/
void
@@ -387,16 +255,17 @@ xfs_btree_dup_cursor(
tp = cur->bc_tp;
mp = cur->bc_mp;
+
/*
* Allocate a new cursor like the old one.
*/
- new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
- cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
- cur->bc_private.b.whichfork);
+ new = cur->bc_ops->dup_cursor(cur);
+
/*
* Copy the record currently in the cursor.
*/
new->bc_rec = cur->bc_rec;
+
/*
* For each level current, re-get the buffer and copy the ptr value.
*/
@@ -416,46 +285,174 @@ xfs_btree_dup_cursor(
} else
new->bc_bufs[i] = NULL;
}
- /*
- * For bmap btrees, copy the firstblock, flist, and flags values,
- * since init cursor doesn't get them.
- */
- if (new->bc_btnum == XFS_BTNUM_BMAP) {
- new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
- new->bc_private.b.flist = cur->bc_private.b.flist;
- new->bc_private.b.flags = cur->bc_private.b.flags;
- }
*ncur = new;
return 0;
}
/*
+ * XFS btree block layout and addressing:
+ *
+ * There are two types of blocks in the btree: leaf and non-leaf blocks.
+ *
+ * The leaf record start with a header then followed by records containing
+ * the values. A non-leaf block also starts with the same header, and
+ * then first contains lookup keys followed by an equal number of pointers
+ * to the btree blocks at the previous level.
+ *
+ * +--------+-------+-------+-------+-------+-------+-------+
+ * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
+ * +--------+-------+-------+-------+-------+-------+-------+
+ *
+ * +--------+-------+-------+-------+-------+-------+-------+
+ * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
+ * +--------+-------+-------+-------+-------+-------+-------+
+ *
+ * The header is called struct xfs_btree_block for reasons better left unknown
+ * and comes in different versions for short (32bit) and long (64bit) block
+ * pointers. The record and key structures are defined by the btree instances
+ * and opaque to the btree core. The block pointers are simple disk endian
+ * integers, available in a short (32bit) and long (64bit) variant.
+ *
+ * The helpers below calculate the offset of a given record, key or pointer
+ * into a btree block (xfs_btree_*_offset) or return a pointer to the given
+ * record, key or pointer (xfs_btree_*_addr). Note that all addressing
+ * inside the btree block is done using indices starting at one, not zero!
+ */
+
+/*
+ * Return size of the btree block header for this btree instance.
+ */
+static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
+{
+ return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ XFS_BTREE_LBLOCK_LEN :
+ XFS_BTREE_SBLOCK_LEN;
+}
+
+/*
+ * Return size of btree block pointers for this btree instance.
+ */
+static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
+{
+ return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ sizeof(__be64) : sizeof(__be32);
+}
+
+/*
+ * Calculate offset of the n-th record in a btree block.
+ */
+STATIC size_t
+xfs_btree_rec_offset(
+ struct xfs_btree_cur *cur,
+ int n)
+{
+ return xfs_btree_block_len(cur) +
+ (n - 1) * cur->bc_ops->rec_len;
+}
+
+/*
+ * Calculate offset of the n-th key in a btree block.
+ */
+STATIC size_t
+xfs_btree_key_offset(
+ struct xfs_btree_cur *cur,
+ int n)
+{
+ return xfs_btree_block_len(cur) +
+ (n - 1) * cur->bc_ops->key_len;
+}
+
+/*
+ * Calculate offset of the n-th block pointer in a btree block.
+ */
+STATIC size_t
+xfs_btree_ptr_offset(
+ struct xfs_btree_cur *cur,
+ int n,
+ int level)
+{
+ return xfs_btree_block_len(cur) +
+ cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
+ (n - 1) * xfs_btree_ptr_len(cur);
+}
+
+/*
+ * Return a pointer to the n-th record in the btree block.
+ */
+STATIC union xfs_btree_rec *
+xfs_btree_rec_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ return (union xfs_btree_rec *)
+ ((char *)block + xfs_btree_rec_offset(cur, n));
+}
+
+/*
+ * Return a pointer to the n-th key in the btree block.
+ */
+STATIC union xfs_btree_key *
+xfs_btree_key_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ return (union xfs_btree_key *)
+ ((char *)block + xfs_btree_key_offset(cur, n));
+}
+
+/*
+ * Return a pointer to the n-th block pointer in the btree block.
+ */
+STATIC union xfs_btree_ptr *
+xfs_btree_ptr_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ int level = xfs_btree_get_level(block);
+
+ ASSERT(block->bb_level != 0);
+
+ return (union xfs_btree_ptr *)
+ ((char *)block + xfs_btree_ptr_offset(cur, n, level));
+}
+
+/*
+ * Get a the root block which is stored in the inode.
+ *
+ * For now this btree implementation assumes the btree root is always
+ * stored in the if_broot field of an inode fork.
+ */
+STATIC struct xfs_btree_block *
+xfs_btree_get_iroot(
+ struct xfs_btree_cur *cur)
+{
+ struct xfs_ifork *ifp;
+
+ ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
+ return (struct xfs_btree_block *)ifp->if_broot;
+}
+
+/*
* Retrieve the block pointer from the cursor at the given level.
- * This may be a bmap btree root or from a buffer.
+ * This may be an inode btree root or from a buffer.
*/
-STATIC xfs_btree_block_t * /* generic btree block pointer */
+STATIC struct xfs_btree_block * /* generic btree block pointer */
xfs_btree_get_block(
- xfs_btree_cur_t *cur, /* btree cursor */
+ struct xfs_btree_cur *cur, /* btree cursor */
int level, /* level in btree */
- xfs_buf_t **bpp) /* buffer containing the block */
-{
- xfs_btree_block_t *block; /* return value */
- xfs_buf_t *bp; /* return buffer */
- xfs_ifork_t *ifp; /* inode fork pointer */
- int whichfork; /* data or attr fork */
-
- if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
- whichfork = cur->bc_private.b.whichfork;
- ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
- block = (xfs_btree_block_t *)ifp->if_broot;
- bp = NULL;
- } else {
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buf **bpp) /* buffer containing the block */
+{
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level == cur->bc_nlevels - 1)) {
+ *bpp = NULL;
+ return xfs_btree_get_iroot(cur);
}
- ASSERT(block != NULL);
- *bpp = bp;
- return block;
+
+ *bpp = cur->bc_bufs[level];
+ return XFS_BUF_TO_BLOCK(*bpp);
}
/*
@@ -505,97 +502,6 @@ xfs_btree_get_bufs(
}
/*
- * Allocate a new btree cursor.
- * The cursor is either for allocation (A) or bmap (B) or inodes (I).
- */
-xfs_btree_cur_t * /* new btree cursor */
-xfs_btree_init_cursor(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *agbp, /* (A only) buffer for agf structure */
- /* (I only) buffer for agi structure */
- xfs_agnumber_t agno, /* (AI only) allocation group number */
- xfs_btnum_t btnum, /* btree identifier */
- xfs_inode_t *ip, /* (B only) inode owning the btree */
- int whichfork) /* (B only) data or attr fork */
-{
- xfs_agf_t *agf; /* (A) allocation group freespace */
- xfs_agi_t *agi; /* (I) allocation group inodespace */
- xfs_btree_cur_t *cur; /* return value */
- xfs_ifork_t *ifp; /* (I) inode fork pointer */
- int nlevels=0; /* number of levels in the btree */
-
- ASSERT(xfs_btree_cur_zone != NULL);
- /*
- * Allocate a new cursor.
- */
- cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
- /*
- * Deduce the number of btree levels from the arguments.
- */
- switch (btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- agf = XFS_BUF_TO_AGF(agbp);
- nlevels = be32_to_cpu(agf->agf_levels[btnum]);
- break;
- case XFS_BTNUM_BMAP:
- ifp = XFS_IFORK_PTR(ip, whichfork);
- nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
- break;
- case XFS_BTNUM_INO:
- agi = XFS_BUF_TO_AGI(agbp);
- nlevels = be32_to_cpu(agi->agi_level);
- break;
- default:
- ASSERT(0);
- }
- /*
- * Fill in the common fields.
- */
- cur->bc_tp = tp;
- cur->bc_mp = mp;
- cur->bc_nlevels = nlevels;
- cur->bc_btnum = btnum;
- cur->bc_blocklog = mp->m_sb.sb_blocklog;
- /*
- * Fill in private fields.
- */
- switch (btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- /*
- * Allocation btree fields.
- */
- cur->bc_private.a.agbp = agbp;
- cur->bc_private.a.agno = agno;
- break;
- case XFS_BTNUM_INO:
- /*
- * Inode allocation btree fields.
- */
- cur->bc_private.a.agbp = agbp;
- cur->bc_private.a.agno = agno;
- break;
- case XFS_BTNUM_BMAP:
- /*
- * Bmap btree fields.
- */
- cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
- cur->bc_private.b.ip = ip;
- cur->bc_private.b.firstblock = NULLFSBLOCK;
- cur->bc_private.b.flist = NULL;
- cur->bc_private.b.allocated = 0;
- cur->bc_private.b.flags = 0;
- cur->bc_private.b.whichfork = whichfork;
- break;
- default:
- ASSERT(0);
- }
- return cur;
-}
-
-/*
* Check for the cursor referring to the last block at the given level.
*/
int /* 1=is last block, 0=not last block */
@@ -603,12 +509,12 @@ xfs_btree_islastblock(
xfs_btree_cur_t *cur, /* btree cursor */
int level) /* level to check */
{
- xfs_btree_block_t *block; /* generic btree block pointer */
+ struct xfs_btree_block *block; /* generic btree block pointer */
xfs_buf_t *bp; /* buffer containing block */
block = xfs_btree_get_block(cur, level, &bp);
xfs_btree_check_block(cur, block, level, bp);
- if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
else
return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
@@ -618,12 +524,12 @@ xfs_btree_islastblock(
* Change the cursor to point to the first record at the given level.
* Other levels are unaffected.
*/
-int /* success=1, failure=0 */
+STATIC int /* success=1, failure=0 */
xfs_btree_firstrec(
xfs_btree_cur_t *cur, /* btree cursor */
int level) /* level to change */
{
- xfs_btree_block_t *block; /* generic btree block pointer */
+ struct xfs_btree_block *block; /* generic btree block pointer */
xfs_buf_t *bp; /* buffer containing block */
/*
@@ -634,7 +540,7 @@ xfs_btree_firstrec(
/*
* It's empty, there is no such record.
*/
- if (!block->bb_h.bb_numrecs)
+ if (!block->bb_numrecs)
return 0;
/*
* Set the ptr value to 1, that's the first record/key.
@@ -647,12 +553,12 @@ xfs_btree_firstrec(
* Change the cursor to point to the last record in the current block
* at the given level. Other levels are unaffected.
*/
-int /* success=1, failure=0 */
+STATIC int /* success=1, failure=0 */
xfs_btree_lastrec(
xfs_btree_cur_t *cur, /* btree cursor */
int level) /* level to change */
{
- xfs_btree_block_t *block; /* generic btree block pointer */
+ struct xfs_btree_block *block; /* generic btree block pointer */
xfs_buf_t *bp; /* buffer containing block */
/*
@@ -663,12 +569,12 @@ xfs_btree_lastrec(
/*
* It's empty, there is no such record.
*/
- if (!block->bb_h.bb_numrecs)
+ if (!block->bb_numrecs)
return 0;
/*
* Set the ptr value to numrecs, that's the last record/key.
*/
- cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs);
+ cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
return 1;
}
@@ -817,66 +723,84 @@ xfs_btree_reada_bufs(
xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
}
+STATIC int
+xfs_btree_readahead_lblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
+{
+ int rval = 0;
+ xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
+ xfs_btree_reada_bufl(cur->bc_mp, left, 1);
+ rval++;
+ }
+
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
+ xfs_btree_reada_bufl(cur->bc_mp, right, 1);
+ rval++;
+ }
+
+ return rval;
+}
+
+STATIC int
+xfs_btree_readahead_sblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
+{
+ int rval = 0;
+ xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
+ xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
+
+
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
+ xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+ left, 1);
+ rval++;
+ }
+
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
+ xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+ right, 1);
+ rval++;
+ }
+
+ return rval;
+}
+
/*
* Read-ahead btree blocks, at the given level.
* Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
*/
-int
-xfs_btree_readahead_core(
- xfs_btree_cur_t *cur, /* btree cursor */
+STATIC int
+xfs_btree_readahead(
+ struct xfs_btree_cur *cur, /* btree cursor */
int lev, /* level in btree */
int lr) /* left/right bits */
{
- xfs_alloc_block_t *a;
- xfs_bmbt_block_t *b;
- xfs_inobt_block_t *i;
- int rval = 0;
+ struct xfs_btree_block *block;
+
+ /*
+ * No readahead needed if we are at the root level and the
+ * btree root is stored in the inode.
+ */
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (lev == cur->bc_nlevels - 1))
+ return 0;
+
+ if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
+ return 0;
- ASSERT(cur->bc_bufs[lev] != NULL);
cur->bc_ra[lev] |= lr;
- switch (cur->bc_btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
- if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- be32_to_cpu(a->bb_leftsib), 1);
- rval++;
- }
- if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- be32_to_cpu(a->bb_rightsib), 1);
- rval++;
- }
- break;
- case XFS_BTNUM_BMAP:
- b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
- if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
- xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
- rval++;
- }
- if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
- xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
- rval++;
- }
- break;
- case XFS_BTNUM_INO:
- i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
- if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- be32_to_cpu(i->bb_leftsib), 1);
- rval++;
- }
- if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- be32_to_cpu(i->bb_rightsib), 1);
- rval++;
- }
- break;
- default:
- ASSERT(0);
- }
- return rval;
+ block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return xfs_btree_readahead_lblock(cur, lr, block);
+ return xfs_btree_readahead_sblock(cur, lr, block);
}
/*
@@ -889,7 +813,7 @@ xfs_btree_setbuf(
int lev, /* level in btree */
xfs_buf_t *bp) /* new buffer to set */
{
- xfs_btree_block_t *b; /* btree block */
+ struct xfs_btree_block *b; /* btree block */
xfs_buf_t *obp; /* old buffer pointer */
obp = cur->bc_bufs[lev];
@@ -900,7 +824,7 @@ xfs_btree_setbuf(
if (!bp)
return;
b = XFS_BUF_TO_BLOCK(bp);
- if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
@@ -912,3 +836,2855 @@ xfs_btree_setbuf(
cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
}
}
+
+STATIC int
+xfs_btree_ptr_is_null(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return be64_to_cpu(ptr->l) == NULLFSBLOCK;
+ else
+ return be32_to_cpu(ptr->s) == NULLAGBLOCK;
+}
+
+STATIC void
+xfs_btree_set_ptr_null(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ ptr->l = cpu_to_be64(NULLFSBLOCK);
+ else
+ ptr->s = cpu_to_be32(NULLAGBLOCK);
+}
+
+/*
+ * Get/set/init sibling pointers
+ */
+STATIC void
+xfs_btree_get_sibling(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_ptr *ptr,
+ int lr)
+{
+ ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (lr == XFS_BB_RIGHTSIB)
+ ptr->l = block->bb_u.l.bb_rightsib;
+ else
+ ptr->l = block->bb_u.l.bb_leftsib;
+ } else {
+ if (lr == XFS_BB_RIGHTSIB)
+ ptr->s = block->bb_u.s.bb_rightsib;
+ else
+ ptr->s = block->bb_u.s.bb_leftsib;
+ }
+}
+
+STATIC void
+xfs_btree_set_sibling(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_ptr *ptr,
+ int lr)
+{
+ ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (lr == XFS_BB_RIGHTSIB)
+ block->bb_u.l.bb_rightsib = ptr->l;
+ else
+ block->bb_u.l.bb_leftsib = ptr->l;
+ } else {
+ if (lr == XFS_BB_RIGHTSIB)
+ block->bb_u.s.bb_rightsib = ptr->s;
+ else
+ block->bb_u.s.bb_leftsib = ptr->s;
+ }
+}
+
+STATIC void
+xfs_btree_init_block(
+ struct xfs_btree_cur *cur,
+ int level,
+ int numrecs,
+ struct xfs_btree_block *new) /* new block */
+{
+ new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
+ new->bb_level = cpu_to_be16(level);
+ new->bb_numrecs = cpu_to_be16(numrecs);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
+ new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
+ } else {
+ new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+ new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+ }
+}
+
+/*
+ * Return true if ptr is the last record in the btree and
+ * we need to track updateѕ to this record. The decision
+ * will be further refined in the update_lastrec method.
+ */
+STATIC int
+xfs_btree_is_lastrec(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ int level)
+{
+ union xfs_btree_ptr ptr;
+
+ if (level > 0)
+ return 0;
+ if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
+ return 0;
+
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ if (!xfs_btree_ptr_is_null(cur, &ptr))
+ return 0;
+ return 1;
+}
+
+STATIC void
+xfs_btree_buf_to_ptr(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
+ XFS_BUF_ADDR(bp)));
+ else {
+ ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp,
+ XFS_BUF_ADDR(bp)));
+ }
+}
+
+STATIC xfs_daddr_t
+xfs_btree_ptr_to_daddr(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
+
+ return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+ } else {
+ ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
+ ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
+
+ return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+ be32_to_cpu(ptr->s));
+ }
+}
+
+STATIC void
+xfs_btree_set_refs(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ switch (cur->bc_btnum) {
+ case XFS_BTNUM_BNO:
+ case XFS_BTNUM_CNT:
+ XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
+ break;
+ case XFS_BTNUM_INO:
+ XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
+ break;
+ case XFS_BTNUM_BMAP:
+ XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
+ break;
+ default:
+ ASSERT(0);
+ }
+}
+
+STATIC int
+xfs_btree_get_buf_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int flags,
+ struct xfs_btree_block **block,
+ struct xfs_buf **bpp)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_daddr_t d;
+
+ /* need to sort out how callers deal with failures first */
+ ASSERT(!(flags & XFS_BUF_TRYLOCK));
+
+ d = xfs_btree_ptr_to_daddr(cur, ptr);
+ *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
+ mp->m_bsize, flags);
+
+ ASSERT(*bpp);
+ ASSERT(!XFS_BUF_GETERROR(*bpp));
+
+ *block = XFS_BUF_TO_BLOCK(*bpp);
+ return 0;
+}
+
+/*
+ * Read in the buffer at the given ptr and return the buffer and
+ * the block pointer within the buffer.
+ */
+STATIC int
+xfs_btree_read_buf_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int level,
+ int flags,
+ struct xfs_btree_block **block,
+ struct xfs_buf **bpp)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_daddr_t d;
+ int error;
+
+ /* need to sort out how callers deal with failures first */
+ ASSERT(!(flags & XFS_BUF_TRYLOCK));
+
+ d = xfs_btree_ptr_to_daddr(cur, ptr);
+ error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
+ mp->m_bsize, flags, bpp);
+ if (error)
+ return error;
+
+ ASSERT(*bpp != NULL);
+ ASSERT(!XFS_BUF_GETERROR(*bpp));
+
+ xfs_btree_set_refs(cur, *bpp);
+ *block = XFS_BUF_TO_BLOCK(*bpp);
+
+ error = xfs_btree_check_block(cur, *block, level, *bpp);
+ if (error)
+ xfs_trans_brelse(cur->bc_tp, *bpp);
+ return error;
+}
+
+/*
+ * Copy keys from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *dst_key,
+ union xfs_btree_key *src_key,
+ int numkeys)
+{
+ ASSERT(numkeys >= 0);
+ memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
+}
+
+/*
+ * Copy records from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_recs(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *dst_rec,
+ union xfs_btree_rec *src_rec,
+ int numrecs)
+{
+ ASSERT(numrecs >= 0);
+ memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
+}
+
+/*
+ * Copy block pointers from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_ptrs(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *dst_ptr,
+ union xfs_btree_ptr *src_ptr,
+ int numptrs)
+{
+ ASSERT(numptrs >= 0);
+ memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
+}
+
+/*
+ * Shift keys one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ int dir,
+ int numkeys)
+{
+ char *dst_key;
+
+ ASSERT(numkeys >= 0);
+ ASSERT(dir == 1 || dir == -1);
+
+ dst_key = (char *)key + (dir * cur->bc_ops->key_len);
+ memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
+}
+
+/*
+ * Shift records one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_recs(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ int dir,
+ int numrecs)
+{
+ char *dst_rec;
+
+ ASSERT(numrecs >= 0);
+ ASSERT(dir == 1 || dir == -1);
+
+ dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
+ memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
+}
+
+/*
+ * Shift block pointers one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_ptrs(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int dir,
+ int numptrs)
+{
+ char *dst_ptr;
+
+ ASSERT(numptrs >= 0);
+ ASSERT(dir == 1 || dir == -1);
+
+ dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
+ memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
+}
+
+/*
+ * Log key values from the btree block.
+ */
+STATIC void
+xfs_btree_log_keys(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int first,
+ int last)
+{
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+ if (bp) {
+ xfs_trans_log_buf(cur->bc_tp, bp,
+ xfs_btree_key_offset(cur, first),
+ xfs_btree_key_offset(cur, last + 1) - 1);
+ } else {
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+ xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log record values from the btree block.
+ */
+void
+xfs_btree_log_recs(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int first,
+ int last)
+{
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+ xfs_trans_log_buf(cur->bc_tp, bp,
+ xfs_btree_rec_offset(cur, first),
+ xfs_btree_rec_offset(cur, last + 1) - 1);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log block pointer fields from a btree block (nonleaf).
+ */
+STATIC void
+xfs_btree_log_ptrs(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_buf *bp, /* buffer containing btree block */
+ int first, /* index of first pointer to log */
+ int last) /* index of last pointer to log */
+{
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+ if (bp) {
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ int level = xfs_btree_get_level(block);
+
+ xfs_trans_log_buf(cur->bc_tp, bp,
+ xfs_btree_ptr_offset(cur, first, level),
+ xfs_btree_ptr_offset(cur, last + 1, level) - 1);
+ } else {
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+ xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log fields from a btree block header.
+ */
+void
+xfs_btree_log_block(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_buf *bp, /* buffer containing btree block */
+ int fields) /* mask of fields: XFS_BB_... */
+{
+ int first; /* first byte offset logged */
+ int last; /* last byte offset logged */
+ static const short soffsets[] = { /* table of offsets (short) */
+ offsetof(struct xfs_btree_block, bb_magic),
+ offsetof(struct xfs_btree_block, bb_level),
+ offsetof(struct xfs_btree_block, bb_numrecs),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
+ XFS_BTREE_SBLOCK_LEN
+ };
+ static const short loffsets[] = { /* table of offsets (long) */
+ offsetof(struct xfs_btree_block, bb_magic),
+ offsetof(struct xfs_btree_block, bb_level),
+ offsetof(struct xfs_btree_block, bb_numrecs),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
+ XFS_BTREE_LBLOCK_LEN
+ };
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
+
+ if (bp) {
+ xfs_btree_offsets(fields,
+ (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ loffsets : soffsets,
+ XFS_BB_NUM_BITS, &first, &last);
+ xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ } else {
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+ xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int /* error */
+xfs_btree_increment(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block;
+ union xfs_btree_ptr ptr;
+ struct xfs_buf *bp;
+ int error; /* error return value */
+ int lev;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ ASSERT(level < cur->bc_nlevels);
+
+ /* Read-ahead to the right at this level. */
+ xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+
+ /* Get a pointer to the btree block. */
+ block = xfs_btree_get_block(cur, level, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ goto error0;
+#endif
+
+ /* We're done if we remain in the block after the increment. */
+ if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
+ goto out1;
+
+ /* Fail if we just went off the right edge of the tree. */
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ if (xfs_btree_ptr_is_null(cur, &ptr))
+ goto out0;
+
+ XFS_BTREE_STATS_INC(cur, increment);
+
+ /*
+ * March up the tree incrementing pointers.
+ * Stop when we don't go off the right edge of a block.
+ */
+ for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+ block = xfs_btree_get_block(cur, lev, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, lev, bp);
+ if (error)
+ goto error0;
+#endif
+
+ if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
+ break;
+
+ /* Read-ahead the right block for the next loop. */
+ xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+ }
+
+ /*
+ * If we went off the root then we are either seriously
+ * confused or have the tree root in an inode.
+ */
+ if (lev == cur->bc_nlevels) {
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ goto out0;
+ ASSERT(0);
+ error = EFSCORRUPTED;
+ goto error0;
+ }
+ ASSERT(lev < cur->bc_nlevels);
+
+ /*
+ * Now walk back down the tree, fixing up the cursor's buffer
+ * pointers and key numbers.
+ */
+ for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
+ union xfs_btree_ptr *ptrp;
+
+ ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
+ error = xfs_btree_read_buf_block(cur, ptrp, --lev,
+ 0, &block, &bp);
+ if (error)
+ goto error0;
+
+ xfs_btree_setbuf(cur, lev, bp);
+ cur->bc_ptrs[lev] = 1;
+ }
+out1:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int /* error */
+xfs_btree_decrement(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block;
+ xfs_buf_t *bp;
+ int error; /* error return value */
+ int lev;
+ union xfs_btree_ptr ptr;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ ASSERT(level < cur->bc_nlevels);
+
+ /* Read-ahead to the left at this level. */
+ xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+
+ /* We're done if we remain in the block after the decrement. */
+ if (--cur->bc_ptrs[level] > 0)
+ goto out1;
+
+ /* Get a pointer to the btree block. */
+ block = xfs_btree_get_block(cur, level, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ goto error0;
+#endif
+
+ /* Fail if we just went off the left edge of the tree. */
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
+ if (xfs_btree_ptr_is_null(cur, &ptr))
+ goto out0;
+
+ XFS_BTREE_STATS_INC(cur, decrement);
+
+ /*
+ * March up the tree decrementing pointers.
+ * Stop when we don't go off the left edge of a block.
+ */
+ for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+ if (--cur->bc_ptrs[lev] > 0)
+ break;
+ /* Read-ahead the left block for the next loop. */
+ xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+ }
+
+ /*
+ * If we went off the root then we are seriously confused.
+ * or the root of the tree is in an inode.
+ */
+ if (lev == cur->bc_nlevels) {
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ goto out0;
+ ASSERT(0);
+ error = EFSCORRUPTED;
+ goto error0;
+ }
+ ASSERT(lev < cur->bc_nlevels);
+
+ /*
+ * Now walk back down the tree, fixing up the cursor's buffer
+ * pointers and key numbers.
+ */
+ for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
+ union xfs_btree_ptr *ptrp;
+
+ ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
+ error = xfs_btree_read_buf_block(cur, ptrp, --lev,
+ 0, &block, &bp);
+ if (error)
+ goto error0;
+ xfs_btree_setbuf(cur, lev, bp);
+ cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
+ }
+out1:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+STATIC int
+xfs_btree_lookup_get_block(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* level in the btree */
+ union xfs_btree_ptr *pp, /* ptr to btree block */
+ struct xfs_btree_block **blkp) /* return btree block */
+{
+ struct xfs_buf *bp; /* buffer pointer for btree block */
+ int error = 0;
+
+ /* special case the root block if in an inode */
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level == cur->bc_nlevels - 1)) {
+ *blkp = xfs_btree_get_iroot(cur);
+ return 0;
+ }
+
+ /*
+ * If the old buffer at this level for the disk address we are
+ * looking for re-use it.
+ *
+ * Otherwise throw it away and get a new one.
+ */
+ bp = cur->bc_bufs[level];
+ if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
+ *blkp = XFS_BUF_TO_BLOCK(bp);
+ return 0;
+ }
+
+ error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
+ if (error)
+ return error;
+
+ xfs_btree_setbuf(cur, level, bp);
+ return 0;
+}
+
+/*
+ * Get current search key. For level 0 we don't actually have a key
+ * structure so we make one up from the record. For all other levels
+ * we just return the right key.
+ */
+STATIC union xfs_btree_key *
+xfs_lookup_get_search_key(
+ struct xfs_btree_cur *cur,
+ int level,
+ int keyno,
+ struct xfs_btree_block *block,
+ union xfs_btree_key *kp)
+{
+ if (level == 0) {
+ cur->bc_ops->init_key_from_rec(kp,
+ xfs_btree_rec_addr(cur, keyno, block));
+ return kp;
+ }
+
+ return xfs_btree_key_addr(cur, keyno, block);
+}
+
+/*
+ * Lookup the record. The cursor is made to point to it, based on dir.
+ * Return 0 if can't find any such record, 1 for success.
+ */
+int /* error */
+xfs_btree_lookup(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_lookup_t dir, /* <=, ==, or >= */
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block; /* current btree block */
+ __int64_t diff; /* difference for the current key */
+ int error; /* error return value */
+ int keyno; /* current key number */
+ int level; /* level in the btree */
+ union xfs_btree_ptr *pp; /* ptr to btree block */
+ union xfs_btree_ptr ptr; /* ptr to btree block */
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, dir);
+
+ XFS_BTREE_STATS_INC(cur, lookup);
+
+ block = NULL;
+ keyno = 0;
+
+ /* initialise start pointer from cursor */
+ cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ pp = &ptr;
+
+ /*
+ * Iterate over each level in the btree, starting at the root.
+ * For each level above the leaves, find the key we need, based
+ * on the lookup record, then follow the corresponding block
+ * pointer down to the next level.
+ */
+ for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+ /* Get the block we need to do the lookup on. */
+ error = xfs_btree_lookup_get_block(cur, level, pp, &block);
+ if (error)
+ goto error0;
+
+ if (diff == 0) {
+ /*
+ * If we already had a key match at a higher level, we
+ * know we need to use the first entry in this block.
+ */
+ keyno = 1;
+ } else {
+ /* Otherwise search this block. Do a binary search. */
+
+ int high; /* high entry number */
+ int low; /* low entry number */
+
+ /* Set low and high entry numbers, 1-based. */
+ low = 1;
+ high = xfs_btree_get_numrecs(block);
+ if (!high) {
+ /* Block is empty, must be an empty leaf. */
+ ASSERT(level == 0 && cur->bc_nlevels == 1);
+
+ cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ /* Binary search the block. */
+ while (low <= high) {
+ union xfs_btree_key key;
+ union xfs_btree_key *kp;
+
+ XFS_BTREE_STATS_INC(cur, compare);
+
+ /* keyno is average of low and high. */
+ keyno = (low + high) >> 1;
+
+ /* Get current search key */
+ kp = xfs_lookup_get_search_key(cur, level,
+ keyno, block, &key);
+
+ /*
+ * Compute difference to get next direction:
+ * - less than, move right
+ * - greater than, move left
+ * - equal, we're done
+ */
+ diff = cur->bc_ops->key_diff(cur, kp);
+ if (diff < 0)
+ low = keyno + 1;
+ else if (diff > 0)
+ high = keyno - 1;
+ else
+ break;
+ }
+ }
+
+ /*
+ * If there are more levels, set up for the next level
+ * by getting the block number and filling in the cursor.
+ */
+ if (level > 0) {
+ /*
+ * If we moved left, need the previous key number,
+ * unless there isn't one.
+ */
+ if (diff > 0 && --keyno < 1)
+ keyno = 1;
+ pp = xfs_btree_ptr_addr(cur, keyno, block);
+
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, pp, 0, level);
+ if (error)
+ goto error0;
+#endif
+ cur->bc_ptrs[level] = keyno;
+ }
+ }
+
+ /* Done with the search. See if we need to adjust the results. */
+ if (dir != XFS_LOOKUP_LE && diff < 0) {
+ keyno++;
+ /*
+ * If ge search and we went off the end of the block, but it's
+ * not the last block, we're in the wrong block.
+ */
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ if (dir == XFS_LOOKUP_GE &&
+ keyno > xfs_btree_get_numrecs(block) &&
+ !xfs_btree_ptr_is_null(cur, &ptr)) {
+ int i;
+
+ cur->bc_ptrs[0] = keyno;
+ error = xfs_btree_increment(cur, 0, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+ }
+ } else if (dir == XFS_LOOKUP_LE && diff > 0)
+ keyno--;
+ cur->bc_ptrs[0] = keyno;
+
+ /* Return if we succeeded or not. */
+ if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
+ *stat = 0;
+ else if (dir != XFS_LOOKUP_EQ || diff == 0)
+ *stat = 1;
+ else
+ *stat = 0;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Update keys at all levels from here to the root along the cursor's path.
+ */
+STATIC int
+xfs_btree_updkey(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *keyp,
+ int level)
+{
+ struct xfs_btree_block *block;
+ struct xfs_buf *bp;
+ union xfs_btree_key *kp;
+ int ptr;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
+
+ ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
+
+ /*
+ * Go up the tree from this level toward the root.
+ * At each level, update the key value to the value input.
+ * Stop when we reach a level where the cursor isn't pointing
+ * at the first entry in the block.
+ */
+ for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+#ifdef DEBUG
+ int error;
+#endif
+ block = xfs_btree_get_block(cur, level, &bp);
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+ }
+#endif
+ ptr = cur->bc_ptrs[level];
+ kp = xfs_btree_key_addr(cur, ptr, block);
+ xfs_btree_copy_keys(cur, kp, keyp, 1);
+ xfs_btree_log_keys(cur, bp, ptr, ptr);
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+}
+
+/*
+ * Update the record referred to by cur to the value in the
+ * given record. This either works (return 0) or gets an
+ * EFSCORRUPTED error.
+ */
+int
+xfs_btree_update(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ struct xfs_btree_block *block;
+ struct xfs_buf *bp;
+ int error;
+ int ptr;
+ union xfs_btree_rec *rp;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGR(cur, rec);
+
+ /* Pick up the current block. */
+ block = xfs_btree_get_block(cur, 0, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, 0, bp);
+ if (error)
+ goto error0;
+#endif
+ /* Get the address of the rec to be updated. */
+ ptr = cur->bc_ptrs[0];
+ rp = xfs_btree_rec_addr(cur, ptr, block);
+
+ /* Fill in the new contents and log them. */
+ xfs_btree_copy_recs(cur, rp, rec, 1);
+ xfs_btree_log_recs(cur, bp, ptr, ptr);
+
+ /*
+ * If we are tracking the last record in the tree and
+ * we are at the far right edge of the tree, update it.
+ */
+ if (xfs_btree_is_lastrec(cur, block, 0)) {
+ cur->bc_ops->update_lastrec(cur, block, rec,
+ ptr, LASTREC_UPDATE);
+ }
+
+ /* Updating first rec in leaf. Pass new key value up to our parent. */
+ if (ptr == 1) {
+ union xfs_btree_key key;
+
+ cur->bc_ops->init_key_from_rec(&key, rec);
+ error = xfs_btree_updkey(cur, &key, 1);
+ if (error)
+ goto error0;
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int /* error */
+xfs_btree_lshift(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat) /* success/failure */
+{
+ union xfs_btree_key key; /* btree key */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ int lrecs; /* left record count */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ int rrecs; /* right record count */
+ union xfs_btree_ptr lptr; /* left btree pointer */
+ union xfs_btree_key *rkp = NULL; /* right btree key */
+ union xfs_btree_ptr *rpp = NULL; /* right address pointer */
+ union xfs_btree_rec *rrp = NULL; /* right record pointer */
+ int error; /* error return value */
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels - 1)
+ goto out0;
+
+ /* Set up variables for this block as "right". */
+ right = xfs_btree_get_block(cur, level, &rbp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, right, level, rbp);
+ if (error)
+ goto error0;
+#endif
+
+ /* If we've got no left sibling then we can't shift an entry left. */
+ xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+ if (xfs_btree_ptr_is_null(cur, &lptr))
+ goto out0;
+
+ /*
+ * If the cursor entry is the one that would be moved, don't
+ * do it... it's too complicated.
+ */
+ if (cur->bc_ptrs[level] <= 1)
+ goto out0;
+
+ /* Set up the left neighbor as "left". */
+ error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
+ if (error)
+ goto error0;
+
+ /* If it's full, it can't take another entry. */
+ lrecs = xfs_btree_get_numrecs(left);
+ if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
+ goto out0;
+
+ rrecs = xfs_btree_get_numrecs(right);
+
+ /*
+ * We add one entry to the left side and remove one for the right side.
+ * Accout for it here, the changes will be updated on disk and logged
+ * later.
+ */
+ lrecs++;
+ rrecs--;
+
+ XFS_BTREE_STATS_INC(cur, lshift);
+ XFS_BTREE_STATS_ADD(cur, moves, 1);
+
+ /*
+ * If non-leaf, copy a key and a ptr to the left block.
+ * Log the changes to the left block.
+ */
+ if (level > 0) {
+ /* It's a non-leaf. Move keys and pointers. */
+ union xfs_btree_key *lkp; /* left btree key */
+ union xfs_btree_ptr *lpp; /* left address pointer */
+
+ lkp = xfs_btree_key_addr(cur, lrecs, left);
+ rkp = xfs_btree_key_addr(cur, 1, right);
+
+ lpp = xfs_btree_ptr_addr(cur, lrecs, left);
+ rpp = xfs_btree_ptr_addr(cur, 1, right);
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, rpp, 0, level);
+ if (error)
+ goto error0;
+#endif
+ xfs_btree_copy_keys(cur, lkp, rkp, 1);
+ xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
+
+ xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
+ xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
+
+ ASSERT(cur->bc_ops->keys_inorder(cur,
+ xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
+ } else {
+ /* It's a leaf. Move records. */
+ union xfs_btree_rec *lrp; /* left record pointer */
+
+ lrp = xfs_btree_rec_addr(cur, lrecs, left);
+ rrp = xfs_btree_rec_addr(cur, 1, right);
+
+ xfs_btree_copy_recs(cur, lrp, rrp, 1);
+ xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
+
+ ASSERT(cur->bc_ops->recs_inorder(cur,
+ xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
+ }
+
+ xfs_btree_set_numrecs(left, lrecs);
+ xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
+
+ xfs_btree_set_numrecs(right, rrecs);
+ xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
+
+ /*
+ * Slide the contents of right down one entry.
+ */
+ XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
+ if (level > 0) {
+ /* It's a nonleaf. operate on keys and ptrs */
+#ifdef DEBUG
+ int i; /* loop index */
+
+ for (i = 0; i < rrecs; i++) {
+ error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
+ if (error)
+ goto error0;
+ }
+#endif
+ xfs_btree_shift_keys(cur,
+ xfs_btree_key_addr(cur, 2, right),
+ -1, rrecs);
+ xfs_btree_shift_ptrs(cur,
+ xfs_btree_ptr_addr(cur, 2, right),
+ -1, rrecs);
+
+ xfs_btree_log_keys(cur, rbp, 1, rrecs);
+ xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
+ } else {
+ /* It's a leaf. operate on records */
+ xfs_btree_shift_recs(cur,
+ xfs_btree_rec_addr(cur, 2, right),
+ -1, rrecs);
+ xfs_btree_log_recs(cur, rbp, 1, rrecs);
+
+ /*
+ * If it's the first record in the block, we'll need a key
+ * structure to pass up to the next level (updkey).
+ */
+ cur->bc_ops->init_key_from_rec(&key,
+ xfs_btree_rec_addr(cur, 1, right));
+ rkp = &key;
+ }
+
+ /* Update the parent key values of right. */
+ error = xfs_btree_updkey(cur, rkp, level + 1);
+ if (error)
+ goto error0;
+
+ /* Slide the cursor value left one. */
+ cur->bc_ptrs[level]--;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int /* error */
+xfs_btree_rshift(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat) /* success/failure */
+{
+ union xfs_btree_key key; /* btree key */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ struct xfs_btree_cur *tcur; /* temporary btree cursor */
+ union xfs_btree_ptr rptr; /* right block pointer */
+ union xfs_btree_key *rkp; /* right btree key */
+ int rrecs; /* right record count */
+ int lrecs; /* left record count */
+ int error; /* error return value */
+ int i; /* loop counter */
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level == cur->bc_nlevels - 1))
+ goto out0;
+
+ /* Set up variables for this block as "left". */
+ left = xfs_btree_get_block(cur, level, &lbp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, left, level, lbp);
+ if (error)
+ goto error0;
+#endif
+
+ /* If we've got no right sibling then we can't shift an entry right. */
+ xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
+ if (xfs_btree_ptr_is_null(cur, &rptr))
+ goto out0;
+
+ /*
+ * If the cursor entry is the one that would be moved, don't
+ * do it... it's too complicated.
+ */
+ lrecs = xfs_btree_get_numrecs(left);
+ if (cur->bc_ptrs[level] >= lrecs)
+ goto out0;
+
+ /* Set up the right neighbor as "right". */
+ error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
+ if (error)
+ goto error0;
+
+ /* If it's full, it can't take another entry. */
+ rrecs = xfs_btree_get_numrecs(right);
+ if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
+ goto out0;
+
+ XFS_BTREE_STATS_INC(cur, rshift);
+ XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+
+ /*
+ * Make a hole at the start of the right neighbor block, then
+ * copy the last left block entry to the hole.
+ */
+ if (level > 0) {
+ /* It's a nonleaf. make a hole in the keys and ptrs */
+ union xfs_btree_key *lkp;
+ union xfs_btree_ptr *lpp;
+ union xfs_btree_ptr *rpp;
+
+ lkp = xfs_btree_key_addr(cur, lrecs, left);
+ lpp = xfs_btree_ptr_addr(cur, lrecs, left);
+ rkp = xfs_btree_key_addr(cur, 1, right);
+ rpp = xfs_btree_ptr_addr(cur, 1, right);
+
+#ifdef DEBUG
+ for (i = rrecs - 1; i >= 0; i--) {
+ error = xfs_btree_check_ptr(cur, rpp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+
+ xfs_btree_shift_keys(cur, rkp, 1, rrecs);
+ xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
+
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, lpp, 0, level);
+ if (error)
+ goto error0;
+#endif
+
+ /* Now put the new data in, and log it. */
+ xfs_btree_copy_keys(cur, rkp, lkp, 1);
+ xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
+
+ xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
+ xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
+
+ ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
+ xfs_btree_key_addr(cur, 2, right)));
+ } else {
+ /* It's a leaf. make a hole in the records */
+ union xfs_btree_rec *lrp;
+ union xfs_btree_rec *rrp;
+
+ lrp = xfs_btree_rec_addr(cur, lrecs, left);
+ rrp = xfs_btree_rec_addr(cur, 1, right);
+
+ xfs_btree_shift_recs(cur, rrp, 1, rrecs);
+
+ /* Now put the new data in, and log it. */
+ xfs_btree_copy_recs(cur, rrp, lrp, 1);
+ xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
+
+ cur->bc_ops->init_key_from_rec(&key, rrp);
+ rkp = &key;
+
+ ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
+ xfs_btree_rec_addr(cur, 2, right)));
+ }
+
+ /*
+ * Decrement and log left's numrecs, bump and log right's numrecs.
+ */
+ xfs_btree_set_numrecs(left, --lrecs);
+ xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
+
+ xfs_btree_set_numrecs(right, ++rrecs);
+ xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
+
+ /*
+ * Using a temporary cursor, update the parent key values of the
+ * block on the right.
+ */
+ error = xfs_btree_dup_cursor(cur, &tcur);
+ if (error)
+ goto error0;
+ i = xfs_btree_lastrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ error = xfs_btree_increment(tcur, level, &i);
+ if (error)
+ goto error1;
+
+ error = xfs_btree_updkey(tcur, rkp, level + 1);
+ if (error)
+ goto error1;
+
+ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+
+error1:
+ XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
+ xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and the key to its first
+ * record (to be inserted into parent).
+ */
+STATIC int /* error */
+xfs_btree_split(
+ struct xfs_btree_cur *cur,
+ int level,
+ union xfs_btree_ptr *ptrp,
+ union xfs_btree_key *key,
+ struct xfs_btree_cur **curp,
+ int *stat) /* success/failure */
+{
+ union xfs_btree_ptr lptr; /* left sibling block ptr */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ union xfs_btree_ptr rptr; /* right sibling block ptr */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ union xfs_btree_ptr rrptr; /* right-right sibling ptr */
+ struct xfs_buf *rrbp; /* right-right buffer pointer */
+ struct xfs_btree_block *rrblock; /* right-right btree block */
+ int lrecs;
+ int rrecs;
+ int src_index;
+ int error; /* error return value */
+#ifdef DEBUG
+ int i;
+#endif
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
+
+ XFS_BTREE_STATS_INC(cur, split);
+
+ /* Set up left block (current one). */
+ left = xfs_btree_get_block(cur, level, &lbp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, left, level, lbp);
+ if (error)
+ goto error0;
+#endif
+
+ xfs_btree_buf_to_ptr(cur, lbp, &lptr);
+
+ /* Allocate the new block. If we can't do it, we're toast. Give up. */
+ error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
+ if (error)
+ goto error0;
+ if (*stat == 0)
+ goto out0;
+ XFS_BTREE_STATS_INC(cur, alloc);
+
+ /* Set up the new block as "right". */
+ error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
+ if (error)
+ goto error0;
+
+ /* Fill in the btree header for the new right block. */
+ xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
+
+ /*
+ * Split the entries between the old and the new block evenly.
+ * Make sure that if there's an odd number of entries now, that
+ * each new block will have the same number of entries.
+ */
+ lrecs = xfs_btree_get_numrecs(left);
+ rrecs = lrecs / 2;
+ if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
+ rrecs++;
+ src_index = (lrecs - rrecs + 1);
+
+ XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+
+ /*
+ * Copy btree block entries from the left block over to the
+ * new block, the right. Update the right block and log the
+ * changes.
+ */
+ if (level > 0) {
+ /* It's a non-leaf. Move keys and pointers. */
+ union xfs_btree_key *lkp; /* left btree key */
+ union xfs_btree_ptr *lpp; /* left address pointer */
+ union xfs_btree_key *rkp; /* right btree key */
+ union xfs_btree_ptr *rpp; /* right address pointer */
+
+ lkp = xfs_btree_key_addr(cur, src_index, left);
+ lpp = xfs_btree_ptr_addr(cur, src_index, left);
+ rkp = xfs_btree_key_addr(cur, 1, right);
+ rpp = xfs_btree_ptr_addr(cur, 1, right);
+
+#ifdef DEBUG
+ for (i = src_index; i < rrecs; i++) {
+ error = xfs_btree_check_ptr(cur, lpp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+
+ xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
+ xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
+
+ xfs_btree_log_keys(cur, rbp, 1, rrecs);
+ xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
+
+ /* Grab the keys to the entries moved to the right block */
+ xfs_btree_copy_keys(cur, key, rkp, 1);
+ } else {
+ /* It's a leaf. Move records. */
+ union xfs_btree_rec *lrp; /* left record pointer */
+ union xfs_btree_rec *rrp; /* right record pointer */
+
+ lrp = xfs_btree_rec_addr(cur, src_index, left);
+ rrp = xfs_btree_rec_addr(cur, 1, right);
+
+ xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
+ xfs_btree_log_recs(cur, rbp, 1, rrecs);
+
+ cur->bc_ops->init_key_from_rec(key,
+ xfs_btree_rec_addr(cur, 1, right));
+ }
+
+
+ /*
+ * Find the left block number by looking in the buffer.
+ * Adjust numrecs, sibling pointers.
+ */
+ xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
+ xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
+ xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+ xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
+
+ lrecs -= rrecs;
+ xfs_btree_set_numrecs(left, lrecs);
+ xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
+
+ xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
+ xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+
+ /*
+ * If there's a block to the new block's right, make that block
+ * point back to right instead of to left.
+ */
+ if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
+ error = xfs_btree_read_buf_block(cur, &rrptr, level,
+ 0, &rrblock, &rrbp);
+ if (error)
+ goto error0;
+ xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
+ xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+ }
+ /*
+ * If the cursor is really in the right block, move it there.
+ * If it's just pointing past the last entry in left, then we'll
+ * insert there, so don't change anything in that case.
+ */
+ if (cur->bc_ptrs[level] > lrecs + 1) {
+ xfs_btree_setbuf(cur, level, rbp);
+ cur->bc_ptrs[level] -= lrecs;
+ }
+ /*
+ * If there are more levels, we'll need another cursor which refers
+ * the right block, no matter where this cursor was.
+ */
+ if (level + 1 < cur->bc_nlevels) {
+ error = xfs_btree_dup_cursor(cur, curp);
+ if (error)
+ goto error0;
+ (*curp)->bc_ptrs[level + 1]++;
+ }
+ *ptrp = rptr;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Copy the old inode root contents into a real block and make the
+ * broot point to it.
+ */
+int /* error */
+xfs_btree_new_iroot(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int *logflags, /* logging flags for inode */
+ int *stat) /* return status - 0 fail */
+{
+ struct xfs_buf *cbp; /* buffer for cblock */
+ struct xfs_btree_block *block; /* btree block */
+ struct xfs_btree_block *cblock; /* child btree block */
+ union xfs_btree_key *ckp; /* child key pointer */
+ union xfs_btree_ptr *cpp; /* child ptr pointer */
+ union xfs_btree_key *kp; /* pointer to btree key */
+ union xfs_btree_ptr *pp; /* pointer to block addr */
+ union xfs_btree_ptr nptr; /* new block addr */
+ int level; /* btree level */
+ int error; /* error return code */
+#ifdef DEBUG
+ int i; /* loop counter */
+#endif
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_STATS_INC(cur, newroot);
+
+ ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+
+ level = cur->bc_nlevels - 1;
+
+ block = xfs_btree_get_iroot(cur);
+ pp = xfs_btree_ptr_addr(cur, 1, block);
+
+ /* Allocate the new block. If we can't do it, we're toast. Give up. */
+ error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
+ if (error)
+ goto error0;
+ if (*stat == 0) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+ }
+ XFS_BTREE_STATS_INC(cur, alloc);
+
+ /* Copy the root into a real block. */
+ error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
+ if (error)
+ goto error0;
+
+ memcpy(cblock, block, xfs_btree_block_len(cur));
+
+ be16_add_cpu(&block->bb_level, 1);
+ xfs_btree_set_numrecs(block, 1);
+ cur->bc_nlevels++;
+ cur->bc_ptrs[level + 1] = 1;
+
+ kp = xfs_btree_key_addr(cur, 1, block);
+ ckp = xfs_btree_key_addr(cur, 1, cblock);
+ xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
+
+ cpp = xfs_btree_ptr_addr(cur, 1, cblock);
+#ifdef DEBUG
+ for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
+ error = xfs_btree_check_ptr(cur, pp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+ xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
+
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, &nptr, 0, level);
+ if (error)
+ goto error0;
+#endif
+ xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
+
+ xfs_iroot_realloc(cur->bc_private.b.ip,
+ 1 - xfs_btree_get_numrecs(cblock),
+ cur->bc_private.b.whichfork);
+
+ xfs_btree_setbuf(cur, level, cbp);
+
+ /*
+ * Do all this logging at the end so that
+ * the root is at the right level.
+ */
+ xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
+ xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
+ xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
+
+ *logflags |=
+ XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
+ *stat = 1;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Allocate a new root block, fill it in.
+ */
+STATIC int /* error */
+xfs_btree_new_root(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block; /* one half of the old root block */
+ struct xfs_buf *bp; /* buffer containing block */
+ int error; /* error return value */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ struct xfs_buf *nbp; /* new (root) buffer */
+ struct xfs_btree_block *new; /* new (root) btree block */
+ int nptr; /* new value for key index, 1 or 2 */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ union xfs_btree_ptr rptr;
+ union xfs_btree_ptr lptr;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_STATS_INC(cur, newroot);
+
+ /* initialise our start point from the cursor */
+ cur->bc_ops->init_ptr_from_cur(cur, &rptr);
+
+ /* Allocate the new block. If we can't do it, we're toast. Give up. */
+ error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
+ if (error)
+ goto error0;
+ if (*stat == 0)
+ goto out0;
+ XFS_BTREE_STATS_INC(cur, alloc);
+
+ /* Set up the new block. */
+ error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
+ if (error)
+ goto error0;
+
+ /* Set the root in the holding structure increasing the level by 1. */
+ cur->bc_ops->set_root(cur, &lptr, 1);
+
+ /*
+ * At the previous root level there are now two blocks: the old root,
+ * and the new block generated when it was split. We don't know which
+ * one the cursor is pointing at, so we set up variables "left" and
+ * "right" for each case.
+ */
+ block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
+ if (error)
+ goto error0;
+#endif
+
+ xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+ if (!xfs_btree_ptr_is_null(cur, &rptr)) {
+ /* Our block is left, pick up the right block. */
+ lbp = bp;
+ xfs_btree_buf_to_ptr(cur, lbp, &lptr);
+ left = block;
+ error = xfs_btree_read_buf_block(cur, &rptr,
+ cur->bc_nlevels - 1, 0, &right, &rbp);
+ if (error)
+ goto error0;
+ bp = rbp;
+ nptr = 1;
+ } else {
+ /* Our block is right, pick up the left block. */
+ rbp = bp;
+ xfs_btree_buf_to_ptr(cur, rbp, &rptr);
+ right = block;
+ xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+ error = xfs_btree_read_buf_block(cur, &lptr,
+ cur->bc_nlevels - 1, 0, &left, &lbp);
+ if (error)
+ goto error0;
+ bp = lbp;
+ nptr = 2;
+ }
+ /* Fill in the new block's btree header and log it. */
+ xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
+ xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
+ ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
+ !xfs_btree_ptr_is_null(cur, &rptr));
+
+ /* Fill in the key data in the new root. */
+ if (xfs_btree_get_level(left) > 0) {
+ xfs_btree_copy_keys(cur,
+ xfs_btree_key_addr(cur, 1, new),
+ xfs_btree_key_addr(cur, 1, left), 1);
+ xfs_btree_copy_keys(cur,
+ xfs_btree_key_addr(cur, 2, new),
+ xfs_btree_key_addr(cur, 1, right), 1);
+ } else {
+ cur->bc_ops->init_key_from_rec(
+ xfs_btree_key_addr(cur, 1, new),
+ xfs_btree_rec_addr(cur, 1, left));
+ cur->bc_ops->init_key_from_rec(
+ xfs_btree_key_addr(cur, 2, new),
+ xfs_btree_rec_addr(cur, 1, right));
+ }
+ xfs_btree_log_keys(cur, nbp, 1, 2);
+
+ /* Fill in the pointer data in the new root. */
+ xfs_btree_copy_ptrs(cur,
+ xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
+ xfs_btree_copy_ptrs(cur,
+ xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
+ xfs_btree_log_ptrs(cur, nbp, 1, 2);
+
+ /* Fix up the cursor. */
+ xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+ cur->bc_ptrs[cur->bc_nlevels] = nptr;
+ cur->bc_nlevels++;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+}
+
+STATIC int
+xfs_btree_make_block_unfull(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* btree level */
+ int numrecs,/* # of recs in block */
+ int *oindex,/* old tree index */
+ int *index, /* new tree index */
+ union xfs_btree_ptr *nptr, /* new btree ptr */
+ struct xfs_btree_cur **ncur, /* new btree cursor */
+ union xfs_btree_rec *nrec, /* new record */
+ int *stat)
+{
+ union xfs_btree_key key; /* new btree key value */
+ int error = 0;
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels - 1) {
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+
+ if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
+ /* A root block that can be made bigger. */
+
+ xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
+ } else {
+ /* A root block that needs replacing */
+ int logflags = 0;
+
+ error = xfs_btree_new_iroot(cur, &logflags, stat);
+ if (error || *stat == 0)
+ return error;
+
+ xfs_trans_log_inode(cur->bc_tp, ip, logflags);
+ }
+
+ return 0;
+ }
+
+ /* First, try shifting an entry to the right neighbor. */
+ error = xfs_btree_rshift(cur, level, stat);
+ if (error || *stat)
+ return error;
+
+ /* Next, try shifting an entry to the left neighbor. */
+ error = xfs_btree_lshift(cur, level, stat);
+ if (error)
+ return error;
+
+ if (*stat) {
+ *oindex = *index = cur->bc_ptrs[level];
+ return 0;
+ }
+
+ /*
+ * Next, try splitting the current block in half.
+ *
+ * If this works we have to re-set our variables because we
+ * could be in a different block now.
+ */
+ error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
+ if (error || *stat == 0)
+ return error;
+
+
+ *index = cur->bc_ptrs[level];
+ cur->bc_ops->init_rec_from_key(&key, nrec);
+ return 0;
+}
+
+/*
+ * Insert one record/level. Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ */
+STATIC int
+xfs_btree_insrec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* level to insert record at */
+ union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
+ union xfs_btree_rec *recp, /* i/o: record data inserted */
+ struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block; /* btree block */
+ struct xfs_buf *bp; /* buffer for block */
+ union xfs_btree_key key; /* btree key */
+ union xfs_btree_ptr nptr; /* new block ptr */
+ struct xfs_btree_cur *ncur; /* new btree cursor */
+ union xfs_btree_rec nrec; /* new record count */
+ int optr; /* old key/record index */
+ int ptr; /* key/record index */
+ int numrecs;/* number of records */
+ int error; /* error return value */
+#ifdef DEBUG
+ int i;
+#endif
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
+
+ ncur = NULL;
+
+ /*
+ * If we have an external root pointer, and we've made it to the
+ * root level, allocate a new root block and we're done.
+ */
+ if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level >= cur->bc_nlevels)) {
+ error = xfs_btree_new_root(cur, stat);
+ xfs_btree_set_ptr_null(cur, ptrp);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return error;
+ }
+
+ /* If we're off the left edge, return failure. */
+ ptr = cur->bc_ptrs[level];
+ if (ptr == 0) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ /* Make a key out of the record data to be inserted, and save it. */
+ cur->bc_ops->init_key_from_rec(&key, recp);
+
+ optr = ptr;
+
+ XFS_BTREE_STATS_INC(cur, insrec);
+
+ /* Get pointers to the btree buffer and block. */
+ block = xfs_btree_get_block(cur, level, &bp);
+ numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ goto error0;
+
+ /* Check that the new entry is being inserted in the right place. */
+ if (ptr <= numrecs) {
+ if (level == 0) {
+ ASSERT(cur->bc_ops->recs_inorder(cur, recp,
+ xfs_btree_rec_addr(cur, ptr, block)));
+ } else {
+ ASSERT(cur->bc_ops->keys_inorder(cur, &key,
+ xfs_btree_key_addr(cur, ptr, block)));
+ }
+ }
+#endif
+
+ /*
+ * If the block is full, we can't insert the new entry until we
+ * make the block un-full.
+ */
+ xfs_btree_set_ptr_null(cur, &nptr);
+ if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
+ error = xfs_btree_make_block_unfull(cur, level, numrecs,
+ &optr, &ptr, &nptr, &ncur, &nrec, stat);
+ if (error || *stat == 0)
+ goto error0;
+ }
+
+ /*
+ * The current block may have changed if the block was
+ * previously full and we have just made space in it.
+ */
+ block = xfs_btree_get_block(cur, level, &bp);
+ numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ return error;
+#endif
+
+ /*
+ * At this point we know there's room for our new entry in the block
+ * we're pointing at.
+ */
+ XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
+
+ if (level > 0) {
+ /* It's a nonleaf. make a hole in the keys and ptrs */
+ union xfs_btree_key *kp;
+ union xfs_btree_ptr *pp;
+
+ kp = xfs_btree_key_addr(cur, ptr, block);
+ pp = xfs_btree_ptr_addr(cur, ptr, block);
+
+#ifdef DEBUG
+ for (i = numrecs - ptr; i >= 0; i--) {
+ error = xfs_btree_check_ptr(cur, pp, i, level);
+ if (error)
+ return error;
+ }
+#endif
+
+ xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
+ xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
+
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, ptrp, 0, level);
+ if (error)
+ goto error0;
+#endif
+
+ /* Now put the new data in, bump numrecs and log it. */
+ xfs_btree_copy_keys(cur, kp, &key, 1);
+ xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
+ numrecs++;
+ xfs_btree_set_numrecs(block, numrecs);
+ xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
+ xfs_btree_log_keys(cur, bp, ptr, numrecs);
+#ifdef DEBUG
+ if (ptr < numrecs) {
+ ASSERT(cur->bc_ops->keys_inorder(cur, kp,
+ xfs_btree_key_addr(cur, ptr + 1, block)));
+ }
+#endif
+ } else {
+ /* It's a leaf. make a hole in the records */
+ union xfs_btree_rec *rp;
+
+ rp = xfs_btree_rec_addr(cur, ptr, block);
+
+ xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
+
+ /* Now put the new data in, bump numrecs and log it. */
+ xfs_btree_copy_recs(cur, rp, recp, 1);
+ xfs_btree_set_numrecs(block, ++numrecs);
+ xfs_btree_log_recs(cur, bp, ptr, numrecs);
+#ifdef DEBUG
+ if (ptr < numrecs) {
+ ASSERT(cur->bc_ops->recs_inorder(cur, rp,
+ xfs_btree_rec_addr(cur, ptr + 1, block)));
+ }
+#endif
+ }
+
+ /* Log the new number of records in the btree header. */
+ xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
+
+ /* If we inserted at the start of a block, update the parents' keys. */
+ if (optr == 1) {
+ error = xfs_btree_updkey(cur, &key, level + 1);
+ if (error)
+ goto error0;
+ }
+
+ /*
+ * If we are tracking the last record in the tree and
+ * we are at the far right edge of the tree, update it.
+ */
+ if (xfs_btree_is_lastrec(cur, block, level)) {
+ cur->bc_ops->update_lastrec(cur, block, recp,
+ ptr, LASTREC_INSREC);
+ }
+
+ /*
+ * Return the new block number, if any.
+ * If there is one, give back a record value and a cursor too.
+ */
+ *ptrp = nptr;
+ if (!xfs_btree_ptr_is_null(cur, &nptr)) {
+ *recp = nrec;
+ *curp = ncur;
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Insert the record at the point referenced by cur.
+ *
+ * A multi-level split of the tree on insert will invalidate the original
+ * cursor. All callers of this function should assume that the cursor is
+ * no longer valid and revalidate it.
+ */
+int
+xfs_btree_insert(
+ struct xfs_btree_cur *cur,
+ int *stat)
+{
+ int error; /* error return value */
+ int i; /* result value, 0 for failure */
+ int level; /* current level number in btree */
+ union xfs_btree_ptr nptr; /* new block number (split result) */
+ struct xfs_btree_cur *ncur; /* new cursor (split result) */
+ struct xfs_btree_cur *pcur; /* previous level's cursor */
+ union xfs_btree_rec rec; /* record to insert */
+
+ level = 0;
+ ncur = NULL;
+ pcur = cur;
+
+ xfs_btree_set_ptr_null(cur, &nptr);
+ cur->bc_ops->init_rec_from_cur(cur, &rec);
+
+ /*
+ * Loop going up the tree, starting at the leaf level.
+ * Stop when we don't get a split block, that must mean that
+ * the insert is finished with this level.
+ */
+ do {
+ /*
+ * Insert nrec/nptr into this level of the tree.
+ * Note if we fail, nptr will be null.
+ */
+ error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
+ if (error) {
+ if (pcur != cur)
+ xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+ goto error0;
+ }
+
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+ level++;
+
+ /*
+ * See if the cursor we just used is trash.
+ * Can't trash the caller's cursor, but otherwise we should
+ * if ncur is a new cursor or we're about to be done.
+ */
+ if (pcur != cur &&
+ (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
+ /* Save the state from the cursor before we trash it */
+ if (cur->bc_ops->update_cursor)
+ cur->bc_ops->update_cursor(pcur, cur);
+ cur->bc_nlevels = pcur->bc_nlevels;
+ xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+ }
+ /* If we got a new cursor, switch to it. */
+ if (ncur) {
+ pcur = ncur;
+ ncur = NULL;
+ }
+ } while (!xfs_btree_ptr_is_null(cur, &nptr));
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = i;
+ return 0;
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Try to merge a non-leaf block back into the inode root.
+ *
+ * Note: the killroot names comes from the fact that we're effectively
+ * killing the old root block. But because we can't just delete the
+ * inode we have to copy the single block it was pointing to into the
+ * inode.
+ */
+int
+xfs_btree_kill_iroot(
+ struct xfs_btree_cur *cur)
+{
+ int whichfork = cur->bc_private.b.whichfork;
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_btree_block *block;
+ struct xfs_btree_block *cblock;
+ union xfs_btree_key *kp;
+ union xfs_btree_key *ckp;
+ union xfs_btree_ptr *pp;
+ union xfs_btree_ptr *cpp;
+ struct xfs_buf *cbp;
+ int level;
+ int index;
+ int numrecs;
+#ifdef DEBUG
+ union xfs_btree_ptr ptr;
+ int i;
+#endif
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_nlevels > 1);
+
+ /*
+ * Don't deal with the root block needs to be a leaf case.
+ * We're just going to turn the thing back into extents anyway.
+ */
+ level = cur->bc_nlevels - 1;
+ if (level == 1)
+ goto out0;
+
+ /*
+ * Give up if the root has multiple children.
+ */
+ block = xfs_btree_get_iroot(cur);
+ if (xfs_btree_get_numrecs(block) != 1)
+ goto out0;
+
+ cblock = xfs_btree_get_block(cur, level - 1, &cbp);
+ numrecs = xfs_btree_get_numrecs(cblock);
+
+ /*
+ * Only do this if the next level will fit.
+ * Then the data must be copied up to the inode,
+ * instead of freeing the root you free the next level.
+ */
+ if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
+ goto out0;
+
+ XFS_BTREE_STATS_INC(cur, killroot);
+
+#ifdef DEBUG
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
+ ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
+#endif
+
+ index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
+ if (index) {
+ xfs_iroot_realloc(cur->bc_private.b.ip, index,
+ cur->bc_private.b.whichfork);
+ block = ifp->if_broot;
+ }
+
+ be16_add_cpu(&block->bb_numrecs, index);
+ ASSERT(block->bb_numrecs == cblock->bb_numrecs);
+
+ kp = xfs_btree_key_addr(cur, 1, block);
+ ckp = xfs_btree_key_addr(cur, 1, cblock);
+ xfs_btree_copy_keys(cur, kp, ckp, numrecs);
+
+ pp = xfs_btree_ptr_addr(cur, 1, block);
+ cpp = xfs_btree_ptr_addr(cur, 1, cblock);
+#ifdef DEBUG
+ for (i = 0; i < numrecs; i++) {
+ int error;
+
+ error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+ }
+ }
+#endif
+ xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
+
+ cur->bc_ops->free_block(cur, cbp);
+ XFS_BTREE_STATS_INC(cur, free);
+
+ cur->bc_bufs[level - 1] = NULL;
+ be16_add_cpu(&block->bb_level, -1);
+ xfs_trans_log_inode(cur->bc_tp, ip,
+ XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+ cur->bc_nlevels--;
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+}
+
+STATIC int
+xfs_btree_dec_cursor(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat)
+{
+ int error;
+ int i;
+
+ if (level > 0) {
+ error = xfs_btree_decrement(cur, level, &i);
+ if (error)
+ return error;
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+}
+
+/*
+ * Single level of the btree record deletion routine.
+ * Delete record pointed to by cur/level.
+ * Remove the record from its block then rebalance the tree.
+ * Return 0 for error, 1 for done, 2 to go on to the next level.
+ */
+STATIC int /* error */
+xfs_btree_delrec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* level removing record from */
+ int *stat) /* fail/done/go-on */
+{
+ struct xfs_btree_block *block; /* btree block */
+ union xfs_btree_ptr cptr; /* current block ptr */
+ struct xfs_buf *bp; /* buffer for block */
+ int error; /* error return value */
+ int i; /* loop counter */
+ union xfs_btree_key key; /* storage for keyp */
+ union xfs_btree_key *keyp = &key; /* passed to the next level */
+ union xfs_btree_ptr lptr; /* left sibling block ptr */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ int lrecs = 0; /* left record count */
+ int ptr; /* key/record index */
+ union xfs_btree_ptr rptr; /* right sibling block ptr */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ struct xfs_btree_block *rrblock; /* right-right btree block */
+ struct xfs_buf *rrbp; /* right-right buffer pointer */
+ int rrecs = 0; /* right record count */
+ struct xfs_btree_cur *tcur; /* temporary btree cursor */
+ int numrecs; /* temporary numrec count */
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ tcur = NULL;
+
+ /* Get the index of the entry being deleted, check for nothing there. */
+ ptr = cur->bc_ptrs[level];
+ if (ptr == 0) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ /* Get the buffer & block containing the record or key/ptr. */
+ block = xfs_btree_get_block(cur, level, &bp);
+ numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ goto error0;
+#endif
+
+ /* Fail if we're off the end of the block. */
+ if (ptr > numrecs) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ XFS_BTREE_STATS_INC(cur, delrec);
+ XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
+
+ /* Excise the entries being deleted. */
+ if (level > 0) {
+ /* It's a nonleaf. operate on keys and ptrs */
+ union xfs_btree_key *lkp;
+ union xfs_btree_ptr *lpp;
+
+ lkp = xfs_btree_key_addr(cur, ptr + 1, block);
+ lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
+
+#ifdef DEBUG
+ for (i = 0; i < numrecs - ptr; i++) {
+ error = xfs_btree_check_ptr(cur, lpp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+
+ if (ptr < numrecs) {
+ xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
+ xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
+ xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
+ xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
+ }
+
+ /*
+ * If it's the first record in the block, we'll need to pass a
+ * key up to the next level (updkey).
+ */
+ if (ptr == 1)
+ keyp = xfs_btree_key_addr(cur, 1, block);
+ } else {
+ /* It's a leaf. operate on records */
+ if (ptr < numrecs) {
+ xfs_btree_shift_recs(cur,
+ xfs_btree_rec_addr(cur, ptr + 1, block),
+ -1, numrecs - ptr);
+ xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
+ }
+
+ /*
+ * If it's the first record in the block, we'll need a key
+ * structure to pass up to the next level (updkey).
+ */
+ if (ptr == 1) {
+ cur->bc_ops->init_key_from_rec(&key,
+ xfs_btree_rec_addr(cur, 1, block));
+ keyp = &key;
+ }
+ }
+
+ /*
+ * Decrement and log the number of entries in the block.
+ */
+ xfs_btree_set_numrecs(block, --numrecs);
+ xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
+
+ /*
+ * If we are tracking the last record in the tree and
+ * we are at the far right edge of the tree, update it.
+ */
+ if (xfs_btree_is_lastrec(cur, block, level)) {
+ cur->bc_ops->update_lastrec(cur, block, NULL,
+ ptr, LASTREC_DELREC);
+ }
+
+ /*
+ * We're at the root level. First, shrink the root block in-memory.
+ * Try to get rid of the next level down. If we can't then there's
+ * nothing left to do.
+ */
+ if (level == cur->bc_nlevels - 1) {
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+ xfs_iroot_realloc(cur->bc_private.b.ip, -1,
+ cur->bc_private.b.whichfork);
+
+ error = xfs_btree_kill_iroot(cur);
+ if (error)
+ goto error0;
+
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ *stat = 1;
+ return 0;
+ }
+
+ /*
+ * If this is the root level, and there's only one entry left,
+ * and it's NOT the leaf level, then we can get rid of this
+ * level.
+ */
+ if (numrecs == 1 && level > 0) {
+ union xfs_btree_ptr *pp;
+ /*
+ * pp is still set to the first pointer in the block.
+ * Make it the new root of the btree.
+ */
+ pp = xfs_btree_ptr_addr(cur, 1, block);
+ error = cur->bc_ops->kill_root(cur, bp, level, pp);
+ if (error)
+ goto error0;
+ } else if (level > 0) {
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ }
+ *stat = 1;
+ return 0;
+ }
+
+ /*
+ * If we deleted the leftmost entry in the block, update the
+ * key values above us in the tree.
+ */
+ if (ptr == 1) {
+ error = xfs_btree_updkey(cur, keyp, level + 1);
+ if (error)
+ goto error0;
+ }
+
+ /*
+ * If the number of records remaining in the block is at least
+ * the minimum, we're done.
+ */
+ if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ return 0;
+ }
+
+ /*
+ * Otherwise, we have to move some records around to keep the
+ * tree balanced. Look at the left and right sibling blocks to
+ * see if we can re-balance by moving only one record.
+ */
+ xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+ xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
+
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+ /*
+ * One child of root, need to get a chance to copy its contents
+ * into the root and delete it. Can't go up to next level,
+ * there's nothing to delete there.
+ */
+ if (xfs_btree_ptr_is_null(cur, &rptr) &&
+ xfs_btree_ptr_is_null(cur, &lptr) &&
+ level == cur->bc_nlevels - 2) {
+ error = xfs_btree_kill_iroot(cur);
+ if (!error)
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ return 0;
+ }
+ }
+
+ ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
+ !xfs_btree_ptr_is_null(cur, &lptr));
+
+ /*
+ * Duplicate the cursor so our btree manipulations here won't
+ * disrupt the next level up.
+ */
+ error = xfs_btree_dup_cursor(cur, &tcur);
+ if (error)
+ goto error0;
+
+ /*
+ * If there's a right sibling, see if it's ok to shift an entry
+ * out of it.
+ */
+ if (!xfs_btree_ptr_is_null(cur, &rptr)) {
+ /*
+ * Move the temp cursor to the last entry in the next block.
+ * Actually any entry but the first would suffice.
+ */
+ i = xfs_btree_lastrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ error = xfs_btree_increment(tcur, level, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ i = xfs_btree_lastrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ /* Grab a pointer to the block. */
+ right = xfs_btree_get_block(tcur, level, &rbp);
+#ifdef DEBUG
+ error = xfs_btree_check_block(tcur, right, level, rbp);
+ if (error)
+ goto error0;
+#endif
+ /* Grab the current block number, for future use. */
+ xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
+
+ /*
+ * If right block is full enough so that removing one entry
+ * won't make it too empty, and left-shifting an entry out
+ * of right to us works, we're done.
+ */
+ if (xfs_btree_get_numrecs(right) - 1 >=
+ cur->bc_ops->get_minrecs(tcur, level)) {
+ error = xfs_btree_lshift(tcur, level, &i);
+ if (error)
+ goto error0;
+ if (i) {
+ ASSERT(xfs_btree_get_numrecs(block) >=
+ cur->bc_ops->get_minrecs(tcur, level));
+
+ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+ tcur = NULL;
+
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ return 0;
+ }
+ }
+
+ /*
+ * Otherwise, grab the number of records in right for
+ * future reference, and fix up the temp cursor to point
+ * to our block again (last record).
+ */
+ rrecs = xfs_btree_get_numrecs(right);
+ if (!xfs_btree_ptr_is_null(cur, &lptr)) {
+ i = xfs_btree_firstrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ error = xfs_btree_decrement(tcur, level, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+ }
+ }
+
+ /*
+ * If there's a left sibling, see if it's ok to shift an entry
+ * out of it.
+ */
+ if (!xfs_btree_ptr_is_null(cur, &lptr)) {
+ /*
+ * Move the temp cursor to the first entry in the
+ * previous block.
+ */
+ i = xfs_btree_firstrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ error = xfs_btree_decrement(tcur, level, &i);
+ if (error)
+ goto error0;
+ i = xfs_btree_firstrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ /* Grab a pointer to the block. */
+ left = xfs_btree_get_block(tcur, level, &lbp);
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, left, level, lbp);
+ if (error)
+ goto error0;
+#endif
+ /* Grab the current block number, for future use. */
+ xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
+
+ /*
+ * If left block is full enough so that removing one entry
+ * won't make it too empty, and right-shifting an entry out
+ * of left to us works, we're done.
+ */
+ if (xfs_btree_get_numrecs(left) - 1 >=
+ cur->bc_ops->get_minrecs(tcur, level)) {
+ error = xfs_btree_rshift(tcur, level, &i);
+ if (error)
+ goto error0;
+ if (i) {
+ ASSERT(xfs_btree_get_numrecs(block) >=
+ cur->bc_ops->get_minrecs(tcur, level));
+ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+ tcur = NULL;
+ if (level == 0)
+ cur->bc_ptrs[0]++;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+ }
+ }
+
+ /*
+ * Otherwise, grab the number of records in right for
+ * future reference.
+ */
+ lrecs = xfs_btree_get_numrecs(left);
+ }
+
+ /* Delete the temp cursor, we're done with it. */
+ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+ tcur = NULL;
+
+ /* If here, we need to do a join to keep the tree balanced. */
+ ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
+
+ if (!xfs_btree_ptr_is_null(cur, &lptr) &&
+ lrecs + xfs_btree_get_numrecs(block) <=
+ cur->bc_ops->get_maxrecs(cur, level)) {
+ /*
+ * Set "right" to be the starting block,
+ * "left" to be the left neighbor.
+ */
+ rptr = cptr;
+ right = block;
+ rbp = bp;
+ error = xfs_btree_read_buf_block(cur, &lptr, level,
+ 0, &left, &lbp);
+ if (error)
+ goto error0;
+
+ /*
+ * If that won't work, see if we can join with the right neighbor block.
+ */
+ } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
+ rrecs + xfs_btree_get_numrecs(block) <=
+ cur->bc_ops->get_maxrecs(cur, level)) {
+ /*
+ * Set "left" to be the starting block,
+ * "right" to be the right neighbor.
+ */
+ lptr = cptr;
+ left = block;
+ lbp = bp;
+ error = xfs_btree_read_buf_block(cur, &rptr, level,
+ 0, &right, &rbp);
+ if (error)
+ goto error0;
+
+ /*
+ * Otherwise, we can't fix the imbalance.
+ * Just return. This is probably a logic error, but it's not fatal.
+ */
+ } else {
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ return 0;
+ }
+
+ rrecs = xfs_btree_get_numrecs(right);
+ lrecs = xfs_btree_get_numrecs(left);
+
+ /*
+ * We're now going to join "left" and "right" by moving all the stuff
+ * in "right" to "left" and deleting "right".
+ */
+ XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+ if (level > 0) {
+ /* It's a non-leaf. Move keys and pointers. */
+ union xfs_btree_key *lkp; /* left btree key */
+ union xfs_btree_ptr *lpp; /* left address pointer */
+ union xfs_btree_key *rkp; /* right btree key */
+ union xfs_btree_ptr *rpp; /* right address pointer */
+
+ lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
+ lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
+ rkp = xfs_btree_key_addr(cur, 1, right);
+ rpp = xfs_btree_ptr_addr(cur, 1, right);
+#ifdef DEBUG
+ for (i = 1; i < rrecs; i++) {
+ error = xfs_btree_check_ptr(cur, rpp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+ xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
+ xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
+
+ xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
+ xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
+ } else {
+ /* It's a leaf. Move records. */
+ union xfs_btree_rec *lrp; /* left record pointer */
+ union xfs_btree_rec *rrp; /* right record pointer */
+
+ lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
+ rrp = xfs_btree_rec_addr(cur, 1, right);
+
+ xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
+ xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
+ }
+
+ XFS_BTREE_STATS_INC(cur, join);
+
+ /*
+ * Fix up the the number of records and right block pointer in the
+ * surviving block, and log it.
+ */
+ xfs_btree_set_numrecs(left, lrecs + rrecs);
+ xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
+ xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
+ xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+
+ /* If there is a right sibling, point it to the remaining block. */
+ xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
+ if (!xfs_btree_ptr_is_null(cur, &cptr)) {
+ error = xfs_btree_read_buf_block(cur, &cptr, level,
+ 0, &rrblock, &rrbp);
+ if (error)
+ goto error0;
+ xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
+ xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+ }
+
+ /* Free the deleted block. */
+ error = cur->bc_ops->free_block(cur, rbp);
+ if (error)
+ goto error0;
+ XFS_BTREE_STATS_INC(cur, free);
+
+ /*
+ * If we joined with the left neighbor, set the buffer in the
+ * cursor to the left block, and fix up the index.
+ */
+ if (bp != lbp) {
+ cur->bc_bufs[level] = lbp;
+ cur->bc_ptrs[level] += lrecs;
+ cur->bc_ra[level] = 0;
+ }
+ /*
+ * If we joined with the right neighbor and there's a level above
+ * us, increment the cursor at that level.
+ */
+ else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
+ (level + 1 < cur->bc_nlevels)) {
+ error = xfs_btree_increment(cur, level + 1, &i);
+ if (error)
+ goto error0;
+ }
+
+ /*
+ * Readjust the ptr at this level if it's not a leaf, since it's
+ * still pointing at the deletion point, which makes the cursor
+ * inconsistent. If this makes the ptr 0, the caller fixes it up.
+ * We can't use decrement because it would change the next level up.
+ */
+ if (level > 0)
+ cur->bc_ptrs[level]--;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ /* Return value means the next level up has something to do. */
+ *stat = 2;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (tcur)
+ xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int /* error */
+xfs_btree_delete(
+ struct xfs_btree_cur *cur,
+ int *stat) /* success/failure */
+{
+ int error; /* error return value */
+ int level;
+ int i;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ /*
+ * Go up the tree, starting at leaf level.
+ *
+ * If 2 is returned then a join was done; go to the next level.
+ * Otherwise we are done.
+ */
+ for (level = 0, i = 2; i == 2; level++) {
+ error = xfs_btree_delrec(cur, level, &i);
+ if (error)
+ goto error0;
+ }
+
+ if (i == 0) {
+ for (level = 1; level < cur->bc_nlevels; level++) {
+ if (cur->bc_ptrs[level] == 0) {
+ error = xfs_btree_decrement(cur, level, &i);
+ if (error)
+ goto error0;
+ break;
+ }
+ }
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = i;
+ return 0;
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int /* error */
+xfs_btree_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ union xfs_btree_rec **recp, /* output: btree record */
+ int *stat) /* output: success/failure */
+{
+ struct xfs_btree_block *block; /* btree block */
+ struct xfs_buf *bp; /* buffer pointer */
+ int ptr; /* record number */
+#ifdef DEBUG
+ int error; /* error return value */
+#endif
+
+ ptr = cur->bc_ptrs[0];
+ block = xfs_btree_get_block(cur, 0, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, 0, bp);
+ if (error)
+ return error;
+#endif
+
+ /*
+ * Off the right end or left end, return failure.
+ */
+ if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
+ *stat = 0;
+ return 0;
+ }
+
+ /*
+ * Point to the record and extract its data.
+ */
+ *recp = xfs_btree_rec_addr(cur, ptr, block);
+ *stat = 1;
+ return 0;
+}
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 1f528a2a3754..789fffdf8b2f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -39,39 +39,19 @@ extern kmem_zone_t *xfs_btree_cur_zone;
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
/*
- * Short form header: space allocation btrees.
- */
-typedef struct xfs_btree_sblock {
- __be32 bb_magic; /* magic number for block type */
- __be16 bb_level; /* 0 is a leaf */
- __be16 bb_numrecs; /* current # of data records */
- __be32 bb_leftsib; /* left sibling block or NULLAGBLOCK */
- __be32 bb_rightsib; /* right sibling block or NULLAGBLOCK */
-} xfs_btree_sblock_t;
-
-/*
- * Long form header: bmap btrees.
- */
-typedef struct xfs_btree_lblock {
- __be32 bb_magic; /* magic number for block type */
- __be16 bb_level; /* 0 is a leaf */
- __be16 bb_numrecs; /* current # of data records */
- __be64 bb_leftsib; /* left sibling block or NULLDFSBNO */
- __be64 bb_rightsib; /* right sibling block or NULLDFSBNO */
-} xfs_btree_lblock_t;
-
-/*
- * Combined header and structure, used by common code.
+ * Generic btree header.
+ *
+ * This is a comination of the actual format used on disk for short and long
+ * format btrees. The first three fields are shared by both format, but
+ * the pointers are different and should be used with care.
+ *
+ * To get the size of the actual short or long form headers please use
+ * the size macros below. Never use sizeof(xfs_btree_block).
*/
-typedef struct xfs_btree_hdr
-{
+struct xfs_btree_block {
__be32 bb_magic; /* magic number for block type */
__be16 bb_level; /* 0 is a leaf */
__be16 bb_numrecs; /* current # of data records */
-} xfs_btree_hdr_t;
-
-typedef struct xfs_btree_block {
- xfs_btree_hdr_t bb_h; /* header */
union {
struct {
__be32 bb_leftsib;
@@ -82,7 +62,36 @@ typedef struct xfs_btree_block {
__be64 bb_rightsib;
} l; /* long form pointers */
} bb_u; /* rest */
-} xfs_btree_block_t;
+};
+
+#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
+#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
+
+
+/*
+ * Generic key, ptr and record wrapper structures.
+ *
+ * These are disk format structures, and are converted where necessary
+ * by the btree specific code that needs to interpret them.
+ */
+union xfs_btree_ptr {
+ __be32 s; /* short form ptr */
+ __be64 l; /* long form ptr */
+};
+
+union xfs_btree_key {
+ xfs_bmbt_key_t bmbt;
+ xfs_bmdr_key_t bmbr; /* bmbt root block */
+ xfs_alloc_key_t alloc;
+ xfs_inobt_key_t inobt;
+};
+
+union xfs_btree_rec {
+ xfs_bmbt_rec_t bmbt;
+ xfs_bmdr_rec_t bmbr; /* bmbt root block */
+ xfs_alloc_rec_t alloc;
+ xfs_inobt_rec_t inobt;
+};
/*
* For logging record fields.
@@ -96,46 +105,131 @@ typedef struct xfs_btree_block {
#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
/*
- * Boolean to select which form of xfs_btree_block_t.bb_u to use.
- */
-#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP)
-
-/*
* Magic numbers for btree blocks.
*/
extern const __uint32_t xfs_magics[];
/*
- * Maximum and minimum records in a btree block.
- * Given block size, type prefix, and leaf flag (0 or 1).
- * The divisor below is equivalent to lf ? (e1) : (e2) but that produces
- * compiler warnings.
- */
-#define XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) \
- ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \
- (((lf) * (uint)sizeof(t ## _rec_t)) + \
- ((1 - (lf)) * \
- ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t))))))
-#define XFS_BTREE_BLOCK_MINRECS(bsz,t,lf) \
- (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2)
-
-/*
- * Record, key, and pointer address calculation macros.
- * Given block size, type prefix, block pointer, and index of requested entry
- * (first entry numbered 1).
- */
-#define XFS_BTREE_REC_ADDR(t,bb,i) \
- ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \
- ((i) - 1) * sizeof(t ## _rec_t)))
-#define XFS_BTREE_KEY_ADDR(t,bb,i) \
- ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \
- ((i) - 1) * sizeof(t ## _key_t)))
-#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \
- ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \
- (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t)))
+ * Generic stats interface
+ */
+#define __XFS_BTREE_STATS_INC(type, stat) \
+ XFS_STATS_INC(xs_ ## type ## _2_ ## stat)
+#define XFS_BTREE_STATS_INC(cur, stat) \
+do { \
+ switch (cur->bc_btnum) { \
+ case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \
+ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
+ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
+ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
+ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
+ } \
+} while (0)
+
+#define __XFS_BTREE_STATS_ADD(type, stat, val) \
+ XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val)
+#define XFS_BTREE_STATS_ADD(cur, stat, val) \
+do { \
+ switch (cur->bc_btnum) { \
+ case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \
+ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
+ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
+ case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
+ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
+ } \
+} while (0)
#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */
+struct xfs_btree_ops {
+ /* size of the key and record structures */
+ size_t key_len;
+ size_t rec_len;
+
+ /* cursor operations */
+ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *);
+ void (*update_cursor)(struct xfs_btree_cur *src,
+ struct xfs_btree_cur *dst);
+
+ /* update btree root pointer */
+ void (*set_root)(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *nptr, int level_change);
+ int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
+ int level, union xfs_btree_ptr *newroot);
+
+ /* block allocation / freeing */
+ int (*alloc_block)(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start_bno,
+ union xfs_btree_ptr *new_bno,
+ int length, int *stat);
+ int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
+
+ /* update last record information */
+ void (*update_lastrec)(struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_rec *rec,
+ int ptr, int reason);
+
+ /* records in block/level */
+ int (*get_minrecs)(struct xfs_btree_cur *cur, int level);
+ int (*get_maxrecs)(struct xfs_btree_cur *cur, int level);
+
+ /* records on disk. Matter for the root in inode case. */
+ int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level);
+
+ /* init values of btree structures */
+ void (*init_key_from_rec)(union xfs_btree_key *key,
+ union xfs_btree_rec *rec);
+ void (*init_rec_from_key)(union xfs_btree_key *key,
+ union xfs_btree_rec *rec);
+ void (*init_rec_from_cur)(struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec);
+ void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr);
+
+ /* difference between key value and cursor value */
+ __int64_t (*key_diff)(struct xfs_btree_cur *cur,
+ union xfs_btree_key *key);
+
+#ifdef DEBUG
+ /* check that k1 is lower than k2 */
+ int (*keys_inorder)(struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2);
+
+ /* check that r1 is lower than r2 */
+ int (*recs_inorder)(struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2);
+#endif
+
+ /* btree tracing */
+#ifdef XFS_BTREE_TRACE
+ void (*trace_enter)(struct xfs_btree_cur *, const char *,
+ char *, int, int, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t);
+ void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
+ __uint64_t *, __uint64_t *);
+ void (*trace_key)(struct xfs_btree_cur *,
+ union xfs_btree_key *, __uint64_t *,
+ __uint64_t *);
+ void (*trace_record)(struct xfs_btree_cur *,
+ union xfs_btree_rec *, __uint64_t *,
+ __uint64_t *, __uint64_t *);
+#endif
+};
+
+/*
+ * Reasons for the update_lastrec method to be called.
+ */
+#define LASTREC_UPDATE 0
+#define LASTREC_INSREC 1
+#define LASTREC_DELREC 2
+
+
/*
* Btree cursor structure.
* This collects all information needed by the btree code in one place.
@@ -144,6 +238,8 @@ typedef struct xfs_btree_cur
{
struct xfs_trans *bc_tp; /* transaction we're in, if any */
struct xfs_mount *bc_mp; /* file system mount struct */
+ const struct xfs_btree_ops *bc_ops;
+ uint bc_flags; /* btree features - below */
union {
xfs_alloc_rec_incore_t a;
xfs_bmbt_irec_t b;
@@ -175,94 +271,40 @@ typedef struct xfs_btree_cur
} bc_private; /* per-btree type data */
} xfs_btree_cur_t;
+/* cursor flags */
+#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
+#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
+#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
+
+
#define XFS_BTREE_NOERROR 0
#define XFS_BTREE_ERROR 1
/*
* Convert from buffer to btree block header.
*/
-#define XFS_BUF_TO_BLOCK(bp) ((xfs_btree_block_t *)XFS_BUF_PTR(bp))
-#define XFS_BUF_TO_LBLOCK(bp) ((xfs_btree_lblock_t *)XFS_BUF_PTR(bp))
-#define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp))
-#ifdef __KERNEL__
-
-#ifdef DEBUG
/*
- * Debug routine: check that block header is ok.
+ * Check that block header is ok.
*/
-void
+int
xfs_btree_check_block(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_block_t *block, /* generic btree block pointer */
- int level, /* level of the btree block */
- struct xfs_buf *bp); /* buffer containing block, if any */
-
-/*
- * Debug routine: check that keys are in the right order.
- */
-void
-xfs_btree_check_key(
- xfs_btnum_t btnum, /* btree identifier */
- void *ak1, /* pointer to left (lower) key */
- void *ak2); /* pointer to right (higher) key */
-
-/*
- * Debug routine: check that records are in the right order.
- */
-void
-xfs_btree_check_rec(
- xfs_btnum_t btnum, /* btree identifier */
- void *ar1, /* pointer to left (lower) record */
- void *ar2); /* pointer to right (higher) record */
-#else
-#define xfs_btree_check_block(a,b,c,d)
-#define xfs_btree_check_key(a,b,c)
-#define xfs_btree_check_rec(a,b,c)
-#endif /* DEBUG */
-
-/*
- * Checking routine: check that long form block header is ok.
- */
-int /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_lblock_t *block, /* btree long form block pointer */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* generic btree block pointer */
int level, /* level of the btree block */
struct xfs_buf *bp); /* buffer containing block, if any */
/*
- * Checking routine: check that (long) pointer is ok.
+ * Check that (long) pointer is ok.
*/
int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
- xfs_btree_cur_t *cur, /* btree cursor */
+ struct xfs_btree_cur *cur, /* btree cursor */
xfs_dfsbno_t ptr, /* btree block disk address */
int level); /* btree block level */
-#define xfs_btree_check_lptr_disk(cur, ptr, level) \
- xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level)
-
-/*
- * Checking routine: check that short form block header is ok.
- */
-int /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_sblock_t *block, /* btree short form block pointer */
- int level, /* level of the btree block */
- struct xfs_buf *bp); /* buffer containing block */
-
-/*
- * Checking routine: check that (short) pointer is ok.
- */
-int /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sptr(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t ptr, /* btree block disk address */
- int level); /* btree block level */
-
/*
* Delete the btree cursor.
*/
@@ -281,15 +323,6 @@ xfs_btree_dup_cursor(
xfs_btree_cur_t **ncur);/* output cursor */
/*
- * Change the cursor to point to the first record in the current block
- * at the given level. Other levels are unaffected.
- */
-int /* success=1, failure=0 */
-xfs_btree_firstrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level); /* level to change */
-
-/*
* Get a buffer for the block, return it with no data read.
* Long-form addressing.
*/
@@ -313,20 +346,6 @@ xfs_btree_get_bufs(
uint lock); /* lock flags for get_buf */
/*
- * Allocate a new btree cursor.
- * The cursor is either for allocation (A) or bmap (B).
- */
-xfs_btree_cur_t * /* new btree cursor */
-xfs_btree_init_cursor(
- struct xfs_mount *mp, /* file system mount point */
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_buf *agbp, /* (A only) buffer for agf structure */
- xfs_agnumber_t agno, /* (A only) allocation group number */
- xfs_btnum_t btnum, /* btree identifier */
- struct xfs_inode *ip, /* (B only) inode owning the btree */
- int whichfork); /* (B only) data/attr fork */
-
-/*
* Check for the cursor referring to the last block at the given level.
*/
int /* 1=is last block, 0=not last block */
@@ -335,15 +354,6 @@ xfs_btree_islastblock(
int level); /* level to check */
/*
- * Change the cursor to point to the last record in the current block
- * at the given level. Other levels are unaffected.
- */
-int /* success=1, failure=0 */
-xfs_btree_lastrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level); /* level to change */
-
-/*
* Compute first and last byte offsets for the fields given.
* Interprets the offsets table, which contains struct field offsets.
*/
@@ -404,39 +414,53 @@ xfs_btree_reada_bufs(
xfs_extlen_t count); /* count of filesystem blocks */
/*
- * Read-ahead btree blocks, at the given level.
- * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
+ * Set the buffer for level "lev" in the cursor to bp, releasing
+ * any previous buffer.
*/
-int /* readahead block count */
-xfs_btree_readahead_core(
+void
+xfs_btree_setbuf(
xfs_btree_cur_t *cur, /* btree cursor */
int lev, /* level in btree */
- int lr); /* left/right bits */
+ struct xfs_buf *bp); /* new buffer to set */
-static inline int /* readahead block count */
-xfs_btree_readahead(
- xfs_btree_cur_t *cur, /* btree cursor */
- int lev, /* level in btree */
- int lr) /* left/right bits */
-{
- if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
- return 0;
- return xfs_btree_readahead_core(cur, lev, lr);
-}
+/*
+ * Common btree core entry points.
+ */
+int xfs_btree_increment(struct xfs_btree_cur *, int, int *);
+int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
+int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
+int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
+int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
+int xfs_btree_kill_iroot(struct xfs_btree_cur *);
+int xfs_btree_insert(struct xfs_btree_cur *, int *);
+int xfs_btree_delete(struct xfs_btree_cur *, int *);
+int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
+/*
+ * Internal btree helpers also used by xfs_bmap.c.
+ */
+void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
+void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int);
/*
- * Set the buffer for level "lev" in the cursor to bp, releasing
- * any previous buffer.
+ * Helpers.
*/
-void
-xfs_btree_setbuf(
- xfs_btree_cur_t *cur, /* btree cursor */
- int lev, /* level in btree */
- struct xfs_buf *bp); /* new buffer to set */
+static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
+{
+ return be16_to_cpu(block->bb_numrecs);
+}
+
+static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
+ __uint16_t numrecs)
+{
+ block->bb_numrecs = cpu_to_be16(numrecs);
+}
-#endif /* __KERNEL__ */
+static inline int xfs_btree_get_level(struct xfs_btree_block *block)
+{
+ return be16_to_cpu(block->bb_level);
+}
/*
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
new file mode 100644
index 000000000000..44ff942a0fda
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
+
+STATIC void
+xfs_btree_trace_ptr(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr ptr,
+ __psunsigned_t *high,
+ __psunsigned_t *low)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ __u64 val = be64_to_cpu(ptr.l);
+ *high = val >> 32;
+ *low = (int)val;
+ } else {
+ *high = 0;
+ *low = be32_to_cpu(ptr.s);
+ }
+}
+
+/*
+ * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
+ */
+void
+xfs_btree_trace_argbi(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *b,
+ int i,
+ int line)
+{
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
+ line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
+ */
+void
+xfs_btree_trace_argbii(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *b,
+ int i0,
+ int i1,
+ int line)
+{
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
+ line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
+ 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for 3 block-length args
+ * and an integer arg.
+ */
+void
+xfs_btree_trace_argfffi(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ xfs_dfiloff_t o,
+ xfs_dfsbno_t b,
+ xfs_dfilblks_t i,
+ int j,
+ int line)
+{
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
+ line,
+ o >> 32, (int)o,
+ b >> 32, (int)b,
+ i >> 32, (int)i,
+ (int)j, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for one integer arg.
+ */
+void
+xfs_btree_trace_argi(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int i,
+ int line)
+{
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
+ line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for int, fsblock, key.
+ */
+void
+xfs_btree_trace_argipk(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int i,
+ union xfs_btree_ptr ptr,
+ union xfs_btree_key *key,
+ int line)
+{
+ __psunsigned_t high, low;
+ __uint64_t l0, l1;
+
+ xfs_btree_trace_ptr(cur, ptr, &high, &low);
+ cur->bc_ops->trace_key(cur, key, &l0, &l1);
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
+ line, i, high, low,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for int, fsblock, rec.
+ */
+void
+xfs_btree_trace_argipr(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int i,
+ union xfs_btree_ptr ptr,
+ union xfs_btree_rec *rec,
+ int line)
+{
+ __psunsigned_t high, low;
+ __uint64_t l0, l1, l2;
+
+ xfs_btree_trace_ptr(cur, ptr, &high, &low);
+ cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
+ line, i,
+ high, low,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ l2 >> 32, (int)l2,
+ 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for int, key.
+ */
+void
+xfs_btree_trace_argik(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int i,
+ union xfs_btree_key *key,
+ int line)
+{
+ __uint64_t l0, l1;
+
+ cur->bc_ops->trace_key(cur, key, &l0, &l1);
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
+ line, i,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ 0, 0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for record.
+ */
+void
+xfs_btree_trace_argr(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ int line)
+{
+ __uint64_t l0, l1, l2;
+
+ cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
+ line,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ l2 >> 32, (int)l2,
+ 0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for the cursor/operation.
+ */
+void
+xfs_btree_trace_cursor(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int type,
+ int line)
+{
+ __uint32_t s0;
+ __uint64_t l0, l1;
+ char *s;
+
+ switch (type) {
+ case XBT_ARGS:
+ s = "args";
+ break;
+ case XBT_ENTRY:
+ s = "entry";
+ break;
+ case XBT_ERROR:
+ s = "error";
+ break;
+ case XBT_EXIT:
+ s = "exit";
+ break;
+ default:
+ s = "unknown";
+ break;
+ }
+
+ cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
+ cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
+ s0,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ (__psunsigned_t)cur->bc_bufs[0],
+ (__psunsigned_t)cur->bc_bufs[1],
+ (__psunsigned_t)cur->bc_bufs[2],
+ (__psunsigned_t)cur->bc_bufs[3],
+ (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
+ (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
+}
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
new file mode 100644
index 000000000000..b3f5eb3c3c6c
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_BTREE_TRACE_H__
+#define __XFS_BTREE_TRACE_H__
+
+struct xfs_btree_cur;
+struct xfs_buf;
+
+
+/*
+ * Trace hooks.
+ * i,j = integer (32 bit)
+ * b = btree block buffer (xfs_buf_t)
+ * p = btree ptr
+ * r = btree record
+ * k = btree key
+ */
+
+#ifdef XFS_BTREE_TRACE
+
+/*
+ * Trace buffer entry types.
+ */
+#define XFS_BTREE_KTRACE_ARGBI 1
+#define XFS_BTREE_KTRACE_ARGBII 2
+#define XFS_BTREE_KTRACE_ARGFFFI 3
+#define XFS_BTREE_KTRACE_ARGI 4
+#define XFS_BTREE_KTRACE_ARGIPK 5
+#define XFS_BTREE_KTRACE_ARGIPR 6
+#define XFS_BTREE_KTRACE_ARGIK 7
+#define XFS_BTREE_KTRACE_ARGR 8
+#define XFS_BTREE_KTRACE_CUR 9
+
+/*
+ * Sub-types for cursor traces.
+ */
+#define XBT_ARGS 0
+#define XBT_ENTRY 1
+#define XBT_ERROR 2
+#define XBT_EXIT 3
+
+void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
+ struct xfs_buf *, int, int);
+void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
+ struct xfs_buf *, int, int, int);
+void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *,
+ xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int);
+void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
+void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
+ union xfs_btree_ptr, union xfs_btree_key *, int);
+void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
+ union xfs_btree_ptr, union xfs_btree_rec *, int);
+void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
+ union xfs_btree_key *, int);
+void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
+ union xfs_btree_rec *, int);
+void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
+
+
+#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */
+extern ktrace_t *xfs_allocbt_trace_buf;
+
+#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */
+extern ktrace_t *xfs_inobt_trace_buf;
+
+#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
+#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
+extern ktrace_t *xfs_bmbt_trace_buf;
+
+
+#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
+ xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
+#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
+ xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
+#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \
+ xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
+#define XFS_BTREE_TRACE_ARGI(c, i) \
+ xfs_btree_trace_argi(__func__, c, i, __LINE__)
+#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
+ xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
+#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \
+ xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
+#define XFS_BTREE_TRACE_ARGIK(c, i, k) \
+ xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
+#define XFS_BTREE_TRACE_ARGR(c, r) \
+ xfs_btree_trace_argr(__func__, c, r, __LINE__)
+#define XFS_BTREE_TRACE_CURSOR(c, t) \
+ xfs_btree_trace_cursor(__func__, c, t, __LINE__)
+#else
+#define XFS_BTREE_TRACE_ARGBI(c, b, i)
+#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
+#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)
+#define XFS_BTREE_TRACE_ARGI(c, i)
+#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
+#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
+#define XFS_BTREE_TRACE_ARGIK(c, i, k)
+#define XFS_BTREE_TRACE_ARGR(c, r)
+#define XFS_BTREE_TRACE_CURSOR(c, t)
+#endif /* XFS_BTREE_TRACE */
+
+#endif /* __XFS_BTREE_TRACE_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 002fc2617c8e..d245d04e10ca 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -375,7 +375,7 @@ xfs_buf_item_unpin(
xfs_buf_log_item_t *bip,
int stale)
{
- xfs_mount_t *mp;
+ struct xfs_ail *ailp;
xfs_buf_t *bp;
int freed;
@@ -387,7 +387,7 @@ xfs_buf_item_unpin(
xfs_buftrace("XFS_UNPIN", bp);
freed = atomic_dec_and_test(&bip->bli_refcount);
- mp = bip->bli_item.li_mountp;
+ ailp = bip->bli_item.li_ailp;
xfs_bunpin(bp);
if (freed && stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
@@ -399,17 +399,17 @@ xfs_buf_item_unpin(
xfs_buftrace("XFS_UNPIN STALE", bp);
/*
* If we get called here because of an IO error, we may
- * or may not have the item on the AIL. xfs_trans_delete_ail()
+ * or may not have the item on the AIL. xfs_trans_ail_delete()
* will take care of that situation.
- * xfs_trans_delete_ail() drops the AIL lock.
+ * xfs_trans_ail_delete() drops the AIL lock.
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
} else {
- spin_lock(&mp->m_ail_lock);
- xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
xfs_buf_item_relse(bp);
ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
}
@@ -731,6 +731,7 @@ xfs_buf_item_init(
bip->bli_item.li_type = XFS_LI_BUF;
bip->bli_item.li_ops = &xfs_buf_item_ops;
bip->bli_item.li_mountp = mp;
+ bip->bli_item.li_ailp = mp->m_ail;
bip->bli_buf = bp;
xfs_buf_hold(bp);
bip->bli_format.blf_type = XFS_LI_BUF;
@@ -1122,27 +1123,23 @@ xfs_buf_iodone(
xfs_buf_t *bp,
xfs_buf_log_item_t *bip)
{
- struct xfs_mount *mp;
+ struct xfs_ail *ailp = bip->bli_item.li_ailp;
ASSERT(bip->bli_buf == bp);
xfs_buf_rele(bp);
- mp = bip->bli_item.li_mountp;
/*
* If we are forcibly shutting down, this may well be
* off the AIL already. That's because we simulate the
* log-committed callbacks to unpin these buffers. Or we may never
* have put this item on AIL because of the transaction was
- * aborted forcibly. xfs_trans_delete_ail() takes care of these.
+ * aborted forcibly. xfs_trans_ail_delete() takes care of these.
*
* Either way, AIL is useless if we're forcing a shutdown.
*/
- spin_lock(&mp->m_ail_lock);
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
xfs_buf_item_free(bip);
}
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
deleted file mode 100644
index d2ce5dd70d87..000000000000
--- a/fs/xfs/xfs_clnt.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_CLNT_H__
-#define __XFS_CLNT_H__
-
-/*
- * XFS arguments structure, constructed from the arguments we
- * are passed via the mount system call.
- *
- * NOTE: The mount system call is handled differently between
- * Linux and IRIX. In IRIX we worked work with a binary data
- * structure coming in across the syscall interface from user
- * space (the mount userspace knows about each filesystem type
- * and the set of valid options for it, and converts the users
- * argument string into a binary structure _before_ making the
- * system call), and the ABI issues that this implies.
- *
- * In Linux, we are passed a comma separated set of options;
- * ie. a NULL terminated string of characters. Userspace mount
- * code does not have any knowledge of mount options expected by
- * each filesystem type and so each filesystem parses its mount
- * options in kernel space.
- *
- * For the Linux port, we kept this structure pretty much intact
- * and use it internally (because the existing code groks it).
- */
-struct xfs_mount_args {
- int flags; /* flags -> see XFSMNT_... macros below */
- int flags2; /* flags -> see XFSMNT2_... macros below */
- int logbufs; /* Number of log buffers, -1 to default */
- int logbufsize; /* Size of log buffers, -1 to default */
- char fsname[MAXNAMELEN+1]; /* data device name */
- char rtname[MAXNAMELEN+1]; /* realtime device filename */
- char logname[MAXNAMELEN+1]; /* journal device filename */
- char mtpt[MAXNAMELEN+1]; /* filesystem mount point */
- int sunit; /* stripe unit (BBs) */
- int swidth; /* stripe width (BBs), multiple of sunit */
- uchar_t iosizelog; /* log2 of the preferred I/O size */
- int ihashsize; /* inode hash table size (buckets) */
-};
-
-/*
- * XFS mount option flags -- args->flags1
- */
-#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */
-#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount
- * compatible */
-#define XFSMNT_INO64 0x00000004 /* move inode numbers up
- * past 2^32 */
-#define XFSMNT_UQUOTA 0x00000008 /* user quota accounting */
-#define XFSMNT_PQUOTA 0x00000010 /* IRIX prj quota accounting */
-#define XFSMNT_UQUOTAENF 0x00000020 /* user quota limit
- * enforcement */
-#define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit
- * enforcement */
-#define XFSMNT_QUIET 0x00000080 /* don't report mount errors */
-#define XFSMNT_NOALIGN 0x00000200 /* don't allocate at
- * stripe boundaries*/
-#define XFSMNT_RETERR 0x00000400 /* return error to user */
-#define XFSMNT_NORECOVERY 0x00000800 /* no recovery, implies
- * read-only mount */
-#define XFSMNT_SHARED 0x00001000 /* shared XFS mount */
-#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
-#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
- /* (osyncisdsync is default) */
-#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
-#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
- * bits of address space */
-#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
-#define XFSMNT_GQUOTAENF 0x00800000 /* group quota limit
- * enforcement */
-#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
-#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
-#define XFSMNT_BARRIER 0x04000000 /* use write barriers */
-#define XFSMNT_IKEEP 0x08000000 /* inode cluster delete */
-#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
- * allocation */
-#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
- * symlink,mkdir,rmdir,mknod */
-#define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */
-
-/*
- * XFS mount option flags -- args->flags2
- */
-#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred
- * I/O size in stat(2) */
-#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams
- * allocator */
-
-#endif /* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 8be0b00ede9a..70b710c1792d 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -72,27 +72,7 @@ typedef struct xfs_da_intnode {
typedef struct xfs_da_node_hdr xfs_da_node_hdr_t;
typedef struct xfs_da_node_entry xfs_da_node_entry_t;
-#define XFS_DA_MAXHASH ((xfs_dahash_t)-1) /* largest valid hash value */
-
#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
-#define XFS_LBLOG(mp) (mp)->m_sb.sb_blocklog
-
-#define XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \
- (((bno) << (mp)->m_dircook_elog) | (entry))
-#define XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \
- (((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash))
-#define XFS_DA_COOKIE_HASH(mp,cookie) ((xfs_dahash_t)cookie)
-#define XFS_DA_COOKIE_BNO(mp,cookie) \
- ((((xfs_off_t)(cookie) >> 31) == -1LL ? \
- (xfs_dablk_t)0 : \
- (xfs_dablk_t)((xfs_off_t)(cookie) >> \
- ((mp)->m_dircook_elog + 32))))
-#define XFS_DA_COOKIE_ENTRY(mp,cookie) \
- ((((xfs_off_t)(cookie) >> 31) == -1LL ? \
- (xfs_dablk_t)0 : \
- (xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \
- ((1 << (mp)->m_dircook_elog) - 1))))
-
/*========================================================================
* Btree searching and modification structure definitions.
@@ -226,9 +206,8 @@ struct xfs_nameops {
};
-#ifdef __KERNEL__
/*========================================================================
- * Function prototypes for the kernel.
+ * Function prototypes.
*========================================================================*/
/*
@@ -289,6 +268,5 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
extern struct kmem_zone *xfs_da_state_zone;
extern struct kmem_zone *xfs_dabuf_zone;
-#endif /* __KERNEL__ */
#endif /* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c9065eaf2a4d..d7cf392cc852 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -78,8 +78,7 @@ typedef struct xfs_dinode
xfs_dinode_core_t di_core;
/*
* In adding anything between the core and the union, be
- * sure to update the macros like XFS_LITINO below and
- * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h.
+ * sure to update the macros like XFS_LITINO below.
*/
__be32 di_next_unlinked;/* agi unlinked list ptr */
union {
@@ -166,7 +165,7 @@ typedef enum xfs_dinode_fmt
*/
#define XFS_LITINO(mp) ((mp)->m_litino)
#define XFS_BROOT_SIZE_ADJ \
- (sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t))
+ (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
/*
* Inode data & attribute fork sizes, per inode.
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index a1e55fb9d5dd..e71e2581c0c3 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -25,7 +25,6 @@
#include "xfs_inum.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_clnt.h"
static struct xfs_dmops xfs_dmcore_stub = {
@@ -38,9 +37,9 @@ static struct xfs_dmops xfs_dmcore_stub = {
};
int
-xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
+xfs_dmops_get(struct xfs_mount *mp)
{
- if (args->flags & XFSMNT_DMAPI) {
+ if (mp->m_flags & XFS_MOUNT_DMAPI) {
cmn_err(CE_WARN,
"XFS: dmapi support not available in this kernel.");
return EINVAL;
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 8aa28f751b2a..05a4bdd4be39 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -108,19 +108,16 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
STATIC void
xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
{
- xfs_mount_t *mp;
+ struct xfs_ail *ailp = efip->efi_item.li_ailp;
- mp = efip->efi_item.li_mountp;
- spin_lock(&mp->m_ail_lock);
+ spin_lock(&ailp->xa_lock);
if (efip->efi_flags & XFS_EFI_CANCELED) {
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
xfs_efi_item_free(efip);
} else {
efip->efi_flags |= XFS_EFI_COMMITTED;
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
}
@@ -134,26 +131,23 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
STATIC void
xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
{
- xfs_mount_t *mp;
+ struct xfs_ail *ailp = efip->efi_item.li_ailp;
xfs_log_item_desc_t *lidp;
- mp = efip->efi_item.li_mountp;
- spin_lock(&mp->m_ail_lock);
+ spin_lock(&ailp->xa_lock);
if (efip->efi_flags & XFS_EFI_CANCELED) {
/*
* free the xaction descriptor pointing to this item
*/
lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
xfs_trans_free_item(tp, lidp);
- /*
- * pull the item off the AIL.
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
+
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
xfs_efi_item_free(efip);
} else {
efip->efi_flags |= XFS_EFI_COMMITTED;
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
}
@@ -268,6 +262,7 @@ xfs_efi_init(xfs_mount_t *mp,
efip->efi_item.li_type = XFS_LI_EFI;
efip->efi_item.li_ops = &xfs_efi_item_ops;
efip->efi_item.li_mountp = mp;
+ efip->efi_item.li_ailp = mp->m_ail;
efip->efi_format.efi_nextents = nextents;
efip->efi_format.efi_id = (__psint_t)(void*)efip;
@@ -345,25 +340,22 @@ void
xfs_efi_release(xfs_efi_log_item_t *efip,
uint nextents)
{
- xfs_mount_t *mp;
- int extents_left;
+ struct xfs_ail *ailp = efip->efi_item.li_ailp;
+ int extents_left;
- mp = efip->efi_item.li_mountp;
ASSERT(efip->efi_next_extent > 0);
ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
- spin_lock(&mp->m_ail_lock);
+ spin_lock(&ailp->xa_lock);
ASSERT(efip->efi_next_extent >= nextents);
efip->efi_next_extent -= nextents;
extents_left = efip->efi_next_extent;
if (extents_left == 0) {
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
xfs_efi_item_free(efip);
} else {
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
}
@@ -565,6 +557,7 @@ xfs_efd_init(xfs_mount_t *mp,
efdp->efd_item.li_type = XFS_LI_EFD;
efdp->efd_item.li_ops = &xfs_efd_item_ops;
efdp->efd_item.li_mountp = mp;
+ efdp->efd_item.li_ailp = mp->m_ail;
efdp->efd_efip = efip;
efdp->efd_format.efd_nextents = nextents;
efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 84583cf73db3..f1d0585041b9 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -126,7 +126,7 @@ xfs_growfs_data_private(
xfs_extlen_t agsize;
xfs_extlen_t tmpsize;
xfs_alloc_rec_t *arec;
- xfs_btree_sblock_t *block;
+ struct xfs_btree_block *block;
xfs_buf_t *bp;
int bucket;
int dpct;
@@ -251,14 +251,14 @@ xfs_growfs_data_private(
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize), 0);
- block = XFS_BUF_TO_SBLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
block->bb_level = 0;
block->bb_numrecs = cpu_to_be16(1);
- block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1);
+ block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+ block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+ arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
@@ -272,14 +272,14 @@ xfs_growfs_data_private(
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize), 0);
- block = XFS_BUF_TO_SBLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
block->bb_level = 0;
block->bb_numrecs = cpu_to_be16(1);
- block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1);
+ block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+ block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+ arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
@@ -294,13 +294,13 @@ xfs_growfs_data_private(
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize), 0);
- block = XFS_BUF_TO_SBLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
block->bb_level = 0;
block->bb_numrecs = 0;
- block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+ block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+ block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(mp, bp);
if (error) {
goto error0;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index aad8c5da38af..c8a56c529642 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -119,6 +119,102 @@ xfs_ialloc_cluster_alignment(
}
/*
+ * Lookup the record equal to ino in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_inobt_lookup_eq(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agino_t ino, /* starting inode of chunk */
+ __int32_t fcnt, /* free inode count */
+ xfs_inofree_t free, /* free inode mask */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.i.ir_startino = ino;
+ cur->bc_rec.i.ir_freecount = fcnt;
+ cur->bc_rec.i.ir_free = free;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to ino
+ * in the btree given by cur.
+ */
+int /* error */
+xfs_inobt_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agino_t ino, /* starting inode of chunk */
+ __int32_t fcnt, /* free inode count */
+ xfs_inofree_t free, /* free inode mask */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.i.ir_startino = ino;
+ cur->bc_rec.i.ir_freecount = fcnt;
+ cur->bc_rec.i.ir_free = free;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to ino
+ * in the btree given by cur.
+ */
+int /* error */
+xfs_inobt_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agino_t ino, /* starting inode of chunk */
+ __int32_t fcnt, /* free inode count */
+ xfs_inofree_t free, /* free inode mask */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.i.ir_startino = ino;
+ cur->bc_rec.i.ir_freecount = fcnt;
+ cur->bc_rec.i.ir_free = free;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [ino, fcnt, free].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int /* error */
+xfs_inobt_update(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agino_t ino, /* starting inode of chunk */
+ __int32_t fcnt, /* free inode count */
+ xfs_inofree_t free) /* free inode mask */
+{
+ union xfs_btree_rec rec;
+
+ rec.inobt.ir_startino = cpu_to_be32(ino);
+ rec.inobt.ir_freecount = cpu_to_be32(fcnt);
+ rec.inobt.ir_free = cpu_to_be64(free);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int /* error */
+xfs_inobt_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agino_t *ino, /* output: starting inode of chunk */
+ __int32_t *fcnt, /* output: number of free inodes */
+ xfs_inofree_t *free, /* output: free inode mask */
+ int *stat) /* output: success/failure */
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (!error && *stat == 1) {
+ *ino = be32_to_cpu(rec->inobt.ir_startino);
+ *fcnt = be32_to_cpu(rec->inobt.ir_freecount);
+ *free = be64_to_cpu(rec->inobt.ir_free);
+ }
+ return error;
+}
+
+/*
* Allocate new inodes in the allocation group specified by agbp.
* Return 0 for success, else error code.
*/
@@ -335,8 +431,7 @@ xfs_ialloc_ag_alloc(
/*
* Insert records describing the new inode chunk into the btree.
*/
- cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno,
- XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+ cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
for (thisino = newino;
thisino < newino + newlen;
thisino += XFS_INODES_PER_CHUNK) {
@@ -346,7 +441,7 @@ xfs_ialloc_ag_alloc(
return error;
}
ASSERT(i == 0);
- if ((error = xfs_inobt_insert(cur, &i))) {
+ if ((error = xfs_btree_insert(cur, &i))) {
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
@@ -676,8 +771,7 @@ nextag:
*/
agno = tagno;
*IO_agbp = NULL;
- cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno),
- XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
/*
* If pagino is 0 (this is the root inode allocation) use newino.
* This must work because we've just allocated some.
@@ -697,7 +791,7 @@ nextag:
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
freecount += rec.ir_freecount;
- if ((error = xfs_inobt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto error0;
} while (i == 1);
@@ -741,7 +835,7 @@ nextag:
/*
* Search left with tcur, back up 1 record.
*/
- if ((error = xfs_inobt_decrement(tcur, 0, &i)))
+ if ((error = xfs_btree_decrement(tcur, 0, &i)))
goto error1;
doneleft = !i;
if (!doneleft) {
@@ -755,7 +849,7 @@ nextag:
/*
* Search right with cur, go forward 1 record.
*/
- if ((error = xfs_inobt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto error1;
doneright = !i;
if (!doneright) {
@@ -817,7 +911,7 @@ nextag:
* further left.
*/
if (useleft) {
- if ((error = xfs_inobt_decrement(tcur, 0,
+ if ((error = xfs_btree_decrement(tcur, 0,
&i)))
goto error1;
doneleft = !i;
@@ -837,7 +931,7 @@ nextag:
* further right.
*/
else {
- if ((error = xfs_inobt_increment(cur, 0,
+ if ((error = xfs_btree_increment(cur, 0,
&i)))
goto error1;
doneright = !i;
@@ -892,7 +986,7 @@ nextag:
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (rec.ir_freecount > 0)
break;
- if ((error = xfs_inobt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
}
@@ -926,7 +1020,7 @@ nextag:
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
freecount += rec.ir_freecount;
- if ((error = xfs_inobt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto error0;
} while (i == 1);
ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
@@ -1022,8 +1116,7 @@ xfs_difree(
/*
* Initialize the cursor.
*/
- cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
- (xfs_inode_t *)0, 0);
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
#ifdef DEBUG
if (cur->bc_nlevels == 1) {
int freecount = 0;
@@ -1036,7 +1129,7 @@ xfs_difree(
goto error0;
if (i) {
freecount += rec.ir_freecount;
- if ((error = xfs_inobt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto error0;
}
} while (i == 1);
@@ -1098,8 +1191,8 @@ xfs_difree(
xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
- if ((error = xfs_inobt_delete(cur, &i))) {
- cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n",
+ if ((error = xfs_btree_delete(cur, &i))) {
+ cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
error, mp->m_fsname);
goto error0;
}
@@ -1141,7 +1234,7 @@ xfs_difree(
goto error0;
if (i) {
freecount += rec.ir_freecount;
- if ((error = xfs_inobt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto error0;
}
} while (i == 1);
@@ -1259,8 +1352,7 @@ xfs_dilocate(
#endif /* DEBUG */
return error;
}
- cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
- (xfs_inode_t *)0, 0);
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 4e30ec1d13bc..ccf554a6e0a1 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -56,7 +56,6 @@ static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
}
-#ifdef __KERNEL__
/*
* Allocate an inode on disk.
* Mode is used to tell whether the new inode will need space, and whether
@@ -154,6 +153,24 @@ xfs_ialloc_pagi_init(
struct xfs_trans *tp, /* transaction pointer */
xfs_agnumber_t agno); /* allocation group number */
-#endif /* __KERNEL__ */
+/*
+ * Lookup the first record greater than or equal to ino
+ * in the btree given by cur.
+ */
+int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
+ __int32_t fcnt, xfs_inofree_t free, int *stat);
+
+/*
+ * Lookup the first record less than or equal to ino
+ * in the btree given by cur.
+ */
+int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
+ __int32_t fcnt, xfs_inofree_t free, int *stat);
+
+/*
+ * Get the data from the pointed-to record.
+ */
+extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
+ __int32_t *fcnt, xfs_inofree_t *free, int *stat);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 83502f3edef0..99f2408e8d8e 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -35,2044 +35,349 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
-STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
-STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
-STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
- xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
-STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
-/*
- * Single level of the xfs_inobt_delete record deletion routine.
- * Delete record pointed to by cur/level.
- * Remove the record from its block then rebalance the tree.
- * Return 0 for error, 1 for done, 2 to go on to the next level.
- */
-STATIC int /* error */
-xfs_inobt_delrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level removing record from */
- int *stat) /* fail/done/go-on */
+STATIC int
+xfs_inobt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
{
- xfs_buf_t *agbp; /* buffer for a.g. inode header */
- xfs_mount_t *mp; /* mount structure */
- xfs_agi_t *agi; /* allocation group inode header */
- xfs_inobt_block_t *block; /* btree block record/key lives in */
- xfs_agblock_t bno; /* btree block number */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_inobt_key_t key; /* kp points here if block is level 0 */
- xfs_inobt_key_t *kp = NULL; /* pointer to btree keys */
- xfs_agblock_t lbno; /* left block's block number */
- xfs_buf_t *lbp; /* left block's buffer pointer */
- xfs_inobt_block_t *left; /* left btree block */
- xfs_inobt_key_t *lkp; /* left block key pointer */
- xfs_inobt_ptr_t *lpp; /* left block address pointer */
- int lrecs = 0; /* number of records in left block */
- xfs_inobt_rec_t *lrp; /* left block record pointer */
- xfs_inobt_ptr_t *pp = NULL; /* pointer to btree addresses */
- int ptr; /* index in btree block for this rec */
- xfs_agblock_t rbno; /* right block's block number */
- xfs_buf_t *rbp; /* right block's buffer pointer */
- xfs_inobt_block_t *right; /* right btree block */
- xfs_inobt_key_t *rkp; /* right block key pointer */
- xfs_inobt_rec_t *rp; /* pointer to btree records */
- xfs_inobt_ptr_t *rpp; /* right block address pointer */
- int rrecs = 0; /* number of records in right block */
- int numrecs;
- xfs_inobt_rec_t *rrp; /* right block record pointer */
- xfs_btree_cur_t *tcur; /* temporary btree cursor */
-
- mp = cur->bc_mp;
-
- /*
- * Get the index of the entry being deleted, check for nothing there.
- */
- ptr = cur->bc_ptrs[level];
- if (ptr == 0) {
- *stat = 0;
- return 0;
- }
-
- /*
- * Get the buffer & block containing the record or key/ptr.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- /*
- * Fail if we're off the end of the block.
- */
+ return cur->bc_mp->m_inobt_mnr[level != 0];
+}
- numrecs = be16_to_cpu(block->bb_numrecs);
- if (ptr > numrecs) {
- *stat = 0;
- return 0;
- }
- /*
- * It's a nonleaf. Excise the key and ptr being deleted, by
- * sliding the entries past them down one.
- * Log the changed areas of the block.
- */
- if (level > 0) {
- kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
- pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = ptr; i < numrecs; i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
- return error;
- }
-#endif
- if (ptr < numrecs) {
- memmove(&kp[ptr - 1], &kp[ptr],
- (numrecs - ptr) * sizeof(*kp));
- memmove(&pp[ptr - 1], &pp[ptr],
- (numrecs - ptr) * sizeof(*kp));
- xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
- xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
- }
- }
- /*
- * It's a leaf. Excise the record being deleted, by sliding the
- * entries past it down one. Log the changed areas of the block.
- */
- else {
- rp = XFS_INOBT_REC_ADDR(block, 1, cur);
- if (ptr < numrecs) {
- memmove(&rp[ptr - 1], &rp[ptr],
- (numrecs - ptr) * sizeof(*rp));
- xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
- }
- /*
- * If it's the first record in the block, we'll need a key
- * structure to pass up to the next level (updkey).
- */
- if (ptr == 1) {
- key.ir_startino = rp->ir_startino;
- kp = &key;
- }
- }
- /*
- * Decrement and log the number of entries in the block.
- */
- numrecs--;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
- /*
- * Is this the root level? If so, we're almost done.
- */
- if (level == cur->bc_nlevels - 1) {
- /*
- * If this is the root level,
- * and there's only one entry left,
- * and it's NOT the leaf level,
- * then we can get rid of this level.
- */
- if (numrecs == 1 && level > 0) {
- agbp = cur->bc_private.a.agbp;
- agi = XFS_BUF_TO_AGI(agbp);
- /*
- * pp is still set to the first pointer in the block.
- * Make it the new root of the btree.
- */
- bno = be32_to_cpu(agi->agi_root);
- agi->agi_root = *pp;
- be32_add_cpu(&agi->agi_level, -1);
- /*
- * Free the block.
- */
- if ((error = xfs_free_extent(cur->bc_tp,
- XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
- return error;
- xfs_trans_binval(cur->bc_tp, bp);
- xfs_ialloc_log_agi(cur->bc_tp, agbp,
- XFS_AGI_ROOT | XFS_AGI_LEVEL);
- /*
- * Update the cursor so there's one fewer level.
- */
- cur->bc_bufs[level] = NULL;
- cur->bc_nlevels--;
- } else if (level > 0 &&
- (error = xfs_inobt_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * If we deleted the leftmost entry in the block, update the
- * key values above us in the tree.
- */
- if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
- return error;
- /*
- * If the number of records remaining in the block is at least
- * the minimum, we're done.
- */
- if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
- if (level > 0 &&
- (error = xfs_inobt_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * Otherwise, we have to move some records around to keep the
- * tree balanced. Look at the left and right sibling blocks to
- * see if we can re-balance by moving only one record.
- */
- rbno = be32_to_cpu(block->bb_rightsib);
- lbno = be32_to_cpu(block->bb_leftsib);
- bno = NULLAGBLOCK;
- ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
- /*
- * Duplicate the cursor so our btree manipulations here won't
- * disrupt the next level up.
- */
- if ((error = xfs_btree_dup_cursor(cur, &tcur)))
- return error;
- /*
- * If there's a right sibling, see if it's ok to shift an entry
- * out of it.
- */
- if (rbno != NULLAGBLOCK) {
- /*
- * Move the temp cursor to the last entry in the next block.
- * Actually any entry but the first would suffice.
- */
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_inobt_increment(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- /*
- * Grab a pointer to the block.
- */
- rbp = tcur->bc_bufs[level];
- right = XFS_BUF_TO_INOBT_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- goto error0;
-#endif
- /*
- * Grab the current block number, for future use.
- */
- bno = be32_to_cpu(right->bb_leftsib);
- /*
- * If right block is full enough so that removing one entry
- * won't make it too empty, and left-shifting an entry out
- * of right to us works, we're done.
- */
- if (be16_to_cpu(right->bb_numrecs) - 1 >=
- XFS_INOBT_BLOCK_MINRECS(level, cur)) {
- if ((error = xfs_inobt_lshift(tcur, level, &i)))
- goto error0;
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_INOBT_BLOCK_MINRECS(level, cur));
- xfs_btree_del_cursor(tcur,
- XFS_BTREE_NOERROR);
- if (level > 0 &&
- (error = xfs_inobt_decrement(cur, level,
- &i)))
- return error;
- *stat = 1;
- return 0;
- }
- }
- /*
- * Otherwise, grab the number of records in right for
- * future reference, and fix up the temp cursor to point
- * to our block again (last record).
- */
- rrecs = be16_to_cpu(right->bb_numrecs);
- if (lbno != NULLAGBLOCK) {
- xfs_btree_firstrec(tcur, level);
- if ((error = xfs_inobt_decrement(tcur, level, &i)))
- goto error0;
- }
- }
- /*
- * If there's a left sibling, see if it's ok to shift an entry
- * out of it.
- */
- if (lbno != NULLAGBLOCK) {
- /*
- * Move the temp cursor to the first entry in the
- * previous block.
- */
- xfs_btree_firstrec(tcur, level);
- if ((error = xfs_inobt_decrement(tcur, level, &i)))
- goto error0;
- xfs_btree_firstrec(tcur, level);
- /*
- * Grab a pointer to the block.
- */
- lbp = tcur->bc_bufs[level];
- left = XFS_BUF_TO_INOBT_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- goto error0;
-#endif
- /*
- * Grab the current block number, for future use.
- */
- bno = be32_to_cpu(left->bb_rightsib);
- /*
- * If left block is full enough so that removing one entry
- * won't make it too empty, and right-shifting an entry out
- * of left to us works, we're done.
- */
- if (be16_to_cpu(left->bb_numrecs) - 1 >=
- XFS_INOBT_BLOCK_MINRECS(level, cur)) {
- if ((error = xfs_inobt_rshift(tcur, level, &i)))
- goto error0;
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_INOBT_BLOCK_MINRECS(level, cur));
- xfs_btree_del_cursor(tcur,
- XFS_BTREE_NOERROR);
- if (level == 0)
- cur->bc_ptrs[0]++;
- *stat = 1;
- return 0;
- }
- }
- /*
- * Otherwise, grab the number of records in right for
- * future reference.
- */
- lrecs = be16_to_cpu(left->bb_numrecs);
- }
- /*
- * Delete the temp cursor, we're done with it.
- */
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- /*
- * If here, we need to do a join to keep the tree balanced.
- */
- ASSERT(bno != NULLAGBLOCK);
- /*
- * See if we can join with the left neighbor block.
- */
- if (lbno != NULLAGBLOCK &&
- lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
- /*
- * Set "right" to be the starting block,
- * "left" to be the left neighbor.
- */
- rbno = bno;
- right = block;
- rrecs = be16_to_cpu(right->bb_numrecs);
- rbp = bp;
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, lbno, 0, &lbp,
- XFS_INO_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_INOBT_BLOCK(lbp);
- lrecs = be16_to_cpu(left->bb_numrecs);
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
- }
- /*
- * If that won't work, see if we can join with the right neighbor block.
- */
- else if (rbno != NULLAGBLOCK &&
- rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
- /*
- * Set "left" to be the starting block,
- * "right" to be the right neighbor.
- */
- lbno = bno;
- left = block;
- lrecs = be16_to_cpu(left->bb_numrecs);
- lbp = bp;
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, rbno, 0, &rbp,
- XFS_INO_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_INOBT_BLOCK(rbp);
- rrecs = be16_to_cpu(right->bb_numrecs);
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
- }
- /*
- * Otherwise, we can't fix the imbalance.
- * Just return. This is probably a logic error, but it's not fatal.
- */
- else {
- if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * We're now going to join "left" and "right" by moving all the stuff
- * in "right" to "left" and deleting "right".
- */
- if (level > 0) {
- /*
- * It's a non-leaf. Move keys and pointers.
- */
- lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
- lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
- rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
- rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < rrecs; i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
- return error;
- }
-#endif
- memcpy(lkp, rkp, rrecs * sizeof(*lkp));
- memcpy(lpp, rpp, rrecs * sizeof(*lpp));
- xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
- xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
- } else {
- /*
- * It's a leaf. Move records.
- */
- lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
- rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
- memcpy(lrp, rrp, rrecs * sizeof(*lrp));
- xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
- }
- /*
- * If we joined with the left neighbor, set the buffer in the
- * cursor to the left block, and fix up the index.
- */
- if (bp != lbp) {
- xfs_btree_setbuf(cur, level, lbp);
- cur->bc_ptrs[level] += lrecs;
- }
- /*
- * If we joined with the right neighbor and there's a level above
- * us, increment the cursor at that level.
- */
- else if (level + 1 < cur->bc_nlevels &&
- (error = xfs_alloc_increment(cur, level + 1, &i)))
- return error;
- /*
- * Fix up the number of records in the surviving block.
- */
- lrecs += rrecs;
- left->bb_numrecs = cpu_to_be16(lrecs);
- /*
- * Fix up the right block pointer in the surviving block, and log it.
- */
- left->bb_rightsib = right->bb_rightsib;
- xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- /*
- * If there is a right sibling now, make it point to the
- * remaining block.
- */
- if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
- xfs_inobt_block_t *rrblock;
- xfs_buf_t *rrbp;
+STATIC struct xfs_btree_cur *
+xfs_inobt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.agbp, cur->bc_private.a.agno);
+}
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
- &rrbp, XFS_INO_BTREE_REF)))
- return error;
- rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
- if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
- return error;
- rrblock->bb_leftsib = cpu_to_be32(lbno);
- xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
- }
- /*
- * Free the deleting block.
- */
- if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
- cur->bc_private.a.agno, rbno), 1)))
- return error;
- xfs_trans_binval(cur->bc_tp, rbp);
- /*
- * Readjust the ptr at this level if it's not a leaf, since it's
- * still pointing at the deletion point, which makes the cursor
- * inconsistent. If this makes the ptr 0, the caller fixes it up.
- * We can't use decrement because it would change the next level up.
- */
- if (level > 0)
- cur->bc_ptrs[level]--;
- /*
- * Return value means the next level up has something to do.
- */
- *stat = 2;
- return 0;
+STATIC void
+xfs_inobt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *nptr,
+ int inc) /* level change */
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
-error0:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
+ agi->agi_root = nptr->s;
+ be32_add_cpu(&agi->agi_level, inc);
+ xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
}
-/*
- * Insert one record/level. Return information to the caller
- * allowing the next level up to proceed if necessary.
- */
-STATIC int /* error */
-xfs_inobt_insrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to insert record at */
- xfs_agblock_t *bnop, /* i/o: block number inserted */
- xfs_inobt_rec_t *recp, /* i/o: record data inserted */
- xfs_btree_cur_t **curp, /* output: new cursor replacing cur */
- int *stat) /* success/failure */
+STATIC int
+xfs_inobt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int length,
+ int *stat)
{
- xfs_inobt_block_t *block; /* btree block record/key lives in */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_inobt_key_t key; /* key value being inserted */
- xfs_inobt_key_t *kp=NULL; /* pointer to btree keys */
- xfs_agblock_t nbno; /* block number of allocated block */
- xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
- xfs_inobt_key_t nkey; /* new key value, from split */
- xfs_inobt_rec_t nrec; /* new record value, for caller */
- int numrecs;
- int optr; /* old ptr value */
- xfs_inobt_ptr_t *pp; /* pointer to btree addresses */
- int ptr; /* index in btree block for this rec */
- xfs_inobt_rec_t *rp=NULL; /* pointer to btree records */
+ xfs_alloc_arg_t args; /* block allocation args */
+ int error; /* error return value */
+ xfs_agblock_t sbno = be32_to_cpu(start->s);
- /*
- * GCC doesn't understand the (arguably complex) control flow in
- * this function and complains about uninitialized structure fields
- * without this.
- */
- memset(&nrec, 0, sizeof(nrec));
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- /*
- * If we made it to the root level, allocate a new root block
- * and we're done.
- */
- if (level >= cur->bc_nlevels) {
- error = xfs_inobt_newroot(cur, &i);
- *bnop = NULLAGBLOCK;
- *stat = i;
+ memset(&args, 0, sizeof(args));
+ args.tp = cur->bc_tp;
+ args.mp = cur->bc_mp;
+ args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
+ args.minlen = 1;
+ args.maxlen = 1;
+ args.prod = 1;
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+
+ error = xfs_alloc_vextent(&args);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
- /*
- * Make a key out of the record data to be inserted, and save it.
- */
- key.ir_startino = recp->ir_startino;
- optr = ptr = cur->bc_ptrs[level];
- /*
- * If we're off the left edge, return failure.
- */
- if (ptr == 0) {
+ if (args.fsbno == NULLFSBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
- /*
- * Get pointers to the btree buffer and block.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
- numrecs = be16_to_cpu(block->bb_numrecs);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
- /*
- * Check that the new entry is being inserted in the right place.
- */
- if (ptr <= numrecs) {
- if (level == 0) {
- rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
- xfs_btree_check_rec(cur->bc_btnum, recp, rp);
- } else {
- kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
- xfs_btree_check_key(cur->bc_btnum, &key, kp);
- }
- }
-#endif
- nbno = NULLAGBLOCK;
- ncur = NULL;
- /*
- * If the block is full, we can't insert the new entry until we
- * make the block un-full.
- */
- if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
- /*
- * First, try shifting an entry to the right neighbor.
- */
- if ((error = xfs_inobt_rshift(cur, level, &i)))
- return error;
- if (i) {
- /* nothing */
- }
- /*
- * Next, try shifting an entry to the left neighbor.
- */
- else {
- if ((error = xfs_inobt_lshift(cur, level, &i)))
- return error;
- if (i) {
- optr = ptr = cur->bc_ptrs[level];
- } else {
- /*
- * Next, try splitting the current block
- * in half. If this works we have to
- * re-set our variables because
- * we could be in a different block now.
- */
- if ((error = xfs_inobt_split(cur, level, &nbno,
- &nkey, &ncur, &i)))
- return error;
- if (i) {
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur,
- block, level, bp)))
- return error;
-#endif
- ptr = cur->bc_ptrs[level];
- nrec.ir_startino = nkey.ir_startino;
- } else {
- /*
- * Otherwise the insert fails.
- */
- *stat = 0;
- return 0;
- }
- }
- }
- }
- /*
- * At this point we know there's room for our new entry in the block
- * we're pointing at.
- */
- numrecs = be16_to_cpu(block->bb_numrecs);
- if (level > 0) {
- /*
- * It's a non-leaf entry. Make a hole for the new data
- * in the key and ptr regions of the block.
- */
- kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
- pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = numrecs; i >= ptr; i--) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
- return error;
- }
-#endif
- memmove(&kp[ptr], &kp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*kp));
- memmove(&pp[ptr], &pp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*pp));
- /*
- * Now stuff the new data in, bump numrecs and log the new data.
- */
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
- return error;
-#endif
- kp[ptr - 1] = key;
- pp[ptr - 1] = cpu_to_be32(*bnop);
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_inobt_log_keys(cur, bp, ptr, numrecs);
- xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
- } else {
- /*
- * It's a leaf entry. Make a hole for the new record.
- */
- rp = XFS_INOBT_REC_ADDR(block, 1, cur);
- memmove(&rp[ptr], &rp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*rp));
- /*
- * Now stuff the new record in, bump numrecs
- * and log the new data.
- */
- rp[ptr - 1] = *recp;
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_inobt_log_recs(cur, bp, ptr, numrecs);
- }
- /*
- * Log the new number of records in the btree header.
- */
- xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
-#ifdef DEBUG
- /*
- * Check that the key/record is in the right place, now.
- */
- if (ptr < numrecs) {
- if (level == 0)
- xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
- rp + ptr);
- else
- xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
- kp + ptr);
- }
-#endif
- /*
- * If we inserted at the start of a block, update the parents' keys.
- */
- if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
- return error;
- /*
- * Return the new block number, if any.
- * If there is one, give back a record value and a cursor too.
- */
- *bnop = nbno;
- if (nbno != NULLAGBLOCK) {
- *recp = nrec;
- *curp = ncur;
- }
+ ASSERT(args.len == 1);
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+
+ new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
*stat = 1;
return 0;
}
-/*
- * Log header fields from a btree block.
- */
-STATIC void
-xfs_inobt_log_block(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *bp, /* buffer containing btree block */
- int fields) /* mask of fields: XFS_BB_... */
+STATIC int
+xfs_inobt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
{
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- static const short offsets[] = { /* table of offsets */
- offsetof(xfs_inobt_block_t, bb_magic),
- offsetof(xfs_inobt_block_t, bb_level),
- offsetof(xfs_inobt_block_t, bb_numrecs),
- offsetof(xfs_inobt_block_t, bb_leftsib),
- offsetof(xfs_inobt_block_t, bb_rightsib),
- sizeof(xfs_inobt_block_t)
- };
+ xfs_fsblock_t fsbno;
+ int error;
- xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
- xfs_trans_log_buf(tp, bp, first, last);
+ fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
+ error = xfs_free_extent(cur->bc_tp, fsbno, 1);
+ if (error)
+ return error;
+
+ xfs_trans_binval(cur->bc_tp, bp);
+ return error;
}
-/*
- * Log keys from a btree block (nonleaf).
- */
-STATIC void
-xfs_inobt_log_keys(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int kfirst, /* index of first key to log */
- int klast) /* index of last key to log */
+STATIC int
+xfs_inobt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
{
- xfs_inobt_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- xfs_inobt_key_t *kp; /* key pointer in btree block */
- int last; /* last byte offset logged */
-
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
- kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ return cur->bc_mp->m_inobt_mxr[level != 0];
}
-/*
- * Log block pointer fields from a btree block (nonleaf).
- */
STATIC void
-xfs_inobt_log_ptrs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int pfirst, /* index of first pointer to log */
- int plast) /* index of last pointer to log */
+xfs_inobt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
{
- xfs_inobt_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- xfs_inobt_ptr_t *pp; /* block-pointer pointer in btree blk */
-
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
- pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ key->inobt.ir_startino = rec->inobt.ir_startino;
}
-/*
- * Log records from a btree block (leaf).
- */
STATIC void
-xfs_inobt_log_recs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int rfirst, /* index of first record to log */
- int rlast) /* index of last record to log */
+xfs_inobt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
{
- xfs_inobt_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- xfs_inobt_rec_t *rp; /* record pointer for btree block */
+ rec->inobt.ir_startino = key->inobt.ir_startino;
+}
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
- rp = XFS_INOBT_REC_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+STATIC void
+xfs_inobt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
+ rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
+ rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
}
/*
- * Lookup the record. The cursor is made to point to it, based on dir.
- * Return 0 if can't find any such record, 1 for success.
+ * intial value of ptr for lookup
*/
-STATIC int /* error */
-xfs_inobt_lookup(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_lookup_t dir, /* <=, ==, or >= */
- int *stat) /* success/failure */
+STATIC void
+xfs_inobt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
{
- xfs_agblock_t agbno; /* a.g. relative btree block number */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_inobt_block_t *block=NULL; /* current btree block */
- __int64_t diff; /* difference for the current key */
- int error; /* error return value */
- int keyno=0; /* current key number */
- int level; /* level in the btree */
- xfs_mount_t *mp; /* file system mount point */
-
- /*
- * Get the allocation group header, and the root block number.
- */
- mp = cur->bc_mp;
- {
- xfs_agi_t *agi; /* a.g. inode header */
-
- agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
- agno = be32_to_cpu(agi->agi_seqno);
- agbno = be32_to_cpu(agi->agi_root);
- }
- /*
- * Iterate over each level in the btree, starting at the root.
- * For each level above the leaves, find the key we need, based
- * on the lookup record, then follow the corresponding block
- * pointer down to the next level.
- */
- for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
- xfs_buf_t *bp; /* buffer pointer for btree block */
- xfs_daddr_t d; /* disk address of btree block */
-
- /*
- * Get the disk address we're looking for.
- */
- d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- /*
- * If the old buffer at this level is for a different block,
- * throw it away, otherwise just use it.
- */
- bp = cur->bc_bufs[level];
- if (bp && XFS_BUF_ADDR(bp) != d)
- bp = NULL;
- if (!bp) {
- /*
- * Need to get a new buffer. Read it, then
- * set it in the cursor, releasing the old one.
- */
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
- return error;
- xfs_btree_setbuf(cur, level, bp);
- /*
- * Point to the btree block, now that we have the buffer
- */
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, level,
- bp)))
- return error;
- } else
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
- /*
- * If we already had a key match at a higher level, we know
- * we need to use the first entry in this block.
- */
- if (diff == 0)
- keyno = 1;
- /*
- * Otherwise we need to search this block. Do a binary search.
- */
- else {
- int high; /* high entry number */
- xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */
- xfs_inobt_rec_t *krbase=NULL;/* base of records in block */
- int low; /* low entry number */
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
- /*
- * Get a pointer to keys or records.
- */
- if (level > 0)
- kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
- else
- krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
- /*
- * Set low and high entry numbers, 1-based.
- */
- low = 1;
- if (!(high = be16_to_cpu(block->bb_numrecs))) {
- /*
- * If the block is empty, the tree must
- * be an empty leaf.
- */
- ASSERT(level == 0 && cur->bc_nlevels == 1);
- cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
- *stat = 0;
- return 0;
- }
- /*
- * Binary search the block.
- */
- while (low <= high) {
- xfs_agino_t startino; /* key value */
-
- /*
- * keyno is average of low and high.
- */
- keyno = (low + high) >> 1;
- /*
- * Get startino.
- */
- if (level > 0) {
- xfs_inobt_key_t *kkp;
-
- kkp = kkbase + keyno - 1;
- startino = be32_to_cpu(kkp->ir_startino);
- } else {
- xfs_inobt_rec_t *krp;
-
- krp = krbase + keyno - 1;
- startino = be32_to_cpu(krp->ir_startino);
- }
- /*
- * Compute difference to get next direction.
- */
- diff = (__int64_t)
- startino - cur->bc_rec.i.ir_startino;
- /*
- * Less than, move right.
- */
- if (diff < 0)
- low = keyno + 1;
- /*
- * Greater than, move left.
- */
- else if (diff > 0)
- high = keyno - 1;
- /*
- * Equal, we're done.
- */
- else
- break;
- }
- }
- /*
- * If there are more levels, set up for the next level
- * by getting the block number and filling in the cursor.
- */
- if (level > 0) {
- /*
- * If we moved left, need the previous key number,
- * unless there isn't one.
- */
- if (diff > 0 && --keyno < 1)
- keyno = 1;
- agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, agbno, level)))
- return error;
-#endif
- cur->bc_ptrs[level] = keyno;
- }
- }
- /*
- * Done with the search.
- * See if we need to adjust the results.
- */
- if (dir != XFS_LOOKUP_LE && diff < 0) {
- keyno++;
- /*
- * If ge search and we went off the end of the block, but it's
- * not the last block, we're in the wrong block.
- */
- if (dir == XFS_LOOKUP_GE &&
- keyno > be16_to_cpu(block->bb_numrecs) &&
- be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
- int i;
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
- cur->bc_ptrs[0] = keyno;
- if ((error = xfs_inobt_increment(cur, 0, &i)))
- return error;
- ASSERT(i == 1);
- *stat = 1;
- return 0;
- }
- }
- else if (dir == XFS_LOOKUP_LE && diff > 0)
- keyno--;
- cur->bc_ptrs[0] = keyno;
- /*
- * Return if we succeeded or not.
- */
- if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
- *stat = 0;
- else
- *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
- return 0;
+ ptr->s = agi->agi_root;
}
-/*
- * Move 1 record left from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_inobt_lshift(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to shift record on */
- int *stat) /* success/failure */
+STATIC __int64_t
+xfs_inobt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
{
- int error; /* error return value */
-#ifdef DEBUG
- int i; /* loop index */
-#endif
- xfs_inobt_key_t key; /* key value for leaf level upward */
- xfs_buf_t *lbp; /* buffer for left neighbor block */
- xfs_inobt_block_t *left; /* left neighbor btree block */
- xfs_inobt_key_t *lkp=NULL; /* key pointer for left block */
- xfs_inobt_ptr_t *lpp; /* address pointer for left block */
- xfs_inobt_rec_t *lrp=NULL; /* record pointer for left block */
- int nrec; /* new number of left block entries */
- xfs_buf_t *rbp; /* buffer for right (current) block */
- xfs_inobt_block_t *right; /* right (current) btree block */
- xfs_inobt_key_t *rkp=NULL; /* key pointer for right block */
- xfs_inobt_ptr_t *rpp=NULL; /* address pointer for right block */
- xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */
-
- /*
- * Set up variables for this block as "right".
- */
- rbp = cur->bc_bufs[level];
- right = XFS_BUF_TO_INOBT_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
-#endif
- /*
- * If we've got no left sibling then we can't shift an entry left.
- */
- if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * If the cursor entry is the one that would be moved, don't
- * do it... it's too complicated.
- */
- if (cur->bc_ptrs[level] <= 1) {
- *stat = 0;
- return 0;
- }
- /*
- * Set up the left neighbor as "left".
- */
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
- 0, &lbp, XFS_INO_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_INOBT_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
- /*
- * If it's full, it can't take another entry.
- */
- if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
- *stat = 0;
- return 0;
- }
- nrec = be16_to_cpu(left->bb_numrecs) + 1;
- /*
- * If non-leaf, copy a key and a ptr to the left block.
- */
- if (level > 0) {
- lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
- rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
- *lkp = *rkp;
- xfs_inobt_log_keys(cur, lbp, nrec, nrec);
- lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
- rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
- return error;
-#endif
- *lpp = *rpp;
- xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
- }
- /*
- * If leaf, copy a record to the left block.
- */
- else {
- lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
- rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
- *lrp = *rrp;
- xfs_inobt_log_recs(cur, lbp, nrec, nrec);
- }
- /*
- * Bump and log left's numrecs, decrement and log right's numrecs.
- */
- be16_add_cpu(&left->bb_numrecs, 1);
- xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-#ifdef DEBUG
- if (level > 0)
- xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
- else
- xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
-#endif
- be16_add_cpu(&right->bb_numrecs, -1);
- xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
- /*
- * Slide the contents of right down one entry.
- */
- if (level > 0) {
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
- level)))
- return error;
- }
-#endif
- memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- } else {
- memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- key.ir_startino = rrp->ir_startino;
- rkp = &key;
- }
- /*
- * Update the parent key values of right.
- */
- if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
- return error;
- /*
- * Slide the cursor value left one.
- */
- cur->bc_ptrs[level]--;
- *stat = 1;
- return 0;
+ return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
+ cur->bc_rec.i.ir_startino;
}
-/*
- * Allocate a new root block, fill it in.
- */
-STATIC int /* error */
-xfs_inobt_newroot(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+STATIC int
+xfs_inobt_kill_root(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int level,
+ union xfs_btree_ptr *newroot)
{
- xfs_agi_t *agi; /* a.g. inode header */
- xfs_alloc_arg_t args; /* allocation argument structure */
- xfs_inobt_block_t *block; /* one half of the old root block */
- xfs_buf_t *bp; /* buffer containing block */
- int error; /* error return value */
- xfs_inobt_key_t *kp; /* btree key pointer */
- xfs_agblock_t lbno; /* left block number */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_inobt_block_t *left; /* left btree block */
- xfs_buf_t *nbp; /* new (root) buffer */
- xfs_inobt_block_t *new; /* new (root) btree block */
- int nptr; /* new value for key index, 1 or 2 */
- xfs_inobt_ptr_t *pp; /* btree address pointer */
- xfs_agblock_t rbno; /* right block number */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_inobt_block_t *right; /* right btree block */
- xfs_inobt_rec_t *rp; /* btree record pointer */
+ int error;
- ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_STATS_INC(cur, killroot);
/*
- * Get a block & a buffer.
+ * Update the root pointer, decreasing the level by 1 and then
+ * free the old root.
*/
- agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
- args.tp = cur->bc_tp;
- args.mp = cur->bc_mp;
- args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno,
- be32_to_cpu(agi->agi_root));
- args.mod = args.minleft = args.alignment = args.total = args.wasdel =
- args.isfl = args.userdata = args.minalignslop = 0;
- args.minlen = args.maxlen = args.prod = 1;
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- if ((error = xfs_alloc_vextent(&args)))
+ xfs_inobt_set_root(cur, newroot, -1);
+ error = xfs_inobt_free_block(cur, bp);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
- /*
- * None available, we fail.
- */
- if (args.fsbno == NULLFSBLOCK) {
- *stat = 0;
- return 0;
- }
- ASSERT(args.len == 1);
- nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
- new = XFS_BUF_TO_INOBT_BLOCK(nbp);
- /*
- * Set the root data in the a.g. inode structure.
- */
- agi->agi_root = cpu_to_be32(args.agbno);
- be32_add_cpu(&agi->agi_level, 1);
- xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
- XFS_AGI_ROOT | XFS_AGI_LEVEL);
- /*
- * At the previous root level there are now two blocks: the old
- * root, and the new block generated when it was split.
- * We don't know which one the cursor is pointing at, so we
- * set up variables "left" and "right" for each case.
- */
- bp = cur->bc_bufs[cur->bc_nlevels - 1];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
- return error;
-#endif
- if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
- /*
- * Our block is left, pick up the right block.
- */
- lbp = bp;
- lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
- left = block;
- rbno = be32_to_cpu(left->bb_rightsib);
- if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
- rbno, 0, &rbp, XFS_INO_BTREE_REF)))
- return error;
- bp = rbp;
- right = XFS_BUF_TO_INOBT_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right,
- cur->bc_nlevels - 1, rbp)))
- return error;
- nptr = 1;
- } else {
- /*
- * Our block is right, pick up the left block.
- */
- rbp = bp;
- rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
- right = block;
- lbno = be32_to_cpu(right->bb_leftsib);
- if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
- lbno, 0, &lbp, XFS_INO_BTREE_REF)))
- return error;
- bp = lbp;
- left = XFS_BUF_TO_INOBT_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left,
- cur->bc_nlevels - 1, lbp)))
- return error;
- nptr = 2;
- }
- /*
- * Fill in the new block's btree header and log it.
- */
- new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
- new->bb_level = cpu_to_be16(cur->bc_nlevels);
- new->bb_numrecs = cpu_to_be16(2);
- new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
- ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
- /*
- * Fill in the key data in the new root.
- */
- kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
- if (be16_to_cpu(left->bb_level) > 0) {
- kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur);
- kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur);
- } else {
- rp = XFS_INOBT_REC_ADDR(left, 1, cur);
- kp[0].ir_startino = rp->ir_startino;
- rp = XFS_INOBT_REC_ADDR(right, 1, cur);
- kp[1].ir_startino = rp->ir_startino;
}
- xfs_inobt_log_keys(cur, nbp, 1, 2);
- /*
- * Fill in the pointer data in the new root.
- */
- pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
- pp[0] = cpu_to_be32(lbno);
- pp[1] = cpu_to_be32(rbno);
- xfs_inobt_log_ptrs(cur, nbp, 1, 2);
- /*
- * Fix up the cursor.
- */
- xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
- cur->bc_ptrs[cur->bc_nlevels] = nptr;
- cur->bc_nlevels++;
- *stat = 1;
- return 0;
-}
-/*
- * Move 1 record right from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_inobt_rshift(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to shift record on */
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i; /* loop index */
- xfs_inobt_key_t key; /* key value for leaf level upward */
- xfs_buf_t *lbp; /* buffer for left (current) block */
- xfs_inobt_block_t *left; /* left (current) btree block */
- xfs_inobt_key_t *lkp; /* key pointer for left block */
- xfs_inobt_ptr_t *lpp; /* address pointer for left block */
- xfs_inobt_rec_t *lrp; /* record pointer for left block */
- xfs_buf_t *rbp; /* buffer for right neighbor block */
- xfs_inobt_block_t *right; /* right neighbor btree block */
- xfs_inobt_key_t *rkp; /* key pointer for right block */
- xfs_inobt_ptr_t *rpp; /* address pointer for right block */
- xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */
- xfs_btree_cur_t *tcur; /* temporary cursor */
+ XFS_BTREE_STATS_INC(cur, free);
- /*
- * Set up variables for this block as "left".
- */
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_INOBT_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
-#endif
- /*
- * If we've got no right sibling then we can't shift an entry right.
- */
- if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * If the cursor entry is the one that would be moved, don't
- * do it... it's too complicated.
- */
- if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
- *stat = 0;
- return 0;
- }
- /*
- * Set up the right neighbor as "right".
- */
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
- 0, &rbp, XFS_INO_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_INOBT_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
- /*
- * If it's full, it can't take another entry.
- */
- if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
- *stat = 0;
- return 0;
- }
- /*
- * Make a hole at the start of the right neighbor block, then
- * copy the last left block entry to the hole.
- */
- if (level > 0) {
- lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
- rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
- return error;
- }
-#endif
- memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
- return error;
-#endif
- *rkp = *lkp;
- *rpp = *lpp;
- xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- } else {
- lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
- memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- *rrp = *lrp;
- xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- key.ir_startino = rrp->ir_startino;
- rkp = &key;
- }
- /*
- * Decrement and log left's numrecs, bump and log right's numrecs.
- */
- be16_add_cpu(&left->bb_numrecs, -1);
- xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
- be16_add_cpu(&right->bb_numrecs, 1);
-#ifdef DEBUG
- if (level > 0)
- xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
- else
- xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
-#endif
- xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
- /*
- * Using a temporary cursor, update the parent key values of the
- * block on the right.
- */
- if ((error = xfs_btree_dup_cursor(cur, &tcur)))
- return error;
- xfs_btree_lastrec(tcur, level);
- if ((error = xfs_inobt_increment(tcur, level, &i)) ||
- (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
- }
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- *stat = 1;
+ cur->bc_bufs[level] = NULL;
+ cur->bc_nlevels--;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
-/*
- * Split cur/level block in half.
- * Return new block number and its first record (to be inserted into parent).
- */
-STATIC int /* error */
-xfs_inobt_split(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to split */
- xfs_agblock_t *bnop, /* output: block number allocated */
- xfs_inobt_key_t *keyp, /* output: first key of new block */
- xfs_btree_cur_t **curp, /* output: new cursor */
- int *stat) /* success/failure */
-{
- xfs_alloc_arg_t args; /* allocation argument structure */
- int error; /* error return value */
- int i; /* loop index/record number */
- xfs_agblock_t lbno; /* left (current) block number */
- xfs_buf_t *lbp; /* buffer for left block */
- xfs_inobt_block_t *left; /* left (current) btree block */
- xfs_inobt_key_t *lkp; /* left btree key pointer */
- xfs_inobt_ptr_t *lpp; /* left btree address pointer */
- xfs_inobt_rec_t *lrp; /* left btree record pointer */
- xfs_buf_t *rbp; /* buffer for right block */
- xfs_inobt_block_t *right; /* right (new) btree block */
- xfs_inobt_key_t *rkp; /* right btree key pointer */
- xfs_inobt_ptr_t *rpp; /* right btree address pointer */
- xfs_inobt_rec_t *rrp; /* right btree record pointer */
-
- /*
- * Set up left block (current one).
- */
- lbp = cur->bc_bufs[level];
- args.tp = cur->bc_tp;
- args.mp = cur->bc_mp;
- lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
- /*
- * Allocate the new block.
- * If we can't do it, we're toast. Give up.
- */
- args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
- args.mod = args.minleft = args.alignment = args.total = args.wasdel =
- args.isfl = args.userdata = args.minalignslop = 0;
- args.minlen = args.maxlen = args.prod = 1;
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- if ((error = xfs_alloc_vextent(&args)))
- return error;
- if (args.fsbno == NULLFSBLOCK) {
- *stat = 0;
- return 0;
- }
- ASSERT(args.len == 1);
- rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
- /*
- * Set up the new block as "right".
- */
- right = XFS_BUF_TO_INOBT_BLOCK(rbp);
- /*
- * "Left" is the current (according to the cursor) block.
- */
- left = XFS_BUF_TO_INOBT_BLOCK(lbp);
#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
-#endif
- /*
- * Fill in the btree header for the new block.
- */
- right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
- right->bb_level = left->bb_level;
- right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
- /*
- * Make sure that if there's an odd number of entries now, that
- * each new block will have the same number of entries.
- */
- if ((be16_to_cpu(left->bb_numrecs) & 1) &&
- cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
- be16_add_cpu(&right->bb_numrecs, 1);
- i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
- /*
- * For non-leaf blocks, copy keys and addresses over to the new block.
- */
- if (level > 0) {
- lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
- lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
- rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
- rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
- return error;
- }
-#endif
- memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- *keyp = *rkp;
- }
- /*
- * For leaf blocks, copy records over to the new block.
- */
- else {
- lrp = XFS_INOBT_REC_ADDR(left, i, cur);
- rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
- memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- keyp->ir_startino = rrp->ir_startino;
- }
- /*
- * Find the left block number by looking in the buffer.
- * Adjust numrecs, sibling pointers.
- */
- be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
- right->bb_rightsib = left->bb_rightsib;
- left->bb_rightsib = cpu_to_be32(args.agbno);
- right->bb_leftsib = cpu_to_be32(lbno);
- xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
- xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- /*
- * If there's a block to the new block's right, make that block
- * point back to right instead of to left.
- */
- if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
- xfs_inobt_block_t *rrblock; /* rr btree block */
- xfs_buf_t *rrbp; /* buffer for rrblock */
-
- if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
- be32_to_cpu(right->bb_rightsib), 0, &rrbp,
- XFS_INO_BTREE_REF)))
- return error;
- rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
- if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
- return error;
- rrblock->bb_leftsib = cpu_to_be32(args.agbno);
- xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
- }
- /*
- * If the cursor is really in the right block, move it there.
- * If it's just pointing past the last entry in left, then we'll
- * insert there, so don't change anything in that case.
- */
- if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
- xfs_btree_setbuf(cur, level, rbp);
- cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
- }
- /*
- * If there are more levels, we'll need another cursor which refers
- * the right block, no matter where this cursor was.
- */
- if (level + 1 < cur->bc_nlevels) {
- if ((error = xfs_btree_dup_cursor(cur, curp)))
- return error;
- (*curp)->bc_ptrs[level + 1]++;
- }
- *bnop = args.agbno;
- *stat = 1;
- return 0;
+STATIC int
+xfs_inobt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be32_to_cpu(k1->inobt.ir_startino) <
+ be32_to_cpu(k2->inobt.ir_startino);
}
-/*
- * Update keys at all levels from here to the root along the cursor's path.
- */
-STATIC int /* error */
-xfs_inobt_updkey(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_inobt_key_t *keyp, /* new key value to update to */
- int level) /* starting level for update */
+STATIC int
+xfs_inobt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
{
- int ptr; /* index of key in block */
-
- /*
- * Go up the tree from this level toward the root.
- * At each level, update the key value to the value input.
- * Stop when we reach a level where the cursor isn't pointing
- * at the first entry in the block.
- */
- for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
- xfs_buf_t *bp; /* buffer for block */
- xfs_inobt_block_t *block; /* btree block */
-#ifdef DEBUG
- int error; /* error return value */
-#endif
- xfs_inobt_key_t *kp; /* ptr to btree block keys */
-
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- ptr = cur->bc_ptrs[level];
- kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
- *kp = *keyp;
- xfs_inobt_log_keys(cur, bp, ptr, ptr);
- }
- return 0;
+ return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
+ be32_to_cpu(r2->inobt.ir_startino);
}
+#endif /* DEBUG */
-/*
- * Externally visible routines.
- */
+#ifdef XFS_BTREE_TRACE
+ktrace_t *xfs_inobt_trace_buf;
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_inobt_decrement(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree, 0 is leaf */
- int *stat) /* success/failure */
+STATIC void
+xfs_inobt_trace_enter(
+ struct xfs_btree_cur *cur,
+ const char *func,
+ char *s,
+ int type,
+ int line,
+ __psunsigned_t a0,
+ __psunsigned_t a1,
+ __psunsigned_t a2,
+ __psunsigned_t a3,
+ __psunsigned_t a4,
+ __psunsigned_t a5,
+ __psunsigned_t a6,
+ __psunsigned_t a7,
+ __psunsigned_t a8,
+ __psunsigned_t a9,
+ __psunsigned_t a10)
{
- xfs_inobt_block_t *block; /* btree block */
- int error;
- int lev; /* btree level */
-
- ASSERT(level < cur->bc_nlevels);
- /*
- * Read-ahead to the left at this level.
- */
- xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
- /*
- * Decrement the ptr at this level. If we're still in the block
- * then we're done.
- */
- if (--cur->bc_ptrs[level] > 0) {
- *stat = 1;
- return 0;
- }
- /*
- * Get a pointer to the btree block.
- */
- block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level,
- cur->bc_bufs[level])))
- return error;
-#endif
- /*
- * If we just went off the left edge of the tree, return failure.
- */
- if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * March up the tree decrementing pointers.
- * Stop when we don't go off the left edge of a block.
- */
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- if (--cur->bc_ptrs[lev] > 0)
- break;
- /*
- * Read-ahead the left block, we're going to read it
- * in the next loop.
- */
- xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
- }
- /*
- * If we went off the root then we are seriously confused.
- */
- ASSERT(lev < cur->bc_nlevels);
- /*
- * Now walk back down the tree, fixing up the cursor's buffer
- * pointers and key numbers.
- */
- for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
- xfs_agblock_t agbno; /* block number of btree block */
- xfs_buf_t *bp; /* buffer containing btree block */
-
- agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, agbno, 0, &bp,
- XFS_INO_BTREE_REF)))
- return error;
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
- cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
- }
- *stat = 1;
- return 0;
+ ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
+ (void *)func, (void *)s, NULL, (void *)cur,
+ (void *)a0, (void *)a1, (void *)a2, (void *)a3,
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7,
+ (void *)a8, (void *)a9, (void *)a10);
}
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-int /* error */
-xfs_inobt_delete(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+STATIC void
+xfs_inobt_trace_cursor(
+ struct xfs_btree_cur *cur,
+ __uint32_t *s0,
+ __uint64_t *l0,
+ __uint64_t *l1)
{
- int error;
- int i; /* result code */
- int level; /* btree level */
-
- /*
- * Go up the tree, starting at leaf level.
- * If 2 is returned then a join was done; go to the next level.
- * Otherwise we are done.
- */
- for (level = 0, i = 2; i == 2; level++) {
- if ((error = xfs_inobt_delrec(cur, level, &i)))
- return error;
- }
- if (i == 0) {
- for (level = 1; level < cur->bc_nlevels; level++) {
- if (cur->bc_ptrs[level] == 0) {
- if ((error = xfs_inobt_decrement(cur, level, &i)))
- return error;
- break;
- }
- }
- }
- *stat = i;
- return 0;
+ *s0 = cur->bc_private.a.agno;
+ *l0 = cur->bc_rec.i.ir_startino;
+ *l1 = cur->bc_rec.i.ir_free;
}
-
-/*
- * Get the data from the pointed-to record.
- */
-int /* error */
-xfs_inobt_get_rec(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agino_t *ino, /* output: starting inode of chunk */
- __int32_t *fcnt, /* output: number of free inodes */
- xfs_inofree_t *free, /* output: free inode mask */
- int *stat) /* output: success/failure */
+STATIC void
+xfs_inobt_trace_key(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ __uint64_t *l0,
+ __uint64_t *l1)
{
- xfs_inobt_block_t *block; /* btree block */
- xfs_buf_t *bp; /* buffer containing btree block */
-#ifdef DEBUG
- int error; /* error return value */
-#endif
- int ptr; /* record number */
- xfs_inobt_rec_t *rec; /* record data */
-
- bp = cur->bc_bufs[0];
- ptr = cur->bc_ptrs[0];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
- return error;
-#endif
- /*
- * Off the right end or left end, return failure.
- */
- if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
- *stat = 0;
- return 0;
- }
- /*
- * Point to the record and extract its data.
- */
- rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
- *ino = be32_to_cpu(rec->ir_startino);
- *fcnt = be32_to_cpu(rec->ir_freecount);
- *free = be64_to_cpu(rec->ir_free);
- *stat = 1;
- return 0;
+ *l0 = be32_to_cpu(key->inobt.ir_startino);
+ *l1 = 0;
}
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_inobt_increment(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree, 0 is leaf */
- int *stat) /* success/failure */
+STATIC void
+xfs_inobt_trace_record(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ __uint64_t *l0,
+ __uint64_t *l1,
+ __uint64_t *l2)
{
- xfs_inobt_block_t *block; /* btree block */
- xfs_buf_t *bp; /* buffer containing btree block */
- int error; /* error return value */
- int lev; /* btree level */
+ *l0 = be32_to_cpu(rec->inobt.ir_startino);
+ *l1 = be32_to_cpu(rec->inobt.ir_freecount);
+ *l2 = be64_to_cpu(rec->inobt.ir_free);
+}
+#endif /* XFS_BTREE_TRACE */
+
+static const struct xfs_btree_ops xfs_inobt_ops = {
+ .rec_len = sizeof(xfs_inobt_rec_t),
+ .key_len = sizeof(xfs_inobt_key_t),
+
+ .dup_cursor = xfs_inobt_dup_cursor,
+ .set_root = xfs_inobt_set_root,
+ .kill_root = xfs_inobt_kill_root,
+ .alloc_block = xfs_inobt_alloc_block,
+ .free_block = xfs_inobt_free_block,
+ .get_minrecs = xfs_inobt_get_minrecs,
+ .get_maxrecs = xfs_inobt_get_maxrecs,
+ .init_key_from_rec = xfs_inobt_init_key_from_rec,
+ .init_rec_from_key = xfs_inobt_init_rec_from_key,
+ .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
+ .key_diff = xfs_inobt_key_diff,
- ASSERT(level < cur->bc_nlevels);
- /*
- * Read-ahead to the right at this level.
- */
- xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
- /*
- * Get a pointer to the btree block.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- /*
- * Increment the ptr at this level. If we're still in the block
- * then we're done.
- */
- if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
- *stat = 1;
- return 0;
- }
- /*
- * If we just went off the right edge of the tree, return failure.
- */
- if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * March up the tree incrementing pointers.
- * Stop when we don't go off the right edge of a block.
- */
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- bp = cur->bc_bufs[lev];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
+ .keys_inorder = xfs_inobt_keys_inorder,
+ .recs_inorder = xfs_inobt_recs_inorder,
#endif
- if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
- break;
- /*
- * Read-ahead the right block, we're going to read it
- * in the next loop.
- */
- xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
- }
- /*
- * If we went off the root then we are seriously confused.
- */
- ASSERT(lev < cur->bc_nlevels);
- /*
- * Now walk back down the tree, fixing up the cursor's buffer
- * pointers and key numbers.
- */
- for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
- lev > level; ) {
- xfs_agblock_t agbno; /* block number of btree block */
- agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, agbno, 0, &bp,
- XFS_INO_BTREE_REF)))
- return error;
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
- cur->bc_ptrs[lev] = 1;
- }
- *stat = 1;
- return 0;
-}
+#ifdef XFS_BTREE_TRACE
+ .trace_enter = xfs_inobt_trace_enter,
+ .trace_cursor = xfs_inobt_trace_cursor,
+ .trace_key = xfs_inobt_trace_key,
+ .trace_record = xfs_inobt_trace_record,
+#endif
+};
/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
+ * Allocate a new inode btree cursor.
*/
-int /* error */
-xfs_inobt_insert(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+struct xfs_btree_cur * /* new inode btree cursor */
+xfs_inobt_init_cursor(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_buf *agbp, /* buffer for agi structure */
+ xfs_agnumber_t agno) /* allocation group number */
{
- int error; /* error return value */
- int i; /* result value, 0 for failure */
- int level; /* current level number in btree */
- xfs_agblock_t nbno; /* new block number (split result) */
- xfs_btree_cur_t *ncur; /* new cursor (split result) */
- xfs_inobt_rec_t nrec; /* record being inserted this level */
- xfs_btree_cur_t *pcur; /* previous level's cursor */
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ struct xfs_btree_cur *cur;
- level = 0;
- nbno = NULLAGBLOCK;
- nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
- nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
- nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
- ncur = NULL;
- pcur = cur;
- /*
- * Loop going up the tree, starting at the leaf level.
- * Stop when we don't get a split block, that must mean that
- * the insert is finished with this level.
- */
- do {
- /*
- * Insert nrec/nbno into this level of the tree.
- * Note if we fail, nbno will be null.
- */
- if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
- &i))) {
- if (pcur != cur)
- xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
- return error;
- }
- /*
- * See if the cursor we just used is trash.
- * Can't trash the caller's cursor, but otherwise we should
- * if ncur is a new cursor or we're about to be done.
- */
- if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
- cur->bc_nlevels = pcur->bc_nlevels;
- xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
- }
- /*
- * If we got a new cursor, switch to it.
- */
- if (ncur) {
- pcur = ncur;
- ncur = NULL;
- }
- } while (nbno != NULLAGBLOCK);
- *stat = i;
- return 0;
-}
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
-/*
- * Lookup the record equal to ino in the btree given by cur.
- */
-int /* error */
-xfs_inobt_lookup_eq(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agino_t ino, /* starting inode of chunk */
- __int32_t fcnt, /* free inode count */
- xfs_inofree_t free, /* free inode mask */
- int *stat) /* success/failure */
-{
- cur->bc_rec.i.ir_startino = ino;
- cur->bc_rec.i.ir_freecount = fcnt;
- cur->bc_rec.i.ir_free = free;
- return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_nlevels = be32_to_cpu(agi->agi_level);
+ cur->bc_btnum = XFS_BTNUM_INO;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
-/*
- * Lookup the first record greater than or equal to ino
- * in the btree given by cur.
- */
-int /* error */
-xfs_inobt_lookup_ge(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agino_t ino, /* starting inode of chunk */
- __int32_t fcnt, /* free inode count */
- xfs_inofree_t free, /* free inode mask */
- int *stat) /* success/failure */
-{
- cur->bc_rec.i.ir_startino = ino;
- cur->bc_rec.i.ir_freecount = fcnt;
- cur->bc_rec.i.ir_free = free;
- return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
-}
+ cur->bc_ops = &xfs_inobt_ops;
-/*
- * Lookup the first record less than or equal to ino
- * in the btree given by cur.
- */
-int /* error */
-xfs_inobt_lookup_le(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agino_t ino, /* starting inode of chunk */
- __int32_t fcnt, /* free inode count */
- xfs_inofree_t free, /* free inode mask */
- int *stat) /* success/failure */
-{
- cur->bc_rec.i.ir_startino = ino;
- cur->bc_rec.i.ir_freecount = fcnt;
- cur->bc_rec.i.ir_free = free;
- return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
+
+ return cur;
}
/*
- * Update the record referred to by cur, to the value given
- * by [ino, fcnt, free].
- * This either works (return 0) or gets an EFSCORRUPTED error.
+ * Calculate number of records in an inobt btree block.
*/
-int /* error */
-xfs_inobt_update(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agino_t ino, /* starting inode of chunk */
- __int32_t fcnt, /* free inode count */
- xfs_inofree_t free) /* free inode mask */
+int
+xfs_inobt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
{
- xfs_inobt_block_t *block; /* btree block to update */
- xfs_buf_t *bp; /* buffer containing btree block */
- int error; /* error return value */
- int ptr; /* current record number (updating) */
- xfs_inobt_rec_t *rp; /* pointer to updated record */
+ blocklen -= XFS_INOBT_BLOCK_LEN(mp);
- /*
- * Pick up the current block.
- */
- bp = cur->bc_bufs[0];
- block = XFS_BUF_TO_INOBT_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
- return error;
-#endif
- /*
- * Get the address of the rec to be updated.
- */
- ptr = cur->bc_ptrs[0];
- rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
- /*
- * Fill in the new contents and log them.
- */
- rp->ir_startino = cpu_to_be32(ino);
- rp->ir_freecount = cpu_to_be32(fcnt);
- rp->ir_free = cpu_to_be64(free);
- xfs_inobt_log_recs(cur, bp, ptr, ptr);
- /*
- * Updating first record in leaf. Pass new key value up to our parent.
- */
- if (ptr == 1) {
- xfs_inobt_key_t key; /* key containing [ino] */
-
- key.ir_startino = cpu_to_be32(ino);
- if ((error = xfs_inobt_updkey(cur, &key, 1)))
- return error;
- }
- return 0;
+ if (leaf)
+ return blocklen / sizeof(xfs_inobt_rec_t);
+ return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
}
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 8efc4a5b8b92..37e5dd01a577 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -24,7 +24,6 @@
struct xfs_buf;
struct xfs_btree_cur;
-struct xfs_btree_sblock;
struct xfs_mount;
/*
@@ -70,11 +69,6 @@ typedef struct xfs_inobt_key {
/* btree pointer type */
typedef __be32 xfs_inobt_ptr_t;
-/* btree block header type */
-typedef struct xfs_btree_sblock xfs_inobt_block_t;
-
-#define XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)XFS_BUF_PTR(bp))
-
/*
* Bit manipulations for ir_free.
*/
@@ -85,14 +79,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i))
/*
- * Real block structures have a size equal to the disk block size.
- */
-#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0])
-#define XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0])
-#define XFS_INOBT_IS_LAST_REC(cur) \
- ((cur)->bc_ptrs[0] == be16_to_cpu(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs))
-
-/*
* Maximum number of inode btree levels.
*/
#define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels)
@@ -104,75 +90,38 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
/*
- * Record, key, and pointer address macros for btree blocks.
- */
-#define XFS_INOBT_REC_ADDR(bb,i,cur) \
- (XFS_BTREE_REC_ADDR(xfs_inobt, bb, i))
-
-#define XFS_INOBT_KEY_ADDR(bb,i,cur) \
- (XFS_BTREE_KEY_ADDR(xfs_inobt, bb, i))
-
-#define XFS_INOBT_PTR_ADDR(bb,i,cur) \
- (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \
- i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_inobt_decrement(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Get the data from the pointed-to record.
- */
-extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
- __int32_t *fcnt, xfs_inofree_t *free, int *stat);
-
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_inobt_increment(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Lookup the record equal to ino in the btree given by cur.
- */
-extern int xfs_inobt_lookup_eq(struct xfs_btree_cur *cur, xfs_agino_t ino,
- __int32_t fcnt, xfs_inofree_t free, int *stat);
-
-/*
- * Lookup the first record greater than or equal to ino
- * in the btree given by cur.
- */
-extern int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
- __int32_t fcnt, xfs_inofree_t free, int *stat);
-
-/*
- * Lookup the first record less than or equal to ino
- * in the btree given by cur.
+ * Btree block header size depends on a superblock flag.
+ *
+ * (not quite yet, but soon)
*/
-extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
- __int32_t fcnt, xfs_inofree_t free, int *stat);
+#define XFS_INOBT_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
/*
- * Update the record referred to by cur, to the value given
- * by [ino, fcnt, free].
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino,
- __int32_t fcnt, xfs_inofree_t free);
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_INOBT_REC_ADDR(mp, block, index) \
+ ((xfs_inobt_rec_t *) \
+ ((char *)(block) + \
+ XFS_INOBT_BLOCK_LEN(mp) + \
+ (((index) - 1) * sizeof(xfs_inobt_rec_t))))
+
+#define XFS_INOBT_KEY_ADDR(mp, block, index) \
+ ((xfs_inobt_key_t *) \
+ ((char *)(block) + \
+ XFS_INOBT_BLOCK_LEN(mp) + \
+ ((index) - 1) * sizeof(xfs_inobt_key_t)))
+
+#define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \
+ ((xfs_inobt_ptr_t *) \
+ ((char *)(block) + \
+ XFS_INOBT_BLOCK_LEN(mp) + \
+ (maxrecs) * sizeof(xfs_inobt_key_t) + \
+ ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
+
+extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
+ struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
+extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e229e9e001c2..bf4dc5eb4cfc 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,164 +38,122 @@
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_utils.h"
+#include "xfs_trans_priv.h"
+#include "xfs_inode_item.h"
/*
- * Look up an inode by number in the given file system.
- * The inode is looked up in the cache held in each AG.
- * If the inode is found in the cache, attach it to the provided
- * vnode.
- *
- * If it is not in core, read it in from the file system's device,
- * add it to the cache and attach the provided vnode.
- *
- * The inode is locked according to the value of the lock_flags parameter.
- * This flag parameter indicates how and if the inode's IO lock and inode lock
- * should be taken.
- *
- * mp -- the mount point structure for the current file system. It points
- * to the inode hash table.
- * tp -- a pointer to the current transaction if there is one. This is
- * simply passed through to the xfs_iread() call.
- * ino -- the number of the inode desired. This is the unique identifier
- * within the file system for the inode being requested.
- * lock_flags -- flags indicating how to lock the inode. See the comment
- * for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- * if known (as by bulkstat), else 0.
+ * Check the validity of the inode we just found it the cache
*/
-STATIC int
-xfs_iget_core(
- struct inode *inode,
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_ino_t ino,
- uint flags,
- uint lock_flags,
- xfs_inode_t **ipp,
- xfs_daddr_t bno)
+static int
+xfs_iget_cache_hit(
+ struct xfs_perag *pag,
+ struct xfs_inode *ip,
+ int flags,
+ int lock_flags) __releases(pag->pag_ici_lock)
{
- struct inode *old_inode;
- xfs_inode_t *ip;
- xfs_inode_t *iq;
- int error;
- unsigned long first_index, mask;
- xfs_perag_t *pag;
- xfs_agino_t agino;
+ struct xfs_mount *mp = ip->i_mount;
+ int error = EAGAIN;
- /* the radix tree exists only in inode capable AGs */
- if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
- return EINVAL;
-
- /* get the perag structure and ensure that it's inode capable */
- pag = xfs_get_perag(mp, ino);
- if (!pag->pagi_inodeok)
- return EINVAL;
- ASSERT(pag->pag_ici_init);
- agino = XFS_INO_TO_AGINO(mp, ino);
+ /*
+ * If INEW is set this inode is being set up
+ * If IRECLAIM is set this inode is being torn down
+ * Pause and try again.
+ */
+ if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
+ XFS_STATS_INC(xs_ig_frecycle);
+ goto out_error;
+ }
-again:
- read_lock(&pag->pag_ici_lock);
- ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+ /* If IRECLAIMABLE is set, we've torn down the vfs inode part */
+ if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
- if (ip != NULL) {
/*
- * If INEW is set this inode is being set up
- * we need to pause and try again.
+ * If lookup is racing with unlink, then we should return an
+ * error immediately so we don't remove it from the reclaim
+ * list and potentially leak the inode.
*/
- if (xfs_iflags_test(ip, XFS_INEW)) {
- read_unlock(&pag->pag_ici_lock);
- delay(1);
- XFS_STATS_INC(xs_ig_frecycle);
-
- goto again;
+ if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+ error = ENOENT;
+ goto out_error;
}
- old_inode = ip->i_vnode;
- if (old_inode == NULL) {
- /*
- * If IRECLAIM is set this inode is
- * on its way out of the system,
- * we need to pause and try again.
- */
- if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
- read_unlock(&pag->pag_ici_lock);
- delay(1);
- XFS_STATS_INC(xs_ig_frecycle);
-
- goto again;
- }
- ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-
- /*
- * If lookup is racing with unlink, then we
- * should return an error immediately so we
- * don't remove it from the reclaim list and
- * potentially leak the inode.
- */
- if ((ip->i_d.di_mode == 0) &&
- !(flags & XFS_IGET_CREATE)) {
- read_unlock(&pag->pag_ici_lock);
- xfs_put_perag(mp, pag);
- return ENOENT;
- }
-
- xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
-
- XFS_STATS_INC(xs_ig_found);
- xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
- read_unlock(&pag->pag_ici_lock);
-
- XFS_MOUNT_ILOCK(mp);
- list_del_init(&ip->i_reclaim);
- XFS_MOUNT_IUNLOCK(mp);
-
- goto finish_inode;
-
- } else if (inode != old_inode) {
- /* The inode is being torn down, pause and
- * try again.
- */
- if (old_inode->i_state & (I_FREEING | I_CLEAR)) {
- read_unlock(&pag->pag_ici_lock);
- delay(1);
- XFS_STATS_INC(xs_ig_frecycle);
-
- goto again;
- }
-/* Chances are the other vnode (the one in the inode) is being torn
-* down right now, and we landed on top of it. Question is, what do
-* we do? Unhook the old inode and hook up the new one?
-*/
- cmn_err(CE_PANIC,
- "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
- old_inode, inode);
- }
+ xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
/*
- * Inode cache hit
+ * We need to re-initialise the VFS inode as it has been
+ * 'freed' by the VFS. Do this here so we can deal with
+ * errors cleanly, then tag it so it can be set up correctly
+ * later.
*/
- read_unlock(&pag->pag_ici_lock);
- XFS_STATS_INC(xs_ig_found);
-
-finish_inode:
- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
- xfs_put_perag(mp, pag);
- return ENOENT;
+ if (!inode_init_always(mp->m_super, VFS_I(ip))) {
+ error = ENOMEM;
+ goto out_error;
}
- if (lock_flags != 0)
- xfs_ilock(ip, lock_flags);
+ /*
+ * We must set the XFS_INEW flag before clearing the
+ * XFS_IRECLAIMABLE flag so that if a racing lookup does
+ * not find the XFS_IRECLAIMABLE above but has the igrab()
+ * below succeed we can safely check XFS_INEW to detect
+ * that this inode is still being initialised.
+ */
+ xfs_iflags_set(ip, XFS_INEW);
+ xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
+
+ /* clear the radix tree reclaim flag as well. */
+ __xfs_inode_clear_reclaim_tag(mp, pag, ip);
+ } else if (!igrab(VFS_I(ip))) {
+ /* If the VFS inode is being torn down, pause and try again. */
+ XFS_STATS_INC(xs_ig_frecycle);
+ goto out_error;
+ } else if (xfs_iflags_test(ip, XFS_INEW)) {
+ /*
+ * We are racing with another cache hit that is
+ * currently recycling this inode out of the XFS_IRECLAIMABLE
+ * state. Wait for the initialisation to complete before
+ * continuing.
+ */
+ wait_on_inode(VFS_I(ip));
+ }
- xfs_iflags_clear(ip, XFS_ISTALE);
- xfs_itrace_exit_tag(ip, "xfs_iget.found");
- goto return_ip;
+ if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+ error = ENOENT;
+ iput(VFS_I(ip));
+ goto out_error;
}
- /*
- * Inode cache miss
- */
+ /* We've got a live one. */
+ read_unlock(&pag->pag_ici_lock);
+
+ if (lock_flags != 0)
+ xfs_ilock(ip, lock_flags);
+
+ xfs_iflags_clear(ip, XFS_ISTALE);
+ xfs_itrace_exit_tag(ip, "xfs_iget.found");
+ XFS_STATS_INC(xs_ig_found);
+ return 0;
+
+out_error:
read_unlock(&pag->pag_ici_lock);
- XFS_STATS_INC(xs_ig_missed);
+ return error;
+}
+
+
+static int
+xfs_iget_cache_miss(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ xfs_trans_t *tp,
+ xfs_ino_t ino,
+ struct xfs_inode **ipp,
+ xfs_daddr_t bno,
+ int flags,
+ int lock_flags) __releases(pag->pag_ici_lock)
+{
+ struct xfs_inode *ip;
+ int error;
+ unsigned long first_index, mask;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
/*
* Read the disk inode attributes into a new inode structure and get
@@ -203,116 +161,85 @@ finish_inode:
*/
error = xfs_iread(mp, tp, ino, &ip, bno,
(flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
- if (error) {
- xfs_put_perag(mp, pag);
+ if (error)
return error;
- }
xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
-
- mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
- "xfsino", ip->i_ino);
- mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
- init_waitqueue_head(&ip->i_ipin_wait);
- atomic_set(&ip->i_pincount, 0);
-
- /*
- * Because we want to use a counting completion, complete
- * the flush completion once to allow a single access to
- * the flush completion without blocking.
- */
- init_completion(&ip->i_flush);
- complete(&ip->i_flush);
+ if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+ error = ENOENT;
+ goto out_destroy;
+ }
if (lock_flags)
xfs_ilock(ip, lock_flags);
- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
- xfs_idestroy(ip);
- xfs_put_perag(mp, pag);
- return ENOENT;
- }
-
/*
* Preload the radix tree so we can insert safely under the
- * write spinlock.
+ * write spinlock. Note that we cannot sleep inside the preload
+ * region.
*/
if (radix_tree_preload(GFP_KERNEL)) {
- xfs_idestroy(ip);
- delay(1);
- goto again;
+ error = EAGAIN;
+ goto out_unlock;
}
+
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
first_index = agino & mask;
write_lock(&pag->pag_ici_lock);
- /*
- * insert the new inode
- */
+
+ /* insert the new inode */
error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
if (unlikely(error)) {
- BUG_ON(error != -EEXIST);
- write_unlock(&pag->pag_ici_lock);
- radix_tree_preload_end();
- xfs_idestroy(ip);
+ WARN_ON(error != -EEXIST);
XFS_STATS_INC(xs_ig_dup);
- goto again;
+ error = EAGAIN;
+ goto out_preload_end;
}
- /*
- * These values _must_ be set before releasing the radix tree lock!
- */
+ /* These values _must_ be set before releasing the radix tree lock! */
ip->i_udquot = ip->i_gdquot = NULL;
xfs_iflags_set(ip, XFS_INEW);
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
-
- /*
- * Link ip to its mount and thread it on the mount's inode list.
- */
- XFS_MOUNT_ILOCK(mp);
- if ((iq = mp->m_inodes)) {
- ASSERT(iq->i_mprev->i_mnext == iq);
- ip->i_mprev = iq->i_mprev;
- iq->i_mprev->i_mnext = ip;
- iq->i_mprev = ip;
- ip->i_mnext = iq;
- } else {
- ip->i_mnext = ip;
- ip->i_mprev = ip;
- }
- mp->m_inodes = ip;
-
- XFS_MOUNT_IUNLOCK(mp);
- xfs_put_perag(mp, pag);
-
- return_ip:
- ASSERT(ip->i_df.if_ext_max ==
- XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
-
- xfs_iflags_set(ip, XFS_IMODIFIED);
*ipp = ip;
-
- /*
- * Set up the Linux with the Linux inode.
- */
- ip->i_vnode = inode;
- inode->i_private = ip;
-
- /*
- * If we have a real type for an on-disk inode, we can set ops(&unlock)
- * now. If it's a new inode being created, xfs_ialloc will handle it.
- */
- if (ip->i_d.di_mode != 0)
- xfs_setup_inode(ip);
return 0;
-}
+out_preload_end:
+ write_unlock(&pag->pag_ici_lock);
+ radix_tree_preload_end();
+out_unlock:
+ if (lock_flags)
+ xfs_iunlock(ip, lock_flags);
+out_destroy:
+ xfs_destroy_inode(ip);
+ return error;
+}
/*
- * The 'normal' internal xfs_iget, if needed it will
- * 'allocate', or 'get', the vnode.
+ * Look up an inode by number in the given file system.
+ * The inode is looked up in the cache held in each AG.
+ * If the inode is found in the cache, initialise the vfs inode
+ * if necessary.
+ *
+ * If it is not in core, read it in from the file system's device,
+ * add it to the cache and initialise the vfs inode.
+ *
+ * The inode is locked according to the value of the lock_flags parameter.
+ * This flag parameter indicates how and if the inode's IO lock and inode lock
+ * should be taken.
+ *
+ * mp -- the mount point structure for the current file system. It points
+ * to the inode hash table.
+ * tp -- a pointer to the current transaction if there is one. This is
+ * simply passed through to the xfs_iread() call.
+ * ino -- the number of the inode desired. This is the unique identifier
+ * within the file system for the inode being requested.
+ * lock_flags -- flags indicating how to lock the inode. See the comment
+ * for xfs_ilock() for a list of valid values.
+ * bno -- the block number starting the buffer containing the inode,
+ * if known (as by bulkstat), else 0.
*/
int
xfs_iget(
@@ -324,61 +251,65 @@ xfs_iget(
xfs_inode_t **ipp,
xfs_daddr_t bno)
{
- struct inode *inode;
xfs_inode_t *ip;
int error;
+ xfs_perag_t *pag;
+ xfs_agino_t agino;
- XFS_STATS_INC(xs_ig_attempts);
-
-retry:
- inode = iget_locked(mp->m_super, ino);
- if (!inode)
- /* If we got no inode we are out of memory */
- return ENOMEM;
-
- if (inode->i_state & I_NEW) {
- XFS_STATS_INC(vn_active);
- XFS_STATS_INC(vn_alloc);
-
- error = xfs_iget_core(inode, mp, tp, ino, flags,
- lock_flags, ipp, bno);
- if (error) {
- make_bad_inode(inode);
- if (inode->i_state & I_NEW)
- unlock_new_inode(inode);
- iput(inode);
- }
- return error;
+ /* the radix tree exists only in inode capable AGs */
+ if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
+ return EINVAL;
+
+ /* get the perag structure and ensure that it's inode capable */
+ pag = xfs_get_perag(mp, ino);
+ if (!pag->pagi_inodeok)
+ return EINVAL;
+ ASSERT(pag->pag_ici_init);
+ agino = XFS_INO_TO_AGINO(mp, ino);
+
+again:
+ error = 0;
+ read_lock(&pag->pag_ici_lock);
+ ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+
+ if (ip) {
+ error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
+ if (error)
+ goto out_error_or_again;
+ } else {
+ read_unlock(&pag->pag_ici_lock);
+ XFS_STATS_INC(xs_ig_missed);
+
+ error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+ flags, lock_flags);
+ if (error)
+ goto out_error_or_again;
}
+ xfs_put_perag(mp, pag);
+
+ xfs_iflags_set(ip, XFS_IMODIFIED);
+ *ipp = ip;
+ ASSERT(ip->i_df.if_ext_max ==
+ XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
/*
- * If the inode is not fully constructed due to
- * filehandle mismatches wait for the inode to go
- * away and try again.
- *
- * iget_locked will call __wait_on_freeing_inode
- * to wait for the inode to go away.
+ * If we have a real type for an on-disk inode, we can set ops(&unlock)
+ * now. If it's a new inode being created, xfs_ialloc will handle it.
*/
- if (is_bad_inode(inode)) {
- iput(inode);
- delay(1);
- goto retry;
- }
+ if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
+ xfs_setup_inode(ip);
+ return 0;
- ip = XFS_I(inode);
- if (!ip) {
- iput(inode);
+out_error_or_again:
+ if (error == EAGAIN) {
delay(1);
- goto retry;
+ goto again;
}
-
- if (lock_flags != 0)
- xfs_ilock(ip, lock_flags);
- XFS_STATS_INC(xs_ig_found);
- *ipp = ip;
- return 0;
+ xfs_put_perag(mp, pag);
+ return error;
}
+
/*
* Look for the inode corresponding to the given ino in the hash table.
* If it is there and its i_transp pointer matches tp, return it.
@@ -462,14 +393,13 @@ xfs_ireclaim(xfs_inode_t *ip)
xfs_iextract(ip);
/*
- * Here we do a spurious inode lock in order to coordinate with
- * xfs_sync(). This is because xfs_sync() references the inodes
- * in the mount list without taking references on the corresponding
- * vnodes. We make that OK here by ensuring that we wait until
- * the inode is unlocked in xfs_sync() before we go ahead and
- * free it. We get both the regular lock and the io lock because
- * the xfs_sync() code may need to drop the regular one but will
- * still hold the io lock.
+ * Here we do a spurious inode lock in order to coordinate with inode
+ * cache radix tree lookups. This is because the lookup can reference
+ * the inodes in the cache without taking references. We make that OK
+ * here by ensuring that we wait until the inode is unlocked after the
+ * lookup before we go ahead and free it. We get both the ilock and
+ * the iolock because the code may need to drop the ilock one but will
+ * still hold the iolock.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -480,14 +410,6 @@ xfs_ireclaim(xfs_inode_t *ip)
XFS_QM_DQDETACH(ip->i_mount, ip);
/*
- * Pull our behavior descriptor from the vnode chain.
- */
- if (ip->i_vnode) {
- ip->i_vnode->i_private = NULL;
- ip->i_vnode = NULL;
- }
-
- /*
* Free all memory associated with the inode.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -505,38 +427,13 @@ xfs_iextract(
{
xfs_mount_t *mp = ip->i_mount;
xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
- xfs_inode_t *iq;
write_lock(&pag->pag_ici_lock);
radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
write_unlock(&pag->pag_ici_lock);
xfs_put_perag(mp, pag);
- /*
- * Remove from mount's inode list.
- */
- XFS_MOUNT_ILOCK(mp);
- ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
- iq = ip->i_mnext;
- iq->i_mprev = ip->i_mprev;
- ip->i_mprev->i_mnext = iq;
-
- /*
- * Fix up the head pointer if it points to the inode being deleted.
- */
- if (mp->m_inodes == ip) {
- if (ip == iq) {
- mp->m_inodes = NULL;
- } else {
- mp->m_inodes = iq;
- }
- }
-
- /* Deal with the deleted inodes list */
- list_del_init(&ip->i_reclaim);
-
mp->m_ireclaims++;
- XFS_MOUNT_IUNLOCK(mp);
}
/*
@@ -737,7 +634,7 @@ xfs_iunlock(
* it is in the AIL and anyone is waiting on it. Don't do
* this if the caller has asked us not to.
*/
- xfs_trans_unlocked_item(ip->i_mount,
+ xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
(xfs_log_item_t*)(ip->i_itemp));
}
xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h
index d36450003983..f9ce62890ea5 100644
--- a/fs/xfs/xfs_imap.h
+++ b/fs/xfs/xfs_imap.h
@@ -30,11 +30,9 @@ typedef struct xfs_imap {
ushort im_boffset; /* inode offset in block in bytes */
} xfs_imap_t;
-#ifdef __KERNEL__
struct xfs_mount;
struct xfs_trans;
int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
xfs_imap_t *, uint);
-#endif
#endif /* __XFS_IMAP_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a391b955df01..b97710047062 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -41,6 +41,7 @@
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
@@ -221,25 +222,26 @@ xfs_imap_to_bp(
* Use xfs_imap() to determine the size and location of the
* buffer to read from disk.
*/
-STATIC int
+int
xfs_inotobp(
xfs_mount_t *mp,
xfs_trans_t *tp,
xfs_ino_t ino,
xfs_dinode_t **dipp,
xfs_buf_t **bpp,
- int *offset)
+ int *offset,
+ uint imap_flags)
{
xfs_imap_t imap;
xfs_buf_t *bp;
int error;
imap.im_blkno = 0;
- error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
+ error = xfs_imap(mp, tp, ino, &imap, imap_flags | XFS_IMAP_LOOKUP);
if (error)
return error;
- error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0);
+ error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
if (error)
return error;
@@ -621,7 +623,7 @@ xfs_iformat_btree(
ifp = XFS_IFORK_PTR(ip, whichfork);
dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
size = XFS_BMAP_BROOT_SPACE(dfp);
- nrecs = XFS_BMAP_BROOT_NUMRECS(dfp);
+ nrecs = be16_to_cpu(dfp->bb_numrecs);
/*
* blow out if -- fork has less extents than can fit in
@@ -649,8 +651,9 @@ xfs_iformat_btree(
* Copy and convert from the on-disk structure
* to the in-memory structure.
*/
- xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
- ifp->if_broot, size);
+ xfs_bmdr_to_bmbt(ip->i_mount, dfp,
+ XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
+ ifp->if_broot, size);
ifp->if_flags &= ~XFS_IFEXTENTS;
ifp->if_flags |= XFS_IFBROOT;
@@ -788,51 +791,56 @@ xfs_dic2xflags(
}
/*
- * Given a mount structure and an inode number, return a pointer
- * to a newly allocated in-core inode corresponding to the given
- * inode number.
- *
- * Initialize the inode's attributes and extent pointers if it
- * already has them (it will not if the inode has no links).
+ * Allocate and initialise an xfs_inode.
*/
-int
-xfs_iread(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_ino_t ino,
- xfs_inode_t **ipp,
- xfs_daddr_t bno,
- uint imap_flags)
+STATIC struct xfs_inode *
+xfs_inode_alloc(
+ struct xfs_mount *mp,
+ xfs_ino_t ino)
{
- xfs_buf_t *bp;
- xfs_dinode_t *dip;
- xfs_inode_t *ip;
- int error;
+ struct xfs_inode *ip;
- ASSERT(xfs_inode_zone != NULL);
+ /*
+ * if this didn't occur in transactions, we could use
+ * KM_MAYFAIL and return NULL here on ENOMEM. Set the
+ * code up to do this anyway.
+ */
+ ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
+ if (!ip)
+ return NULL;
- ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
- ip->i_ino = ino;
- ip->i_mount = mp;
- atomic_set(&ip->i_iocount, 0);
- spin_lock_init(&ip->i_flags_lock);
+ ASSERT(atomic_read(&ip->i_iocount) == 0);
+ ASSERT(atomic_read(&ip->i_pincount) == 0);
+ ASSERT(!spin_is_locked(&ip->i_flags_lock));
+ ASSERT(completion_done(&ip->i_flush));
/*
- * Get pointer's to the on-disk inode and the buffer containing it.
- * If the inode number refers to a block outside the file system
- * then xfs_itobp() will return NULL. In this case we should
- * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
- * know that this is a new incore inode.
+ * initialise the VFS inode here to get failures
+ * out of the way early.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
- if (error) {
+ if (!inode_init_always(mp->m_super, VFS_I(ip))) {
kmem_zone_free(xfs_inode_zone, ip);
- return error;
+ return NULL;
}
+ /* initialise the xfs inode */
+ ip->i_ino = ino;
+ ip->i_mount = mp;
+ ip->i_blkno = 0;
+ ip->i_len = 0;
+ ip->i_boffset =0;
+ ip->i_afp = NULL;
+ memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+ ip->i_flags = 0;
+ ip->i_update_core = 0;
+ ip->i_update_size = 0;
+ ip->i_delayed_blks = 0;
+ memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+ ip->i_size = 0;
+ ip->i_new_size = 0;
+
/*
* Initialize inode's trace buffers.
- * Do this before xfs_iformat in case it adds entries.
*/
#ifdef XFS_INODE_TRACE
ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
@@ -840,7 +848,7 @@ xfs_iread(
#ifdef XFS_BMAP_TRACE
ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_RW_TRACE
@@ -853,13 +861,51 @@ xfs_iread(
ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
#endif
+ return ip;
+}
+
+/*
+ * Given a mount structure and an inode number, return a pointer
+ * to a newly allocated in-core inode corresponding to the given
+ * inode number.
+ *
+ * Initialize the inode's attributes and extent pointers if it
+ * already has them (it will not if the inode has no links).
+ */
+int
+xfs_iread(
+ xfs_mount_t *mp,
+ xfs_trans_t *tp,
+ xfs_ino_t ino,
+ xfs_inode_t **ipp,
+ xfs_daddr_t bno,
+ uint imap_flags)
+{
+ xfs_buf_t *bp;
+ xfs_dinode_t *dip;
+ xfs_inode_t *ip;
+ int error;
+
+ ip = xfs_inode_alloc(mp, ino);
+ if (!ip)
+ return ENOMEM;
+
+ /*
+ * Get pointer's to the on-disk inode and the buffer containing it.
+ * If the inode number refers to a block outside the file system
+ * then xfs_itobp() will return NULL. In this case we should
+ * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
+ * know that this is a new incore inode.
+ */
+ error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
+ if (error)
+ goto out_destroy_inode;
+
/*
* If we got something that isn't an inode it means someone
* (nfs or dmi) has a stale handle.
*/
if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) {
- kmem_zone_free(xfs_inode_zone, ip);
- xfs_trans_brelse(tp, bp);
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
"dip->di_core.di_magic (0x%x) != "
@@ -867,7 +913,8 @@ xfs_iread(
be16_to_cpu(dip->di_core.di_magic),
XFS_DINODE_MAGIC);
#endif /* DEBUG */
- return XFS_ERROR(EINVAL);
+ error = XFS_ERROR(EINVAL);
+ goto out_brelse;
}
/*
@@ -881,14 +928,12 @@ xfs_iread(
xfs_dinode_from_disk(&ip->i_d, &dip->di_core);
error = xfs_iformat(ip, dip);
if (error) {
- kmem_zone_free(xfs_inode_zone, ip);
- xfs_trans_brelse(tp, bp);
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
"xfs_iformat() returned error %d",
error);
#endif /* DEBUG */
- return error;
+ goto out_brelse;
}
} else {
ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic);
@@ -911,8 +956,6 @@ xfs_iread(
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
}
- INIT_LIST_HEAD(&ip->i_reclaim);
-
/*
* The inode format changed when we moved the link count and
* made it 32 bits long. If this is an old format inode,
@@ -956,6 +999,12 @@ xfs_iread(
xfs_trans_brelse(tp, bp);
*ipp = ip;
return 0;
+
+ out_brelse:
+ xfs_trans_brelse(tp, bp);
+ out_destroy_inode:
+ xfs_destroy_inode(ip);
+ return error;
}
/*
@@ -1049,6 +1098,7 @@ xfs_ialloc(
uint flags;
int error;
timespec_t tv;
+ int filestreams = 0;
/*
* Call the space management code to pick
@@ -1056,9 +1106,8 @@ xfs_ialloc(
*/
error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
ialloc_context, call_again, &ino);
- if (error != 0) {
+ if (error)
return error;
- }
if (*call_again || ino == NULLFSINO) {
*ipp = NULL;
return 0;
@@ -1072,9 +1121,8 @@ xfs_ialloc(
*/
error = xfs_trans_iget(tp->t_mountp, tp, ino,
XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
- if (error != 0) {
+ if (error)
return error;
- }
ASSERT(ip != NULL);
ip->i_d.di_mode = (__uint16_t)mode;
@@ -1155,13 +1203,12 @@ xfs_ialloc(
flags |= XFS_ILOG_DEV;
break;
case S_IFREG:
- if (pip && xfs_inode_is_filestream(pip)) {
- error = xfs_filestream_associate(pip, ip);
- if (error < 0)
- return -error;
- if (!error)
- xfs_iflags_set(ip, XFS_IFILESTREAM);
- }
+ /*
+ * we can't set up filestreams until after the VFS inode
+ * is set up properly.
+ */
+ if (pip && xfs_inode_is_filestream(pip))
+ filestreams = 1;
/* fall through */
case S_IFDIR:
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
@@ -1227,6 +1274,15 @@ xfs_ialloc(
/* now that we have an i_mode we can setup inode ops and unlock */
xfs_setup_inode(ip);
+ /* now we have set up the vfs inode we can associate the filestream */
+ if (filestreams) {
+ error = xfs_filestream_associate(pip, ip);
+ if (error < 0)
+ return -error;
+ if (!error)
+ xfs_iflags_set(ip, XFS_IFILESTREAM);
+ }
+
*ipp = ip;
return 0;
}
@@ -1726,8 +1782,14 @@ xfs_itruncate_finish(
xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_ihold(ntp, ip);
- if (!error)
- error = xfs_trans_reserve(ntp, 0,
+ if (error)
+ return error;
+ /*
+ * transaction commit worked ok so we can drop the extra ticket
+ * reference that we gained in xfs_trans_dup()
+ */
+ xfs_log_ticket_put(ntp->t_ticket);
+ error = xfs_trans_reserve(ntp, 0,
XFS_ITRUNCATE_LOG_RES(mp), 0,
XFS_TRANS_PERM_LOG_RES,
XFS_ITRUNCATE_LOG_COUNT);
@@ -1992,7 +2054,7 @@ xfs_iunlink_remove(
}
next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
error = xfs_inotobp(mp, tp, next_ino, &last_dip,
- &last_ibp, &last_offset);
+ &last_ibp, &last_offset, 0);
if (error) {
cmn_err(CE_WARN,
"xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.",
@@ -2160,9 +2222,9 @@ xfs_ifree_cluster(
iip = (xfs_inode_log_item_t *)lip;
ASSERT(iip->ili_logged == 1);
lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
- spin_lock(&mp->m_ail_lock);
- iip->ili_flush_lsn = iip->ili_item.li_lsn;
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_copy_lsn(mp->m_ail,
+ &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
pre_flushed++;
}
@@ -2183,9 +2245,8 @@ xfs_ifree_cluster(
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
iip->ili_logged = 1;
- spin_lock(&mp->m_ail_lock);
- iip->ili_flush_lsn = iip->ili_item.li_lsn;
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
xfs_buf_attach_iodone(bp,
(void(*)(xfs_buf_t*,xfs_log_item_t*))
@@ -2312,9 +2373,10 @@ xfs_iroot_realloc(
int rec_diff,
int whichfork)
{
+ struct xfs_mount *mp = ip->i_mount;
int cur_max;
xfs_ifork_t *ifp;
- xfs_bmbt_block_t *new_broot;
+ struct xfs_btree_block *new_broot;
int new_max;
size_t new_size;
char *np;
@@ -2335,8 +2397,7 @@ xfs_iroot_realloc(
*/
if (ifp->if_broot_bytes == 0) {
new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
- ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size,
- KM_SLEEP);
+ ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
ifp->if_broot_bytes = (int)new_size;
return;
}
@@ -2347,18 +2408,16 @@ xfs_iroot_realloc(
* location. The records don't change location because
* they are kept butted up against the btree block header.
*/
- cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
+ cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
new_max = cur_max + rec_diff;
new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
- ifp->if_broot = (xfs_bmbt_block_t *)
- kmem_realloc(ifp->if_broot,
- new_size,
+ ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
KM_SLEEP);
- op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
- ifp->if_broot_bytes);
- np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
- (int)new_size);
+ op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+ ifp->if_broot_bytes);
+ np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+ (int)new_size);
ifp->if_broot_bytes = (int)new_size;
ASSERT(ifp->if_broot_bytes <=
XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
@@ -2372,7 +2431,7 @@ xfs_iroot_realloc(
* records, just get rid of the root and clear the status bit.
*/
ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
- cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
+ cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
new_max = cur_max + rec_diff;
ASSERT(new_max >= 0);
if (new_max > 0)
@@ -2380,11 +2439,11 @@ xfs_iroot_realloc(
else
new_size = 0;
if (new_size > 0) {
- new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP);
+ new_broot = kmem_alloc(new_size, KM_SLEEP);
/*
* First copy over the btree block header.
*/
- memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t));
+ memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
} else {
new_broot = NULL;
ifp->if_flags &= ~XFS_IFBROOT;
@@ -2397,18 +2456,16 @@ xfs_iroot_realloc(
/*
* First copy the records.
*/
- op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1,
- ifp->if_broot_bytes);
- np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
- (int)new_size);
+ op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
+ np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
/*
* Then copy the pointers.
*/
- op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+ op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
ifp->if_broot_bytes);
- np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1,
+ np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
(int)new_size);
memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
}
@@ -2617,6 +2674,10 @@ xfs_idestroy_fork(
* It must free the inode itself and any buffers allocated for
* if_extents/if_data and if_broot. It must also free the lock
* associated with the inode.
+ *
+ * Note: because we don't initialise everything on reallocation out
+ * of the zone, we must ensure we nullify everything correctly before
+ * freeing the structure.
*/
void
xfs_idestroy(
@@ -2631,8 +2692,6 @@ xfs_idestroy(
}
if (ip->i_afp)
xfs_idestroy_fork(ip, XFS_ATTR_FORK);
- mrfree(&ip->i_lock);
- mrfree(&ip->i_iolock);
#ifdef XFS_INODE_TRACE
ktrace_free(ip->i_trace);
@@ -2640,7 +2699,7 @@ xfs_idestroy(
#ifdef XFS_BMAP_TRACE
ktrace_free(ip->i_xtrace);
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
ktrace_free(ip->i_btrace);
#endif
#ifdef XFS_RW_TRACE
@@ -2658,20 +2717,26 @@ xfs_idestroy(
* inode still in the AIL. If it is there, we should remove
* it to prevent a use-after-free from occurring.
*/
- xfs_mount_t *mp = ip->i_mount;
xfs_log_item_t *lip = &ip->i_itemp->ili_item;
+ struct xfs_ail *ailp = lip->li_ailp;
ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
XFS_FORCED_SHUTDOWN(ip->i_mount));
if (lip->li_flags & XFS_LI_IN_AIL) {
- spin_lock(&mp->m_ail_lock);
+ spin_lock(&ailp->xa_lock);
if (lip->li_flags & XFS_LI_IN_AIL)
- xfs_trans_delete_ail(mp, lip);
+ xfs_trans_ail_delete(ailp, lip);
else
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
xfs_inode_item_destroy(ip);
+ ip->i_itemp = NULL;
}
+ /* asserts to verify all state is correct here */
+ ASSERT(atomic_read(&ip->i_iocount) == 0);
+ ASSERT(atomic_read(&ip->i_pincount) == 0);
+ ASSERT(!spin_is_locked(&ip->i_flags_lock));
+ ASSERT(completion_done(&ip->i_flush));
kmem_zone_free(xfs_inode_zone, ip);
}
@@ -2880,7 +2945,7 @@ xfs_iflush_fork(
ASSERT(ifp->if_broot_bytes <=
(XFS_IFORK_SIZE(ip, whichfork) +
XFS_BROOT_SIZE_ADJ));
- xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes,
+ xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
(xfs_bmdr_block_t *)cp,
XFS_DFORK_SIZE(dip, mp, whichfork));
}
@@ -3418,10 +3483,8 @@ xfs_iflush_int(
iip->ili_format.ilf_fields = 0;
iip->ili_logged = 1;
- ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */
- spin_lock(&mp->m_ail_lock);
- iip->ili_flush_lsn = iip->ili_item.li_lsn;
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
/*
* Attach the function xfs_iflush_done to the inode's
@@ -3459,41 +3522,6 @@ corrupt_out:
}
-/*
- * Flush all inactive inodes in mp.
- */
-void
-xfs_iflush_all(
- xfs_mount_t *mp)
-{
- xfs_inode_t *ip;
-
- again:
- XFS_MOUNT_ILOCK(mp);
- ip = mp->m_inodes;
- if (ip == NULL)
- goto out;
-
- do {
- /* Make sure we skip markers inserted by sync */
- if (ip->i_mount == NULL) {
- ip = ip->i_mnext;
- continue;
- }
-
- if (!VFS_I(ip)) {
- XFS_MOUNT_IUNLOCK(mp);
- xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
- goto again;
- }
-
- ASSERT(vn_count(VFS_I(ip)) == 0);
-
- ip = ip->i_mnext;
- } while (ip != mp->m_inodes);
- out:
- XFS_MOUNT_IUNLOCK(mp);
-}
#ifdef XFS_ILOCK_TRACE
ktrace_t *xfs_ilock_trace_buf;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1420c49674d7..c6e295e30471 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -20,7 +20,7 @@
struct xfs_dinode;
struct xfs_dinode_core;
-
+struct xfs_inode;
/*
* Fork identifiers.
@@ -63,7 +63,7 @@ typedef struct xfs_ext_irec {
typedef struct xfs_ifork {
int if_bytes; /* bytes in if_u1 */
int if_real_bytes; /* bytes allocated in if_u1 */
- xfs_bmbt_block_t *if_broot; /* file's incore btree root */
+ struct xfs_btree_block *if_broot; /* file's incore btree root */
short if_broot_bytes; /* bytes allocated for root */
unsigned char if_flags; /* per-fork flags */
unsigned char if_ext_max; /* max # of extent records */
@@ -84,54 +84,6 @@ typedef struct xfs_ifork {
} xfs_ifork_t;
/*
- * Flags for xfs_ichgtime().
- */
-#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
-#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
-
-/*
- * Per-fork incore inode flags.
- */
-#define XFS_IFINLINE 0x01 /* Inline data is read in */
-#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
-#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
-#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
-
-/*
- * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate().
- */
-#define XFS_IMAP_LOOKUP 0x1
-#define XFS_IMAP_BULKSTAT 0x2
-
-#ifdef __KERNEL__
-struct bhv_desc;
-struct cred;
-struct ktrace;
-struct xfs_buf;
-struct xfs_bmap_free;
-struct xfs_bmbt_irec;
-struct xfs_bmbt_block;
-struct xfs_inode;
-struct xfs_inode_log_item;
-struct xfs_mount;
-struct xfs_trans;
-struct xfs_dquot;
-
-#if defined(XFS_ILOCK_TRACE)
-#define XFS_ILOCK_KTRACE_SIZE 32
-extern ktrace_t *xfs_ilock_trace_buf;
-extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
-#else
-#define xfs_ilock_trace(i,n,f,ra)
-#endif
-
-typedef struct dm_attrs_s {
- __uint32_t da_dmevmask; /* DMIG event mask */
- __uint16_t da_dmstate; /* DMIG state info */
- __uint16_t da_pad; /* DMIG extra padding */
-} dm_attrs_t;
-
-/*
* This is the xfs in-core inode structure.
* Most of the on-disk inode is embedded in the i_d field.
*
@@ -191,19 +143,98 @@ typedef struct xfs_icdinode {
__uint32_t di_gen; /* generation number */
} xfs_icdinode_t;
-typedef struct {
- struct xfs_inode *ip_mnext; /* next inode in mount list */
- struct xfs_inode *ip_mprev; /* ptr to prev inode */
- struct xfs_mount *ip_mount; /* fs mount struct ptr */
-} xfs_iptr_t;
+/*
+ * Flags for xfs_ichgtime().
+ */
+#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
+#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
+
+/*
+ * Per-fork incore inode flags.
+ */
+#define XFS_IFINLINE 0x01 /* Inline data is read in */
+#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
+#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
+#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
+
+/*
+ * Flags for xfs_inotobp, xfs_itobp(), xfs_imap() and xfs_dilocate().
+ */
+#define XFS_IMAP_LOOKUP 0x1
+#define XFS_IMAP_BULKSTAT 0x2
+
+/*
+ * Fork handling.
+ */
+
+#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0)
+#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3))
+
+#define XFS_IFORK_PTR(ip,w) \
+ ((w) == XFS_DATA_FORK ? \
+ &(ip)->i_df : \
+ (ip)->i_afp)
+#define XFS_IFORK_DSIZE(ip) \
+ (XFS_IFORK_Q(ip) ? \
+ XFS_IFORK_BOFF(ip) : \
+ XFS_LITINO((ip)->i_mount))
+#define XFS_IFORK_ASIZE(ip) \
+ (XFS_IFORK_Q(ip) ? \
+ XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
+ 0)
+#define XFS_IFORK_SIZE(ip,w) \
+ ((w) == XFS_DATA_FORK ? \
+ XFS_IFORK_DSIZE(ip) : \
+ XFS_IFORK_ASIZE(ip))
+#define XFS_IFORK_FORMAT(ip,w) \
+ ((w) == XFS_DATA_FORK ? \
+ (ip)->i_d.di_format : \
+ (ip)->i_d.di_aformat)
+#define XFS_IFORK_FMT_SET(ip,w,n) \
+ ((w) == XFS_DATA_FORK ? \
+ ((ip)->i_d.di_format = (n)) : \
+ ((ip)->i_d.di_aformat = (n)))
+#define XFS_IFORK_NEXTENTS(ip,w) \
+ ((w) == XFS_DATA_FORK ? \
+ (ip)->i_d.di_nextents : \
+ (ip)->i_d.di_anextents)
+#define XFS_IFORK_NEXT_SET(ip,w,n) \
+ ((w) == XFS_DATA_FORK ? \
+ ((ip)->i_d.di_nextents = (n)) : \
+ ((ip)->i_d.di_anextents = (n)))
+
+
+
+#ifdef __KERNEL__
+
+struct bhv_desc;
+struct cred;
+struct ktrace;
+struct xfs_buf;
+struct xfs_bmap_free;
+struct xfs_bmbt_irec;
+struct xfs_inode_log_item;
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_dquot;
+
+#if defined(XFS_ILOCK_TRACE)
+#define XFS_ILOCK_KTRACE_SIZE 32
+extern ktrace_t *xfs_ilock_trace_buf;
+extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
+#else
+#define xfs_ilock_trace(i,n,f,ra)
+#endif
+
+typedef struct dm_attrs_s {
+ __uint32_t da_dmevmask; /* DMIG event mask */
+ __uint16_t da_dmstate; /* DMIG state info */
+ __uint16_t da_pad; /* DMIG extra padding */
+} dm_attrs_t;
typedef struct xfs_inode {
/* Inode linking and identification information. */
- struct xfs_inode *i_mnext; /* next inode in mount list */
- struct xfs_inode *i_mprev; /* ptr to prev inode */
struct xfs_mount *i_mount; /* fs mount struct ptr */
- struct list_head i_reclaim; /* reclaim list */
- struct inode *i_vnode; /* vnode backpointer */
struct xfs_dquot *i_udquot; /* user dquot */
struct xfs_dquot *i_gdquot; /* group dquot */
@@ -238,6 +269,10 @@ typedef struct xfs_inode {
xfs_fsize_t i_size; /* in-memory size */
xfs_fsize_t i_new_size; /* size when write completes */
atomic_t i_iocount; /* outstanding I/O count */
+
+ /* VFS inode */
+ struct inode i_vnode; /* embedded VFS inode */
+
/* Trace buffers per inode. */
#ifdef XFS_INODE_TRACE
struct ktrace *i_trace; /* general inode trace */
@@ -245,7 +280,7 @@ typedef struct xfs_inode {
#ifdef XFS_BMAP_TRACE
struct ktrace *i_xtrace; /* inode extent list trace */
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
struct ktrace *i_btrace; /* inode bmap btree trace */
#endif
#ifdef XFS_RW_TRACE
@@ -265,13 +300,30 @@ typedef struct xfs_inode {
/* Convert from vfs inode to xfs inode */
static inline struct xfs_inode *XFS_I(struct inode *inode)
{
- return (struct xfs_inode *)inode->i_private;
+ return container_of(inode, struct xfs_inode, i_vnode);
}
/* convert from xfs inode to vfs inode */
static inline struct inode *VFS_I(struct xfs_inode *ip)
{
- return (struct inode *)ip->i_vnode;
+ return &ip->i_vnode;
+}
+
+/*
+ * Get rid of a partially initialized inode.
+ *
+ * We have to go through destroy_inode to make sure allocations
+ * from init_inode_always like the security data are undone.
+ *
+ * We mark the inode bad so that it takes the short cut in
+ * the reclaim path instead of going through the flush path
+ * which doesn't make sense for an inode that has never seen the
+ * light of day.
+ */
+static inline void xfs_destroy_inode(struct xfs_inode *ip)
+{
+ make_bad_inode(VFS_I(ip));
+ return destroy_inode(VFS_I(ip));
}
/*
@@ -327,50 +379,26 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
spin_unlock(&ip->i_flags_lock);
return ret;
}
-#endif /* __KERNEL__ */
-
/*
- * Fork handling.
+ * Manage the i_flush queue embedded in the inode. This completion
+ * queue synchronizes processes attempting to flush the in-core
+ * inode back to disk.
*/
+static inline void xfs_iflock(xfs_inode_t *ip)
+{
+ wait_for_completion(&ip->i_flush);
+}
-#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0)
-#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3))
-
-#define XFS_IFORK_PTR(ip,w) \
- ((w) == XFS_DATA_FORK ? \
- &(ip)->i_df : \
- (ip)->i_afp)
-#define XFS_IFORK_DSIZE(ip) \
- (XFS_IFORK_Q(ip) ? \
- XFS_IFORK_BOFF(ip) : \
- XFS_LITINO((ip)->i_mount))
-#define XFS_IFORK_ASIZE(ip) \
- (XFS_IFORK_Q(ip) ? \
- XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
- 0)
-#define XFS_IFORK_SIZE(ip,w) \
- ((w) == XFS_DATA_FORK ? \
- XFS_IFORK_DSIZE(ip) : \
- XFS_IFORK_ASIZE(ip))
-#define XFS_IFORK_FORMAT(ip,w) \
- ((w) == XFS_DATA_FORK ? \
- (ip)->i_d.di_format : \
- (ip)->i_d.di_aformat)
-#define XFS_IFORK_FMT_SET(ip,w,n) \
- ((w) == XFS_DATA_FORK ? \
- ((ip)->i_d.di_format = (n)) : \
- ((ip)->i_d.di_aformat = (n)))
-#define XFS_IFORK_NEXTENTS(ip,w) \
- ((w) == XFS_DATA_FORK ? \
- (ip)->i_d.di_nextents : \
- (ip)->i_d.di_anextents)
-#define XFS_IFORK_NEXT_SET(ip,w,n) \
- ((w) == XFS_DATA_FORK ? \
- ((ip)->i_d.di_nextents = (n)) : \
- ((ip)->i_d.di_anextents = (n)))
+static inline int xfs_iflock_nowait(xfs_inode_t *ip)
+{
+ return try_wait_for_completion(&ip->i_flush);
+}
-#ifdef __KERNEL__
+static inline void xfs_ifunlock(xfs_inode_t *ip)
+{
+ complete(&ip->i_flush);
+}
/*
* In-core inode flags.
@@ -484,25 +512,15 @@ int xfs_isilocked(xfs_inode_t *, uint);
uint xfs_ilock_map_shared(xfs_inode_t *);
void xfs_iunlock_map_shared(xfs_inode_t *, uint);
void xfs_ireclaim(xfs_inode_t *);
-int xfs_finish_reclaim(xfs_inode_t *, int, int);
-int xfs_finish_reclaim_all(struct xfs_mount *, int);
/*
* xfs_inode.c prototypes.
*/
-int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
- xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
- xfs_daddr_t, uint, uint);
int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
xfs_inode_t **, xfs_daddr_t, uint);
-int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
- xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t,
+ xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t,
int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
-void xfs_dinode_from_disk(struct xfs_icdinode *,
- struct xfs_dinode_core *);
-void xfs_dinode_to_disk(struct xfs_dinode_core *,
- struct xfs_icdinode *);
uint xfs_ip2xflags(struct xfs_inode *);
uint xfs_dic2xflags(struct xfs_dinode *);
@@ -513,17 +531,12 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
xfs_fsize_t, int, int);
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
-void xfs_idestroy_fork(xfs_inode_t *, int);
void xfs_idestroy(xfs_inode_t *);
-void xfs_idata_realloc(xfs_inode_t *, int, int);
void xfs_iextract(xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
-void xfs_iroot_realloc(xfs_inode_t *, int, int);
void xfs_ipin(xfs_inode_t *);
void xfs_iunpin(xfs_inode_t *);
-int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
int xfs_iflush(xfs_inode_t *, uint);
-void xfs_iflush_all(struct xfs_mount *);
void xfs_ichgtime(xfs_inode_t *, int);
xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
void xfs_lock_inodes(xfs_inode_t **, int, uint);
@@ -532,6 +545,24 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
void xfs_synchronize_atime(xfs_inode_t *);
void xfs_mark_inode_dirty_sync(xfs_inode_t *);
+#endif /* __KERNEL__ */
+
+int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
+ xfs_ino_t, struct xfs_dinode **,
+ struct xfs_buf **, int *, uint);
+int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
+ struct xfs_inode *, struct xfs_dinode **,
+ struct xfs_buf **, xfs_daddr_t, uint, uint);
+void xfs_dinode_from_disk(struct xfs_icdinode *,
+ struct xfs_dinode_core *);
+void xfs_dinode_to_disk(struct xfs_dinode_core *,
+ struct xfs_icdinode *);
+void xfs_idestroy_fork(struct xfs_inode *, int);
+void xfs_idata_realloc(struct xfs_inode *, int, int);
+void xfs_iroot_realloc(struct xfs_inode *, int, int);
+int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
+int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
+
xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
xfs_bmbt_irec_t *);
@@ -561,7 +592,8 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
#ifdef DEBUG
-void xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t);
+void xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
+ xfs_fsize_t);
#else /* DEBUG */
#define xfs_isize_check(mp, ip, isize)
#endif /* DEBUG */
@@ -576,26 +608,4 @@ extern struct kmem_zone *xfs_ifork_zone;
extern struct kmem_zone *xfs_inode_zone;
extern struct kmem_zone *xfs_ili_zone;
-/*
- * Manage the i_flush queue embedded in the inode. This completion
- * queue synchronizes processes attempting to flush the in-core
- * inode back to disk.
- */
-static inline void xfs_iflock(xfs_inode_t *ip)
-{
- wait_for_completion(&ip->i_flush);
-}
-
-static inline int xfs_iflock_nowait(xfs_inode_t *ip)
-{
- return try_wait_for_completion(&ip->i_flush);
-}
-
-static inline void xfs_ifunlock(xfs_inode_t *ip)
-{
- complete(&ip->i_flush);
-}
-
-#endif /* __KERNEL__ */
-
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 97c7452e2620..aa9bf05060c6 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -932,6 +932,7 @@ xfs_inode_item_init(
iip->ili_item.li_type = XFS_LI_INODE;
iip->ili_item.li_ops = &xfs_inode_item_ops;
iip->ili_item.li_mountp = mp;
+ iip->ili_item.li_ailp = mp->m_ail;
iip->ili_inode = ip;
/*
@@ -976,9 +977,8 @@ xfs_iflush_done(
xfs_buf_t *bp,
xfs_inode_log_item_t *iip)
{
- xfs_inode_t *ip;
-
- ip = iip->ili_inode;
+ xfs_inode_t *ip = iip->ili_inode;
+ struct xfs_ail *ailp = iip->ili_item.li_ailp;
/*
* We only want to pull the item from the AIL if it is
@@ -991,15 +991,12 @@ xfs_iflush_done(
*/
if (iip->ili_logged &&
(iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
- spin_lock(&ip->i_mount->m_ail_lock);
+ spin_lock(&ailp->xa_lock);
if (iip->ili_item.li_lsn == iip->ili_flush_lsn) {
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- xfs_trans_delete_ail(ip->i_mount,
- (xfs_log_item_t*)iip);
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip);
} else {
- spin_unlock(&ip->i_mount->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
}
@@ -1031,21 +1028,20 @@ void
xfs_iflush_abort(
xfs_inode_t *ip)
{
- xfs_inode_log_item_t *iip;
+ xfs_inode_log_item_t *iip = ip->i_itemp;
xfs_mount_t *mp;
iip = ip->i_itemp;
mp = ip->i_mount;
if (iip) {
+ struct xfs_ail *ailp = iip->ili_item.li_ailp;
if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
- spin_lock(&mp->m_ail_lock);
+ spin_lock(&ailp->xa_lock);
if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip);
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip);
} else
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
iip->ili_logged = 0;
/*
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 40513077ab36..1ff04cc323ad 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -112,6 +112,24 @@ typedef struct xfs_inode_log_format_64 {
#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
+#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w)
+static inline int xfs_ilog_fbroot(int w)
+{
+ return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
+}
+
+#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w)
+static inline int xfs_ilog_fext(int w)
+{
+ return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
+}
+
+#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w)
+static inline int xfs_ilog_fdata(int w)
+{
+ return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
+}
+
#ifdef __KERNEL__
struct xfs_buf;
@@ -148,26 +166,6 @@ typedef struct xfs_inode_log_item {
} xfs_inode_log_item_t;
-#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w)
-static inline int xfs_ilog_fdata(int w)
-{
- return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
-}
-
-#endif /* __KERNEL__ */
-
-#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w)
-static inline int xfs_ilog_fbroot(int w)
-{
- return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
-}
-
-#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w)
-static inline int xfs_ilog_fext(int w)
-{
- return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
-}
-
static inline int xfs_inode_clean(xfs_inode_t *ip)
{
return (!ip->i_itemp ||
@@ -175,9 +173,6 @@ static inline int xfs_inode_clean(xfs_inode_t *ip)
!ip->i_update_core;
}
-
-#ifdef __KERNEL__
-
extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
extern void xfs_inode_item_destroy(struct xfs_inode *);
extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index cf6754a3c5b3..35118032a5d6 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -359,7 +359,6 @@ xfs_bulkstat(
int ubused; /* bytes used by formatter */
xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
xfs_dinode_t *dip; /* ptr into bp for specific inode */
- xfs_inode_t *ip; /* ptr to in-core inode struct */
/*
* Get the last inode value, see if there's nothing to do.
@@ -416,8 +415,7 @@ xfs_bulkstat(
/*
* Allocate and initialize a btree cursor for ialloc btree.
*/
- cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO,
- (xfs_inode_t *)0, 0);
+ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
irbp = irbuf;
irbufend = irbuf + nirbuf;
end_of_ag = 0;
@@ -472,7 +470,7 @@ xfs_bulkstat(
* In any case, increment to the next record.
*/
if (!error)
- error = xfs_inobt_increment(cur, 0, &tmp);
+ error = xfs_btree_increment(cur, 0, &tmp);
} else {
/*
* Start of ag. Lookup the first inode chunk.
@@ -539,7 +537,7 @@ xfs_bulkstat(
* Set agino to after this chunk and bump the cursor.
*/
agino = gino + XFS_INODES_PER_CHUNK;
- error = xfs_inobt_increment(cur, 0, &tmp);
+ error = xfs_btree_increment(cur, 0, &tmp);
cond_resched();
}
/*
@@ -586,6 +584,8 @@ xfs_bulkstat(
if (flags & (BULKSTAT_FG_QUICK |
BULKSTAT_FG_INLINE)) {
+ int offset;
+
ino = XFS_AGINO_TO_INO(mp, agno,
agino);
bno = XFS_AGB_TO_DADDR(mp, agno,
@@ -594,21 +594,15 @@ xfs_bulkstat(
/*
* Get the inode cluster buffer
*/
- ASSERT(xfs_inode_zone != NULL);
- ip = kmem_zone_zalloc(xfs_inode_zone,
- KM_SLEEP);
- ip->i_ino = ino;
- ip->i_mount = mp;
- spin_lock_init(&ip->i_flags_lock);
if (bp)
xfs_buf_relse(bp);
- error = xfs_itobp(mp, NULL, ip,
- &dip, &bp, bno,
- XFS_IMAP_BULKSTAT,
- XFS_BUF_LOCK);
+
+ error = xfs_inotobp(mp, NULL, ino, &dip,
+ &bp, &offset,
+ XFS_IMAP_BULKSTAT);
+
if (!error)
- clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
- kmem_zone_free(xfs_inode_zone, ip);
+ clustidx = offset / mp->m_sb.sb_inodesize;
if (XFS_TEST_ERROR(error != 0,
mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
@@ -842,8 +836,7 @@ xfs_inumbers(
agino = 0;
continue;
}
- cur = xfs_btree_init_cursor(mp, NULL, agbp, agno,
- XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
if (error) {
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -887,7 +880,7 @@ xfs_inumbers(
bufidx = 0;
}
if (left) {
- error = xfs_inobt_increment(cur, 0, &tmp);
+ error = xfs_btree_increment(cur, 0, &tmp);
if (error) {
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
cur = NULL;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3608a0f0a5f6..4bf44aef644c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -100,12 +100,11 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
/* local ticket functions */
-STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log,
+STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log,
int unit_bytes,
int count,
char clientid,
uint flags);
-STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket);
#if defined(DEBUG)
STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
@@ -360,7 +359,7 @@ xfs_log_done(xfs_mount_t *mp,
*/
xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
xlog_ungrant_log_space(log, ticket);
- xlog_ticket_put(log, ticket);
+ xfs_log_ticket_put(ticket);
} else {
xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
xlog_regrant_reserve_log_space(log, ticket);
@@ -514,7 +513,7 @@ xfs_log_reserve(xfs_mount_t *mp,
retval = xlog_regrant_write_log_space(log, internal_ticket);
} else {
/* may sleep if need to allocate more tickets */
- internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
+ internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
client, flags);
if (!internal_ticket)
return XFS_ERROR(ENOMEM);
@@ -572,12 +571,12 @@ xfs_log_mount(
/*
* Initialize the AIL now we have a log.
*/
- spin_lock_init(&mp->m_ail_lock);
error = xfs_trans_ail_init(mp);
if (error) {
cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error);
goto error;
}
+ mp->m_log->l_ailp = mp->m_ail;
/*
* skip log recovery on a norecovery mount. pretend it all
@@ -749,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (tic) {
xlog_trace_loggrant(log, tic, "unmount rec");
xlog_ungrant_log_space(log, tic);
- xlog_ticket_put(log, tic);
+ xfs_log_ticket_put(tic);
}
} else {
/*
@@ -906,7 +905,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
int
xfs_log_need_covered(xfs_mount_t *mp)
{
- int needed = 0, gen;
+ int needed = 0;
xlog_t *log = mp->m_log;
if (!xfs_fs_writable(mp))
@@ -915,7 +914,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
spin_lock(&log->l_icloglock);
if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
(log->l_covered_state == XLOG_STATE_COVER_NEED2))
- && !xfs_trans_first_ail(mp, &gen)
+ && !xfs_trans_ail_tail(log->l_ailp)
&& xlog_iclogs_empty(log)) {
if (log->l_covered_state == XLOG_STATE_COVER_NEED)
log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -952,7 +951,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
xfs_lsn_t tail_lsn;
xlog_t *log = mp->m_log;
- tail_lsn = xfs_trans_tail_ail(mp);
+ tail_lsn = xfs_trans_ail_tail(mp->m_ail);
spin_lock(&log->l_grant_lock);
if (tail_lsn != 0) {
log->l_tail_lsn = tail_lsn;
@@ -1446,7 +1445,7 @@ xlog_grant_push_ail(xfs_mount_t *mp,
*/
if (threshold_lsn &&
!XLOG_FORCED_SHUTDOWN(log))
- xfs_trans_push_ail(mp, threshold_lsn);
+ xfs_trans_ail_push(log->l_ailp, threshold_lsn);
} /* xlog_grant_push_ail */
@@ -3222,22 +3221,33 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
*/
/*
- * Free a used ticket.
+ * Free a used ticket when it's refcount falls to zero.
*/
-STATIC void
-xlog_ticket_put(xlog_t *log,
- xlog_ticket_t *ticket)
+void
+xfs_log_ticket_put(
+ xlog_ticket_t *ticket)
{
- sv_destroy(&ticket->t_wait);
- kmem_zone_free(xfs_log_ticket_zone, ticket);
-} /* xlog_ticket_put */
+ ASSERT(atomic_read(&ticket->t_ref) > 0);
+ if (atomic_dec_and_test(&ticket->t_ref)) {
+ sv_destroy(&ticket->t_wait);
+ kmem_zone_free(xfs_log_ticket_zone, ticket);
+ }
+}
+xlog_ticket_t *
+xfs_log_ticket_get(
+ xlog_ticket_t *ticket)
+{
+ ASSERT(atomic_read(&ticket->t_ref) > 0);
+ atomic_inc(&ticket->t_ref);
+ return ticket;
+}
/*
* Allocate and initialise a new log ticket.
*/
STATIC xlog_ticket_t *
-xlog_ticket_get(xlog_t *log,
+xlog_ticket_alloc(xlog_t *log,
int unit_bytes,
int cnt,
char client,
@@ -3308,6 +3318,7 @@ xlog_ticket_get(xlog_t *log,
unit_bytes += 2*BBSIZE;
}
+ atomic_set(&tic->t_ref, 1);
tic->t_unit_res = unit_bytes;
tic->t_curr_res = unit_bytes;
tic->t_cnt = cnt;
@@ -3323,7 +3334,7 @@ xlog_ticket_get(xlog_t *log,
xlog_tic_reset_res(tic);
return tic;
-} /* xlog_ticket_get */
+}
/******************************************************************************
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d47b91f10822..8a3e84e900a3 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -134,6 +134,7 @@ typedef struct xfs_log_callback {
#ifdef __KERNEL__
/* Log manager interfaces */
struct xfs_mount;
+struct xlog_ticket;
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
xfs_log_ticket_t ticket,
void **iclog,
@@ -177,6 +178,9 @@ int xfs_log_need_covered(struct xfs_mount *mp);
void xlog_iodone(struct xfs_buf *);
+struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket);
+void xfs_log_ticket_put(struct xlog_ticket *ticket);
+
#endif
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e7d8f84443fa..b39a1980e82d 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -245,6 +245,7 @@ typedef struct xlog_ticket {
struct xlog_ticket *t_next; /* :4|8 */
struct xlog_ticket *t_prev; /* :4|8 */
xlog_tid_t t_tid; /* transaction identifier : 4 */
+ atomic_t t_ref; /* ticket reference count : 4 */
int t_curr_res; /* current reservation in bytes : 4 */
int t_unit_res; /* unit reservation in bytes : 4 */
char t_ocnt; /* original count : 1 */
@@ -404,6 +405,7 @@ typedef struct xlog_in_core {
typedef struct log {
/* The following fields don't need locking */
struct xfs_mount *l_mp; /* mount point */
+ struct xfs_ail *l_ailp; /* AIL log is working with */
struct xfs_buf *l_xbuf; /* extra buffer for log
* wrapping */
struct xfs_buftarg *l_targ; /* buftarg of log */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 70e3ba32e6be..b411d4947318 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -54,10 +54,8 @@ STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
xlog_recover_item_t *item);
#if defined(DEBUG)
STATIC void xlog_recover_check_summary(xlog_t *);
-STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int);
#else
#define xlog_recover_check_summary(log)
-#define xlog_recover_check_ail(mp, lip, gen)
#endif
@@ -2458,8 +2456,8 @@ xlog_recover_do_inode_trans(
break;
case XFS_ILOG_DBROOT:
- xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len,
- &(dip->di_u.di_bmbt),
+ xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
+ &dip->di_u.di_bmbt,
XFS_DFORK_DSIZE(dip, mp));
break;
@@ -2496,8 +2494,8 @@ xlog_recover_do_inode_trans(
case XFS_ILOG_ABROOT:
dest = XFS_DFORK_APTR(dip);
- xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len,
- (xfs_bmdr_block_t*)dest,
+ xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
+ len, (xfs_bmdr_block_t*)dest,
XFS_DFORK_ASIZE(dip, mp));
break;
@@ -2689,11 +2687,11 @@ xlog_recover_do_efi_trans(
efip->efi_next_extent = efi_formatp->efi_nextents;
efip->efi_flags |= XFS_EFI_COMMITTED;
- spin_lock(&mp->m_ail_lock);
+ spin_lock(&log->l_ailp->xa_lock);
/*
- * xfs_trans_update_ail() drops the AIL lock.
+ * xfs_trans_ail_update() drops the AIL lock.
*/
- xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn);
+ xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
return 0;
}
@@ -2712,12 +2710,12 @@ xlog_recover_do_efd_trans(
xlog_recover_item_t *item,
int pass)
{
- xfs_mount_t *mp;
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
xfs_log_item_t *lip;
- int gen;
__uint64_t efi_id;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp = log->l_ailp;
if (pass == XLOG_RECOVER_PASS1) {
return;
@@ -2734,25 +2732,26 @@ xlog_recover_do_efd_trans(
* Search for the efi with the id in the efd format structure
* in the AIL.
*/
- mp = log->l_mp;
- spin_lock(&mp->m_ail_lock);
- lip = xfs_trans_first_ail(mp, &gen);
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_EFI) {
efip = (xfs_efi_log_item_t *)lip;
if (efip->efi_format.efi_id == efi_id) {
/*
- * xfs_trans_delete_ail() drops the
+ * xfs_trans_ail_delete() drops the
* AIL lock.
*/
- xfs_trans_delete_ail(mp, lip);
+ xfs_trans_ail_delete(ailp, lip);
xfs_efi_item_free(efip);
- return;
+ spin_lock(&ailp->xa_lock);
+ break;
}
}
- lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_cursor_done(ailp, &cur);
+ spin_unlock(&ailp->xa_lock);
}
/*
@@ -3036,33 +3035,6 @@ abort_error:
}
/*
- * Verify that once we've encountered something other than an EFI
- * in the AIL that there are no more EFIs in the AIL.
- */
-#if defined(DEBUG)
-STATIC void
-xlog_recover_check_ail(
- xfs_mount_t *mp,
- xfs_log_item_t *lip,
- int gen)
-{
- int orig_gen = gen;
-
- do {
- ASSERT(lip->li_type != XFS_LI_EFI);
- lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
- /*
- * The check will be bogus if we restart from the
- * beginning of the AIL, so ASSERT that we don't.
- * We never should since we're holding the AIL lock
- * the entire time.
- */
- ASSERT(gen == orig_gen);
- } while (lip != NULL);
-}
-#endif /* DEBUG */
-
-/*
* When this is called, all of the EFIs which did not have
* corresponding EFDs should be in the AIL. What we do now
* is free the extents associated with each one.
@@ -3086,20 +3058,23 @@ xlog_recover_process_efis(
{
xfs_log_item_t *lip;
xfs_efi_log_item_t *efip;
- int gen;
- xfs_mount_t *mp;
int error = 0;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp;
- mp = log->l_mp;
- spin_lock(&mp->m_ail_lock);
-
- lip = xfs_trans_first_ail(mp, &gen);
+ ailp = log->l_ailp;
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
/*
* We're done when we see something other than an EFI.
+ * There should be no EFIs left in the AIL now.
*/
if (lip->li_type != XFS_LI_EFI) {
- xlog_recover_check_ail(mp, lip, gen);
+#ifdef DEBUG
+ for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
+ ASSERT(lip->li_type != XFS_LI_EFI);
+#endif
break;
}
@@ -3108,18 +3083,20 @@ xlog_recover_process_efis(
*/
efip = (xfs_efi_log_item_t *)lip;
if (efip->efi_flags & XFS_EFI_RECOVERED) {
- lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
continue;
}
- spin_unlock(&mp->m_ail_lock);
- error = xlog_recover_process_efi(mp, efip);
+ spin_unlock(&ailp->xa_lock);
+ error = xlog_recover_process_efi(log->l_mp, efip);
+ spin_lock(&ailp->xa_lock);
if (error)
- return error;
- spin_lock(&mp->m_ail_lock);
- lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
+ goto out;
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
- spin_unlock(&mp->m_ail_lock);
+out:
+ xfs_trans_ail_cursor_done(ailp, &cur);
+ spin_unlock(&ailp->xa_lock);
return error;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 15f5dd22fbb2..177976dfea04 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -567,8 +567,6 @@ xfs_readsb(xfs_mount_t *mp, int flags)
STATIC void
xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
{
- int i;
-
mp->m_agfrotor = mp->m_agirotor = 0;
spin_lock_init(&mp->m_agirotor_lock);
mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -582,7 +580,6 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
mp->m_blockmask = sbp->sb_blocksize - 1;
mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
mp->m_blockwmask = mp->m_blockwsize - 1;
- INIT_LIST_HEAD(&mp->m_del_inodes);
/*
* Setup for attributes, in case they get created.
@@ -605,24 +602,20 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
}
ASSERT(mp->m_attroffset < XFS_LITINO(mp));
- for (i = 0; i < 2; i++) {
- mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
- xfs_alloc, i == 0);
- mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
- xfs_alloc, i == 0);
- }
- for (i = 0; i < 2; i++) {
- mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
- xfs_bmbt, i == 0);
- mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
- xfs_bmbt, i == 0);
- }
- for (i = 0; i < 2; i++) {
- mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
- xfs_inobt, i == 0);
- mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
- xfs_inobt, i == 0);
- }
+ mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
+ mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
+ mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
+
+ mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
+ mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
+ mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
+
+ mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
+ mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
+ mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
@@ -1241,7 +1234,7 @@ xfs_unmountfs(
* need to force the log first.
*/
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
- xfs_iflush_all(mp);
+ xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
@@ -1288,11 +1281,6 @@ xfs_unmountfs(
xfs_unmountfs_wait(mp); /* wait for async bufs */
xfs_log_unmount(mp); /* Done! No more fs ops. */
- /*
- * All inodes from this mount point should be freed.
- */
- ASSERT(mp->m_inodes == NULL);
-
if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
uuid_table_remove(&mp->m_sb.sb_uuid);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f3c1024b1241..e3f618c84e47 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,6 +18,7 @@
#ifndef __XFS_MOUNT_H__
#define __XFS_MOUNT_H__
+#include "xfs_sync.h"
typedef struct xfs_trans_reservations {
uint tr_write; /* extent alloc trans */
@@ -44,14 +45,14 @@ typedef struct xfs_trans_reservations {
} xfs_trans_reservations_t;
#ifndef __KERNEL__
-/*
- * Moved here from xfs_ag.h to avoid reordering header files
- */
+
#define XFS_DADDR_TO_AGNO(mp,d) \
((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
#define XFS_DADDR_TO_AGBNO(mp,d) \
((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
-#else
+
+#else /* __KERNEL__ */
+
struct cred;
struct log;
struct xfs_mount_args;
@@ -62,6 +63,7 @@ struct xfs_extdelta;
struct xfs_swapext;
struct xfs_mru_cache;
struct xfs_nameops;
+struct xfs_ail;
/*
* Prototypes and functions for the Data Migration subsystem.
@@ -223,18 +225,10 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
#endif
-typedef struct xfs_ail {
- struct list_head xa_ail;
- uint xa_gen;
- struct task_struct *xa_task;
- xfs_lsn_t xa_target;
-} xfs_ail_t;
-
typedef struct xfs_mount {
struct super_block *m_super;
xfs_tid_t m_tid; /* next unused tid for fs */
- spinlock_t m_ail_lock; /* fs AIL mutex */
- xfs_ail_t m_ail; /* fs active log item list */
+ struct xfs_ail *m_ail; /* fs active log item list */
xfs_sb_t m_sb; /* copy of fs superblock */
spinlock_t m_sb_lock; /* sb counter lock */
struct xfs_buf *m_sb_bp; /* buffer for superblock */
@@ -247,9 +241,6 @@ typedef struct xfs_mount {
xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
spinlock_t m_agirotor_lock;/* .. and lock protecting it */
xfs_agnumber_t m_maxagi; /* highest inode alloc group */
- struct xfs_inode *m_inodes; /* active inode list */
- struct list_head m_del_inodes; /* inodes to reclaim */
- mutex_t m_ilock; /* inode list mutex */
uint m_ireclaims; /* count of calls to reclaim*/
uint m_readio_log; /* min read size log bytes */
uint m_readio_blocks; /* min read size blocks */
@@ -267,7 +258,6 @@ typedef struct xfs_mount {
xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
- __uint8_t m_dircook_elog; /* log d-cookie entry bits */
__uint8_t m_blkbit_log; /* blocklog + NBBY */
__uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
__uint8_t m_agno_log; /* log #ag's */
@@ -276,12 +266,12 @@ typedef struct xfs_mount {
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
- uint m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */
- uint m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */
- uint m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */
- uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */
- uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */
- uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */
+ uint m_alloc_mxr[2]; /* max alloc btree records */
+ uint m_alloc_mnr[2]; /* min alloc btree records */
+ uint m_bmap_dmxr[2]; /* max bmap btree records */
+ uint m_bmap_dmnr[2]; /* min bmap btree records */
+ uint m_inobt_mxr[2]; /* max inobt btree records */
+ uint m_inobt_mnr[2]; /* min inobt btree records */
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */
@@ -313,8 +303,7 @@ typedef struct xfs_mount {
int m_attr_magicpct;/* 37% of the blocksize */
int m_dir_magicpct; /* 37% of the dir blocksize */
__uint8_t m_mk_sharedro; /* mark shared ro on unmount */
- __uint8_t m_inode_quiesce;/* call quiesce on new inodes.
- field governed by m_ilock */
+ __uint8_t m_inode_quiesce;/* call quiesce on new inodes. */
__uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
int m_dirblksize; /* directory block sz--bytes */
@@ -508,7 +497,6 @@ typedef struct xfs_mod_sb {
#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
-extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
extern int xfs_log_sbcount(xfs_mount_t *, uint);
extern int xfs_mountfs(xfs_mount_t *mp);
extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
@@ -525,20 +513,20 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
extern int xfs_readsb(xfs_mount_t *, int);
extern void xfs_freesb(xfs_mount_t *);
extern int xfs_fs_writable(xfs_mount_t *);
-extern int xfs_syncsub(xfs_mount_t *, int, int *);
-extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
-extern xfs_agnumber_t xfs_initialize_perag(xfs_mount_t *, xfs_agnumber_t);
-extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
-extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
-extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *);
+extern int xfs_dmops_get(struct xfs_mount *);
extern void xfs_dmops_put(struct xfs_mount *);
-extern int xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *);
+extern int xfs_qmops_get(struct xfs_mount *);
extern void xfs_qmops_put(struct xfs_mount *);
extern struct xfs_dmops xfs_dmcore_xfs;
#endif /* __KERNEL__ */
+extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
+extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t);
+extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
+extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
+
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index a294e58db8dd..27f80581520a 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -28,7 +28,6 @@
#include "xfs_mount.h"
#include "xfs_quota.h"
#include "xfs_error.h"
-#include "xfs_clnt.h"
STATIC struct xfs_dquot *
@@ -131,9 +130,9 @@ static struct xfs_qmops xfs_qmcore_stub = {
};
int
-xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
+xfs_qmops_get(struct xfs_mount *mp)
{
- if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) {
+ if (XFS_IS_QUOTA_RUNNING(mp)) {
#ifdef CONFIG_XFS_QUOTA
mp->m_qm_ops = &xfs_qmcore_xfs;
#else
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 4e1c22a23be5..8570b826fedd 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -290,7 +290,7 @@ xfs_trans_dup(
ASSERT(tp->t_ticket != NULL);
ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE);
- ntp->t_ticket = tp->t_ticket;
+ ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
tp->t_blk_res = tp->t_blk_res_used;
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
@@ -1260,6 +1260,13 @@ xfs_trans_roll(
trans = *tpp;
/*
+ * transaction commit worked ok so we can drop the extra ticket
+ * reference that we gained in xfs_trans_dup()
+ */
+ xfs_log_ticket_put(trans->t_ticket);
+
+
+ /*
* Reserve space in the log for th next transaction.
* This also pushes items in the "AIL", the list of logged items,
* out to disk if they are taking up space at the tail of the log
@@ -1383,11 +1390,12 @@ xfs_trans_chunk_committed(
xfs_log_item_desc_t *lidp;
xfs_log_item_t *lip;
xfs_lsn_t item_lsn;
- struct xfs_mount *mp;
int i;
lidp = licp->lic_descs;
for (i = 0; i < licp->lic_unused; i++, lidp++) {
+ struct xfs_ail *ailp;
+
if (xfs_lic_isfree(licp, i)) {
continue;
}
@@ -1424,19 +1432,19 @@ xfs_trans_chunk_committed(
* This would cause the earlier transaction to fail
* the test below.
*/
- mp = lip->li_mountp;
- spin_lock(&mp->m_ail_lock);
+ ailp = lip->li_ailp;
+ spin_lock(&ailp->xa_lock);
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
/*
* This will set the item's lsn to item_lsn
* and update the position of the item in
* the AIL.
*
- * xfs_trans_update_ail() drops the AIL lock.
+ * xfs_trans_ail_update() drops the AIL lock.
*/
- xfs_trans_update_ail(mp, lip, item_lsn);
+ xfs_trans_ail_update(ailp, lip, item_lsn);
} else {
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 74c80bd2b0ec..d6fe4a88d79f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -18,6 +18,8 @@
#ifndef __XFS_TRANS_H__
#define __XFS_TRANS_H__
+struct xfs_log_item;
+
/*
* This is the structure written in the log at the head of
* every transaction. It identifies the type and id of the
@@ -98,76 +100,6 @@ typedef struct xfs_trans_header {
#define XFS_TRANS_TYPE_MAX 41
/* new transaction types need to be reflected in xfs_logprint(8) */
-
-#ifdef __KERNEL__
-struct xfs_buf;
-struct xfs_buftarg;
-struct xfs_efd_log_item;
-struct xfs_efi_log_item;
-struct xfs_inode;
-struct xfs_item_ops;
-struct xfs_log_iovec;
-struct xfs_log_item;
-struct xfs_log_item_desc;
-struct xfs_mount;
-struct xfs_trans;
-struct xfs_dquot_acct;
-
-typedef struct xfs_log_item {
- struct list_head li_ail; /* AIL pointers */
- xfs_lsn_t li_lsn; /* last on-disk lsn */
- struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
- struct xfs_mount *li_mountp; /* ptr to fs mount */
- uint li_type; /* item type */
- uint li_flags; /* misc flags */
- struct xfs_log_item *li_bio_list; /* buffer item list */
- void (*li_cb)(struct xfs_buf *,
- struct xfs_log_item *);
- /* buffer item iodone */
- /* callback func */
- struct xfs_item_ops *li_ops; /* function list */
-} xfs_log_item_t;
-
-#define XFS_LI_IN_AIL 0x1
-#define XFS_LI_ABORTED 0x2
-
-typedef struct xfs_item_ops {
- uint (*iop_size)(xfs_log_item_t *);
- void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
- void (*iop_pin)(xfs_log_item_t *);
- void (*iop_unpin)(xfs_log_item_t *, int);
- void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
- uint (*iop_trylock)(xfs_log_item_t *);
- void (*iop_unlock)(xfs_log_item_t *);
- xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
- void (*iop_push)(xfs_log_item_t *);
- void (*iop_pushbuf)(xfs_log_item_t *);
- void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
-} xfs_item_ops_t;
-
-#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
-#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
-#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
-#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags)
-#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
-#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
-#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
-#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
-#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
-#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
-#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
-
-/*
- * Return values for the IOP_TRYLOCK() routines.
- */
-#define XFS_ITEM_SUCCESS 0
-#define XFS_ITEM_PINNED 1
-#define XFS_ITEM_LOCKED 2
-#define XFS_ITEM_FLUSHING 3
-#define XFS_ITEM_PUSHBUF 4
-
-#endif /* __KERNEL__ */
-
/*
* This structure is used to track log items associated with
* a transaction. It points to the log item and keeps some
@@ -176,7 +108,7 @@ typedef struct xfs_item_ops {
* once we get to commit processing (see xfs_trans_commit()).
*/
typedef struct xfs_log_item_desc {
- xfs_log_item_t *lid_item;
+ struct xfs_log_item *lid_item;
ushort lid_size;
unsigned char lid_flags;
unsigned char lid_index;
@@ -276,94 +208,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
(xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
}
-#ifdef __KERNEL__
-/*
- * This structure is used to maintain a list of block ranges that have been
- * freed in the transaction. The ranges are listed in the perag[] busy list
- * between when they're freed and the transaction is committed to disk.
- */
-
-typedef struct xfs_log_busy_slot {
- xfs_agnumber_t lbc_ag;
- ushort lbc_idx; /* index in perag.busy[] */
-} xfs_log_busy_slot_t;
-
-#define XFS_LBC_NUM_SLOTS 31
-typedef struct xfs_log_busy_chunk {
- struct xfs_log_busy_chunk *lbc_next;
- uint lbc_free; /* free slots bitmask */
- ushort lbc_unused; /* first unused */
- xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
-} xfs_log_busy_chunk_t;
-
-#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
-#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
-
-#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
-#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
-#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
-#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
-#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
-
-/*
- * This is the type of function which can be given to xfs_trans_callback()
- * to be called upon the transaction's commit to disk.
- */
-typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
-
-/*
- * This is the structure maintained for every active transaction.
- */
-typedef struct xfs_trans {
- unsigned int t_magic; /* magic number */
- xfs_log_callback_t t_logcb; /* log callback struct */
- unsigned int t_type; /* transaction type */
- unsigned int t_log_res; /* amt of log space resvd */
- unsigned int t_log_count; /* count for perm log res */
- unsigned int t_blk_res; /* # of blocks resvd */
- unsigned int t_blk_res_used; /* # of resvd blocks used */
- unsigned int t_rtx_res; /* # of rt extents resvd */
- unsigned int t_rtx_res_used; /* # of resvd rt extents used */
- xfs_log_ticket_t t_ticket; /* log mgr ticket */
- xfs_lsn_t t_lsn; /* log seq num of start of
- * transaction. */
- xfs_lsn_t t_commit_lsn; /* log seq num of end of
- * transaction. */
- struct xfs_mount *t_mountp; /* ptr to fs mount struct */
- struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
- xfs_trans_callback_t t_callback; /* transaction callback */
- void *t_callarg; /* callback arg */
- unsigned int t_flags; /* misc flags */
- int64_t t_icount_delta; /* superblock icount change */
- int64_t t_ifree_delta; /* superblock ifree change */
- int64_t t_fdblocks_delta; /* superblock fdblocks chg */
- int64_t t_res_fdblocks_delta; /* on-disk only chg */
- int64_t t_frextents_delta;/* superblock freextents chg*/
- int64_t t_res_frextents_delta; /* on-disk only chg */
-#ifdef DEBUG
- int64_t t_ag_freeblks_delta; /* debugging counter */
- int64_t t_ag_flist_delta; /* debugging counter */
- int64_t t_ag_btree_delta; /* debugging counter */
-#endif
- int64_t t_dblocks_delta;/* superblock dblocks change */
- int64_t t_agcount_delta;/* superblock agcount change */
- int64_t t_imaxpct_delta;/* superblock imaxpct change */
- int64_t t_rextsize_delta;/* superblock rextsize chg */
- int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
- int64_t t_rblocks_delta;/* superblock rblocks change */
- int64_t t_rextents_delta;/* superblocks rextents chg */
- int64_t t_rextslog_delta;/* superblocks rextslog chg */
- unsigned int t_items_free; /* log item descs free */
- xfs_log_item_chunk_t t_items; /* first log item desc chunk */
- xfs_trans_header_t t_header; /* header for in-log trans */
- unsigned int t_busy_free; /* busy descs free */
- xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
- unsigned long t_pflags; /* saved process flags state */
-} xfs_trans_t;
-
-#endif /* __KERNEL__ */
-
-
#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */
/*
* Values for t_flags.
@@ -906,6 +750,157 @@ typedef struct xfs_trans {
#define XFS_DQUOT_REF 1
#ifdef __KERNEL__
+
+struct xfs_buf;
+struct xfs_buftarg;
+struct xfs_efd_log_item;
+struct xfs_efi_log_item;
+struct xfs_inode;
+struct xfs_item_ops;
+struct xfs_log_iovec;
+struct xfs_log_item_desc;
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_dquot_acct;
+
+typedef struct xfs_log_item {
+ struct list_head li_ail; /* AIL pointers */
+ xfs_lsn_t li_lsn; /* last on-disk lsn */
+ struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
+ struct xfs_mount *li_mountp; /* ptr to fs mount */
+ struct xfs_ail *li_ailp; /* ptr to AIL */
+ uint li_type; /* item type */
+ uint li_flags; /* misc flags */
+ struct xfs_log_item *li_bio_list; /* buffer item list */
+ void (*li_cb)(struct xfs_buf *,
+ struct xfs_log_item *);
+ /* buffer item iodone */
+ /* callback func */
+ struct xfs_item_ops *li_ops; /* function list */
+} xfs_log_item_t;
+
+#define XFS_LI_IN_AIL 0x1
+#define XFS_LI_ABORTED 0x2
+
+typedef struct xfs_item_ops {
+ uint (*iop_size)(xfs_log_item_t *);
+ void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
+ void (*iop_pin)(xfs_log_item_t *);
+ void (*iop_unpin)(xfs_log_item_t *, int);
+ void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
+ uint (*iop_trylock)(xfs_log_item_t *);
+ void (*iop_unlock)(xfs_log_item_t *);
+ xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
+ void (*iop_push)(xfs_log_item_t *);
+ void (*iop_pushbuf)(xfs_log_item_t *);
+ void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
+} xfs_item_ops_t;
+
+#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
+#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
+#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
+#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags)
+#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
+#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
+#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
+#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
+#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
+#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
+#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
+
+/*
+ * Return values for the IOP_TRYLOCK() routines.
+ */
+#define XFS_ITEM_SUCCESS 0
+#define XFS_ITEM_PINNED 1
+#define XFS_ITEM_LOCKED 2
+#define XFS_ITEM_FLUSHING 3
+#define XFS_ITEM_PUSHBUF 4
+
+/*
+ * This structure is used to maintain a list of block ranges that have been
+ * freed in the transaction. The ranges are listed in the perag[] busy list
+ * between when they're freed and the transaction is committed to disk.
+ */
+
+typedef struct xfs_log_busy_slot {
+ xfs_agnumber_t lbc_ag;
+ ushort lbc_idx; /* index in perag.busy[] */
+} xfs_log_busy_slot_t;
+
+#define XFS_LBC_NUM_SLOTS 31
+typedef struct xfs_log_busy_chunk {
+ struct xfs_log_busy_chunk *lbc_next;
+ uint lbc_free; /* free slots bitmask */
+ ushort lbc_unused; /* first unused */
+ xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
+} xfs_log_busy_chunk_t;
+
+#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
+#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
+
+#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
+#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
+#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
+#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
+#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
+
+/*
+ * This is the type of function which can be given to xfs_trans_callback()
+ * to be called upon the transaction's commit to disk.
+ */
+typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
+
+/*
+ * This is the structure maintained for every active transaction.
+ */
+typedef struct xfs_trans {
+ unsigned int t_magic; /* magic number */
+ xfs_log_callback_t t_logcb; /* log callback struct */
+ unsigned int t_type; /* transaction type */
+ unsigned int t_log_res; /* amt of log space resvd */
+ unsigned int t_log_count; /* count for perm log res */
+ unsigned int t_blk_res; /* # of blocks resvd */
+ unsigned int t_blk_res_used; /* # of resvd blocks used */
+ unsigned int t_rtx_res; /* # of rt extents resvd */
+ unsigned int t_rtx_res_used; /* # of resvd rt extents used */
+ xfs_log_ticket_t t_ticket; /* log mgr ticket */
+ xfs_lsn_t t_lsn; /* log seq num of start of
+ * transaction. */
+ xfs_lsn_t t_commit_lsn; /* log seq num of end of
+ * transaction. */
+ struct xfs_mount *t_mountp; /* ptr to fs mount struct */
+ struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
+ xfs_trans_callback_t t_callback; /* transaction callback */
+ void *t_callarg; /* callback arg */
+ unsigned int t_flags; /* misc flags */
+ int64_t t_icount_delta; /* superblock icount change */
+ int64_t t_ifree_delta; /* superblock ifree change */
+ int64_t t_fdblocks_delta; /* superblock fdblocks chg */
+ int64_t t_res_fdblocks_delta; /* on-disk only chg */
+ int64_t t_frextents_delta;/* superblock freextents chg*/
+ int64_t t_res_frextents_delta; /* on-disk only chg */
+#ifdef DEBUG
+ int64_t t_ag_freeblks_delta; /* debugging counter */
+ int64_t t_ag_flist_delta; /* debugging counter */
+ int64_t t_ag_btree_delta; /* debugging counter */
+#endif
+ int64_t t_dblocks_delta;/* superblock dblocks change */
+ int64_t t_agcount_delta;/* superblock agcount change */
+ int64_t t_imaxpct_delta;/* superblock imaxpct change */
+ int64_t t_rextsize_delta;/* superblock rextsize chg */
+ int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
+ int64_t t_rblocks_delta;/* superblock rblocks change */
+ int64_t t_rextents_delta;/* superblocks rextents chg */
+ int64_t t_rextslog_delta;/* superblocks rextslog chg */
+ unsigned int t_items_free; /* log item descs free */
+ xfs_log_item_chunk_t t_items; /* first log item desc chunk */
+ xfs_trans_header_t t_header; /* header for in-log trans */
+ unsigned int t_busy_free; /* busy descs free */
+ xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
+ unsigned long t_pflags; /* saved process flags state */
+} xfs_trans_t;
+
/*
* XFS transaction mechanism exported interfaces that are
* actually macros.
@@ -928,7 +923,6 @@ typedef struct xfs_trans {
/*
* XFS transaction mechanism exported interfaces.
*/
-void xfs_trans_init(struct xfs_mount *);
xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint);
xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
@@ -975,13 +969,8 @@ int _xfs_trans_commit(xfs_trans_t *,
int *);
#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
void xfs_trans_cancel(xfs_trans_t *, int);
-int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
int xfs_trans_ail_init(struct xfs_mount *);
void xfs_trans_ail_destroy(struct xfs_mount *);
-void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
-xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *);
-void xfs_trans_unlocked_item(struct xfs_mount *,
- xfs_log_item_t *);
xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
xfs_agnumber_t ag,
xfs_extlen_t idx);
@@ -990,4 +979,7 @@ extern kmem_zone_t *xfs_trans_zone;
#endif /* __KERNEL__ */
+void xfs_trans_init(struct xfs_mount *);
+int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
+
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 1f77c00af566..2d47f10f8bed 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2008 Dave Chinner
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
@@ -28,13 +29,13 @@
#include "xfs_trans_priv.h"
#include "xfs_error.h"
-STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *);
-STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *);
-STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *);
-STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *);
+STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *);
+STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
+STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
+STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
#ifdef DEBUG
-STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *);
+STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *);
#else
#define xfs_ail_check(a,l)
#endif /* DEBUG */
@@ -50,20 +51,20 @@ STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *);
* lsn of the last item in the AIL.
*/
xfs_lsn_t
-xfs_trans_tail_ail(
- xfs_mount_t *mp)
+xfs_trans_ail_tail(
+ struct xfs_ail *ailp)
{
xfs_lsn_t lsn;
xfs_log_item_t *lip;
- spin_lock(&mp->m_ail_lock);
- lip = xfs_ail_min(&mp->m_ail);
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_ail_min(ailp);
if (lip == NULL) {
lsn = (xfs_lsn_t)0;
} else {
lsn = lip->li_lsn;
}
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
return lsn;
}
@@ -85,16 +86,125 @@ xfs_trans_tail_ail(
* any of the objects, so the lock is not needed.
*/
void
-xfs_trans_push_ail(
- xfs_mount_t *mp,
- xfs_lsn_t threshold_lsn)
+xfs_trans_ail_push(
+ struct xfs_ail *ailp,
+ xfs_lsn_t threshold_lsn)
{
- xfs_log_item_t *lip;
+ xfs_log_item_t *lip;
+
+ lip = xfs_ail_min(ailp);
+ if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
+ if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0)
+ xfsaild_wakeup(ailp, threshold_lsn);
+ }
+}
+
+/*
+ * AIL traversal cursor initialisation.
+ *
+ * The cursor keeps track of where our current traversal is up
+ * to by tracking the next ƣtem in the list for us. However, for
+ * this to be safe, removing an object from the AIL needs to invalidate
+ * any cursor that points to it. hence the traversal cursor needs to
+ * be linked to the struct xfs_ail so that deletion can search all the
+ * active cursors for invalidation.
+ *
+ * We don't link the push cursor because it is embedded in the struct
+ * xfs_ail and hence easily findable.
+ */
+STATIC void
+xfs_trans_ail_cursor_init(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur)
+{
+ cur->item = NULL;
+ if (cur == &ailp->xa_cursors)
+ return;
+
+ cur->next = ailp->xa_cursors.next;
+ ailp->xa_cursors.next = cur;
+}
+
+/*
+ * Set the cursor to the next item, because when we look
+ * up the cursor the current item may have been freed.
+ */
+STATIC void
+xfs_trans_ail_cursor_set(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ struct xfs_log_item *lip)
+{
+ if (lip)
+ cur->item = xfs_ail_next(ailp, lip);
+}
+
+/*
+ * Get the next item in the traversal and advance the cursor.
+ * If the cursor was invalidated (inidicated by a lip of 1),
+ * restart the traversal.
+ */
+struct xfs_log_item *
+xfs_trans_ail_cursor_next(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur)
+{
+ struct xfs_log_item *lip = cur->item;
+
+ if ((__psint_t)lip & 1)
+ lip = xfs_ail_min(ailp);
+ xfs_trans_ail_cursor_set(ailp, cur, lip);
+ return lip;
+}
+
+/*
+ * Now that the traversal is complete, we need to remove the cursor
+ * from the list of traversing cursors. Avoid removing the embedded
+ * push cursor, but use the fact it is alway present to make the
+ * list deletion simple.
+ */
+void
+xfs_trans_ail_cursor_done(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *done)
+{
+ struct xfs_ail_cursor *prev = NULL;
+ struct xfs_ail_cursor *cur;
+
+ done->item = NULL;
+ if (done == &ailp->xa_cursors)
+ return;
+ prev = &ailp->xa_cursors;
+ for (cur = prev->next; cur; prev = cur, cur = prev->next) {
+ if (cur == done) {
+ prev->next = cur->next;
+ break;
+ }
+ }
+ ASSERT(cur);
+}
+
+/*
+ * Invalidate any cursor that is pointing to this item. This is
+ * called when an item is removed from the AIL. Any cursor pointing
+ * to this object is now invalid and the traversal needs to be
+ * terminated so it doesn't reference a freed object. We set the
+ * cursor item to a value of 1 so we can distinguish between an
+ * invalidation and the end of the list when getting the next item
+ * from the cursor.
+ */
+STATIC void
+xfs_trans_ail_cursor_clear(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_ail_cursor *cur;
- lip = xfs_ail_min(&mp->m_ail);
- if (lip && !XFS_FORCED_SHUTDOWN(mp)) {
- if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0)
- xfsaild_wakeup(mp, threshold_lsn);
+ /* need to search all cursors */
+ for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
+ if (cur->item == lip)
+ cur->item = (struct xfs_log_item *)
+ ((__psint_t)cur->item | 1);
}
}
@@ -103,25 +213,27 @@ xfs_trans_push_ail(
* Return the current tree generation number for use
* in calls to xfs_trans_next_ail().
*/
-STATIC xfs_log_item_t *
-xfs_trans_first_push_ail(
- xfs_mount_t *mp,
- int *gen,
- xfs_lsn_t lsn)
+xfs_log_item_t *
+xfs_trans_ail_cursor_first(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ xfs_lsn_t lsn)
{
- xfs_log_item_t *lip;
+ xfs_log_item_t *lip;
- lip = xfs_ail_min(&mp->m_ail);
- *gen = (int)mp->m_ail.xa_gen;
+ xfs_trans_ail_cursor_init(ailp, cur);
+ lip = xfs_ail_min(ailp);
if (lsn == 0)
- return lip;
+ goto out;
- list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) {
+ list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
- return lip;
+ goto out;
}
-
- return NULL;
+ lip = NULL;
+out:
+ xfs_trans_ail_cursor_set(ailp, cur, lip);
+ return lip;
}
/*
@@ -129,29 +241,29 @@ xfs_trans_first_push_ail(
*/
long
xfsaild_push(
- xfs_mount_t *mp,
+ struct xfs_ail *ailp,
xfs_lsn_t *last_lsn)
{
long tout = 1000; /* milliseconds */
xfs_lsn_t last_pushed_lsn = *last_lsn;
- xfs_lsn_t target = mp->m_ail.xa_target;
+ xfs_lsn_t target = ailp->xa_target;
xfs_lsn_t lsn;
xfs_log_item_t *lip;
- int gen;
- int restarts;
int flush_log, count, stuck;
+ xfs_mount_t *mp = ailp->xa_mount;
+ struct xfs_ail_cursor *cur = &ailp->xa_cursors;
-#define XFS_TRANS_PUSH_AIL_RESTARTS 10
-
- spin_lock(&mp->m_ail_lock);
- lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn);
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_init(ailp, cur);
+ lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn);
if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
/*
* AIL is empty or our push has reached the end.
*/
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_cursor_done(ailp, cur);
+ spin_unlock(&ailp->xa_lock);
last_pushed_lsn = 0;
- goto out;
+ return tout;
}
XFS_STATS_INC(xs_push_ail);
@@ -169,7 +281,7 @@ xfsaild_push(
*/
tout = 10;
lsn = lip->li_lsn;
- flush_log = stuck = count = restarts = 0;
+ flush_log = stuck = count = 0;
while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
int lock_result;
/*
@@ -184,7 +296,7 @@ xfsaild_push(
* skip to the next item in the list.
*/
lock_result = IOP_TRYLOCK(lip);
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
switch (lock_result) {
case XFS_ITEM_SUCCESS:
XFS_STATS_INC(xs_push_ail_success);
@@ -221,7 +333,7 @@ xfsaild_push(
break;
}
- spin_lock(&mp->m_ail_lock);
+ spin_lock(&ailp->xa_lock);
/* should we bother continuing? */
if (XFS_FORCED_SHUTDOWN(mp))
break;
@@ -244,14 +356,13 @@ xfsaild_push(
if (stuck > 100)
break;
- lip = xfs_trans_next_ail(mp, lip, &gen, &restarts);
+ lip = xfs_trans_ail_cursor_next(ailp, cur);
if (lip == NULL)
break;
- if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS)
- break;
lsn = lip->li_lsn;
}
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_cursor_done(ailp, cur);
+ spin_unlock(&ailp->xa_lock);
if (flush_log) {
/*
@@ -274,8 +385,7 @@ xfsaild_push(
*/
tout += 20;
last_pushed_lsn = 0;
- } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) ||
- ((stuck * 100) / count > 90)) {
+ } else if ((stuck * 100) / count > 90) {
/*
* Either there is a lot of contention on the AIL or we
* are stuck due to operations in progress. "Stuck" in this
@@ -287,7 +397,6 @@ xfsaild_push(
*/
tout += 10;
}
-out:
*last_lsn = last_pushed_lsn;
return tout;
} /* xfsaild_push */
@@ -303,7 +412,7 @@ out:
*/
void
xfs_trans_unlocked_item(
- xfs_mount_t *mp,
+ struct xfs_ail *ailp,
xfs_log_item_t *lip)
{
xfs_log_item_t *min_lip;
@@ -315,7 +424,7 @@ xfs_trans_unlocked_item(
* over some potentially valid data.
*/
if (!(lip->li_flags & XFS_LI_IN_AIL) ||
- XFS_FORCED_SHUTDOWN(mp)) {
+ XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
return;
}
@@ -331,10 +440,10 @@ xfs_trans_unlocked_item(
* the call to xfs_log_move_tail() doesn't do anything if there's
* not enough free space to wake people up so we're safe calling it.
*/
- min_lip = xfs_ail_min(&mp->m_ail);
+ min_lip = xfs_ail_min(ailp);
if (min_lip == lip)
- xfs_log_move_tail(mp, 1);
+ xfs_log_move_tail(ailp->xa_mount, 1);
} /* xfs_trans_unlocked_item */
@@ -347,41 +456,37 @@ xfs_trans_unlocked_item(
* we move in the AIL is the minimum one, update the tail lsn in the
* log manager.
*
- * Increment the AIL's generation count to indicate that the tree
- * has changed.
- *
* This function must be called with the AIL lock held. The lock
* is dropped before returning.
*/
void
-xfs_trans_update_ail(
- xfs_mount_t *mp,
+xfs_trans_ail_update(
+ struct xfs_ail *ailp,
xfs_log_item_t *lip,
- xfs_lsn_t lsn) __releases(mp->m_ail_lock)
+ xfs_lsn_t lsn) __releases(ailp->xa_lock)
{
- xfs_log_item_t *dlip=NULL;
+ xfs_log_item_t *dlip = NULL;
xfs_log_item_t *mlip; /* ptr to minimum lip */
- mlip = xfs_ail_min(&mp->m_ail);
+ mlip = xfs_ail_min(ailp);
if (lip->li_flags & XFS_LI_IN_AIL) {
- dlip = xfs_ail_delete(&mp->m_ail, lip);
+ dlip = xfs_ail_delete(ailp, lip);
ASSERT(dlip == lip);
+ xfs_trans_ail_cursor_clear(ailp, dlip);
} else {
lip->li_flags |= XFS_LI_IN_AIL;
}
lip->li_lsn = lsn;
-
- xfs_ail_insert(&mp->m_ail, lip);
- mp->m_ail.xa_gen++;
+ xfs_ail_insert(ailp, lip);
if (mlip == dlip) {
- mlip = xfs_ail_min(&mp->m_ail);
- spin_unlock(&mp->m_ail_lock);
- xfs_log_move_tail(mp, mlip->li_lsn);
+ mlip = xfs_ail_min(ailp);
+ spin_unlock(&ailp->xa_lock);
+ xfs_log_move_tail(ailp->xa_mount, mlip->li_lsn);
} else {
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
@@ -403,29 +508,30 @@ xfs_trans_update_ail(
* is dropped before returning.
*/
void
-xfs_trans_delete_ail(
- xfs_mount_t *mp,
- xfs_log_item_t *lip) __releases(mp->m_ail_lock)
+xfs_trans_ail_delete(
+ struct xfs_ail *ailp,
+ xfs_log_item_t *lip) __releases(ailp->xa_lock)
{
xfs_log_item_t *dlip;
xfs_log_item_t *mlip;
if (lip->li_flags & XFS_LI_IN_AIL) {
- mlip = xfs_ail_min(&mp->m_ail);
- dlip = xfs_ail_delete(&mp->m_ail, lip);
+ mlip = xfs_ail_min(ailp);
+ dlip = xfs_ail_delete(ailp, lip);
ASSERT(dlip == lip);
+ xfs_trans_ail_cursor_clear(ailp, dlip);
lip->li_flags &= ~XFS_LI_IN_AIL;
lip->li_lsn = 0;
- mp->m_ail.xa_gen++;
if (mlip == dlip) {
- mlip = xfs_ail_min(&mp->m_ail);
- spin_unlock(&mp->m_ail_lock);
- xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0));
+ mlip = xfs_ail_min(ailp);
+ spin_unlock(&ailp->xa_lock);
+ xfs_log_move_tail(ailp->xa_mount,
+ (mlip ? mlip->li_lsn : 0));
} else {
- spin_unlock(&mp->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
}
else {
@@ -433,13 +539,13 @@ xfs_trans_delete_ail(
* If the file system is not being shutdown, we are in
* serious trouble if we get to this stage.
*/
- if (XFS_FORCED_SHUTDOWN(mp))
- spin_unlock(&mp->m_ail_lock);
- else {
+ struct xfs_mount *mp = ailp->xa_mount;
+
+ spin_unlock(&ailp->xa_lock);
+ if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
"%s: attempting to delete a log item that is not in the AIL",
__func__);
- spin_unlock(&mp->m_ail_lock);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
}
@@ -448,56 +554,6 @@ xfs_trans_delete_ail(
/*
- * Return the item in the AIL with the smallest lsn.
- * Return the current tree generation number for use
- * in calls to xfs_trans_next_ail().
- */
-xfs_log_item_t *
-xfs_trans_first_ail(
- xfs_mount_t *mp,
- int *gen)
-{
- xfs_log_item_t *lip;
-
- lip = xfs_ail_min(&mp->m_ail);
- *gen = (int)mp->m_ail.xa_gen;
-
- return lip;
-}
-
-/*
- * If the generation count of the tree has not changed since the
- * caller last took something from the AIL, then return the elmt
- * in the tree which follows the one given. If the count has changed,
- * then return the minimum elmt of the AIL and bump the restarts counter
- * if one is given.
- */
-xfs_log_item_t *
-xfs_trans_next_ail(
- xfs_mount_t *mp,
- xfs_log_item_t *lip,
- int *gen,
- int *restarts)
-{
- xfs_log_item_t *nlip;
-
- ASSERT(mp && lip && gen);
- if (mp->m_ail.xa_gen == *gen) {
- nlip = xfs_ail_next(&mp->m_ail, lip);
- } else {
- nlip = xfs_ail_min(&mp->m_ail);
- *gen = (int)mp->m_ail.xa_gen;
- if (restarts != NULL) {
- XFS_STATS_INC(xs_push_ail_restarts);
- (*restarts)++;
- }
- }
-
- return (nlip);
-}
-
-
-/*
* The active item list (AIL) is a doubly linked list of log
* items sorted by ascending lsn. The base of the list is
* a forw/back pointer pair embedded in the xfs mount structure.
@@ -515,15 +571,35 @@ int
xfs_trans_ail_init(
xfs_mount_t *mp)
{
- INIT_LIST_HEAD(&mp->m_ail.xa_ail);
- return xfsaild_start(mp);
+ struct xfs_ail *ailp;
+ int error;
+
+ ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
+ if (!ailp)
+ return ENOMEM;
+
+ ailp->xa_mount = mp;
+ INIT_LIST_HEAD(&ailp->xa_ail);
+ spin_lock_init(&ailp->xa_lock);
+ error = xfsaild_start(ailp);
+ if (error)
+ goto out_free_ailp;
+ mp->m_ail = ailp;
+ return 0;
+
+out_free_ailp:
+ kmem_free(ailp);
+ return error;
}
void
xfs_trans_ail_destroy(
xfs_mount_t *mp)
{
- xfsaild_stop(mp);
+ struct xfs_ail *ailp = mp->m_ail;
+
+ xfsaild_stop(ailp);
+ kmem_free(ailp);
}
/*
@@ -534,7 +610,7 @@ xfs_trans_ail_destroy(
*/
STATIC void
xfs_ail_insert(
- xfs_ail_t *ailp,
+ struct xfs_ail *ailp,
xfs_log_item_t *lip)
/* ARGSUSED */
{
@@ -568,7 +644,7 @@ xfs_ail_insert(
/*ARGSUSED*/
STATIC xfs_log_item_t *
xfs_ail_delete(
- xfs_ail_t *ailp,
+ struct xfs_ail *ailp,
xfs_log_item_t *lip)
/* ARGSUSED */
{
@@ -585,7 +661,7 @@ xfs_ail_delete(
*/
STATIC xfs_log_item_t *
xfs_ail_min(
- xfs_ail_t *ailp)
+ struct xfs_ail *ailp)
/* ARGSUSED */
{
if (list_empty(&ailp->xa_ail))
@@ -601,7 +677,7 @@ xfs_ail_min(
*/
STATIC xfs_log_item_t *
xfs_ail_next(
- xfs_ail_t *ailp,
+ struct xfs_ail *ailp,
xfs_log_item_t *lip)
/* ARGSUSED */
{
@@ -617,7 +693,7 @@ xfs_ail_next(
*/
STATIC void
xfs_ail_check(
- xfs_ail_t *ailp,
+ struct xfs_ail *ailp,
xfs_log_item_t *lip)
{
xfs_log_item_t *prev_lip;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 4e855b5ced66..8ee2f8c8b0a6 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -527,9 +527,8 @@ xfs_trans_brelse(xfs_trans_t *tp,
lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
if (lip->li_type == XFS_LI_BUF) {
bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
- xfs_trans_unlocked_item(
- bip->bli_item.li_mountp,
- lip);
+ xfs_trans_unlocked_item(bip->bli_item.li_ailp,
+ lip);
}
}
xfs_buf_relse(bp);
@@ -626,7 +625,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
* tell the AIL that the buffer is being unlocked.
*/
if (bip != NULL) {
- xfs_trans_unlocked_item(bip->bli_item.li_mountp,
+ xfs_trans_unlocked_item(bip->bli_item.li_ailp,
(xfs_log_item_t*)bip);
}
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 3c666e8317f8..e110bf57d7f4 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -22,6 +22,14 @@
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
+/* XXX: from here down needed until struct xfs_trans has it's own ailp */
+#include "xfs_bit.h"
+#include "xfs_buf_item.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
int, int, xfs_lsn_t);
@@ -79,6 +87,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
lidp->lid_size = 0;
lip->li_desc = lidp;
lip->li_mountp = tp->t_mountp;
+ lip->li_ailp = tp->t_mountp->m_ail;
return lidp;
}
@@ -120,6 +129,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
lidp->lid_size = 0;
lip->li_desc = lidp;
lip->li_mountp = tp->t_mountp;
+ lip->li_ailp = tp->t_mountp->m_ail;
return lidp;
}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 3c748c456ed4..73e2ad397432 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -44,25 +44,93 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
xfs_extlen_t idx);
/*
- * From xfs_trans_ail.c
+ * AIL traversal cursor.
+ *
+ * Rather than using a generation number for detecting changes in the ail, use
+ * a cursor that is protected by the ail lock. The aild cursor exists in the
+ * struct xfs_ail, but other traversals can declare it on the stack and link it
+ * to the ail list.
+ *
+ * When an object is deleted from or moved int the AIL, the cursor list is
+ * searched to see if the object is a designated cursor item. If it is, it is
+ * deleted from the cursor so that the next time the cursor is used traversal
+ * will return to the start.
+ *
+ * This means a traversal colliding with a removal will cause a restart of the
+ * list scan, rather than any insertion or deletion anywhere in the list. The
+ * low bit of the item pointer is set if the cursor has been invalidated so
+ * that we can tell the difference between invalidation and reaching the end
+ * of the list to trigger traversal restarts.
*/
-void xfs_trans_update_ail(struct xfs_mount *mp,
- struct xfs_log_item *lip, xfs_lsn_t lsn)
- __releases(mp->m_ail_lock);
-void xfs_trans_delete_ail(struct xfs_mount *mp,
- struct xfs_log_item *lip)
- __releases(mp->m_ail_lock);
-struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *);
-struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *,
- struct xfs_log_item *, int *, int *);
+struct xfs_ail_cursor {
+ struct xfs_ail_cursor *next;
+ struct xfs_log_item *item;
+};
+/*
+ * Private AIL structures.
+ *
+ * Eventually we need to drive the locking in here as well.
+ */
+struct xfs_ail {
+ struct xfs_mount *xa_mount;
+ struct list_head xa_ail;
+ uint xa_gen;
+ struct task_struct *xa_task;
+ xfs_lsn_t xa_target;
+ struct xfs_ail_cursor xa_cursors;
+ spinlock_t xa_lock;
+};
/*
- * AIL push thread support
+ * From xfs_trans_ail.c
*/
-long xfsaild_push(struct xfs_mount *, xfs_lsn_t *);
-void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t);
-int xfsaild_start(struct xfs_mount *);
-void xfsaild_stop(struct xfs_mount *);
+void xfs_trans_ail_update(struct xfs_ail *ailp,
+ struct xfs_log_item *lip, xfs_lsn_t lsn)
+ __releases(ailp->xa_lock);
+void xfs_trans_ail_delete(struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+ __releases(ailp->xa_lock);
+void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
+void xfs_trans_unlocked_item(struct xfs_ail *,
+ xfs_log_item_t *);
+
+xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp);
+
+struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ xfs_lsn_t lsn);
+struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur);
+void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur);
+
+long xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
+void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
+int xfsaild_start(struct xfs_ail *);
+void xfsaild_stop(struct xfs_ail *);
+#if BITS_PER_LONG != 64
+static inline void
+xfs_trans_ail_copy_lsn(
+ struct xfs_ail *ailp,
+ xfs_lsn_t *dst,
+ xfs_lsn_t *src)
+{
+ ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */
+ spin_lock(&ailp->xa_lock);
+ *dst = *src;
+ spin_unlock(&ailp->xa_lock);
+}
+#else
+static inline void
+xfs_trans_ail_copy_lsn(
+ struct xfs_ail *ailp,
+ xfs_lsn_t *dst,
+ xfs_lsn_t *src)
+{
+ ASSERT(sizeof(xfs_lsn_t) == 8);
+ *dst = *src;
+}
+#endif
#endif /* __XFS_TRANS_PRIV_H__ */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 35d4d414bcc2..771144932ab4 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -172,6 +172,12 @@ xfs_dir_ialloc(
*ipp = NULL;
return code;
}
+
+ /*
+ * transaction commit worked ok so we can drop the extra ticket
+ * reference that we gained in xfs_trans_dup()
+ */
+ xfs_log_ticket_put(tp->t_ticket);
code = xfs_trans_reserve(tp, 0, log_res, 0,
XFS_TRANS_PERM_LOG_RES, log_count);
/*
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 439dd3939dda..305d9f3948e0 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -49,71 +49,15 @@
#include "xfs_extfree_item.h"
#include "xfs_acl.h"
#include "xfs_attr.h"
-#include "xfs_clnt.h"
#include "xfs_mru_cache.h"
#include "xfs_filestream.h"
#include "xfs_fsops.h"
#include "xfs_vnodeops.h"
#include "xfs_vfsops.h"
#include "xfs_utils.h"
+#include "xfs_sync.h"
-STATIC void
-xfs_quiesce_fs(
- xfs_mount_t *mp)
-{
- int count = 0, pincount;
-
- xfs_flush_buftarg(mp->m_ddev_targp, 0);
- xfs_finish_reclaim_all(mp, 0);
-
- /* This loop must run at least twice.
- * The first instance of the loop will flush
- * most meta data but that will generate more
- * meta data (typically directory updates).
- * Which then must be flushed and logged before
- * we can write the unmount record.
- */
- do {
- xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
- pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
- if (!pincount) {
- delay(50);
- count++;
- }
- } while (count < 2);
-}
-
-/*
- * Second stage of a quiesce. The data is already synced, now we have to take
- * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceding.
- */
-void
-xfs_attr_quiesce(
- xfs_mount_t *mp)
-{
- int error = 0;
-
- /* wait for all modifications to complete */
- while (atomic_read(&mp->m_active_trans) > 0)
- delay(100);
-
- /* flush inodes and push all remaining buffers out to disk */
- xfs_quiesce_fs(mp);
-
- ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
-
- /* Push the superblock and write an unmount record */
- error = xfs_log_sbcount(mp, 1);
- if (error)
- xfs_fs_cmn_err(CE_WARN, mp,
- "xfs_attr_quiesce: failed to log sb changes. "
- "Frozen image may not be consistent.");
- xfs_log_unmount_write(mp);
- xfs_unmountfs_writesb(mp);
-}
-
/*
* xfs_unmount_flush implements a set of flush operation on special
* inodes, which are needed as a separate set of operations so that
@@ -196,562 +140,3 @@ fscorrupt_out2:
return XFS_ERROR(EFSCORRUPTED);
}
-/*
- * xfs_sync flushes any pending I/O to file system vfsp.
- *
- * This routine is called by vfs_sync() to make sure that things make it
- * out to disk eventually, on sync() system calls to flush out everything,
- * and when the file system is unmounted. For the vfs_sync() case, all
- * we really need to do is sync out the log to make all of our meta-data
- * updates permanent (except for timestamps). For calls from pflushd(),
- * dirty pages are kept moving by calling pdflush() on the inodes
- * containing them. We also flush the inodes that we can lock without
- * sleeping and the superblock if we can lock it without sleeping from
- * vfs_sync() so that items at the tail of the log are always moving out.
- *
- * Flags:
- * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want
- * to sleep if we can help it. All we really need
- * to do is ensure that the log is synced at least
- * periodically. We also push the inodes and
- * superblock if we can lock them without sleeping
- * and they are not pinned.
- * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not
- * set, then we really want to lock each inode and flush
- * it.
- * SYNC_WAIT - All the flushes that take place in this call should
- * be synchronous.
- * SYNC_DELWRI - This tells us to push dirty pages associated with
- * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to
- * determine if they should be flushed sync, async, or
- * delwri.
- * SYNC_CLOSE - This flag is passed when the system is being
- * unmounted. We should sync and invalidate everything.
- * SYNC_FSDATA - This indicates that the caller would like to make
- * sure the superblock is safe on disk. We can ensure
- * this by simply making sure the log gets flushed
- * if SYNC_BDFLUSH is set, and by actually writing it
- * out otherwise.
- * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete
- * before we return (including direct I/O). Forms the drain
- * side of the write barrier needed to safely quiesce the
- * filesystem.
- *
- */
-int
-xfs_sync(
- xfs_mount_t *mp,
- int flags)
-{
- int error;
-
- /*
- * Get the Quota Manager to flush the dquots.
- *
- * If XFS quota support is not enabled or this filesystem
- * instance does not use quotas XFS_QM_DQSYNC will always
- * return zero.
- */
- error = XFS_QM_DQSYNC(mp, flags);
- if (error) {
- /*
- * If we got an IO error, we will be shutting down.
- * So, there's nothing more for us to do here.
- */
- ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp));
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(error);
- }
-
- if (flags & SYNC_IOWAIT)
- xfs_filestream_flush(mp);
-
- return xfs_syncsub(mp, flags, NULL);
-}
-
-/*
- * xfs sync routine for internal use
- *
- * This routine supports all of the flags defined for the generic vfs_sync
- * interface as explained above under xfs_sync.
- *
- */
-int
-xfs_sync_inodes(
- xfs_mount_t *mp,
- int flags,
- int *bypassed)
-{
- xfs_inode_t *ip = NULL;
- struct inode *vp = NULL;
- int error;
- int last_error;
- uint64_t fflag;
- uint lock_flags;
- uint base_lock_flags;
- boolean_t mount_locked;
- boolean_t vnode_refed;
- int preempt;
- xfs_iptr_t *ipointer;
-#ifdef DEBUG
- boolean_t ipointer_in = B_FALSE;
-
-#define IPOINTER_SET ipointer_in = B_TRUE
-#define IPOINTER_CLR ipointer_in = B_FALSE
-#else
-#define IPOINTER_SET
-#define IPOINTER_CLR
-#endif
-
-
-/* Insert a marker record into the inode list after inode ip. The list
- * must be locked when this is called. After the call the list will no
- * longer be locked.
- */
-#define IPOINTER_INSERT(ip, mp) { \
- ASSERT(ipointer_in == B_FALSE); \
- ipointer->ip_mnext = ip->i_mnext; \
- ipointer->ip_mprev = ip; \
- ip->i_mnext = (xfs_inode_t *)ipointer; \
- ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
- preempt = 0; \
- XFS_MOUNT_IUNLOCK(mp); \
- mount_locked = B_FALSE; \
- IPOINTER_SET; \
- }
-
-/* Remove the marker from the inode list. If the marker was the only item
- * in the list then there are no remaining inodes and we should zero out
- * the whole list. If we are the current head of the list then move the head
- * past us.
- */
-#define IPOINTER_REMOVE(ip, mp) { \
- ASSERT(ipointer_in == B_TRUE); \
- if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
- ip = ipointer->ip_mnext; \
- ip->i_mprev = ipointer->ip_mprev; \
- ipointer->ip_mprev->i_mnext = ip; \
- if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
- mp->m_inodes = ip; \
- } \
- } else { \
- ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
- mp->m_inodes = NULL; \
- ip = NULL; \
- } \
- IPOINTER_CLR; \
- }
-
-#define XFS_PREEMPT_MASK 0x7f
-
- ASSERT(!(flags & SYNC_BDFLUSH));
-
- if (bypassed)
- *bypassed = 0;
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return 0;
- error = 0;
- last_error = 0;
- preempt = 0;
-
- /* Allocate a reference marker */
- ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
-
- fflag = XFS_B_ASYNC; /* default is don't wait */
- if (flags & SYNC_DELWRI)
- fflag = XFS_B_DELWRI;
- if (flags & SYNC_WAIT)
- fflag = 0; /* synchronous overrides all */
-
- base_lock_flags = XFS_ILOCK_SHARED;
- if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
- /*
- * We need the I/O lock if we're going to call any of
- * the flush/inval routines.
- */
- base_lock_flags |= XFS_IOLOCK_SHARED;
- }
-
- XFS_MOUNT_ILOCK(mp);
-
- ip = mp->m_inodes;
-
- mount_locked = B_TRUE;
- vnode_refed = B_FALSE;
-
- IPOINTER_CLR;
-
- do {
- ASSERT(ipointer_in == B_FALSE);
- ASSERT(vnode_refed == B_FALSE);
-
- lock_flags = base_lock_flags;
-
- /*
- * There were no inodes in the list, just break out
- * of the loop.
- */
- if (ip == NULL) {
- break;
- }
-
- /*
- * We found another sync thread marker - skip it
- */
- if (ip->i_mount == NULL) {
- ip = ip->i_mnext;
- continue;
- }
-
- vp = VFS_I(ip);
-
- /*
- * If the vnode is gone then this is being torn down,
- * call reclaim if it is flushed, else let regular flush
- * code deal with it later in the loop.
- */
-
- if (vp == NULL) {
- /* Skip ones already in reclaim */
- if (ip->i_flags & XFS_IRECLAIM) {
- ip = ip->i_mnext;
- continue;
- }
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
- ip = ip->i_mnext;
- } else if ((xfs_ipincount(ip) == 0) &&
- xfs_iflock_nowait(ip)) {
- IPOINTER_INSERT(ip, mp);
-
- xfs_finish_reclaim(ip, 1,
- XFS_IFLUSH_DELWRI_ELSE_ASYNC);
-
- XFS_MOUNT_ILOCK(mp);
- mount_locked = B_TRUE;
- IPOINTER_REMOVE(ip, mp);
- } else {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- ip = ip->i_mnext;
- }
- continue;
- }
-
- if (VN_BAD(vp)) {
- ip = ip->i_mnext;
- continue;
- }
-
- if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
- XFS_MOUNT_IUNLOCK(mp);
- kmem_free(ipointer);
- return 0;
- }
-
- /*
- * Try to lock without sleeping. We're out of order with
- * the inode list lock here, so if we fail we need to drop
- * the mount lock and try again. If we're called from
- * bdflush() here, then don't bother.
- *
- * The inode lock here actually coordinates with the
- * almost spurious inode lock in xfs_ireclaim() to prevent
- * the vnode we handle here without a reference from
- * being freed while we reference it. If we lock the inode
- * while it's on the mount list here, then the spurious inode
- * lock in xfs_ireclaim() after the inode is pulled from
- * the mount list will sleep until we release it here.
- * This keeps the vnode from being freed while we reference
- * it.
- */
- if (xfs_ilock_nowait(ip, lock_flags) == 0) {
- if (vp == NULL) {
- ip = ip->i_mnext;
- continue;
- }
-
- vp = vn_grab(vp);
- if (vp == NULL) {
- ip = ip->i_mnext;
- continue;
- }
-
- IPOINTER_INSERT(ip, mp);
- xfs_ilock(ip, lock_flags);
-
- ASSERT(vp == VFS_I(ip));
- ASSERT(ip->i_mount == mp);
-
- vnode_refed = B_TRUE;
- }
-
- /* From here on in the loop we may have a marker record
- * in the inode list.
- */
-
- /*
- * If we have to flush data or wait for I/O completion
- * we need to drop the ilock that we currently hold.
- * If we need to drop the lock, insert a marker if we
- * have not already done so.
- */
- if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
- ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
- if (mount_locked) {
- IPOINTER_INSERT(ip, mp);
- }
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
- if (flags & SYNC_CLOSE) {
- /* Shutdown case. Flush and invalidate. */
- if (XFS_FORCED_SHUTDOWN(mp))
- xfs_tosspages(ip, 0, -1,
- FI_REMAPF);
- else
- error = xfs_flushinval_pages(ip,
- 0, -1, FI_REMAPF);
- } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
- error = xfs_flush_pages(ip, 0,
- -1, fflag, FI_NONE);
- }
-
- /*
- * When freezing, we need to wait ensure all I/O (including direct
- * I/O) is complete to ensure no further data modification can take
- * place after this point
- */
- if (flags & SYNC_IOWAIT)
- vn_iowait(ip);
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- }
-
- if ((flags & SYNC_ATTR) &&
- (ip->i_update_core ||
- (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) {
- if (mount_locked)
- IPOINTER_INSERT(ip, mp);
-
- if (flags & SYNC_WAIT) {
- xfs_iflock(ip);
- error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
-
- /*
- * If we can't acquire the flush lock, then the inode
- * is already being flushed so don't bother waiting.
- *
- * If we can lock it then do a delwri flush so we can
- * combine multiple inode flushes in each disk write.
- */
- } else if (xfs_iflock_nowait(ip)) {
- error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
- } else if (bypassed) {
- (*bypassed)++;
- }
- }
-
- if (lock_flags != 0) {
- xfs_iunlock(ip, lock_flags);
- }
-
- if (vnode_refed) {
- /*
- * If we had to take a reference on the vnode
- * above, then wait until after we've unlocked
- * the inode to release the reference. This is
- * because we can be already holding the inode
- * lock when IRELE() calls xfs_inactive().
- *
- * Make sure to drop the mount lock before calling
- * IRELE() so that we don't trip over ourselves if
- * we have to go for the mount lock again in the
- * inactive code.
- */
- if (mount_locked) {
- IPOINTER_INSERT(ip, mp);
- }
-
- IRELE(ip);
-
- vnode_refed = B_FALSE;
- }
-
- if (error) {
- last_error = error;
- }
-
- /*
- * bail out if the filesystem is corrupted.
- */
- if (error == EFSCORRUPTED) {
- if (!mount_locked) {
- XFS_MOUNT_ILOCK(mp);
- IPOINTER_REMOVE(ip, mp);
- }
- XFS_MOUNT_IUNLOCK(mp);
- ASSERT(ipointer_in == B_FALSE);
- kmem_free(ipointer);
- return XFS_ERROR(error);
- }
-
- /* Let other threads have a chance at the mount lock
- * if we have looped many times without dropping the
- * lock.
- */
- if ((++preempt & XFS_PREEMPT_MASK) == 0) {
- if (mount_locked) {
- IPOINTER_INSERT(ip, mp);
- }
- }
-
- if (mount_locked == B_FALSE) {
- XFS_MOUNT_ILOCK(mp);
- mount_locked = B_TRUE;
- IPOINTER_REMOVE(ip, mp);
- continue;
- }
-
- ASSERT(ipointer_in == B_FALSE);
- ip = ip->i_mnext;
-
- } while (ip != mp->m_inodes);
-
- XFS_MOUNT_IUNLOCK(mp);
-
- ASSERT(ipointer_in == B_FALSE);
-
- kmem_free(ipointer);
- return XFS_ERROR(last_error);
-}
-
-/*
- * xfs sync routine for internal use
- *
- * This routine supports all of the flags defined for the generic vfs_sync
- * interface as explained above under xfs_sync.
- *
- */
-int
-xfs_syncsub(
- xfs_mount_t *mp,
- int flags,
- int *bypassed)
-{
- int error = 0;
- int last_error = 0;
- uint log_flags = XFS_LOG_FORCE;
- xfs_buf_t *bp;
- xfs_buf_log_item_t *bip;
-
- /*
- * Sync out the log. This ensures that the log is periodically
- * flushed even if there is not enough activity to fill it up.
- */
- if (flags & SYNC_WAIT)
- log_flags |= XFS_LOG_SYNC;
-
- xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
-
- if (flags & (SYNC_ATTR|SYNC_DELWRI)) {
- if (flags & SYNC_BDFLUSH)
- xfs_finish_reclaim_all(mp, 1);
- else
- error = xfs_sync_inodes(mp, flags, bypassed);
- }
-
- /*
- * Flushing out dirty data above probably generated more
- * log activity, so if this isn't vfs_sync() then flush
- * the log again.
- */
- if (flags & SYNC_DELWRI) {
- xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
- }
-
- if (flags & SYNC_FSDATA) {
- /*
- * If this is vfs_sync() then only sync the superblock
- * if we can lock it without sleeping and it is not pinned.
- */
- if (flags & SYNC_BDFLUSH) {
- bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
- if (bp != NULL) {
- bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
- if ((bip != NULL) &&
- xfs_buf_item_dirty(bip)) {
- if (!(XFS_BUF_ISPINNED(bp))) {
- XFS_BUF_ASYNC(bp);
- error = xfs_bwrite(mp, bp);
- } else {
- xfs_buf_relse(bp);
- }
- } else {
- xfs_buf_relse(bp);
- }
- }
- } else {
- bp = xfs_getsb(mp, 0);
- /*
- * If the buffer is pinned then push on the log so
- * we won't get stuck waiting in the write for
- * someone, maybe ourselves, to flush the log.
- * Even though we just pushed the log above, we
- * did not have the superblock buffer locked at
- * that point so it can become pinned in between
- * there and here.
- */
- if (XFS_BUF_ISPINNED(bp))
- xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
- if (flags & SYNC_WAIT)
- XFS_BUF_UNASYNC(bp);
- else
- XFS_BUF_ASYNC(bp);
- error = xfs_bwrite(mp, bp);
- }
- if (error) {
- last_error = error;
- }
- }
-
- /*
- * Now check to see if the log needs a "dummy" transaction.
- */
- if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) {
- xfs_trans_t *tp;
- xfs_inode_t *ip;
-
- /*
- * Put a dummy transaction in the log to tell
- * recovery that all others are OK.
- */
- tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
- if ((error = xfs_trans_reserve(tp, 0,
- XFS_ICHANGE_LOG_RES(mp),
- 0, 0, 0))) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- ip = mp->m_rootip;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, ip);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_trans_commit(tp, 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
- }
-
- /*
- * When shutting down, we need to insure that the AIL is pushed
- * to disk or the filesystem can appear corrupt from the PROM.
- */
- if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) {
- XFS_bflush(mp->m_ddev_targp);
- if (mp->m_rtdev_targp) {
- XFS_bflush(mp->m_rtdev_targp);
- }
- }
-
- return XFS_ERROR(last_error);
-}
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index a74b05087da4..6b8e0b52b95e 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,9 +8,7 @@ struct kstatfs;
struct xfs_mount;
struct xfs_mount_args;
-int xfs_sync(struct xfs_mount *mp, int flags);
void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
int lnnum);
-void xfs_attr_quiesce(struct xfs_mount *mp);
#endif /* _XFS_VFSOPS_H */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8b6812f66a15..0574aadc4d3c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -79,8 +79,7 @@ int
xfs_setattr(
struct xfs_inode *ip,
struct iattr *iattr,
- int flags,
- cred_t *credp)
+ int flags)
{
xfs_mount_t *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
@@ -233,10 +232,6 @@ xfs_setattr(
/*
* Change file ownership. Must be the owner or privileged.
- * If the system was configured with the "restricted_chown"
- * option, the owner is not permitted to give away the file,
- * and can change the group id only to a group of which he
- * or she is a member.
*/
if (mask & (ATTR_UID|ATTR_GID)) {
/*
@@ -260,9 +255,8 @@ xfs_setattr(
* shall be equal to either the group ID or one of the
* supplementary group IDs of the calling process.
*/
- if (restricted_chown &&
- (iuid != uid || (igid != gid &&
- !in_group_p((gid_t)gid))) &&
+ if ((iuid != uid ||
+ (igid != gid && !in_group_p((gid_t)gid))) &&
!capable(CAP_CHOWN)) {
code = XFS_ERROR(EPERM);
goto error_return;
@@ -456,10 +450,6 @@ xfs_setattr(
/*
* Change file ownership. Must be the owner or privileged.
- * If the system was configured with the "restricted_chown"
- * option, the owner is not permitted to give away the file,
- * and can change the group id only to a group of which he
- * or she is a member.
*/
if (mask & (ATTR_UID|ATTR_GID)) {
/*
@@ -1029,6 +1019,12 @@ xfs_inactive_symlink_rmt(
goto error0;
}
/*
+ * transaction commit worked ok so we can drop the extra ticket
+ * reference that we gained in xfs_trans_dup()
+ */
+ xfs_log_ticket_put(tp->t_ticket);
+
+ /*
* Remove the memory for extent descriptions (just bookkeeping).
*/
if (ip->i_df.if_bytes)
@@ -2009,7 +2005,7 @@ xfs_remove(
goto out_bmap_cancel;
/*
- * Drop the link from dp to ip.
+ * Drop the "." link from ip to self.
*/
error = xfs_droplink(tp, ip);
if (error)
@@ -2024,7 +2020,7 @@ xfs_remove(
}
/*
- * Drop the "." link from ip to self.
+ * Drop the link from dp to ip.
*/
error = xfs_droplink(tp, ip);
if (error)
@@ -2833,122 +2829,10 @@ xfs_reclaim(
if (!ip->i_update_core && (ip->i_itemp == NULL)) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_iflock(ip);
- return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
- } else {
- xfs_mount_t *mp = ip->i_mount;
-
- /* Protect sync and unpin from us */
- XFS_MOUNT_ILOCK(mp);
- spin_lock(&ip->i_flags_lock);
- __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
- VFS_I(ip)->i_private = NULL;
- ip->i_vnode = NULL;
- spin_unlock(&ip->i_flags_lock);
- list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
- XFS_MOUNT_IUNLOCK(mp);
- }
- return 0;
-}
-
-int
-xfs_finish_reclaim(
- xfs_inode_t *ip,
- int locked,
- int sync_mode)
-{
- xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
- struct inode *vp = VFS_I(ip);
-
- if (vp && VN_BAD(vp))
- goto reclaim;
-
- /* The hash lock here protects a thread in xfs_iget_core from
- * racing with us on linking the inode back with a vnode.
- * Once we have the XFS_IRECLAIM flag set it will not touch
- * us.
- */
- write_lock(&pag->pag_ici_lock);
- spin_lock(&ip->i_flags_lock);
- if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
- (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
- spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
- if (locked) {
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
- return 1;
- }
- __xfs_iflags_set(ip, XFS_IRECLAIM);
- spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
- xfs_put_perag(ip->i_mount, pag);
-
- /*
- * If the inode is still dirty, then flush it out. If the inode
- * is not in the AIL, then it will be OK to flush it delwri as
- * long as xfs_iflush() does not keep any references to the inode.
- * We leave that decision up to xfs_iflush() since it has the
- * knowledge of whether it's OK to simply do a delwri flush of
- * the inode or whether we need to wait until the inode is
- * pulled from the AIL.
- * We get the flush lock regardless, though, just to make sure
- * we don't free it while it is being flushed.
- */
- if (!locked) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_iflock(ip);
+ xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+ return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
}
-
- /*
- * In the case of a forced shutdown we rely on xfs_iflush() to
- * wait for the inode to be unpinned before returning an error.
- */
- if (xfs_iflush(ip, sync_mode) == 0) {
- /* synchronize with xfs_iflush_done */
- xfs_iflock(ip);
- xfs_ifunlock(ip);
- }
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- reclaim:
- xfs_ireclaim(ip);
- return 0;
-}
-
-int
-xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
-{
- int purged;
- xfs_inode_t *ip, *n;
- int done = 0;
-
- while (!done) {
- purged = 0;
- XFS_MOUNT_ILOCK(mp);
- list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
- if (noblock) {
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
- continue;
- if (xfs_ipincount(ip) ||
- !xfs_iflock_nowait(ip)) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- continue;
- }
- }
- XFS_MOUNT_IUNLOCK(mp);
- if (xfs_finish_reclaim(ip, noblock,
- XFS_IFLUSH_DELWRI_ELSE_ASYNC))
- delay(1);
- purged = 1;
- break;
- }
-
- done = !purged;
- }
-
- XFS_MOUNT_IUNLOCK(mp);
+ xfs_inode_set_reclaim_tag(ip);
return 0;
}
@@ -3474,7 +3358,6 @@ xfs_change_file_space(
int cmd,
xfs_flock64_t *bf,
xfs_off_t offset,
- cred_t *credp,
int attr_flags)
{
xfs_mount_t *mp = ip->i_mount;
@@ -3562,7 +3445,7 @@ xfs_change_file_space(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = startoffset;
- error = xfs_setattr(ip, &iattr, attr_flags, credp);
+ error = xfs_setattr(ip, &iattr, attr_flags);
if (error)
return error;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index e932a96bec54..bf9fd24fa5eb 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -15,8 +15,7 @@ struct xfs_iomap;
int xfs_open(struct xfs_inode *ip);
-int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
- struct cred *credp);
+int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
@@ -28,24 +27,23 @@ int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
- xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
+ xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
struct xfs_name *target_name);
int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
- mode_t mode, struct xfs_inode **ipp, struct cred *credp);
+ mode_t mode, struct xfs_inode **ipp, cred_t *credp);
int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
xfs_off_t *offset, filldir_t filldir);
int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
const char *target_path, mode_t mode, struct xfs_inode **ipp,
- struct cred *credp);
+ cred_t *credp);
int xfs_inode_flush(struct xfs_inode *ip, int flags);
int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
int xfs_reclaim(struct xfs_inode *ip);
int xfs_change_file_space(struct xfs_inode *ip, int cmd,
- xfs_flock64_t *bf, xfs_off_t offset,
- struct cred *credp, int attr_flags);
+ xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
struct xfs_inode *src_ip, struct xfs_inode *target_dp,
struct xfs_name *target_name, struct xfs_inode *target_ip);