summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2010-09-01 10:46:12 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2010-09-01 10:46:12 +1000
commitdaa77a83c33209717456b4b83a626056a075809a (patch)
treeca35a7ebeba96330ba4e98f17cae4073e28eaec3
parentc2c364d8d7647cf6b488de77e10ac1174433a417 (diff)
parentbc844a4824161d0232eda7787a282b2eec1efcad (diff)
Merge remote branch 'ocfs2/linux-next'
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/super.c8
-rw-r--r--fs/jbd2/journal.c4
-rw-r--r--fs/libfs.c29
-rw-r--r--fs/ocfs2/aops.c9
-rw-r--r--fs/ocfs2/aops.h3
-rw-r--r--fs/ocfs2/blockcheck.c4
-rw-r--r--fs/ocfs2/file.c49
-rw-r--r--fs/ocfs2/inode.c6
-rw-r--r--fs/ocfs2/inode.h11
-rw-r--r--fs/ocfs2/ioctl.c356
-rw-r--r--fs/ocfs2/journal.c9
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/mmap.c7
-rw-r--r--fs/ocfs2/ocfs2.h23
-rw-r--r--fs/ocfs2/ocfs2_ioctl.h95
-rw-r--r--fs/ocfs2/refcounttree.c43
-rw-r--r--fs/ocfs2/refcounttree.h7
-rw-r--r--fs/ocfs2/super.c51
-rw-r--r--include/linux/fs.h2
20 files changed, 645 insertions, 78 deletions
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 5dbf4dba03c4..a367dd044280 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- if (le32_to_cpu(es->s_blocks_count) >
- (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
+ if (generic_check_addressable(sb->s_blocksize_bits,
+ le32_to_cpu(es->s_blocks_count))) {
ext3_msg(sb, KERN_ERR,
"error: filesystem is too large to mount safely");
if (sizeof(sector_t) < 8)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 26147746c272..7f47c366bf15 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
- if ((ext4_blocks_count(es) >
- (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) ||
- (ext4_blocks_count(es) >
- (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
+ ret = generic_check_addressable(sb->s_blocksize_bits,
+ ext4_blocks_count(es));
+ if (ret) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
- ret = -EFBIG;
goto failed_mount;
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0e8014ea6b94..262419f83d80 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1371,6 +1371,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
if (!compat && !ro && !incompat)
return 1;
+ /* Load journal superblock if it is not loaded yet. */
+ if (journal->j_format_version == 0 &&
+ journal_get_superblock(journal) != 0)
+ return 0;
if (journal->j_format_version == 1)
return 0;
diff --git a/fs/libfs.c b/fs/libfs.c
index 0a9da95317f7..62baa0387d6e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync)
}
EXPORT_SYMBOL(generic_file_fsync);
+/**
+ * generic_check_addressable - Check addressability of file system
+ * @blocksize_bits: log of file system block size
+ * @num_blocks: number of blocks in file system
+ *
+ * Determine whether a file system with @num_blocks blocks (and a
+ * block size of 2**@blocksize_bits) is addressable by the sector_t
+ * and page cache of the system. Return 0 if so and -EFBIG otherwise.
+ */
+int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
+{
+ u64 last_fs_block = num_blocks - 1;
+ u64 last_fs_page =
+ last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits);
+
+ if (unlikely(num_blocks == 0))
+ return 0;
+
+ if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT))
+ return -EINVAL;
+
+ if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
+ (last_fs_page > (pgoff_t)(~0ULL))) {
+ return -EFBIG;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(generic_check_addressable);
+
/*
* No-op implementation of ->fsync for in-memory filesystems.
*/
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0de69c9a08be..5cfeee118158 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt {
* out in so that future reads from that region will get
* zero's.
*/
- struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
unsigned int w_num_pages;
+ struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
struct page *w_target_page;
/*
@@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
return ret;
}
-int ocfs2_write_begin_nolock(struct address_space *mapping,
+int ocfs2_write_begin_nolock(struct file *filp,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page)
@@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
mlog_errno(ret);
goto out;
} else if (ret == 1) {
- ret = ocfs2_refcount_cow(inode, di_bh,
+ ret = ocfs2_refcount_cow(inode, filp, di_bh,
wc->w_cpos, wc->w_clen, UINT_MAX);
if (ret) {
mlog_errno(ret);
@@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
+ ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep,
fsdata, di_bh, NULL);
if (ret) {
mlog_errno(ret);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index c48e93ffc513..7606f663da6d 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
-int ocfs2_write_begin_nolock(struct address_space *mapping,
+int ocfs2_write_begin_nolock(struct file *filp,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page);
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index ec6d12339593..c7ee03c22226 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
ocfs2_blockcheck_inc_failure(stats);
mlog(ML_ERROR,
- "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
+ "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
/* Ok, try ECC fixups */
@@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
goto out;
}
- mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
+ mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
rc = -EIO;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 81296b4e3646..b714f76b82db 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -36,6 +36,7 @@
#include <linux/writeback.h>
#include <linux/falloc.h>
#include <linux/quotaops.h>
+#include <linux/blkdev.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
@@ -63,12 +64,6 @@
#include "buffer_head_io.h"
-static int ocfs2_sync_inode(struct inode *inode)
-{
- filemap_fdatawrite(inode->i_mapping);
- return sync_mapping_buffers(inode->i_mapping);
-}
-
static int ocfs2_init_file_private(struct inode *inode, struct file *file)
{
struct ocfs2_file_private *fp;
@@ -186,12 +181,16 @@ static int ocfs2_sync_file(struct file *file, int datasync)
mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
dentry->d_name.len, dentry->d_name.name);
- err = ocfs2_sync_inode(dentry->d_inode);
- if (err)
- goto bail;
-
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
+ /*
+ * We still have to flush drive's caches to get data to the
+ * platter
+ */
+ if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
+ NULL, BLKDEV_IFL_WAIT);
goto bail;
+ }
journal = osb->journal->j_journal;
err = jbd2_journal_force_commit(journal);
@@ -361,7 +360,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
goto out;
- return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
+ return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
out:
return status;
@@ -904,8 +903,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
zero_clusters = last_cpos - zero_cpos;
if (needs_cow) {
- rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
- UINT_MAX);
+ rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
+ zero_clusters, UINT_MAX);
if (rc) {
mlog_errno(rc);
goto out;
@@ -2053,6 +2052,7 @@ out:
}
static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
+ struct file *file,
loff_t pos, size_t count,
int *meta_level)
{
@@ -2070,7 +2070,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
*meta_level = 1;
- ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
+ ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
if (ret)
mlog_errno(ret);
out:
@@ -2078,7 +2078,7 @@ out:
return ret;
}
-static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
+static int ocfs2_prepare_inode_for_write(struct file *file,
loff_t *ppos,
size_t count,
int appending,
@@ -2086,6 +2086,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
int *has_refcount)
{
int ret = 0, meta_level = 0;
+ struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
loff_t saved_pos, end;
@@ -2141,6 +2142,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
meta_level = -1;
ret = ocfs2_prepare_inode_for_refcount(inode,
+ file,
saved_pos,
count,
&meta_level);
@@ -2255,7 +2257,7 @@ relock:
}
can_do_direct = direct_io;
- ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
+ ret = ocfs2_prepare_inode_for_write(file, ppos,
iocb->ki_left, appending,
&can_do_direct, &has_refcount);
if (ret < 0) {
@@ -2303,17 +2305,6 @@ relock:
written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
ppos, count, ocount);
if (written < 0) {
- /*
- * direct write may have instantiated a few
- * blocks outside i_size. Trim these off again.
- * Don't need i_size_read because we hold i_mutex.
- *
- * XXX(truncate): this looks buggy because ocfs2 did not
- * actually implement ->truncate. Take a look at
- * the new truncate sequence and update this accordingly
- */
- if (*ppos + count > inode->i_size)
- truncate_setsize(inode, inode->i_size);
ret = written;
goto out_dio;
}
@@ -2385,7 +2376,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
{
int ret;
- ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
+ ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
sd->total_len, 0, NULL, NULL);
if (ret < 0) {
mlog_errno(ret);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0492464916b1..eece3e05d9d0 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
OCFS2_BH_IGNORE_CACHE);
} else {
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
- if (!status)
+ /*
+ * If buffer is in jbd, then its checksum may not have been
+ * computed as yet.
+ */
+ if (!status && !buffer_jbd(bh))
status = ocfs2_validate_inode_block(osb->sb, bh);
}
if (status < 0) {
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 6de5a869db30..0bc477a3aeb8 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -46,27 +46,24 @@ struct ocfs2_inode_info
/* These fields are protected by ip_lock */
spinlock_t ip_lock;
u32 ip_open_count;
- u32 ip_clusters;
struct list_head ip_io_markers;
+ u32 ip_clusters;
+ u16 ip_dyn_features;
struct mutex ip_io_mutex;
-
u32 ip_flags; /* see below */
u32 ip_attr; /* inode attributes */
- u16 ip_dyn_features;
/* protected by recovery_lock. */
struct inode *ip_next_orphan;
- u32 ip_dir_start_lookup;
-
struct ocfs2_caching_info ip_metadata_cache;
-
struct ocfs2_extent_map ip_extent_map;
-
struct inode vfs_inode;
struct jbd2_inode ip_jinode;
+ u32 ip_dir_start_lookup;
+
/* Only valid if the inode is the dir. */
u32 ip_last_used_slot;
u64 ip_last_used_group;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7d9d9c132cef..7a4868196152 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -26,6 +26,26 @@
#include <linux/ext2_fs.h>
+#define o2info_from_user(a, b) \
+ copy_from_user(&(a), (b), sizeof(a))
+#define o2info_to_user(a, b) \
+ copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
+
+/*
+ * This call is void because we are already reporting an error that may
+ * be -EFAULT. The error will be returned from the ioctl(2) call. It's
+ * just a best-effort to tell userspace that this request caused the error.
+ */
+static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq,
+ struct ocfs2_info_request __user *req)
+{
+ kreq->ir_flags |= OCFS2_INFO_FL_ERROR;
+ (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags));
+}
+
+#define o2info_set_request_error(a, b) \
+ __o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
+
static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
{
int status;
@@ -109,6 +129,328 @@ bail:
return status;
}
+int ocfs2_info_handle_blocksize(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_blocksize oib;
+
+ if (o2info_from_user(oib, req))
+ goto bail;
+
+ oib.ib_blocksize = inode->i_sb->s_blocksize;
+ oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED;
+
+ if (o2info_to_user(oib, req))
+ goto bail;
+
+ status = 0;
+bail:
+ if (status)
+ o2info_set_request_error(oib, req);
+
+ return status;
+}
+
+int ocfs2_info_handle_clustersize(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_clustersize oic;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ if (o2info_from_user(oic, req))
+ goto bail;
+
+ oic.ic_clustersize = osb->s_clustersize;
+ oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED;
+
+ if (o2info_to_user(oic, req))
+ goto bail;
+
+ status = 0;
+bail:
+ if (status)
+ o2info_set_request_error(oic, req);
+
+ return status;
+}
+
+int ocfs2_info_handle_maxslots(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_maxslots oim;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ if (o2info_from_user(oim, req))
+ goto bail;
+
+ oim.im_max_slots = osb->max_slots;
+ oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED;
+
+ if (o2info_to_user(oim, req))
+ goto bail;
+
+ status = 0;
+bail:
+ if (status)
+ o2info_set_request_error(oim, req);
+
+ return status;
+}
+
+int ocfs2_info_handle_label(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_label oil;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ if (o2info_from_user(oil, req))
+ goto bail;
+
+ memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
+ oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED;
+
+ if (o2info_to_user(oil, req))
+ goto bail;
+
+ status = 0;
+bail:
+ if (status)
+ o2info_set_request_error(oil, req);
+
+ return status;
+}
+
+int ocfs2_info_handle_uuid(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_uuid oiu;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ if (o2info_from_user(oiu, req))
+ goto bail;
+
+ memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
+ oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED;
+
+ if (o2info_to_user(oiu, req))
+ goto bail;
+
+ status = 0;
+bail:
+ if (status)
+ o2info_set_request_error(oiu, req);
+
+ return status;
+}
+
+int ocfs2_info_handle_fs_features(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_fs_features oif;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ if (o2info_from_user(oif, req))
+ goto bail;
+
+ oif.if_compat_features = osb->s_feature_compat;
+ oif.if_incompat_features = osb->s_feature_incompat;
+ oif.if_ro_compat_features = osb->s_feature_ro_compat;
+ oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED;
+
+ if (o2info_to_user(oif, req))
+ goto bail;
+
+ status = 0;
+bail:
+ if (status)
+ o2info_set_request_error(oif, req);
+
+ return status;
+}
+
+int ocfs2_info_handle_journal_size(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_journal_size oij;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ if (o2info_from_user(oij, req))
+ goto bail;
+
+ oij.ij_journal_size = osb->journal->j_inode->i_size;
+
+ oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED;
+
+ if (o2info_to_user(oij, req))
+ goto bail;
+
+ status = 0;
+bail:
+ if (status)
+ o2info_set_request_error(oij, req);
+
+ return status;
+}
+
+int ocfs2_info_handle_unknown(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_request oir;
+
+ if (o2info_from_user(oir, req))
+ goto bail;
+
+ oir.ir_flags &= ~OCFS2_INFO_FL_FILLED;
+
+ if (o2info_to_user(oir, req))
+ goto bail;
+
+ status = 0;
+bail:
+ if (status)
+ o2info_set_request_error(oir, req);
+
+ return status;
+}
+
+/*
+ * Validate and distinguish OCFS2_IOC_INFO requests.
+ *
+ * - validate the magic number.
+ * - distinguish different requests.
+ * - validate size of different requests.
+ */
+int ocfs2_info_handle_request(struct inode *inode,
+ struct ocfs2_info_request __user *req)
+{
+ int status = -EFAULT;
+ struct ocfs2_info_request oir;
+
+ if (o2info_from_user(oir, req))
+ goto bail;
+
+ status = -EINVAL;
+ if (oir.ir_magic != OCFS2_INFO_MAGIC)
+ goto bail;
+
+ switch (oir.ir_code) {
+ case OCFS2_INFO_BLOCKSIZE:
+ if (oir.ir_size == sizeof(struct ocfs2_info_blocksize))
+ status = ocfs2_info_handle_blocksize(inode, req);
+ break;
+ case OCFS2_INFO_CLUSTERSIZE:
+ if (oir.ir_size == sizeof(struct ocfs2_info_clustersize))
+ status = ocfs2_info_handle_clustersize(inode, req);
+ break;
+ case OCFS2_INFO_MAXSLOTS:
+ if (oir.ir_size == sizeof(struct ocfs2_info_maxslots))
+ status = ocfs2_info_handle_maxslots(inode, req);
+ break;
+ case OCFS2_INFO_LABEL:
+ if (oir.ir_size == sizeof(struct ocfs2_info_label))
+ status = ocfs2_info_handle_label(inode, req);
+ break;
+ case OCFS2_INFO_UUID:
+ if (oir.ir_size == sizeof(struct ocfs2_info_uuid))
+ status = ocfs2_info_handle_uuid(inode, req);
+ break;
+ case OCFS2_INFO_FS_FEATURES:
+ if (oir.ir_size == sizeof(struct ocfs2_info_fs_features))
+ status = ocfs2_info_handle_fs_features(inode, req);
+ break;
+ case OCFS2_INFO_JOURNAL_SIZE:
+ if (oir.ir_size == sizeof(struct ocfs2_info_journal_size))
+ status = ocfs2_info_handle_journal_size(inode, req);
+ break;
+ default:
+ status = ocfs2_info_handle_unknown(inode, req);
+ break;
+ }
+
+bail:
+ return status;
+}
+
+int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx,
+ u64 *req_addr, int compat_flag)
+{
+ int status = -EFAULT;
+ u64 __user *bp = NULL;
+
+ if (compat_flag) {
+#ifdef CONFIG_COMPAT
+ /*
+ * pointer bp stores the base address of a pointers array,
+ * which collects all addresses of separate request.
+ */
+ bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests);
+#else
+ BUG();
+#endif
+ } else
+ bp = (u64 __user *)(unsigned long)(info->oi_requests);
+
+ if (o2info_from_user(*req_addr, bp + idx))
+ goto bail;
+
+ status = 0;
+bail:
+ return status;
+}
+
+/*
+ * OCFS2_IOC_INFO handles an array of requests passed from userspace.
+ *
+ * ocfs2_info_handle() recevies a large info aggregation, grab and
+ * validate the request count from header, then break it into small
+ * pieces, later specific handlers can handle them one by one.
+ *
+ * Idea here is to make each separate request small enough to ensure
+ * a better backward&forward compatibility, since a small piece of
+ * request will be less likely to be broken if disk layout get changed.
+ */
+int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info,
+ int compat_flag)
+{
+ int i, status = 0;
+ u64 req_addr;
+ struct ocfs2_info_request __user *reqp;
+
+ if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) ||
+ (!info->oi_requests)) {
+ status = -EINVAL;
+ goto bail;
+ }
+
+ for (i = 0; i < info->oi_count; i++) {
+
+ status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag);
+ if (status)
+ break;
+
+ reqp = (struct ocfs2_info_request *)(unsigned long)req_addr;
+ if (!reqp) {
+ status = -EINVAL;
+ goto bail;
+ }
+
+ status = ocfs2_info_handle_request(inode, reqp);
+ if (status)
+ break;
+ }
+
+bail:
+ return status;
+}
+
long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct reflink_arguments args;
const char *old_path, *new_path;
bool preserve;
+ struct ocfs2_info info;
switch (cmd) {
case OCFS2_IOC_GETFLAGS:
@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
preserve = (args.preserve != 0);
return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
+ case OCFS2_IOC_INFO:
+ if (copy_from_user(&info, (struct ocfs2_info __user *)arg,
+ sizeof(struct ocfs2_info)))
+ return -EFAULT;
+
+ return ocfs2_info_handle(inode, &info, 0);
default:
return -ENOTTY;
}
@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
bool preserve;
struct reflink_arguments args;
struct inode *inode = file->f_path.dentry->d_inode;
+ struct ocfs2_info info;
switch (cmd) {
case OCFS2_IOC32_GETFLAGS:
@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
compat_ptr(args.new_path), preserve);
+ case OCFS2_IOC_INFO:
+ if (copy_from_user(&info, (struct ocfs2_info __user *)arg,
+ sizeof(struct ocfs2_info)))
+ return -EFAULT;
+
+ return ocfs2_info_handle(inode, &info, 1);
default:
return -ENOIOCTLCMD;
}
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 9b57c0350ff9..faa2303dbf0a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
{
int status = 0;
unsigned int flushed;
- unsigned long old_id;
struct ocfs2_journal *journal = NULL;
mlog_entry_void();
@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
goto finally;
}
- old_id = ocfs2_inc_trans_id(journal);
+ ocfs2_inc_trans_id(journal);
flushed = atomic_read(&journal->j_num_trans);
atomic_set(&journal->j_num_trans, 0);
@@ -342,9 +341,6 @@ finally:
return status;
}
-/* pass it NULL and it will allocate a new handle object for you. If
- * you pass it a handle however, it may still return error, in which
- * case it has free'd the passed handle for you. */
handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
{
journal_t *journal = osb->journal->j_journal;
@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
os = &osb->osb_orphan_scan;
+ mlog(0, "Begin orphan scan\n");
+
if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
goto out;
@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
unlock:
ocfs2_orphan_scan_unlock(osb, seqno);
out:
+ mlog(0, "Orphan scan completed\n");
return;
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index b5baaa8e710f..43e56b97f9c0 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -67,11 +67,12 @@ struct ocfs2_journal {
struct buffer_head *j_bh; /* Journal disk inode block */
atomic_t j_num_trans; /* Number of transactions
* currently in the system. */
+ spinlock_t j_lock;
unsigned long j_trans_id;
struct rw_semaphore j_trans_barrier;
wait_queue_head_t j_checkpointed;
- spinlock_t j_lock;
+ /* both fields protected by j_lock*/
struct list_head j_la_cleanups;
struct work_struct j_recovery_work;
};
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index af2b8fe1f139..b04d6961c0d4 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
return ret;
}
-static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
+static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
struct page *page)
{
int ret;
+ struct inode *inode = file->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping;
loff_t pos = page_offset(page);
unsigned int len = PAGE_CACHE_SIZE;
@@ -109,7 +110,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
if (page->index == last_index)
len = size & ~PAGE_CACHE_MASK;
- ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
+ ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page,
&fsdata, di_bh, page);
if (ret) {
if (ret != -ENOSPC)
@@ -157,7 +158,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = __ocfs2_page_mkwrite(inode, di_bh, page);
+ ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c67003b6b5a2..65739b3b3276 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
struct ocfs2_lock_res {
void *l_priv;
struct ocfs2_lock_res_ops *l_ops;
- spinlock_t l_lock;
+
struct list_head l_blocked_list;
struct list_head l_mask_waiters;
- enum ocfs2_lock_type l_type;
unsigned long l_flags;
char l_name[OCFS2_LOCK_ID_MAX_LEN];
- int l_level;
unsigned int l_ro_holders;
unsigned int l_ex_holders;
- struct ocfs2_dlm_lksb l_lksb;
+ unsigned char l_level;
+
+ /* Data packed - type enum ocfs2_lock_type */
+ unsigned char l_type;
/* used from AST/BAST funcs. */
- enum ocfs2_ast_action l_action;
- enum ocfs2_unlock_action l_unlock_action;
- int l_requested;
- int l_blocking;
+ /* Data packed - enum type ocfs2_ast_action */
+ unsigned char l_action;
+ /* Data packed - enum type ocfs2_unlock_action */
+ unsigned char l_unlock_action;
+ unsigned char l_requested;
+ unsigned char l_blocking;
unsigned int l_pending_gen;
+ spinlock_t l_lock;
+
+ struct ocfs2_dlm_lksb l_lksb;
+
wait_queue_head_t l_event;
struct list_head l_debug_list;
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 2d3420af1a83..9bc535499868 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -76,4 +76,99 @@ struct reflink_arguments {
};
#define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments)
+/* Following definitions dedicated for ocfs2_info_request ioctls. */
+#define OCFS2_INFO_MAX_REQUEST (50)
+#define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2)
+
+/* Magic number of all requests */
+#define OCFS2_INFO_MAGIC (0x4F32494E)
+
+/*
+ * Always try to separate info request into small pieces to
+ * guarantee the backward&forward compatibility.
+ */
+struct ocfs2_info {
+ __u64 oi_requests; /* Array of __u64 pointers to requests */
+ __u32 oi_count; /* Number of requests in info_requests */
+ __u32 oi_pad;
+};
+
+struct ocfs2_info_request {
+/*00*/ __u32 ir_magic; /* Magic number */
+ __u32 ir_code; /* Info request code */
+ __u32 ir_size; /* Size of request */
+ __u32 ir_flags; /* Request flags */
+/*10*/ /* Request specific fields */
+};
+
+struct ocfs2_info_clustersize {
+ struct ocfs2_info_request ic_req;
+ __u32 ic_clustersize;
+ __u32 ic_pad;
+};
+
+struct ocfs2_info_blocksize {
+ struct ocfs2_info_request ib_req;
+ __u32 ib_blocksize;
+ __u32 ib_pad;
+};
+
+struct ocfs2_info_maxslots {
+ struct ocfs2_info_request im_req;
+ __u32 im_max_slots;
+ __u32 im_pad;
+};
+
+struct ocfs2_info_label {
+ struct ocfs2_info_request il_req;
+ __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN];
+} __attribute__ ((packed));
+
+struct ocfs2_info_uuid {
+ struct ocfs2_info_request iu_req;
+ __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1];
+} __attribute__ ((packed));
+
+struct ocfs2_info_fs_features {
+ struct ocfs2_info_request if_req;
+ __u32 if_compat_features;
+ __u32 if_incompat_features;
+ __u32 if_ro_compat_features;
+ __u32 if_pad;
+};
+
+struct ocfs2_info_journal_size {
+ struct ocfs2_info_request ij_req;
+ __u64 ij_journal_size;
+};
+
+/* Codes for ocfs2_info_request */
+enum ocfs2_info_type {
+ OCFS2_INFO_CLUSTERSIZE = 1,
+ OCFS2_INFO_BLOCKSIZE,
+ OCFS2_INFO_MAXSLOTS,
+ OCFS2_INFO_LABEL,
+ OCFS2_INFO_UUID,
+ OCFS2_INFO_FS_FEATURES,
+ OCFS2_INFO_JOURNAL_SIZE,
+ OCFS2_INFO_NUM_TYPES
+};
+
+/* Flags for struct ocfs2_info_request */
+/* Filled by the caller */
+#define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not
+ required. This is a hint.
+ It is up to ocfs2 whether
+ the request can be fulfilled
+ without locking. */
+/* Filled by ocfs2 */
+#define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood
+ this request and
+ filled in the answer */
+
+#define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during
+ request handling. */
+
+#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info)
+
#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 73a11ccfd4c2..7f13d2059b44 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -49,6 +49,7 @@
struct ocfs2_cow_context {
struct inode *inode;
+ struct file *file;
u32 cow_start;
u32 cow_len;
struct ocfs2_extent_tree data_et;
@@ -2932,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
struct page *page;
pgoff_t page_index;
- unsigned int from, to;
+ unsigned int from, to, readahead_pages;
loff_t offset, end, map_end;
struct address_space *mapping = context->inode->i_mapping;
mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
new_cluster, new_len, cpos);
+ readahead_pages =
+ (ocfs2_cow_contig_clusters(sb) <<
+ OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT;
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
/*
@@ -2969,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
BUG_ON(PageDirty(page));
+ if (PageReadahead(page) && context->file) {
+ page_cache_async_readahead(mapping,
+ &context->file->f_ra,
+ context->file,
+ page, page_index,
+ readahead_pages);
+ }
+
if (!PageUptodate(page)) {
ret = block_read_full_page(page, ocfs2_get_block);
if (ret) {
@@ -3408,12 +3420,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
return ret;
}
+static void ocfs2_readahead_for_cow(struct inode *inode,
+ struct file *file,
+ u32 start, u32 len)
+{
+ struct address_space *mapping;
+ pgoff_t index;
+ unsigned long num_pages;
+ int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+
+ if (!file)
+ return;
+
+ mapping = file->f_mapping;
+ num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
+ if (!num_pages)
+ num_pages = 1;
+
+ index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
+ page_cache_sync_readahead(mapping, &file->f_ra, file,
+ index, num_pages);
+}
+
/*
* Starting at cpos, try to CoW write_len clusters. Don't CoW
* past max_cpos. This will stop when it runs into a hole or an
* unrefcounted extent.
*/
static int ocfs2_refcount_cow_hunk(struct inode *inode,
+ struct file *file,
struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos)
{
@@ -3442,6 +3477,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
BUG_ON(cow_len == 0);
+ ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
+
context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
if (!context) {
ret = -ENOMEM;
@@ -3463,6 +3500,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
context->ref_root_bh = ref_root_bh;
context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
context->get_clusters = ocfs2_di_get_clusters;
+ context->file = file;
ocfs2_init_dinode_extent_tree(&context->data_et,
INODE_CACHE(inode), di_bh);
@@ -3491,6 +3529,7 @@ out:
* clusters between cpos and cpos+write_len are safe to modify.
*/
int ocfs2_refcount_cow(struct inode *inode,
+ struct file *file,
struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos)
{
@@ -3510,7 +3549,7 @@ int ocfs2_refcount_cow(struct inode *inode,
num_clusters = write_len;
if (ext_flags & OCFS2_EXT_REFCOUNTED) {
- ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
+ ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
num_clusters, max_cpos);
if (ret) {
mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 9983ba1570e2..c8ce46f7d8e3 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree {
struct rb_node rf_node;
u64 rf_blkno;
u32 rf_generation;
+ struct kref rf_getcnt;
struct rw_semaphore rf_sem;
struct ocfs2_lock_res rf_lockres;
- struct kref rf_getcnt;
int rf_removed;
/* the following 4 fields are used by caching_info. */
- struct ocfs2_caching_info rf_ci;
spinlock_t rf_lock;
+ struct ocfs2_caching_info rf_ci;
struct mutex rf_io_mutex;
struct super_block *rf_sb;
};
@@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
u32 clusters,
int *credits,
int *ref_blocks);
-int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
+int ocfs2_refcount_cow(struct inode *inode,
+ struct file *filep, struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos);
typedef int (ocfs2_post_refcount_func)(struct inode *inode,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index fa1be1b304d1..47415398d56a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1990,6 +1990,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
return 0;
}
+/* Make sure entire volume is addressable by our journal. Requires
+ osb_clusters_at_boot to be valid and for the journal to have been
+ initialized by ocfs2_journal_init(). */
+static int ocfs2_journal_addressable(struct ocfs2_super *osb)
+{
+ int status = 0;
+ u64 max_block =
+ ocfs2_clusters_to_blocks(osb->sb,
+ osb->osb_clusters_at_boot) - 1;
+
+ /* 32-bit block number is always OK. */
+ if (max_block <= (u32)~0ULL)
+ goto out;
+
+ /* Volume is "huge", so see if our journal is new enough to
+ support it. */
+ if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb,
+ OCFS2_FEATURE_COMPAT_JBD2_SB) &&
+ jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_64BIT))) {
+ mlog(ML_ERROR, "The journal cannot address the entire volume. "
+ "Enable the 'block64' journal option with tunefs.ocfs2");
+ status = -EFBIG;
+ goto out;
+ }
+
+ out:
+ return status;
+}
+
static int ocfs2_initialize_super(struct super_block *sb,
struct buffer_head *bh,
int sector_size,
@@ -2002,6 +2032,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
struct ocfs2_journal *journal;
__le32 uuid_net_key;
struct ocfs2_super *osb;
+ u64 total_blocks;
mlog_entry_void();
@@ -2214,11 +2245,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail;
}
- if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1)
- > (u32)~0UL) {
- mlog(ML_ERROR, "Volume might try to write to blocks beyond "
- "what jbd can address in 32 bits.\n");
- status = -EINVAL;
+ total_blocks = ocfs2_clusters_to_blocks(osb->sb,
+ le32_to_cpu(di->i_clusters));
+
+ status = generic_check_addressable(osb->sb->s_blocksize_bits,
+ total_blocks);
+ if (status) {
+ mlog(ML_ERROR, "Volume too large "
+ "to mount safely on this system");
+ status = -EFBIG;
goto bail;
}
@@ -2380,6 +2415,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
goto finally;
}
+ /* Now that journal has been initialized, check to make sure
+ entire volume is addressable. */
+ status = ocfs2_journal_addressable(osb);
+ if (status)
+ goto finally;
+
/* If the journal was unmounted cleanly then we don't want to
* recover anything. Otherwise, journal_load will do that
* dirty work for us :) */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 76041b614758..1a759f40ab9e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2374,6 +2374,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
extern int generic_file_fsync(struct file *, int);
+extern int generic_check_addressable(unsigned, u64);
+
#ifdef CONFIG_MIGRATION
extern int buffer_migrate_page(struct address_space *,
struct page *, struct page *);