diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-09-01 10:46:12 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-09-01 10:46:12 +1000 |
commit | daa77a83c33209717456b4b83a626056a075809a (patch) | |
tree | ca35a7ebeba96330ba4e98f17cae4073e28eaec3 | |
parent | c2c364d8d7647cf6b488de77e10ac1174433a417 (diff) | |
parent | bc844a4824161d0232eda7787a282b2eec1efcad (diff) |
Merge remote branch 'ocfs2/linux-next'
-rw-r--r-- | fs/ext3/super.c | 4 | ||||
-rw-r--r-- | fs/ext4/super.c | 8 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 4 | ||||
-rw-r--r-- | fs/libfs.c | 29 | ||||
-rw-r--r-- | fs/ocfs2/aops.c | 9 | ||||
-rw-r--r-- | fs/ocfs2/aops.h | 3 | ||||
-rw-r--r-- | fs/ocfs2/blockcheck.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 49 | ||||
-rw-r--r-- | fs/ocfs2/inode.c | 6 | ||||
-rw-r--r-- | fs/ocfs2/inode.h | 11 | ||||
-rw-r--r-- | fs/ocfs2/ioctl.c | 356 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 9 | ||||
-rw-r--r-- | fs/ocfs2/journal.h | 3 | ||||
-rw-r--r-- | fs/ocfs2/mmap.c | 7 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 23 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_ioctl.h | 95 | ||||
-rw-r--r-- | fs/ocfs2/refcounttree.c | 43 | ||||
-rw-r--r-- | fs/ocfs2/refcounttree.h | 7 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 51 | ||||
-rw-r--r-- | include/linux/fs.h | 2 |
20 files changed, 645 insertions, 78 deletions
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dbf4dba03c4..a367dd044280 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - if (le32_to_cpu(es->s_blocks_count) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + if (generic_check_addressable(sb->s_blocksize_bits, + le32_to_cpu(es->s_blocks_count))) { ext3_msg(sb, KERN_ERR, "error: filesystem is too large to mount safely"); if (sizeof(sector_t) < 8) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..7f47c366bf15 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache. */ - if ((ext4_blocks_count(es) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || - (ext4_blocks_count(es) > - (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { + ret = generic_check_addressable(sb->s_blocksize_bits, + ext4_blocks_count(es)); + if (ret) { ext4_msg(sb, KERN_ERR, "filesystem" " too large to mount safely on this system"); if (sizeof(sector_t) < 8) ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); - ret = -EFBIG; goto failed_mount; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0e8014ea6b94..262419f83d80 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1371,6 +1371,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, if (!compat && !ro && !incompat) return 1; + /* Load journal superblock if it is not loaded yet. */ + if (journal->j_format_version == 0 && + journal_get_superblock(journal) != 0) + return 0; if (journal->j_format_version == 1) return 0; diff --git a/fs/libfs.c b/fs/libfs.c index 0a9da95317f7..62baa0387d6e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync) } EXPORT_SYMBOL(generic_file_fsync); +/** + * generic_check_addressable - Check addressability of file system + * @blocksize_bits: log of file system block size + * @num_blocks: number of blocks in file system + * + * Determine whether a file system with @num_blocks blocks (and a + * block size of 2**@blocksize_bits) is addressable by the sector_t + * and page cache of the system. Return 0 if so and -EFBIG otherwise. + */ +int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) +{ + u64 last_fs_block = num_blocks - 1; + u64 last_fs_page = + last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits); + + if (unlikely(num_blocks == 0)) + return 0; + + if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) + return -EINVAL; + + if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || + (last_fs_page > (pgoff_t)(~0ULL))) { + return -EFBIG; + } + return 0; +} +EXPORT_SYMBOL(generic_check_addressable); + /* * No-op implementation of ->fsync for in-memory filesystems. */ diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0de69c9a08be..5cfeee118158 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -883,8 +883,8 @@ struct ocfs2_write_ctxt { * out in so that future reads from that region will get * zero's. */ - struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; unsigned int w_num_pages; + struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; struct page *w_target_page; /* @@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, return ret; } -int ocfs2_write_begin_nolock(struct address_space *mapping, +int ocfs2_write_begin_nolock(struct file *filp, + struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, struct buffer_head *di_bh, struct page *mmap_page) @@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, mlog_errno(ret); goto out; } else if (ret == 1) { - ret = ocfs2_refcount_cow(inode, di_bh, + ret = ocfs2_refcount_cow(inode, filp, di_bh, wc->w_cpos, wc->w_clen, UINT_MAX); if (ret) { mlog_errno(ret); @@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, */ down_write(&OCFS2_I(inode)->ip_alloc_sem); - ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, + ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep, fsdata, di_bh, NULL); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index c48e93ffc513..7606f663da6d 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); -int ocfs2_write_begin_nolock(struct address_space *mapping, +int ocfs2_write_begin_nolock(struct file *filp, + struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, struct buffer_head *di_bh, struct page *mmap_page); diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index ec6d12339593..c7ee03c22226 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, - "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", + "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ @@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, goto out; } - mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", + mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); rc = -EIO; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 81296b4e3646..b714f76b82db 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -36,6 +36,7 @@ #include <linux/writeback.h> #include <linux/falloc.h> #include <linux/quotaops.h> +#include <linux/blkdev.h> #define MLOG_MASK_PREFIX ML_INODE #include <cluster/masklog.h> @@ -63,12 +64,6 @@ #include "buffer_head_io.h" -static int ocfs2_sync_inode(struct inode *inode) -{ - filemap_fdatawrite(inode->i_mapping); - return sync_mapping_buffers(inode->i_mapping); -} - static int ocfs2_init_file_private(struct inode *inode, struct file *file) { struct ocfs2_file_private *fp; @@ -186,12 +181,16 @@ static int ocfs2_sync_file(struct file *file, int datasync) mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, dentry->d_name.len, dentry->d_name.name); - err = ocfs2_sync_inode(dentry->d_inode); - if (err) - goto bail; - - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { + /* + * We still have to flush drive's caches to get data to the + * platter + */ + if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, + NULL, BLKDEV_IFL_WAIT); goto bail; + } journal = osb->journal->j_journal; err = jbd2_journal_force_commit(journal); @@ -361,7 +360,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) goto out; - return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); + return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); out: return status; @@ -904,8 +903,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, zero_clusters = last_cpos - zero_cpos; if (needs_cow) { - rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, - UINT_MAX); + rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, + zero_clusters, UINT_MAX); if (rc) { mlog_errno(rc); goto out; @@ -2053,6 +2052,7 @@ out: } static int ocfs2_prepare_inode_for_refcount(struct inode *inode, + struct file *file, loff_t pos, size_t count, int *meta_level) { @@ -2070,7 +2070,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, *meta_level = 1; - ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); + ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); if (ret) mlog_errno(ret); out: @@ -2078,7 +2078,7 @@ out: return ret; } -static int ocfs2_prepare_inode_for_write(struct dentry *dentry, +static int ocfs2_prepare_inode_for_write(struct file *file, loff_t *ppos, size_t count, int appending, @@ -2086,6 +2086,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, int *has_refcount) { int ret = 0, meta_level = 0; + struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; loff_t saved_pos, end; @@ -2141,6 +2142,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, meta_level = -1; ret = ocfs2_prepare_inode_for_refcount(inode, + file, saved_pos, count, &meta_level); @@ -2255,7 +2257,7 @@ relock: } can_do_direct = direct_io; - ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, + ret = ocfs2_prepare_inode_for_write(file, ppos, iocb->ki_left, appending, &can_do_direct, &has_refcount); if (ret < 0) { @@ -2303,17 +2305,6 @@ relock: written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, ppos, count, ocount); if (written < 0) { - /* - * direct write may have instantiated a few - * blocks outside i_size. Trim these off again. - * Don't need i_size_read because we hold i_mutex. - * - * XXX(truncate): this looks buggy because ocfs2 did not - * actually implement ->truncate. Take a look at - * the new truncate sequence and update this accordingly - */ - if (*ppos + count > inode->i_size) - truncate_setsize(inode, inode->i_size); ret = written; goto out_dio; } @@ -2385,7 +2376,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, { int ret; - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, + ret = ocfs2_prepare_inode_for_write(out, &sd->pos, sd->total_len, 0, NULL, NULL); if (ret < 0) { mlog_errno(ret); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 0492464916b1..eece3e05d9d0 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, OCFS2_BH_IGNORE_CACHE); } else { status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); - if (!status) + /* + * If buffer is in jbd, then its checksum may not have been + * computed as yet. + */ + if (!status && !buffer_jbd(bh)) status = ocfs2_validate_inode_block(osb->sb, bh); } if (status < 0) { diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 6de5a869db30..0bc477a3aeb8 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -46,27 +46,24 @@ struct ocfs2_inode_info /* These fields are protected by ip_lock */ spinlock_t ip_lock; u32 ip_open_count; - u32 ip_clusters; struct list_head ip_io_markers; + u32 ip_clusters; + u16 ip_dyn_features; struct mutex ip_io_mutex; - u32 ip_flags; /* see below */ u32 ip_attr; /* inode attributes */ - u16 ip_dyn_features; /* protected by recovery_lock. */ struct inode *ip_next_orphan; - u32 ip_dir_start_lookup; - struct ocfs2_caching_info ip_metadata_cache; - struct ocfs2_extent_map ip_extent_map; - struct inode vfs_inode; struct jbd2_inode ip_jinode; + u32 ip_dir_start_lookup; + /* Only valid if the inode is the dir. */ u32 ip_last_used_slot; u64 ip_last_used_group; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7d9d9c132cef..7a4868196152 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -26,6 +26,26 @@ #include <linux/ext2_fs.h> +#define o2info_from_user(a, b) \ + copy_from_user(&(a), (b), sizeof(a)) +#define o2info_to_user(a, b) \ + copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) + +/* + * This call is void because we are already reporting an error that may + * be -EFAULT. The error will be returned from the ioctl(2) call. It's + * just a best-effort to tell userspace that this request caused the error. + */ +static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, + struct ocfs2_info_request __user *req) +{ + kreq->ir_flags |= OCFS2_INFO_FL_ERROR; + (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); +} + +#define o2info_set_request_error(a, b) \ + __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) + static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) { int status; @@ -109,6 +129,328 @@ bail: return status; } +int ocfs2_info_handle_blocksize(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_blocksize oib; + + if (o2info_from_user(oib, req)) + goto bail; + + oib.ib_blocksize = inode->i_sb->s_blocksize; + oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oib, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oib, req); + + return status; +} + +int ocfs2_info_handle_clustersize(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_clustersize oic; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (o2info_from_user(oic, req)) + goto bail; + + oic.ic_clustersize = osb->s_clustersize; + oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oic, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oic, req); + + return status; +} + +int ocfs2_info_handle_maxslots(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_maxslots oim; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (o2info_from_user(oim, req)) + goto bail; + + oim.im_max_slots = osb->max_slots; + oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oim, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oim, req); + + return status; +} + +int ocfs2_info_handle_label(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_label oil; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (o2info_from_user(oil, req)) + goto bail; + + memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); + oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oil, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oil, req); + + return status; +} + +int ocfs2_info_handle_uuid(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_uuid oiu; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (o2info_from_user(oiu, req)) + goto bail; + + memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); + oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oiu, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oiu, req); + + return status; +} + +int ocfs2_info_handle_fs_features(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_fs_features oif; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (o2info_from_user(oif, req)) + goto bail; + + oif.if_compat_features = osb->s_feature_compat; + oif.if_incompat_features = osb->s_feature_incompat; + oif.if_ro_compat_features = osb->s_feature_ro_compat; + oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oif, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oif, req); + + return status; +} + +int ocfs2_info_handle_journal_size(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_journal_size oij; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (o2info_from_user(oij, req)) + goto bail; + + oij.ij_journal_size = osb->journal->j_inode->i_size; + + oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oij, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oij, req); + + return status; +} + +int ocfs2_info_handle_unknown(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_request oir; + + if (o2info_from_user(oir, req)) + goto bail; + + oir.ir_flags &= ~OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oir, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oir, req); + + return status; +} + +/* + * Validate and distinguish OCFS2_IOC_INFO requests. + * + * - validate the magic number. + * - distinguish different requests. + * - validate size of different requests. + */ +int ocfs2_info_handle_request(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + struct ocfs2_info_request oir; + + if (o2info_from_user(oir, req)) + goto bail; + + status = -EINVAL; + if (oir.ir_magic != OCFS2_INFO_MAGIC) + goto bail; + + switch (oir.ir_code) { + case OCFS2_INFO_BLOCKSIZE: + if (oir.ir_size == sizeof(struct ocfs2_info_blocksize)) + status = ocfs2_info_handle_blocksize(inode, req); + break; + case OCFS2_INFO_CLUSTERSIZE: + if (oir.ir_size == sizeof(struct ocfs2_info_clustersize)) + status = ocfs2_info_handle_clustersize(inode, req); + break; + case OCFS2_INFO_MAXSLOTS: + if (oir.ir_size == sizeof(struct ocfs2_info_maxslots)) + status = ocfs2_info_handle_maxslots(inode, req); + break; + case OCFS2_INFO_LABEL: + if (oir.ir_size == sizeof(struct ocfs2_info_label)) + status = ocfs2_info_handle_label(inode, req); + break; + case OCFS2_INFO_UUID: + if (oir.ir_size == sizeof(struct ocfs2_info_uuid)) + status = ocfs2_info_handle_uuid(inode, req); + break; + case OCFS2_INFO_FS_FEATURES: + if (oir.ir_size == sizeof(struct ocfs2_info_fs_features)) + status = ocfs2_info_handle_fs_features(inode, req); + break; + case OCFS2_INFO_JOURNAL_SIZE: + if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) + status = ocfs2_info_handle_journal_size(inode, req); + break; + default: + status = ocfs2_info_handle_unknown(inode, req); + break; + } + +bail: + return status; +} + +int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx, + u64 *req_addr, int compat_flag) +{ + int status = -EFAULT; + u64 __user *bp = NULL; + + if (compat_flag) { +#ifdef CONFIG_COMPAT + /* + * pointer bp stores the base address of a pointers array, + * which collects all addresses of separate request. + */ + bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests); +#else + BUG(); +#endif + } else + bp = (u64 __user *)(unsigned long)(info->oi_requests); + + if (o2info_from_user(*req_addr, bp + idx)) + goto bail; + + status = 0; +bail: + return status; +} + +/* + * OCFS2_IOC_INFO handles an array of requests passed from userspace. + * + * ocfs2_info_handle() recevies a large info aggregation, grab and + * validate the request count from header, then break it into small + * pieces, later specific handlers can handle them one by one. + * + * Idea here is to make each separate request small enough to ensure + * a better backward&forward compatibility, since a small piece of + * request will be less likely to be broken if disk layout get changed. + */ +int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, + int compat_flag) +{ + int i, status = 0; + u64 req_addr; + struct ocfs2_info_request __user *reqp; + + if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) || + (!info->oi_requests)) { + status = -EINVAL; + goto bail; + } + + for (i = 0; i < info->oi_count; i++) { + + status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag); + if (status) + break; + + reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; + if (!reqp) { + status = -EINVAL; + goto bail; + } + + status = ocfs2_info_handle_request(inode, reqp); + if (status) + break; + } + +bail: + return status; +} + long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct reflink_arguments args; const char *old_path, *new_path; bool preserve; + struct ocfs2_info info; switch (cmd) { case OCFS2_IOC_GETFLAGS: @@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) preserve = (args.preserve != 0); return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); + case OCFS2_IOC_INFO: + if (copy_from_user(&info, (struct ocfs2_info __user *)arg, + sizeof(struct ocfs2_info))) + return -EFAULT; + + return ocfs2_info_handle(inode, &info, 0); default: return -ENOTTY; } @@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) bool preserve; struct reflink_arguments args; struct inode *inode = file->f_path.dentry->d_inode; + struct ocfs2_info info; switch (cmd) { case OCFS2_IOC32_GETFLAGS: @@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), compat_ptr(args.new_path), preserve); + case OCFS2_IOC_INFO: + if (copy_from_user(&info, (struct ocfs2_info __user *)arg, + sizeof(struct ocfs2_info))) + return -EFAULT; + + return ocfs2_info_handle(inode, &info, 1); default: return -ENOIOCTLCMD; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9b57c0350ff9..faa2303dbf0a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) { int status = 0; unsigned int flushed; - unsigned long old_id; struct ocfs2_journal *journal = NULL; mlog_entry_void(); @@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) goto finally; } - old_id = ocfs2_inc_trans_id(journal); + ocfs2_inc_trans_id(journal); flushed = atomic_read(&journal->j_num_trans); atomic_set(&journal->j_num_trans, 0); @@ -342,9 +341,6 @@ finally: return status; } -/* pass it NULL and it will allocate a new handle object for you. If - * you pass it a handle however, it may still return error, in which - * case it has free'd the passed handle for you. */ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) { journal_t *journal = osb->journal->j_journal; @@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) os = &osb->osb_orphan_scan; + mlog(0, "Begin orphan scan\n"); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) goto out; @@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) unlock: ocfs2_orphan_scan_unlock(osb, seqno); out: + mlog(0, "Orphan scan completed\n"); return; } diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index b5baaa8e710f..43e56b97f9c0 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -67,11 +67,12 @@ struct ocfs2_journal { struct buffer_head *j_bh; /* Journal disk inode block */ atomic_t j_num_trans; /* Number of transactions * currently in the system. */ + spinlock_t j_lock; unsigned long j_trans_id; struct rw_semaphore j_trans_barrier; wait_queue_head_t j_checkpointed; - spinlock_t j_lock; + /* both fields protected by j_lock*/ struct list_head j_la_cleanups; struct work_struct j_recovery_work; }; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index af2b8fe1f139..b04d6961c0d4 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) return ret; } -static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, +static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, struct page *page) { int ret; + struct inode *inode = file->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; loff_t pos = page_offset(page); unsigned int len = PAGE_CACHE_SIZE; @@ -109,7 +110,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, if (page->index == last_index) len = size & ~PAGE_CACHE_MASK; - ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, + ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page, &fsdata, di_bh, page); if (ret) { if (ret != -ENOSPC) @@ -157,7 +158,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ down_write(&OCFS2_I(inode)->ip_alloc_sem); - ret = __ocfs2_page_mkwrite(inode, di_bh, page); + ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page); up_write(&OCFS2_I(inode)->ip_alloc_sem); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c67003b6b5a2..65739b3b3276 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data); struct ocfs2_lock_res { void *l_priv; struct ocfs2_lock_res_ops *l_ops; - spinlock_t l_lock; + struct list_head l_blocked_list; struct list_head l_mask_waiters; - enum ocfs2_lock_type l_type; unsigned long l_flags; char l_name[OCFS2_LOCK_ID_MAX_LEN]; - int l_level; unsigned int l_ro_holders; unsigned int l_ex_holders; - struct ocfs2_dlm_lksb l_lksb; + unsigned char l_level; + + /* Data packed - type enum ocfs2_lock_type */ + unsigned char l_type; /* used from AST/BAST funcs. */ - enum ocfs2_ast_action l_action; - enum ocfs2_unlock_action l_unlock_action; - int l_requested; - int l_blocking; + /* Data packed - enum type ocfs2_ast_action */ + unsigned char l_action; + /* Data packed - enum type ocfs2_unlock_action */ + unsigned char l_unlock_action; + unsigned char l_requested; + unsigned char l_blocking; unsigned int l_pending_gen; + spinlock_t l_lock; + + struct ocfs2_dlm_lksb l_lksb; + wait_queue_head_t l_event; struct list_head l_debug_list; diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420af1a83..9bc535499868 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -76,4 +76,99 @@ struct reflink_arguments { }; #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) +/* Following definitions dedicated for ocfs2_info_request ioctls. */ +#define OCFS2_INFO_MAX_REQUEST (50) +#define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2) + +/* Magic number of all requests */ +#define OCFS2_INFO_MAGIC (0x4F32494E) + +/* + * Always try to separate info request into small pieces to + * guarantee the backward&forward compatibility. + */ +struct ocfs2_info { + __u64 oi_requests; /* Array of __u64 pointers to requests */ + __u32 oi_count; /* Number of requests in info_requests */ + __u32 oi_pad; +}; + +struct ocfs2_info_request { +/*00*/ __u32 ir_magic; /* Magic number */ + __u32 ir_code; /* Info request code */ + __u32 ir_size; /* Size of request */ + __u32 ir_flags; /* Request flags */ +/*10*/ /* Request specific fields */ +}; + +struct ocfs2_info_clustersize { + struct ocfs2_info_request ic_req; + __u32 ic_clustersize; + __u32 ic_pad; +}; + +struct ocfs2_info_blocksize { + struct ocfs2_info_request ib_req; + __u32 ib_blocksize; + __u32 ib_pad; +}; + +struct ocfs2_info_maxslots { + struct ocfs2_info_request im_req; + __u32 im_max_slots; + __u32 im_pad; +}; + +struct ocfs2_info_label { + struct ocfs2_info_request il_req; + __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN]; +} __attribute__ ((packed)); + +struct ocfs2_info_uuid { + struct ocfs2_info_request iu_req; + __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1]; +} __attribute__ ((packed)); + +struct ocfs2_info_fs_features { + struct ocfs2_info_request if_req; + __u32 if_compat_features; + __u32 if_incompat_features; + __u32 if_ro_compat_features; + __u32 if_pad; +}; + +struct ocfs2_info_journal_size { + struct ocfs2_info_request ij_req; + __u64 ij_journal_size; +}; + +/* Codes for ocfs2_info_request */ +enum ocfs2_info_type { + OCFS2_INFO_CLUSTERSIZE = 1, + OCFS2_INFO_BLOCKSIZE, + OCFS2_INFO_MAXSLOTS, + OCFS2_INFO_LABEL, + OCFS2_INFO_UUID, + OCFS2_INFO_FS_FEATURES, + OCFS2_INFO_JOURNAL_SIZE, + OCFS2_INFO_NUM_TYPES +}; + +/* Flags for struct ocfs2_info_request */ +/* Filled by the caller */ +#define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not + required. This is a hint. + It is up to ocfs2 whether + the request can be fulfilled + without locking. */ +/* Filled by ocfs2 */ +#define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood + this request and + filled in the answer */ + +#define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during + request handling. */ + +#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) + #endif /* OCFS2_IOCTL_H */ diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 73a11ccfd4c2..7f13d2059b44 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -49,6 +49,7 @@ struct ocfs2_cow_context { struct inode *inode; + struct file *file; u32 cow_start; u32 cow_len; struct ocfs2_extent_tree data_et; @@ -2932,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); struct page *page; pgoff_t page_index; - unsigned int from, to; + unsigned int from, to, readahead_pages; loff_t offset, end, map_end; struct address_space *mapping = context->inode->i_mapping; mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, new_cluster, new_len, cpos); + readahead_pages = + (ocfs2_cow_contig_clusters(sb) << + OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT; offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); /* @@ -2969,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) BUG_ON(PageDirty(page)); + if (PageReadahead(page) && context->file) { + page_cache_async_readahead(mapping, + &context->file->f_ra, + context->file, + page, page_index, + readahead_pages); + } + if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); if (ret) { @@ -3408,12 +3420,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) return ret; } +static void ocfs2_readahead_for_cow(struct inode *inode, + struct file *file, + u32 start, u32 len) +{ + struct address_space *mapping; + pgoff_t index; + unsigned long num_pages; + int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; + + if (!file) + return; + + mapping = file->f_mapping; + num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; + if (!num_pages) + num_pages = 1; + + index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; + page_cache_sync_readahead(mapping, &file->f_ra, file, + index, num_pages); +} + /* * Starting at cpos, try to CoW write_len clusters. Don't CoW * past max_cpos. This will stop when it runs into a hole or an * unrefcounted extent. */ static int ocfs2_refcount_cow_hunk(struct inode *inode, + struct file *file, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos) { @@ -3442,6 +3477,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, BUG_ON(cow_len == 0); + ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); + context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); if (!context) { ret = -ENOMEM; @@ -3463,6 +3500,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, context->ref_root_bh = ref_root_bh; context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; context->get_clusters = ocfs2_di_get_clusters; + context->file = file; ocfs2_init_dinode_extent_tree(&context->data_et, INODE_CACHE(inode), di_bh); @@ -3491,6 +3529,7 @@ out: * clusters between cpos and cpos+write_len are safe to modify. */ int ocfs2_refcount_cow(struct inode *inode, + struct file *file, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos) { @@ -3510,7 +3549,7 @@ int ocfs2_refcount_cow(struct inode *inode, num_clusters = write_len; if (ext_flags & OCFS2_EXT_REFCOUNTED) { - ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, + ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, num_clusters, max_cpos); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 9983ba1570e2..c8ce46f7d8e3 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -21,14 +21,14 @@ struct ocfs2_refcount_tree { struct rb_node rf_node; u64 rf_blkno; u32 rf_generation; + struct kref rf_getcnt; struct rw_semaphore rf_sem; struct ocfs2_lock_res rf_lockres; - struct kref rf_getcnt; int rf_removed; /* the following 4 fields are used by caching_info. */ - struct ocfs2_caching_info rf_ci; spinlock_t rf_lock; + struct ocfs2_caching_info rf_ci; struct mutex rf_io_mutex; struct super_block *rf_sb; }; @@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, u32 clusters, int *credits, int *ref_blocks); -int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, +int ocfs2_refcount_cow(struct inode *inode, + struct file *filep, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos); typedef int (ocfs2_post_refcount_func)(struct inode *inode, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fa1be1b304d1..47415398d56a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1990,6 +1990,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu return 0; } +/* Make sure entire volume is addressable by our journal. Requires + osb_clusters_at_boot to be valid and for the journal to have been + initialized by ocfs2_journal_init(). */ +static int ocfs2_journal_addressable(struct ocfs2_super *osb) +{ + int status = 0; + u64 max_block = + ocfs2_clusters_to_blocks(osb->sb, + osb->osb_clusters_at_boot) - 1; + + /* 32-bit block number is always OK. */ + if (max_block <= (u32)~0ULL) + goto out; + + /* Volume is "huge", so see if our journal is new enough to + support it. */ + if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb, + OCFS2_FEATURE_COMPAT_JBD2_SB) && + jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_64BIT))) { + mlog(ML_ERROR, "The journal cannot address the entire volume. " + "Enable the 'block64' journal option with tunefs.ocfs2"); + status = -EFBIG; + goto out; + } + + out: + return status; +} + static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, int sector_size, @@ -2002,6 +2032,7 @@ static int ocfs2_initialize_super(struct super_block *sb, struct ocfs2_journal *journal; __le32 uuid_net_key; struct ocfs2_super *osb; + u64 total_blocks; mlog_entry_void(); @@ -2214,11 +2245,15 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) - > (u32)~0UL) { - mlog(ML_ERROR, "Volume might try to write to blocks beyond " - "what jbd can address in 32 bits.\n"); - status = -EINVAL; + total_blocks = ocfs2_clusters_to_blocks(osb->sb, + le32_to_cpu(di->i_clusters)); + + status = generic_check_addressable(osb->sb->s_blocksize_bits, + total_blocks); + if (status) { + mlog(ML_ERROR, "Volume too large " + "to mount safely on this system"); + status = -EFBIG; goto bail; } @@ -2380,6 +2415,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) goto finally; } + /* Now that journal has been initialized, check to make sure + entire volume is addressable. */ + status = ocfs2_journal_addressable(osb); + if (status) + goto finally; + /* If the journal was unmounted cleanly then we don't want to * recover anything. Otherwise, journal_load will do that * dirty work for us :) */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..1a759f40ab9e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2374,6 +2374,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, extern int generic_file_fsync(struct file *, int); +extern int generic_check_addressable(unsigned, u64); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); |