summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2009-02-13 12:16:31 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2009-02-13 12:16:31 +1100
commit9a2d4114c66910be683fcf54c71a703f7891fce4 (patch)
tree1ab100e62348bbb198f08386182901afc12db223 /fs
parent050b5309065032d5c1f989f73270197109fa6742 (diff)
parent817d4f61d47f081287ccbc6ac8e1456e83027336 (diff)
Merge commit 'ext4/next'
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/balloc.c9
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/ext4_i.h3
-rw-r--r--fs/ext4/ext4_sb.h4
-rw-r--r--fs/ext4/inode.c6
-rw-r--r--fs/ext4/mballoc.c29
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/jbd2/journal.c17
-rw-r--r--fs/jbd2/transaction.c42
-rw-r--r--fs/ocfs2/journal.h6
11 files changed, 77 insertions, 63 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 38b85fbb40c5..046f638146e1 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -55,7 +55,8 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
}
static int ext4_group_used_meta_blocks(struct super_block *sb,
- ext4_group_t block_group)
+ ext4_group_t block_group,
+ struct ext4_group_desc *gdp)
{
ext4_fsblk_t tmp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -63,10 +64,6 @@ static int ext4_group_used_meta_blocks(struct super_block *sb,
int used_blocks = sbi->s_itb_per_group + 2;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
- struct ext4_group_desc *gdp;
- struct buffer_head *bh;
-
- gdp = ext4_get_group_desc(sb, block_group, &bh);
if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
block_group))
used_blocks--;
@@ -177,7 +174,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
*/
mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
}
- return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
+ return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ae7590fac9de..0db01421da3e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -33,14 +33,6 @@
#undef EXT4FS_DEBUG
/*
- * Define EXT4_RESERVATION to reserve data blocks for expanding files
- */
-#define EXT4_DEFAULT_RESERVE_BLOCKS 8
-/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
-#define EXT4_MAX_RESERVE_BLOCKS 1027
-#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
-
-/*
* Debug code
*/
#ifdef EXT4FS_DEBUG
@@ -54,8 +46,6 @@
#define ext4_debug(f, a...) do {} while (0)
#endif
-#define EXT4_MULTIBLOCK_ALLOCATOR 1
-
/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE 1
/* blocks already reserved */
@@ -869,7 +859,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
{
unsigned len = le16_to_cpu(dlen);
- if (len == EXT4_MAX_REC_LEN)
+ if (len == EXT4_MAX_REC_LEN || len == 0)
return 1 << 16;
return len;
}
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index e69acc16f5c4..2d516c0a22af 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -33,9 +33,6 @@ typedef __u32 ext4_lblk_t;
/* data type for block group number */
typedef unsigned int ext4_group_t;
-#define rsv_start rsv_window._rsv_start
-#define rsv_end rsv_window._rsv_end
-
/*
* storage for cached extent
*/
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 039b6ea1a042..e318f486cc24 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -65,10 +65,6 @@ struct ext4_sb_info {
struct blockgroup_lock s_blockgroup_lock;
struct proc_dir_entry *s_proc;
- /* root of the per fs reservation window tree */
- spinlock_t s_rsv_window_lock;
- struct rb_root s_rsv_window_root;
-
/* Journaling */
struct inode *s_journal_inode;
struct journal_s *s_journal;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 43e5e7798635..e78551a6dfdd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -47,8 +47,10 @@
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
- return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
- new_size);
+ return jbd2_journal_begin_ordered_truncate(
+ EXT4_SB(inode->i_sb)->s_journal,
+ &EXT4_I(inode)->jinode,
+ new_size);
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8c6ad3b84125..bf4384bfbca5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4479,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
pa->pa_free -= ac->ac_b_ex.fe_len;
pa->pa_len -= ac->ac_b_ex.fe_len;
spin_unlock(&pa->pa_lock);
- /*
- * We want to add the pa to the right bucket.
- * Remove it from the list and while adding
- * make sure the list to which we are adding
- * doesn't grow big.
- */
- if (likely(pa->pa_free)) {
- spin_lock(pa->pa_obj_lock);
- list_del_rcu(&pa->pa_inode_list);
- spin_unlock(pa->pa_obj_lock);
- ext4_mb_add_n_trim(ac);
- }
}
- ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->alloc_semp)
up_read(ac->alloc_semp);
+ if (pa) {
+ /*
+ * We want to add the pa to the right bucket.
+ * Remove it from the list and while adding
+ * make sure the list to which we are adding
+ * doesn't grow big. We need to release
+ * alloc_semp before calling ext4_mb_add_n_trim()
+ */
+ if (pa->pa_linear && likely(pa->pa_free)) {
+ spin_lock(pa->pa_obj_lock);
+ list_del_rcu(&pa->pa_inode_list);
+ spin_unlock(pa->pa_obj_lock);
+ ext4_mb_add_n_trim(ac);
+ }
+ ext4_mb_put_pa(ac, ac->ac_sb, pa);
+ }
if (ac->ac_bitmap_page)
page_cache_release(ac->ac_bitmap_page);
if (ac->ac_buddy_page)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 10a2921baf14..7acf5ef24537 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -247,7 +247,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
-struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
struct ext4_free_extent *fex)
{
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a7b8cb1047c9..1a92bf42cbf4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3048,14 +3048,17 @@ static void ext4_write_super(struct super_block *sb)
static int ext4_sync_fs(struct super_block *sb, int wait)
{
int ret = 0;
+ tid_t target;
trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
sb->s_dirt = 0;
if (EXT4_SB(sb)->s_journal) {
- if (wait)
- ret = ext4_force_commit(sb);
- else
- jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+ if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
+ &target)) {
+ if (wait)
+ jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
+ target);
+ }
} else {
ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb343008eded..58144102bf25 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
}
/*
- * Called under j_state_lock. Returns true if a transaction was started.
+ * Called under j_state_lock. Returns true if a transaction commit was started.
*/
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
/*
* Start a commit of the current running transaction (if any). Returns true
- * if a transaction was started, and fills its tid in at *ptid
+ * if a transaction is going to be committed (or is currently already
+ * committing), and fills its tid in at *ptid
*/
int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid;
- ret = __jbd2_log_start_commit(journal, tid);
- if (ret && ptid)
+ __jbd2_log_start_commit(journal, tid);
+ /* There's a running transaction and we've just made sure
+ * it's commit has been scheduled. */
+ if (ptid)
*ptid = tid;
- } else if (journal->j_committing_transaction && ptid) {
+ ret = 1;
+ } else if (journal->j_committing_transaction) {
/*
* If ext3_write_super() recently started a commit, then we
* have to wait for completion of that transaction
*/
- *ptid = journal->j_committing_transaction->t_tid;
+ if (ptid)
+ *ptid = journal->j_committing_transaction->t_tid;
ret = 1;
}
spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 46b4e347ed7d..28ce21d8598e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2129,26 +2129,46 @@ done:
}
/*
- * This function must be called when inode is journaled in ordered mode
- * before truncation happens. It starts writeout of truncated part in
- * case it is in the committing transaction so that we stand to ordered
- * mode consistency guarantees.
+ * File truncate and transaction commit interact with each other in a
+ * non-trivial way. If a transaction writing data block A is
+ * committing, we cannot discard the data by truncate until we have
+ * written them. Otherwise if we crashed after the transaction with
+ * write has committed but before the transaction with truncate has
+ * committed, we could see stale data in block A. This function is a
+ * helper to solve this problem. It starts writeout of the truncated
+ * part in case it is in the committing transaction.
+ *
+ * Filesystem code must call this function when inode is journaled in
+ * ordered mode before truncation happens and after the inode has been
+ * placed on orphan list with the new inode size. The second condition
+ * avoids the race that someone writes new data and we start
+ * committing the transaction after this function has been called but
+ * before a transaction for truncate is started (and furthermore it
+ * allows us to optimize the case where the addition to orphan list
+ * happens in the same transaction as write --- we don't have to write
+ * any data in such case).
*/
-int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
+int jbd2_journal_begin_ordered_truncate(journal_t *journal,
+ struct jbd2_inode *jinode,
loff_t new_size)
{
- journal_t *journal;
- transaction_t *commit_trans;
+ transaction_t *inode_trans, *commit_trans;
int ret = 0;
- if (!inode->i_transaction && !inode->i_next_transaction)
+ /* This is a quick check to avoid locking if not necessary */
+ if (!jinode->i_transaction)
goto out;
- journal = inode->i_transaction->t_journal;
+ /* Locks are here just to force reading of recent values, it is
+ * enough that the transaction was not committing before we started
+ * a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock);
- if (inode->i_transaction == commit_trans) {
- ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
+ spin_lock(&journal->j_list_lock);
+ inode_trans = jinode->i_transaction;
+ spin_unlock(&journal->j_list_lock);
+ if (inode_trans == commit_trans) {
+ ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
new_size, LLONG_MAX);
if (ret)
jbd2_journal_abort(journal, ret);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index c4847caf4cc1..21601ee3f25a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -524,8 +524,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
- return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
- new_size);
+ return jbd2_journal_begin_ordered_truncate(
+ OCFS2_SB(inode->i_sb)->journal->j_journal,
+ &OCFS2_I(inode)->ip_jinode,
+ new_size);
}
#endif /* OCFS2_JOURNAL_H */