From f3e0f3da1b65e84ea82176c1cda03a4b694c9911 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 02:35:14 -0400 Subject: ufs: kill more lock_ufs() calls a) move it inside ufs_truncate() b) ufs_free_inode() doesn't need it - it's serialized on ->s_lock c) ufs_write_inode() doesn't need it either (and can be called without it anyway). Signed-off-by: Al Viro --- fs/ufs/inode.c | 13 ++----------- fs/ufs/truncate.c | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index f913a6924b23..0e4d88e0e709 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -867,11 +867,7 @@ static int ufs_update_inode(struct inode * inode, int do_sync) int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) { - int ret; - lock_ufs(inode->i_sb); - ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); - unlock_ufs(inode->i_sb); - return ret; + return ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } int ufs_sync_inode (struct inode *inode) @@ -890,22 +886,17 @@ void ufs_evict_inode(struct inode * inode) if (want_delete) { loff_t old_i_size; /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ - lock_ufs(inode->i_sb); mark_inode_dirty(inode); ufs_update_inode(inode, IS_SYNC(inode)); old_i_size = inode->i_size; inode->i_size = 0; if (inode->i_blocks && ufs_truncate(inode, old_i_size)) ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); - unlock_ufs(inode->i_sb); } invalidate_inode_buffers(inode); clear_inode(inode); - if (want_delete) { - lock_ufs(inode->i_sb); + if (want_delete) ufs_free_inode(inode); - unlock_ufs(inode->i_sb); - } } diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 21154704c168..90cf3a76c500 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -457,6 +457,7 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; + lock_ufs(sb); err = ufs_alloc_lastblock(inode); if (err) { @@ -486,6 +487,7 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) ufsi->i_lastfrag = DIRECT_FRAGMENT; mark_inode_dirty(inode); out: + unlock_ufs(sb); UFSD("EXIT: err %d\n", err); return err; } @@ -506,9 +508,7 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) /* XXX(truncate): truncate_setsize should be called last */ truncate_setsize(inode, attr->ia_size); - lock_ufs(inode->i_sb); error = ufs_truncate(inode, old_i_size); - unlock_ufs(inode->i_sb); if (error) return error; } -- cgit v1.2.3 From d622f167b8435c856376edec130053fb56bf83e4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 18:04:16 -0400 Subject: ufs: switch ufs_evict_inode() to trimmed-down variant of ufs_truncate() Signed-off-by: Al Viro --- fs/ufs/inode.c | 9 ++------- fs/ufs/truncate.c | 60 +++++++++++++++++++++++++++++++++++++------------------ fs/ufs/ufs.h | 2 +- 3 files changed, 44 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 0e4d88e0e709..282b0ced6272 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -884,14 +884,9 @@ void ufs_evict_inode(struct inode * inode) truncate_inode_pages_final(&inode->i_data); if (want_delete) { - loff_t old_i_size; - /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ - mark_inode_dirty(inode); - ufs_update_inode(inode, IS_SYNC(inode)); - old_i_size = inode->i_size; inode->i_size = 0; - if (inode->i_blocks && ufs_truncate(inode, old_i_size)) - ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); + if (inode->i_blocks) + ufs_truncate_blocks(inode); } 
invalidate_inode_buffers(inode); diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 90cf3a76c500..5a2e7082a0ae 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -440,12 +440,36 @@ out: return err; } -int ufs_truncate(struct inode *inode, loff_t old_i_size) +static void __ufs_truncate_blocks(struct inode *inode) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - int retry, err = 0; + int retry; + + while (1) { + retry = ufs_trunc_direct(inode); + retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, + ufs_get_direct_data_ptr(uspi, ufsi, + UFS_IND_BLOCK)); + retry |= ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, + ufs_get_direct_data_ptr(uspi, ufsi, + UFS_DIND_BLOCK)); + retry |= ufs_trunc_tindirect (inode); + if (!retry) + break; + if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) + ufs_sync_inode (inode); + yield(); + } + + ufsi->i_lastfrag = DIRECT_FRAGMENT; +} + +int ufs_truncate(struct inode *inode, loff_t old_i_size) +{ + struct super_block *sb = inode->i_sb; + int err = 0; UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n", inode->i_ino, (unsigned long long)i_size_read(inode), @@ -467,24 +491,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); - while (1) { - retry = ufs_trunc_direct(inode); - retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, - ufs_get_direct_data_ptr(uspi, ufsi, - UFS_IND_BLOCK)); - retry |= ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, - ufs_get_direct_data_ptr(uspi, ufsi, - UFS_DIND_BLOCK)); - retry |= ufs_trunc_tindirect (inode); - if (!retry) - break; - if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) - ufs_sync_inode (inode); - yield(); - } - + __ufs_truncate_blocks(inode); inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - ufsi->i_lastfrag = DIRECT_FRAGMENT; mark_inode_dirty(inode); out: unlock_ufs(sb); @@ -492,6 +500,20 @@ out: return err; } +void ufs_truncate_blocks(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + lock_ufs(sb); + __ufs_truncate_blocks(inode); + unlock_ufs(sb); +} + int ufs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 2e31ea2e35a3..43fcab381de1 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -141,7 +141,7 @@ extern const struct inode_operations ufs_fast_symlink_inode_operations; extern const struct inode_operations ufs_symlink_inode_operations; /* truncate.c */ -extern int ufs_truncate (struct inode *, loff_t); +extern void ufs_truncate_blocks(struct inode *); extern int ufs_setattr(struct dentry *dentry, struct iattr *attr); static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) -- cgit v1.2.3 From 3b7a3a05e8b006a73c406230b3d2d3da920779d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 18:06:40 -0400 Subject: ufs: free excessive blocks upon ->write_begin() failure/short copy Broken in "[PATCH] ufs: truncate should allocate block for last byte"; all way back in 2006. ufs_setattr() hadn't been the only user of vmtruncate() and eliminating ->truncate() method required corrections in a bunch of places. Eventually those places had migrated into ->write_begin() failure exit and ->write_end() after short copy... 
Signed-off-by: Al Viro --- fs/ufs/inode.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 282b0ced6272..a4fc3adfdc4c 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -530,8 +530,10 @@ static void ufs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; - if (to > inode->i_size) + if (to > inode->i_size) { truncate_pagecache(inode, inode->i_size); + ufs_truncate_blocks(inode); + } } static int ufs_write_begin(struct file *file, struct address_space *mapping, @@ -548,6 +550,18 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping, return ret; } +static int ufs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + int ret; + + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (ret < len) + ufs_write_failed(mapping, pos + len); + return ret; +} + static sector_t ufs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,ufs_getfrag_block); @@ -557,7 +571,7 @@ const struct address_space_operations ufs_aops = { .readpage = ufs_readpage, .writepage = ufs_writepage, .write_begin = ufs_write_begin, - .write_end = generic_write_end, + .write_end = ufs_write_end, .bmap = ufs_bmap }; -- cgit v1.2.3 From 2401aa29ab5c42cc34853a5c1457fbf66593690f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 18:15:07 -0400 Subject: ufs: move truncate_setsize() down into ufs_truncate() just prior to __ufs_truncate_blocks(), with matching change of calling conventions Signed-off-by: Al Viro --- fs/ufs/truncate.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 5a2e7082a0ae..6f56036ff724 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -370,7 +370,7 @@ static int ufs_trunc_tindirect(struct inode *inode) return retry; } -static int ufs_alloc_lastblock(struct inode *inode) +static int ufs_alloc_lastblock(struct inode *inode, loff_t size) { int err = 0; struct super_block *sb = inode->i_sb; @@ -382,7 +382,7 @@ static int ufs_alloc_lastblock(struct inode *inode) struct buffer_head *bh; u64 phys64; - lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; + lastfrag = (size + uspi->s_fsize - 1) >> uspi->s_fshift; if (!lastfrag) goto out; @@ -466,14 +466,14 @@ static void __ufs_truncate_blocks(struct inode *inode) ufsi->i_lastfrag = DIRECT_FRAGMENT; } -int ufs_truncate(struct inode *inode, loff_t old_i_size) +int ufs_truncate(struct inode *inode, loff_t size) { struct super_block *sb = inode->i_sb; int err = 0; UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n", - inode->i_ino, (unsigned long long)i_size_read(inode), - (unsigned long long)old_i_size); + inode->i_ino, (unsigned long long)size, + (unsigned long long)i_size_read(inode)); if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) @@ -482,14 +482,14 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) return -EPERM; lock_ufs(sb); - err = ufs_alloc_lastblock(inode); + err = ufs_alloc_lastblock(inode, size); - if (err) { - i_size_write(inode, old_i_size); + if (err) goto out; - } - block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); + block_truncate_page(inode->i_mapping, size, ufs_getfrag_block); + + truncate_setsize(inode, size); 
__ufs_truncate_blocks(inode); inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; @@ -525,12 +525,7 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) return error; if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { - loff_t old_i_size = inode->i_size; - - /* XXX(truncate): truncate_setsize should be called last */ - truncate_setsize(inode, attr->ia_size); - - error = ufs_truncate(inode, old_i_size); + error = ufs_truncate(inode, attr->ia_size); if (error) return error; } -- cgit v1.2.3 From 493b4537a26b104fb3bd07ff4a46b6ede4288e76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 18:17:28 -0400 Subject: ufs: move lock_ufs() down into __ufs_truncate_blocks() Signed-off-by: Al Viro --- fs/ufs/truncate.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 6f56036ff724..155e13aea80c 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -447,6 +447,7 @@ static void __ufs_truncate_blocks(struct inode *inode) struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; int retry; + lock_ufs(sb); while (1) { retry = ufs_trunc_direct(inode); retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, @@ -464,11 +465,11 @@ static void __ufs_truncate_blocks(struct inode *inode) } ufsi->i_lastfrag = DIRECT_FRAGMENT; + unlock_ufs(sb); } int ufs_truncate(struct inode *inode, loff_t size) { - struct super_block *sb = inode->i_sb; int err = 0; UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n", @@ -481,7 +482,6 @@ int ufs_truncate(struct inode *inode, loff_t size) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; - lock_ufs(sb); err = ufs_alloc_lastblock(inode, size); if (err) @@ -495,23 +495,18 @@ int ufs_truncate(struct inode *inode, loff_t size) inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); out: - unlock_ufs(sb); UFSD("EXIT: err %d\n", err); return err; } void ufs_truncate_blocks(struct inode *inode) { - struct super_block *sb = inode->i_sb; if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - - lock_ufs(sb); __ufs_truncate_blocks(inode); - unlock_ufs(sb); } int ufs_setattr(struct dentry *dentry, struct iattr *attr) -- cgit v1.2.3 From 4af7b2c080715b9452fdaefb7ada72b4dc79593e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Jun 2015 19:26:18 -0400 Subject: ufs: bforget() indirect blocks before freeing them right now it doesn't matter (lock_ufs() serializes everything), but when we switch to per-inode locking, it will be needed. 
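For illustration, the resulting ordering in the truncate paths looks
roughly like this (simplified sketch of the indirect case; the dindirect
and tindirect helpers follow the same pattern):

	tmp = ufs_data_ptr_to_cpu(sb, p);	  /* block about to be freed */
	ufs_data_ptr_clear(uspi, p);		  /* unhook it from the tree */
	ubh_bforget(ind_ubh);			  /* drop the stale buffer first... */
	ufs_free_blocks(inode, tmp, uspi->s_fpb); /* ...then hand the block back */
	mark_inode_dirty(inode);

Once freeing stops being serialized fs-wide, a block returned by
ufs_free_blocks() may be grabbed by another allocation right away;
forgetting the buffer before that point guarantees its stale dirty
contents can never be written out on top of the new owner's data.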
Signed-off-by: Al Viro --- fs/ufs/truncate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 155e13aea80c..9908a6045d7a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -237,9 +237,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) tmp = ufs_data_ptr_to_cpu(sb, p); ufs_data_ptr_clear(uspi, p); + ubh_bforget(ind_ubh); ufs_free_blocks (inode, tmp, uspi->s_fpb); mark_inode_dirty(inode); - ubh_bforget(ind_ubh); ind_ubh = NULL; } if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) @@ -299,9 +299,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) tmp = ufs_data_ptr_to_cpu(sb, p); ufs_data_ptr_clear(uspi, p); + ubh_bforget(dind_bh); ufs_free_blocks(inode, tmp, uspi->s_fpb); mark_inode_dirty(inode); - ubh_bforget(dind_bh); dind_bh = NULL; } if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) @@ -357,9 +357,9 @@ static int ufs_trunc_tindirect(struct inode *inode) tmp = ufs_data_ptr_to_cpu(sb, p); ufs_data_ptr_clear(uspi, p); + ubh_bforget(tind_bh); ufs_free_blocks(inode, tmp, uspi->s_fpb); mark_inode_dirty(inode); - ubh_bforget(tind_bh); tind_bh = NULL; } if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) -- cgit v1.2.3 From 724bb09fdc06d4ff03757b25d6dba9ef1b133e8f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Jun 2015 12:02:56 -0400 Subject: ufs: don't use lock_ufs() for block pointers tree protection * stores to block pointers are under per-inode seqlock (meta_lock) and mutex (truncate_mutex) * fetches of block pointers are either under truncate_mutex, or wrapped into seqretry loop on meta_lock * all changes of ->i_size are under truncate_mutex and i_mutex * all changes of ->i_lastfrag are under truncate_mutex It's similar to what ext2 is doing; the main difference is that unlike ext2 we can't rely upon the atomicity of stores into block pointers - on UFS2 they are 64bit. So we can't cut the corner when switching a pointer from NULL to non-NULL as we could in ext2_splice_branch() and need to use meta_lock on all modifications. We use seqlock where ext2 uses rwlock; ext2 could probably also benefit from such change... Another non-trivial difference is that with UFS we *cannot* have reader grab truncate_mutex in case of race - it has to keep retrying. That might be possible to change, but not until we lift tail unpacking several levels up in call chain. After that commit we do *NOT* hold fs-wide serialization on accesses to block pointers anymore. Moreover, lock_ufs() can become a normal mutex now - it's only used on statfs, remount and sync_fs and none of those uses are recursive. As the matter of fact, *now* it can be collapsed with ->s_lock, and be eventually replaced with saner per-cylinder-group spinlocks, but that's a separate story. 
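To illustrate, a fetch of a (possibly 64bit) block pointer ends up
wrapped roughly like this (simplified sketch of the reader side; the
real code in ufs_frag_map() also revalidates the whole chain of
pointers it has collected so far):

	do {
		seq = read_seqbegin(&ufsi->meta_lock);
		key = *(__fs64 *)ptr;
	} while (read_seqretry(&ufsi->meta_lock, seq));

while every store of a block pointer goes through the write side, e.g.
in ufs_new_fragments():

	write_seqlock(&UFS_I(inode)->meta_lock);
	ufs_cpu_to_data_ptr(sb, p, result);
	write_sequnlock(&UFS_I(inode)->meta_lock);

If the chain a reader has collected turns out to have changed under it,
ufs_frag_map() drops its buffers and restarts the lookup from scratch
rather than trying to grab truncate_mutex.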
Signed-off-by: Al Viro --- fs/ufs/balloc.c | 4 ++ fs/ufs/inode.c | 138 ++++++++++++++++++++++++++++++++++++------------------ fs/ufs/super.c | 2 + fs/ufs/truncate.c | 22 ++++++++- fs/ufs/ufs.h | 2 + 5 files changed, 121 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index a7106eda5024..fb8b54eb77c5 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -417,7 +417,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, if (oldcount == 0) { result = ufs_alloc_fragments (inode, cgno, goal, count, err); if (result) { + write_seqlock(&UFS_I(inode)->meta_lock); ufs_cpu_to_data_ptr(sb, p, result); + write_sequnlock(&UFS_I(inode)->meta_lock); *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); @@ -473,7 +475,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, ufs_change_blocknr(inode, fragment - oldcount, oldcount, uspi->s_sbbase + tmp, uspi->s_sbbase + result, locked_page); + write_seqlock(&UFS_I(inode)->meta_lock); ufs_cpu_to_data_ptr(sb, p, result); + write_sequnlock(&UFS_I(inode)->meta_lock); *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index a4fc3adfdc4c..100f93c6b309 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -41,8 +41,6 @@ #include "swab.h" #include "util.h" -static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock); - static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) { struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; @@ -75,12 +73,53 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off return n; } +typedef struct { + void *p; + union { + __fs32 key32; + __fs64 key64; + }; + struct buffer_head *bh; +} Indirect; + +static inline int grow_chain32(struct ufs_inode_info *ufsi, + struct buffer_head *bh, __fs32 *v, + Indirect *from, Indirect *to) +{ + Indirect *p; + unsigned seq; + to->bh = bh; + do { + seq = read_seqbegin(&ufsi->meta_lock); + to->key32 = *(__fs32 *)(to->p = v); + for (p = from; p <= to && p->key32 == *(__fs32 *)p->p; p++) + ; + } while (read_seqretry(&ufsi->meta_lock, seq)); + return (p > to); +} + +static inline int grow_chain64(struct ufs_inode_info *ufsi, + struct buffer_head *bh, __fs64 *v, + Indirect *from, Indirect *to) +{ + Indirect *p; + unsigned seq; + to->bh = bh; + do { + seq = read_seqbegin(&ufsi->meta_lock); + to->key64 = *(__fs64 *)(to->p = v); + for (p = from; p <= to && p->key64 == *(__fs64 *)p->p; p++) + ; + } while (read_seqretry(&ufsi->meta_lock, seq)); + return (p > to); +} + /* * Returns the location of the fragment from * the beginning of the filesystem. 
*/ -static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) +static u64 ufs_frag_map(struct inode *inode, sector_t frag) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -88,12 +127,10 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift; int shift = uspi->s_apbshift-uspi->s_fpbshift; sector_t offsets[4], *p; + Indirect chain[4], *q = chain; int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets); - u64 ret = 0L; - __fs32 block; - __fs64 u2_block = 0L; unsigned flags = UFS_SB(sb)->s_flags; - u64 temp = 0L; + u64 res = 0; UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth); UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n", @@ -101,59 +138,73 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) (unsigned long long)mask); if (depth == 0) - return 0; + goto no_block; +again: p = offsets; - if (needs_lock) - lock_ufs(sb); if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) goto ufs2; - block = ufsi->i_u1.i_data[*p++]; - if (!block) - goto out; + if (!grow_chain32(ufsi, NULL, &ufsi->i_u1.i_data[*p++], chain, q)) + goto changed; + if (!q->key32) + goto no_block; while (--depth) { + __fs32 *ptr; struct buffer_head *bh; sector_t n = *p++; - bh = sb_bread(sb, uspi->s_sbbase + fs32_to_cpu(sb, block)+(n>>shift)); + bh = sb_bread(sb, uspi->s_sbbase + + fs32_to_cpu(sb, q->key32) + (n>>shift)); if (!bh) - goto out; - block = ((__fs32 *) bh->b_data)[n & mask]; - brelse (bh); - if (!block) - goto out; + goto no_block; + ptr = (__fs32 *)bh->b_data + (n & mask); + if (!grow_chain32(ufsi, bh, ptr, chain, ++q)) + goto changed; + if (!q->key32) + goto no_block; } - ret = (u64) (uspi->s_sbbase + fs32_to_cpu(sb, block) + (frag & uspi->s_fpbmask)); - goto out; -ufs2: - u2_block = ufsi->i_u1.u2_i_data[*p++]; - if (!u2_block) - goto out; + res = fs32_to_cpu(sb, q->key32); + goto found; +ufs2: + if (!grow_chain64(ufsi, NULL, &ufsi->i_u1.u2_i_data[*p++], chain, q)) + goto changed; + if (!q->key64) + goto no_block; while (--depth) { + __fs64 *ptr; struct buffer_head *bh; sector_t n = *p++; - - temp = (u64)(uspi->s_sbbase) + fs64_to_cpu(sb, u2_block); - bh = sb_bread(sb, temp +(u64) (n>>shift)); + bh = sb_bread(sb, uspi->s_sbbase + + fs64_to_cpu(sb, q->key64) + (n>>shift)); if (!bh) - goto out; - u2_block = ((__fs64 *)bh->b_data)[n & mask]; - brelse(bh); - if (!u2_block) - goto out; + goto no_block; + ptr = (__fs64 *)bh->b_data + (n & mask); + if (!grow_chain64(ufsi, bh, ptr, chain, ++q)) + goto changed; + if (!q->key64) + goto no_block; + } + res = fs64_to_cpu(sb, q->key64); +found: + res += uspi->s_sbbase + (frag & uspi->s_fpbmask); +no_block: + while (q > chain) { + brelse(q->bh); + q--; } - temp = (u64)uspi->s_sbbase + fs64_to_cpu(sb, u2_block); - ret = temp + (u64) (frag & uspi->s_fpbmask); + return res; -out: - if (needs_lock) - unlock_ufs(sb); - return ret; +changed: + while (q > chain) { + brelse(q->bh); + q--; + } + goto again; } /** @@ -421,10 +472,9 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head int ret, err, new; unsigned long ptr,phys; u64 phys64 = 0; - bool needs_lock = (sbi->mutex_owner != current); if (!create) { - phys64 = ufs_frag_map(inode, fragment, needs_lock); + phys64 = ufs_frag_map(inode, fragment); UFSD("phys64 = %llu\n", (unsigned long long)phys64); if (phys64) map_bh(bh_result, sb, phys64); @@ -438,8 +488,7 @@ int ufs_getfrag_block(struct inode *inode, 
sector_t fragment, struct buffer_head ret = 0; bh = NULL; - if (needs_lock) - lock_ufs(sb); + mutex_lock(&UFS_I(inode)->truncate_mutex); UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); if (fragment > @@ -501,8 +550,7 @@ out: set_buffer_new(bh_result); map_bh(bh_result, sb, phys); abort: - if (needs_lock) - unlock_ufs(sb); + mutex_unlock(&UFS_I(inode)->truncate_mutex); return err; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 250579a80d90..15cd3338340c 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1429,6 +1429,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb) return NULL; ei->vfs_inode.i_version = 1; + seqlock_init(&ei->meta_lock); + mutex_init(&ei->truncate_mutex); return &ei->vfs_inode; } diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 9908a6045d7a..ad34b7f4b499 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -128,7 +128,9 @@ next1: tmp = ufs_data_ptr_to_cpu(sb, p); if (!tmp) continue; + write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); if (free_count == 0) { frag_to_free = tmp; @@ -157,7 +159,9 @@ next1: if (!tmp ) ufs_panic(sb, "ufs_truncate_direct", "internal error"); frag4 = ufs_fragnum (frag4); + write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); ufs_free_fragments (inode, tmp, frag4); mark_inode_dirty(inode); @@ -199,7 +203,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) return 1; } if (!ind_ubh) { + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); return 0; } @@ -210,7 +216,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) if (!tmp) continue; + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, ind); + write_sequnlock(&UFS_I(inode)->meta_lock); ubh_mark_buffer_dirty(ind_ubh); if (free_count == 0) { frag_to_free = tmp; @@ -235,7 +243,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) break; if (i >= uspi->s_apb) { tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); ubh_bforget(ind_ubh); ufs_free_blocks (inode, tmp, uspi->s_fpb); @@ -278,7 +288,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) return 1; } if (!dind_bh) { + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); return 0; } @@ -297,7 +309,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) break; if (i >= uspi->s_apb) { tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); ubh_bforget(dind_bh); ufs_free_blocks(inode, tmp, uspi->s_fpb); @@ -339,7 +353,9 @@ static int ufs_trunc_tindirect(struct inode *inode) return 1; } if (!tind_bh) { + write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); return 0; } @@ -355,7 +371,9 @@ static int ufs_trunc_tindirect(struct inode *inode) break; if (i >= uspi->s_apb) { tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); ubh_bforget(tind_bh); ufs_free_blocks(inode, tmp, uspi->s_fpb); @@ -447,7 +465,7 @@ static void __ufs_truncate_blocks(struct inode *inode) struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; int 
retry; - lock_ufs(sb); + mutex_lock(&ufsi->truncate_mutex); while (1) { retry = ufs_trunc_direct(inode); retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, @@ -465,7 +483,7 @@ static void __ufs_truncate_blocks(struct inode *inode) } ufsi->i_lastfrag = DIRECT_FRAGMENT; - unlock_ufs(sb); + mutex_unlock(&ufsi->truncate_mutex); } int ufs_truncate(struct inode *inode, loff_t size) diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 43fcab381de1..ea28b73a8b74 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -46,6 +46,8 @@ struct ufs_inode_info { __u32 i_oeftflag; __u16 i_osync; __u64 i_lastfrag; + seqlock_t meta_lock; + struct mutex truncate_mutex; __u32 i_dir_start_lookup; struct inode vfs_inode; }; -- cgit v1.2.3 From dff7cfd36e305488421d82a0ed3dd0209c333745 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 04:27:05 -0400 Subject: ufs: kill lock_ufs() There were 3 remaining users; in two of them we took ->s_lock immediately after lock_ufs() and held it until just before unlock_ufs(); the third one (statfs) could not be called from itself or from other two (remount and sync_fs). Just use ->s_lock in statfs and don't bother with lock_ufs at all. Signed-off-by: Al Viro --- fs/ufs/super.c | 34 ++-------------------------------- fs/ufs/ufs.h | 5 ----- 2 files changed, 2 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 15cd3338340c..f6390eec02ca 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -94,22 +94,6 @@ #include "swab.h" #include "util.h" -void lock_ufs(struct super_block *sb) -{ - struct ufs_sb_info *sbi = UFS_SB(sb); - - mutex_lock(&sbi->mutex); - sbi->mutex_owner = current; -} - -void unlock_ufs(struct super_block *sb) -{ - struct ufs_sb_info *sbi = UFS_SB(sb); - - sbi->mutex_owner = NULL; - mutex_unlock(&sbi->mutex); -} - static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) { struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; @@ -694,7 +678,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait) struct ufs_super_block_third * usb3; unsigned flags; - lock_ufs(sb); mutex_lock(&UFS_SB(sb)->s_lock); UFSD("ENTER\n"); @@ -714,7 +697,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait) UFSD("EXIT\n"); mutex_unlock(&UFS_SB(sb)->s_lock); - unlock_ufs(sb); return 0; } @@ -758,7 +740,6 @@ static void ufs_put_super(struct super_block *sb) ubh_brelse_uspi (sbi->s_uspi); kfree (sbi->s_uspi); - mutex_destroy(&sbi->mutex); kfree (sbi); sb->s_fs_info = NULL; UFSD("EXIT\n"); @@ -801,7 +782,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); - mutex_init(&sbi->mutex); mutex_init(&sbi->s_lock); spin_lock_init(&sbi->work_lock); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); @@ -1257,7 +1237,6 @@ magic_found: return 0; failed: - mutex_destroy(&sbi->mutex); if (ubh) ubh_brelse_uspi (uspi); kfree (uspi); @@ -1280,7 +1259,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) unsigned flags; sync_filesystem(sb); - lock_ufs(sb); mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; @@ -1296,7 +1274,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufs_set_opt (new_mount_opt, ONERROR_LOCK); if (!ufs_parse_options (data, &new_mount_opt)) { mutex_unlock(&UFS_SB(sb)->s_lock); - unlock_ufs(sb); return -EINVAL; } if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { @@ -1304,14 +1281,12 @@ static int ufs_remount (struct super_block *sb, int 
*mount_flags, char *data) } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { pr_err("ufstype can't be changed during remount\n"); mutex_unlock(&UFS_SB(sb)->s_lock); - unlock_ufs(sb); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; mutex_unlock(&UFS_SB(sb)->s_lock); - unlock_ufs(sb); return 0; } @@ -1335,7 +1310,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #ifndef CONFIG_UFS_FS_WRITE pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); mutex_unlock(&UFS_SB(sb)->s_lock); - unlock_ufs(sb); return -EINVAL; #else if (ufstype != UFS_MOUNT_UFSTYPE_SUN && @@ -1345,13 +1319,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_UFS2) { pr_err("this ufstype is read-only supported\n"); mutex_unlock(&UFS_SB(sb)->s_lock); - unlock_ufs(sb); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { pr_err("failed during remounting\n"); mutex_unlock(&UFS_SB(sb)->s_lock); - unlock_ufs(sb); return -EPERM; } sb->s_flags &= ~MS_RDONLY; @@ -1359,7 +1331,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) } UFS_SB(sb)->s_mount_opt = new_mount_opt; mutex_unlock(&UFS_SB(sb)->s_lock); - unlock_ufs(sb); return 0; } @@ -1391,8 +1362,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) struct ufs_super_block_third *usb3; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - lock_ufs(sb); - + mutex_lock(&UFS_SB(sb)->s_lock); usb3 = ubh_get_usb_third(uspi); if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { @@ -1413,7 +1383,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index ea28b73a8b74..478f35b493a6 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -24,8 +24,6 @@ struct ufs_sb_info { unsigned s_cgno[UFS_MAX_GROUP_LOADED]; unsigned short s_cg_loaded; unsigned s_mount_opt; - struct mutex mutex; - struct task_struct *mutex_owner; struct super_block *sb; int work_queued; /* non-zero if the delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ @@ -172,7 +170,4 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b) return do_div(b, uspi->s_fpg); } -extern void lock_ufs(struct super_block *sb); -extern void unlock_ufs(struct super_block *sb); - #endif /* _UFS_UFS_H */ -- cgit v1.2.3 From 6a799d3514217d217b4e74a1ee4f016428582dc5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 18:43:08 -0400 Subject: ufs: ufs_trunc_direct() always returns 0 make it return void Signed-off-by: Al Viro --- fs/ufs/truncate.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index ad34b7f4b499..c56f4ef1cb7a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -63,7 +63,7 @@ #define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) -static int ufs_trunc_direct(struct inode *inode) +static void ufs_trunc_direct(struct inode *inode) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block * sb; @@ -72,7 +72,6 @@ static int ufs_trunc_direct(struct inode *inode) u64 frag1, frag2, frag3, frag4, block1, block2; unsigned frag_to_free, free_count; unsigned i, tmp; - int retry; UFSD("ENTER: ino %lu\n", inode->i_ino); @@ -81,7 +80,6 @@ 
static int ufs_trunc_direct(struct inode *inode) frag_to_free = 0; free_count = 0; - retry = 0; frag1 = DIRECT_FRAGMENT; frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag); @@ -168,7 +166,6 @@ next1: next3: UFSD("EXIT: ino %lu\n", inode->i_ino); - return retry; } @@ -467,8 +464,8 @@ static void __ufs_truncate_blocks(struct inode *inode) mutex_lock(&ufsi->truncate_mutex); while (1) { - retry = ufs_trunc_direct(inode); - retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, + ufs_trunc_direct(inode); + retry = ufs_trunc_indirect(inode, UFS_IND_BLOCK, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); retry |= ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, -- cgit v1.2.3 From 687857930d9294100a4636e45b78a244e6ba4125 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 18:45:21 -0400 Subject: ufs: ufs_trunc_...() has exclusion with everything that might cause allocations Currently - on lock_ufs(), eventually - on per-inode mutex. lock_ufs() used to be mere BKL, which is much weaker, so it needed those rechecks. BKL doesn't provide any exclusion once we lose CPU; its blind replacement, OTOH, _does_. Making that per-filesystem was an atrocity, but at least we can simplify life here. And yes, we certainly need to make that sucker per-inode - these days inode.c and truncate.c uses are needed only to protect the block pointers. Signed-off-by: Al Viro --- fs/ufs/truncate.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index c56f4ef1cb7a..3beaa848e30a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -195,10 +195,6 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) if (!tmp) return 0; ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize); - if (tmp != ufs_data_ptr_to_cpu(sb, p)) { - ubh_brelse (ind_ubh); - return 1; - } if (!ind_ubh) { write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); @@ -280,10 +276,6 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) if (!tmp) return 0; dind_bh = ubh_bread(sb, tmp, uspi->s_bsize); - if (tmp != ufs_data_ptr_to_cpu(sb, p)) { - ubh_brelse (dind_bh); - return 1; - } if (!dind_bh) { write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); @@ -345,10 +337,6 @@ static int ufs_trunc_tindirect(struct inode *inode) if (!(tmp = ufs_data_ptr_to_cpu(sb, p))) return 0; tind_bh = ubh_bread (sb, tmp, uspi->s_bsize); - if (tmp != ufs_data_ptr_to_cpu(sb, p)) { - ubh_brelse (tind_bh); - return 1; - } if (!tind_bh) { write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); -- cgit v1.2.3 From 0d23cf7616253b7960edeae720b9f5dfdccee445 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 18:52:28 -0400 Subject: ufs: no retries are needed on truncate Signed-off-by: Al Viro --- fs/ufs/truncate.c | 57 +++++++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 3beaa848e30a..f84dd3078929 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -169,7 +169,7 @@ next1: } -static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) +static void ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) { struct super_block * sb; struct ufs_sb_private_info * uspi; @@ -177,7 +177,6 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) void *ind; u64 tmp, indirect_block, i, frag_to_free; unsigned free_count; - int retry; UFSD("ENTER: ino %lu, offset %llu, p: 
%p\n", inode->i_ino, (unsigned long long)offset, p); @@ -189,17 +188,16 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) frag_to_free = 0; free_count = 0; - retry = 0; tmp = ufs_data_ptr_to_cpu(sb, p); if (!tmp) - return 0; + return; ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize); if (!ind_ubh) { write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); write_sequnlock(&UFS_I(inode)->meta_lock); - return 0; + return; } indirect_block = (DIRECT_BLOCK > offset) ? (DIRECT_BLOCK - offset) : 0; @@ -250,18 +248,15 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); - - return retry; } -static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) +static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) { struct super_block * sb; struct ufs_sb_private_info * uspi; struct ufs_buffer_head *dind_bh; u64 i, tmp, dindirect_block; void *dind; - int retry = 0; UFSD("ENTER: ino %lu\n", inode->i_ino); @@ -270,17 +265,16 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) dindirect_block = (DIRECT_BLOCK > offset) ? ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0; - retry = 0; tmp = ufs_data_ptr_to_cpu(sb, p); if (!tmp) - return 0; + return; dind_bh = ubh_bread(sb, tmp, uspi->s_bsize); if (!dind_bh) { write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); write_sequnlock(&UFS_I(inode)->meta_lock); - return 0; + return; } for (i = dindirect_block ; i < uspi->s_apb ; i++) { @@ -288,7 +282,7 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) tmp = ufs_data_ptr_to_cpu(sb, dind); if (!tmp) continue; - retry |= ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind); + ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind); ubh_mark_buffer_dirty(dind_bh); } @@ -312,11 +306,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); - - return retry; } -static int ufs_trunc_tindirect(struct inode *inode) +static void ufs_trunc_tindirect(struct inode *inode) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; @@ -324,29 +316,26 @@ static int ufs_trunc_tindirect(struct inode *inode) struct ufs_buffer_head * tind_bh; u64 tindirect_block, tmp, i; void *tind, *p; - int retry; UFSD("ENTER: ino %lu\n", inode->i_ino); - retry = 0; - tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb)) ? 
((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0; p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK); if (!(tmp = ufs_data_ptr_to_cpu(sb, p))) - return 0; + return; tind_bh = ubh_bread (sb, tmp, uspi->s_bsize); if (!tind_bh) { write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); write_sequnlock(&ufsi->meta_lock); - return 0; + return; } for (i = tindirect_block ; i < uspi->s_apb ; i++) { tind = ubh_get_data_ptr(uspi, tind_bh, i); - retry |= ufs_trunc_dindirect(inode, UFS_NDADDR + + ufs_trunc_dindirect(inode, UFS_NDADDR + uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind); ubh_mark_buffer_dirty(tind_bh); } @@ -370,7 +359,6 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); - return retry; } static int ufs_alloc_lastblock(struct inode *inode, loff_t size) @@ -448,25 +436,14 @@ static void __ufs_truncate_blocks(struct inode *inode) struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - int retry; mutex_lock(&ufsi->truncate_mutex); - while (1) { - ufs_trunc_direct(inode); - retry = ufs_trunc_indirect(inode, UFS_IND_BLOCK, - ufs_get_direct_data_ptr(uspi, ufsi, - UFS_IND_BLOCK)); - retry |= ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, - ufs_get_direct_data_ptr(uspi, ufsi, - UFS_DIND_BLOCK)); - retry |= ufs_trunc_tindirect (inode); - if (!retry) - break; - if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) - ufs_sync_inode (inode); - yield(); - } - + ufs_trunc_direct(inode); + ufs_trunc_indirect(inode, UFS_IND_BLOCK, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); + ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); + ufs_trunc_tindirect(inode); ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); } -- cgit v1.2.3 From 010d331fc315c96607aa6ecdfebb9fcdd349fc9b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Jun 2015 12:44:14 -0400 Subject: ufs: move truncate code into inode.c It is closely tied to block pointers handling there, can benefit from existing helpers, etc. - no point keeping them apart. Trimmed the trailing whitespaces in inode.c at the same time. 
Signed-off-by: Al Viro --- fs/ufs/Makefile | 2 +- fs/ufs/inode.c | 480 ++++++++++++++++++++++++++++++++++++++++++++++++-- fs/ufs/truncate.c | 515 ------------------------------------------------------ fs/ufs/ufs.h | 6 +- 4 files changed, 470 insertions(+), 533 deletions(-) delete mode 100644 fs/ufs/truncate.c (limited to 'fs') diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile index 4d0e02b022b3..392db25c0b56 100644 --- a/fs/ufs/Makefile +++ b/fs/ufs/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_UFS_FS) += ufs.o ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \ - namei.o super.o symlink.o truncate.o util.o + namei.o super.o symlink.o util.o ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 100f93c6b309..ec758edbda47 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -126,7 +126,7 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag) struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift; int shift = uspi->s_apbshift-uspi->s_fpbshift; - sector_t offsets[4], *p; + unsigned offsets[4], *p; Indirect chain[4], *q = chain; int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets); unsigned flags = UFS_SB(sb)->s_flags; @@ -290,14 +290,14 @@ repeat: return NULL; } lastfrag = ufsi->i_lastfrag; - + } tmp = ufs_data_ptr_to_cpu(sb, ufs_get_direct_data_ptr(uspi, ufsi, lastblock)); if (tmp) goal = tmp + uspi->s_fpb; - tmp = ufs_new_fragments (inode, p, fragment - blockoff, + tmp = ufs_new_fragments (inode, p, fragment - blockoff, goal, required + blockoff, err, phys != NULL ? locked_page : NULL); @@ -436,7 +436,7 @@ repeat: if (ufs_data_ptr_to_cpu(sb, p)) goto repeat; goto out; - } + } if (!phys) { @@ -463,7 +463,7 @@ out: * readpage, writepage and so on */ -int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) +static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) { struct super_block * sb = inode->i_sb; struct ufs_sb_info * sbi = UFS_SB(sb); @@ -472,7 +472,7 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head int ret, err, new; unsigned long ptr,phys; u64 phys64 = 0; - + if (!create) { phys64 = ufs_frag_map(inode, fragment); UFSD("phys64 = %llu\n", (unsigned long long)phys64); @@ -498,7 +498,7 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head err = 0; ptr = fragment; - + /* * ok, these macros clean the logic up a bit and make * it much more readable: @@ -574,6 +574,8 @@ int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len) return __block_write_begin(page, pos, len, ufs_getfrag_block); } +static void ufs_truncate_blocks(struct inode *); + static void ufs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; @@ -661,7 +663,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); return -1; } - + /* * Linux now has 32-bit uid and gid, so we can support EFT. 
*/ @@ -681,7 +683,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); - + if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) { memcpy(ufsi->i_u1.i_data, &ufs_inode->ui_u2.ui_addr, sizeof(ufs_inode->ui_u2.ui_addr)); @@ -815,7 +817,7 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode) ufs_set_inode_uid(sb, ufs_inode, i_uid_read(inode)); ufs_set_inode_gid(sb, ufs_inode, i_gid_read(inode)); - + ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); ufs_inode->ui_atime.tv_usec = 0; @@ -917,12 +919,12 @@ static int ufs_update_inode(struct inode * inode, int do_sync) ufs1_update_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); } - + mark_buffer_dirty(bh); if (do_sync) sync_dirty_buffer(bh); brelse (bh); - + UFSD("EXIT\n"); return 0; } @@ -957,3 +959,457 @@ void ufs_evict_inode(struct inode * inode) if (want_delete) ufs_free_inode(inode); } + +#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift) +#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) + +static void ufs_trunc_direct(struct inode *inode) +{ + struct ufs_inode_info *ufsi = UFS_I(inode); + struct super_block * sb; + struct ufs_sb_private_info * uspi; + void *p; + u64 frag1, frag2, frag3, frag4, block1, block2; + unsigned frag_to_free, free_count; + unsigned i, tmp; + + UFSD("ENTER: ino %lu\n", inode->i_ino); + + sb = inode->i_sb; + uspi = UFS_SB(sb)->s_uspi; + + frag_to_free = 0; + free_count = 0; + + frag1 = DIRECT_FRAGMENT; + frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag); + frag2 = ((frag1 & uspi->s_fpbmask) ? 
((frag1 | uspi->s_fpbmask) + 1) : frag1); + frag3 = frag4 & ~uspi->s_fpbmask; + block1 = block2 = 0; + if (frag2 > frag3) { + frag2 = frag4; + frag3 = frag4 = 0; + } else if (frag2 < frag3) { + block1 = ufs_fragstoblks (frag2); + block2 = ufs_fragstoblks (frag3); + } + + UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu," + " frag3 %llu, frag4 %llu\n", inode->i_ino, + (unsigned long long)frag1, (unsigned long long)frag2, + (unsigned long long)block1, (unsigned long long)block2, + (unsigned long long)frag3, (unsigned long long)frag4); + + if (frag1 >= frag2) + goto next1; + + /* + * Free first free fragments + */ + p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag1)); + tmp = ufs_data_ptr_to_cpu(sb, p); + if (!tmp ) + ufs_panic (sb, "ufs_trunc_direct", "internal error"); + frag2 -= frag1; + frag1 = ufs_fragnum (frag1); + + ufs_free_fragments(inode, tmp + frag1, frag2); + mark_inode_dirty(inode); + frag_to_free = tmp + frag1; + +next1: + /* + * Free whole blocks + */ + for (i = block1 ; i < block2; i++) { + p = ufs_get_direct_data_ptr(uspi, ufsi, i); + tmp = ufs_data_ptr_to_cpu(sb, p); + if (!tmp) + continue; + write_seqlock(&ufsi->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); + + if (free_count == 0) { + frag_to_free = tmp; + free_count = uspi->s_fpb; + } else if (free_count > 0 && frag_to_free == tmp - free_count) + free_count += uspi->s_fpb; + else { + ufs_free_blocks (inode, frag_to_free, free_count); + frag_to_free = tmp; + free_count = uspi->s_fpb; + } + mark_inode_dirty(inode); + } + + if (free_count > 0) + ufs_free_blocks (inode, frag_to_free, free_count); + + if (frag3 >= frag4) + goto next3; + + /* + * Free last free fragments + */ + p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag3)); + tmp = ufs_data_ptr_to_cpu(sb, p); + if (!tmp ) + ufs_panic(sb, "ufs_truncate_direct", "internal error"); + frag4 = ufs_fragnum (frag4); + write_seqlock(&ufsi->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); + + ufs_free_fragments (inode, tmp, frag4); + mark_inode_dirty(inode); + next3: + + UFSD("EXIT: ino %lu\n", inode->i_ino); +} + + +static void ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) +{ + struct super_block * sb; + struct ufs_sb_private_info * uspi; + struct ufs_buffer_head * ind_ubh; + void *ind; + u64 tmp, indirect_block, i, frag_to_free; + unsigned free_count; + + UFSD("ENTER: ino %lu, offset %llu, p: %p\n", + inode->i_ino, (unsigned long long)offset, p); + + BUG_ON(!p); + + sb = inode->i_sb; + uspi = UFS_SB(sb)->s_uspi; + + frag_to_free = 0; + free_count = 0; + + tmp = ufs_data_ptr_to_cpu(sb, p); + if (!tmp) + return; + ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize); + if (!ind_ubh) { + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); + return; + } + + indirect_block = (DIRECT_BLOCK > offset) ? 
(DIRECT_BLOCK - offset) : 0; + for (i = indirect_block; i < uspi->s_apb; i++) { + ind = ubh_get_data_ptr(uspi, ind_ubh, i); + tmp = ufs_data_ptr_to_cpu(sb, ind); + if (!tmp) + continue; + + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, ind); + write_sequnlock(&UFS_I(inode)->meta_lock); + ubh_mark_buffer_dirty(ind_ubh); + if (free_count == 0) { + frag_to_free = tmp; + free_count = uspi->s_fpb; + } else if (free_count > 0 && frag_to_free == tmp - free_count) + free_count += uspi->s_fpb; + else { + ufs_free_blocks (inode, frag_to_free, free_count); + frag_to_free = tmp; + free_count = uspi->s_fpb; + } + + mark_inode_dirty(inode); + } + + if (free_count > 0) { + ufs_free_blocks (inode, frag_to_free, free_count); + } + for (i = 0; i < uspi->s_apb; i++) + if (!ufs_is_data_ptr_zero(uspi, + ubh_get_data_ptr(uspi, ind_ubh, i))) + break; + if (i >= uspi->s_apb) { + tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); + + ubh_bforget(ind_ubh); + ufs_free_blocks (inode, tmp, uspi->s_fpb); + mark_inode_dirty(inode); + ind_ubh = NULL; + } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); + ubh_brelse (ind_ubh); + + UFSD("EXIT: ino %lu\n", inode->i_ino); +} + +static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) +{ + struct super_block * sb; + struct ufs_sb_private_info * uspi; + struct ufs_buffer_head *dind_bh; + u64 i, tmp, dindirect_block; + void *dind; + + UFSD("ENTER: ino %lu\n", inode->i_ino); + + sb = inode->i_sb; + uspi = UFS_SB(sb)->s_uspi; + + dindirect_block = (DIRECT_BLOCK > offset) + ? ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0; + + tmp = ufs_data_ptr_to_cpu(sb, p); + if (!tmp) + return; + dind_bh = ubh_bread(sb, tmp, uspi->s_bsize); + if (!dind_bh) { + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); + return; + } + + for (i = dindirect_block ; i < uspi->s_apb ; i++) { + dind = ubh_get_data_ptr(uspi, dind_bh, i); + tmp = ufs_data_ptr_to_cpu(sb, dind); + if (!tmp) + continue; + ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind); + ubh_mark_buffer_dirty(dind_bh); + } + + for (i = 0; i < uspi->s_apb; i++) + if (!ufs_is_data_ptr_zero(uspi, + ubh_get_data_ptr(uspi, dind_bh, i))) + break; + if (i >= uspi->s_apb) { + tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); + + ubh_bforget(dind_bh); + ufs_free_blocks(inode, tmp, uspi->s_fpb); + mark_inode_dirty(inode); + dind_bh = NULL; + } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); + ubh_brelse (dind_bh); + + UFSD("EXIT: ino %lu\n", inode->i_ino); +} + +static void ufs_trunc_tindirect(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + struct ufs_inode_info *ufsi = UFS_I(inode); + struct ufs_buffer_head * tind_bh; + u64 tindirect_block, tmp, i; + void *tind, *p; + + UFSD("ENTER: ino %lu\n", inode->i_ino); + + tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb)) + ? 
((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0; + + p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK); + if (!(tmp = ufs_data_ptr_to_cpu(sb, p))) + return; + tind_bh = ubh_bread (sb, tmp, uspi->s_bsize); + if (!tind_bh) { + write_seqlock(&ufsi->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); + return; + } + + for (i = tindirect_block ; i < uspi->s_apb ; i++) { + tind = ubh_get_data_ptr(uspi, tind_bh, i); + ufs_trunc_dindirect(inode, UFS_NDADDR + + uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind); + ubh_mark_buffer_dirty(tind_bh); + } + for (i = 0; i < uspi->s_apb; i++) + if (!ufs_is_data_ptr_zero(uspi, + ubh_get_data_ptr(uspi, tind_bh, i))) + break; + if (i >= uspi->s_apb) { + tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&ufsi->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); + + ubh_bforget(tind_bh); + ufs_free_blocks(inode, tmp, uspi->s_fpb); + mark_inode_dirty(inode); + tind_bh = NULL; + } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); + ubh_brelse (tind_bh); + + UFSD("EXIT: ino %lu\n", inode->i_ino); +} + +static int ufs_alloc_lastblock(struct inode *inode, loff_t size) +{ + int err = 0; + struct super_block *sb = inode->i_sb; + struct address_space *mapping = inode->i_mapping; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + unsigned i, end; + sector_t lastfrag; + struct page *lastpage; + struct buffer_head *bh; + u64 phys64; + + lastfrag = (size + uspi->s_fsize - 1) >> uspi->s_fshift; + + if (!lastfrag) + goto out; + + lastfrag--; + + lastpage = ufs_get_locked_page(mapping, lastfrag >> + (PAGE_CACHE_SHIFT - inode->i_blkbits)); + if (IS_ERR(lastpage)) { + err = -EIO; + goto out; + } + + end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1); + bh = page_buffers(lastpage); + for (i = 0; i < end; ++i) + bh = bh->b_this_page; + + + err = ufs_getfrag_block(inode, lastfrag, bh, 1); + + if (unlikely(err)) + goto out_unlock; + + if (buffer_new(bh)) { + clear_buffer_new(bh); + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + /* + * we do not zeroize fragment, because of + * if it maped to hole, it already contains zeroes + */ + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + set_page_dirty(lastpage); + } + + if (lastfrag >= UFS_IND_FRAGMENT) { + end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1; + phys64 = bh->b_blocknr + 1; + for (i = 0; i < end; ++i) { + bh = sb_getblk(sb, i + phys64); + lock_buffer(bh); + memset(bh->b_data, 0, sb->s_blocksize); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + sync_dirty_buffer(bh); + brelse(bh); + } + } +out_unlock: + ufs_put_locked_page(lastpage); +out: + return err; +} + +static void __ufs_truncate_blocks(struct inode *inode) +{ + struct ufs_inode_info *ufsi = UFS_I(inode); + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + + mutex_lock(&ufsi->truncate_mutex); + ufs_trunc_direct(inode); + ufs_trunc_indirect(inode, UFS_IND_BLOCK, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); + ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); + ufs_trunc_tindirect(inode); + ufsi->i_lastfrag = DIRECT_FRAGMENT; + mutex_unlock(&ufsi->truncate_mutex); +} + +static int ufs_truncate(struct inode *inode, loff_t size) +{ + int err = 0; + + UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n", + inode->i_ino, (unsigned long long)size, + (unsigned long 
long)i_size_read(inode)); + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return -EPERM; + + err = ufs_alloc_lastblock(inode, size); + + if (err) + goto out; + + block_truncate_page(inode->i_mapping, size, ufs_getfrag_block); + + truncate_setsize(inode, size); + + __ufs_truncate_blocks(inode); + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); +out: + UFSD("EXIT: err %d\n", err); + return err; +} + +void ufs_truncate_blocks(struct inode *inode) +{ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + __ufs_truncate_blocks(inode); +} + +int ufs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = d_inode(dentry); + unsigned int ia_valid = attr->ia_valid; + int error; + + error = inode_change_ok(inode, attr); + if (error) + return error; + + if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { + error = ufs_truncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; +} + +const struct inode_operations ufs_file_inode_operations = { + .setattr = ufs_setattr, +}; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c deleted file mode 100644 index f84dd3078929..000000000000 --- a/fs/ufs/truncate.c +++ /dev/null @@ -1,515 +0,0 @@ -/* - * linux/fs/ufs/truncate.c - * - * Copyright (C) 1998 - * Daniel Pirkl - * Charles University, Faculty of Mathematics and Physics - * - * from - * - * linux/fs/ext2/truncate.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/truncate.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - */ - -/* - * Real random numbers for secure rm added 94/02/18 - * Idea from Pierre del Perugia - */ - -/* - * Adoptation to use page cache and UFS2 write support by - * Evgeniy Dushistov , 2006-2007 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ufs_fs.h" -#include "ufs.h" -#include "swab.h" -#include "util.h" - -/* - * Secure deletion currently doesn't work. It interacts very badly - * with buffers shared with memory mappings, and for that reason - * can't be done in the truncate() routines. It should instead be - * done separately in "release()" before calling the truncate routines - * that will release the actual file blocks. - * - * Linus - */ - -#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift) -#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) - - -static void ufs_trunc_direct(struct inode *inode) -{ - struct ufs_inode_info *ufsi = UFS_I(inode); - struct super_block * sb; - struct ufs_sb_private_info * uspi; - void *p; - u64 frag1, frag2, frag3, frag4, block1, block2; - unsigned frag_to_free, free_count; - unsigned i, tmp; - - UFSD("ENTER: ino %lu\n", inode->i_ino); - - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; - - frag_to_free = 0; - free_count = 0; - - frag1 = DIRECT_FRAGMENT; - frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag); - frag2 = ((frag1 & uspi->s_fpbmask) ? 
((frag1 | uspi->s_fpbmask) + 1) : frag1); - frag3 = frag4 & ~uspi->s_fpbmask; - block1 = block2 = 0; - if (frag2 > frag3) { - frag2 = frag4; - frag3 = frag4 = 0; - } else if (frag2 < frag3) { - block1 = ufs_fragstoblks (frag2); - block2 = ufs_fragstoblks (frag3); - } - - UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu," - " frag3 %llu, frag4 %llu\n", inode->i_ino, - (unsigned long long)frag1, (unsigned long long)frag2, - (unsigned long long)block1, (unsigned long long)block2, - (unsigned long long)frag3, (unsigned long long)frag4); - - if (frag1 >= frag2) - goto next1; - - /* - * Free first free fragments - */ - p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag1)); - tmp = ufs_data_ptr_to_cpu(sb, p); - if (!tmp ) - ufs_panic (sb, "ufs_trunc_direct", "internal error"); - frag2 -= frag1; - frag1 = ufs_fragnum (frag1); - - ufs_free_fragments(inode, tmp + frag1, frag2); - mark_inode_dirty(inode); - frag_to_free = tmp + frag1; - -next1: - /* - * Free whole blocks - */ - for (i = block1 ; i < block2; i++) { - p = ufs_get_direct_data_ptr(uspi, ufsi, i); - tmp = ufs_data_ptr_to_cpu(sb, p); - if (!tmp) - continue; - write_seqlock(&ufsi->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&ufsi->meta_lock); - - if (free_count == 0) { - frag_to_free = tmp; - free_count = uspi->s_fpb; - } else if (free_count > 0 && frag_to_free == tmp - free_count) - free_count += uspi->s_fpb; - else { - ufs_free_blocks (inode, frag_to_free, free_count); - frag_to_free = tmp; - free_count = uspi->s_fpb; - } - mark_inode_dirty(inode); - } - - if (free_count > 0) - ufs_free_blocks (inode, frag_to_free, free_count); - - if (frag3 >= frag4) - goto next3; - - /* - * Free last free fragments - */ - p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag3)); - tmp = ufs_data_ptr_to_cpu(sb, p); - if (!tmp ) - ufs_panic(sb, "ufs_truncate_direct", "internal error"); - frag4 = ufs_fragnum (frag4); - write_seqlock(&ufsi->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&ufsi->meta_lock); - - ufs_free_fragments (inode, tmp, frag4); - mark_inode_dirty(inode); - next3: - - UFSD("EXIT: ino %lu\n", inode->i_ino); -} - - -static void ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) -{ - struct super_block * sb; - struct ufs_sb_private_info * uspi; - struct ufs_buffer_head * ind_ubh; - void *ind; - u64 tmp, indirect_block, i, frag_to_free; - unsigned free_count; - - UFSD("ENTER: ino %lu, offset %llu, p: %p\n", - inode->i_ino, (unsigned long long)offset, p); - - BUG_ON(!p); - - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; - - frag_to_free = 0; - free_count = 0; - - tmp = ufs_data_ptr_to_cpu(sb, p); - if (!tmp) - return; - ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize); - if (!ind_ubh) { - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - return; - } - - indirect_block = (DIRECT_BLOCK > offset) ? 
(DIRECT_BLOCK - offset) : 0; - for (i = indirect_block; i < uspi->s_apb; i++) { - ind = ubh_get_data_ptr(uspi, ind_ubh, i); - tmp = ufs_data_ptr_to_cpu(sb, ind); - if (!tmp) - continue; - - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, ind); - write_sequnlock(&UFS_I(inode)->meta_lock); - ubh_mark_buffer_dirty(ind_ubh); - if (free_count == 0) { - frag_to_free = tmp; - free_count = uspi->s_fpb; - } else if (free_count > 0 && frag_to_free == tmp - free_count) - free_count += uspi->s_fpb; - else { - ufs_free_blocks (inode, frag_to_free, free_count); - frag_to_free = tmp; - free_count = uspi->s_fpb; - } - - mark_inode_dirty(inode); - } - - if (free_count > 0) { - ufs_free_blocks (inode, frag_to_free, free_count); - } - for (i = 0; i < uspi->s_apb; i++) - if (!ufs_is_data_ptr_zero(uspi, - ubh_get_data_ptr(uspi, ind_ubh, i))) - break; - if (i >= uspi->s_apb) { - tmp = ufs_data_ptr_to_cpu(sb, p); - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - - ubh_bforget(ind_ubh); - ufs_free_blocks (inode, tmp, uspi->s_fpb); - mark_inode_dirty(inode); - ind_ubh = NULL; - } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) - ubh_sync_block(ind_ubh); - ubh_brelse (ind_ubh); - - UFSD("EXIT: ino %lu\n", inode->i_ino); -} - -static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) -{ - struct super_block * sb; - struct ufs_sb_private_info * uspi; - struct ufs_buffer_head *dind_bh; - u64 i, tmp, dindirect_block; - void *dind; - - UFSD("ENTER: ino %lu\n", inode->i_ino); - - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; - - dindirect_block = (DIRECT_BLOCK > offset) - ? ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0; - - tmp = ufs_data_ptr_to_cpu(sb, p); - if (!tmp) - return; - dind_bh = ubh_bread(sb, tmp, uspi->s_bsize); - if (!dind_bh) { - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - return; - } - - for (i = dindirect_block ; i < uspi->s_apb ; i++) { - dind = ubh_get_data_ptr(uspi, dind_bh, i); - tmp = ufs_data_ptr_to_cpu(sb, dind); - if (!tmp) - continue; - ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind); - ubh_mark_buffer_dirty(dind_bh); - } - - for (i = 0; i < uspi->s_apb; i++) - if (!ufs_is_data_ptr_zero(uspi, - ubh_get_data_ptr(uspi, dind_bh, i))) - break; - if (i >= uspi->s_apb) { - tmp = ufs_data_ptr_to_cpu(sb, p); - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - - ubh_bforget(dind_bh); - ufs_free_blocks(inode, tmp, uspi->s_fpb); - mark_inode_dirty(inode); - dind_bh = NULL; - } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) - ubh_sync_block(dind_bh); - ubh_brelse (dind_bh); - - UFSD("EXIT: ino %lu\n", inode->i_ino); -} - -static void ufs_trunc_tindirect(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - struct ufs_inode_info *ufsi = UFS_I(inode); - struct ufs_buffer_head * tind_bh; - u64 tindirect_block, tmp, i; - void *tind, *p; - - UFSD("ENTER: ino %lu\n", inode->i_ino); - - tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb)) - ? 
((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0; - - p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK); - if (!(tmp = ufs_data_ptr_to_cpu(sb, p))) - return; - tind_bh = ubh_bread (sb, tmp, uspi->s_bsize); - if (!tind_bh) { - write_seqlock(&ufsi->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&ufsi->meta_lock); - return; - } - - for (i = tindirect_block ; i < uspi->s_apb ; i++) { - tind = ubh_get_data_ptr(uspi, tind_bh, i); - ufs_trunc_dindirect(inode, UFS_NDADDR + - uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind); - ubh_mark_buffer_dirty(tind_bh); - } - for (i = 0; i < uspi->s_apb; i++) - if (!ufs_is_data_ptr_zero(uspi, - ubh_get_data_ptr(uspi, tind_bh, i))) - break; - if (i >= uspi->s_apb) { - tmp = ufs_data_ptr_to_cpu(sb, p); - write_seqlock(&ufsi->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&ufsi->meta_lock); - - ubh_bforget(tind_bh); - ufs_free_blocks(inode, tmp, uspi->s_fpb); - mark_inode_dirty(inode); - tind_bh = NULL; - } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) - ubh_sync_block(tind_bh); - ubh_brelse (tind_bh); - - UFSD("EXIT: ino %lu\n", inode->i_ino); -} - -static int ufs_alloc_lastblock(struct inode *inode, loff_t size) -{ - int err = 0; - struct super_block *sb = inode->i_sb; - struct address_space *mapping = inode->i_mapping; - struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - unsigned i, end; - sector_t lastfrag; - struct page *lastpage; - struct buffer_head *bh; - u64 phys64; - - lastfrag = (size + uspi->s_fsize - 1) >> uspi->s_fshift; - - if (!lastfrag) - goto out; - - lastfrag--; - - lastpage = ufs_get_locked_page(mapping, lastfrag >> - (PAGE_CACHE_SHIFT - inode->i_blkbits)); - if (IS_ERR(lastpage)) { - err = -EIO; - goto out; - } - - end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1); - bh = page_buffers(lastpage); - for (i = 0; i < end; ++i) - bh = bh->b_this_page; - - - err = ufs_getfrag_block(inode, lastfrag, bh, 1); - - if (unlikely(err)) - goto out_unlock; - - if (buffer_new(bh)) { - clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); - /* - * we do not zeroize fragment, because of - * if it maped to hole, it already contains zeroes - */ - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - set_page_dirty(lastpage); - } - - if (lastfrag >= UFS_IND_FRAGMENT) { - end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1; - phys64 = bh->b_blocknr + 1; - for (i = 0; i < end; ++i) { - bh = sb_getblk(sb, i + phys64); - lock_buffer(bh); - memset(bh->b_data, 0, sb->s_blocksize); - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - unlock_buffer(bh); - sync_dirty_buffer(bh); - brelse(bh); - } - } -out_unlock: - ufs_put_locked_page(lastpage); -out: - return err; -} - -static void __ufs_truncate_blocks(struct inode *inode) -{ - struct ufs_inode_info *ufsi = UFS_I(inode); - struct super_block *sb = inode->i_sb; - struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - - mutex_lock(&ufsi->truncate_mutex); - ufs_trunc_direct(inode); - ufs_trunc_indirect(inode, UFS_IND_BLOCK, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode); - ufsi->i_lastfrag = DIRECT_FRAGMENT; - mutex_unlock(&ufsi->truncate_mutex); -} - -int ufs_truncate(struct inode *inode, loff_t size) -{ - int err = 0; - - UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n", - inode->i_ino, (unsigned long long)size, - (unsigned long 
long)i_size_read(inode)); - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return -EINVAL; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return -EPERM; - - err = ufs_alloc_lastblock(inode, size); - - if (err) - goto out; - - block_truncate_page(inode->i_mapping, size, ufs_getfrag_block); - - truncate_setsize(inode, size); - - __ufs_truncate_blocks(inode); - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); -out: - UFSD("EXIT: err %d\n", err); - return err; -} - -void ufs_truncate_blocks(struct inode *inode) -{ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - __ufs_truncate_blocks(inode); -} - -int ufs_setattr(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = d_inode(dentry); - unsigned int ia_valid = attr->ia_valid; - int error; - - error = inode_change_ok(inode, attr); - if (error) - return error; - - if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { - error = ufs_truncate(inode, attr->ia_size); - if (error) - return error; - } - - setattr_copy(inode, attr); - mark_inode_dirty(inode); - return 0; -} - -const struct inode_operations ufs_file_inode_operations = { - .setattr = ufs_setattr, -}; diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 478f35b493a6..7da4aca868c0 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -122,7 +122,7 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long); extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); extern void ufs_evict_inode (struct inode *); -extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); +extern int ufs_setattr(struct dentry *dentry, struct iattr *attr); /* namei.c */ extern const struct file_operations ufs_dir_operations; @@ -140,10 +140,6 @@ void ufs_mark_sb_dirty(struct super_block *sb); extern const struct inode_operations ufs_fast_symlink_inode_operations; extern const struct inode_operations ufs_symlink_inode_operations; -/* truncate.c */ -extern void ufs_truncate_blocks(struct inode *); -extern int ufs_setattr(struct dentry *dentry, struct iattr *attr); - static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) { return sb->s_fs_info; -- cgit v1.2.3 From 4e3911f3d704d681477cdb4e1a2bfd52d5e42d23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Jun 2015 14:13:14 -0400 Subject: ufs: the offsets ufs_block_to_path() puts into array are not sector_t type makes no sense - those are indices in block number arrays, not block numbers. And no, UFS is not likely to grow indirect blocks with 4Gpointers in them... 
Signed-off-by: Al Viro --- fs/ufs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index ec758edbda47..43672183fee3 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -41,7 +41,7 @@ #include "swab.h" #include "util.h" -static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) +static int ufs_block_to_path(struct inode *inode, sector_t i_block, unsigned offsets[4]) { struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; int ptrs = uspi->s_apb; @@ -153,7 +153,7 @@ again: while (--depth) { __fs32 *ptr; struct buffer_head *bh; - sector_t n = *p++; + unsigned n = *p++; bh = sb_bread(sb, uspi->s_sbbase + fs32_to_cpu(sb, q->key32) + (n>>shift)); @@ -177,7 +177,7 @@ ufs2: while (--depth) { __fs64 *ptr; struct buffer_head *bh; - sector_t n = *p++; + unsigned n = *p++; bh = sb_bread(sb, uspi->s_sbbase + fs64_to_cpu(sb, q->key64) + (n>>shift)); -- cgit v1.2.3 From 31cd043e1a09c579c4cd38ea432200fbeae6af1f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Jun 2015 01:10:03 -0400 Subject: ufs: beginning of __ufs_truncate_block() massage Use ufs_block_to_path() to find the cutoff path in the block pointers' tree. For now just use the information about the depth (to bypass the fully preserved subtrees); subsequent commits will use the information about actual path. Signed-off-by: Al Viro --- fs/ufs/inode.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 43672183fee3..afb0f32b921c 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1336,14 +1336,22 @@ static void __ufs_truncate_blocks(struct inode *inode) struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + unsigned offsets[4]; + int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets); mutex_lock(&ufsi->truncate_mutex); - ufs_trunc_direct(inode); - ufs_trunc_indirect(inode, UFS_IND_BLOCK, + switch (depth) { + case 1: + ufs_trunc_direct(inode); + case 2: + ufs_trunc_indirect(inode, UFS_IND_BLOCK, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, + case 3: + ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode); + case 4: + ufs_trunc_tindirect(inode); + } ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); } -- cgit v1.2.3 From 18ca51d8211065f10672374336cd08d495968c73 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 13:45:07 -0400 Subject: ufs_trunc_tindirect(): pass the number of blocks to keep IOW, the distance of cutoff from the beginning of the branch (in blocks). That (and the fact that the block just prior to cutoff is guaranteed to be present) allows us to tell whether to free the triple indirect block just by looking at the offset. While we are at it, using u64 for index in the block is wrong - those should be unsigned int.
Signed-off-by: Al Viro --- fs/ufs/inode.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index afb0f32b921c..5b3f1c44d4b0 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1208,19 +1208,17 @@ static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) UFSD("EXIT: ino %lu\n", inode->i_ino); } -static void ufs_trunc_tindirect(struct inode *inode) +static void ufs_trunc_tindirect(struct inode *inode, u64 offset) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct ufs_inode_info *ufsi = UFS_I(inode); struct ufs_buffer_head * tind_bh; - u64 tindirect_block, tmp, i; + u64 tmp; void *tind, *p; - - UFSD("ENTER: ino %lu\n", inode->i_ino); - - tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb)) - ? ((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0; + bool free_it = !offset; + unsigned tindirect_block = offset >> uspi->s_2apbshift; + unsigned i; p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK); if (!(tmp = ufs_data_ptr_to_cpu(sb, p))) @@ -1239,11 +1237,7 @@ static void ufs_trunc_tindirect(struct inode *inode) uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind); ubh_mark_buffer_dirty(tind_bh); } - for (i = 0; i < uspi->s_apb; i++) - if (!ufs_is_data_ptr_zero(uspi, - ubh_get_data_ptr(uspi, tind_bh, i))) - break; - if (i >= uspi->s_apb) { + if (free_it) { tmp = ufs_data_ptr_to_cpu(sb, p); write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); @@ -1252,13 +1246,11 @@ static void ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); ufs_free_blocks(inode, tmp, uspi->s_fpb); mark_inode_dirty(inode); - tind_bh = NULL; + return; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + if (IS_SYNC(inode) && ubh_buffer_dirty(tind_bh)) ubh_sync_block(tind_bh); ubh_brelse (tind_bh); - - UFSD("EXIT: ino %lu\n", inode->i_ino); } static int ufs_alloc_lastblock(struct inode *inode, loff_t size) @@ -1349,8 +1341,10 @@ static void __ufs_truncate_blocks(struct inode *inode) case 3: ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); + ufs_trunc_tindirect(inode, 0); + break; case 4: - ufs_trunc_tindirect(inode); + ufs_trunc_tindirect(inode, DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb); } ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); -- cgit v1.2.3 From 6ac36b8777d934e3cd7eb0f023a5043d5c03b00c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Jun 2015 01:54:58 -0400 Subject: ufs_trunc_indirect(): pass the index of the first pointer to free ... instead of file offset. Same cleanups as in the tindirect conversion in previous commit. 
Signed-off-by: Al Viro --- fs/ufs/inode.c | 56 +++++++++++++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 5b3f1c44d4b0..9c4471a82d2f 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1069,25 +1069,16 @@ next1: } -static void ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) +static void ufs_trunc_indirect(struct inode *inode, unsigned from, void *p) { - struct super_block * sb; - struct ufs_sb_private_info * uspi; + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct ufs_buffer_head * ind_ubh; void *ind; - u64 tmp, indirect_block, i, frag_to_free; - unsigned free_count; - - UFSD("ENTER: ino %lu, offset %llu, p: %p\n", - inode->i_ino, (unsigned long long)offset, p); - - BUG_ON(!p); - - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; - - frag_to_free = 0; - free_count = 0; + u64 tmp, frag_to_free = 0; + unsigned free_count = 0; + bool to_free = !from; + unsigned i; tmp = ufs_data_ptr_to_cpu(sb, p); if (!tmp) @@ -1100,8 +1091,7 @@ static void ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) return; } - indirect_block = (DIRECT_BLOCK > offset) ? (DIRECT_BLOCK - offset) : 0; - for (i = indirect_block; i < uspi->s_apb; i++) { + for (i = from; i < uspi->s_apb; i++) { ind = ubh_get_data_ptr(uspi, ind_ubh, i); tmp = ufs_data_ptr_to_cpu(sb, ind); if (!tmp) @@ -1128,11 +1118,7 @@ static void ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) if (free_count > 0) { ufs_free_blocks (inode, frag_to_free, free_count); } - for (i = 0; i < uspi->s_apb; i++) - if (!ufs_is_data_ptr_zero(uspi, - ubh_get_data_ptr(uspi, ind_ubh, i))) - break; - if (i >= uspi->s_apb) { + if (to_free) { tmp = ufs_data_ptr_to_cpu(sb, p); write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); @@ -1141,13 +1127,11 @@ static void ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ufs_free_blocks (inode, tmp, uspi->s_fpb); mark_inode_dirty(inode); - ind_ubh = NULL; + return; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + if (IS_SYNC(inode) && ubh_buffer_dirty(ind_ubh)) ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); - - UFSD("EXIT: ino %lu\n", inode->i_ino); } static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) @@ -1157,14 +1141,20 @@ static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) struct ufs_buffer_head *dind_bh; u64 i, tmp, dindirect_block; void *dind; + unsigned from; UFSD("ENTER: ino %lu\n", inode->i_ino); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; - dindirect_block = (DIRECT_BLOCK > offset) - ? 
((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0; + if (DIRECT_BLOCK <= offset) { + dindirect_block = 0; + from = 0; + } else { + dindirect_block = (DIRECT_BLOCK - offset) >> uspi->s_apbshift; + from = (DIRECT_BLOCK - offset) & uspi->s_apbmask; + } tmp = ufs_data_ptr_to_cpu(sb, p); if (!tmp) @@ -1177,12 +1167,12 @@ static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) return; } - for (i = dindirect_block ; i < uspi->s_apb ; i++) { + for (i = dindirect_block ; i < uspi->s_apb ; i++, from = 0) { dind = ubh_get_data_ptr(uspi, dind_bh, i); tmp = ufs_data_ptr_to_cpu(sb, dind); if (!tmp) continue; - ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind); + ufs_trunc_indirect(inode, from, dind); ubh_mark_buffer_dirty(dind_bh); } @@ -1328,7 +1318,7 @@ static void __ufs_truncate_blocks(struct inode *inode) struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - unsigned offsets[4]; + unsigned offsets[4] = {0,}; int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets); mutex_lock(&ufsi->truncate_mutex); @@ -1336,7 +1326,7 @@ static void __ufs_truncate_blocks(struct inode *inode) case 1: ufs_trunc_direct(inode); case 2: - ufs_trunc_indirect(inode, UFS_IND_BLOCK, + ufs_trunc_indirect(inode, offsets[1], ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); case 3: ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, -- cgit v1.2.3 From 7bad5939fcd04bb83122bdb90981ec5ae2f90e0d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 14:21:09 -0400 Subject: ufs_trunc_dindirect(): pass the number of blocks to keep same as the previous two. Signed-off-by: Al Viro --- fs/ufs/inode.c | 57 ++++++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 9c4471a82d2f..b4d6398a2d54 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1136,25 +1136,15 @@ static void ufs_trunc_indirect(struct inode *inode, unsigned from, void *p) static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) { - struct super_block * sb; - struct ufs_sb_private_info * uspi; + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct ufs_buffer_head *dind_bh; - u64 i, tmp, dindirect_block; + u64 tmp; void *dind; - unsigned from; - - UFSD("ENTER: ino %lu\n", inode->i_ino); - - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; - - if (DIRECT_BLOCK <= offset) { - dindirect_block = 0; - from = 0; - } else { - dindirect_block = (DIRECT_BLOCK - offset) >> uspi->s_apbshift; - from = (DIRECT_BLOCK - offset) & uspi->s_apbmask; - } + bool free_it = !offset; + unsigned dindirect_block = offset >> uspi->s_apbshift; + unsigned from = offset & uspi->s_apbmask; + unsigned i; tmp = ufs_data_ptr_to_cpu(sb, p); if (!tmp) @@ -1176,11 +1166,7 @@ static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_mark_buffer_dirty(dind_bh); } - for (i = 0; i < uspi->s_apb; i++) - if (!ufs_is_data_ptr_zero(uspi, - ubh_get_data_ptr(uspi, dind_bh, i))) - break; - if (i >= uspi->s_apb) { + if (free_it) { tmp = ufs_data_ptr_to_cpu(sb, p); write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); @@ -1189,13 +1175,11 @@ static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); ufs_free_blocks(inode, tmp, uspi->s_fpb); mark_inode_dirty(inode); - dind_bh = NULL; + return; } - if (IS_SYNC(inode) && dind_bh && 
ubh_buffer_dirty(dind_bh)) + if (IS_SYNC(inode) && ubh_buffer_dirty(dind_bh)) ubh_sync_block(dind_bh); ubh_brelse (dind_bh); - - UFSD("EXIT: ino %lu\n", inode->i_ino); } static void ufs_trunc_tindirect(struct inode *inode, u64 offset) @@ -1210,6 +1194,8 @@ static void ufs_trunc_tindirect(struct inode *inode, u64 offset) unsigned tindirect_block = offset >> uspi->s_2apbshift; unsigned i; + offset -= tindirect_block << uspi->s_2apbshift; + p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK); if (!(tmp = ufs_data_ptr_to_cpu(sb, p))) return; @@ -1221,10 +1207,9 @@ static void ufs_trunc_tindirect(struct inode *inode, u64 offset) return; } - for (i = tindirect_block ; i < uspi->s_apb ; i++) { + for (i = tindirect_block ; i < uspi->s_apb ; i++, offset = 0) { tind = ubh_get_data_ptr(uspi, tind_bh, i); - ufs_trunc_dindirect(inode, UFS_NDADDR + - uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind); + ufs_trunc_dindirect(inode, offset, tind); ubh_mark_buffer_dirty(tind_bh); } if (free_it) { @@ -1318,18 +1303,28 @@ static void __ufs_truncate_blocks(struct inode *inode) struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - unsigned offsets[4] = {0,}; + unsigned offsets[4]; int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets); mutex_lock(&ufsi->truncate_mutex); switch (depth) { case 1: ufs_trunc_direct(inode); + ufs_trunc_indirect(inode, 0, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); + ufs_trunc_dindirect(inode, 0, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); + ufs_trunc_tindirect(inode, 0); + break; case 2: ufs_trunc_indirect(inode, offsets[1], ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); + ufs_trunc_dindirect(inode, 0, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); + ufs_trunc_tindirect(inode, 0); + break; case 3: - ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb, + ufs_trunc_dindirect(inode, DIRECT_BLOCK - UFS_IND_BLOCK - uspi->s_apb, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); ufs_trunc_tindirect(inode, 0); break; -- cgit v1.2.3 From 7a4fdda72451f094374324a552be9fc7de8f3e8d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 14:28:54 -0400 Subject: __ufs_truncate(); find cutoff distances into branches by offsets[] array Signed-off-by: Al Viro --- fs/ufs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index b4d6398a2d54..c2544d62adf2 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1324,12 +1324,16 @@ static void __ufs_truncate_blocks(struct inode *inode) ufs_trunc_tindirect(inode, 0); break; case 3: - ufs_trunc_dindirect(inode, DIRECT_BLOCK - UFS_IND_BLOCK - uspi->s_apb, + ufs_trunc_dindirect(inode, + (offsets[1] << uspi->s_apbshift) + offsets[2], ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); ufs_trunc_tindirect(inode, 0); break; case 4: - ufs_trunc_tindirect(inode, DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb); + ufs_trunc_tindirect(inode, + (offsets[1] << uspi->s_2apbshift) + + (offsets[2] << uspi->s_apbshift) + + offsets[3]); } ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); -- cgit v1.2.3 From 85416288bf730cffb61ab6ce8a7b97b17c73458f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 14:36:31 -0400 Subject: ufs_trunc_...indirect(): pass the array of indices instead of offsets rather than bitslicing the offset just formed as sum of shifted indices, pass the array of those indices itself. 
NULL is used as equivalent of "all zeroes" (== free the entire branch). Signed-off-by: Al Viro --- fs/ufs/inode.c | 50 ++++++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index c2544d62adf2..34d8dac4fe8b 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1069,7 +1069,7 @@ next1: } -static void ufs_trunc_indirect(struct inode *inode, unsigned from, void *p) +static void ufs_trunc_indirect(struct inode *inode, unsigned *offsets, void *p) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; @@ -1077,7 +1077,8 @@ static void ufs_trunc_indirect(struct inode *inode, unsigned from, void *p) void *ind; u64 tmp, frag_to_free = 0; unsigned free_count = 0; - bool to_free = !from; + unsigned from = offsets ? *offsets : 0; + bool to_free = !offsets || !from; unsigned i; tmp = ufs_data_ptr_to_cpu(sb, p); @@ -1134,16 +1135,15 @@ static void ufs_trunc_indirect(struct inode *inode, unsigned from, void *p) ubh_brelse (ind_ubh); } -static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) +static void ufs_trunc_dindirect(struct inode *inode, unsigned *offsets, void *p) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct ufs_buffer_head *dind_bh; u64 tmp; void *dind; - bool free_it = !offset; - unsigned dindirect_block = offset >> uspi->s_apbshift; - unsigned from = offset & uspi->s_apbmask; + bool free_it = !offsets || !(offsets[0] || offsets[1]); + unsigned dindirect_block = offsets ? *offsets++ : 0; unsigned i; tmp = ufs_data_ptr_to_cpu(sb, p); @@ -1157,12 +1157,12 @@ static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) return; } - for (i = dindirect_block ; i < uspi->s_apb ; i++, from = 0) { + for (i = dindirect_block ; i < uspi->s_apb ; i++, offsets = NULL) { dind = ubh_get_data_ptr(uspi, dind_bh, i); tmp = ufs_data_ptr_to_cpu(sb, dind); if (!tmp) continue; - ufs_trunc_indirect(inode, from, dind); + ufs_trunc_indirect(inode, offsets, dind); ubh_mark_buffer_dirty(dind_bh); } @@ -1182,7 +1182,7 @@ static void ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_brelse (dind_bh); } -static void ufs_trunc_tindirect(struct inode *inode, u64 offset) +static void ufs_trunc_tindirect(struct inode *inode, unsigned *offsets) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; @@ -1190,12 +1190,10 @@ static void ufs_trunc_tindirect(struct inode *inode, u64 offset) struct ufs_buffer_head * tind_bh; u64 tmp; void *tind, *p; - bool free_it = !offset; - unsigned tindirect_block = offset >> uspi->s_2apbshift; + bool free_it = !offsets || !(offsets[0] || offsets[1] || offsets[2]); + unsigned tindirect_block = offsets ? 
*offsets++ : 0; unsigned i; - offset -= tindirect_block << uspi->s_2apbshift; - p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK); if (!(tmp = ufs_data_ptr_to_cpu(sb, p))) return; @@ -1207,9 +1205,9 @@ static void ufs_trunc_tindirect(struct inode *inode, u64 offset) return; } - for (i = tindirect_block ; i < uspi->s_apb ; i++, offset = 0) { + for (i = tindirect_block ; i < uspi->s_apb ; i++, offsets = NULL) { tind = ubh_get_data_ptr(uspi, tind_bh, i); - ufs_trunc_dindirect(inode, offset, tind); + ufs_trunc_dindirect(inode, offsets, tind); ubh_mark_buffer_dirty(tind_bh); } if (free_it) { @@ -1310,30 +1308,26 @@ static void __ufs_truncate_blocks(struct inode *inode) switch (depth) { case 1: ufs_trunc_direct(inode); - ufs_trunc_indirect(inode, 0, + ufs_trunc_indirect(inode, NULL, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_dindirect(inode, 0, + ufs_trunc_dindirect(inode, NULL, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, 0); + ufs_trunc_tindirect(inode, NULL); break; case 2: - ufs_trunc_indirect(inode, offsets[1], + ufs_trunc_indirect(inode, offsets + 1, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_dindirect(inode, 0, + ufs_trunc_dindirect(inode, NULL, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, 0); + ufs_trunc_tindirect(inode, NULL); break; case 3: - ufs_trunc_dindirect(inode, - (offsets[1] << uspi->s_apbshift) + offsets[2], + ufs_trunc_dindirect(inode, offsets + 1, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, 0); + ufs_trunc_tindirect(inode, NULL); break; case 4: - ufs_trunc_tindirect(inode, - (offsets[1] << uspi->s_2apbshift) + - (offsets[2] << uspi->s_apbshift) + - offsets[3]); + ufs_trunc_tindirect(inode, offsets + 1); } ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); -- cgit v1.2.3 From 6775e24d9ccf6a48ebd1d31ca77db5ebfe00ce43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 14:55:50 -0400 Subject: ufs_trunc_..indirect(): more massage towards unifying Instead of manually checking that the array contains only zeroes, find the position of the last non-zero (in __ufs_truncate(), where we can conveniently do that) and use that to tell if there's any non-zero in the array tail passed to ufs_trunc_...indirect(). The goal of all that clumsiness is to get fold these functions together. Signed-off-by: Al Viro --- fs/ufs/inode.c | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 34d8dac4fe8b..e90266a221b8 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1069,7 +1069,7 @@ next1: } -static void ufs_trunc_indirect(struct inode *inode, unsigned *offsets, void *p) +static void ufs_trunc_indirect(struct inode *inode, unsigned *offsets, int depth2, void *p) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; @@ -1078,7 +1078,7 @@ static void ufs_trunc_indirect(struct inode *inode, unsigned *offsets, void *p) u64 tmp, frag_to_free = 0; unsigned free_count = 0; unsigned from = offsets ? 
*offsets : 0; - bool to_free = !offsets || !from; + bool to_free = !offsets || !depth2; unsigned i; tmp = ufs_data_ptr_to_cpu(sb, p); @@ -1135,14 +1135,14 @@ static void ufs_trunc_indirect(struct inode *inode, unsigned *offsets, void *p) ubh_brelse (ind_ubh); } -static void ufs_trunc_dindirect(struct inode *inode, unsigned *offsets, void *p) +static void ufs_trunc_dindirect(struct inode *inode, unsigned *offsets, int depth2, void *p) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct ufs_buffer_head *dind_bh; u64 tmp; void *dind; - bool free_it = !offsets || !(offsets[0] || offsets[1]); + bool free_it = !offsets || !depth2; unsigned dindirect_block = offsets ? *offsets++ : 0; unsigned i; @@ -1162,7 +1162,7 @@ static void ufs_trunc_dindirect(struct inode *inode, unsigned *offsets, void *p) tmp = ufs_data_ptr_to_cpu(sb, dind); if (!tmp) continue; - ufs_trunc_indirect(inode, offsets, dind); + ufs_trunc_indirect(inode, offsets, depth2 - 1, dind); ubh_mark_buffer_dirty(dind_bh); } @@ -1182,7 +1182,7 @@ static void ufs_trunc_dindirect(struct inode *inode, unsigned *offsets, void *p) ubh_brelse (dind_bh); } -static void ufs_trunc_tindirect(struct inode *inode, unsigned *offsets) +static void ufs_trunc_tindirect(struct inode *inode, unsigned *offsets, int depth2) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; @@ -1190,7 +1190,7 @@ static void ufs_trunc_tindirect(struct inode *inode, unsigned *offsets) struct ufs_buffer_head * tind_bh; u64 tmp; void *tind, *p; - bool free_it = !offsets || !(offsets[0] || offsets[1] || offsets[2]); + bool free_it = !offsets || !depth2; unsigned tindirect_block = offsets ? *offsets++ : 0; unsigned i; @@ -1207,7 +1207,7 @@ static void ufs_trunc_tindirect(struct inode *inode, unsigned *offsets) for (i = tindirect_block ; i < uspi->s_apb ; i++, offsets = NULL) { tind = ubh_get_data_ptr(uspi, tind_bh, i); - ufs_trunc_dindirect(inode, offsets, tind); + ufs_trunc_dindirect(inode, offsets, depth2 - 1, tind); ubh_mark_buffer_dirty(tind_bh); } if (free_it) { @@ -1303,31 +1303,40 @@ static void __ufs_truncate_blocks(struct inode *inode) struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; unsigned offsets[4]; int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets); + int depth2; + + if (!depth) + return; + + /* find the last non-zero in offsets[] */ + for (depth2 = depth - 1; depth2; depth2--) + if (offsets[depth2]) + break; mutex_lock(&ufsi->truncate_mutex); switch (depth) { case 1: ufs_trunc_direct(inode); - ufs_trunc_indirect(inode, NULL, + ufs_trunc_indirect(inode, NULL, 0, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_dindirect(inode, NULL, + ufs_trunc_dindirect(inode, NULL, 0, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, NULL); + ufs_trunc_tindirect(inode, NULL, 0); break; case 2: - ufs_trunc_indirect(inode, offsets + 1, + ufs_trunc_indirect(inode, offsets + 1, depth2, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_dindirect(inode, NULL, + ufs_trunc_dindirect(inode, NULL, 0, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, NULL); + ufs_trunc_tindirect(inode, NULL, 0); break; case 3: - ufs_trunc_dindirect(inode, offsets + 1, + ufs_trunc_dindirect(inode, offsets + 1, depth2, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, NULL); + ufs_trunc_tindirect(inode, NULL, 0); break; case 4: - ufs_trunc_tindirect(inode, offsets + 1); + 
ufs_trunc_tindirect(inode, offsets + 1, depth2); } ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); -- cgit v1.2.3 From 9e0fbbde2724d5d3bb9edca6b77e26eb28341154 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 15:33:47 -0400 Subject: unify ufs_trunc_..indirect() Signed-off-by: Al Viro --- fs/ufs/inode.c | 198 +++++++++++++++++---------------------------------------- 1 file changed, 60 insertions(+), 138 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index e90266a221b8..0d57c41b7705 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1068,147 +1068,65 @@ next1: UFSD("EXIT: ino %lu\n", inode->i_ino); } - -static void ufs_trunc_indirect(struct inode *inode, unsigned *offsets, int depth2, void *p) -{ - struct super_block *sb = inode->i_sb; - struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - struct ufs_buffer_head * ind_ubh; - void *ind; - u64 tmp, frag_to_free = 0; - unsigned free_count = 0; - unsigned from = offsets ? *offsets : 0; - bool to_free = !offsets || !depth2; - unsigned i; - - tmp = ufs_data_ptr_to_cpu(sb, p); - if (!tmp) - return; - ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize); - if (!ind_ubh) { - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - return; - } - - for (i = from; i < uspi->s_apb; i++) { - ind = ubh_get_data_ptr(uspi, ind_ubh, i); - tmp = ufs_data_ptr_to_cpu(sb, ind); - if (!tmp) - continue; - - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, ind); - write_sequnlock(&UFS_I(inode)->meta_lock); - ubh_mark_buffer_dirty(ind_ubh); - if (free_count == 0) { - frag_to_free = tmp; - free_count = uspi->s_fpb; - } else if (free_count > 0 && frag_to_free == tmp - free_count) - free_count += uspi->s_fpb; - else { - ufs_free_blocks (inode, frag_to_free, free_count); - frag_to_free = tmp; - free_count = uspi->s_fpb; - } - - mark_inode_dirty(inode); - } - - if (free_count > 0) { - ufs_free_blocks (inode, frag_to_free, free_count); - } - if (to_free) { - tmp = ufs_data_ptr_to_cpu(sb, p); - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - - ubh_bforget(ind_ubh); - ufs_free_blocks (inode, tmp, uspi->s_fpb); - mark_inode_dirty(inode); - return; - } - if (IS_SYNC(inode) && ubh_buffer_dirty(ind_ubh)) - ubh_sync_block(ind_ubh); - ubh_brelse (ind_ubh); -} - -static void ufs_trunc_dindirect(struct inode *inode, unsigned *offsets, int depth2, void *p) +static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, int depth, void *p) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - struct ufs_buffer_head *dind_bh; + struct ufs_inode_info *ufsi = UFS_I(inode); + struct ufs_buffer_head *ubh; u64 tmp; - void *dind; bool free_it = !offsets || !depth2; - unsigned dindirect_block = offsets ? *offsets++ : 0; + unsigned from = offsets ? 
*offsets++ : 0; unsigned i; tmp = ufs_data_ptr_to_cpu(sb, p); if (!tmp) return; - dind_bh = ubh_bread(sb, tmp, uspi->s_bsize); - if (!dind_bh) { - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - return; - } - - for (i = dindirect_block ; i < uspi->s_apb ; i++, offsets = NULL) { - dind = ubh_get_data_ptr(uspi, dind_bh, i); - tmp = ufs_data_ptr_to_cpu(sb, dind); - if (!tmp) - continue; - ufs_trunc_indirect(inode, offsets, depth2 - 1, dind); - ubh_mark_buffer_dirty(dind_bh); - } - - if (free_it) { - tmp = ufs_data_ptr_to_cpu(sb, p); - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - - ubh_bforget(dind_bh); - ufs_free_blocks(inode, tmp, uspi->s_fpb); - mark_inode_dirty(inode); - return; - } - if (IS_SYNC(inode) && ubh_buffer_dirty(dind_bh)) - ubh_sync_block(dind_bh); - ubh_brelse (dind_bh); -} - -static void ufs_trunc_tindirect(struct inode *inode, unsigned *offsets, int depth2) -{ - struct super_block *sb = inode->i_sb; - struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - struct ufs_inode_info *ufsi = UFS_I(inode); - struct ufs_buffer_head * tind_bh; - u64 tmp; - void *tind, *p; - bool free_it = !offsets || !depth2; - unsigned tindirect_block = offsets ? *offsets++ : 0; - unsigned i; - - p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK); - if (!(tmp = ufs_data_ptr_to_cpu(sb, p))) - return; - tind_bh = ubh_bread (sb, tmp, uspi->s_bsize); - if (!tind_bh) { + ubh = ubh_bread (sb, tmp, uspi->s_bsize); + if (!ubh) { write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); write_sequnlock(&ufsi->meta_lock); return; } - for (i = tindirect_block ; i < uspi->s_apb ; i++, offsets = NULL) { - tind = ubh_get_data_ptr(uspi, tind_bh, i); - ufs_trunc_dindirect(inode, offsets, depth2 - 1, tind); - ubh_mark_buffer_dirty(tind_bh); + if (--depth) { + for (i = from ; i < uspi->s_apb ; i++, offsets = NULL) { + void *ind = ubh_get_data_ptr(uspi, ubh, i); + ufs_trunc_branch(inode, offsets, depth2 - 1, depth, ind); + ubh_mark_buffer_dirty(ubh); + } + } else { + u64 frag_to_free = 0; + unsigned free_count = 0; + + for (i = from; i < uspi->s_apb; i++) { + void *ind = ubh_get_data_ptr(uspi, ubh, i); + tmp = ufs_data_ptr_to_cpu(sb, ind); + if (!tmp) + continue; + + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, ind); + write_sequnlock(&UFS_I(inode)->meta_lock); + ubh_mark_buffer_dirty(ubh); + if (free_count == 0) { + frag_to_free = tmp; + free_count = uspi->s_fpb; + } else if (free_count > 0 && frag_to_free == tmp - free_count) + free_count += uspi->s_fpb; + else { + ufs_free_blocks (inode, frag_to_free, free_count); + frag_to_free = tmp; + free_count = uspi->s_fpb; + } + + mark_inode_dirty(inode); + } + + if (free_count > 0) { + ufs_free_blocks (inode, frag_to_free, free_count); + } } if (free_it) { tmp = ufs_data_ptr_to_cpu(sb, p); @@ -1216,14 +1134,14 @@ static void ufs_trunc_tindirect(struct inode *inode, unsigned *offsets, int dept ufs_data_ptr_clear(uspi, p); write_sequnlock(&ufsi->meta_lock); - ubh_bforget(tind_bh); + ubh_bforget(ubh); ufs_free_blocks(inode, tmp, uspi->s_fpb); mark_inode_dirty(inode); return; } - if (IS_SYNC(inode) && ubh_buffer_dirty(tind_bh)) - ubh_sync_block(tind_bh); - ubh_brelse (tind_bh); + if (IS_SYNC(inode) && ubh_buffer_dirty(ubh)) + ubh_sync_block(ubh); + ubh_brelse(ubh); } static int ufs_alloc_lastblock(struct inode *inode, loff_t size) @@ -1317,26 +1235,30 @@ static void __ufs_truncate_blocks(struct inode 
*inode) switch (depth) { case 1: ufs_trunc_direct(inode); - ufs_trunc_indirect(inode, NULL, 0, + ufs_trunc_branch(inode, NULL, 0, 1, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_dindirect(inode, NULL, 0, + ufs_trunc_branch(inode, NULL, 0, 2, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, NULL, 0); + ufs_trunc_branch(inode, NULL, 0, 3, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); break; case 2: - ufs_trunc_indirect(inode, offsets + 1, depth2, + ufs_trunc_branch(inode, offsets + 1, depth2, 1, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_dindirect(inode, NULL, 0, + ufs_trunc_branch(inode, NULL, 0, 2, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, NULL, 0); + ufs_trunc_branch(inode, NULL, 0, 3, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); break; case 3: - ufs_trunc_dindirect(inode, offsets + 1, depth2, + ufs_trunc_branch(inode, offsets + 1, depth2, 2, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_tindirect(inode, NULL, 0); + ufs_trunc_branch(inode, NULL, 0, 3, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); break; case 4: - ufs_trunc_tindirect(inode, offsets + 1, depth2); + ufs_trunc_branch(inode, offsets + 1, depth2, 3, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); } ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); -- cgit v1.2.3 From ef3a315d4ca179fd0b56597e695cd262a8b559b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 15:42:25 -0400 Subject: __ufs_truncate_blocks(): unify freeing the full branches Signed-off-by: Al Viro --- fs/ufs/inode.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 0d57c41b7705..1427d277a690 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1235,31 +1235,30 @@ static void __ufs_truncate_blocks(struct inode *inode) switch (depth) { case 1: ufs_trunc_direct(inode); - ufs_trunc_branch(inode, NULL, 0, 1, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_branch(inode, NULL, 0, 2, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_branch(inode, NULL, 0, 3, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); - break; + goto l1; case 2: ufs_trunc_branch(inode, offsets + 1, depth2, 1, ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - ufs_trunc_branch(inode, NULL, 0, 2, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_branch(inode, NULL, 0, 3, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); - break; + goto l2; case 3: ufs_trunc_branch(inode, offsets + 1, depth2, 2, ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - ufs_trunc_branch(inode, NULL, 0, 3, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); - break; + goto l3; case 4: ufs_trunc_branch(inode, offsets + 1, depth2, 3, ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); + goto l4; } +l1: + ufs_trunc_branch(inode, NULL, 0, 1, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); +l2: + ufs_trunc_branch(inode, NULL, 0, 2, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); +l3: + ufs_trunc_branch(inode, NULL, 0, 3, + ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); +l4: ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); } -- cgit v1.2.3 From 42432739b5902f72011f701f5cd5b4227ebe991c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 15:47:17 -0400 Subject: __ufs_trunc_blocks(): turn the 
part after switch into a loop ... and turn the switch into if (), since all cases with depth != 1 have just become identical. Signed-off-by: Al Viro --- fs/ufs/inode.c | 35 ++++++++++------------------- 1 file changed, 10 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 1427d277a690..285eacd02d60 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1222,6 +1222,7 @@ static void __ufs_truncate_blocks(struct inode *inode) unsigned offsets[4]; int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets); int depth2; + unsigned i; if (!depth) return; @@ -1232,33 +1233,17 @@ static void __ufs_truncate_blocks(struct inode *inode) break; mutex_lock(&ufsi->truncate_mutex); - switch (depth) { - case 1: + if (depth == 1) { ufs_trunc_direct(inode); - goto l1; - case 2: - ufs_trunc_branch(inode, offsets + 1, depth2, 1, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); - goto l2; - case 3: - ufs_trunc_branch(inode, offsets + 1, depth2, 2, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); - goto l3; - case 4: - ufs_trunc_branch(inode, offsets + 1, depth2, 3, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); - goto l4; + offsets[0] = UFS_IND_BLOCK; + } else { + ufs_trunc_branch(inode, offsets + 1, depth2, depth - 1, + ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++)); + } + for (i = offsets[0]; i <= UFS_TIND_BLOCK; i++) { + ufs_trunc_branch(inode, NULL, 0, i - UFS_IND_BLOCK + 1, + ufs_get_direct_data_ptr(uspi, ufsi, i)); } -l1: - ufs_trunc_branch(inode, NULL, 0, 1, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_IND_BLOCK)); -l2: - ufs_trunc_branch(inode, NULL, 0, 2, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_DIND_BLOCK)); -l3: - ufs_trunc_branch(inode, NULL, 0, 3, - ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK)); -l4: ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); } -- cgit v1.2.3 From 97e0f8f87c918620689ce542664a3115b752649d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 16:05:18 -0400 Subject: ufs_trunc_branch(): never call with offsets != NULL && depth2 == 0 For calls in __ufs_truncate_blocks() it's just a matter of not incrementing offsets[0] and not making that call - immediately following loop will be executed one extra time and we'll be just fine. For recursive call in ufs_trunc_branch() itself, just assign NULL to offsets if we would be about to make such a call. Signed-off-by: Al Viro --- fs/ufs/inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 285eacd02d60..9e409c12afdf 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1075,7 +1075,7 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, struct ufs_inode_info *ufsi = UFS_I(inode); struct ufs_buffer_head *ubh; u64 tmp; - bool free_it = !offsets || !depth2; + bool free_it = !offsets; unsigned from = offsets ?
*offsets++ : 0; unsigned i; @@ -1091,9 +1091,11 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, } if (--depth) { + if (!--depth2) + offsets = NULL; for (i = from ; i < uspi->s_apb ; i++, offsets = NULL) { void *ind = ubh_get_data_ptr(uspi, ubh, i); - ufs_trunc_branch(inode, offsets, depth2 - 1, depth, ind); + ufs_trunc_branch(inode, offsets, depth2, depth, ind); ubh_mark_buffer_dirty(ubh); } } else { @@ -1237,7 +1239,8 @@ static void __ufs_truncate_blocks(struct inode *inode) ufs_trunc_direct(inode); offsets[0] = UFS_IND_BLOCK; } else { - ufs_trunc_branch(inode, offsets + 1, depth2, depth - 1, + if (depth2) + ufs_trunc_branch(inode, offsets + 1, depth2, depth - 1, ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++)); } for (i = offsets[0]; i <= UFS_TIND_BLOCK; i++) { -- cgit v1.2.3 From a96574233c5d2e50736d83abf65161ec5fa55852 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 16:13:56 -0400 Subject: ufs_trunc_branch(): separate the calls with non-NULL offsets Signed-off-by: Al Viro --- fs/ufs/inode.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 9e409c12afdf..480c34ee1805 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1091,13 +1091,16 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, } if (--depth) { - if (!--depth2) - offsets = NULL; - for (i = from ; i < uspi->s_apb ; i++, offsets = NULL) { - void *ind = ubh_get_data_ptr(uspi, ubh, i); + if (offsets && --depth2) { + void *ind = ubh_get_data_ptr(uspi, ubh, from++); ufs_trunc_branch(inode, offsets, depth2, depth, ind); ubh_mark_buffer_dirty(ubh); } + for (i = from ; i < uspi->s_apb ; i++) { + void *ind = ubh_get_data_ptr(uspi, ubh, i); + ufs_trunc_branch(inode, NULL, 0, depth, ind); + ubh_mark_buffer_dirty(ubh); + } } else { u64 frag_to_free = 0; unsigned free_count = 0; -- cgit v1.2.3 From a138b4b688c10eb82044451b81534c382d1cddbd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 02:18:54 -0400 Subject: ufs: unify the logics for collecting adjacent data blocks to free open-coded in several places... 
Signed-off-by: Al Viro --- fs/ufs/inode.c | 56 ++++++++++++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 480c34ee1805..39de7782b7c5 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -960,6 +960,22 @@ void ufs_evict_inode(struct inode * inode) ufs_free_inode(inode); } +struct to_free { + struct inode *inode; + u64 to; + unsigned count; +}; + +static inline void free_data(struct to_free *ctx, u64 from, unsigned count) +{ + if (ctx->count && ctx->to != from) { + ufs_free_blocks(ctx->inode, ctx->to - ctx->count, ctx->count); + ctx->count = 0; + } + ctx->count += count; + ctx->to = from + count; +} + #define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift) #define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) @@ -970,7 +986,7 @@ static void ufs_trunc_direct(struct inode *inode) struct ufs_sb_private_info * uspi; void *p; u64 frag1, frag2, frag3, frag4, block1, block2; - unsigned frag_to_free, free_count; + struct to_free ctx = {.inode = inode}; unsigned i, tmp; UFSD("ENTER: ino %lu\n", inode->i_ino); @@ -978,9 +994,6 @@ static void ufs_trunc_direct(struct inode *inode) sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; - frag_to_free = 0; - free_count = 0; - frag1 = DIRECT_FRAGMENT; frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag); frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1); @@ -1015,7 +1028,6 @@ static void ufs_trunc_direct(struct inode *inode) ufs_free_fragments(inode, tmp + frag1, frag2); mark_inode_dirty(inode); - frag_to_free = tmp + frag1; next1: /* @@ -1030,21 +1042,11 @@ next1: ufs_data_ptr_clear(uspi, p); write_sequnlock(&ufsi->meta_lock); - if (free_count == 0) { - frag_to_free = tmp; - free_count = uspi->s_fpb; - } else if (free_count > 0 && frag_to_free == tmp - free_count) - free_count += uspi->s_fpb; - else { - ufs_free_blocks (inode, frag_to_free, free_count); - frag_to_free = tmp; - free_count = uspi->s_fpb; - } + free_data(&ctx, tmp, uspi->s_fpb); mark_inode_dirty(inode); } - if (free_count > 0) - ufs_free_blocks (inode, frag_to_free, free_count); + free_data(&ctx, 0, 0); if (frag3 >= frag4) goto next3; @@ -1102,8 +1104,7 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, ubh_mark_buffer_dirty(ubh); } } else { - u64 frag_to_free = 0; - unsigned free_count = 0; + struct to_free ctx = {.inode = inode}; for (i = from; i < uspi->s_apb; i++) { void *ind = ubh_get_data_ptr(uspi, ubh, i); @@ -1115,23 +1116,10 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, ufs_data_ptr_clear(uspi, ind); write_sequnlock(&UFS_I(inode)->meta_lock); ubh_mark_buffer_dirty(ubh); - if (free_count == 0) { - frag_to_free = tmp; - free_count = uspi->s_fpb; - } else if (free_count > 0 && frag_to_free == tmp - free_count) - free_count += uspi->s_fpb; - else { - ufs_free_blocks (inode, frag_to_free, free_count); - frag_to_free = tmp; - free_count = uspi->s_fpb; - } - + free_data(&ctx, tmp, uspi->s_fpb); mark_inode_dirty(inode); } - - if (free_count > 0) { - ufs_free_blocks (inode, frag_to_free, free_count); - } + free_data(&ctx, 0, 0); } if (free_it) { tmp = ufs_data_ptr_to_cpu(sb, p); -- cgit v1.2.3 From 6d1ebbca2b2fe516ff5f279848cffbd23d2b0270 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 17:11:49 -0400 Subject: split ufs_truncate_branch() into full- and partial-branch variants Signed-off-by: Al Viro --- fs/ufs/inode.c | 74 
+++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 39de7782b7c5..c06556558c9b 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1070,6 +1070,60 @@ next1: UFSD("EXIT: ino %lu\n", inode->i_ino); } +static void free_full_branch(struct inode *inode, int depth, void *p) +{ + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + struct ufs_inode_info *ufsi = UFS_I(inode); + struct ufs_buffer_head *ubh; + u64 tmp; + unsigned i; + + tmp = ufs_data_ptr_to_cpu(sb, p); + if (!tmp) + return; + ubh = ubh_bread (sb, tmp, uspi->s_bsize); + if (!ubh) { + write_seqlock(&ufsi->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); + return; + } + + if (--depth) { + for (i = 0 ; i < uspi->s_apb ; i++) { + void *ind = ubh_get_data_ptr(uspi, ubh, i); + free_full_branch(inode, depth, ind); + ubh_mark_buffer_dirty(ubh); + } + } else { + struct to_free ctx = {.inode = inode}; + + for (i = 0; i < uspi->s_apb; i++) { + void *ind = ubh_get_data_ptr(uspi, ubh, i); + tmp = ufs_data_ptr_to_cpu(sb, ind); + if (!tmp) + continue; + + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, ind); + write_sequnlock(&UFS_I(inode)->meta_lock); + ubh_mark_buffer_dirty(ubh); + free_data(&ctx, tmp, uspi->s_fpb); + mark_inode_dirty(inode); + } + free_data(&ctx, 0, 0); + } + tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&ufsi->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); + + ubh_bforget(ubh); + ufs_free_blocks(inode, tmp, uspi->s_fpb); + mark_inode_dirty(inode); +} + static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, int depth, void *p) { struct super_block *sb = inode->i_sb; @@ -1077,8 +1131,7 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, struct ufs_inode_info *ufsi = UFS_I(inode); struct ufs_buffer_head *ubh; u64 tmp; - bool free_it = !offsets; - unsigned from = offsets ? 
*offsets++ : 0; + unsigned from = *offsets++; unsigned i; tmp = ufs_data_ptr_to_cpu(sb, p); @@ -1093,14 +1146,14 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, } if (--depth) { - if (offsets && --depth2) { + if (--depth2) { void *ind = ubh_get_data_ptr(uspi, ubh, from++); ufs_trunc_branch(inode, offsets, depth2, depth, ind); ubh_mark_buffer_dirty(ubh); } for (i = from ; i < uspi->s_apb ; i++) { void *ind = ubh_get_data_ptr(uspi, ubh, i); - ufs_trunc_branch(inode, NULL, 0, depth, ind); + free_full_branch(inode, depth, ind); ubh_mark_buffer_dirty(ubh); } } else { @@ -1121,17 +1174,6 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, } free_data(&ctx, 0, 0); } - if (free_it) { - tmp = ufs_data_ptr_to_cpu(sb, p); - write_seqlock(&ufsi->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&ufsi->meta_lock); - - ubh_bforget(ubh); - ufs_free_blocks(inode, tmp, uspi->s_fpb); - mark_inode_dirty(inode); - return; - } if (IS_SYNC(inode) && ubh_buffer_dirty(ubh)) ubh_sync_block(ubh); ubh_brelse(ubh); @@ -1235,7 +1277,7 @@ static void __ufs_truncate_blocks(struct inode *inode) ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++)); } for (i = offsets[0]; i <= UFS_TIND_BLOCK; i++) { - ufs_trunc_branch(inode, NULL, 0, i - UFS_IND_BLOCK + 1, + free_full_branch(inode, i - UFS_IND_BLOCK + 1, ufs_get_direct_data_ptr(uspi, ufsi, i)); } ufsi->i_lastfrag = DIRECT_FRAGMENT; -- cgit v1.2.3 From 6aab6dd37946d0d592105872bd533bb7d2931f3f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 17:23:34 -0400 Subject: ufs_trunc_branch(): massage towards killing recursion We always have 0 < depth2 <= depth in there, so if (--depth) { if (--depth2) A B } else { C // not using depth2 } D // not using depth2 is equivalent to if (--depth2) A with s/depth/depth - 1/ if (--depth) B else C D Signed-off-by: Al Viro --- fs/ufs/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index c06556558c9b..dac81c318da7 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1145,12 +1145,12 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, return; } + if (--depth2) { + void *ind = ubh_get_data_ptr(uspi, ubh, from++); + ufs_trunc_branch(inode, offsets, depth2, depth - 1, ind); + ubh_mark_buffer_dirty(ubh); + } if (--depth) { - if (--depth2) { - void *ind = ubh_get_data_ptr(uspi, ubh, from++); - ufs_trunc_branch(inode, offsets, depth2, depth, ind); - ubh_mark_buffer_dirty(ubh); - } for (i = from ; i < uspi->s_apb ; i++) { void *ind = ubh_get_data_ptr(uspi, ubh, i); free_full_branch(inode, depth, ind); -- cgit v1.2.3 From 7b4e4f7f815db0059150a12542b28c787e19c0d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 19:13:02 -0400 Subject: ufs_trunc_branch(): kill recursion turn recursion into a pair of loops Signed-off-by: Al Viro --- fs/ufs/inode.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index dac81c318da7..314caad56d83 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1124,34 +1124,14 @@ static void free_full_branch(struct inode *inode, int depth, void *p) mark_inode_dirty(inode); } -static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, int depth, void *p) +static void free_branch_tail(struct inode *inode, unsigned from, struct ufs_buffer_head *ubh, int depth) { struct super_block 
*sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - struct ufs_inode_info *ufsi = UFS_I(inode); - struct ufs_buffer_head *ubh; - u64 tmp; - unsigned from = *offsets++; unsigned i; - tmp = ufs_data_ptr_to_cpu(sb, p); - if (!tmp) - return; - ubh = ubh_bread (sb, tmp, uspi->s_bsize); - if (!ubh) { - write_seqlock(&ufsi->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&ufsi->meta_lock); - return; - } - - if (--depth2) { - void *ind = ubh_get_data_ptr(uspi, ubh, from++); - ufs_trunc_branch(inode, offsets, depth2, depth - 1, ind); - ubh_mark_buffer_dirty(ubh); - } if (--depth) { - for (i = from ; i < uspi->s_apb ; i++) { + for (i = from; i < uspi->s_apb ; i++) { void *ind = ubh_get_data_ptr(uspi, ubh, i); free_full_branch(inode, depth, ind); ubh_mark_buffer_dirty(ubh); @@ -1161,7 +1141,7 @@ static void ufs_trunc_branch(struct inode *inode, unsigned *offsets, int depth2, for (i = from; i < uspi->s_apb; i++) { void *ind = ubh_get_data_ptr(uspi, ubh, i); - tmp = ufs_data_ptr_to_cpu(sb, ind); + u64 tmp = ufs_data_ptr_to_cpu(sb, ind); if (!tmp) continue; @@ -1258,6 +1238,9 @@ static void __ufs_truncate_blocks(struct inode *inode) int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets); int depth2; unsigned i; + struct ufs_buffer_head *ubh[3]; + void *p; + u64 block; if (!depth) return; @@ -1272,9 +1255,26 @@ static void __ufs_truncate_blocks(struct inode *inode) ufs_trunc_direct(inode); offsets[0] = UFS_IND_BLOCK; } else { - if (depth2) - ufs_trunc_branch(inode, offsets + 1, depth2, depth - 1, - ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++)); + /* get the blocks that should be partially emptied */ + p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]); + for (i = 0; i < depth2; i++) { + offsets[i]++; /* next branch is fully freed */ + block = ufs_data_ptr_to_cpu(sb, p); + if (!block) + break; + ubh[i] = ubh_bread(sb, block, uspi->s_bsize); + if (!ubh[i]) { + write_seqlock(&ufsi->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); + break; + } + p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]); + } + while (i--) { + ubh_mark_buffer_dirty(ubh[i]); + free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1); + } } for (i = offsets[0]; i <= UFS_TIND_BLOCK; i++) { free_full_branch(inode, i - UFS_IND_BLOCK + 1, -- cgit v1.2.3 From 163073db51930d1f9c2960b8e5660c269164f29b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 20:07:08 -0400 Subject: free_full_branch(): saner calling conventions Have caller fetch the block number *and* remove it from wherever it was. Pass the block number instead. 
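For illustration only (this is not part of the patch), a minimal user-space sketch of the new convention: the caller fetches the value out of the slot, clears the slot itself, and hands the bare block number to the freeing helper, which no longer knows or cares where that number was stored. All names in it (slot_t, free_branch, parent) are invented for the example.

    /* illustrative sketch, not kernel code */
    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t slot_t;

    /* callee: gets a plain block number, never touches the parent slot */
    static void free_branch(uint64_t block)
    {
            printf("freeing branch rooted at block %llu\n",
                   (unsigned long long)block);
    }

    int main(void)
    {
            slot_t parent = 1234;            /* pointer slot in the parent  */
            uint64_t block = parent;         /* caller fetches the value... */
            parent = 0;                      /* ...and removes it itself    */
            if (block)
                    free_branch(block);      /* callee sees the bare number */
            return 0;
    }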
Signed-off-by: Al Viro --- fs/ufs/inode.c | 100 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 51 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 314caad56d83..efe71e5acb00 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1070,57 +1070,48 @@ next1: UFSD("EXIT: ino %lu\n", inode->i_ino); } -static void free_full_branch(struct inode *inode, int depth, void *p) +static void free_full_branch(struct inode *inode, u64 ind_block, int depth) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - struct ufs_inode_info *ufsi = UFS_I(inode); - struct ufs_buffer_head *ubh; - u64 tmp; + struct ufs_buffer_head *ubh = ubh_bread(sb, ind_block, uspi->s_bsize); unsigned i; - tmp = ufs_data_ptr_to_cpu(sb, p); - if (!tmp) - return; - ubh = ubh_bread (sb, tmp, uspi->s_bsize); - if (!ubh) { - write_seqlock(&ufsi->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&ufsi->meta_lock); + if (!ubh) return; - } if (--depth) { - for (i = 0 ; i < uspi->s_apb ; i++) { - void *ind = ubh_get_data_ptr(uspi, ubh, i); - free_full_branch(inode, depth, ind); - ubh_mark_buffer_dirty(ubh); + for (i = 0; i < uspi->s_apb; i++) { + void *p = ubh_get_data_ptr(uspi, ubh, i); + u64 block = ufs_data_ptr_to_cpu(sb, p); + if (block) { + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); + free_full_branch(inode, block, depth); + ubh_mark_buffer_dirty(ubh); + } } } else { struct to_free ctx = {.inode = inode}; for (i = 0; i < uspi->s_apb; i++) { - void *ind = ubh_get_data_ptr(uspi, ubh, i); - tmp = ufs_data_ptr_to_cpu(sb, ind); - if (!tmp) - continue; - - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, ind); - write_sequnlock(&UFS_I(inode)->meta_lock); - ubh_mark_buffer_dirty(ubh); - free_data(&ctx, tmp, uspi->s_fpb); - mark_inode_dirty(inode); + void *p = ubh_get_data_ptr(uspi, ubh, i); + u64 block = ufs_data_ptr_to_cpu(sb, p); + if (block) { + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); + ubh_mark_buffer_dirty(ubh); + free_data(&ctx, block, uspi->s_fpb); + mark_inode_dirty(inode); + } } free_data(&ctx, 0, 0); } - tmp = ufs_data_ptr_to_cpu(sb, p); - write_seqlock(&ufsi->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&ufsi->meta_lock); ubh_bforget(ubh); - ufs_free_blocks(inode, tmp, uspi->s_fpb); + ufs_free_blocks(inode, ind_block, uspi->s_fpb); mark_inode_dirty(inode); } @@ -1132,25 +1123,30 @@ static void free_branch_tail(struct inode *inode, unsigned from, struct ufs_buff if (--depth) { for (i = from; i < uspi->s_apb ; i++) { - void *ind = ubh_get_data_ptr(uspi, ubh, i); - free_full_branch(inode, depth, ind); - ubh_mark_buffer_dirty(ubh); + void *p = ubh_get_data_ptr(uspi, ubh, i); + u64 block = ufs_data_ptr_to_cpu(sb, p); + if (block) { + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); + ubh_mark_buffer_dirty(ubh); + free_full_branch(inode, block, depth); + } } } else { struct to_free ctx = {.inode = inode}; for (i = from; i < uspi->s_apb; i++) { - void *ind = ubh_get_data_ptr(uspi, ubh, i); - u64 tmp = ufs_data_ptr_to_cpu(sb, ind); - if (!tmp) - continue; - - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, ind); - write_sequnlock(&UFS_I(inode)->meta_lock); - ubh_mark_buffer_dirty(ubh); - free_data(&ctx, tmp, uspi->s_fpb); - 
mark_inode_dirty(inode); + void *p = ubh_get_data_ptr(uspi, ubh, i); + u64 block = ufs_data_ptr_to_cpu(sb, p); + if (block) { + write_seqlock(&UFS_I(inode)->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); + ubh_mark_buffer_dirty(ubh); + free_data(&ctx, block, uspi->s_fpb); + mark_inode_dirty(inode); + } } free_data(&ctx, 0, 0); } @@ -1277,8 +1273,14 @@ static void __ufs_truncate_blocks(struct inode *inode) } } for (i = offsets[0]; i <= UFS_TIND_BLOCK; i++) { - free_full_branch(inode, i - UFS_IND_BLOCK + 1, - ufs_get_direct_data_ptr(uspi, ufsi, i)); + p = ufs_get_direct_data_ptr(uspi, ufsi, i); + block = ufs_data_ptr_to_cpu(sb, p); + if (block) { + write_seqlock(&ufsi->meta_lock); + ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); + free_full_branch(inode, block, i - UFS_IND_BLOCK + 1); + } } ufsi->i_lastfrag = DIRECT_FRAGMENT; mutex_unlock(&ufsi->truncate_mutex); -- cgit v1.2.3 From b6eede0ec642d1be17065110718cb4f4ed7ba5e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 20:09:39 -0400 Subject: move marking inode dirty to the end of __ufs_truncate_blocks() Signed-off-by: Al Viro --- fs/ufs/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index efe71e5acb00..26835a80f7dd 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1027,7 +1027,6 @@ static void ufs_trunc_direct(struct inode *inode) frag1 = ufs_fragnum (frag1); ufs_free_fragments(inode, tmp + frag1, frag2); - mark_inode_dirty(inode); next1: /* @@ -1043,7 +1042,6 @@ next1: write_sequnlock(&ufsi->meta_lock); free_data(&ctx, tmp, uspi->s_fpb); - mark_inode_dirty(inode); } free_data(&ctx, 0, 0); @@ -1064,7 +1062,6 @@ next1: write_sequnlock(&ufsi->meta_lock); ufs_free_fragments (inode, tmp, frag4); - mark_inode_dirty(inode); next3: UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -1104,7 +1101,6 @@ static void free_full_branch(struct inode *inode, u64 ind_block, int depth) write_sequnlock(&UFS_I(inode)->meta_lock); ubh_mark_buffer_dirty(ubh); free_data(&ctx, block, uspi->s_fpb); - mark_inode_dirty(inode); } } free_data(&ctx, 0, 0); @@ -1112,7 +1108,6 @@ static void free_full_branch(struct inode *inode, u64 ind_block, int depth) ubh_bforget(ubh); ufs_free_blocks(inode, ind_block, uspi->s_fpb); - mark_inode_dirty(inode); } static void free_branch_tail(struct inode *inode, unsigned from, struct ufs_buffer_head *ubh, int depth) @@ -1145,7 +1140,6 @@ static void free_branch_tail(struct inode *inode, unsigned from, struct ufs_buff write_sequnlock(&UFS_I(inode)->meta_lock); ubh_mark_buffer_dirty(ubh); free_data(&ctx, block, uspi->s_fpb); - mark_inode_dirty(inode); } } free_data(&ctx, 0, 0); @@ -1283,6 +1277,7 @@ static void __ufs_truncate_blocks(struct inode *inode) } } ufsi->i_lastfrag = DIRECT_FRAGMENT; + mark_inode_dirty(inode); mutex_unlock(&ufsi->truncate_mutex); } -- cgit v1.2.3 From cc7231e30916f5326bdde55a7a4c59431e15bc1b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 20:14:02 -0400 Subject: free_full_branch(): don't bother modifying the block we are going to free Note that it's already made unreachable from the inode, so we don't have to worry about ufs_frag_map() walking into something already freed. 
Signed-off-by: Al Viro --- fs/ufs/inode.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 26835a80f7dd..424949f459c8 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1081,13 +1081,8 @@ static void free_full_branch(struct inode *inode, u64 ind_block, int depth) for (i = 0; i < uspi->s_apb; i++) { void *p = ubh_get_data_ptr(uspi, ubh, i); u64 block = ufs_data_ptr_to_cpu(sb, p); - if (block) { - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); + if (block) free_full_branch(inode, block, depth); - ubh_mark_buffer_dirty(ubh); - } } } else { struct to_free ctx = {.inode = inode}; @@ -1095,13 +1090,8 @@ static void free_full_branch(struct inode *inode, u64 ind_block, int depth) for (i = 0; i < uspi->s_apb; i++) { void *p = ubh_get_data_ptr(uspi, ubh, i); u64 block = ufs_data_ptr_to_cpu(sb, p); - if (block) { - write_seqlock(&UFS_I(inode)->meta_lock); - ufs_data_ptr_clear(uspi, p); - write_sequnlock(&UFS_I(inode)->meta_lock); - ubh_mark_buffer_dirty(ubh); + if (block) free_data(&ctx, block, uspi->s_fpb); - } } free_data(&ctx, 0, 0); } -- cgit v1.2.3 From f53bd1421b3eb84375e9e6964665d23d4190400d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 20:17:32 -0400 Subject: __ufs_truncate_blocks(): avoid excessive dirtying of indirect blocks There's a case when an indirect block gets dirtied for no good reason - when there's a hole starting in the middle of area covered by it and spanning past its end, and truncate() is done precisely to the beginning of the hole. The block is obviously not modified at all - all removals happen beyond it. However, existing code ends up dirtying it just in case. It's trivial to fix and while it's not a real bug by any stretch of imagination, it makes the damn thing harder to follow. Signed-off-by: Al Viro --- fs/ufs/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 424949f459c8..86cc1eea0fb2 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1251,10 +1251,8 @@ static void __ufs_truncate_blocks(struct inode *inode) } p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]); } - while (i--) { - ubh_mark_buffer_dirty(ubh[i]); + while (i--) free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1); - } } for (i = offsets[0]; i <= UFS_TIND_BLOCK; i++) { p = ufs_get_direct_data_ptr(uspi, ufsi, i); -- cgit v1.2.3 From 5a39c25562aa5eab5a798919855cf41ddeed8b0d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Jun 2015 22:39:46 -0400 Subject: ufs_inode_get{frag,block}(): get rid of retries We are holding ->truncate_mutex, so nobody else can alter our block pointers. Rechecks/retries were needed back when we only held BKL there, and had to cope with write_begin/writepage and writepage/truncate races. Can't happen anymore... 
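Purely as an illustration (not part of the patch, with made-up names), the change in pattern looks like this in a trivial user-space sketch: once the whole operation runs under one mutex, the read-recheck-retry loop degenerates into a single read.

    /* illustrative sketch, not kernel code */
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t truncate_mutex = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t block_ptr = 42;     /* stands in for a block pointer */

    /* old style: optimistic read plus re-check, needed without the lock */
    static uint64_t lookup_with_retry(void)
    {
            uint64_t tmp;
    repeat:
            tmp = block_ptr;
            /* ... do some work based on tmp ... */
            if (tmp != block_ptr)       /* can't happen with the lock held */
                    goto repeat;
            return tmp;
    }

    /* new style: the caller already holds the mutex, one read is enough */
    static uint64_t lookup_locked(void)
    {
            return block_ptr;
    }

    int main(void)
    {
            pthread_mutex_lock(&truncate_mutex);
            printf("retry style: %llu, locked style: %llu\n",
                   (unsigned long long)lookup_with_retry(),
                   (unsigned long long)lookup_locked());
            pthread_mutex_unlock(&truncate_mutex);
            return 0;
    }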
Signed-off-by: Al Viro --- fs/ufs/inode.c | 43 ++++++++----------------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 86cc1eea0fb2..95cb0a8f5ec9 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -248,20 +248,12 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, goal = 0; -repeat: tmp = ufs_data_ptr_to_cpu(sb, p); lastfrag = ufsi->i_lastfrag; if (tmp && fragment < lastfrag) { if (!phys) { - result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); - if (tmp == ufs_data_ptr_to_cpu(sb, p)) { - UFSD("EXIT, result %llu\n", - (unsigned long long)tmp + blockoff); - return result; - } - brelse (result); - goto repeat; + return sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); } else { *phys = uspi->s_sbbase + tmp + blockoff; return NULL; @@ -283,14 +275,9 @@ repeat: ufs_data_ptr_to_cpu(sb, p2), uspi->s_fpb - lastblockoff, err, locked_page); - if (!tmp) { - if (lastfrag != ufsi->i_lastfrag) - goto repeat; - else - return NULL; - } + if (!tmp) + return NULL; lastfrag = ufsi->i_lastfrag; - } tmp = ufs_data_ptr_to_cpu(sb, ufs_get_direct_data_ptr(uspi, ufsi, @@ -325,9 +312,6 @@ repeat: phys != NULL ? locked_page : NULL); } if (!tmp) { - if ((!blockoff && ufs_data_ptr_to_cpu(sb, p)) || - (blockoff && lastfrag != ufsi->i_lastfrag)) - goto repeat; *err = -ENOSPC; return NULL; } @@ -345,7 +329,6 @@ repeat: if (IS_SYNC(inode)) ufs_sync_inode (inode); mark_inode_dirty(inode); - UFSD("EXIT, result %llu\n", (unsigned long long)tmp + blockoff); return result; /* This part : To be implemented .... @@ -409,19 +392,14 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, p = (__fs64 *)bh->b_data + block; else p = (__fs32 *)bh->b_data + block; -repeat: + tmp = ufs_data_ptr_to_cpu(sb, p); if (tmp) { - if (!phys) { + if (!phys) result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); - if (tmp == ufs_data_ptr_to_cpu(sb, p)) - goto out; - brelse (result); - goto repeat; - } else { + else *phys = uspi->s_sbbase + tmp + blockoff; - goto out; - } + goto out; } if (block && (uspi->fs_magic == UFS2_MAGIC ? @@ -432,12 +410,8 @@ repeat: goal = bh->b_blocknr + uspi->s_fpb; tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal, uspi->s_fpb, err, locked_page); - if (!tmp) { - if (ufs_data_ptr_to_cpu(sb, p)) - goto repeat; + if (!tmp) goto out; - } - if (!phys) { result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); @@ -451,7 +425,6 @@ repeat: sync_dirty_buffer(bh); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - UFSD("result %llu\n", (unsigned long long)tmp + blockoff); out: brelse (bh); UFSD("EXIT\n"); -- cgit v1.2.3 From 4b7068c8b178401637ef2fb068d6256c97d23f4a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Jun 2015 14:27:23 -0400 Subject: ufs: move calculation of offsets into ufs_getfrag_block() ... and massage ufs_frag_map() to take those instead of fragment number. As it is, we duplicate the damn thing on the write side, open-coded and bloody hard to follow. Signed-off-by: Al Viro --- fs/ufs/inode.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 95cb0a8f5ec9..0f0c6dfccd10 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -119,20 +119,18 @@ static inline int grow_chain64(struct ufs_inode_info *ufsi, * the beginning of the filesystem. 
*/ -static u64 ufs_frag_map(struct inode *inode, sector_t frag) +static u64 ufs_frag_map(struct inode *inode, unsigned offsets[4], int depth) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift; int shift = uspi->s_apbshift-uspi->s_fpbshift; - unsigned offsets[4], *p; Indirect chain[4], *q = chain; - int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets); + unsigned *p; unsigned flags = UFS_SB(sb)->s_flags; u64 res = 0; - UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth); UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n", uspi->s_fpbshift, uspi->s_apbmask, (unsigned long long)mask); @@ -191,7 +189,7 @@ ufs2: } res = fs64_to_cpu(sb, q->key64); found: - res += uspi->s_sbbase + (frag & uspi->s_fpbmask); + res += uspi->s_sbbase; no_block: while (q > chain) { brelse(q->bh); @@ -443,14 +441,17 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff struct ufs_sb_private_info * uspi = sbi->s_uspi; struct buffer_head * bh; int ret, err, new; + unsigned offsets[4]; + int depth = ufs_block_to_path(inode, fragment >> uspi->s_fpbshift, offsets); unsigned long ptr,phys; u64 phys64 = 0; if (!create) { - phys64 = ufs_frag_map(inode, fragment); - UFSD("phys64 = %llu\n", (unsigned long long)phys64); - if (phys64) + phys64 = ufs_frag_map(inode, offsets, depth); + if (phys64) { + phys64 += fragment & uspi->s_fpbmask; map_bh(bh_result, sb, phys64); + } return 0; } -- cgit v1.2.3 From 71dd42846ffb2bd1a90e9ac2c52df0cc2ed92307 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Jun 2015 14:34:43 -0400 Subject: ufs: use the branch depth in ufs_getfrag_block() we'd already calculated it... Signed-off-by: Al Viro --- fs/ufs/inode.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 0f0c6dfccd10..5c4a4abae652 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -465,9 +465,7 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff mutex_lock(&UFS_I(inode)->truncate_mutex); UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); - if (fragment > - ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) - << uspi->s_fpbshift)) + if (!depth) goto abort_too_big; err = 0; @@ -490,17 +488,17 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff ufs_inode_getblock(inode, bh, x, fragment, \ &err, NULL, NULL, NULL) - if (ptr < UFS_NDIR_FRAGMENT) { + if (depth == 1) { bh = GET_INODE_DATABLOCK(ptr); goto out; } ptr -= UFS_NDIR_FRAGMENT; - if (ptr < (1 << (uspi->s_apbshift + uspi->s_fpbshift))) { + if (depth == 2) { bh = GET_INODE_PTR(UFS_IND_FRAGMENT + (ptr >> uspi->s_apbshift)); goto get_indirect; } ptr -= 1 << (uspi->s_apbshift + uspi->s_fpbshift); - if (ptr < (1 << (uspi->s_2apbshift + uspi->s_fpbshift))) { + if (depth == 3) { bh = GET_INODE_PTR(UFS_DIND_FRAGMENT + (ptr >> uspi->s_2apbshift)); goto get_double; } -- cgit v1.2.3 From bbb3eb9d3432ce55a620778ecf5670fa7942090e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 00:10:00 -0400 Subject: ufs_inode_get{frag,block}(): consolidate success exits These calling conventions are rudiments of pre-2.3 times; they really need to be sanitized. 
This is the first step; next will be _always_ returning a block number, instead of this "return a pointer to buffer_head, except when we get to the actual data" crap. Signed-off-by: Al Viro --- fs/ufs/inode.c | 50 ++++++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 5c4a4abae652..d65a89030c91 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -226,7 +226,6 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - struct buffer_head * result; unsigned blockoff, lastblockoff; u64 tmp, goal, lastfrag, block, lastblock; void *p, *p2; @@ -249,14 +248,8 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, tmp = ufs_data_ptr_to_cpu(sb, p); lastfrag = ufsi->i_lastfrag; - if (tmp && fragment < lastfrag) { - if (!phys) { - return sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); - } else { - *phys = uspi->s_sbbase + tmp + blockoff; - return NULL; - } - } + if (tmp && fragment < lastfrag) + goto out; lastblock = ufs_fragstoblks (lastfrag); lastblockoff = ufs_fragnum (lastfrag); @@ -314,20 +307,22 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, return NULL; } - if (!phys) { - result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); - } else { - *phys = uspi->s_sbbase + tmp + blockoff; - result = NULL; + if (phys) { *err = 0; *new = 1; } - inode->i_ctime = CURRENT_TIME_SEC; if (IS_SYNC(inode)) ufs_sync_inode (inode); mark_inode_dirty(inode); - return result; +out: + tmp += uspi->s_sbbase + blockoff; + if (!phys) { + return sb_getblk(sb, tmp); + } else { + *phys = tmp; + return NULL; + } /* This part : To be implemented .... Required only for writing, not required for READ-ONLY. @@ -367,7 +362,7 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct buffer_head * result; unsigned blockoff; - u64 tmp, goal, block; + u64 tmp = 0, goal, block; void *p; block = ufs_fragstoblks (fragment); @@ -392,13 +387,8 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, p = (__fs32 *)bh->b_data + block; tmp = ufs_data_ptr_to_cpu(sb, p); - if (tmp) { - if (!phys) - result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); - else - *phys = uspi->s_sbbase + tmp + blockoff; + if (tmp) goto out; - } if (block && (uspi->fs_magic == UFS2_MAGIC ? 
(tmp = fs64_to_cpu(sb, ((__fs64 *)bh->b_data)[block-1])) : @@ -411,12 +401,8 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, if (!tmp) goto out; - if (!phys) { - result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); - } else { - *phys = uspi->s_sbbase + tmp + blockoff; + if (new) *new = 1; - } mark_buffer_dirty(bh); if (IS_SYNC(inode)) @@ -425,6 +411,14 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, mark_inode_dirty(inode); out: brelse (bh); + if (tmp) { + tmp += uspi->s_sbbase + blockoff; + if (phys) { + *phys = tmp; + } else { + result = sb_getblk(sb, tmp); + } + } UFSD("EXIT\n"); return result; } -- cgit v1.2.3 From 8d9dcf14367388674f4d792f494e6f1d6536ac95 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 00:32:42 -0400 Subject: ufs_getfrag_block(): get rid of macro jungles Signed-off-by: Al Viro --- fs/ufs/inode.c | 51 ++++++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index d65a89030c91..156ba3c26906 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -465,49 +465,42 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff err = 0; ptr = fragment; - /* - * ok, these macros clean the logic up a bit and make - * it much more readable: - */ -#define GET_INODE_DATABLOCK(x) \ - ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new,\ - bh_result->b_page) -#define GET_INODE_PTR(x) \ - ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL,\ - bh_result->b_page) -#define GET_INDIRECT_DATABLOCK(x) \ - ufs_inode_getblock(inode, bh, x, fragment, \ - &err, &phys, &new, bh_result->b_page) -#define GET_INDIRECT_PTR(x) \ - ufs_inode_getblock(inode, bh, x, fragment, \ - &err, NULL, NULL, NULL) - if (depth == 1) { - bh = GET_INODE_DATABLOCK(ptr); + bh = ufs_inode_getfrag(inode, ptr, fragment, 1, &err, &phys, + &new, bh_result->b_page); goto out; } ptr -= UFS_NDIR_FRAGMENT; if (depth == 2) { - bh = GET_INODE_PTR(UFS_IND_FRAGMENT + (ptr >> uspi->s_apbshift)); + bh = ufs_inode_getfrag(inode, + UFS_IND_FRAGMENT + (ptr >> uspi->s_apbshift), + fragment, uspi->s_fpb, &err, NULL, NULL, + bh_result->b_page); goto get_indirect; } ptr -= 1 << (uspi->s_apbshift + uspi->s_fpbshift); if (depth == 3) { - bh = GET_INODE_PTR(UFS_DIND_FRAGMENT + (ptr >> uspi->s_2apbshift)); + bh = ufs_inode_getfrag(inode, + UFS_DIND_FRAGMENT + (ptr >> uspi->s_2apbshift), + fragment, uspi->s_fpb, &err, NULL, NULL, + bh_result->b_page); goto get_double; } ptr -= 1 << (uspi->s_2apbshift + uspi->s_fpbshift); - bh = GET_INODE_PTR(UFS_TIND_FRAGMENT + (ptr >> uspi->s_3apbshift)); - bh = GET_INDIRECT_PTR((ptr >> uspi->s_2apbshift) & uspi->s_apbmask); + bh = ufs_inode_getfrag(inode, + UFS_TIND_FRAGMENT + (ptr >> uspi->s_3apbshift), + fragment, uspi->s_fpb, &err, NULL, NULL, + bh_result->b_page); + bh = ufs_inode_getblock(inode, bh, + (ptr >> uspi->s_2apbshift) & uspi->s_apbmask, + fragment, &err, NULL, NULL, NULL); get_double: - bh = GET_INDIRECT_PTR((ptr >> uspi->s_apbshift) & uspi->s_apbmask); + bh = ufs_inode_getblock(inode, bh, + (ptr >> uspi->s_apbshift) & uspi->s_apbmask, + fragment, &err, NULL, NULL, NULL); get_indirect: - bh = GET_INDIRECT_DATABLOCK(ptr & uspi->s_apbmask); - -#undef GET_INODE_DATABLOCK -#undef GET_INODE_PTR -#undef GET_INDIRECT_DATABLOCK -#undef GET_INDIRECT_PTR + bh = ufs_inode_getblock(inode, bh, ptr & uspi->s_apbmask, fragment, + &err, &phys, &new, bh_result->b_page); out: if (err) -- cgit v1.2.3 From 
177848a018cb2cb196feac2990814ac8d7bb3c8e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 00:53:06 -0400 Subject: ufs_inode_get{frag,block}(): leave sb_getblk() to caller just return the damn block number Signed-off-by: Al Viro --- fs/ufs/inode.c | 88 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 156ba3c26906..eeccf45fcd57 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -218,7 +218,7 @@ changed: * @new: we set it if we allocate new block * @locked_page: for ufs_new_fragments() */ -static struct buffer_head * +static u64 ufs_inode_getfrag(struct inode *inode, u64 fragment, sector_t new_fragment, unsigned int required, int *err, long *phys, int *new, struct page *locked_page) @@ -267,7 +267,7 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, uspi->s_fpb - lastblockoff, err, locked_page); if (!tmp) - return NULL; + return 0; lastfrag = ufsi->i_lastfrag; } tmp = ufs_data_ptr_to_cpu(sb, @@ -304,7 +304,7 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, } if (!tmp) { *err = -ENOSPC; - return NULL; + return 0; } if (phys) { @@ -316,13 +316,7 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, ufs_sync_inode (inode); mark_inode_dirty(inode); out: - tmp += uspi->s_sbbase + blockoff; - if (!phys) { - return sb_getblk(sb, tmp); - } else { - *phys = tmp; - return NULL; - } + return tmp + uspi->s_sbbase; /* This part : To be implemented .... Required only for writing, not required for READ-ONLY. @@ -353,26 +347,22 @@ repeat2: * @new: see ufs_inode_getfrag() * @locked_page: see ufs_inode_getfrag() */ -static struct buffer_head * +static u64 ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, u64 fragment, sector_t new_fragment, int *err, long *phys, int *new, struct page *locked_page) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - struct buffer_head * result; - unsigned blockoff; u64 tmp = 0, goal, block; void *p; block = ufs_fragstoblks (fragment); - blockoff = ufs_fragnum (fragment); UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, metadata %d\n", inode->i_ino, (unsigned long long)fragment, (unsigned long long)new_fragment, !phys); - result = NULL; if (!bh) goto out; if (!buffer_uptodate(bh)) { @@ -411,16 +401,10 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, mark_inode_dirty(inode); out: brelse (bh); - if (tmp) { - tmp += uspi->s_sbbase + blockoff; - if (phys) { - *phys = tmp; - } else { - result = sb_getblk(sb, tmp); - } - } UFSD("EXIT\n"); - return result; + if (tmp) + tmp += uspi->s_sbbase; + return tmp; } /** @@ -439,11 +423,12 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff int depth = ufs_block_to_path(inode, fragment >> uspi->s_fpbshift, offsets); unsigned long ptr,phys; u64 phys64 = 0; + unsigned frag = fragment & uspi->s_fpbmask; if (!create) { phys64 = ufs_frag_map(inode, offsets, depth); if (phys64) { - phys64 += fragment & uspi->s_fpbmask; + phys64 += frag; map_bh(bh_result, sb, phys64); } return 0; @@ -466,42 +451,79 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff ptr = fragment; if (depth == 1) { - bh = ufs_inode_getfrag(inode, ptr, fragment, 1, &err, &phys, + phys64 = ufs_inode_getfrag(inode, ptr, fragment, 1, &err, &phys, &new, bh_result->b_page); + if (phys64) { + phys64 += frag; + phys = phys64; + } goto out; } ptr -= UFS_NDIR_FRAGMENT; if (depth == 2) { - bh = 
ufs_inode_getfrag(inode, + phys64 = ufs_inode_getfrag(inode, UFS_IND_FRAGMENT + (ptr >> uspi->s_apbshift), fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page); + if (phys64) { + phys64 += (ptr >> uspi->s_apbshift) & uspi->s_fpbmask; + bh = sb_getblk(sb, phys64); + } else { + bh = NULL; + } goto get_indirect; } ptr -= 1 << (uspi->s_apbshift + uspi->s_fpbshift); if (depth == 3) { - bh = ufs_inode_getfrag(inode, + phys64 = ufs_inode_getfrag(inode, UFS_DIND_FRAGMENT + (ptr >> uspi->s_2apbshift), fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page); + if (phys64) { + phys64 += (ptr >> uspi->s_2apbshift) & uspi->s_fpbmask; + bh = sb_getblk(sb, phys64); + } else { + bh = NULL; + } goto get_double; } ptr -= 1 << (uspi->s_2apbshift + uspi->s_fpbshift); - bh = ufs_inode_getfrag(inode, + phys64 = ufs_inode_getfrag(inode, UFS_TIND_FRAGMENT + (ptr >> uspi->s_3apbshift), fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page); - bh = ufs_inode_getblock(inode, bh, + if (phys64) { + phys64 += (ptr >> uspi->s_3apbshift) & uspi->s_fpbmask; + bh = sb_getblk(sb, phys64); + } else { + bh = NULL; + } + phys64 = ufs_inode_getblock(inode, bh, (ptr >> uspi->s_2apbshift) & uspi->s_apbmask, fragment, &err, NULL, NULL, NULL); + if (phys64) { + phys64 += (ptr >> uspi->s_2apbshift) & uspi->s_fpbmask, + bh = sb_getblk(sb, phys64); + } else { + bh = NULL; + } get_double: - bh = ufs_inode_getblock(inode, bh, + phys64 = ufs_inode_getblock(inode, bh, (ptr >> uspi->s_apbshift) & uspi->s_apbmask, fragment, &err, NULL, NULL, NULL); + if (phys64) { + phys64 += (ptr >> uspi->s_apbshift) & uspi->s_fpbmask, + bh = sb_getblk(sb, phys64); + } else { + bh = NULL; + } get_indirect: - bh = ufs_inode_getblock(inode, bh, ptr & uspi->s_apbmask, fragment, + phys64 = ufs_inode_getblock(inode, bh, ptr & uspi->s_apbmask, fragment, &err, &phys, &new, bh_result->b_page); - + if (phys64) { + phys64 += frag; + phys = phys64; + } out: if (err) goto abort; -- cgit v1.2.3 From 721435a7679e13f810133dbea769f87ad7bae3a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 01:06:21 -0400 Subject: ufs_inode_getblock(): pass index instead of 'fragment' The value passed to ufs_inode_getblock() as the 3rd argument had lower bits ignored; the upper bits were shifted down and used and they actually make sense - those are _lower_ bits of index in indirect block (i.e. they form the index within a fragment within an indirect block). Pass those as argument. Upper bits of index (i.e. the number of fragment within indirect block) will join them shortly. 
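For illustration only (not part of the patch; the shift widths are invented), the bit split in question looks like this: the low bits of a fragment number select the fragment within a block, and the next group of bits is the index within the indirect block.

    /* illustrative sketch, not kernel code */
    #include <stdio.h>
    #include <stdint.h>

    #define FPB_SHIFT 3                 /* 8 fragments per block (example)  */
    #define APB_SHIFT 10                /* 1024 pointers per indirect block */

    int main(void)
    {
            uint64_t fragment = 0x12345;        /* arbitrary example value */
            unsigned frag_in_block = fragment & ((1u << FPB_SHIFT) - 1);
            unsigned index = (fragment >> FPB_SHIFT) & ((1u << APB_SHIFT) - 1);

            printf("fragment %#llx -> index %u in the indirect block, "
                   "fragment %u within that block\n",
                   (unsigned long long)fragment, index, frag_in_block);
            return 0;
    }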
Signed-off-by: Al Viro --- fs/ufs/inode.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index eeccf45fcd57..6866b904f148 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -338,8 +338,7 @@ repeat2: * ufs_inode_getblock() - allocate new block * @inode: pointer to inode * @bh: pointer to block which hold "pointer" to new allocated block - * @fragment: number of `fragment' which hold pointer - * to new allocated block + * @index: number of pointer in the indirect block * @new_fragment: number of new allocated fragment * (block will hold this fragment and also uspi->s_fpb-1) * @err: see ufs_inode_getfrag() @@ -349,20 +348,14 @@ repeat2: */ static u64 ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, - u64 fragment, sector_t new_fragment, int *err, + unsigned index, sector_t new_fragment, int *err, long *phys, int *new, struct page *locked_page) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - u64 tmp = 0, goal, block; + u64 tmp = 0, goal; void *p; - block = ufs_fragstoblks (fragment); - - UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, metadata %d\n", - inode->i_ino, (unsigned long long)fragment, - (unsigned long long)new_fragment, !phys); - if (!bh) goto out; if (!buffer_uptodate(bh)) { @@ -372,17 +365,17 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, goto out; } if (uspi->fs_magic == UFS2_MAGIC) - p = (__fs64 *)bh->b_data + block; + p = (__fs64 *)bh->b_data + index; else - p = (__fs32 *)bh->b_data + block; + p = (__fs32 *)bh->b_data + index; tmp = ufs_data_ptr_to_cpu(sb, p); if (tmp) goto out; - if (block && (uspi->fs_magic == UFS2_MAGIC ? - (tmp = fs64_to_cpu(sb, ((__fs64 *)bh->b_data)[block-1])) : - (tmp = fs32_to_cpu(sb, ((__fs32 *)bh->b_data)[block-1])))) + if (index && (uspi->fs_magic == UFS2_MAGIC ? 
+ (tmp = fs64_to_cpu(sb, ((__fs64 *)bh->b_data)[index-1])) : + (tmp = fs32_to_cpu(sb, ((__fs32 *)bh->b_data)[index-1])))) goal = tmp + uspi->s_fpb; else goal = bh->b_blocknr + uspi->s_fpb; @@ -424,6 +417,7 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff unsigned long ptr,phys; u64 phys64 = 0; unsigned frag = fragment & uspi->s_fpbmask; + unsigned mask = uspi->s_apbmask >> uspi->s_fpbshift; if (!create) { phys64 = ufs_frag_map(inode, offsets, depth); @@ -499,7 +493,7 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff bh = NULL; } phys64 = ufs_inode_getblock(inode, bh, - (ptr >> uspi->s_2apbshift) & uspi->s_apbmask, + offsets[1] & mask, fragment, &err, NULL, NULL, NULL); if (phys64) { phys64 += (ptr >> uspi->s_2apbshift) & uspi->s_fpbmask, @@ -509,7 +503,7 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff } get_double: phys64 = ufs_inode_getblock(inode, bh, - (ptr >> uspi->s_apbshift) & uspi->s_apbmask, + offsets[depth - 2] & mask, fragment, &err, NULL, NULL, NULL); if (phys64) { phys64 += (ptr >> uspi->s_apbshift) & uspi->s_fpbmask, @@ -518,8 +512,8 @@ get_double: bh = NULL; } get_indirect: - phys64 = ufs_inode_getblock(inode, bh, ptr & uspi->s_apbmask, fragment, - &err, &phys, &new, bh_result->b_page); + phys64 = ufs_inode_getblock(inode, bh, offsets[depth - 1] & mask, + fragment, &err, &phys, &new, bh_result->b_page); if (phys64) { phys64 += frag; phys = phys64; -- cgit v1.2.3 From 619cfac09134b4de7a4f232cf3636cf43728577d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 01:23:08 -0400 Subject: ufs_inode_getblock(): pass indirect block number and full index ... instead of messing with buffer_head. We can bloody well do sb_bread() in there. 
Signed-off-by: Al Viro --- fs/ufs/inode.c | 62 +++++++++++++++------------------------------------------- 1 file changed, 16 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 6866b904f148..25d47df934e2 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -337,8 +337,8 @@ repeat2: /** * ufs_inode_getblock() - allocate new block * @inode: pointer to inode - * @bh: pointer to block which hold "pointer" to new allocated block - * @index: number of pointer in the indirect block + * @ind_block: block number of the indirect block + * @index: number of pointer within the indirect block * @new_fragment: number of new allocated fragment * (block will hold this fragment and also uspi->s_fpb-1) * @err: see ufs_inode_getfrag() @@ -347,23 +347,25 @@ repeat2: * @locked_page: see ufs_inode_getfrag() */ static u64 -ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, +ufs_inode_getblock(struct inode *inode, u64 ind_block, unsigned index, sector_t new_fragment, int *err, long *phys, int *new, struct page *locked_page) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + int shift = uspi->s_apbshift - uspi->s_fpbshift; u64 tmp = 0, goal; + struct buffer_head *bh; void *p; - if (!bh) - goto out; - if (!buffer_uptodate(bh)) { - ll_rw_block (READ, 1, &bh); - wait_on_buffer (bh); - if (!buffer_uptodate(bh)) - goto out; - } + if (!ind_block) + return 0; + + bh = sb_bread(sb, ind_block + (index >> shift)); + if (unlikely(!bh)) + return 0; + + index &= uspi->s_apbmask >> uspi->s_fpbshift; if (uspi->fs_magic == UFS2_MAGIC) p = (__fs64 *)bh->b_data + index; else @@ -459,12 +461,6 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff UFS_IND_FRAGMENT + (ptr >> uspi->s_apbshift), fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page); - if (phys64) { - phys64 += (ptr >> uspi->s_apbshift) & uspi->s_fpbmask; - bh = sb_getblk(sb, phys64); - } else { - bh = NULL; - } goto get_indirect; } ptr -= 1 << (uspi->s_apbshift + uspi->s_fpbshift); @@ -473,12 +469,6 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff UFS_DIND_FRAGMENT + (ptr >> uspi->s_2apbshift), fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page); - if (phys64) { - phys64 += (ptr >> uspi->s_2apbshift) & uspi->s_fpbmask; - bh = sb_getblk(sb, phys64); - } else { - bh = NULL; - } goto get_double; } ptr -= 1 << (uspi->s_2apbshift + uspi->s_fpbshift); @@ -486,33 +476,13 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff UFS_TIND_FRAGMENT + (ptr >> uspi->s_3apbshift), fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page); - if (phys64) { - phys64 += (ptr >> uspi->s_3apbshift) & uspi->s_fpbmask; - bh = sb_getblk(sb, phys64); - } else { - bh = NULL; - } - phys64 = ufs_inode_getblock(inode, bh, - offsets[1] & mask, + phys64 = ufs_inode_getblock(inode, phys64, offsets[1], fragment, &err, NULL, NULL, NULL); - if (phys64) { - phys64 += (ptr >> uspi->s_2apbshift) & uspi->s_fpbmask, - bh = sb_getblk(sb, phys64); - } else { - bh = NULL; - } get_double: - phys64 = ufs_inode_getblock(inode, bh, - offsets[depth - 2] & mask, + phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 2], fragment, &err, NULL, NULL, NULL); - if (phys64) { - phys64 += (ptr >> uspi->s_apbshift) & uspi->s_fpbmask, - bh = sb_getblk(sb, phys64); - } else { - bh = NULL; - } get_indirect: - phys64 = ufs_inode_getblock(inode, bh, offsets[depth - 1] & mask, + phys64 = ufs_inode_getblock(inode, 
phys64, offsets[depth - 1], fragment, &err, &phys, &new, bh_result->b_page); if (phys64) { phys64 += frag; -- cgit v1.2.3 From 0f3c1294bedcc4544c68d6b84699bdaa334b11b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 13:40:25 -0400 Subject: ufs_inode_getfrag(): split extending the partial blocks off ufs_extend_tail() is handling that now. Signed-off-by: Al Viro --- fs/ufs/inode.c | 128 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 63 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 25d47df934e2..d652f64885fd 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -205,6 +205,40 @@ changed: goto again; } +/* + * Unpacking tails: we have a file with partial final block and + * we had been asked to extend it. If the fragment being written + * is within the same block, we need to extend the tail just to cover + * that fragment. Otherwise the tail is extended to full block. + * + * Note that we might need to create a _new_ tail, but that will + * be handled elsewhere; this is strictly for resizing old + * ones. + */ +static bool +ufs_extend_tail(struct inode *inode, u64 writes_to, + int *err, struct page *locked_page) +{ + struct ufs_inode_info *ufsi = UFS_I(inode); + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + unsigned lastfrag = ufsi->i_lastfrag; /* it's a short file, so unsigned is enough */ + unsigned block = ufs_fragstoblks(lastfrag); + unsigned new_size; + void *p; + u64 tmp; + + if (writes_to < (lastfrag | uspi->s_fpbmask)) + new_size = (writes_to & uspi->s_fpbmask) + 1; + else + new_size = uspi->s_fpb; + + p = ufs_get_direct_data_ptr(uspi, ufsi, block); + tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p), + new_size, err, locked_page); + return tmp != 0; +} + /** * ufs_inode_getfrag() - allocate new fragment(s) * @inode: pointer to inode @@ -226,13 +260,10 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - unsigned blockoff, lastblockoff; - u64 tmp, goal, lastfrag, block, lastblock; - void *p, *p2; - - UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, required %u, " - "metadata %d\n", inode->i_ino, (unsigned long long)fragment, - (unsigned long long)new_fragment, required, !phys); + unsigned blockoff; + u64 tmp, goal, lastfrag, block; + unsigned nfrags = uspi->s_fpb; + void *p; /* TODO : to be done for write support if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) @@ -242,66 +273,27 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, block = ufs_fragstoblks (fragment); blockoff = ufs_fragnum (fragment); p = ufs_get_direct_data_ptr(uspi, ufsi, block); - - goal = 0; - tmp = ufs_data_ptr_to_cpu(sb, p); + if (tmp) + goto out; lastfrag = ufsi->i_lastfrag; - if (tmp && fragment < lastfrag) - goto out; - lastblock = ufs_fragstoblks (lastfrag); - lastblockoff = ufs_fragnum (lastfrag); - /* - * We will extend file into new block beyond last allocated block - */ - if (lastblock < block) { - /* - * We must reallocate last allocated block - */ - if (lastblockoff) { - p2 = ufs_get_direct_data_ptr(uspi, ufsi, lastblock); - tmp = ufs_new_fragments(inode, p2, lastfrag, - ufs_data_ptr_to_cpu(sb, p2), - uspi->s_fpb - lastblockoff, - err, locked_page); - if (!tmp) - return 0; - lastfrag = ufsi->i_lastfrag; - } - tmp = ufs_data_ptr_to_cpu(sb, - ufs_get_direct_data_ptr(uspi, ufsi, - lastblock)); - 
if (tmp) - goal = tmp + uspi->s_fpb; - tmp = ufs_new_fragments (inode, p, fragment - blockoff, - goal, required + blockoff, - err, - phys != NULL ? locked_page : NULL); - } else if (lastblock == block) { - /* - * We will extend last allocated block - */ - tmp = ufs_new_fragments(inode, p, fragment - - (blockoff - lastblockoff), - ufs_data_ptr_to_cpu(sb, p), - required + (blockoff - lastblockoff), - err, phys != NULL ? locked_page : NULL); - } else /* (lastblock > block) */ { - /* - * We will allocate new block before last allocated block - */ - if (block) { - tmp = ufs_data_ptr_to_cpu(sb, - ufs_get_direct_data_ptr(uspi, ufsi, block - 1)); - if (tmp) - goal = tmp + uspi->s_fpb; - } - tmp = ufs_new_fragments(inode, p, fragment - blockoff, - goal, uspi->s_fpb, err, - phys != NULL ? locked_page : NULL); + /* will that be a new tail? */ + if (new_fragment < UFS_NDIR_FRAGMENT && new_fragment >= lastfrag) + nfrags = (new_fragment & uspi->s_fpbmask) + 1; + + goal = 0; + if (block) { + goal = ufs_data_ptr_to_cpu(sb, + ufs_get_direct_data_ptr(uspi, ufsi, block - 1)); + if (goal) + goal += uspi->s_fpb; } + tmp = ufs_new_fragments(inode, p, fragment - blockoff, + goal, uspi->s_fpb, err, + phys != NULL ? locked_page : NULL); + if (!tmp) { *err = -ENOSPC; return 0; @@ -419,7 +411,6 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff unsigned long ptr,phys; u64 phys64 = 0; unsigned frag = fragment & uspi->s_fpbmask; - unsigned mask = uspi->s_apbmask >> uspi->s_fpbshift; if (!create) { phys64 = ufs_frag_map(inode, offsets, depth); @@ -444,6 +435,17 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff goto abort_too_big; err = 0; + + if (UFS_I(inode)->i_lastfrag < UFS_NDIR_FRAGMENT) { + unsigned lastfrag = UFS_I(inode)->i_lastfrag; + unsigned tailfrags = lastfrag & uspi->s_fpbmask; + if (tailfrags && fragment >= lastfrag) { + if (!ufs_extend_tail(inode, fragment, + &err, bh_result->b_page)) + goto abort; + } + } + ptr = fragment; if (depth == 1) { -- cgit v1.2.3 From 5336970be09becb2b59ac3812718b2cb80d33347 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 13:53:52 -0400 Subject: ufs_inode_getfrag(): pass index instead of 'fragment' same story as with ufs_inode_getblock() Signed-off-by: Al Viro --- fs/ufs/inode.c | 50 +++++++++++++++++--------------------------------- 1 file changed, 17 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index d652f64885fd..c05cf14ef8ff 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -242,10 +242,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to, /** * ufs_inode_getfrag() - allocate new fragment(s) * @inode: pointer to inode - * @fragment: number of `fragment' which hold pointer - * to new allocated fragment(s) + * @index: number of block pointer within the inode's array. * @new_fragment: number of new allocated fragment(s) - * @required: how many fragment(s) we require * @err: we set it if something wrong * @phys: pointer to where we save physical number of new allocated fragments, * NULL if we allocate not data(indirect blocks for example). 
@@ -253,15 +251,14 @@ ufs_extend_tail(struct inode *inode, u64 writes_to, * @locked_page: for ufs_new_fragments() */ static u64 -ufs_inode_getfrag(struct inode *inode, u64 fragment, - sector_t new_fragment, unsigned int required, int *err, +ufs_inode_getfrag(struct inode *inode, unsigned index, + sector_t new_fragment, int *err, long *phys, int *new, struct page *locked_page) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; - unsigned blockoff; - u64 tmp, goal, lastfrag, block; + u64 tmp, goal, lastfrag; unsigned nfrags = uspi->s_fpb; void *p; @@ -270,9 +267,7 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, goto ufs2; */ - block = ufs_fragstoblks (fragment); - blockoff = ufs_fragnum (fragment); - p = ufs_get_direct_data_ptr(uspi, ufsi, block); + p = ufs_get_direct_data_ptr(uspi, ufsi, index); tmp = ufs_data_ptr_to_cpu(sb, p); if (tmp) goto out; @@ -284,13 +279,13 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment, nfrags = (new_fragment & uspi->s_fpbmask) + 1; goal = 0; - if (block) { + if (index) { goal = ufs_data_ptr_to_cpu(sb, - ufs_get_direct_data_ptr(uspi, ufsi, block - 1)); + ufs_get_direct_data_ptr(uspi, ufsi, index - 1)); if (goal) goal += uspi->s_fpb; } - tmp = ufs_new_fragments(inode, p, fragment - blockoff, + tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal, uspi->s_fpb, err, phys != NULL ? locked_page : NULL); @@ -408,7 +403,7 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff int ret, err, new; unsigned offsets[4]; int depth = ufs_block_to_path(inode, fragment >> uspi->s_fpbshift, offsets); - unsigned long ptr,phys; + unsigned long phys; u64 phys64 = 0; unsigned frag = fragment & uspi->s_fpbmask; @@ -446,38 +441,27 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff } } - ptr = fragment; - if (depth == 1) { - phys64 = ufs_inode_getfrag(inode, ptr, fragment, 1, &err, &phys, - &new, bh_result->b_page); + phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, + &err, &phys, &new, bh_result->b_page); if (phys64) { phys64 += frag; phys = phys64; } goto out; } - ptr -= UFS_NDIR_FRAGMENT; if (depth == 2) { - phys64 = ufs_inode_getfrag(inode, - UFS_IND_FRAGMENT + (ptr >> uspi->s_apbshift), - fragment, uspi->s_fpb, &err, NULL, NULL, - bh_result->b_page); + phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, + &err, NULL, NULL, bh_result->b_page); goto get_indirect; } - ptr -= 1 << (uspi->s_apbshift + uspi->s_fpbshift); if (depth == 3) { - phys64 = ufs_inode_getfrag(inode, - UFS_DIND_FRAGMENT + (ptr >> uspi->s_2apbshift), - fragment, uspi->s_fpb, &err, NULL, NULL, - bh_result->b_page); + phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, + &err, NULL, NULL, bh_result->b_page); goto get_double; } - ptr -= 1 << (uspi->s_2apbshift + uspi->s_fpbshift); - phys64 = ufs_inode_getfrag(inode, - UFS_TIND_FRAGMENT + (ptr >> uspi->s_3apbshift), - fragment, uspi->s_fpb, &err, NULL, NULL, - bh_result->b_page); + phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, + &err, NULL, NULL, bh_result->b_page); phys64 = ufs_inode_getblock(inode, phys64, offsets[1], fragment, &err, NULL, NULL, NULL); get_double: -- cgit v1.2.3 From 4eeff4c9326878ff58ef6fe68d2bf22ef877e5a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 14:08:16 -0400 Subject: ufs_getfrag_block(): turn following indirects into a loop Signed-off-by: Al Viro --- fs/ufs/inode.c | 32 ++++++++------------------------ 1 file changed, 8 
insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index c05cf14ef8ff..f2d8cc2166af 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -444,37 +444,21 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff if (depth == 1) { phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, &err, &phys, &new, bh_result->b_page); - if (phys64) { - phys64 += frag; - phys = phys64; - } - goto out; - } - if (depth == 2) { - phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, - &err, NULL, NULL, bh_result->b_page); - goto get_indirect; - } - if (depth == 3) { + } else { + int i; phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, &err, NULL, NULL, bh_result->b_page); - goto get_double; + for (i = 1; i < depth - 1; i++) + phys64 = ufs_inode_getblock(inode, phys64, offsets[i], + fragment, &err, NULL, NULL, NULL); + phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 1], + fragment, &err, &phys, &new, bh_result->b_page); } - phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, - &err, NULL, NULL, bh_result->b_page); - phys64 = ufs_inode_getblock(inode, phys64, offsets[1], - fragment, &err, NULL, NULL, NULL); -get_double: - phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 2], - fragment, &err, NULL, NULL, NULL); -get_indirect: - phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 1], - fragment, &err, &phys, &new, bh_result->b_page); +out: if (phys64) { phys64 += frag; phys = phys64; } -out: if (err) goto abort; if (new) -- cgit v1.2.3 From 5fbfb238f7a0a5c4633438eb5bdfb4810995c76a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 14:10:53 -0400 Subject: ufs_inode_getblock(): failure to read an indirect block is -EIO ... and not "write to beginning of the disk", TYVM... 
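As a standalone illustration (not part of the patch, all names hypothetical), this is the usual out-parameter error convention being restored: a failed metadata read sets *err to -EIO instead of quietly returning 0, which the caller would otherwise take for a valid "block 0".

    /* illustrative sketch, not kernel code */
    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>
    #include <errno.h>

    static bool read_indirect(uint64_t blkno, uint64_t *out)
    {
            (void)blkno;
            (void)out;
            return false;               /* pretend the read failed */
    }

    static uint64_t lookup_block(uint64_t ind_block, int *err)
    {
            uint64_t val = 0;

            if (!read_indirect(ind_block, &val)) {
                    *err = -EIO;        /* report the failure...           */
                    return 0;           /* ...instead of meaning "block 0" */
            }
            return val;
    }

    int main(void)
    {
            int err = 0;
            uint64_t block = lookup_block(100, &err);
            printf("block %llu, err %d\n", (unsigned long long)block, err);
            return 0;
    }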
Signed-off-by: Al Viro --- fs/ufs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index f2d8cc2166af..ed70147e1cb4 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -349,8 +349,10 @@ ufs_inode_getblock(struct inode *inode, u64 ind_block, return 0; bh = sb_bread(sb, ind_block + (index >> shift)); - if (unlikely(!bh)) + if (unlikely(!bh)) { + *err = -EIO; return 0; + } index &= uspi->s_apbmask >> uspi->s_fpbshift; if (uspi->fs_magic == UFS2_MAGIC) @@ -454,7 +456,6 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 1], fragment, &err, &phys, &new, bh_result->b_page); } -out: if (phys64) { phys64 += frag; phys = phys64; -- cgit v1.2.3 From 0385f1f9e3e5cb17047474037002500383237f47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 14:20:21 -0400 Subject: ufs_getfrag_block(): tidy up a bit Signed-off-by: Al Viro --- fs/ufs/inode.c | 48 +++++++++++++++--------------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index ed70147e1cb4..7f551b3e3ba4 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -398,40 +398,30 @@ out: static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) { - struct super_block * sb = inode->i_sb; - struct ufs_sb_info * sbi = UFS_SB(sb); - struct ufs_sb_private_info * uspi = sbi->s_uspi; - struct buffer_head * bh; - int ret, err, new; + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + int err = 0, new = 0; unsigned offsets[4]; int depth = ufs_block_to_path(inode, fragment >> uspi->s_fpbshift, offsets); - unsigned long phys; u64 phys64 = 0; + unsigned long phys; unsigned frag = fragment & uspi->s_fpbmask; if (!create) { phys64 = ufs_frag_map(inode, offsets, depth); - if (phys64) { - phys64 += frag; - map_bh(bh_result, sb, phys64); - } - return 0; + goto out; } /* This code entered only while writing ....? 
*/ - err = -EIO; - new = 0; - ret = 0; - bh = NULL; - mutex_lock(&UFS_I(inode)->truncate_mutex); UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); - if (!depth) - goto abort_too_big; - - err = 0; + if (unlikely(!depth)) { + ufs_warning(sb, "ufs_get_block", "block > big"); + err = -EIO; + goto out; + } if (UFS_I(inode)->i_lastfrag < UFS_NDIR_FRAGMENT) { unsigned lastfrag = UFS_I(inode)->i_lastfrag; @@ -439,7 +429,7 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff if (tailfrags && fragment >= lastfrag) { if (!ufs_extend_tail(inode, fragment, &err, bh_result->b_page)) - goto abort; + goto out; } } @@ -456,23 +446,15 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 1], fragment, &err, &phys, &new, bh_result->b_page); } +out: if (phys64) { phys64 += frag; - phys = phys64; + map_bh(bh_result, sb, phys64); + if (new) + set_buffer_new(bh_result); } - if (err) - goto abort; - if (new) - set_buffer_new(bh_result); - map_bh(bh_result, sb, phys); -abort: mutex_unlock(&UFS_I(inode)->truncate_mutex); - return err; - -abort_too_big: - ufs_warning(sb, "ufs_get_block", "block > big"); - goto abort; } static int ufs_writepage(struct page *page, struct writeback_control *wbc) -- cgit v1.2.3 From 4e317ce73aecb735f389ab0d42ae3197a55265e4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Jun 2015 14:27:10 -0400 Subject: ufs_inode_get{frag,block}(): get rid of 'phys' argument Just pass NULL as locked_page in case of first block in the indirect chain. Old calling conventions aside, a reason for having 'phys' was that ufs_inode_getfrag() used to be able to do _two_ allocations - indirect block and extending/reallocating a tail. We needed locked_page for the latter (it's a data), but we also needed to figure out that indirect block is metadata. So we used to pass non-NULL locked_page in all cases *and* used NULL phys as indication of being asked to allocate an indirect. With tail unpacking taken into a separate function we don't need those convolutions anymore. Signed-off-by: Al Viro --- fs/ufs/inode.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7f551b3e3ba4..a064cf44b143 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -245,15 +245,13 @@ ufs_extend_tail(struct inode *inode, u64 writes_to, * @index: number of block pointer within the inode's array. * @new_fragment: number of new allocated fragment(s) * @err: we set it if something wrong - * @phys: pointer to where we save physical number of new allocated fragments, - * NULL if we allocate not data(indirect blocks for example). * @new: we set it if we allocate new block * @locked_page: for ufs_new_fragments() */ static u64 ufs_inode_getfrag(struct inode *inode, unsigned index, sector_t new_fragment, int *err, - long *phys, int *new, struct page *locked_page) + int *new, struct page *locked_page) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -286,18 +284,15 @@ ufs_inode_getfrag(struct inode *inode, unsigned index, goal += uspi->s_fpb; } tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), - goal, uspi->s_fpb, err, - phys != NULL ? 
locked_page : NULL); + goal, uspi->s_fpb, err, locked_page); if (!tmp) { *err = -ENOSPC; return 0; } - if (phys) { - *err = 0; + if (new) *new = 1; - } inode->i_ctime = CURRENT_TIME_SEC; if (IS_SYNC(inode)) ufs_sync_inode (inode); @@ -329,14 +324,13 @@ repeat2: * @new_fragment: number of new allocated fragment * (block will hold this fragment and also uspi->s_fpb-1) * @err: see ufs_inode_getfrag() - * @phys: see ufs_inode_getfrag() * @new: see ufs_inode_getfrag() * @locked_page: see ufs_inode_getfrag() */ static u64 ufs_inode_getblock(struct inode *inode, u64 ind_block, unsigned index, sector_t new_fragment, int *err, - long *phys, int *new, struct page *locked_page) + int *new, struct page *locked_page) { struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; @@ -404,7 +398,6 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff unsigned offsets[4]; int depth = ufs_block_to_path(inode, fragment >> uspi->s_fpbshift, offsets); u64 phys64 = 0; - unsigned long phys; unsigned frag = fragment & uspi->s_fpbmask; if (!create) { @@ -435,16 +428,16 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff if (depth == 1) { phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, - &err, &phys, &new, bh_result->b_page); + &err, &new, bh_result->b_page); } else { int i; phys64 = ufs_inode_getfrag(inode, offsets[0], fragment, - &err, NULL, NULL, bh_result->b_page); + &err, NULL, NULL); for (i = 1; i < depth - 1; i++) phys64 = ufs_inode_getblock(inode, phys64, offsets[i], - fragment, &err, NULL, NULL, NULL); + fragment, &err, NULL, NULL); phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 1], - fragment, &err, &phys, &new, bh_result->b_page); + fragment, &err, &new, bh_result->b_page); } out: if (phys64) { -- cgit v1.2.3 From bee9182d955227f01ff3b80c4cb6acca9bb40b11 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 19 Jul 2015 23:48:20 +0200 Subject: introduce __sb_writers_{acquired,release}() helpers Preparation to hide the sb->s_writers internals from xfs and btrfs. Add 2 trivial define's they can use rather than play with ->s_writers directly. No changes in btrfs/transaction.o and xfs/xfs_aops.o. Signed-off-by: Oleg Nesterov Reviewed-by: Jan Kara --- fs/btrfs/transaction.c | 8 ++------ fs/xfs/xfs_aops.c | 6 ++---- include/linux/fs.h | 5 +++++ 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f5021fcb154e..a8ab8f5ef38e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1638,9 +1638,7 @@ static void do_async_commit(struct work_struct *work) * Tell lockdep about it. */ if (ac->newtrans->type & __TRANS_FREEZABLE) - rwsem_acquire_read( - &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); + __sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS); current->journal_info = ac->newtrans; @@ -1679,9 +1677,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, * async commit thread will be the one to unlock it. 
*/ if (ac->newtrans->type & __TRANS_FREEZABLE) - rwsem_release( - &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], - 1, _THIS_IP_); + __sb_writers_release(root->fs_info->sb, SB_FREEZE_FS); schedule_work(&ac->work); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3859f5e27a4d..9bbb3507376a 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -119,8 +119,7 @@ xfs_setfilesize_trans_alloc( * We may pass freeze protection with a transaction. So tell lockdep * we released it. */ - rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], - 1, _THIS_IP_); + __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); /* * We hand off the transaction to the completion thread now, so * clear the flag here. @@ -171,8 +170,7 @@ xfs_setfilesize_ioend( * Similarly for freeze protection. */ current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); - rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); + __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 84b783f277f7..acb7cad84edd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1391,6 +1391,11 @@ extern struct timespec current_fs_time(struct super_block *sb); void __sb_end_write(struct super_block *sb, int level); int __sb_start_write(struct super_block *sb, int level, bool wait); +#define __sb_writers_acquired(sb, lev) \ + rwsem_acquire_read(&(sb)->s_writers.lock_map[(lev)-1], 0, 1, _THIS_IP_) +#define __sb_writers_release(sb, lev) \ + rwsem_release(&(sb)->s_writers.lock_map[(lev)-1], 1, _THIS_IP_) + /** * sb_end_write - drop write access to a superblock * @sb: the super we wrote to -- cgit v1.2.3 From f4b554af9931585174d4913b482eacab75858964 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jul 2015 00:50:55 +0200 Subject: fix the broken lockdep logic in __sb_start_write() 1. wait_event(frozen < level) without rwsem_acquire_read() is just wrong from lockdep perspective. If we are going to deadlock because the caller is buggy, lockdep can't detect this problem. 2. __sb_start_write() can race with thaw_super() + freeze_super(), and after "goto retry" the 2nd acquire_freeze_lock() is wrong. 3. The "tell lockdep we are doing trylock" hack doesn't look nice. I think this is correct, but this logic should be more explicit. Yes, the recursive read_lock() is fine if we hold the lock on a higher level. But we do not need to fool lockdep. If we can not deadlock in this case then try-lock must not fail and we can use use wait == F throughout this code. Note: as Dave Chinner explains, the "trylock" hack and the fat comment can be probably removed. But this needs a separate change and it will be trivial: just kill __sb_start_write() and rename do_sb_start_write() back to __sb_start_write(). Signed-off-by: Oleg Nesterov Reviewed-by: Jan Kara --- fs/super.c | 73 ++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index b61372354f2b..24a76bcd62a5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1158,38 +1158,11 @@ void __sb_end_write(struct super_block *sb, int level) } EXPORT_SYMBOL(__sb_end_write); -#ifdef CONFIG_LOCKDEP -/* - * We want lockdep to tell us about possible deadlocks with freezing but - * it's it bit tricky to properly instrument it. Getting a freeze protection - * works as getting a read lock but there are subtle problems. 
XFS for example - * gets freeze protection on internal level twice in some cases, which is OK - * only because we already hold a freeze protection also on higher level. Due - * to these cases we have to tell lockdep we are doing trylock when we - * already hold a freeze protection for a higher freeze level. - */ -static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock, +static int do_sb_start_write(struct super_block *sb, int level, bool wait, unsigned long ip) { - int i; - - if (!trylock) { - for (i = 0; i < level - 1; i++) - if (lock_is_held(&sb->s_writers.lock_map[i])) { - trylock = true; - break; - } - } - rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip); -} -#endif - -/* - * This is an internal function, please use sb_start_{write,pagefault,intwrite} - * instead. - */ -int __sb_start_write(struct super_block *sb, int level, bool wait) -{ + if (wait) + rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 0, ip); retry: if (unlikely(sb->s_writers.frozen >= level)) { if (!wait) @@ -1198,9 +1171,6 @@ retry: sb->s_writers.frozen < level); } -#ifdef CONFIG_LOCKDEP - acquire_freeze_lock(sb, level, !wait, _RET_IP_); -#endif percpu_counter_inc(&sb->s_writers.counter[level-1]); /* * Make sure counter is updated before we check for frozen. @@ -1211,8 +1181,45 @@ retry: __sb_end_write(sb, level); goto retry; } + + if (!wait) + rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 1, ip); return 1; } + +/* + * This is an internal function, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +int __sb_start_write(struct super_block *sb, int level, bool wait) +{ + bool force_trylock = false; + int ret; + +#ifdef CONFIG_LOCKDEP + /* + * We want lockdep to tell us about possible deadlocks with freezing + * but it's it bit tricky to properly instrument it. Getting a freeze + * protection works as getting a read lock but there are subtle + * problems. XFS for example gets freeze protection on internal level + * twice in some cases, which is OK only because we already hold a + * freeze protection also on higher level. Due to these cases we have + * to use wait == F (trylock mode) which must not fail. + */ + if (wait) { + int i; + + for (i = 0; i < level - 1; i++) + if (lock_is_held(&sb->s_writers.lock_map[i])) { + force_trylock = true; + break; + } + } +#endif + ret = do_sb_start_write(sb, level, wait && !force_trylock, _RET_IP_); + WARN_ON(force_trylock & !ret); + return ret; +} EXPORT_SYMBOL(__sb_start_write); /** -- cgit v1.2.3 From 0e28e01f1e73015d8e1b8fa1cda071d0bd9a2600 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 11 Aug 2015 16:28:29 +0200 Subject: document rwsem_release() in sb_wait_write() Not only we need to avoid the warning from lockdep_sys_exit(), the caller of freeze_super() can never release this lock. Another thread can do this, so there is another reason for rwsem_release(). Plus the comment should explain why we have to fool lockdep. 
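The asymmetry is easy to demonstrate from userspace: nothing guarantees that the task which issued FIFREEZE is the one that later issues FITHAW, so the freeze-side "write lock" can never be treated as owned by the freezing task. A minimal sketch (hypothetical /mnt mount point, needs CAP_SYS_ADMIN; freeze in a child, thaw in the parent):

/* freeze from one task, thaw from another - illustrative only */
#include <fcntl.h>
#include <linux/fs.h>		/* FIFREEZE, FITHAW */
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt", O_RDONLY);	/* hypothetical mount point */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (fork() == 0) {
		/* child takes the freeze "lock" and exits without releasing it */
		if (ioctl(fd, FIFREEZE, 0) < 0)
			perror("FIFREEZE");
		_exit(0);
	}
	wait(NULL);
	/* a different task is the one that "unlocks" */
	if (ioctl(fd, FITHAW, 0) < 0)
		perror("FITHAW");
	close(fd);
	return 0;
}

This is exactly why sb_wait_write() cannot keep the lockdep-tracked write acquisition held across the return to userspace.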
Signed-off-by: Oleg Nesterov Reviewed-by: Jan Kara --- fs/super.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 24a76bcd62a5..8aa3cbc571d1 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1236,11 +1236,17 @@ static void sb_wait_write(struct super_block *sb, int level) { s64 writers; + rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_); /* - * We just cycle-through lockdep here so that it does not complain - * about returning with lock to userspace + * We are going to return to userspace and forget about this lock, the + * ownership goes to the caller of thaw_super() which does unlock. + * + * FIXME: we should do this before return from freeze_super() after we + * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super() + * should re-acquire these locks before s_op->unfreeze_fs(sb). However + * this leads to lockdep false-positives, so currently we do the early + * release right after acquire. */ - rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_); rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_); do { -- cgit v1.2.3 From 853b39a7c82826b8413048feec7bf08e98ce7a84 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jul 2015 20:21:13 +0200 Subject: shift percpu_counter_destroy() into destroy_super_work() Of course, this patch is ugly as hell. It will be (partially) reverted later. We add it to ensure that other WIP changes in percpu_rw_semaphore won't break fs/super.c. We do not even need this change right now, percpu_free_rwsem() is fine in atomic context. But we are going to change this, it will be might_sleep() after we merge the rcu_sync() patches. And even after that we do not really need destroy_super_work(), we will kill it in any case. Instead, destroy_super_rcu() should just check that rss->cb_state == CB_IDLE and do call_rcu() again in the (very unlikely) case this is not true. So this is just the temporary kludge which helps us to avoid the conflicts with the changes which will be (hopefully) routed via rcu tree. 
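The shape of the kludge is the usual "bounce sleeping cleanup out of an RCU callback" pattern; a generic sketch (not the actual super.c code, the struct and function names here are made up):

/*
 * RCU callbacks run in softirq context, so anything that may sleep
 * has to be punted to process context via a work item.
 */
struct obj {
	struct rcu_head rcu;
	struct work_struct destroy_work;
	/* ... resources whose teardown may sleep ... */
};

static void obj_destroy_work(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, destroy_work);

	/* process context: sleeping teardown (e.g. percpu_free_rwsem()) is fine */
	kfree(o);
}

static void obj_destroy_rcu(struct rcu_head *head)
{
	struct obj *o = container_of(head, struct obj, rcu);

	INIT_WORK(&o->destroy_work, obj_destroy_work);
	schedule_work(&o->destroy_work);
}

static void obj_free(struct obj *o)
{
	/* wait for a grace period, then tear down from a workqueue */
	call_rcu(&o->rcu, obj_destroy_rcu);
}

The hunk below is this pattern applied to struct super_block.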
Signed-off-by: Oleg Nesterov Reviewed-by: Jan Kara --- fs/super.c | 23 +++++++++++++++++++---- include/linux/fs.h | 3 ++- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 8aa3cbc571d1..c937bd7b4d33 100644 --- a/fs/super.c +++ b/fs/super.c @@ -135,6 +135,24 @@ static unsigned long super_cache_count(struct shrinker *shrink, return total_objects; } +static void destroy_super_work(struct work_struct *work) +{ + struct super_block *s = container_of(work, struct super_block, + destroy_work); + int i; + + for (i = 0; i < SB_FREEZE_LEVELS; i++) + percpu_counter_destroy(&s->s_writers.counter[i]); + kfree(s); +} + +static void destroy_super_rcu(struct rcu_head *head) +{ + struct super_block *s = container_of(head, struct super_block, rcu); + INIT_WORK(&s->destroy_work, destroy_super_work); + schedule_work(&s->destroy_work); +} + /** * destroy_super - frees a superblock * @s: superblock to free @@ -143,16 +161,13 @@ static unsigned long super_cache_count(struct shrinker *shrink, */ static void destroy_super(struct super_block *s) { - int i; list_lru_destroy(&s->s_dentry_lru); list_lru_destroy(&s->s_inode_lru); - for (i = 0; i < SB_FREEZE_LEVELS; i++) - percpu_counter_destroy(&s->s_writers.counter[i]); security_sb_free(s); WARN_ON(!list_empty(&s->s_mounts)); kfree(s->s_subtype); kfree(s->s_options); - kfree_rcu(s, rcu); + call_rcu(&s->rcu, destroy_super_rcu); } /** diff --git a/include/linux/fs.h b/include/linux/fs.h index acb7cad84edd..4bed78966c6b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1375,7 +1376,7 @@ struct super_block { struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; struct list_lru s_inode_lru ____cacheline_aligned_in_smp; struct rcu_head rcu; - + struct work_struct destroy_work; /* * Indicates how deep in a filesystem stack this SB is */ -- cgit v1.2.3 From 8129ed29644bf56ed17ec1bbbeed5c568b43d6a0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 11 Aug 2015 17:05:04 +0200 Subject: change sb_writers to use percpu_rw_semaphore We can remove everything from struct sb_writers except frozen and add the array of percpu_rw_semaphore's instead. This patch doesn't remove sb_writers->wait_unfrozen yet, we keep it for get_super_thawed(). We will probably remove it later. This change tries to address the following problems: - Firstly, __sb_start_write() looks simply buggy. It does __sb_end_write() if it sees ->frozen, but if it migrates to another CPU before percpu_counter_dec(), sb_wait_write() can wrongly succeed if there is another task which holds the same "semaphore": sb_wait_write() can miss the result of the previous percpu_counter_inc() but see the result of this percpu_counter_dec(). - As Dave Hansen reports, it is suboptimal. The trivial microbenchmark that writes to a tmpfs file in a loop runs 12% faster if we change this code to rely on RCU and kill the memory barriers. - This code doesn't look simple. It would be better to rely on the generic locking code. According to Dave, this change adds the same performance improvement. Note: with this change both freeze_super() and thaw_super() will do synchronize_sched_expedited() 3 times. This is just ugly. But: - This will be "fixed" by the rcu_sync changes we are going to merge. After that freeze_super()->percpu_down_write() will use synchronize_sched(), and thaw_super() won't use synchronize() at all. This doesn't need any changes in fs/super.c. 
- Once we merge rcu_sync changes, we can also change super.c so that all wb_write->rw_sem's will share the single ->rss in struct sb_writes, then freeze_super() will need only one synchronize_sched(). Signed-off-by: Oleg Nesterov Reviewed-by: Jan Kara --- fs/super.c | 111 +++++++++++++++-------------------------------------- include/linux/fs.h | 19 +++------ 2 files changed, 36 insertions(+), 94 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index c937bd7b4d33..767b1e10f6ad 100644 --- a/fs/super.c +++ b/fs/super.c @@ -142,7 +142,7 @@ static void destroy_super_work(struct work_struct *work) int i; for (i = 0; i < SB_FREEZE_LEVELS; i++) - percpu_counter_destroy(&s->s_writers.counter[i]); + percpu_free_rwsem(&s->s_writers.rw_sem[i]); kfree(s); } @@ -193,13 +193,11 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) goto fail; for (i = 0; i < SB_FREEZE_LEVELS; i++) { - if (percpu_counter_init(&s->s_writers.counter[i], 0, - GFP_KERNEL) < 0) + if (__percpu_init_rwsem(&s->s_writers.rw_sem[i], + sb_writers_name[i], + &type->s_writers_key[i])) goto fail; - lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], - &type->s_writers_key[i], 0); } - init_waitqueue_head(&s->s_writers.wait); init_waitqueue_head(&s->s_writers.wait_unfrozen); s->s_bdi = &noop_backing_dev_info; s->s_flags = flags; @@ -1161,47 +1159,10 @@ out: */ void __sb_end_write(struct super_block *sb, int level) { - percpu_counter_dec(&sb->s_writers.counter[level-1]); - /* - * Make sure s_writers are updated before we wake up waiters in - * freeze_super(). - */ - smp_mb(); - if (waitqueue_active(&sb->s_writers.wait)) - wake_up(&sb->s_writers.wait); - rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_); + percpu_up_read(sb->s_writers.rw_sem + level-1); } EXPORT_SYMBOL(__sb_end_write); -static int do_sb_start_write(struct super_block *sb, int level, bool wait, - unsigned long ip) -{ - if (wait) - rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 0, ip); -retry: - if (unlikely(sb->s_writers.frozen >= level)) { - if (!wait) - return 0; - wait_event(sb->s_writers.wait_unfrozen, - sb->s_writers.frozen < level); - } - - percpu_counter_inc(&sb->s_writers.counter[level-1]); - /* - * Make sure counter is updated before we check for frozen. - * freeze_super() first sets frozen and then checks the counter. - */ - smp_mb(); - if (unlikely(sb->s_writers.frozen >= level)) { - __sb_end_write(sb, level); - goto retry; - } - - if (!wait) - rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 1, ip); - return 1; -} - /* * This is an internal function, please use sb_start_{write,pagefault,intwrite} * instead. 
@@ -1209,7 +1170,7 @@ retry: int __sb_start_write(struct super_block *sb, int level, bool wait) { bool force_trylock = false; - int ret; + int ret = 1; #ifdef CONFIG_LOCKDEP /* @@ -1225,13 +1186,17 @@ int __sb_start_write(struct super_block *sb, int level, bool wait) int i; for (i = 0; i < level - 1; i++) - if (lock_is_held(&sb->s_writers.lock_map[i])) { + if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) { force_trylock = true; break; } } #endif - ret = do_sb_start_write(sb, level, wait && !force_trylock, _RET_IP_); + if (wait && !force_trylock) + percpu_down_read(sb->s_writers.rw_sem + level-1); + else + ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); + WARN_ON(force_trylock & !ret); return ret; } @@ -1243,15 +1208,11 @@ EXPORT_SYMBOL(__sb_start_write); * @level: type of writers we wait for (normal vs page fault) * * This function waits until there are no writers of given type to given file - * system. Caller of this function should make sure there can be no new writers - * of type @level before calling this function. Otherwise this function can - * livelock. + * system. */ static void sb_wait_write(struct super_block *sb, int level) { - s64 writers; - - rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_); + percpu_down_write(sb->s_writers.rw_sem + level-1); /* * We are going to return to userspace and forget about this lock, the * ownership goes to the caller of thaw_super() which does unlock. @@ -1262,24 +1223,18 @@ static void sb_wait_write(struct super_block *sb, int level) * this leads to lockdep false-positives, so currently we do the early * release right after acquire. */ - rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_); - - do { - DEFINE_WAIT(wait); + percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_); +} - /* - * We use a barrier in prepare_to_wait() to separate setting - * of frozen and checking of the counter - */ - prepare_to_wait(&sb->s_writers.wait, &wait, - TASK_UNINTERRUPTIBLE); +static void sb_freeze_unlock(struct super_block *sb) +{ + int level; - writers = percpu_counter_sum(&sb->s_writers.counter[level-1]); - if (writers) - schedule(); + for (level = 0; level < SB_FREEZE_LEVELS; ++level) + percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); - finish_wait(&sb->s_writers.wait, &wait); - } while (writers); + for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) + percpu_up_write(sb->s_writers.rw_sem + level); } /** @@ -1338,20 +1293,14 @@ int freeze_super(struct super_block *sb) return 0; } - /* From now on, no new normal writers can start */ sb->s_writers.frozen = SB_FREEZE_WRITE; - smp_wmb(); - /* Release s_umount to preserve sb_start_write -> s_umount ordering */ up_write(&sb->s_umount); - sb_wait_write(sb, SB_FREEZE_WRITE); + down_write(&sb->s_umount); /* Now we go and block page faults... 
*/ - down_write(&sb->s_umount); sb->s_writers.frozen = SB_FREEZE_PAGEFAULT; - smp_wmb(); - sb_wait_write(sb, SB_FREEZE_PAGEFAULT); /* All writers are done so after syncing there won't be dirty data */ @@ -1359,7 +1308,6 @@ int freeze_super(struct super_block *sb) /* Now wait for internal filesystem counter */ sb->s_writers.frozen = SB_FREEZE_FS; - smp_wmb(); sb_wait_write(sb, SB_FREEZE_FS); if (sb->s_op->freeze_fs) { @@ -1368,7 +1316,7 @@ int freeze_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; - smp_wmb(); + sb_freeze_unlock(sb); wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; @@ -1400,8 +1348,10 @@ int thaw_super(struct super_block *sb) return -EINVAL; } - if (sb->s_flags & MS_RDONLY) + if (sb->s_flags & MS_RDONLY) { + sb->s_writers.frozen = SB_UNFROZEN; goto out; + } if (sb->s_op->unfreeze_fs) { error = sb->s_op->unfreeze_fs(sb); @@ -1413,12 +1363,11 @@ int thaw_super(struct super_block *sb) } } -out: sb->s_writers.frozen = SB_UNFROZEN; - smp_wmb(); + sb_freeze_unlock(sb); +out: wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); - return 0; } EXPORT_SYMBOL(thaw_super); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4bed78966c6b..ce356f66cc2a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1,7 +1,6 @@ #ifndef _LINUX_FS_H #define _LINUX_FS_H - #include #include #include @@ -31,6 +30,7 @@ #include #include #include +#include #include #include @@ -1275,16 +1275,9 @@ enum { #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) struct sb_writers { - /* Counters for counting writers at each level */ - struct percpu_counter counter[SB_FREEZE_LEVELS]; - wait_queue_head_t wait; /* queue for waiting for - writers / faults to finish */ - int frozen; /* Is sb frozen? */ - wait_queue_head_t wait_unfrozen; /* queue for waiting for - sb to be thawed */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map lock_map[SB_FREEZE_LEVELS]; -#endif + int frozen; /* Is sb frozen? */ + wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */ + struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; struct super_block { @@ -1393,9 +1386,9 @@ void __sb_end_write(struct super_block *sb, int level); int __sb_start_write(struct super_block *sb, int level, bool wait); #define __sb_writers_acquired(sb, lev) \ - rwsem_acquire_read(&(sb)->s_writers.lock_map[(lev)-1], 0, 1, _THIS_IP_) + percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) #define __sb_writers_release(sb, lev) \ - rwsem_release(&(sb)->s_writers.lock_map[(lev)-1], 1, _THIS_IP_) + percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) /** * sb_end_write - drop write access to a superblock -- cgit v1.2.3 From d353d7587d02116b9732d5c06615aed75a4d3a47 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Mar 2015 11:16:36 -0500 Subject: writeback: plug writeback at a high level Doing writeback on lots of little files causes terrible IOPS storms because of the per-mapping writeback plugging we do. This essentially causes immediate dispatch of IO for each mapping, regardless of the context in which writeback is occurring. IOWs, running a concurrent write-lots-of-small-4k-files fsmark workload on XFS results in a huge number of IOPS being issued for data writes. Metadata writes are sorted and plugged at a high level by XFS, so they aggregate nicely into large IOs. However, data writeback IOs are dispatched in individual 4k IOs, even when the blocks of two consecutively written files are adjacent.
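Plugging holds the bios a task submits on a per-task list so the block layer can merge adjacent ones into larger requests before dispatch; the generic pattern is no more than this sketch (the loop call is a stand-in for the real writeback iteration):

	struct blk_plug plug;

	blk_start_plug(&plug);
	/*
	 * Everything submitted between plug and unplug sits on the per-task
	 * plug list, where physically adjacent bios can be merged into
	 * larger requests instead of being dispatched as individual 4k IOs.
	 */
	writeback_some_inodes(wb);	/* stand-in for the real writeback loop */
	blk_finish_plug(&plug);		/* unplug: dispatch the merged IO */

The numbers below come from applying exactly that bracketing around the inode loop in writeback_sb_inodes().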
Test VM: 8p, 8GB RAM, 4xSSD in RAID0, 100TB sparse XFS filesystem, metadata CRCs enabled. Kernel: 3.10-rc5 + xfsdev + my 3.11 xfs queue (~70 patches) Test: $ ./fs_mark -D 10000 -S0 -n 10000 -s 4096 -L 120 -d /mnt/scratch/0 -d /mnt/scratch/1 -d /mnt/scratch/2 -d /mnt/scratch/3 -d /mnt/scratch/4 -d /mnt/scratch/5 -d /mnt/scratch/6 -d /mnt/scratch/7 Result:

             wall      sys       create rate     Physical write IO
             time      CPU       (avg files/s)   IOPS      Bandwidth
             -----     -----     ------------    ------    ---------
unpatched    6m56s     15m47s    24,000+/-500    26,000    130MB/s
patched      5m06s     13m28s    32,800+/-600     1,500    180MB/s
improvement  -26.44%   -14.68%     +36.67%       -94.23%   +38.46%

If I use zero length files, this workload runs at about 500 IOPS, so plugging drops the data IOs from roughly 25,500/s to 1000/s. 3 lines of code, 35% better throughput for 15% less CPU. The benefits of plugging at this layer are likely to be higher for spinning media as the IO patterns for this workload are going to make a much bigger difference on high IO latency devices..... Signed-off-by: Dave Chinner Signed-off-by: Josef Bacik Reviewed-by: Jan Kara Tested-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/fs-writeback.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 518c6294bf6c..d98e37bbf417 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1439,7 +1439,9 @@ static long writeback_sb_inodes(struct super_block *sb, unsigned long start_time = jiffies; long write_chunk; long wrote = 0; /* count both pages and inodes */ + struct blk_plug plug; + blk_start_plug(&plug); while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); @@ -1537,6 +1539,7 @@ static long writeback_sb_inodes(struct super_block *sb, break; } } + blk_finish_plug(&plug); return wrote; } -- cgit v1.2.3 From 74278da9f70d84d715601fe794567a6d2bfdf078 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Mar 2015 12:37:22 -0500 Subject: inode: convert inode_sb_list_lock to per-sb The process of reducing contention on per-superblock inode lists starts with moving the locking to match the per-superblock inode list. This takes the global lock out of the picture and reduces the contention problems to within a single filesystem. This doesn't get rid of contention as the locks still have global CPU scope, but it does isolate operations on different superblocks from each other.
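Every converted call site ends up with the same walk over sb->s_inodes, just under the per-sb lock; distilled, the pattern the hunks below repeat looks roughly like this (simplified sketch):

	struct inode *inode, *toput_inode = NULL;

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);			/* pin the inode */
		spin_unlock(&inode->i_lock);
		spin_unlock(&sb->s_inode_list_lock);

		/* ... work on the inode without the list lock held ... */

		iput(toput_inode);		/* never iput() under s_inode_list_lock */
		toput_inode = inode;

		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&sb->s_inode_list_lock);
	iput(toput_inode);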
Signed-off-by: Dave Chinner Signed-off-by: Josef Bacik Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Tested-by: Dave Chinner --- fs/block_dev.c | 12 ++++++------ fs/drop_caches.c | 10 ++++++---- fs/fs-writeback.c | 12 ++++++------ fs/inode.c | 28 +++++++++++++--------------- fs/internal.h | 1 - fs/notify/inode_mark.c | 20 ++++++++++---------- fs/quota/dquot.c | 16 ++++++++-------- fs/super.c | 3 ++- include/linux/fs.h | 5 ++++- include/linux/fsnotify_backend.h | 4 ++-- 10 files changed, 57 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 198243717da5..33b813e04f79 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1769,7 +1769,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) { struct inode *inode, *old_inode = NULL; - spin_lock(&inode_sb_list_lock); + spin_lock(&blockdev_superblock->s_inode_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { struct address_space *mapping = inode->i_mapping; @@ -1781,13 +1781,13 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) } __iget(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_sb_list_lock); + spin_unlock(&blockdev_superblock->s_inode_list_lock); /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the - * inode_sb_list_lock. We cannot iput the inode now as we can + * s_inode_list_lock We cannot iput the inode now as we can * be holding the last reference and we cannot iput it under - * inode_sb_list_lock. So we keep the reference and iput it + * s_inode_list_lock. So we keep the reference and iput it * later. */ iput(old_inode); @@ -1795,8 +1795,8 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) func(I_BDEV(inode), arg); - spin_lock(&inode_sb_list_lock); + spin_lock(&blockdev_superblock->s_inode_list_lock); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&blockdev_superblock->s_inode_list_lock); iput(old_inode); } diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 5718cb9f7273..d72d52b90433 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -17,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || @@ -27,13 +27,15 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) } __iget(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); + invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; - spin_lock(&inode_sb_list_lock); + + spin_lock(&sb->s_inode_list_lock); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); iput(toput_inode); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d98e37bbf417..f45bf876579f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2124,7 +2124,7 @@ static void wait_sb_inodes(struct super_block *sb) */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); /* * Data integrity sync. 
Must wait for all pages under writeback, @@ -2144,14 +2144,14 @@ static void wait_sb_inodes(struct super_block *sb) } __iget(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the - * inode_sb_list_lock. We cannot iput the inode now as we can + * s_inode_list_lock. We cannot iput the inode now as we can * be holding the last reference and we cannot iput it under - * inode_sb_list_lock. So we keep the reference and iput it + * s_inode_list_lock. So we keep the reference and iput it * later. */ iput(old_inode); @@ -2161,9 +2161,9 @@ static void wait_sb_inodes(struct super_block *sb) cond_resched(); - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); iput(old_inode); } diff --git a/fs/inode.c b/fs/inode.c index d30640f7a193..a2de294f6b77 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -28,8 +28,8 @@ * inode->i_state, inode->i_hash, __iget() * Inode LRU list locks protect: * inode->i_sb->s_inode_lru, inode->i_lru - * inode_sb_list_lock protects: - * sb->s_inodes, inode->i_sb_list + * inode->i_sb->s_inode_list_lock protects: + * inode->i_sb->s_inodes, inode->i_sb_list * bdi->wb.list_lock protects: * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list * inode_hash_lock protects: @@ -37,7 +37,7 @@ * * Lock ordering: * - * inode_sb_list_lock + * inode->i_sb->s_inode_list_lock * inode->i_lock * Inode LRU list locks * @@ -45,7 +45,7 @@ * inode->i_lock * * inode_hash_lock - * inode_sb_list_lock + * inode->i_sb->s_inode_list_lock * inode->i_lock * * iunique_lock @@ -57,8 +57,6 @@ static unsigned int i_hash_shift __read_mostly; static struct hlist_head *inode_hashtable __read_mostly; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); - /* * Empty aops. Can be used for the cases where the user does not * define any of the address_space operations. 
@@ -426,18 +424,18 @@ static void inode_lru_list_del(struct inode *inode) */ void inode_sb_list_add(struct inode *inode) { - spin_lock(&inode_sb_list_lock); + spin_lock(&inode->i_sb->s_inode_list_lock); list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); - spin_unlock(&inode_sb_list_lock); + spin_unlock(&inode->i_sb->s_inode_list_lock); } EXPORT_SYMBOL_GPL(inode_sb_list_add); static inline void inode_sb_list_del(struct inode *inode) { if (!list_empty(&inode->i_sb_list)) { - spin_lock(&inode_sb_list_lock); + spin_lock(&inode->i_sb->s_inode_list_lock); list_del_init(&inode->i_sb_list); - spin_unlock(&inode_sb_list_lock); + spin_unlock(&inode->i_sb->s_inode_list_lock); } } @@ -594,7 +592,7 @@ void evict_inodes(struct super_block *sb) struct inode *inode, *next; LIST_HEAD(dispose); - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { if (atomic_read(&inode->i_count)) continue; @@ -610,7 +608,7 @@ void evict_inodes(struct super_block *sb) spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); dispose_list(&dispose); } @@ -631,7 +629,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) struct inode *inode, *next; LIST_HEAD(dispose); - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { @@ -654,7 +652,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); dispose_list(&dispose); @@ -890,7 +888,7 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - spin_lock_prefetch(&inode_sb_list_lock); + spin_lock_prefetch(&sb->s_inode_list_lock); inode = new_inode_pseudo(sb); if (inode) diff --git a/fs/internal.h b/fs/internal.h index 4d5af583ab03..ee1209c54eb1 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -112,7 +112,6 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *); /* * inode.c */ -extern spinlock_t inode_sb_list_lock; extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); extern void inode_add_lru(struct inode *inode); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 3daf513ee99e..a4e1a8f6c329 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -163,17 +163,17 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. - * @list: list of inodes being unmounted (sb->s_inodes) + * @sb: superblock being unmounted. * * Called during unmount with no locks held, so needs to be safe against - * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block. + * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. 
*/ -void fsnotify_unmount_inodes(struct list_head *list) +void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *next_i, *need_iput = NULL; - spin_lock(&inode_sb_list_lock); - list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) { struct inode *need_iput_tmp; /* @@ -209,7 +209,7 @@ void fsnotify_unmount_inodes(struct list_head *list) spin_unlock(&inode->i_lock); /* In case the dropping of a reference would nuke next_i. */ - while (&next_i->i_sb_list != list) { + while (&next_i->i_sb_list != &sb->s_inodes) { spin_lock(&next_i->i_lock); if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && atomic_read(&next_i->i_count)) { @@ -224,12 +224,12 @@ void fsnotify_unmount_inodes(struct list_head *list) } /* - * We can safely drop inode_sb_list_lock here because either + * We can safely drop s_inode_list_lock here because either * we actually hold references on both inode and next_i or * end of list. Also no new inodes will be added since the * umount has begun. */ - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); if (need_iput_tmp) iput(need_iput_tmp); @@ -241,7 +241,7 @@ void fsnotify_unmount_inodes(struct list_head *list) iput(inode); - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 20d1f74561cf..2863ec6cbadf 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -923,7 +923,7 @@ static void add_dquot_ref(struct super_block *sb, int type) int reserved = 0; #endif - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || @@ -934,7 +934,7 @@ static void add_dquot_ref(struct super_block *sb, int type) } __iget(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) @@ -946,15 +946,15 @@ static void add_dquot_ref(struct super_block *sb, int type) /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the - * inode_sb_list_lock We cannot iput the inode now as we can be + * s_inode_list_lock. We cannot iput the inode now as we can be * holding the last reference and we cannot iput it under - * inode_sb_list_lock. So we keep the reference and iput it + * s_inode_list_lock. So we keep the reference and iput it * later. 
*/ old_inode = inode; - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); iput(old_inode); #ifdef CONFIG_QUOTA_DEBUG @@ -1023,7 +1023,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, struct inode *inode; int reserved = 0; - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We have to scan also I_NEW inodes because they can already @@ -1039,7 +1039,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, } spin_unlock(&dq_data_lock); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened after quota" diff --git a/fs/super.c b/fs/super.c index b61372354f2b..c808183554a2 100644 --- a/fs/super.c +++ b/fs/super.c @@ -191,6 +191,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); + spin_lock_init(&s->s_inode_list_lock); if (list_lru_init_memcg(&s->s_dentry_lru)) goto fail; @@ -399,7 +400,7 @@ void generic_shutdown_super(struct super_block *sb) sync_filesystem(sb); sb->s_flags &= ~MS_ACTIVE; - fsnotify_unmount_inodes(&sb->s_inodes); + fsnotify_unmount_inodes(sb); evict_inodes(sb); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a40fa843040..09bbd38485f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1309,7 +1309,6 @@ struct super_block { #endif const struct xattr_handler **s_xattr; - struct list_head s_inodes; /* all inodes */ struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; @@ -1380,6 +1379,10 @@ struct super_block { * Indicates how deep in a filesystem stack this SB is */ int s_stack_depth; + + /* s_inode_list_lock protects s_inodes */ + spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; + struct list_head s_inodes; /* all inodes */ }; extern struct timespec current_fs_time(struct super_block *sb); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 65a517dd32f7..0390ee69c439 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -357,7 +357,7 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); -extern void fsnotify_unmount_inodes(struct list_head *list); +extern void fsnotify_unmount_inodes(struct super_block *sb); /* put here because inotify does some weird stuff when destroying watches */ extern void fsnotify_init_event(struct fsnotify_event *event, @@ -393,7 +393,7 @@ static inline u32 fsnotify_get_cookie(void) return 0; } -static inline void fsnotify_unmount_inodes(struct list_head *list) +static inline void fsnotify_unmount_inodes(struct super_block *sb) {} #endif /* CONFIG_FSNOTIFY */ -- cgit v1.2.3 From e97fedb9ef9868ff24d588be781906cf7c1b59ae Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Mar 2015 13:40:00 -0500 Subject: sync: serialise per-superblock sync operations When competing sync(2) calls walk the same filesystem, they need to walk the list of inodes on the superblock to find all the inodes that 
we need to wait for IO completion on. However, when multiple wait_sb_inodes() calls do this at the same time, they contend on the inode_sb_list_lock and the contention causes system wide slowdowns. In effect, concurrent sync(2) calls can take longer and burn more CPU than if they were serialised. Stop the worst of the contention by adding a per-sb mutex to wrap around wait_sb_inodes() so that we only execute one sync(2) IO completion walk per superblock at a time and hence avoid contention being triggered by concurrent sync(2) calls. Signed-off-by: Dave Chinner Signed-off-by: Josef Bacik Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Tested-by: Dave Chinner --- fs/fs-writeback.c | 11 +++++++++++ fs/super.c | 1 + include/linux/fs.h | 2 ++ 3 files changed, 14 insertions(+) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f45bf876579f..3c974442bdf0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2114,6 +2114,15 @@ out_unlock_inode: } EXPORT_SYMBOL(__mark_inode_dirty); +/* + * The @s_sync_lock is used to serialise concurrent sync operations + * to avoid lock contention problems with concurrent wait_sb_inodes() calls. + * Concurrent callers will block on the s_sync_lock rather than doing contending + * walks. The queueing maintains sync(2) required behaviour as all the IO that + * has been issued up to the time this function is enter is guaranteed to be + * completed by the time we have gained the lock and waited for all IO that is + * in progress regardless of the order callers are granted the lock. + */ static void wait_sb_inodes(struct super_block *sb) { struct inode *inode, *old_inode = NULL; @@ -2124,6 +2133,7 @@ static void wait_sb_inodes(struct super_block *sb) */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); + mutex_lock(&sb->s_sync_lock); spin_lock(&sb->s_inode_list_lock); /* @@ -2165,6 +2175,7 @@ static void wait_sb_inodes(struct super_block *sb) } spin_unlock(&sb->s_inode_list_lock); iput(old_inode); + mutex_unlock(&sb->s_sync_lock); } static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr, diff --git a/fs/super.c b/fs/super.c index c808183554a2..fd427ec0b372 100644 --- a/fs/super.c +++ b/fs/super.c @@ -190,6 +190,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) s->s_flags = flags; INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); + mutex_init(&s->s_sync_lock); INIT_LIST_HEAD(&s->s_inodes); spin_lock_init(&s->s_inode_list_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 09bbd38485f9..82dfc5519b4b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1375,6 +1375,8 @@ struct super_block { struct list_lru s_inode_lru ____cacheline_aligned_in_smp; struct rcu_head rcu; + struct mutex s_sync_lock; /* sync serialisation lock */ + /* * Indicates how deep in a filesystem stack this SB is */ -- cgit v1.2.3 From c7f5408493aeb01532927b2276316797a03ed6ee Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Mar 2015 14:07:22 -0500 Subject: inode: rename i_wb_list to i_io_list There's a small consistency problem between the inode and writeback naming. Writeback calls the "for IO" inode queues b_io and b_more_io, but the inode calls these the "writeback list" or i_wb_list. This makes it hard to add a new "under writeback" list to the inode, or call it an "under IO" list on the bdi because either way we'll have writeback on IO and IO on writeback and it'll just be confusing. I'm getting confused just writing this!
So, rename the inode "for IO" list variable to i_io_list so we can add a new "writeback list" in a subsequent patch. Signed-off-by: Dave Chinner Signed-off-by: Josef Bacik Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Tested-by: Dave Chinner --- fs/fs-writeback.c | 46 +++++++++++++++++++++++----------------------- fs/inode.c | 8 ++++---- fs/internal.h | 2 +- include/linux/fs.h | 2 +- mm/backing-dev.c | 8 ++++---- 5 files changed, 33 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3c974442bdf0..63e00f11022e 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -88,7 +88,7 @@ unsigned int dirtytime_expire_interval = 12 * 60 * 60; static inline struct inode *wb_inode(struct list_head *head) { - return list_entry(head, struct inode, i_wb_list); + return list_entry(head, struct inode, i_io_list); } /* @@ -125,22 +125,22 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb) } /** - * inode_wb_list_move_locked - move an inode onto a bdi_writeback IO list + * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list * @inode: inode to be moved * @wb: target bdi_writeback * @head: one of @wb->b_{dirty|io|more_io} * - * Move @inode->i_wb_list to @list of @wb and set %WB_has_dirty_io. + * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io. * Returns %true if @inode is the first occupant of the !dirty_time IO * lists; otherwise, %false. */ -static bool inode_wb_list_move_locked(struct inode *inode, +static bool inode_io_list_move_locked(struct inode *inode, struct bdi_writeback *wb, struct list_head *head) { assert_spin_locked(&wb->list_lock); - list_move(&inode->i_wb_list, head); + list_move(&inode->i_io_list, head); /* dirty_time doesn't count as dirty_io until expiration */ if (head != &wb->b_dirty_time) @@ -151,19 +151,19 @@ static bool inode_wb_list_move_locked(struct inode *inode, } /** - * inode_wb_list_del_locked - remove an inode from its bdi_writeback IO list + * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list * @inode: inode to be removed * @wb: bdi_writeback @inode is being removed from * * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and * clear %WB_has_dirty_io if all are empty afterwards. */ -static void inode_wb_list_del_locked(struct inode *inode, +static void inode_io_list_del_locked(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&wb->list_lock); - list_del_init(&inode->i_wb_list); + list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } @@ -351,7 +351,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) /* * Once I_FREEING is visible under i_lock, the eviction path owns - * the inode and we shouldn't modify ->i_wb_list. + * the inode and we shouldn't modify ->i_io_list. */ if (unlikely(inode->i_state & I_FREEING)) goto skip_switch; @@ -390,16 +390,16 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) * is always correct including from ->b_dirty_time. The transfer * preserves @inode->dirtied_when ordering. 
*/ - if (!list_empty(&inode->i_wb_list)) { + if (!list_empty(&inode->i_io_list)) { struct inode *pos; - inode_wb_list_del_locked(inode, old_wb); + inode_io_list_del_locked(inode, old_wb); inode->i_wb = new_wb; - list_for_each_entry(pos, &new_wb->b_dirty, i_wb_list) + list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) if (time_after_eq(inode->dirtied_when, pos->dirtied_when)) break; - inode_wb_list_move_locked(inode, new_wb, pos->i_wb_list.prev); + inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev); } else { inode->i_wb = new_wb; } @@ -961,12 +961,12 @@ void wb_start_background_writeback(struct bdi_writeback *wb) /* * Remove the inode from the writeback list it is on. */ -void inode_wb_list_del(struct inode *inode) +void inode_io_list_del(struct inode *inode) { struct bdi_writeback *wb; wb = inode_to_wb_and_lock_list(inode); - inode_wb_list_del_locked(inode, wb); + inode_io_list_del_locked(inode, wb); spin_unlock(&wb->list_lock); } @@ -988,7 +988,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) if (time_before(inode->dirtied_when, tail->dirtied_when)) inode->dirtied_when = jiffies; } - inode_wb_list_move_locked(inode, wb, &wb->b_dirty); + inode_io_list_move_locked(inode, wb, &wb->b_dirty); } /* @@ -996,7 +996,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) */ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) { - inode_wb_list_move_locked(inode, wb, &wb->b_more_io); + inode_io_list_move_locked(inode, wb, &wb->b_more_io); } static void inode_sync_complete(struct inode *inode) @@ -1055,7 +1055,7 @@ static int move_expired_inodes(struct list_head *delaying_queue, if (older_than_this && inode_dirtied_after(inode, *older_than_this)) break; - list_move(&inode->i_wb_list, &tmp); + list_move(&inode->i_io_list, &tmp); moved++; if (flags & EXPIRE_DIRTY_ATIME) set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); @@ -1078,7 +1078,7 @@ static int move_expired_inodes(struct list_head *delaying_queue, list_for_each_prev_safe(pos, node, &tmp) { inode = wb_inode(pos); if (inode->i_sb == sb) - list_move(&inode->i_wb_list, dispatch_queue); + list_move(&inode->i_io_list, dispatch_queue); } } out: @@ -1232,10 +1232,10 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, redirty_tail(inode, wb); } else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; - inode_wb_list_move_locked(inode, wb, &wb->b_dirty_time); + inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); } else { /* The inode is clean. Remove from writeback lists. */ - inode_wb_list_del_locked(inode, wb); + inode_io_list_del_locked(inode, wb); } } @@ -1378,7 +1378,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, * touch it. See comment above for explanation. 
*/ if (!(inode->i_state & I_DIRTY_ALL)) - inode_wb_list_del_locked(inode, wb); + inode_io_list_del_locked(inode, wb); spin_unlock(&wb->list_lock); inode_sync_complete(inode); out: @@ -2091,7 +2091,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) else dirty_list = &wb->b_dirty_time; - wakeup_bdi = inode_wb_list_move_locked(inode, wb, + wakeup_bdi = inode_io_list_move_locked(inode, wb, dirty_list); spin_unlock(&wb->list_lock); diff --git a/fs/inode.c b/fs/inode.c index a2de294f6b77..f09148e07198 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -31,7 +31,7 @@ * inode->i_sb->s_inode_list_lock protects: * inode->i_sb->s_inodes, inode->i_sb_list * bdi->wb.list_lock protects: - * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list + * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list * inode_hash_lock protects: * inode_hashtable, inode->i_hash * @@ -357,7 +357,7 @@ void inode_init_once(struct inode *inode) memset(inode, 0, sizeof(*inode)); INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_devices); - INIT_LIST_HEAD(&inode->i_wb_list); + INIT_LIST_HEAD(&inode->i_io_list); INIT_LIST_HEAD(&inode->i_lru); address_space_init_once(&inode->i_data); i_size_ordered_init(inode); @@ -525,8 +525,8 @@ static void evict(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); - if (!list_empty(&inode->i_wb_list)) - inode_wb_list_del(inode); + if (!list_empty(&inode->i_io_list)) + inode_io_list_del(inode); inode_sb_list_del(inode); diff --git a/fs/internal.h b/fs/internal.h index ee1209c54eb1..71859c4d0b41 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -118,7 +118,7 @@ extern void inode_add_lru(struct inode *inode); /* * fs-writeback.c */ -extern void inode_wb_list_del(struct inode *inode); +extern void inode_io_list_del(struct inode *inode); extern long get_nr_dirty_inodes(void); extern void evict_inodes(struct super_block *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 82dfc5519b4b..34cfa60db678 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -636,7 +636,7 @@ struct inode { unsigned long dirtied_time_when; struct hlist_node i_hash; - struct list_head i_wb_list; /* backing dev IO list */ + struct list_head i_io_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index dac5bf59309d..ee8d7fd07be3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -55,13 +55,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0; spin_lock(&wb->list_lock); - list_for_each_entry(inode, &wb->b_dirty, i_wb_list) + list_for_each_entry(inode, &wb->b_dirty, i_io_list) nr_dirty++; - list_for_each_entry(inode, &wb->b_io, i_wb_list) + list_for_each_entry(inode, &wb->b_io, i_io_list) nr_io++; - list_for_each_entry(inode, &wb->b_more_io, i_wb_list) + list_for_each_entry(inode, &wb->b_more_io, i_io_list) nr_more_io++; - list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list) + list_for_each_entry(inode, &wb->b_dirty_time, i_io_list) if (inode->i_state & I_DIRTY_TIME) nr_dirty_time++; spin_unlock(&wb->list_lock); -- cgit v1.2.3 From ac05fbb40062411ea1b722aa2cede7feaa94f1b4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 4 Mar 2015 16:52:52 -0500 Subject: inode: don't softlockup when evicting inodes On a box with a lot of ram (148gb) I can make the box softlockup after running an fs_mark job that creates hundreds of millions of empty files. 
This is because we never generate enough memory pressure to keep the number of inodes on our unused list low, so when we go to unmount we have to evict ~100 million inodes. This makes one processor a very unhappy person, so add a cond_resched() in dispose_list(), and if we need a resched when processing the s_inodes list, do that and run dispose_list() on what we've currently culled. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Jan Kara --- fs/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index f09148e07198..78a17b8859e1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -575,6 +575,7 @@ static void dispose_list(struct list_head *head) list_del_init(&inode->i_lru); evict(inode); + cond_resched(); } } @@ -592,6 +593,7 @@ void evict_inodes(struct super_block *sb) struct inode *inode, *next; LIST_HEAD(dispose); +again: spin_lock(&sb->s_inode_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { if (atomic_read(&inode->i_count)) @@ -607,6 +609,18 @@ void evict_inodes(struct super_block *sb) inode_lru_list_del(inode); spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); + + /* + * We can have a ton of inodes to evict at unmount time given + * enough memory, check to see if we need to go to sleep for a + * bit so we don't livelock. + */ + if (need_resched()) { + spin_unlock(&sb->s_inode_list_lock); + cond_resched(); + dispose_list(&dispose); + goto again; + } } spin_unlock(&sb->s_inode_list_lock); -- cgit v1.2.3 From cde93be45a8a90d8c264c776fab63487b5038a65 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 15 Aug 2015 13:36:12 -0500 Subject: dcache: Handle escaped paths in prepend_path A rename can result in a dentry that by walking up d_parent will never reach its mnt_root. For lack of a better term I call this an escaped path. prepend_path is called by four different functions __d_path, d_absolute_path, d_path, and getcwd. __d_path only wants to see paths that are connected to the root it passes in. So __d_path needs prepend_path to return an error. d_absolute_path similarly wants to see paths that are connected to some root. Escaped paths are not connected to any mnt_root so d_absolute_path needs prepend_path to return an error greater than 1. So escaped paths will be treated like paths on lazily unmounted mounts. getcwd needs to prepend "(unreachable)" so getcwd also needs prepend_path to return an error. d_path is the interesting hold out. d_path just wants to print something, and does not care about the weird cases. Which raises the question what should be printed? Given that / should result in -ENOENT I believe it is desirable for escaped paths to be printed as empty paths. As there are not really any meaningful path components when considered from the perspective of a mount tree. So tweak prepend_path to return an empty path with a new error code of 3 when it encounters an escaped path. Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro --- fs/dcache.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 9b5fe503f6cb..e3b44ca75a1b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2926,6 +2926,13 @@ restart: if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); + /* Escaped? */ + if (dentry != vfsmnt->mnt_root) { + bptr = *buffer; + blen = *buflen; + error = 3; + break; + } /* Global root?
From cde93be45a8a90d8c264c776fab63487b5038a65 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman"
Date: Sat, 15 Aug 2015 13:36:12 -0500
Subject: dcache: Handle escaped paths in prepend_path

A rename can result in a dentry that by walking up d_parent will never
reach its mnt_root.  For lack of a better term I call this an escaped
path.

prepend_path is called by four different functions: __d_path,
d_absolute_path, d_path, and getcwd.

__d_path only wants to see paths that are connected to the root it
passes in.  So __d_path needs prepend_path to return an error.

d_absolute_path similarly wants to see paths that are connected to some
root.  Escaped paths are not connected to any mnt_root so
d_absolute_path needs prepend_path to return an error greater than 1.
So escaped paths will be treated like paths on lazily unmounted mounts.

getcwd needs to prepend "(unreachable)" so getcwd also needs
prepend_path to return an error.

d_path is the interesting holdout.  d_path just wants to print
something, and does not care about the weird cases.  Which raises the
question of what should be printed?

Given that / should result in -ENOENT I believe it is desirable for
escaped paths to be printed as empty paths.  As there are not really
any meaningful path components when considered from the perspective of
a mount tree.

So tweak prepend_path to return an empty path with a new error code of
3 when it encounters an escaped path.

Signed-off-by: "Eric W. Biederman"
Signed-off-by: Al Viro
---
 fs/dcache.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 9b5fe503f6cb..e3b44ca75a1b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2926,6 +2926,13 @@ restart:
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
 			struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
 
+			/* Escaped? */
+			if (dentry != vfsmnt->mnt_root) {
+				bptr = *buffer;
+				blen = *buflen;
+				error = 3;
+				break;
+			}
 			/* Global root? */
 			if (mnt != parent) {
 				dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
-- cgit v1.2.3

From a03e283bf5c3d4851b4998122196ce9f849e6dfb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman"
Date: Sat, 15 Aug 2015 13:36:41 -0500
Subject: dcache: Reduce the scope of i_lock in d_splice_alias

i_lock is only needed until __d_find_any_alias calls dget on the alias
dentry.  After that the reference to new ensures that dentry_kill and
d_delete will not remove the inode from the dentry, and will not remove
the dentry from the inode->i_dentry list.

The inode i_lock came to be held over the __d_move calls in
d_splice_alias through a series of introductions of locks with
increasingly smaller scope.  First it was the dcache_lock, then it was
the dcache_inode_lock, and finally inode->i_lock.

Furthermore inode->i_lock is not held over any other calls to d_move or
__d_move so it can not provide any meaningful rename protection.

Signed-off-by: "Eric W. Biederman"
Signed-off-by: Al Viro
---
 fs/dcache.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index e3b44ca75a1b..5c33aeb0f68f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2718,7 +2718,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  * This helper attempts to cope with remotely renamed directories
  *
  * It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
+ * dentry->d_parent->d_inode->i_mutex, and rename_lock
  *
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
@@ -2744,7 +2744,6 @@ out_unalias:
 	__d_move(alias, dentry, false);
 	ret = 0;
 out_err:
-	spin_unlock(&inode->i_lock);
 	if (m2)
 		mutex_unlock(m2);
 	if (m1)
@@ -2790,10 +2789,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode)) {
 		struct dentry *new = __d_find_any_alias(inode);
 		if (unlikely(new)) {
+			/* The reference to new ensures it remains an alias */
+			spin_unlock(&inode->i_lock);
 			write_seqlock(&rename_lock);
 			if (unlikely(d_ancestor(new, dentry))) {
 				write_sequnlock(&rename_lock);
-				spin_unlock(&inode->i_lock);
 				dput(new);
 				new = ERR_PTR(-ELOOP);
 				pr_warn_ratelimited(
@@ -2812,7 +2812,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 			} else {
 				__d_move(new, dentry, false);
 				write_sequnlock(&rename_lock);
-				spin_unlock(&inode->i_lock);
 				security_d_instantiate(new, inode);
 			}
 			iput(inode);
-- cgit v1.2.3
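
The locking argument above reduces to a "pin, then unlock" pattern: once __d_find_any_alias() has taken a reference (dget) on the alias, i_lock no longer buys anything, and rename_lock is what serializes the later __d_move(). A rough sketch of that idea, condensed for illustration (the helper name pin_existing_alias() is hypothetical, not code from the patch):

/*
 * Illustrative only: hold i_lock just long enough to find and pin an
 * existing alias; the returned reference keeps the dentry alive after
 * the lock is dropped.
 */
static struct dentry *pin_existing_alias(struct inode *inode)
{
	struct dentry *alias;

	spin_lock(&inode->i_lock);
	alias = __d_find_any_alias(inode);	/* returns with a reference held */
	spin_unlock(&inode->i_lock);		/* safe: the reference pins alias */

	return alias;				/* caller eventually dput()s it */
}
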
From 397d425dc26da728396e66d392d5dcb8dac30c37 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman"
Date: Sat, 15 Aug 2015 20:27:13 -0500
Subject: vfs: Test for and handle paths that are unreachable from their mnt_root

In rare cases a directory can be renamed out from under a bind mount.
In those cases, without special handling it becomes possible to walk up
the directory tree to the root dentry of the filesystem and down from
the root dentry to every other file or directory on the filesystem.

Like division by zero, ".." from an unconnected path can not be given a
useful semantic, as there is no predicting at which path component the
code will realize it is unconnected.  We certainly can not match the
current behavior as the current behavior is a security hole.

Therefore when encountering ".." while following an unconnected path,
return -ENOENT.

- Add a function path_connected to verify path->dentry is reachable
  from path->mnt.mnt_root.  AKA to validate that rename did not do
  something nasty to the bind mount.

  To avoid races path_connected must be called after following a path
  component to its next path component.

Signed-off-by: "Eric W. Biederman"
Signed-off-by: Al Viro
---
 fs/namei.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 1c2105ed20c5..29b927938b8c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -560,6 +560,24 @@ static int __nd_alloc_stack(struct nameidata *nd)
 	return 0;
 }
 
+/**
+ * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
+ * @path: nameidata to verify
+ *
+ * Rename can sometimes move a file or directory outside of a bind
+ * mount, path_connected allows those cases to be detected.
+ */
+static bool path_connected(const struct path *path)
+{
+	struct vfsmount *mnt = path->mnt;
+
+	/* Only bind mounts can have disconnected paths */
+	if (mnt->mnt_root == mnt->mnt_sb->s_root)
+		return true;
+
+	return is_subdir(path->dentry, mnt->mnt_root);
+}
+
 static inline int nd_alloc_stack(struct nameidata *nd)
 {
 	if (likely(nd->depth != EMBEDDED_LEVELS))
@@ -1296,6 +1314,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
 				return -ECHILD;
 			nd->path.dentry = parent;
 			nd->seq = seq;
+			if (unlikely(!path_connected(&nd->path)))
+				return -ENOENT;
 			break;
 		} else {
 			struct mount *mnt = real_mount(nd->path.mnt);
@@ -1396,7 +1416,7 @@ static void follow_mount(struct path *path)
 	}
 }
 
-static void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot(struct nameidata *nd)
 {
 	if (!nd->root.mnt)
 		set_root(nd);
@@ -1412,6 +1432,8 @@ static void follow_dotdot(struct nameidata *nd)
 			/* rare case of legitimate dget_parent()... */
 			nd->path.dentry = dget_parent(nd->path.dentry);
 			dput(old);
+			if (unlikely(!path_connected(&nd->path)))
+				return -ENOENT;
 			break;
 		}
 		if (!follow_up(&nd->path))
@@ -1419,6 +1441,7 @@ static void follow_dotdot(struct nameidata *nd)
 	}
 	follow_mount(&nd->path);
 	nd->inode = nd->path.dentry->d_inode;
+	return 0;
 }
 
 /*
@@ -1634,7 +1657,7 @@ static inline int handle_dots(struct nameidata *nd, int type)
 		if (nd->flags & LOOKUP_RCU) {
 			return follow_dotdot_rcu(nd);
 		} else
-			follow_dotdot(nd);
+			return follow_dotdot(nd);
 	}
 	return 0;
 }
-- cgit v1.2.3
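
To see the case this commit closes, a user-space reproducer along the following lines should work; this is only a sketch (it assumes it is run as root in an empty scratch directory, and error handling is omitted). Before the patch the final stat("..") escapes the bind mount and succeeds; with the patch the kernel fails the ".." lookup with -ENOENT because the cwd is no longer connected to the mount's root.

/* Illustrative reproducer: rename a directory out from under a bind mount. */
#include <stdio.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;

	mkdir("a", 0755);
	mkdir("a/b", 0755);
	mkdir("a/b/d", 0755);
	mkdir("mnt", 0755);

	mount("a/b", "mnt", NULL, MS_BIND, NULL);	/* bind a/b onto mnt */
	chdir("mnt/d");					/* cwd is now inside the bind mount */
	rename("../../a/b/d", "../../a/d");		/* move d outside the bound subtree */

	if (stat("..", &st) == -1)
		perror("stat ..");			/* patched kernel: No such file or directory */
	else
		printf("escaped to inode %lu\n", (unsigned long)st.st_ino);

	return 0;
}
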