205 files changed, 4510 insertions, 3935 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index dfebdbe7440e..3031e3233dd6 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -26,7 +26,6 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/idr.h>
-#include <asm/semaphore.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/Kconfig b/fs/Kconfig
index d7312825592b..fd81a21014b8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -411,7 +411,7 @@ config JFS_STATISTICS
 	  to be made available to the user in the /proc/fs/jfs/ directory.
 
 config FS_POSIX_ACL
-# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs)
+# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4)
 #
 # NOTE: you can implement Posix ACLs without these helpers (XFS does).
 # 	Never use this symbol for ifdefs.
@@ -978,7 +978,8 @@ config TMPFS_POSIX_ACL
 
 config HUGETLBFS
 	bool "HugeTLB file system support"
-	depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN
+	depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
+		   (S390 && 64BIT) || BROKEN
 	help
 	  hugetlbfs is a filesystem backing for HugeTLB pages, based on
 	  ramfs. For architectures that support it, say Y here and read
@@ -1667,75 +1668,81 @@ config NFSD
 	select LOCKD
 	select SUNRPC
 	select EXPORTFS
-	select NFSD_V2_ACL if NFSD_V3_ACL
 	select NFS_ACL_SUPPORT if NFSD_V2_ACL
-	select NFSD_TCP if NFSD_V4
-	select CRYPTO_MD5 if NFSD_V4
-	select CRYPTO if NFSD_V4
-	select FS_POSIX_ACL if NFSD_V4
 	select PROC_FS if NFSD_V4
 	select PROC_FS if SUNRPC_GSS
 	help
-	  If you want your Linux box to act as an NFS *server*, so that other
-	  computers on your local network which support NFS can access certain
-	  directories on your box transparently, you have two options: you can
-	  use the self-contained user space program nfsd, in which case you
-	  should say N here, or you can say Y and use the kernel based NFS
-	  server. The advantage of the kernel based solution is that it is
-	  faster.
+	  Choose Y here if you want to allow other computers to access
+	  files residing on this system using Sun's Network File System
+	  protocol.  To compile the NFS server support as a module,
+	  choose M here: the module will be called nfsd.
 
-	  In either case, you will need support software; the respective
-	  locations are given in the file <file:Documentation/Changes> in the
-	  NFS section.
+	  You may choose to use a user-space NFS server instead, in which
+	  case you can choose N here.
 
-	  If you say Y here, you will get support for version 2 of the NFS
-	  protocol (NFSv2). If you also want NFSv3, say Y to the next question
-	  as well.
+	  To export local file systems using NFS, you also need to install
+	  user space programs which can be found in the Linux nfs-utils
+	  package, available from http://linux-nfs.org/.  More detail about
+	  the Linux NFS server implementation is available via the
+	  exports(5) man page.
 
-	  Please read the NFS-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>.
+	  Below you can choose which versions of the NFS protocol are
+	  available to clients mounting the NFS server on this system.
+	  Support for NFS version 2 (RFC 1094) is always available when
+	  CONFIG_NFSD is selected.
 
-	  To compile the NFS server support as a module, choose M here: the
-	  module will be called nfsd.  If unsure, say N.
+	  If unsure, say N.
 
 config NFSD_V2_ACL
 	bool
 	depends on NFSD
 
 config NFSD_V3
-	bool "Provide NFSv3 server support"
+	bool "NFS server support for NFS version 3"
 	depends on NFSD
 	help
-	  If you would like to include the NFSv3 server as well as the NFSv2
-	  server, say Y here.  If unsure, say Y.
+	  This option enables support in your system's NFS server for
+	  version 3 of the NFS protocol (RFC 1813).
+
+	  If unsure, say Y.
 
 config NFSD_V3_ACL
-	bool "Provide server support for the NFSv3 ACL protocol extension"
+	bool "NFS server support for the NFSv3 ACL protocol extension"
 	depends on NFSD_V3
+	select NFSD_V2_ACL
 	help
-	  Implement the NFSv3 ACL protocol extension for manipulating POSIX
-	  Access Control Lists on exported file systems. NFS clients should
-	  be compiled with the NFSv3 ACL protocol extension; see the
-	  CONFIG_NFS_V3_ACL option.  If unsure, say N.
+	  Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
+	  never became an official part of the NFS version 3 protocol.
+	  This protocol extension allows applications on NFS clients to
+	  manipulate POSIX Access Control Lists on files residing on NFS
+	  servers.  NFS servers enforce POSIX ACLs on local files whether
+	  this protocol is available or not.
+
+	  This option enables support in your system's NFS server for the
+	  NFSv3 ACL protocol extension allowing NFS clients to manipulate
+	  POSIX ACLs on files exported by your system's NFS server.  NFS
+	  clients which support the Solaris NFSv3 ACL protocol can then
+	  access and modify ACLs on your NFS server.
+
+	  To store ACLs on your NFS server, you also need to enable ACL-
+	  related CONFIG options for your local file systems of choice.
+
+	  If unsure, say N.
 
 config NFSD_V4
-	bool "Provide NFSv4 server support (EXPERIMENTAL)"
+	bool "NFS server support for NFS version 4 (EXPERIMENTAL)"
 	depends on NFSD && NFSD_V3 && EXPERIMENTAL
+	select FS_POSIX_ACL
 	select RPCSEC_GSS_KRB5
 	help
-	  If you would like to include the NFSv4 server as well as the NFSv2
-	  and NFSv3 servers, say Y here.  This feature is experimental, and
-	  should only be used if you are interested in helping to test NFSv4.
-	  If unsure, say N.
+	  This option enables support in your system's NFS server for
+	  version 4 of the NFS protocol (RFC 3530).
 
-config NFSD_TCP
-	bool "Provide NFS server over TCP support"
-	depends on NFSD
-	default y
-	help
-	  If you want your NFS server to support TCP connections, say Y here.
-	  TCP connections usually perform better than the default UDP when
-	  the network is lossy or congested.  If unsure, say Y.
+	  To export files using NFSv4, you need to install additional user
+	  space programs which can be found in the Linux nfs-utils package,
+	  available from http://linux-nfs.org/.
+
+	  If unsure, say N.
 
 config ROOT_NFS
 	bool "Root file system on NFS"
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 41a958a7585e..5e1a4fb5cacb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1424,6 +1424,18 @@ struct elf_note_info {
 	int thread_notes;
 };
 
+/*
+ * When a regset has a writeback hook, we call it on each thread before
+ * dumping user memory.  On register window machines, this makes sure the
+ * user memory backing the register data is up to date before we read it.
+ */
+static void do_thread_regset_writeback(struct task_struct *task,
+				       const struct user_regset *regset)
+{
+	if (regset->writeback)
+		regset->writeback(task, regset, 1);
+}
+
 static int fill_thread_core_info(struct elf_thread_core_info *t,
 				 const struct user_regset_view *view,
 				 long signr, size_t *total)
@@ -1445,6 +1457,8 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 		  sizeof(t->prstatus), &t->prstatus);
 	*total += notesize(&t->notes[0]);
 
+	do_thread_regset_writeback(t->task, &view->regsets[0]);
+
 	/*
 	 * Each other regset might generate a note too.  For each regset
 	 * that has no core_note_type or is inactive, we leave t->notes[i]
@@ -1452,6 +1466,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 	 */
 	for (i = 1; i < view->n; ++i) {
 		const struct user_regset *regset = &view->regsets[i];
+		do_thread_regset_writeback(t->task, regset);
 		if (regset->core_note_type &&
 		    (!regset->active || regset->active(t->task, regset))) {
 			int ret;
diff --git a/fs/buffer.c b/fs/buffer.c
index 3ebccf4aa7e3..ddfdd2c80bf9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -627,8 +627,7 @@ repeat:
 }
 
 /**
- * sync_mapping_buffers - write out and wait upon a mapping's "associated"
- *                        buffers
+ * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
  *
  * Starts I/O against the buffers at mapping->private_list, and waits upon
@@ -836,7 +835,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 		smp_mb();
 		if (buffer_dirty(bh)) {
 			list_add(&bh->b_assoc_buffers,
-				 &bh->b_assoc_map->private_list);
+				 &mapping->private_list);
 			bh->b_assoc_map = mapping;
 		}
 		spin_unlock(lock);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index edd248367b36..dbd91461853c 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -6,7 +6,9 @@ and sync so that events like out of disk space get reported properly on
 cached files. Fix setxattr failure to certain Samba versions. Fix mount
 of second share to disconnected server session (autoreconnect on this).
 Add ability to modify cifs acls for handling chmod (when mounted with
-cifsacl flag).
+cifsacl flag). Fix prefixpath path separator so we can handle mounts
+with prefixpaths longer than one directory (one path component) when
+mounted to Windows servers.
 
 Version 1.51
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index c623e2f9c5db..50306229b0f9 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -461,7 +461,7 @@ A partial list of the supported mount options follows:
  cifsacl        Report mode bits (e.g. on stat) based on the Windows ACL for
 	        the file. (EXPERIMENTAL)
  servern        Specify the server 's netbios name (RFC1001 name) to use
-		when attempting to setup a session to the server.  This is
+		when attempting to setup a session to the server. 
 		This is needed for mounting to some older servers (such
 		as OS/2 or Windows 98 and Windows ME) since they do not
 		support a default server name.  A server name can be up
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 73c4c419663c..0228ed06069e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -98,8 +98,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
 			if (mid_entry->resp_buf) {
 				cifs_dump_detail(mid_entry->resp_buf);
 				cifs_dump_mem("existing buf: ",
-					mid_entry->resp_buf,
-					62 /* fixme */);
+					mid_entry->resp_buf, 62);
 			}
 		}
 	}
@@ -439,7 +438,7 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 
 	return length;
 }
-#endif
+#endif /* STATS */
 
 static struct proc_dir_entry *proc_fs_cifs;
 read_proc_t cifs_txanchor_read;
@@ -482,7 +481,7 @@ cifs_proc_init(void)
 				cifs_stats_read, NULL);
 	if (pde)
 		pde->write_proc = cifs_stats_write;
-#endif
+#endif /* STATS */
 	pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs,
 				cifsFYI_read, NULL);
 	if (pde)
@@ -918,4 +917,12 @@ security_flags_write(struct file *file, const char __user *buffer,
 	/* BB should we turn on MAY flags for other MUST options? */
 	return count;
 }
-#endif
+#else
+inline void cifs_proc_init(void)
+{
+}
+
+inline void cifs_proc_clean(void)
+{
+}
+#endif /* PROC_FS */
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index c26cd0d2c6d5..5eb3b83bbfa7 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -25,8 +25,11 @@
 
 void cifs_dump_mem(char *label, void *data, int length);
 #ifdef CONFIG_CIFS_DEBUG2
+#define DBG2 2
 void cifs_dump_detail(struct smb_hdr *);
 void cifs_dump_mids(struct TCP_Server_Info *);
+#else
+#define DBG2 0
 #endif
 extern int traceSMB;		/* flag which enables the function below */
 void dump_smb(struct smb_hdr *, int);
@@ -64,10 +67,10 @@ extern int cifsERROR;
  *	---------
  */
 #else		/* _CIFS_DEBUG */
-#define cERROR(button,prspec)
-#define cEVENT(format,arg...)
+#define cERROR(button, prspec)
+#define cEVENT(format, arg...)
 #define cFYI(button, prspec)
-#define cifserror(format,arg...)
+#define cifserror(format, arg...)
 #endif		/* _CIFS_DEBUG */
 
 #endif				/* _H_CIFS_DEBUG */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 6ad447529961..7f8838253410 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -286,7 +286,7 @@ static void dump_referral(const struct dfs_info3_param *ref)
 	cFYI(1, ("DFS: node path: %s", ref->node_name));
 	cFYI(1, ("DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type));
 	cFYI(1, ("DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag,
-				ref->PathConsumed));
+				ref->path_consumed));
 }
 
 
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index d543accc10dd..6653e29637a7 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -125,7 +125,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 #ifdef CONFIG_CIFS_DEBUG2
 	if (cifsFYI && !IS_ERR(spnego_key)) {
 		struct cifs_spnego_msg *msg = spnego_key->payload.data;
-		cifs_dump_mem("SPNEGO reply blob:", msg->data, min(1024,
+		cifs_dump_mem("SPNEGO reply blob:", msg->data, min(1024U,
 				msg->secblob_len + msg->sesskey_len));
 	}
 #endif /* CONFIG_CIFS_DEBUG2 */
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index b5903b89250d..7d75272a6b3f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -32,7 +32,7 @@
  *
  */
 int
-cifs_strfromUCS_le(char *to, const __le16 * from,
+cifs_strfromUCS_le(char *to, const __le16 *from,
 		   int len, const struct nls_table *codepage)
 {
 	int i;
@@ -61,7 +61,7 @@ cifs_strfromUCS_le(char *to, const __le16 * from,
  *
  */
 int
-cifs_strtoUCS(__le16 * to, const char *from, int len,
+cifs_strtoUCS(__le16 *to, const char *from, int len,
 	      const struct nls_table *codepage)
 {
 	int charlen;
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 614c11fcdcb6..14eb9a2395d3 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -254,7 +254,8 @@ UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
 	const wchar_t *anchor2 = ucs2;
 
 	while (*ucs1) {
-		if (*ucs1 == *ucs2) {	/* Partial match found */
+		if (*ucs1 == *ucs2) {
+			/* Partial match found */
 			ucs1++;
 			ucs2++;
 		} else {
@@ -279,7 +280,8 @@ UniToupper(register wchar_t uc)
 {
 	register const struct UniCaseRange *rp;
 
-	if (uc < sizeof (CifsUniUpperTable)) {	/* Latin characters */
+	if (uc < sizeof(CifsUniUpperTable)) {
+		/* Latin characters */
 		return uc + CifsUniUpperTable[uc];	/* Use base tables */
 	} else {
 		rp = CifsUniUpperRange;	/* Use range tables */
@@ -320,7 +322,8 @@ UniTolower(wchar_t uc)
 {
 	register struct UniCaseRange *rp;
 
-	if (uc < sizeof (UniLowerTable)) {	/* Latin characters */
+	if (uc < sizeof(UniLowerTable)) {
+		/* Latin characters */
 		return uc + UniLowerTable[uc];	/* Use base tables */
 	} else {
 		rp = UniLowerRange;	/* Use range tables */
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index a7035bd18e4e..f93932c21772 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -46,8 +46,7 @@ static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
 static const struct cifs_sid sid_everyone = {
 	1, 1, {0, 0, 0, 0, 0, 1}, {0} };
 /* group users */
-static const struct cifs_sid sid_user =
-		{1, 2 , {0, 0, 0, 0, 0, 5}, {} };
+static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
 
 
 int match_sid(struct cifs_sid *ctsid)
@@ -195,9 +194,9 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
 	/* For deny ACEs we change the mask so that subsequent allow access
 	   control entries do not turn on the bits we are denying */
 	if (type == ACCESS_DENIED) {
-		if (flags & GENERIC_ALL) {
+		if (flags & GENERIC_ALL)
 			*pbits_to_set &= ~S_IRWXUGO;
-		}
+
 		if ((flags & GENERIC_WRITE) ||
 			((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS))
 			*pbits_to_set &= ~S_IWUGO;
@@ -216,9 +215,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
 
 	if (flags & GENERIC_ALL) {
 		*pmode |= (S_IRWXUGO & (*pbits_to_set));
-#ifdef CONFIG_CIFS_DEBUG2
-		cFYI(1, ("all perms"));
-#endif
+		cFYI(DBG2, ("all perms"));
 		return;
 	}
 	if ((flags & GENERIC_WRITE) ||
@@ -231,9 +228,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
 			((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
 		*pmode |= (S_IXUGO & (*pbits_to_set));
 
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("access flags 0x%x mode now 0x%x", flags, *pmode));
-#endif
+	cFYI(DBG2, ("access flags 0x%x mode now 0x%x", flags, *pmode));
 	return;
 }
 
@@ -262,9 +257,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
 	if (mode & S_IXUGO)
 		*pace_flags |= SET_FILE_EXEC_RIGHTS;
 
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("mode: 0x%x, access flags now 0x%x", mode, *pace_flags));
-#endif
+	cFYI(DBG2, ("mode: 0x%x, access flags now 0x%x", mode, *pace_flags));
 	return;
 }
 
@@ -358,11 +351,9 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 		return;
 	}
 
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("DACL revision %d size %d num aces %d",
+	cFYI(DBG2, ("DACL revision %d size %d num aces %d",
 		le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
 		le32_to_cpu(pdacl->num_aces)));
-#endif
 
 	/* reset rwx permissions for user/group/other.
 	   Also, if num_aces is 0 i.e. DACL has no ACEs,
@@ -381,10 +372,6 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 		ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
 				GFP_KERNEL);
 
-/*		cifscred->cecount = pdacl->num_aces;
-		cifscred->aces = kmalloc(num_aces *
-			sizeof(struct cifs_ace *), GFP_KERNEL);*/
-
 		for (i = 0; i < num_aces; ++i) {
 			ppace[i] = (struct cifs_ace *) (acl_base + acl_size);
 #ifdef CONFIG_CIFS_DEBUG2
@@ -437,7 +424,7 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
 					 &sid_everyone, nmode, S_IRWXO);
 
 	pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
-	pndacl->num_aces = 3;
+	pndacl->num_aces = cpu_to_le32(3);
 
 	return (0);
 }
@@ -495,13 +482,11 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 				le32_to_cpu(pntsd->gsidoffset));
 	dacloffset = le32_to_cpu(pntsd->dacloffset);
 	dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x "
+	cFYI(DBG2, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x "
 		 "sacloffset 0x%x dacloffset 0x%x",
 		 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
 		 le32_to_cpu(pntsd->gsidoffset),
 		 le32_to_cpu(pntsd->sacloffset), dacloffset));
-#endif
 /*	cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
 	rc = parse_sid(owner_sid_ptr, end_of_acl);
 	if (rc)
@@ -636,9 +621,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 	struct super_block *sb;
 	struct cifs_sb_info *cifs_sb;
 
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
-#endif
+	cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
 
 	if (!inode)
 		return (rc);
@@ -669,9 +652,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 	}
 
 	rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("SetCIFSACL rc = %d", rc));
-#endif
+	cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
 	if (unlock_file == TRUE)
 		atomic_dec(&open_file->wrtPending);
 	else
@@ -689,9 +670,7 @@ void acl_to_uid_mode(struct inode *inode, const char *path)
 	u32 acllen = 0;
 	int rc = 0;
 
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("converting ACL to mode for %s", path));
-#endif
+	cFYI(DBG2, ("converting ACL to mode for %s", path));
 	pntsd = get_cifs_acl(&acllen, inode, path);
 
 	/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
@@ -712,9 +691,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
 	struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
 	struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
 
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("set ACL from mode for %s", path));
-#endif
+	cFYI(DBG2, ("set ACL from mode for %s", path));
 
 	/* Get the security descriptor */
 	pntsd = get_cifs_acl(&acllen, inode, path);
@@ -736,16 +713,12 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
 
 		rc = build_sec_desc(pntsd, pnntsd, acllen, inode, nmode);
 
-#ifdef CONFIG_CIFS_DEBUG2
-		cFYI(1, ("build_sec_desc rc: %d", rc));
-#endif
+		cFYI(DBG2, ("build_sec_desc rc: %d", rc));
 
 		if (!rc) {
 			/* Set the security descriptor */
 			rc = set_cifs_acl(pnntsd, acllen, inode, path);
-#ifdef CONFIG_CIFS_DEBUG2
-			cFYI(1, ("set_cifs_acl rc: %d", rc));
-#endif
+			cFYI(DBG2, ("set_cifs_acl rc: %d", rc));
 		}
 
 		kfree(pnntsd);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index fcc434227691..a04b17e5a9d0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -204,9 +204,8 @@ cifs_put_super(struct super_block *sb)
 		return;
 	}
 	rc = cifs_umount(sb, cifs_sb);
-	if (rc) {
+	if (rc)
 		cERROR(1, ("cifs_umount failed with return code %d", rc));
-	}
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	if (cifs_sb->mountdata) {
 		kfree(cifs_sb->mountdata);
@@ -461,7 +460,7 @@ int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats)
 
 static struct quotactl_ops cifs_quotactl_ops = {
 	.set_xquota	= cifs_xquota_set,
-	.get_xquota	= cifs_xquota_set,
+	.get_xquota	= cifs_xquota_get,
 	.set_xstate	= cifs_xstate_set,
 	.get_xstate	= cifs_xstate_get,
 };
@@ -472,9 +471,7 @@ static void cifs_umount_begin(struct vfsmount *vfsmnt, int flags)
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *tcon;
 
-#ifdef CONFIG_CIFS_DFS_UPCALL
 	dfs_shrink_umount_helper(vfsmnt);
-#endif /* CONFIG CIFS_DFS_UPCALL */
 
 	if (!(flags & MNT_FORCE))
 		return;
@@ -992,9 +989,7 @@ static int __init
 init_cifs(void)
 {
 	int rc = 0;
-#ifdef CONFIG_PROC_FS
 	cifs_proc_init();
-#endif
 /*	INIT_LIST_HEAD(&GlobalServerList);*/	/* BB not implemented yet */
 	INIT_LIST_HEAD(&GlobalSMBSessionList);
 	INIT_LIST_HEAD(&GlobalTreeConnectionList);
@@ -1095,19 +1090,15 @@ init_cifs(void)
  out_destroy_inodecache:
 	cifs_destroy_inodecache();
  out_clean_proc:
-#ifdef CONFIG_PROC_FS
 	cifs_proc_clean();
-#endif
 	return rc;
 }
 
 static void __exit
 exit_cifs(void)
 {
-	cFYI(0, ("exit_cifs"));
-#ifdef CONFIG_PROC_FS
+	cFYI(DBG2, ("exit_cifs"));
 	cifs_proc_clean();
-#endif
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	unregister_key_type(&key_type_dns_resolver);
 #endif
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5d32d8ddc82e..69a2e1942542 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -454,7 +454,7 @@ struct dir_notify_req {
 
 struct dfs_info3_param {
 	int flags; /* DFSREF_REFERRAL_SERVER, DFSREF_STORAGE_SERVER*/
-	int PathConsumed;
+	int path_consumed;
 	int server_type;
 	int ref_flag;
 	char *path_name;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 2f09f565a3d9..0af63e6b426b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,11 +53,11 @@ extern int SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
 extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
 			struct kvec *, int /* nvec to send */,
 			int * /* type of buf returned */ , const int flags);
-extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
-					struct cifsTconInfo *,
-				struct smb_hdr * /* input */ ,
-				struct smb_hdr * /* out */ ,
-				int * /* bytes returned */);
+extern int SendReceiveBlockingLock(const unsigned int xid,
+			struct cifsTconInfo *ptcon,
+			struct smb_hdr *in_buf ,
+			struct smb_hdr *out_buf,
+			int *bytes_returned);
 extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
 extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *);
 extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
@@ -84,7 +84,7 @@ extern __u16 GetNextMid(struct TCP_Server_Info *server);
 extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16,
 						 struct cifsTconInfo *);
 extern void DeleteOplockQEntry(struct oplock_q_entry *);
-extern struct timespec cifs_NTtimeToUnix(u64 /* utc nanoseconds since 1601 */ );
+extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
 extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
 extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
@@ -104,7 +104,11 @@ extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
 extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
 #ifdef CONFIG_CIFS_DFS_UPCALL
 extern void dfs_shrink_umount_helper(struct vfsmount *vfsmnt);
-#endif
+#else
+static inline void dfs_shrink_umount_helper(struct vfsmount *vfsmnt)
+{
+}
+#endif /* DFS_UPCALL */
 void cifs_proc_init(void);
 void cifs_proc_clean(void);
 
@@ -175,11 +179,11 @@ extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
 			struct kstatfs *FSData);
 
 extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon,
-			const char *fileName, const FILE_BASIC_INFO * data,
+			const char *fileName, const FILE_BASIC_INFO *data,
 			const struct nls_table *nls_codepage,
 			int remap_special_chars);
 extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
-			const FILE_BASIC_INFO * data, __u16 fid);
+			const FILE_BASIC_INFO *data, __u16 fid);
 #if 0
 extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
 			char *fileName, __u16 dos_attributes,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 9409524e4bf8..30bbe448e260 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/cifssmb.c
  *
- *   Copyright (C) International Business Machines  Corp., 2002,2007
+ *   Copyright (C) International Business Machines  Corp., 2002,2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   Contains the routines for constructing the SMB PDUs themselves
@@ -102,10 +102,12 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
 	   to this tcon */
 }
 
-/* If the return code is zero, this function must fill in request_buf pointer */
+/* Allocate and return pointer to an SMB request buffer, and set basic
+   SMB information in the SMB header.  If the return code is zero, this
+   function must have filled in request_buf pointer */
 static int
 small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
-	 void **request_buf /* returned */)
+		void **request_buf)
 {
 	int rc = 0;
 
@@ -363,7 +365,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 		*response_buf = *request_buf;
 
 	header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon,
-			wct /*wct */ );
+			wct);
 
 	if (tcon != NULL)
 		cifs_stats_inc(&tcon->num_smbs_sent);
@@ -523,7 +525,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			if (remain >= (MIN_TZ_ADJ / 2))
 				result += MIN_TZ_ADJ;
 			if (val < 0)
-				result = - result;
+				result = -result;
 			server->timeAdj = result;
 		} else {
 			server->timeAdj = (int)tmp;
@@ -600,7 +602,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize),
 			(__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
 	server->maxRw = le32_to_cpu(pSMBr->MaxRawSize);
-	cFYI(0, ("Max buf = %d", ses->server->maxBuf));
+	cFYI(DBG2, ("Max buf = %d", ses->server->maxBuf));
 	GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
 	server->capabilities = le32_to_cpu(pSMBr->Capabilities);
 	server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
@@ -868,9 +870,8 @@ PsxDelete:
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Posix delete returned %d", rc));
-	}
 	cifs_buf_release(pSMB);
 
 	cifs_stats_inc(&tcon->num_deletes);
@@ -916,9 +917,8 @@ DelFileRetry:
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->num_deletes);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Error in RMFile = %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -961,9 +961,8 @@ RmDirRetry:
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->num_rmdirs);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Error in RMDir = %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -1005,9 +1004,8 @@ MkDirRetry:
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->num_mkdirs);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Error in Mkdir = %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -1017,7 +1015,7 @@ MkDirRetry:
 
 int
 CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags,
-		__u64 mode, __u16 * netfid, FILE_UNIX_BASIC_INFO *pRetData,
+		__u64 mode, __u16 *netfid, FILE_UNIX_BASIC_INFO *pRetData,
 		__u32 *pOplock, const char *name,
 		const struct nls_table *nls_codepage, int remap)
 {
@@ -1027,8 +1025,8 @@ CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags,
 	int rc = 0;
 	int bytes_returned = 0;
 	__u16 params, param_offset, offset, byte_count, count;
-	OPEN_PSX_REQ * pdata;
-	OPEN_PSX_RSP * psx_rsp;
+	OPEN_PSX_REQ *pdata;
+	OPEN_PSX_RSP *psx_rsp;
 
 	cFYI(1, ("In POSIX Create"));
 PsxCreat:
@@ -1110,9 +1108,7 @@ PsxCreat:
 	/* check to make sure response data is there */
 	if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
 		pRetData->Type = cpu_to_le32(-1); /* unknown */
-#ifdef CONFIG_CIFS_DEBUG2
-		cFYI(1, ("unknown type"));
-#endif
+		cFYI(DBG2, ("unknown type"));
 	} else {
 		if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
 					+ sizeof(FILE_UNIX_BASIC_INFO)) {
@@ -1169,8 +1165,8 @@ static __u16 convert_disposition(int disposition)
 int
 SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon,
 	    const char *fileName, const int openDisposition,
-	    const int access_flags, const int create_options, __u16 * netfid,
-	    int *pOplock, FILE_ALL_INFO * pfile_info,
+	    const int access_flags, const int create_options, __u16 *netfid,
+	    int *pOplock, FILE_ALL_INFO *pfile_info,
 	    const struct nls_table *nls_codepage, int remap)
 {
 	int rc = -EACCES;
@@ -1221,8 +1217,8 @@ OldOpenRetry:
 
 	if (create_options & CREATE_OPTION_SPECIAL)
 		pSMB->FileAttributes = cpu_to_le16(ATTR_SYSTEM);
-	else
-                pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/); /* BB FIXME */
+	else /* BB FIXME BB */
+		pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/);
 
 	/* if ((omode & S_IWUGO) == 0)
 		pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/
@@ -1284,8 +1280,8 @@ OldOpenRetry:
 int
 CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
 	    const char *fileName, const int openDisposition,
-	    const int access_flags, const int create_options, __u16 * netfid,
-	    int *pOplock, FILE_ALL_INFO * pfile_info,
+	    const int access_flags, const int create_options, __u16 *netfid,
+	    int *pOplock, FILE_ALL_INFO *pfile_info,
 	    const struct nls_table *nls_codepage, int remap)
 {
 	int rc = -EACCES;
@@ -1556,9 +1552,9 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	} /* else setting file size with write of zero bytes */
 	if (wct == 14)
 		byte_count = bytes_sent + 1; /* pad */
-	else /* wct == 12 */ {
+	else /* wct == 12 */
 		byte_count = bytes_sent + 5; /* bigger pad, smaller smb hdr */
-	}
+
 	pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
 	pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
 	pSMB->hdr.smb_buf_length += byte_count;
@@ -1663,7 +1659,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 		rc = -EIO;
 		*nbytes = 0;
 	} else {
-		WRITE_RSP * pSMBr = (WRITE_RSP *)iov[0].iov_base;
+		WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base;
 		*nbytes = le16_to_cpu(pSMBr->CountHigh);
 		*nbytes = (*nbytes) << 16;
 		*nbytes += le16_to_cpu(pSMBr->Count);
@@ -1744,9 +1740,8 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
 		/* SMB buffer freed by function above */
 	}
 	cifs_stats_inc(&tcon->num_locks);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Send error in Lock = %d", rc));
-	}
 
 	/* Note: On -EAGAIN error only caller can retry on handle based calls
 	since file handle passed in no longer valid */
@@ -1791,7 +1786,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
 
 	count = sizeof(struct cifs_posix_lock);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
-	pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */
+	pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB from sess */
 	pSMB->SetupCount = 1;
 	pSMB->Reserved3 = 0;
 	if (get_flag)
@@ -1972,9 +1967,8 @@ renameRetry:
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->num_renames);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Send error in rename = %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 
@@ -2016,7 +2010,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
 	data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
 	rename_info = (struct set_file_rename *) data_offset;
 	pSMB->MaxParameterCount = cpu_to_le16(2);
-	pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */
+	pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB from sess */
 	pSMB->SetupCount = 1;
 	pSMB->Reserved3 = 0;
 	pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION);
@@ -2052,9 +2046,8 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
 	rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&pTcon->num_t2renames);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Send error in Rename (by file handle) = %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 
@@ -2211,9 +2204,8 @@ createSymLinkRetry:
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->num_symlinks);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Send error in SetPathInfo create symlink = %d", rc));
-	}
 
 	if (pSMB)
 		cifs_buf_release(pSMB);
@@ -2299,9 +2291,8 @@ createHardLinkRetry:
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->num_hardlinks);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Send error in SetPathInfo (hard link) = %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -2370,9 +2361,9 @@ winCreateHardLinkRetry:
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	cifs_stats_inc(&tcon->num_hardlinks);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Send error in hard link (NT rename) = %d", rc));
-	}
+
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
 		goto winCreateHardLinkRetry;
@@ -2968,9 +2959,8 @@ setAclRetry:
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Set POSIX ACL returned %d", rc));
-	}
 
 setACLerrorExit:
 	cifs_buf_release(pSMB);
@@ -2982,7 +2972,7 @@ setACLerrorExit:
 /* BB fix tabs in this function FIXME BB */
 int
 CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon,
-	       const int netfid, __u64 * pExtAttrBits, __u64 *pMask)
+	       const int netfid, __u64 *pExtAttrBits, __u64 *pMask)
 {
 	int rc = 0;
 	struct smb_t2_qfi_req *pSMB = NULL;
@@ -3000,7 +2990,7 @@ GetExtAttrRetry:
 	if (rc)
 		return rc;
 
-	params = 2 /* level */ +2 /* fid */;
+	params = 2 /* level */ + 2 /* fid */;
 	pSMB->t2.TotalDataCount = 0;
 	pSMB->t2.MaxParameterCount = cpu_to_le16(4);
 	/* BB find exact max data count below from sess structure BB */
@@ -3071,7 +3061,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 {
 	int rc = 0;
 	int buf_type = 0;
-	QUERY_SEC_DESC_REQ * pSMB;
+	QUERY_SEC_DESC_REQ *pSMB;
 	struct kvec iov[1];
 
 	cFYI(1, ("GetCifsACL"));
@@ -3101,7 +3091,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 	if (rc) {
 		cFYI(1, ("Send error in QuerySecDesc = %d", rc));
 	} else {                /* decode response */
-		__le32 * parm;
+		__le32 *parm;
 		__u32 parm_len;
 		__u32 acl_len;
 		struct smb_com_ntransact_rsp *pSMBr;
@@ -3230,8 +3220,8 @@ int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
 			FILE_ALL_INFO *pFinfo,
 			const struct nls_table *nls_codepage, int remap)
 {
-	QUERY_INFORMATION_REQ * pSMB;
-	QUERY_INFORMATION_RSP * pSMBr;
+	QUERY_INFORMATION_REQ *pSMB;
+	QUERY_INFORMATION_RSP *pSMBr;
 	int rc = 0;
 	int bytes_returned;
 	int name_len;
@@ -3263,9 +3253,11 @@ QInfRetry:
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
 		cFYI(1, ("Send error in QueryInfo = %d", rc));
-	} else if (pFinfo) {            /* decode response */
+	} else if (pFinfo) {
 		struct timespec ts;
 		__u32 time = le32_to_cpu(pSMBr->last_write_time);
+
+		/* decode response */
 		/* BB FIXME - add time zone adjustment BB */
 		memset(pFinfo, 0, sizeof(FILE_ALL_INFO));
 		ts.tv_nsec = 0;
@@ -3296,7 +3288,7 @@ QInfRetry:
 int
 CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
 		 const unsigned char *searchName,
-		 FILE_ALL_INFO * pFindData,
+		 FILE_ALL_INFO *pFindData,
 		 int legacy /* old style infolevel */,
 		 const struct nls_table *nls_codepage, int remap)
 {
@@ -3371,10 +3363,12 @@ QPathInfoRetry:
 		else if (pFindData) {
 			int size;
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
-			if (legacy) /* we do not read the last field, EAsize,
-				       fortunately since it varies by subdialect
-				       and on Set vs. Get, is two bytes or 4
-				       bytes depending but we don't care here */
+
+			/* On legacy responses we do not read the last field,
+			EAsize, fortunately since it varies by subdialect and
+			also note it differs on Set vs. Get, ie two bytes or 4
+			bytes depending but we don't care here */
+			if (legacy)
 				size = sizeof(FILE_INFO_STANDARD);
 			else
 				size = sizeof(FILE_ALL_INFO);
@@ -3476,85 +3470,6 @@ UnixQPathInfoRetry:
 	return rc;
 }
 
-#if 0  /* function unused at present */
-int CIFSFindSingle(const int xid, struct cifsTconInfo *tcon,
-	       const char *searchName, FILE_ALL_INFO * findData,
-	       const struct nls_table *nls_codepage)
-{
-/* level 257 SMB_ */
-	TRANSACTION2_FFIRST_REQ *pSMB = NULL;
-	TRANSACTION2_FFIRST_RSP *pSMBr = NULL;
-	int rc = 0;
-	int bytes_returned;
-	int name_len;
-	__u16 params, byte_count;
-
-	cFYI(1, ("In FindUnique"));
-findUniqueRetry:
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
-	if (rc)
-		return rc;
-
-	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
-		name_len =
-		    cifsConvertToUCS((__le16 *) pSMB->FileName, searchName,
-				     PATH_MAX, nls_codepage);
-		name_len++;	/* trailing null */
-		name_len *= 2;
-	} else {	/* BB improve the check for buffer overruns BB */
-		name_len = strnlen(searchName, PATH_MAX);
-		name_len++;	/* trailing null */
-		strncpy(pSMB->FileName, searchName, name_len);
-	}
-
-	params = 12 + name_len /* includes null */ ;
-	pSMB->TotalDataCount = 0;	/* no EAs */
-	pSMB->MaxParameterCount = cpu_to_le16(2);
-	pSMB->MaxDataCount = cpu_to_le16(4000);	/* BB find exact max SMB PDU from sess structure BB */
-	pSMB->MaxSetupCount = 0;
-	pSMB->Reserved = 0;
-	pSMB->Flags = 0;
-	pSMB->Timeout = 0;
-	pSMB->Reserved2 = 0;
-	pSMB->ParameterOffset = cpu_to_le16(
-	 offsetof(struct smb_com_transaction2_ffirst_req, InformationLevel)-4);
-	pSMB->DataCount = 0;
-	pSMB->DataOffset = 0;
-	pSMB->SetupCount = 1;	/* one byte, no need to le convert */
-	pSMB->Reserved3 = 0;
-	pSMB->SubCommand = cpu_to_le16(TRANS2_FIND_FIRST);
-	byte_count = params + 1 /* pad */ ;
-	pSMB->TotalParameterCount = cpu_to_le16(params);
-	pSMB->ParameterCount = pSMB->TotalParameterCount;
-	pSMB->SearchAttributes =
-	    cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM |
-			ATTR_DIRECTORY);
-	pSMB->SearchCount = cpu_to_le16(16);	/* BB increase */
-	pSMB->SearchFlags = cpu_to_le16(1);
-	pSMB->InformationLevel = cpu_to_le16(SMB_FIND_FILE_DIRECTORY_INFO);
-	pSMB->SearchStorageType = 0;	/* BB what should we set this to? BB */
-	pSMB->hdr.smb_buf_length += byte_count;
-	pSMB->ByteCount = cpu_to_le16(byte_count);
-
-	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
-			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-
-	if (rc) {
-		cFYI(1, ("Send error in FindFileDirInfo = %d", rc));
-	} else {		/* decode response */
-		cifs_stats_inc(&tcon->num_ffirst);
-		/* BB fill in */
-	}
-
-	cifs_buf_release(pSMB);
-	if (rc == -EAGAIN)
-		goto findUniqueRetry;
-
-	return rc;
-}
-#endif /* end unused (temporarily) function */
-
 /* xid, tcon, searchName and codepage are input parms, rest are returned */
 int
 CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
@@ -3566,7 +3481,7 @@ CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
 /* level 257 SMB_ */
 	TRANSACTION2_FFIRST_REQ *pSMB = NULL;
 	TRANSACTION2_FFIRST_RSP *pSMBr = NULL;
-	T2_FFIRST_RSP_PARMS * parms;
+	T2_FFIRST_RSP_PARMS *parms;
 	int rc = 0;
 	int bytes_returned = 0;
 	int name_len;
@@ -3697,7 +3612,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
 {
 	TRANSACTION2_FNEXT_REQ *pSMB = NULL;
 	TRANSACTION2_FNEXT_RSP *pSMBr = NULL;
-	T2_FNEXT_RSP_PARMS * parms;
+	T2_FNEXT_RSP_PARMS *parms;
 	char *response_data;
 	int rc = 0;
 	int bytes_returned, name_len;
@@ -3836,9 +3751,9 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
 	pSMB->FileID = searchHandle;
 	pSMB->ByteCount = 0;
 	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
-	if (rc) {
+	if (rc)
 		cERROR(1, ("Send error in FindClose = %d", rc));
-	}
+
 	cifs_stats_inc(&tcon->num_fclose);
 
 	/* Since session is dead, search handle closed on server already */
@@ -3851,7 +3766,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
 int
 CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
 		      const unsigned char *searchName,
-		      __u64 * inode_number,
+		      __u64 *inode_number,
 		      const struct nls_table *nls_codepage, int remap)
 {
 	int rc = 0;
@@ -4560,9 +4475,8 @@ SETFSUnixRetry:
 		cERROR(1, ("Send error in SETFSUnixInfo = %d", rc));
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-		if (rc) {
+		if (rc)
 			rc = -EIO;	/* bad smb */
-		}
 	}
 	cifs_buf_release(pSMB);
 
@@ -4744,9 +4658,8 @@ SetEOFRetry:
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("SetPathInfo (file size) returned %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 
@@ -4897,9 +4810,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
 	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc));
-	}
 
 	/* Note: On -EAGAIN error only caller can retry on handle based calls
 		since file handle passed in no longer valid */
@@ -4975,9 +4887,8 @@ SetTimesRetry:
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("SetPathInfo (times) returned %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 
@@ -5027,9 +4938,8 @@ SetAttrLgcyRetry:
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("Error in LegacySetAttr = %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 
@@ -5138,9 +5048,8 @@ setPermsRetry:
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("SetPathInfo (perms) returned %d", rc));
-	}
 
 	if (pSMB)
 		cifs_buf_release(pSMB);
@@ -5615,9 +5524,8 @@ SetEARetry:
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc) {
+	if (rc)
 		cFYI(1, ("SetPathInfo (EA) returned %d", rc));
-	}
 
 	cifs_buf_release(pSMB);
 
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 65d0ba72e78f..8dbfa97cd18c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1722,8 +1722,15 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
 			   originally at mount time */
 			if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0)
 				cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
-			if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0)
+			if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
+				if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
+					cERROR(1, ("POSIXPATH support change"));
 				cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
+			} else if ((cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
+				cERROR(1, ("possible reconnect error"));
+				cERROR(1,
+					("server disabled POSIX path support"));
+			}
 		}
 
 		cap &= CIFS_UNIX_CAP_MASK;
@@ -1753,9 +1760,8 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
 		if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) {
 			if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) {
 				CIFS_SB(sb)->rsize = 127 * 1024;
-#ifdef CONFIG_CIFS_DEBUG2
-				cFYI(1, ("larger reads not supported by srv"));
-#endif
+				cFYI(DBG2,
+					("larger reads not supported by srv"));
 			}
 		}
 
@@ -1792,6 +1798,26 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
 	}
 }
 
+static void
+convert_delimiter(char *path, char delim)
+{
+	int i;
+	char old_delim;
+
+	if (path == NULL)
+		return;
+
+	if (delim == '/') 
+		old_delim = '\\';
+	else
+		old_delim = '/';
+
+	for (i = 0; path[i] != '\0'; i++) {
+		if (path[i] == old_delim)
+			path[i] = delim;
+	}
+}
+
 int
 cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	   char *mount_data, const char *devname)
@@ -2057,7 +2083,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		cifs_sb->prepath = volume_info.prepath;
 		if (cifs_sb->prepath) {
 			cifs_sb->prepathlen = strlen(cifs_sb->prepath);
-			cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb);
+			/* we can not convert the / to \ in the path
+			separators in the prefixpath yet because we do not
+			know (until reset_cifs_unix_caps is called later)
+			whether POSIX PATH CAP is available. We normalize
+			the / to \ after reset_cifs_unix_caps is called */
 			volume_info.prepath = NULL;
 		} else
 			cifs_sb->prepathlen = 0;
@@ -2225,11 +2255,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		else
 			tcon->unix_ext = 0; /* server does not support them */
 
+		/* convert forward to back slashes in prepath here if needed */
+		if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
+			convert_delimiter(cifs_sb->prepath,
+					  CIFS_DIR_SEP(cifs_sb));
+
 		if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) {
 			cifs_sb->rsize = 1024 * 127;
-#ifdef CONFIG_CIFS_DEBUG2
-			cFYI(1, ("no very large read support, rsize now 127K"));
-#endif
+			cFYI(DBG2,
+				("no very large read support, rsize now 127K"));
 		}
 		if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
 			cifs_sb->wsize = min(cifs_sb->wsize,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 699ec1198409..4e83b47c4b34 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -3,7 +3,7 @@
  *
  *   vfs operations that deal with dentries
  *
- *   Copyright (C) International Business Machines  Corp., 2002,2007
+ *   Copyright (C) International Business Machines  Corp., 2002,2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -111,16 +111,6 @@ cifs_bp_rename_retry:
 	return full_path;
 }
 
-/* char * build_wildcard_path_from_dentry(struct dentry *direntry)
-{
-	if(full_path == NULL)
-		return full_path;
-
-	full_path[namelen] = '\\';
-	full_path[namelen+1] = '*';
-	full_path[namelen+2] = 0;
-BB remove above eight lines BB */
-
 /* Inode operations in similar order to how they appear in Linux file fs.h */
 
 int
@@ -171,9 +161,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			disposition = FILE_OVERWRITE_IF;
 		else if ((oflags & O_CREAT) == O_CREAT)
 			disposition = FILE_OPEN_IF;
-		else {
+		else
 			cFYI(1, ("Create flag not set in create function"));
-		}
 	}
 
 	/* BB add processing to set equivalent of mode - e.g. via CreateX with
@@ -367,7 +356,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
 			int oplock = 0;
 			u16 fileHandle;
-			FILE_ALL_INFO * buf;
+			FILE_ALL_INFO *buf;
 
 			cFYI(1, ("sfu compat create special file"));
 
@@ -534,9 +523,8 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 	int isValid = 1;
 
 	if (direntry->d_inode) {
-		if (cifs_revalidate(direntry)) {
+		if (cifs_revalidate(direntry))
 			return 0;
-		}
 	} else {
 		cFYI(1, ("neg dentry 0x%p name = %s",
 			 direntry, direntry->d_name.name));
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 073fdc3db419..966e9288930b 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/dns_resolve.h -- DNS Resolver upcall management for CIFS DFS
  *                            Handles host name to IP address resolution
- * 
+ *
  *   Copyright (c) International Business Machines  Corp., 2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index 995474c90885..7d1d5aa4c430 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -35,9 +35,8 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
 
 	/* No way on Linux VFS to ask to monitor xattr
 	changes (and no stream support either */
-	if (fcntl_notify_flags & DN_ACCESS) {
+	if (fcntl_notify_flags & DN_ACCESS)
 		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
-	}
 	if (fcntl_notify_flags & DN_MODIFY) {
 		/* What does this mean on directories? */
 		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
@@ -47,9 +46,8 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
 		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
 			FILE_NOTIFY_CHANGE_LAST_WRITE;
 	}
-	if (fcntl_notify_flags & DN_DELETE) {
+	if (fcntl_notify_flags & DN_DELETE)
 		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
-	}
 	if (fcntl_notify_flags & DN_RENAME) {
 		/* BB review this - checking various server behaviors */
 		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5f7c374ae89c..fa849c91d323 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -353,9 +353,9 @@ static int cifs_reopen_file(struct file *file, int can_flush)
 	int disposition = FILE_OPEN;
 	__u16 netfid;
 
-	if (file->private_data) {
+	if (file->private_data)
 		pCifsFile = (struct cifsFileInfo *)file->private_data;
-	} else
+	else
 		return -EBADF;
 
 	xid = GetXid();
@@ -499,9 +499,8 @@ int cifs_close(struct inode *inode, struct file *file)
 					the struct would be in each open file,
 					but this should give enough time to
 					clear the socket */
-#ifdef CONFIG_CIFS_DEBUG2
-					cFYI(1, ("close delay, write pending"));
-#endif /* DEBUG2 */
+					cFYI(DBG2,
+						("close delay, write pending"));
 					msleep(timeout);
 					timeout *= 4;
 				}
@@ -1423,9 +1422,8 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
 	xid = GetXid();
 /* BB add check for wbc flags */
 	page_cache_get(page);
-	if (!PageUptodate(page)) {
+	if (!PageUptodate(page))
 		cFYI(1, ("ppw - page not up to date"));
-	}
 
 	/*
 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
@@ -1460,9 +1458,9 @@ static int cifs_commit_write(struct file *file, struct page *page,
 	cFYI(1, ("commit write for page %p up to position %lld for %d",
 		 page, position, to));
 	spin_lock(&inode->i_lock);
-	if (position > inode->i_size) {
+	if (position > inode->i_size)
 		i_size_write(inode, position);
-	}
+
 	spin_unlock(&inode->i_lock);
 	if (!PageUptodate(page)) {
 		position =  ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset;
@@ -1596,9 +1594,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 	}
 	open_file = (struct cifsFileInfo *)file->private_data;
 
-	if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
+	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
 		cFYI(1, ("attempting read on write only file instance"));
-	}
+
 	for (total_read = 0, current_offset = read_data;
 	     read_size > total_read;
 	     total_read += bytes_read, current_offset += bytes_read) {
@@ -1625,9 +1623,8 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 						smb_read_data +
 						4 /* RFC1001 length field */ +
 						le16_to_cpu(pSMBr->DataOffset),
-						bytes_read)) {
+						bytes_read))
 					rc = -EFAULT;
-				}
 
 				if (buf_type == CIFS_SMALL_BUFFER)
 					cifs_small_buf_release(smb_read_data);
@@ -1814,9 +1811,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 	pTcon = cifs_sb->tcon;
 
 	pagevec_init(&lru_pvec, 0);
-#ifdef CONFIG_CIFS_DEBUG2
-		cFYI(1, ("rpages: num pages %d", num_pages));
-#endif
+		cFYI(DBG2, ("rpages: num pages %d", num_pages));
 	for (i = 0; i < num_pages; ) {
 		unsigned contig_pages;
 		struct page *tmp_page;
@@ -1849,10 +1844,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		/* Read size needs to be in multiples of one page */
 		read_size = min_t(const unsigned int, read_size,
 				  cifs_sb->rsize & PAGE_CACHE_MASK);
-#ifdef CONFIG_CIFS_DEBUG2
-		cFYI(1, ("rpages: read size 0x%x  contiguous pages %d",
+		cFYI(DBG2, ("rpages: read size 0x%x  contiguous pages %d",
 				read_size, contig_pages));
-#endif
 		rc = -EAGAIN;
 		while (rc == -EAGAIN) {
 			if ((open_file->invalidHandle) &&
@@ -2026,7 +2019,7 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
 		struct cifs_sb_info *cifs_sb;
 
 		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
-		if ( cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO ) {
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
 			/* since no page cache to corrupt on directio
 			we can change size safely */
 			return 1;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b1a4a65eaa08..24eb4d392155 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -29,6 +29,130 @@
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
 
+
+static void cifs_set_ops(struct inode *inode)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_op = &cifs_file_inode_ops;
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
+			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+				inode->i_fop = &cifs_file_direct_nobrl_ops;
+			else
+				inode->i_fop = &cifs_file_direct_ops;
+		} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+			inode->i_fop = &cifs_file_nobrl_ops;
+		else { /* not direct, send byte range locks */
+			inode->i_fop = &cifs_file_ops;
+		}
+
+
+		/* check if server can support readpages */
+		if (cifs_sb->tcon->ses->server->maxBuf <
+				PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
+			inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
+		else
+			inode->i_data.a_ops = &cifs_addr_ops;
+		break;
+	case S_IFDIR:
+		inode->i_op = &cifs_dir_inode_ops;
+		inode->i_fop = &cifs_dir_ops;
+		break;
+	case S_IFLNK:
+		inode->i_op = &cifs_symlink_inode_ops;
+		break;
+	default:
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		break;
+	}
+}
+
+static void cifs_unix_info_to_inode(struct inode *inode,
+		FILE_UNIX_BASIC_INFO *info, int force_uid_gid)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
+	__u64 num_of_bytes = le64_to_cpu(info->NumOfBytes);
+	__u64 end_of_file = le64_to_cpu(info->EndOfFile);
+
+	inode->i_atime = cifs_NTtimeToUnix(le64_to_cpu(info->LastAccessTime));
+	inode->i_mtime =
+		cifs_NTtimeToUnix(le64_to_cpu(info->LastModificationTime));
+	inode->i_ctime = cifs_NTtimeToUnix(le64_to_cpu(info->LastStatusChange));
+	inode->i_mode = le64_to_cpu(info->Permissions);
+
+	/*
+	 * Since we set the inode type below we need to mask off
+	 * to avoid strange results if bits set above.
+	 */
+	inode->i_mode &= ~S_IFMT;
+	switch (le32_to_cpu(info->Type)) {
+	case UNIX_FILE:
+		inode->i_mode |= S_IFREG;
+		break;
+	case UNIX_SYMLINK:
+		inode->i_mode |= S_IFLNK;
+		break;
+	case UNIX_DIR:
+		inode->i_mode |= S_IFDIR;
+		break;
+	case UNIX_CHARDEV:
+		inode->i_mode |= S_IFCHR;
+		inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor),
+				      le64_to_cpu(info->DevMinor) & MINORMASK);
+		break;
+	case UNIX_BLOCKDEV:
+		inode->i_mode |= S_IFBLK;
+		inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor),
+				      le64_to_cpu(info->DevMinor) & MINORMASK);
+		break;
+	case UNIX_FIFO:
+		inode->i_mode |= S_IFIFO;
+		break;
+	case UNIX_SOCKET:
+		inode->i_mode |= S_IFSOCK;
+		break;
+	default:
+		/* safest to call it a file if we do not know */
+		inode->i_mode |= S_IFREG;
+		cFYI(1, ("unknown type %d", le32_to_cpu(info->Type)));
+		break;
+	}
+
+	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) &&
+	    !force_uid_gid)
+		inode->i_uid = cifs_sb->mnt_uid;
+	else
+		inode->i_uid = le64_to_cpu(info->Uid);
+
+	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) &&
+	    !force_uid_gid)
+		inode->i_gid = cifs_sb->mnt_gid;
+	else
+		inode->i_gid = le64_to_cpu(info->Gid);
+
+	inode->i_nlink = le64_to_cpu(info->Nlinks);
+
+	spin_lock(&inode->i_lock);
+	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
+		/*
+		 * We can not safely change the file size here if the client
+		 * is writing to it due to potential races.
+		 */
+		i_size_write(inode, end_of_file);
+
+		/*
+		 * i_blocks is not related to (i_size / i_blksize),
+		 * but instead 512 byte (2**9) size is required for
+		 * calculating num blocks.
+		 */
+		inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
+	}
+	spin_unlock(&inode->i_lock);
+}
+
 int cifs_get_inode_info_unix(struct inode **pinode,
 	const unsigned char *search_path, struct super_block *sb, int xid)
 {
@@ -74,7 +198,6 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 		}
 	} else {
 		struct cifsInodeInfo *cifsInfo;
-		__u32 type = le32_to_cpu(findData.Type);
 		__u64 num_of_bytes = le64_to_cpu(findData.NumOfBytes);
 		__u64 end_of_file = le64_to_cpu(findData.EndOfFile);
 
@@ -105,112 +228,16 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 		/* this is ok to set on every inode revalidate */
 		atomic_set(&cifsInfo->inUse, 1);
 
-		inode->i_atime =
-		    cifs_NTtimeToUnix(le64_to_cpu(findData.LastAccessTime));
-		inode->i_mtime =
-		    cifs_NTtimeToUnix(le64_to_cpu
-				(findData.LastModificationTime));
-		inode->i_ctime =
-		    cifs_NTtimeToUnix(le64_to_cpu(findData.LastStatusChange));
-		inode->i_mode = le64_to_cpu(findData.Permissions);
-		/* since we set the inode type below we need to mask off
-		   to avoid strange results if bits set above */
-		inode->i_mode &= ~S_IFMT;
-		if (type == UNIX_FILE) {
-			inode->i_mode |= S_IFREG;
-		} else if (type == UNIX_SYMLINK) {
-			inode->i_mode |= S_IFLNK;
-		} else if (type == UNIX_DIR) {
-			inode->i_mode |= S_IFDIR;
-		} else if (type == UNIX_CHARDEV) {
-			inode->i_mode |= S_IFCHR;
-			inode->i_rdev = MKDEV(le64_to_cpu(findData.DevMajor),
-				le64_to_cpu(findData.DevMinor) & MINORMASK);
-		} else if (type == UNIX_BLOCKDEV) {
-			inode->i_mode |= S_IFBLK;
-			inode->i_rdev = MKDEV(le64_to_cpu(findData.DevMajor),
-				le64_to_cpu(findData.DevMinor) & MINORMASK);
-		} else if (type == UNIX_FIFO) {
-			inode->i_mode |= S_IFIFO;
-		} else if (type == UNIX_SOCKET) {
-			inode->i_mode |= S_IFSOCK;
-		} else {
-			/* safest to call it a file if we do not know */
-			inode->i_mode |= S_IFREG;
-			cFYI(1, ("unknown type %d", type));
-		}
+		cifs_unix_info_to_inode(inode, &findData, 0);
 
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
-			inode->i_uid = cifs_sb->mnt_uid;
-		else
-			inode->i_uid = le64_to_cpu(findData.Uid);
-
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
-			inode->i_gid = cifs_sb->mnt_gid;
-		else
-			inode->i_gid = le64_to_cpu(findData.Gid);
-
-		inode->i_nlink = le64_to_cpu(findData.Nlinks);
-
-		spin_lock(&inode->i_lock);
-		if (is_size_safe_to_change(cifsInfo, end_of_file)) {
-		/* can not safely change the file size here if the
-		   client is writing to it due to potential races */
-			i_size_write(inode, end_of_file);
-
-		/* blksize needs to be multiple of two. So safer to default to
-		blksize and blkbits set in superblock so 2**blkbits and blksize
-		will match rather than setting to:
-		(pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
-
-		/* This seems incredibly stupid but it turns out that i_blocks
-		   is not related to (i_size / i_blksize), instead 512 byte size
-		   is required for calculating num blocks */
-
-		/* 512 bytes (2**9) is the fake blocksize that must be used */
-		/* for this calculation */
-			inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
-		}
-		spin_unlock(&inode->i_lock);
 
 		if (num_of_bytes < end_of_file)
 			cFYI(1, ("allocation size less than end of file"));
 		cFYI(1, ("Size %ld and blocks %llu",
 			(unsigned long) inode->i_size,
 			(unsigned long long)inode->i_blocks));
-		if (S_ISREG(inode->i_mode)) {
-			cFYI(1, ("File inode"));
-			inode->i_op = &cifs_file_inode_ops;
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
-				if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-					inode->i_fop =
-						&cifs_file_direct_nobrl_ops;
-				else
-					inode->i_fop = &cifs_file_direct_ops;
-			} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-				inode->i_fop = &cifs_file_nobrl_ops;
-			else /* not direct, send byte range locks */
-				inode->i_fop = &cifs_file_ops;
-
-			/* check if server can support readpages */
-			if (pTcon->ses->server->maxBuf <
-			    PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
-				inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
-			else
-				inode->i_data.a_ops = &cifs_addr_ops;
-		} else if (S_ISDIR(inode->i_mode)) {
-			cFYI(1, ("Directory inode"));
-			inode->i_op = &cifs_dir_inode_ops;
-			inode->i_fop = &cifs_dir_ops;
-		} else if (S_ISLNK(inode->i_mode)) {
-			cFYI(1, ("Symbolic Link inode"));
-			inode->i_op = &cifs_symlink_inode_ops;
-		/* tmp_inode->i_fop = */ /* do not need to set to anything */
-		} else {
-			cFYI(1, ("Init special inode"));
-			init_special_inode(inode, inode->i_mode,
-					   inode->i_rdev);
-		}
+
+		cifs_set_ops(inode);
 	}
 	return rc;
 }
@@ -490,9 +517,9 @@ int cifs_get_inode_info(struct inode **pinode,
 			if (decode_sfu_inode(inode,
 					 le64_to_cpu(pfindData->EndOfFile),
 					 search_path,
-					 cifs_sb, xid)) {
+					 cifs_sb, xid))
 				cFYI(1, ("Unrecognized sfu inode type"));
-			}
+
 			cFYI(1, ("sfu mode 0%o", inode->i_mode));
 		} else {
 			inode->i_mode |= S_IFREG;
@@ -546,36 +573,7 @@ int cifs_get_inode_info(struct inode **pinode,
 			atomic_set(&cifsInfo->inUse, 1);
 		}
 
-		if (S_ISREG(inode->i_mode)) {
-			cFYI(1, ("File inode"));
-			inode->i_op = &cifs_file_inode_ops;
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
-				if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-					inode->i_fop =
-						&cifs_file_direct_nobrl_ops;
-				else
-					inode->i_fop = &cifs_file_direct_ops;
-			} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-				inode->i_fop = &cifs_file_nobrl_ops;
-			else /* not direct, send byte range locks */
-				inode->i_fop = &cifs_file_ops;
-
-			if (pTcon->ses->server->maxBuf <
-			     PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
-				inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
-			else
-				inode->i_data.a_ops = &cifs_addr_ops;
-		} else if (S_ISDIR(inode->i_mode)) {
-			cFYI(1, ("Directory inode"));
-			inode->i_op = &cifs_dir_inode_ops;
-			inode->i_fop = &cifs_dir_ops;
-		} else if (S_ISLNK(inode->i_mode)) {
-			cFYI(1, ("Symbolic Link inode"));
-			inode->i_op = &cifs_symlink_inode_ops;
-		} else {
-			init_special_inode(inode, inode->i_mode,
-					   inode->i_rdev);
-		}
+		cifs_set_ops(inode);
 	}
 	kfree(buf);
 	return rc;
@@ -792,17 +790,12 @@ psx_del_no_retry:
 }
 
 static void posix_fill_in_inode(struct inode *tmp_inode,
-	FILE_UNIX_BASIC_INFO *pData, int *pobject_type, int isNewInode)
+	FILE_UNIX_BASIC_INFO *pData, int isNewInode)
 {
+	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
 	loff_t local_size;
 	struct timespec local_mtime;
 
-	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
-	struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
-
-	__u32 type = le32_to_cpu(pData->Type);
-	__u64 num_of_bytes = le64_to_cpu(pData->NumOfBytes);
-	__u64 end_of_file = le64_to_cpu(pData->EndOfFile);
 	cifsInfo->time = jiffies;
 	atomic_inc(&cifsInfo->inUse);
 
@@ -810,115 +803,27 @@ static void posix_fill_in_inode(struct inode *tmp_inode,
 	local_mtime = tmp_inode->i_mtime;
 	local_size  = tmp_inode->i_size;
 
-	tmp_inode->i_atime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pData->LastAccessTime));
-	tmp_inode->i_mtime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pData->LastModificationTime));
-	tmp_inode->i_ctime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pData->LastStatusChange));
-
-	tmp_inode->i_mode = le64_to_cpu(pData->Permissions);
-	/* since we set the inode type below we need to mask off type
-	   to avoid strange results if bits above were corrupt */
-	tmp_inode->i_mode &= ~S_IFMT;
-	if (type == UNIX_FILE) {
-		*pobject_type = DT_REG;
-		tmp_inode->i_mode |= S_IFREG;
-	} else if (type == UNIX_SYMLINK) {
-		*pobject_type = DT_LNK;
-		tmp_inode->i_mode |= S_IFLNK;
-	} else if (type == UNIX_DIR) {
-		*pobject_type = DT_DIR;
-		tmp_inode->i_mode |= S_IFDIR;
-	} else if (type == UNIX_CHARDEV) {
-		*pobject_type = DT_CHR;
-		tmp_inode->i_mode |= S_IFCHR;
-		tmp_inode->i_rdev = MKDEV(le64_to_cpu(pData->DevMajor),
-				le64_to_cpu(pData->DevMinor) & MINORMASK);
-	} else if (type == UNIX_BLOCKDEV) {
-		*pobject_type = DT_BLK;
-		tmp_inode->i_mode |= S_IFBLK;
-		tmp_inode->i_rdev = MKDEV(le64_to_cpu(pData->DevMajor),
-				le64_to_cpu(pData->DevMinor) & MINORMASK);
-	} else if (type == UNIX_FIFO) {
-		*pobject_type = DT_FIFO;
-		tmp_inode->i_mode |= S_IFIFO;
-	} else if (type == UNIX_SOCKET) {
-		*pobject_type = DT_SOCK;
-		tmp_inode->i_mode |= S_IFSOCK;
-	} else {
-		/* safest to just call it a file */
-		*pobject_type = DT_REG;
-		tmp_inode->i_mode |= S_IFREG;
-		cFYI(1, ("unknown inode type %d", type));
-	}
-
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("object type: %d", type));
-#endif
-	tmp_inode->i_uid = le64_to_cpu(pData->Uid);
-	tmp_inode->i_gid = le64_to_cpu(pData->Gid);
-	tmp_inode->i_nlink = le64_to_cpu(pData->Nlinks);
-
-	spin_lock(&tmp_inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
-		/* can not safely change the file size here if the
-		client is writing to it due to potential races */
-		i_size_write(tmp_inode, end_of_file);
+	cifs_unix_info_to_inode(tmp_inode, pData, 1);
+	cifs_set_ops(tmp_inode);
 
-	/* 512 bytes (2**9) is the fake blocksize that must be used */
-	/* for this calculation, not the real blocksize */
-		tmp_inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
-	}
-	spin_unlock(&tmp_inode->i_lock);
-
-	if (S_ISREG(tmp_inode->i_mode)) {
-		cFYI(1, ("File inode"));
-		tmp_inode->i_op = &cifs_file_inode_ops;
-
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-				tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
-			else
-				tmp_inode->i_fop = &cifs_file_direct_ops;
+	if (!S_ISREG(tmp_inode->i_mode))
+		return;
 
-		} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-			tmp_inode->i_fop = &cifs_file_nobrl_ops;
-		else
-			tmp_inode->i_fop = &cifs_file_ops;
-
-		if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
-		   (cifs_sb->tcon->ses->server->maxBuf <
-			PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
-			tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
-		else
-			tmp_inode->i_data.a_ops = &cifs_addr_ops;
-
-		if (isNewInode)
-			return; /* No sense invalidating pages for new inode
-				   since we we have not started caching
-				   readahead file data yet */
+	/*
+	 * No sense invalidating pages for new inode
+	 * since we we have not started caching
+	 * readahead file data yet.
+	 */
+	if (isNewInode)
+		return;
 
-		if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
-			(local_size == tmp_inode->i_size)) {
-			cFYI(1, ("inode exists but unchanged"));
-		} else {
-			/* file may have changed on server */
-			cFYI(1, ("invalidate inode, readdir detected change"));
-			invalidate_remote_inode(tmp_inode);
-		}
-	} else if (S_ISDIR(tmp_inode->i_mode)) {
-		cFYI(1, ("Directory inode"));
-		tmp_inode->i_op = &cifs_dir_inode_ops;
-		tmp_inode->i_fop = &cifs_dir_ops;
-	} else if (S_ISLNK(tmp_inode->i_mode)) {
-		cFYI(1, ("Symbolic Link inode"));
-		tmp_inode->i_op = &cifs_symlink_inode_ops;
-/* tmp_inode->i_fop = *//* do not need to set to anything */
+	if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
+		(local_size == tmp_inode->i_size)) {
+		cFYI(1, ("inode exists but unchanged"));
 	} else {
-		cFYI(1, ("Special inode"));
-		init_special_inode(tmp_inode, tmp_inode->i_mode,
-				   tmp_inode->i_rdev);
+		/* file may have changed on server */
+		cFYI(1, ("invalidate inode, readdir detected change"));
+		invalidate_remote_inode(tmp_inode);
 	}
 }
 
@@ -968,7 +873,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 			cFYI(1, ("posix mkdir returned 0x%x", rc));
 			d_drop(direntry);
 		} else {
-			int obj_type;
 			if (pInfo->Type == cpu_to_le32(-1)) {
 				/* no return info, go query for it */
 				kfree(pInfo);
@@ -1004,7 +908,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 			/* we already checked in POSIXCreate whether
 			   frame was long enough */
 			posix_fill_in_inode(direntry->d_inode,
-					pInfo, &obj_type, 1 /* NewInode */);
+					pInfo, 1 /* NewInode */);
 #ifdef CONFIG_CIFS_DEBUG2
 			cFYI(1, ("instantiated dentry %p %s to inode %p",
 				direntry, direntry->d_name.name, newinode));
@@ -1214,9 +1118,8 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
 		} /* if we can not get memory just leave rc as EEXIST */
 	}
 
-	if (rc) {
+	if (rc)
 		cFYI(1, ("rename rc %d", rc));
-	}
 
 	if ((rc == -EIO) || (rc == -EEXIST)) {
 		int oplock = FALSE;
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index d24fe6880a04..5c792df13d62 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -30,7 +30,7 @@
 
 #define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2)
 
-int cifs_ioctl (struct inode *inode, struct file *filep,
+int cifs_ioctl(struct inode *inode, struct file *filep,
 		unsigned int command, unsigned long arg)
 {
 	int rc = -ENOTTY; /* strange error - but the precedent */
diff --git a/fs/cifs/md4.c b/fs/cifs/md4.c
index a2415c1a14db..a725c2609d67 100644
--- a/fs/cifs/md4.c
+++ b/fs/cifs/md4.c
@@ -56,7 +56,7 @@ lshift(__u32 x, int s)
 
 /* this applies md4 to 64 byte chunks */
 static void
-mdfour64(__u32 * M, __u32 * A, __u32 *B, __u32 * C, __u32 *D)
+mdfour64(__u32 *M, __u32 *A, __u32 *B, __u32 *C, __u32 *D)
 {
 	int j;
 	__u32 AA, BB, CC, DD;
@@ -137,7 +137,7 @@ mdfour64(__u32 * M, __u32 * A, __u32 *B, __u32 * C, __u32 *D)
 }
 
 static void
-copy64(__u32 * M, unsigned char *in)
+copy64(__u32 *M, unsigned char *in)
 {
 	int i;
 
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
index f13f96d42fcf..462bbfefd4b6 100644
--- a/fs/cifs/md5.c
+++ b/fs/cifs/md5.c
@@ -161,7 +161,7 @@ MD5Final(unsigned char digest[16], struct MD5Context *ctx)
 
 /* This is the central step in the MD5 algorithm. */
 #define MD5STEP(f, w, x, y, z, data, s) \
-	( w += f(x, y, z) + data,  w = w<<s | w>>(32-s),  w += x )
+	(w += f(x, y, z) + data,  w = w<<s | w>>(32-s),  w += x)
 
 /*
  * The core of the MD5 algorithm, this alters an existing MD5 hash to
@@ -302,9 +302,8 @@ hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
 	int i;
 
 	/* if key is longer than 64 bytes truncate it */
-	if (key_len > 64) {
+	if (key_len > 64)
 		key_len = 64;
-	}
 
 	/* start out by storing key in pads */
 	memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
@@ -359,9 +358,9 @@ hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
 {
 	struct HMACMD5Context ctx;
 	hmac_md5_init_limK_to_64(key, 16, &ctx);
-	if (data_len != 0) {
+	if (data_len != 0)
 		hmac_md5_update(data, data_len, &ctx);
-	}
+
 	hmac_md5_final(digest, &ctx);
 }
 #endif
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 15546c2354c5..2a42d9fedbb2 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/misc.c
  *
- *   Copyright (C) International Business Machines  Corp., 2002,2007
+ *   Copyright (C) International Business Machines  Corp., 2002,2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -320,9 +320,9 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 		if (treeCon->ses) {
 			if (treeCon->ses->capabilities & CAP_UNICODE)
 				buffer->Flags2 |= SMBFLG2_UNICODE;
-			if (treeCon->ses->capabilities & CAP_STATUS32) {
+			if (treeCon->ses->capabilities & CAP_STATUS32)
 				buffer->Flags2 |= SMBFLG2_ERR_STATUS;
-			}
+
 			/* Uid is not converted */
 			buffer->Uid = treeCon->ses->Suid;
 			buffer->Mid = GetNextMid(treeCon->ses->server);
@@ -610,7 +610,8 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
 
 	buffer = (unsigned char *) smb_buf;
 	for (i = 0, j = 0; i < smb_buf_length; i++, j++) {
-		if (i % 8 == 0) {	/* have reached the beginning of line */
+		if (i % 8 == 0) {
+			/* have reached the beginning of line */
 			printk(KERN_DEBUG "| ");
 			j = 0;
 		}
@@ -621,7 +622,8 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
 		else
 			debug_line[1 + (2 * j)] = '_';
 
-		if (i % 8 == 7) { /* reached end of line, time to print ascii */
+		if (i % 8 == 7) {
+			/* reached end of line, time to print ascii */
 			debug_line[16] = 0;
 			printk(" | %s\n", debug_line);
 		}
@@ -631,7 +633,7 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
 		debug_line[2 * j] = ' ';
 		debug_line[1 + (2 * j)] = ' ';
 	}
-	printk( " | %s\n", debug_line);
+	printk(" | %s\n", debug_line);
 	return;
 }
 
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 646e1f06941b..3b5a5ce882b6 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/netmisc.c
  *
- *   Copyright (c) International Business Machines  Corp., 2002
+ *   Copyright (c) International Business Machines  Corp., 2002,2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   Error mapping routines from Samba libsmb/errormap.c
@@ -150,9 +150,7 @@ static int canonicalize_unc(char *cp)
 		if (cp[i] == '\\')
 			break;
 		if (cp[i] == '/') {
-#ifdef CONFIG_CIFS_DEBUG2
-			cFYI(1, ("change slash to backslash in malformed UNC"));
-#endif
+			cFYI(DBG2, ("change slash to \\ in malformed UNC"));
 			cp[i] = '\\';
 			return 1;
 		}
@@ -178,9 +176,7 @@ cifs_inet_pton(int address_family, char *cp, void *dst)
 	} else if (address_family == AF_INET6) {
 		ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
 	}
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("address conversion returned %d for %s", ret, cp));
-#endif
+	cFYI(DBG2, ("address conversion returned %d for %s", ret, cp));
 	if (ret > 0)
 		ret = 1;
 	return ret;
@@ -253,7 +249,8 @@ static const struct {
 	ERRDOS, 87, NT_STATUS_INVALID_PARAMETER_MIX}, {
 	ERRHRD, ERRgeneral, NT_STATUS_INVALID_QUOTA_LOWER}, {
 	ERRHRD, ERRgeneral, NT_STATUS_DISK_CORRUPT_ERROR}, {
-	ERRDOS, ERRbadfile, NT_STATUS_OBJECT_NAME_INVALID}, {	/* mapping changed since shell does lookup on * and expects file not found */
+	 /* mapping changed since shell does lookup on * expects FileNotFound */
+	ERRDOS, ERRbadfile, NT_STATUS_OBJECT_NAME_INVALID}, {
 	ERRDOS, ERRbadfile, NT_STATUS_OBJECT_NAME_NOT_FOUND}, {
 	ERRDOS, ERRalreadyexists, NT_STATUS_OBJECT_NAME_COLLISION}, {
 	ERRHRD, ERRgeneral, NT_STATUS_HANDLE_NOT_WAITABLE}, {
@@ -820,7 +817,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
 	/* old style errors */
 
 	/* DOS class smb error codes - map DOS */
-	if (smberrclass == ERRDOS) {  /* 1 byte field no need to byte reverse */
+	if (smberrclass == ERRDOS) {
+		/* 1 byte field no need to byte reverse */
 		for (i = 0;
 		     i <
 		     sizeof(mapping_table_ERRDOS) /
@@ -834,7 +832,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
 			}
 			/* else try next error mapping one to see if match */
 		}
-	} else if (smberrclass == ERRSRV) {   /* server class of error codes */
+	} else if (smberrclass == ERRSRV) {
+		/* server class of error codes */
 		for (i = 0;
 		     i <
 		     sizeof(mapping_table_ERRSRV) /
@@ -922,8 +921,8 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
 {
 	struct timespec ts;
 	int sec, min, days, month, year;
-	SMB_TIME * st = (SMB_TIME *)&time;
-	SMB_DATE * sd = (SMB_DATE *)&date;
+	SMB_TIME *st = (SMB_TIME *)&time;
+	SMB_DATE *sd = (SMB_DATE *)&date;
 
 	cFYI(1, ("date %d time %d", date, time));
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0f22def4bdff..32b445edc882 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -3,7 +3,7 @@
  *
  *   Directory search handling
  *
- *   Copyright (C) International Business Machines  Corp., 2004, 2007
+ *   Copyright (C) International Business Machines  Corp., 2004, 2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -42,17 +42,18 @@ static void dump_cifs_file_struct(struct file *file, char *label)
 			cFYI(1, ("empty cifs private file data"));
 			return;
 		}
-		if (cf->invalidHandle) {
+		if (cf->invalidHandle)
 			cFYI(1, ("invalid handle"));
-		}
-		if (cf->srch_inf.endOfSearch) {
+		if (cf->srch_inf.endOfSearch)
 			cFYI(1, ("end of search"));
-		}
-		if (cf->srch_inf.emptyDir) {
+		if (cf->srch_inf.emptyDir)
 			cFYI(1, ("empty dir"));
-		}
 	}
 }
+#else
+static inline void dump_cifs_file_struct(struct file *file, char *label)
+{
+}
 #endif /* DEBUG2 */
 
 /* Returns one if new inode created (which therefore needs to be hashed) */
@@ -150,7 +151,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
 	} else { /* legacy, OS2 and DOS style */
 /*		struct timespec ts;*/
-		FIND_FILE_STANDARD_INFO * pfindData =
+		FIND_FILE_STANDARD_INFO *pfindData =
 			(FIND_FILE_STANDARD_INFO *)buf;
 
 		tmp_inode->i_mtime = cnvrtDosUnixTm(
@@ -198,9 +199,8 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 	if (attr & ATTR_DIRECTORY) {
 		*pobject_type = DT_DIR;
 		/* override default perms since we do not lock dirs */
-		if (atomic_read(&cifsInfo->inUse) == 0) {
+		if (atomic_read(&cifsInfo->inUse) == 0)
 			tmp_inode->i_mode = cifs_sb->mnt_dir_mode;
-		}
 		tmp_inode->i_mode |= S_IFDIR;
 	} else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) &&
 		   (attr & ATTR_SYSTEM)) {
@@ -231,9 +231,8 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 	} /* could add code here - to validate if device or weird share type? */
 
 	/* can not fill in nlink here as in qpathinfo version and Unx search */
-	if (atomic_read(&cifsInfo->inUse) == 0) {
+	if (atomic_read(&cifsInfo->inUse) == 0)
 		atomic_set(&cifsInfo->inUse, 1);
-	}
 
 	spin_lock(&tmp_inode->i_lock);
 	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
@@ -461,9 +460,8 @@ static int initiate_cifs_search(const int xid, struct file *file)
 
 	full_path = build_path_from_dentry(file->f_path.dentry);
 
-	if (full_path == NULL) {
+	if (full_path == NULL)
 		return -ENOMEM;
-	}
 
 	cFYI(1, ("Full path: %s start at: %lld", full_path, file->f_pos));
 
@@ -471,9 +469,9 @@ ffirst_retry:
 	/* test for Unix extensions */
 	/* but now check for them on the share/mount not on the SMB session */
 /*	if (pTcon->ses->capabilities & CAP_UNIX) { */
-	if (pTcon->unix_ext) {
+	if (pTcon->unix_ext)
 		cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX;
-	} else if ((pTcon->ses->capabilities &
+	else if ((pTcon->ses->capabilities &
 			(CAP_NT_SMBS | CAP_NT_FIND)) == 0) {
 		cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD;
 	} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
@@ -514,10 +512,10 @@ static int cifs_unicode_bytelen(char *str)
 static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
 {
 	char *new_entry;
-	FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry;
+	FILE_DIRECTORY_INFO *pDirInfo = (FILE_DIRECTORY_INFO *)old_entry;
 
 	if (level == SMB_FIND_FILE_INFO_STANDARD) {
-		FIND_FILE_STANDARD_INFO * pfData;
+		FIND_FILE_STANDARD_INFO *pfData;
 		pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo;
 
 		new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) +
@@ -553,7 +551,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
 	int len = 0;
 
 	if (cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
-		FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry;
+		FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		if (cfile->srch_inf.unicode) {
 			len = cifs_unicode_bytelen(filename);
@@ -562,30 +560,30 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
 			len = strnlen(filename, 5);
 		}
 	} else if (cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) {
-		FILE_DIRECTORY_INFO * pFindData =
+		FILE_DIRECTORY_INFO *pFindData =
 			(FILE_DIRECTORY_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
 	} else if (cfile->srch_inf.info_level ==
 			SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
-		FILE_FULL_DIRECTORY_INFO * pFindData =
+		FILE_FULL_DIRECTORY_INFO *pFindData =
 			(FILE_FULL_DIRECTORY_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
 	} else if (cfile->srch_inf.info_level ==
 			SMB_FIND_FILE_ID_FULL_DIR_INFO) {
-		SEARCH_ID_FULL_DIR_INFO * pFindData =
+		SEARCH_ID_FULL_DIR_INFO *pFindData =
 			(SEARCH_ID_FULL_DIR_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
 	} else if (cfile->srch_inf.info_level ==
 			SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
-		FILE_BOTH_DIRECTORY_INFO * pFindData =
+		FILE_BOTH_DIRECTORY_INFO *pFindData =
 			(FILE_BOTH_DIRECTORY_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
 	} else if (cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) {
-		FIND_FILE_STANDARD_INFO * pFindData =
+		FIND_FILE_STANDARD_INFO *pFindData =
 			(FIND_FILE_STANDARD_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = pFindData->FileNameLength;
@@ -666,9 +664,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 	. and .. for the root of a drive and for those we need
 	to start two entries earlier */
 
-#ifdef CONFIG_CIFS_DEBUG2
 	dump_cifs_file_struct(file, "In fce ");
-#endif
 	if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
 	     is_dir_changed(file)) ||
 	   (index_to_find < first_entry_in_buffer)) {
@@ -718,7 +714,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 		pos_in_buf = index_to_find - first_entry_in_buffer;
 		cFYI(1, ("found entry - pos_in_buf %d", pos_in_buf));
 
-		for (i=0; (i < (pos_in_buf)) && (current_entry != NULL); i++) {
+		for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) {
 			/* go entry by entry figuring out which is first */
 			current_entry = nxt_dir_entry(current_entry, end_of_smb,
 						cifsFile->srch_inf.info_level);
@@ -793,7 +789,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
 	} else if (level == SMB_FIND_FILE_INFO_STANDARD) {
-		FIND_FILE_STANDARD_INFO * pFindData =
+		FIND_FILE_STANDARD_INFO *pFindData =
 			(FIND_FILE_STANDARD_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		/* one byte length, no name conversion */
@@ -928,7 +924,7 @@ static int cifs_save_resume_key(const char *current_entry,
 	level = cifsFile->srch_inf.info_level;
 
 	if (level == SMB_FIND_FILE_UNIX) {
-		FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry;
+		FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
 
 		filename = &pFindData->FileName[0];
 		if (cifsFile->srch_inf.unicode) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d2153abcba6d..ed150efbe27c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -417,10 +417,6 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 
 		calc_lanman_hash(ses, lnm_session_key);
 		ses->flags |= CIFS_SES_LANMAN;
-/* #ifdef CONFIG_CIFS_DEBUG2
-		cifs_dump_mem("cryptkey: ",ses->server->cryptKey,
-			CIFS_SESS_KEY_SIZE);
-#endif */
 		memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
 		bcc_ptr += CIFS_SESS_KEY_SIZE;
 
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index cfa6d21fb4e8..04943c976f98 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -114,42 +114,42 @@ static uchar sbox[8][4][16] = {
 	{{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
 	 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
 	 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
-	 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13}},
+	 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
 
 	{{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
 	 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
 	 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
-	 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9}},
+	 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
 
 	{{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
 	 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
 	 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
-	 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12}},
+	 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
 
 	{{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
 	 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
 	 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
-	 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14}},
+	 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
 
 	{{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
 	 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
 	 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
-	 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3}},
+	 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
 
 	{{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
 	 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
 	 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
-	 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13}},
+	 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
 
 	{{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
 	 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
 	 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
-	 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12}},
+	 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
 
 	{{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
 	 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
 	 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
-	 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11}}
+	 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
 };
 
 static void
@@ -313,9 +313,8 @@ str_to_key(unsigned char *str, unsigned char *key)
 	key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
 	key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
 	key[7] = str[6] & 0x7F;
-	for (i = 0; i < 8; i++) {
+	for (i = 0; i < 8; i++)
 		key[i] = (key[i] << 1);
-	}
 }
 
 static void
@@ -344,9 +343,8 @@ smbhash(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
 
 	dohash(outb, inb, keyb, forw);
 
-	for (i = 0; i < 8; i++) {
+	for (i = 0; i < 8; i++)
 		out[i] = 0;
-	}
 
 	for (i = 0; i < 64; i++) {
 		if (outb[i])
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 50b623ad9320..3612d6c0a0bb 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/transport.c
  *
- *   Copyright (C) International Business Machines  Corp., 2002,2007
+ *   Copyright (C) International Business Machines  Corp., 2002,2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *   Jeremy Allison (jra@samba.org) 2006.
  *
@@ -358,9 +358,9 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
 	} else if (ses->status != CifsGood) {
 		/* check if SMB session is bad because we are setting it up */
 		if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
-			(in_buf->Command != SMB_COM_NEGOTIATE)) {
+			(in_buf->Command != SMB_COM_NEGOTIATE))
 			return -EAGAIN;
-		} /* else ok - we are setting up session */
+		/* else ok - we are setting up session */
 	}
 	*ppmidQ = AllocMidQEntry(in_buf, ses);
 	if (*ppmidQ == NULL)
@@ -437,9 +437,8 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
 	iov[0].iov_len = in_buf->smb_buf_length + 4;
 	flags |= CIFS_NO_RESP;
 	rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
-#ifdef CONFIG_CIFS_DEBUG2
-	cFYI(1, ("SendRcvNoR flags %d rc %d", flags, rc));
-#endif
+	cFYI(DBG2, ("SendRcvNoRsp flags %d rc %d", flags, rc));
+
 	return rc;
 }
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 54e8ef96cb79..8cd6a445b017 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -139,9 +139,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 	} else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) {
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
 			goto set_ea_exit;
-		if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) {
+		if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0)
 			cFYI(1, ("attempt to set cifs inode metadata"));
-		}
+
 		ea_name += 5; /* skip past user. prefix */
 		rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
 			(__u16)value_size, cifs_sb->local_nls,
@@ -262,7 +262,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-		else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+		else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 			__u16 fid;
 			int oplock = FALSE;
 			struct cifs_ntsd *pacl = NULL;
@@ -303,11 +303,10 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 	} else if (strncmp(ea_name,
 		  CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) {
 		cFYI(1, ("Security xattr namespace not supported yet"));
-	} else {
+	} else
 		cFYI(1,
 		    ("illegal xattr request %s (only user namespace supported)",
 			ea_name));
-	}
 
 	/* We could add an additional check for streams ie
 	    if proc/fs/cifs/streamstoxattr is set then
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 350680fd7da7..0c3b618c15b3 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -23,7 +23,6 @@
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/mutex.h>
-#include <asm/semaphore.h>
 
 #include <asm/uaccess.h>
 
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d26e2826ba5b..e9602d85c11d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -29,10 +29,6 @@
 
 #define DEBUGFS_MAGIC	0x64626720
 
-/* declared over in file.c */
-extern struct file_operations debugfs_file_operations;
-extern struct inode_operations debugfs_link_operations;
-
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index c3ad1dff3b25..7ceaea3d983b 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -114,7 +114,7 @@ struct cluster_attribute {
 };
 
 static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
-			   unsigned int *info_field, int check_zero,
+			   int *info_field, int check_zero,
 			   const char *buf, size_t len)
 {
 	unsigned int x;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index d30ea8b433a2..7a8824f475f2 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -37,7 +37,6 @@
 #include <linux/jhash.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
 #include <linux/dlm.h>
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 8f250ac8b928..2d3d1027ce2b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -165,7 +165,7 @@ void dlm_print_lkb(struct dlm_lkb *lkb)
 	       lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type);
 }
 
-void dlm_print_rsb(struct dlm_rsb *r)
+static void dlm_print_rsb(struct dlm_rsb *r)
 {
 	printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n",
 	       r->res_nodeid, r->res_flags, r->res_first_lkid,
@@ -1956,8 +1956,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
 			list_del_init(&lkb->lkb_rsb_lookup);
 			r->res_first_lkid = lkb->lkb_id;
 			_request_lock(r, lkb);
-		} else
-			r->res_nodeid = -1;
+		}
 		break;
 
 	default:
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 05d9c82e646b..88e93c80cc22 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -13,7 +13,6 @@
 #ifndef __LOCK_DOT_H__
 #define __LOCK_DOT_H__
 
-void dlm_print_rsb(struct dlm_rsb *r);
 void dlm_dump_rsb(struct dlm_rsb *r);
 void dlm_print_lkb(struct dlm_lkb *lkb);
 void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index dc74b186145d..6df1debdccce 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -263,52 +263,102 @@ out:
 	return 0;
 }
 
-/* This function must zero any hole we create */
+/**
+ * ecryptfs_prepare_write
+ * @file: The eCryptfs file
+ * @page: The eCryptfs page
+ * @from: The start byte from which we will write
+ * @to: The end byte to which we will write
+ *
+ * This function must zero any hole we create
+ *
+ * Returns zero on success; non-zero otherwise
+ */
 static int ecryptfs_prepare_write(struct file *file, struct page *page,
 				  unsigned from, unsigned to)
 {
-	int rc = 0;
 	loff_t prev_page_end_size;
+	int rc = 0;
 
 	if (!PageUptodate(page)) {
-		rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
-						      PAGE_CACHE_SIZE,
-						      page->mapping->host);
-		if (rc) {
-			printk(KERN_ERR "%s: Error attemping to read lower "
-			       "page segment; rc = [%d]\n", __FUNCTION__, rc);
-			ClearPageUptodate(page);
-			goto out;
-		} else
+		struct ecryptfs_crypt_stat *crypt_stat =
+			&ecryptfs_inode_to_private(
+				file->f_path.dentry->d_inode)->crypt_stat;
+
+		if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
+		    || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
+			rc = ecryptfs_read_lower_page_segment(
+				page, page->index, 0, PAGE_CACHE_SIZE,
+				page->mapping->host);
+			if (rc) {
+				printk(KERN_ERR "%s: Error attemping to read "
+				       "lower page segment; rc = [%d]\n",
+				       __FUNCTION__, rc);
+				ClearPageUptodate(page);
+				goto out;
+			} else
+				SetPageUptodate(page);
+		} else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
+			if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
+				rc = ecryptfs_copy_up_encrypted_with_header(
+					page, crypt_stat);
+				if (rc) {
+					printk(KERN_ERR "%s: Error attempting "
+					       "to copy the encrypted content "
+					       "from the lower file whilst "
+					       "inserting the metadata from "
+					       "the xattr into the header; rc "
+					       "= [%d]\n", __FUNCTION__, rc);
+					ClearPageUptodate(page);
+					goto out;
+				}
+				SetPageUptodate(page);
+			} else {
+				rc = ecryptfs_read_lower_page_segment(
+					page, page->index, 0, PAGE_CACHE_SIZE,
+					page->mapping->host);
+				if (rc) {
+					printk(KERN_ERR "%s: Error reading "
+					       "page; rc = [%d]\n",
+					       __FUNCTION__, rc);
+					ClearPageUptodate(page);
+					goto out;
+				}
+				SetPageUptodate(page);
+			}
+		} else {
+			rc = ecryptfs_decrypt_page(page);
+			if (rc) {
+				printk(KERN_ERR "%s: Error decrypting page "
+				       "at index [%ld]; rc = [%d]\n",
+				       __FUNCTION__, page->index, rc);
+				ClearPageUptodate(page);
+				goto out;
+			}
 			SetPageUptodate(page);
+		}
 	}
-
 	prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT);
-
-	/*
-	 * If creating a page or more of holes, zero them out via truncate.
-	 * Note, this will increase i_size.
-	 */
+	/* If creating a page or more of holes, zero them out via truncate.
+	 * Note, this will increase i_size. */
 	if (page->index != 0) {
 		if (prev_page_end_size > i_size_read(page->mapping->host)) {
 			rc = ecryptfs_truncate(file->f_path.dentry,
 					       prev_page_end_size);
 			if (rc) {
-				printk(KERN_ERR "Error on attempt to "
+				printk(KERN_ERR "%s: Error on attempt to "
 				       "truncate to (higher) offset [%lld];"
-				       " rc = [%d]\n", prev_page_end_size, rc);
+				       " rc = [%d]\n", __FUNCTION__,
+				       prev_page_end_size, rc);
 				goto out;
 			}
 		}
 	}
-	/*
-	 * Writing to a new page, and creating a small hole from start of page?
-	 * Zero it out.
-	 */
-	if ((i_size_read(page->mapping->host) == prev_page_end_size) &&
-	    (from != 0)) {
+	/* Writing to a new page, and creating a small hole from start
+	 * of page?  Zero it out. */
+	if ((i_size_read(page->mapping->host) == prev_page_end_size)
+	    && (from != 0))
 		zero_user(page, 0, PAGE_CACHE_SIZE);
-	}
 out:
 	return rc;
 }
diff --git a/fs/exec.c b/fs/exec.c
index a44b142fb460..54a0a557b678 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -173,8 +173,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		return NULL;
 
 	if (write) {
-		struct rlimit *rlim = current->signal->rlim;
 		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+		struct rlimit *rlim;
+
+		/*
+		 * We've historically supported up to 32 pages (ARG_MAX)
+		 * of argument strings even with small stacks
+		 */
+		if (size <= ARG_MAX)
+			return page;
 
 		/*
 		 * Limit to 1/4-th the stack size for the argv+env strings.
@@ -183,6 +190,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		 *  - the program will have a reasonable amount of stack left
 		 *    to work from.
 		 */
+		rlim = current->signal->rlim;
 		if (size > rlim[RLIMIT_STACK].rlim_cur / 4) {
 			put_page(page);
 			return NULL;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 18769cc32377..ad5360664082 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -806,8 +806,8 @@ static match_table_t tokens = {
 	{Opt_quota, "quota"},
 	{Opt_usrquota, "usrquota"},
 	{Opt_barrier, "barrier=%u"},
-	{Opt_err, NULL},
 	{Opt_resize, "resize"},
+	{Opt_err, NULL},
 };
 
 static ext3_fsblk_t get_sb_block(void **data)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3dd..2f2820f88e41 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		 ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
-	unsigned long start;
 	int bit, bit_max;
 	unsigned free_blocks, group_blocks;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 	free_blocks = group_blocks - bit_max;
 
 	if (bh) {
+		ext4_fsblk_t start;
+
 		for (bit = 0; bit < bit_max; bit++)
 			ext4_set_bit(bit, bh->b_data);
 
-		start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
-			le32_to_cpu(sbi->s_es->s_first_data_block);
+		start = ext4_group_first_block_no(sb, block_group);
 
 		/* Set bits for block and inode bitmaps, and inode table */
 		ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -127,6 +127,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
 	}
 
+	if (sbi->s_log_groups_per_flex)
+		return free_blocks;
 	return free_blocks - sbi->s_itb_per_group - 2;
 }
 
@@ -752,13 +754,18 @@ do_more:
 	jbd_unlock_bh_state(bitmap_bh);
 
 	spin_lock(sb_bgl_lock(sbi, block_group));
-	desc->bg_free_blocks_count =
-		cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
-			group_freed);
+	le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
 	spin_unlock(sb_bgl_lock(sbi, block_group));
 	percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+	if (sbi->s_log_groups_per_flex) {
+		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
+		spin_lock(sb_bgl_lock(sbi, flex_group));
+		sbi->s_flex_groups[flex_group].free_blocks += count;
+		spin_unlock(sb_bgl_lock(sbi, flex_group));
+	}
+
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
 	err = ext4_journal_dirty_metadata(handle, bitmap_bh);
@@ -1823,12 +1830,18 @@ allocated:
 	spin_lock(sb_bgl_lock(sbi, group_no));
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-	gdp->bg_free_blocks_count =
-			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+	le16_add_cpu(&gdp->bg_free_blocks_count, -num);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
 	spin_unlock(sb_bgl_lock(sbi, group_no));
 	percpu_counter_sub(&sbi->s_freeblocks_counter, num);
 
+	if (sbi->s_log_groups_per_flex) {
+		ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
+		spin_lock(sb_bgl_lock(sbi, flex_group));
+		sbi->s_flex_groups[flex_group].free_blocks -= num;
+		spin_unlock(sb_bgl_lock(sbi, flex_group));
+	}
+
 	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
 	err = ext4_journal_dirty_metadata(handle, gdp_bh);
 	if (!fatal)
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 33888bb58144..88c97f7312be 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -42,11 +42,11 @@ const struct file_operations ext4_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= ext4_readdir,		/* we take BKL. needed?*/
-	.ioctl		= ext4_ioctl,		/* BKL held */
+	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
-	.fsync		= ext4_sync_file,	/* BKL held */
+	.fsync		= ext4_sync_file,
 	.release	= ext4_release_dir,
 };
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bc7081f1fbe8..7782121ccefb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -148,6 +148,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	ext4_fsblk_t bg_start;
+	ext4_fsblk_t last_block;
 	ext4_grpblk_t colour;
 	int depth;
 
@@ -169,8 +170,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 	/* OK. use inode's group */
 	bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
 		le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
-	colour = (current->pid % 16) *
+	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+
+	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
+		colour = (current->pid % 16) *
 			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+	else
+		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
 	return bg_start + colour + block;
 }
 
@@ -349,7 +355,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 #define ext4_ext_show_leaf(inode,path)
 #endif
 
-static void ext4_ext_drop_refs(struct ext4_ext_path *path)
+void ext4_ext_drop_refs(struct ext4_ext_path *path)
 {
 	int depth = path->p_depth;
 	int i;
@@ -608,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 
 	ix->ei_block = cpu_to_le32(logical);
 	ext4_idx_store_pblock(ix, ptr);
-	curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
+	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
 
 	BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
 			     > le16_to_cpu(curp->p_hdr->eh_max));
@@ -730,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	}
 	if (m) {
 		memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
-		neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m);
+		le16_add_cpu(&neh->eh_entries, m);
 	}
 
 	set_buffer_uptodate(bh);
@@ -747,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		err = ext4_ext_get_access(handle, inode, path + depth);
 		if (err)
 			goto cleanup;
-		path[depth].p_hdr->eh_entries =
-		     cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
+		le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
 		err = ext4_ext_dirty(handle, inode, path + depth);
 		if (err)
 			goto cleanup;
@@ -811,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		if (m) {
 			memmove(++fidx, path[i].p_idx - m,
 				sizeof(struct ext4_extent_idx) * m);
-			neh->eh_entries =
-				cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
+			le16_add_cpu(&neh->eh_entries, m);
 		}
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
@@ -828,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 			err = ext4_ext_get_access(handle, inode, path + i);
 			if (err)
 				goto cleanup;
-			path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m);
+			le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
 			err = ext4_ext_dirty(handle, inode, path + i);
 			if (err)
 				goto cleanup;
@@ -1363,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
 				* sizeof(struct ext4_extent);
 			memmove(ex + 1, ex + 2, len);
 		}
-		eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
+		le16_add_cpu(&eh->eh_entries, -1);
 		merge_done = 1;
 		WARN_ON(eh->eh_entries == 0);
 		if (!eh->eh_entries)
@@ -1554,7 +1558,7 @@ has_space:
 		path[depth].p_ext = nearex;
 	}
 
-	eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
+	le16_add_cpu(&eh->eh_entries, 1);
 	nearex = path[depth].p_ext;
 	nearex->ee_block = newext->ee_block;
 	ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1693,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	err = ext4_ext_get_access(handle, inode, path);
 	if (err)
 		return err;
-	path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
+	le16_add_cpu(&path->p_hdr->eh_entries, -1);
 	err = ext4_ext_dirty(handle, inode, path);
 	if (err)
 		return err;
@@ -1896,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		if (num == 0) {
 			/* this extent is removed; mark slot entirely unused */
 			ext4_ext_store_pblock(ex, 0);
-			eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+			le16_add_cpu(&eh->eh_entries, -1);
 		}
 
 		ex->ee_block = cpu_to_le32(block);
@@ -2132,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
 #endif
 }
 
+static void bi_complete(struct bio *bio, int error)
+{
+	complete((struct completion *)bio->bi_private);
+}
+
+/* FIXME!! we need to try to merge to left or right after zerout  */
+static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+{
+	int ret = -EIO;
+	struct bio *bio;
+	int blkbits, blocksize;
+	sector_t ee_pblock;
+	struct completion event;
+	unsigned int ee_len, len, done, offset;
+
+
+	blkbits   = inode->i_blkbits;
+	blocksize = inode->i_sb->s_blocksize;
+	ee_len    = ext4_ext_get_actual_len(ex);
+	ee_pblock = ext_pblock(ex);
+
+	/* convert ee_pblock in 512 byte sector */
+	ee_pblock = ee_pblock << (blkbits >> 9);
+
+
+	while (ee_len > 0) {
+
+		if (ee_len > BIO_MAX_PAGES)
+			len = BIO_MAX_PAGES;
+		else
+			len = ee_len;
+
+		bio = bio_alloc(GFP_NOIO, len);
+		if (!bio)
+			return -ENOMEM;
+		bio->bi_sector = ee_pblock;
+		bio->bi_bdev   = inode->i_sb->s_bdev;
+
+		done = 0;
+		offset = 0;
+		while (done < len) {
+			ret = bio_add_page(bio, ZERO_PAGE(0),
+							blocksize, offset);
+			if (ret != blocksize) {
+				/* We can't add any more page because of
+				 * hardware limitation. Start a new bio
+				 */
+				break;
+			}
+			done++;
+			offset += blocksize;
+			if (offset >= PAGE_CACHE_SIZE)
+				offset = 0;
+		}
+
+		init_completion(&event);
+		bio->bi_private = &event;
+		bio->bi_end_io = bi_complete;
+		submit_bio(WRITE, bio);
+		wait_for_completion(&event);
+
+		if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+			ret = 0;
+		else {
+			ret = -EIO;
+			break;
+		}
+		bio_put(bio);
+		ee_len    -= done;
+		ee_pblock += done  << (blkbits - 9);
+	}
+	return ret;
+}
+
+#define EXT4_EXT_ZERO_LEN 7
+
 /*
  * This function is called by ext4_ext_get_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2148,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 						ext4_lblk_t iblock,
 						unsigned long max_blocks)
 {
-	struct ext4_extent *ex, newex;
+	struct ext4_extent *ex, newex, orig_ex;
 	struct ext4_extent *ex1 = NULL;
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
@@ -2167,6 +2247,25 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
 	ex2 = ex;
+	orig_ex.ee_block = ex->ee_block;
+	orig_ex.ee_len   = cpu_to_le16(ee_len);
+	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+
+	err = ext4_ext_get_access(handle, inode, path + depth);
+	if (err)
+		goto out;
+	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
+	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+		err =  ext4_ext_zeroout(inode, &orig_ex);
+		if (err)
+			goto fix_extent_len;
+		/* update the extent length and mark as initialized */
+		ex->ee_block = orig_ex.ee_block;
+		ex->ee_len   = orig_ex.ee_len;
+		ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+		ext4_ext_dirty(handle, inode, path + depth);
+		return le16_to_cpu(ex->ee_len);
+	}
 
 	/* ex1: ee_block to iblock - 1 : uninitialized */
 	if (iblock > ee_block) {
@@ -2185,33 +2284,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	/* ex3: to ee_block + ee_len : uninitialised */
 	if (allocated > max_blocks) {
 		unsigned int newdepth;
+		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
+		if (allocated <= EXT4_EXT_ZERO_LEN) {
+			/* Mark first half uninitialized.
+			 * Mark second half initialized and zero out the
+			 * initialized extent
+			 */
+			ex->ee_block = orig_ex.ee_block;
+			ex->ee_len   = cpu_to_le16(ee_len - allocated);
+			ext4_ext_mark_uninitialized(ex);
+			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+			ext4_ext_dirty(handle, inode, path + depth);
+
+			ex3 = &newex;
+			ex3->ee_block = cpu_to_le32(iblock);
+			ext4_ext_store_pblock(ex3, newblock);
+			ex3->ee_len = cpu_to_le16(allocated);
+			err = ext4_ext_insert_extent(handle, inode, path, ex3);
+			if (err == -ENOSPC) {
+				err =  ext4_ext_zeroout(inode, &orig_ex);
+				if (err)
+					goto fix_extent_len;
+				ex->ee_block = orig_ex.ee_block;
+				ex->ee_len   = orig_ex.ee_len;
+				ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+				ext4_ext_dirty(handle, inode, path + depth);
+				return le16_to_cpu(ex->ee_len);
+
+			} else if (err)
+				goto fix_extent_len;
+
+			return allocated;
+		}
 		ex3 = &newex;
 		ex3->ee_block = cpu_to_le32(iblock + max_blocks);
 		ext4_ext_store_pblock(ex3, newblock + max_blocks);
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3);
-		if (err)
-			goto out;
+		if (err == -ENOSPC) {
+			err =  ext4_ext_zeroout(inode, &orig_ex);
+			if (err)
+				goto fix_extent_len;
+			/* update the extent length and mark as initialized */
+			ex->ee_block = orig_ex.ee_block;
+			ex->ee_len   = orig_ex.ee_len;
+			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+			ext4_ext_dirty(handle, inode, path + depth);
+			return le16_to_cpu(ex->ee_len);
+
+		} else if (err)
+			goto fix_extent_len;
 		/*
 		 * The depth, and hence eh & ex might change
 		 * as part of the insert above.
 		 */
 		newdepth = ext_depth(inode);
+		/*
+		 * update the extent length after successfull insert of the
+		 * split extent
+		 */
+		orig_ex.ee_len = cpu_to_le16(ee_len -
+						ext4_ext_get_actual_len(ex3));
 		if (newdepth != depth) {
 			depth = newdepth;
-			path = ext4_ext_find_extent(inode, iblock, NULL);
+			ext4_ext_drop_refs(path);
+			path = ext4_ext_find_extent(inode, iblock, path);
 			if (IS_ERR(path)) {
 				err = PTR_ERR(path);
-				path = NULL;
 				goto out;
 			}
 			eh = path[depth].p_hdr;
 			ex = path[depth].p_ext;
 			if (ex2 != &newex)
 				ex2 = ex;
+
+			err = ext4_ext_get_access(handle, inode, path + depth);
+			if (err)
+				goto out;
 		}
 		allocated = max_blocks;
+
+		/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
+		 * to insert a extent in the middle zerout directly
+		 * otherwise give the extent a chance to merge to left
+		 */
+		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
+							iblock != ee_block) {
+			err =  ext4_ext_zeroout(inode, &orig_ex);
+			if (err)
+				goto fix_extent_len;
+			/* update the extent length and mark as initialized */
+			ex->ee_block = orig_ex.ee_block;
+			ex->ee_len   = orig_ex.ee_len;
+			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+			ext4_ext_dirty(handle, inode, path + depth);
+			return le16_to_cpu(ex->ee_len);
+		}
 	}
 	/*
 	 * If there was a change of depth as part of the
@@ -2230,9 +2399,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ex2->ee_len = cpu_to_le16(allocated);
 	if (ex2 != ex)
 		goto insert;
-	err = ext4_ext_get_access(handle, inode, path + depth);
-	if (err)
-		goto out;
 	/*
 	 * New (initialized) extent starts from the first block
 	 * in the current extent. i.e., ex2 == ex
@@ -2271,14 +2437,47 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex);
+	if (err == -ENOSPC) {
+		err =  ext4_ext_zeroout(inode, &orig_ex);
+		if (err)
+			goto fix_extent_len;
+		/* update the extent length and mark as initialized */
+		ex->ee_block = orig_ex.ee_block;
+		ex->ee_len   = orig_ex.ee_len;
+		ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+		ext4_ext_dirty(handle, inode, path + depth);
+		return le16_to_cpu(ex->ee_len);
+	} else if (err)
+		goto fix_extent_len;
 out:
 	return err ? err : allocated;
+
+fix_extent_len:
+	ex->ee_block = orig_ex.ee_block;
+	ex->ee_len   = orig_ex.ee_len;
+	ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+	ext4_ext_mark_uninitialized(ex);
+	ext4_ext_dirty(handle, inode, path + depth);
+	return err;
 }
 
 /*
+ * Block allocation/map/preallocation routine for extents based files
+ *
+ *
  * Need to be called with
  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
  * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
+ *
+ * return > 0, number of of blocks already mapped/allocated
+ *          if create == 0 and these are pre-allocated blocks
+ *          	buffer head is unmapped
+ *          otherwise blocks are mapped
+ *
+ * return = 0, if plain look up failed (blocks have not been allocated)
+ *          buffer head is unmapped
+ *
+ * return < 0, error case.
  */
 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			ext4_lblk_t iblock,
@@ -2623,7 +2822,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	 * modify 1 super block, 1 block bitmap and 1 group descriptor.
 	 */
 	credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
-	down_write((&EXT4_I(inode)->i_data_sem));
+	mutex_lock(&inode->i_mutex);
 retry:
 	while (ret >= 0 && ret < max_blocks) {
 		block = block + ret;
@@ -2634,16 +2833,17 @@ retry:
 			break;
 		}
 
-		ret = ext4_ext_get_blocks(handle, inode, block,
+		ret = ext4_get_blocks_wrap(handle, inode, block,
 					  max_blocks, &map_bh,
 					  EXT4_CREATE_UNINITIALIZED_EXT, 0);
-		WARN_ON(ret <= 0);
 		if (ret <= 0) {
-			ext4_error(inode->i_sb, "ext4_fallocate",
-				    "ext4_ext_get_blocks returned error: "
-				    "inode#%lu, block=%u, max_blocks=%lu",
+#ifdef EXT4FS_DEBUG
+			WARN_ON(ret <= 0);
+			printk(KERN_ERR "%s: ext4_ext_get_blocks "
+				    "returned error inode#%lu, block=%u, "
+				    "max_blocks=%lu", __func__,
 				    inode->i_ino, block, max_blocks);
-			ret = -EIO;
+#endif
 			ext4_mark_inode_dirty(handle, inode);
 			ret2 = ext4_journal_stop(handle);
 			break;
@@ -2680,7 +2880,6 @@ retry:
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
-	up_write((&EXT4_I(inode)->i_data_sem));
 	/*
 	 * Time to update the file size.
 	 * Update only when preallocation was requested beyond the file size.
@@ -2692,21 +2891,18 @@ retry:
 			 * if no error, we assume preallocation succeeded
 			 * completely
 			 */
-			mutex_lock(&inode->i_mutex);
 			i_size_write(inode, offset + len);
 			EXT4_I(inode)->i_disksize = i_size_read(inode);
-			mutex_unlock(&inode->i_mutex);
 		} else if (ret < 0 && nblocks) {
 			/* Handle partial allocation scenario */
 			loff_t newsize;
 
-			mutex_lock(&inode->i_mutex);
 			newsize  = (nblocks << blkbits) + i_size_read(inode);
 			i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
 			EXT4_I(inode)->i_disksize = i_size_read(inode);
-			mutex_unlock(&inode->i_mutex);
 		}
 	}
 
+	mutex_unlock(&inode->i_mutex);
 	return ret > 0 ? ret2 : ret;
 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db55..77341c1ce4ea 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -123,17 +123,34 @@ force_commit:
 	return ret;
 }
 
+static struct vm_operations_struct ext4_file_vm_ops = {
+	.fault		= filemap_fault,
+	.page_mkwrite   = ext4_page_mkwrite,
+};
+
+static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file->f_mapping;
+
+	if (!mapping->a_ops->readpage)
+		return -ENOEXEC;
+	file_accessed(file);
+	vma->vm_ops = &ext4_file_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
+	return 0;
+}
+
 const struct file_operations ext4_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= ext4_file_write,
-	.ioctl		= ext4_ioctl,
+	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
-	.mmap		= generic_file_mmap,
+	.mmap		= ext4_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2c..a04a1ac4e0cf 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
 		goto out;
 	}
 
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		goto out;
+
 	/*
 	 * The VFS has written the file data.  If the inode is unaltered
 	 * then we need not start a commit.
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index da18a74b966a..14621898b1fd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -158,6 +158,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
 	struct ext4_super_block * es;
 	struct ext4_sb_info *sbi;
 	int fatal = 0, err;
+	ext4_group_t flex_group;
 
 	if (atomic_read(&inode->i_count) > 1) {
 		printk ("ext4_free_inode: inode has count=%d\n",
@@ -223,11 +224,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
 
 		if (gdp) {
 			spin_lock(sb_bgl_lock(sbi, block_group));
-			gdp->bg_free_inodes_count = cpu_to_le16(
-				le16_to_cpu(gdp->bg_free_inodes_count) + 1);
+			le16_add_cpu(&gdp->bg_free_inodes_count, 1);
 			if (is_directory)
-				gdp->bg_used_dirs_count = cpu_to_le16(
-				  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+				le16_add_cpu(&gdp->bg_used_dirs_count, -1);
 			gdp->bg_checksum = ext4_group_desc_csum(sbi,
 							block_group, gdp);
 			spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -235,6 +234,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
 			if (is_directory)
 				percpu_counter_dec(&sbi->s_dirs_counter);
 
+			if (sbi->s_log_groups_per_flex) {
+				flex_group = ext4_flex_group(sbi, block_group);
+				spin_lock(sb_bgl_lock(sbi, flex_group));
+				sbi->s_flex_groups[flex_group].free_inodes++;
+				spin_unlock(sb_bgl_lock(sbi, flex_group));
+			}
 		}
 		BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
 		err = ext4_journal_dirty_metadata(handle, bh2);
@@ -289,6 +294,80 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
 	return ret;
 }
 
+#define free_block_ratio 10
+
+static int find_group_flex(struct super_block *sb, struct inode *parent,
+			   ext4_group_t *best_group)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *desc;
+	struct buffer_head *bh;
+	struct flex_groups *flex_group = sbi->s_flex_groups;
+	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
+	ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
+	ext4_group_t ngroups = sbi->s_groups_count;
+	int flex_size = ext4_flex_bg_size(sbi);
+	ext4_group_t best_flex = parent_fbg_group;
+	int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
+	int flexbg_free_blocks;
+	int flex_freeb_ratio;
+	ext4_group_t n_fbg_groups;
+	ext4_group_t i;
+
+	n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
+		sbi->s_log_groups_per_flex;
+
+find_close_to_parent:
+	flexbg_free_blocks = flex_group[best_flex].free_blocks;
+	flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
+	if (flex_group[best_flex].free_inodes &&
+	    flex_freeb_ratio > free_block_ratio)
+		goto found_flexbg;
+
+	if (best_flex && best_flex == parent_fbg_group) {
+		best_flex--;
+		goto find_close_to_parent;
+	}
+
+	for (i = 0; i < n_fbg_groups; i++) {
+		if (i == parent_fbg_group || i == parent_fbg_group - 1)
+			continue;
+
+		flexbg_free_blocks = flex_group[best_flex].free_blocks;
+		flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
+
+		if (flex_freeb_ratio > free_block_ratio &&
+		    flex_group[i].free_inodes) {
+			best_flex = i;
+			goto found_flexbg;
+		}
+
+		if (best_flex < 0 ||
+		    (flex_group[i].free_blocks >
+		     flex_group[best_flex].free_blocks &&
+		     flex_group[i].free_inodes))
+			best_flex = i;
+	}
+
+	if (!flex_group[best_flex].free_inodes ||
+	    !flex_group[best_flex].free_blocks)
+		return -1;
+
+found_flexbg:
+	for (i = best_flex * flex_size; i < ngroups &&
+		     i < (best_flex + 1) * flex_size; i++) {
+		desc = ext4_get_group_desc(sb, i, &bh);
+		if (le16_to_cpu(desc->bg_free_inodes_count)) {
+			*best_group = i;
+			goto out;
+		}
+	}
+
+	return -1;
+out:
+	return 0;
+}
+
 /*
  * Orlov's allocator for directories.
  *
@@ -504,6 +583,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 	struct inode *ret;
 	ext4_group_t i;
 	int free = 0;
+	ext4_group_t flex_group;
 
 	/* Cannot create files in a deleted directory */
 	if (!dir || !dir->i_nlink)
@@ -517,6 +597,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 
 	sbi = EXT4_SB(sb);
 	es = sbi->s_es;
+
+	if (sbi->s_log_groups_per_flex) {
+		ret2 = find_group_flex(sb, dir, &group);
+		goto got_group;
+	}
+
 	if (S_ISDIR(mode)) {
 		if (test_opt (sb, OLDALLOC))
 			ret2 = find_group_dir(sb, dir, &group);
@@ -525,6 +611,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 	} else
 		ret2 = find_group_other(sb, dir, &group);
 
+got_group:
 	err = -ENOSPC;
 	if (ret2 == -1)
 		goto out;
@@ -664,11 +751,9 @@ got:
 				cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
 	}
 
-	gdp->bg_free_inodes_count =
-		cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
+	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
 	if (S_ISDIR(mode)) {
-		gdp->bg_used_dirs_count =
-			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
+		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
 	}
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, group));
@@ -681,6 +766,13 @@ got:
 		percpu_counter_inc(&sbi->s_dirs_counter);
 	sb->s_dirt = 1;
 
+	if (sbi->s_log_groups_per_flex) {
+		flex_group = ext4_flex_group(sbi, group);
+		spin_lock(sb_bgl_lock(sbi, flex_group));
+		sbi->s_flex_groups[flex_group].free_inodes--;
+		spin_unlock(sb_bgl_lock(sbi, flex_group));
+	}
+
 	inode->i_uid = current->fsuid;
 	if (test_opt (sb, GRPID))
 		inode->i_gid = dir->i_gid;
@@ -702,7 +794,12 @@ got:
 	ei->i_dir_start_lookup = 0;
 	ei->i_disksize = 0;
 
-	ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL;
+	/*
+	 * Don't inherit extent flag from directory. We set extent flag on
+	 * newly created directory and file only if -o extent mount option is
+	 * specified
+	 */
+	ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
 	if (S_ISLNK(mode))
 		ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
 	/* dirsync only applies to directories */
@@ -745,12 +842,15 @@ got:
 		goto fail_free_drop;
 	}
 	if (test_opt(sb, EXTENTS)) {
-		EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
-		ext4_ext_tree_init(handle, inode);
-		err = ext4_update_incompat_feature(handle, sb,
-						EXT4_FEATURE_INCOMPAT_EXTENTS);
-		if (err)
-			goto fail;
+		/* set extent flag only for diretory, file and normal symlink*/
+		if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
+			EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+			ext4_ext_tree_init(handle, inode);
+			err = ext4_update_incompat_feature(handle, sb,
+					EXT4_FEATURE_INCOMPAT_EXTENTS);
+			if (err)
+				goto fail;
+		}
 	}
 
 	ext4_debug("allocating inode %lu\n", inode->i_ino);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7dd9b50d5ebc..42bc6664946c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -403,6 +403,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
 	__le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
 	__le32 *p;
 	ext4_fsblk_t bg_start;
+	ext4_fsblk_t last_block;
 	ext4_grpblk_t colour;
 
 	/* Try to find previous block */
@@ -420,8 +421,13 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
 	 * into the same cylinder group then.
 	 */
 	bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
-	colour = (current->pid % 16) *
+	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+
+	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
+		colour = (current->pid % 16) *
 			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+	else
+		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
 	return bg_start + colour;
 }
 
@@ -768,7 +774,6 @@ err_out:
  *
  * `handle' can be NULL if create == 0.
  *
- * The BKL may not be held on entry here.  Be sure to take it early.
  * return > 0, # of blocks mapped or allocated.
  * return = 0, if plain lookup failed.
  * return < 0, error case.
@@ -903,11 +908,38 @@ out:
  */
 #define DIO_CREDITS 25
 
+
+/*
+ *
+ *
+ * ext4_ext4 get_block() wrapper function
+ * It will do a look up first, and returns if the blocks already mapped.
+ * Otherwise it takes the write lock of the i_data_sem and allocate blocks
+ * and store the allocated blocks in the result buffer head and mark it
+ * mapped.
+ *
+ * If file type is extents based, it will call ext4_ext_get_blocks(),
+ * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
+ * based files
+ *
+ * On success, it returns the number of blocks being mapped or allocate.
+ * if create==0 and the blocks are pre-allocated and uninitialized block,
+ * the result buffer head is unmapped. If the create ==1, it will make sure
+ * the buffer head is mapped.
+ *
+ * It returns 0 if plain look up failed (blocks have not been allocated), in
+ * that casem, buffer head is unmapped
+ *
+ * It returns the error in case of allocation failure.
+ */
 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 			unsigned long max_blocks, struct buffer_head *bh,
 			int create, int extend_disksize)
 {
 	int retval;
+
+	clear_buffer_mapped(bh);
+
 	/*
 	 * Try to see if we can get  the block without requesting
 	 * for new file system block.
@@ -921,12 +953,26 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 				inode, block, max_blocks, bh, 0, 0);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
-	if (!create || (retval > 0))
+
+	/* If it is only a block(s) look up */
+	if (!create)
+		return retval;
+
+	/*
+	 * Returns if the blocks have already allocated
+	 *
+	 * Note that if blocks have been preallocated
+	 * ext4_ext_get_block() returns th create = 0
+	 * with buffer head unmapped.
+	 */
+	if (retval > 0 && buffer_mapped(bh))
 		return retval;
 
 	/*
-	 * We need to allocate new blocks which will result
-	 * in i_data update
+	 * New blocks allocate and/or writing to uninitialized extent
+	 * will possibly result in updating i_data, so we take
+	 * the write lock of i_data_sem, and call get_blocks()
+	 * with create == 1 flag.
 	 */
 	down_write((&EXT4_I(inode)->i_data_sem));
 	/*
@@ -2455,12 +2501,10 @@ out_stop:
 static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
 		unsigned long ino, struct ext4_iloc *iloc)
 {
-	unsigned long desc, group_desc;
 	ext4_group_t block_group;
 	unsigned long offset;
 	ext4_fsblk_t block;
-	struct buffer_head *bh;
-	struct ext4_group_desc * gdp;
+	struct ext4_group_desc *gdp;
 
 	if (!ext4_valid_inum(sb, ino)) {
 		/*
@@ -2472,22 +2516,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
 	}
 
 	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
-	if (block_group >= EXT4_SB(sb)->s_groups_count) {
-		ext4_error(sb,"ext4_get_inode_block","group >= groups count");
-		return 0;
-	}
-	smp_rmb();
-	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
-	desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
-	bh = EXT4_SB(sb)->s_group_desc[group_desc];
-	if (!bh) {
-		ext4_error (sb, "ext4_get_inode_block",
-			    "Descriptor not loaded");
+	gdp = ext4_get_group_desc(sb, block_group, NULL);
+	if (!gdp)
 		return 0;
-	}
 
-	gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
-		desc * EXT4_DESC_SIZE(sb));
 	/*
 	 * Figure out the offset within the block group inode table
 	 */
@@ -3461,3 +3493,18 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 
 	return err;
 }
+
+int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	/*
+	 * if ext4_get_block resulted in a split of an uninitialized extent,
+	 * in file system full case, we will have to take the journal write
+	 * access and zero out the page. The journal handle get initialized
+	 * in ext4_get_block.
+	 */
+	/* FIXME!! should we take inode->i_mutex ? Currently we can't because
+	 * it has a circular locking dependency with DIO. But migrate expect
+	 * i_mutex to ensure no i_data changes
+	 */
+	return block_page_mkwrite(vma, page, ext4_get_block);
+}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2ed7c37f897e..53a010bb0483 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -17,9 +17,9 @@
 #include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
-int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
-		unsigned long arg)
+long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
+	struct inode *inode = filp->f_dentry->d_inode;
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int flags;
 	unsigned short rsv_window_size;
@@ -265,9 +265,6 @@ flags_err:
 #ifdef CONFIG_COMPAT
 long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int ret;
-
 	/* These are just misnamed, they actually get/put from/to user an int */
 	switch (cmd) {
 	case EXT4_IOC32_GETFLAGS:
@@ -307,9 +304,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	default:
 		return -ENOIOCTLCMD;
 	}
-	lock_kernel();
-	ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
-	unlock_kernel();
-	return ret;
+	return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
 }
 #endif
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dd0fcfcb35ce..ad6bf37b92d5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -576,8 +576,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac);
 
 static struct proc_dir_entry *proc_root_ext4;
 struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
-ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
-			ext4_fsblk_t goal, unsigned long *count, int *errp);
 
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group);
@@ -627,21 +625,19 @@ static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
 	return block;
 }
 
+static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
+{
 #if BITS_PER_LONG == 64
-#define mb_correct_addr_and_bit(bit, addr)		\
-{							\
-	bit += ((unsigned long) addr & 7UL) << 3;	\
-	addr = (void *) ((unsigned long) addr & ~7UL);	\
-}
+	*bit += ((unsigned long) addr & 7UL) << 3;
+	addr = (void *) ((unsigned long) addr & ~7UL);
 #elif BITS_PER_LONG == 32
-#define mb_correct_addr_and_bit(bit, addr)		\
-{							\
-	bit += ((unsigned long) addr & 3UL) << 3;	\
-	addr = (void *) ((unsigned long) addr & ~3UL);	\
-}
+	*bit += ((unsigned long) addr & 3UL) << 3;
+	addr = (void *) ((unsigned long) addr & ~3UL);
 #else
 #error "how many bits you are?!"
 #endif
+	return addr;
+}
 
 static inline int mb_test_bit(int bit, void *addr)
 {
@@ -649,34 +645,54 @@ static inline int mb_test_bit(int bit, void *addr)
 	 * ext4_test_bit on architecture like powerpc
 	 * needs unsigned long aligned address
 	 */
-	mb_correct_addr_and_bit(bit, addr);
+	addr = mb_correct_addr_and_bit(&bit, addr);
 	return ext4_test_bit(bit, addr);
 }
 
 static inline void mb_set_bit(int bit, void *addr)
 {
-	mb_correct_addr_and_bit(bit, addr);
+	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_set_bit(bit, addr);
 }
 
 static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
 {
-	mb_correct_addr_and_bit(bit, addr);
+	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_set_bit_atomic(lock, bit, addr);
 }
 
 static inline void mb_clear_bit(int bit, void *addr)
 {
-	mb_correct_addr_and_bit(bit, addr);
+	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_clear_bit(bit, addr);
 }
 
 static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
 {
-	mb_correct_addr_and_bit(bit, addr);
+	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_clear_bit_atomic(lock, bit, addr);
 }
 
+static inline int mb_find_next_zero_bit(void *addr, int max, int start)
+{
+	int fix = 0;
+	addr = mb_correct_addr_and_bit(&fix, addr);
+	max += fix;
+	start += fix;
+
+	return ext4_find_next_zero_bit(addr, max, start) - fix;
+}
+
+static inline int mb_find_next_bit(void *addr, int max, int start)
+{
+	int fix = 0;
+	addr = mb_correct_addr_and_bit(&fix, addr);
+	max += fix;
+	start += fix;
+
+	return ext4_find_next_bit(addr, max, start) - fix;
+}
+
 static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
 {
 	char *bb;
@@ -906,7 +922,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
 	unsigned short chunk;
 	unsigned short border;
 
-	BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb));
+	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
 
 	border = 2 << sb->s_blocksize_bits;
 
@@ -946,12 +962,12 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 
 	/* initialize buddy from bitmap which is aggregation
 	 * of on-disk bitmap and preallocations */
-	i = ext4_find_next_zero_bit(bitmap, max, 0);
+	i = mb_find_next_zero_bit(bitmap, max, 0);
 	grp->bb_first_free = i;
 	while (i < max) {
 		fragments++;
 		first = i;
-		i = ext4_find_next_bit(bitmap, max, i);
+		i = mb_find_next_bit(bitmap, max, i);
 		len = i - first;
 		free += len;
 		if (len > 1)
@@ -959,7 +975,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 		else
 			grp->bb_counters[0]++;
 		if (i < max)
-			i = ext4_find_next_zero_bit(bitmap, max, i);
+			i = mb_find_next_zero_bit(bitmap, max, i);
 	}
 	grp->bb_fragments = fragments;
 
@@ -967,6 +983,10 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 		ext4_error(sb, __FUNCTION__,
 			"EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
 			group, free, grp->bb_free);
+		/*
+		 * If we intent to continue, we consider group descritor
+		 * corrupt and update bb_free using bitmap value
+		 */
 		grp->bb_free = free;
 	}
 
@@ -1146,8 +1166,9 @@ out:
 	return err;
 }
 
-static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-		struct ext4_buddy *e4b)
+static noinline int
+ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+					struct ext4_buddy *e4b)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct inode *inode = sbi->s_buddy_cache;
@@ -1778,7 +1799,7 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
 		buddy = mb_find_buddy(e4b, i, &max);
 		BUG_ON(buddy == NULL);
 
-		k = ext4_find_next_zero_bit(buddy, max, 0);
+		k = mb_find_next_zero_bit(buddy, max, 0);
 		BUG_ON(k >= max);
 
 		ac->ac_found++;
@@ -1818,11 +1839,11 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 	i = e4b->bd_info->bb_first_free;
 
 	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
-		i = ext4_find_next_zero_bit(bitmap,
+		i = mb_find_next_zero_bit(bitmap,
 						EXT4_BLOCKS_PER_GROUP(sb), i);
 		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
 			/*
-			 * IF we corrupt the bitmap  we won't find any
+			 * IF we have corrupt bitmap, we won't find any
 			 * free blocks even though group info says we
 			 * we have free blocks
 			 */
@@ -1838,6 +1859,12 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 			ext4_error(sb, __FUNCTION__, "%d free blocks as per "
 					"group info. But got %d blocks\n",
 					free, ex.fe_len);
+			/*
+			 * The number of free blocks differs. This mostly
+			 * indicate that the bitmap is corrupt. So exit
+			 * without claiming the space.
+			 */
+			break;
 		}
 
 		ext4_mb_measure_extent(ac, &ex, e4b);
@@ -1937,7 +1964,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	return 0;
 }
 
-static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+static noinline int
+ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
 	ext4_group_t group;
 	ext4_group_t i;
@@ -2444,7 +2472,7 @@ static void ext4_mb_history_init(struct super_block *sb)
 	/* if we can't allocate history, then we simple won't use it */
 }
 
-static void ext4_mb_store_history(struct ext4_allocation_context *ac)
+static noinline void ext4_mb_store_history(struct ext4_allocation_context *ac)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_mb_history h;
@@ -2780,7 +2808,7 @@ int ext4_mb_release(struct super_block *sb)
 	return 0;
 }
 
-static void ext4_mb_free_committed_blocks(struct super_block *sb)
+static noinline void ext4_mb_free_committed_blocks(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	int err;
@@ -3001,7 +3029,8 @@ void exit_ext4_mballoc(void)
  * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
  * Returns 0 if success or error code
  */
-static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+static noinline int
+ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 				handle_t *handle)
 {
 	struct buffer_head *bitmap_bh = NULL;
@@ -3074,9 +3103,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 						ac->ac_b_ex.fe_group,
 						gdp));
 	}
-	gdp->bg_free_blocks_count =
-		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
-				- ac->ac_b_ex.fe_len);
+	le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
 	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3118,15 +3145,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
  * Normalization means making request better in terms of
  * size and alignment
  */
-static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+static noinline void
+ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 				struct ext4_allocation_request *ar)
 {
 	int bsbits, max;
 	ext4_lblk_t end;
-	struct list_head *cur;
 	loff_t size, orig_size, start_off;
 	ext4_lblk_t start, orig_start;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
+	struct ext4_prealloc_space *pa;
 
 	/* do normalize only data requests, metadata requests
 	   do not need preallocation */
@@ -3212,12 +3240,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
 	/* check we don't cross already preallocated blocks */
 	rcu_read_lock();
-	list_for_each_rcu(cur, &ei->i_prealloc_list) {
-		struct ext4_prealloc_space *pa;
+	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
 		unsigned long pa_end;
 
-		pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
-
 		if (pa->pa_deleted)
 			continue;
 		spin_lock(&pa->pa_lock);
@@ -3259,10 +3284,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
 	/* XXX: extra loop to check we really don't overlap preallocations */
 	rcu_read_lock();
-	list_for_each_rcu(cur, &ei->i_prealloc_list) {
-		struct ext4_prealloc_space *pa;
+	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
 		unsigned long pa_end;
-		pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
 		spin_lock(&pa->pa_lock);
 		if (pa->pa_deleted == 0) {
 			pa_end = pa->pa_lstart + pa->pa_len;
@@ -3384,12 +3407,11 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 /*
  * search goal blocks in preallocated space
  */
-static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
+static noinline int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
 	struct ext4_locality_group *lg;
 	struct ext4_prealloc_space *pa;
-	struct list_head *cur;
 
 	/* only data can be preallocated */
 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3397,8 +3419,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 
 	/* first, try per-file preallocation */
 	rcu_read_lock();
-	list_for_each_rcu(cur, &ei->i_prealloc_list) {
-		pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
+	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
 
 		/* all fields in this condition don't change,
 		 * so we can skip locking for them */
@@ -3430,8 +3451,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 		return 0;
 
 	rcu_read_lock();
-	list_for_each_rcu(cur, &lg->lg_prealloc_list) {
-		pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
+	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
 		spin_lock(&pa->pa_lock);
 		if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
 			atomic_inc(&pa->pa_count);
@@ -3551,7 +3571,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 /*
  * creates new preallocated space for given inode
  */
-static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+static noinline int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
 	struct ext4_prealloc_space *pa;
@@ -3638,7 +3658,7 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 /*
  * creates new preallocated space for locality group inodes belongs to
  */
-static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
+static noinline int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
 	struct ext4_locality_group *lg;
@@ -3711,7 +3731,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
  * the caller MUST hold group/inode locks.
  * TODO: optimize the case when there are no in-core structures yet
  */
-static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
+static noinline int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
 				struct buffer_head *bitmap_bh,
 				struct ext4_prealloc_space *pa)
 {
@@ -3740,10 +3760,10 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
 	}
 
 	while (bit < end) {
-		bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit);
+		bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
 		if (bit >= end)
 			break;
-		next = ext4_find_next_bit(bitmap_bh->b_data, end, bit);
+		next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
 		if (next > end)
 			next = end;
 		start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
@@ -3771,6 +3791,10 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
 			(unsigned long) pa->pa_len);
 		ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
 						free, pa->pa_free);
+		/*
+		 * pa is already deleted so we use the value obtained
+		 * from the bitmap and continue.
+		 */
 	}
 	atomic_add(free, &sbi->s_mb_discarded);
 	if (ac)
@@ -3779,7 +3803,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
 	return err;
 }
 
-static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+static noinline int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
 				struct ext4_prealloc_space *pa)
 {
 	struct ext4_allocation_context *ac;
@@ -3821,7 +3845,7 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
  * - how many do we discard
  *   1) how many requested
  */
-static int ext4_mb_discard_group_preallocations(struct super_block *sb,
+static noinline int ext4_mb_discard_group_preallocations(struct super_block *sb,
 					ext4_group_t group, int needed)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -4143,7 +4167,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 	mutex_lock(&ac->ac_lg->lg_mutex);
 }
 
-static int ext4_mb_initialize_context(struct ext4_allocation_context *ac,
+static noinline int
+ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 				struct ext4_allocation_request *ar)
 {
 	struct super_block *sb = ar->inode->i_sb;
@@ -4374,7 +4399,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
 	ext4_mb_free_committed_blocks(sb);
 }
 
-static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
+static noinline int
+ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 			  ext4_group_t group, ext4_grpblk_t block, int count)
 {
 	struct ext4_group_info *db = e4b->bd_info;
@@ -4564,8 +4590,7 @@ do_more:
 	}
 
 	spin_lock(sb_bgl_lock(sbi, block_group));
-	gdp->bg_free_blocks_count =
-		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
+	le16_add_cpu(&gdp->bg_free_blocks_count, count);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, block_group));
 	percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 8c6c685b9d22..5c1e27de7755 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -43,6 +43,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
 
 	if (IS_ERR(path)) {
 		retval = PTR_ERR(path);
+		path = NULL;
 		goto err_out;
 	}
 
@@ -74,6 +75,10 @@ static int finish_range(handle_t *handle, struct inode *inode,
 	}
 	retval = ext4_ext_insert_extent(handle, inode, path, &newext);
 err_out:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
 	lb->first_pblock = 0;
 	return retval;
 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a9347fb43bcc..63c33e053478 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
 
 	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
 
-	if ((bh = ext4_bread(handle, inode, *block, 1, err))) {
+	bh = ext4_bread(handle, inode, *block, 1, err);
+	if (bh) {
 		inode->i_size += inode->i_sb->s_blocksize;
 		EXT4_I(inode)->i_disksize = inode->i_size;
-		ext4_journal_get_write_access(handle,bh);
+		*err = ext4_journal_get_write_access(handle, bh);
+		if (*err) {
+			brelse(bh);
+			bh = NULL;
+		}
 	}
 	return bh;
 }
@@ -1804,12 +1809,8 @@ retry:
 	inode->i_fop = &ext4_dir_operations;
 	inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
 	dir_block = ext4_bread (handle, inode, 0, 1, &err);
-	if (!dir_block) {
-		ext4_dec_count(handle, inode); /* is this nlink == 0? */
-		ext4_mark_inode_dirty(handle, inode);
-		iput (inode);
-		goto out_stop;
-	}
+	if (!dir_block)
+		goto out_clear_inode;
 	BUFFER_TRACE(dir_block, "get_write_access");
 	ext4_journal_get_write_access(handle, dir_block);
 	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
@@ -1832,7 +1833,8 @@ retry:
 	ext4_mark_inode_dirty(handle, inode);
 	err = ext4_add_entry (handle, dentry, inode);
 	if (err) {
-		inode->i_nlink = 0;
+out_clear_inode:
+		clear_nlink(inode);
 		ext4_mark_inode_dirty(handle, inode);
 		iput (inode);
 		goto out_stop;
@@ -2164,7 +2166,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
 	dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
 	ext4_update_dx_flag(dir);
 	ext4_mark_inode_dirty(handle, dir);
-	ext4_dec_count(handle, inode);
+	drop_nlink(inode);
 	if (!inode->i_nlink)
 		ext4_orphan_add(handle, inode);
 	inode->i_ctime = ext4_current_time(inode);
@@ -2214,16 +2216,17 @@ retry:
 		err = __page_symlink(inode, symname, l,
 				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
 		if (err) {
-			ext4_dec_count(handle, inode);
+			clear_nlink(inode);
 			ext4_mark_inode_dirty(handle, inode);
 			iput (inode);
 			goto out_stop;
 		}
 	} else {
+		/* clear the extent format for fast symlink */
+		EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
 		inode->i_op = &ext4_fast_symlink_inode_operations;
 		memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
 		inode->i_size = l-1;
-		EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
 	}
 	EXT4_I(inode)->i_disksize = inode->i_size;
 	err = ext4_add_nondir(handle, dentry, inode);
@@ -2407,7 +2410,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
 		ext4_dec_count(handle, old_dir);
 		if (new_inode) {
 			/* checked empty_dir above, can't have another parent,
-			 * ext3_dec_count() won't work for many-linked dirs */
+			 * ext4_dec_count() won't work for many-linked dirs */
 			new_inode->i_nlink = 0;
 		} else {
 			ext4_inc_count(handle, new_dir);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9477a2bd6ff2..728e3efa84b5 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -502,8 +502,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	EXT4_SB(sb)->s_gdb_count++;
 	kfree(o_group_desc);
 
-	es->s_reserved_gdt_blocks =
-		cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
+	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
 	ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
 
 	return 0;
@@ -877,8 +876,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	 */
 	ext4_blocks_count_set(es, ext4_blocks_count(es) +
 		input->blocks_count);
-	es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
-		EXT4_INODES_PER_GROUP(sb));
+	le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
 
 	/*
 	 * We need to protect s_groups_count against other CPUs seeing
@@ -1037,6 +1035,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 		ext4_warning(sb, __FUNCTION__,
 			     "multiple resizers run on filesystem!");
 		unlock_super(sb);
+		ext4_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 13383ba18f1d..2ac34ac0d325 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -518,6 +518,7 @@ static void ext4_put_super (struct super_block * sb)
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
+	kfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -944,8 +945,8 @@ static match_table_t tokens = {
 	{Opt_mballoc, "mballoc"},
 	{Opt_nomballoc, "nomballoc"},
 	{Opt_stripe, "stripe=%u"},
-	{Opt_err, NULL},
 	{Opt_resize, "resize"},
+	{Opt_err, NULL},
 };
 
 static ext4_fsblk_t get_sb_block(void **data)
@@ -1387,11 +1388,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 		 * a plain journaled filesystem we can keep it set as
 		 * valid forever! :)
 		 */
-	es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS);
+	es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
 #endif
 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
-	es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
+	le16_add_cpu(&es->s_mnt_count, 1);
 	es->s_mtime = cpu_to_le32(get_seconds());
 	ext4_update_dynamic_rev(sb);
 	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1418,6 +1419,54 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	return res;
 }
 
+static int ext4_fill_flex_info(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp = NULL;
+	struct buffer_head *bh;
+	ext4_group_t flex_group_count;
+	ext4_group_t flex_group;
+	int groups_per_flex = 0;
+	__u64 block_bitmap = 0;
+	int i;
+
+	if (!sbi->s_es->s_log_groups_per_flex) {
+		sbi->s_log_groups_per_flex = 0;
+		return 1;
+	}
+
+	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+	groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+
+	flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
+		groups_per_flex;
+	sbi->s_flex_groups = kmalloc(flex_group_count *
+				     sizeof(struct flex_groups), GFP_KERNEL);
+	if (sbi->s_flex_groups == NULL) {
+		printk(KERN_ERR "EXT4-fs: not enough memory\n");
+		goto failed;
+	}
+	memset(sbi->s_flex_groups, 0, flex_group_count *
+	       sizeof(struct flex_groups));
+
+	gdp = ext4_get_group_desc(sb, 1, &bh);
+	block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
+
+	for (i = 0; i < sbi->s_groups_count; i++) {
+		gdp = ext4_get_group_desc(sb, i, &bh);
+
+		flex_group = ext4_flex_group(sbi, i);
+		sbi->s_flex_groups[flex_group].free_inodes +=
+			le16_to_cpu(gdp->bg_free_inodes_count);
+		sbi->s_flex_groups[flex_group].free_blocks +=
+			le16_to_cpu(gdp->bg_free_blocks_count);
+	}
+
+	return 1;
+failed:
+	return 0;
+}
+
 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
 			    struct ext4_group_desc *gdp)
 {
@@ -2115,6 +2164,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
 		goto failed_mount2;
 	}
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+		if (!ext4_fill_flex_info(sb)) {
+			printk(KERN_ERR
+			       "EXT4-fs: unable to initialize "
+			       "flex_bg meta info!\n");
+			goto failed_mount2;
+		}
+
 	sbi->s_gdb_count = db_count;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index d7962139c010..52fddc710b84 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
 						 struct mb_cache_entry **);
 static void ext4_xattr_rehash(struct ext4_xattr_header *,
 			      struct ext4_xattr_entry *);
+static int ext4_xattr_list(struct inode *inode, char *buffer,
+			   size_t buffer_size);
 
 static struct mb_cache *ext4_xattr_cache;
 
@@ -420,7 +422,7 @@ cleanup:
  * Returns a negative error number on failure, or the number of bytes
  * used / required on success.
  */
-int
+static int
 ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
 	int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 		get_bh(bh);
 		ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
 	} else {
-		BHDR(bh)->h_refcount = cpu_to_le32(
-				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
+		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
 		error = ext4_journal_dirty_metadata(handle, bh);
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
@@ -789,8 +790,7 @@ inserted:
 				if (error)
 					goto cleanup_dquot;
 				lock_buffer(new_bh);
-				BHDR(new_bh)->h_refcount = cpu_to_le32(1 +
-					le32_to_cpu(BHDR(new_bh)->h_refcount));
+				le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
 					le32_to_cpu(BHDR(new_bh)->h_refcount));
 				unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
 			get_bh(new_bh);
 		} else {
 			/* We need to allocate a new block */
-			ext4_fsblk_t goal = le32_to_cpu(
-					EXT4_SB(sb)->s_es->s_first_data_block) +
-				(ext4_fsblk_t)EXT4_I(inode)->i_block_group *
-				EXT4_BLOCKS_PER_GROUP(sb);
+			ext4_fsblk_t goal = ext4_group_first_block_no(sb,
+						EXT4_I(inode)->i_block_group);
 			ext4_fsblk_t block = ext4_new_block(handle, inode,
 							goal, &error);
 			if (error)
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a12651..5992fe979bb9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
 extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
 
 extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
-extern int ext4_xattr_list(struct inode *, char *, size_t);
 extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
 extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
 
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
 }
 
 static inline int
-ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int
 ext4_xattr_set(struct inode *inode, int name_index, const char *name,
 	       const void *value, size_t size, int flags)
 {
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 038ed7436199..c6cbb6cd59b2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -369,7 +369,7 @@ out:
 
 
 /**
- * int journal_restart() - restart a handle .
+ * int journal_restart() - restart a handle.
  * @handle:  handle to restart
  * @nblocks: nr credits requested
  *
@@ -844,8 +844,7 @@ out:
 }
 
 /**
- * int journal_get_undo_access() -  Notify intent to modify metadata with
- *     non-rewindable consequences
+ * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
  * @handle: transaction
  * @bh: buffer to undo
  * @credits: store the number of taken credits here (if not NULL)
@@ -921,12 +920,14 @@ out:
 }
 
 /**
- * int journal_dirty_data() -  mark a buffer as containing dirty data which
- *                             needs to be flushed before we can commit the
- *                             current transaction.
+ * int journal_dirty_data() - mark a buffer as containing dirty data to be flushed
  * @handle: transaction
  * @bh: bufferhead to mark
  *
+ * Description:
+ * Mark a buffer as containing dirty data which needs to be flushed before
+ * we can commit the current transaction.
+ *
  * The buffer is placed on the transaction's data list and is marked as
  * belonging to the transaction.
  *
@@ -1098,11 +1099,11 @@ no_journal:
 }
 
 /**
- * int journal_dirty_metadata() -  mark a buffer as containing dirty metadata
+ * int journal_dirty_metadata() - mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
  * @bh: buffer to mark
  *
- * mark dirty metadata which needs to be journaled as part of the current
+ * Mark dirty metadata which needs to be journaled as part of the current
  * transaction.
  *
  * The buffer is placed on the transaction's metadata list and is marked
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f831..e0139786f717 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	jbd_debug (3, "JBD: commit phase 2\n");
 
 	/*
-	 * First, drop modified flag: all accesses to the buffers
-	 * will be tracked for a new trasaction only -bzzz
-	 */
-	spin_lock(&journal->j_list_lock);
-	if (commit_transaction->t_buffers) {
-		new_jh = jh = commit_transaction->t_buffers->b_tnext;
-		do {
-			J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
-					new_jh->b_modified == 0);
-			new_jh->b_modified = 0;
-			new_jh = new_jh->b_tnext;
-		} while (new_jh != jh);
-	}
-	spin_unlock(&journal->j_list_lock);
-
-	/*
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
@@ -584,6 +568,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
 	stats.u.run.rs_blocks_logged = 0;
 
+	J_ASSERT(commit_transaction->t_nr_buffers <=
+		 commit_transaction->t_outstanding_credits);
+
 	descriptor = NULL;
 	bufs = 0;
 	while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 96ba846992e9..67b6a989a0aa 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -219,7 +219,7 @@ static int jbd2_journal_start_thread(journal_t *journal)
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 
-	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
+	wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
 	return 0;
 }
 
@@ -231,7 +231,8 @@ static void journal_kill_thread(journal_t *journal)
 	while (journal->j_task) {
 		wake_up(&journal->j_wait_commit);
 		spin_unlock(&journal->j_state_lock);
-		wait_event(journal->j_wait_done_commit, journal->j_task == 0);
+		wait_event(journal->j_wait_done_commit,
+					journal->j_task == NULL);
 		spin_lock(&journal->j_state_lock);
 	}
 	spin_unlock(&journal->j_state_lock);
@@ -1969,14 +1970,14 @@ static int journal_init_jbd2_journal_head_cache(void)
 {
 	int retval;
 
-	J_ASSERT(jbd2_journal_head_cache == 0);
+	J_ASSERT(!jbd2_journal_head_cache);
 	jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
 				sizeof(struct journal_head),
 				0,		/* offset */
 				SLAB_TEMPORARY,	/* flags */
 				NULL);		/* ctor */
 	retval = 0;
-	if (jbd2_journal_head_cache == 0) {
+	if (!jbd2_journal_head_cache) {
 		retval = -ENOMEM;
 		printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
 	}
@@ -2002,14 +2003,14 @@ static struct journal_head *journal_alloc_journal_head(void)
 	atomic_inc(&nr_journal_heads);
 #endif
 	ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
-	if (ret == 0) {
+	if (!ret) {
 		jbd_debug(1, "out of memory for journal_head\n");
 		if (time_after(jiffies, last_warning + 5*HZ)) {
 			printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
 			       __FUNCTION__);
 			last_warning = jiffies;
 		}
-		while (ret == 0) {
+		while (!ret) {
 			yield();
 			ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
 		}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index df36f42e19e1..2e1453a5e998 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -174,13 +174,13 @@ int __init jbd2_journal_init_revoke_caches(void)
 					   0,
 					   SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
 					   NULL);
-	if (jbd2_revoke_record_cache == 0)
+	if (!jbd2_revoke_record_cache)
 		return -ENOMEM;
 
 	jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
 					   sizeof(struct jbd2_revoke_table_s),
 					   0, SLAB_TEMPORARY, NULL);
-	if (jbd2_revoke_table_cache == 0) {
+	if (!jbd2_revoke_table_cache) {
 		kmem_cache_destroy(jbd2_revoke_record_cache);
 		jbd2_revoke_record_cache = NULL;
 		return -ENOMEM;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b9..70245d6638b7 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
 		goto done;
 
 	/*
+	 * this is the first time this transaction is touching this buffer,
+	 * reset the modified flag
+	 */
+       jh->b_modified = 0;
+
+	/*
 	 * If there is already a copy-out version of this buffer, then we don't
 	 * need to make another one
 	 */
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 
 	if (jh->b_transaction == NULL) {
 		jh->b_transaction = transaction;
+
+		/* first access by this transaction */
+		jh->b_modified = 0;
+
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 	} else if (jh->b_transaction == journal->j_committing_transaction) {
+		/* first access by this transaction */
+		jh->b_modified = 0;
+
 		JBUFFER_TRACE(jh, "set next transaction");
 		jh->b_next_transaction = transaction;
 	}
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 	struct journal_head *jh;
 	int drop_reserve = 0;
 	int err = 0;
+	int was_modified = 0;
 
 	BUFFER_TRACE(bh, "entry");
 
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 		goto not_jbd;
 	}
 
+	/* keep track of wether or not this transaction modified us */
+	was_modified = jh->b_modified;
+
 	/*
 	 * The buffer's going from the transaction, we must drop
 	 * all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 
 		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
 
-		drop_reserve = 1;
+		/*
+		 * we only want to drop a reference if this transaction
+		 * modified the buffer
+		 */
+		if (was_modified)
+			drop_reserve = 1;
 
 		/*
 		 * We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 		if (jh->b_next_transaction) {
 			J_ASSERT(jh->b_next_transaction == transaction);
 			jh->b_next_transaction = NULL;
-			drop_reserve = 1;
+
+			/*
+			 * only drop a reference if this transaction modified
+			 * the buffer
+			 */
+			if (was_modified)
+				drop_reserve = 1;
 		}
 	}
 
@@ -2077,7 +2105,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
 	jh->b_transaction = jh->b_next_transaction;
 	jh->b_next_transaction = NULL;
 	__jbd2_journal_file_buffer(jh, jh->b_transaction,
-				was_dirty ? BJ_Metadata : BJ_Reserved);
+				jh->b_modified ? BJ_Metadata : BJ_Reserved);
 	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
 
 	if (was_dirty)
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index e26ea78c7892..9dafb53fb1d1 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -36,6 +36,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	unsigned int ivalid;
 	uint32_t alloclen;
 	int ret;
+	int alloc_type = ALLOC_NORMAL;
 
 	D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino));
 
@@ -115,6 +116,10 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 		ri->compr = JFFS2_COMPR_ZERO;
 		ri->dsize = cpu_to_je32(iattr->ia_size - inode->i_size);
 		ri->offset = cpu_to_je32(inode->i_size);
+	} else if (ivalid & ATTR_SIZE && !iattr->ia_size) {
+		/* For truncate-to-zero, treat it as deletion because
+		   it'll always be obsoleting all previous nodes */
+		alloc_type = ALLOC_DELETION;
 	}
 	ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
 	if (mdatalen)
@@ -122,7 +127,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	else
 		ri->data_crc = cpu_to_je32(0);
 
-	new_metadata = jffs2_write_dnode(c, f, ri, mdata, mdatalen, ALLOC_NORMAL);
+	new_metadata = jffs2_write_dnode(c, f, ri, mdata, mdatalen, alloc_type);
 	if (S_ISLNK(inode->i_mode))
 		kfree(mdata);
 
@@ -149,6 +154,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	if (ivalid & ATTR_SIZE && inode->i_size < iattr->ia_size) {
 		jffs2_add_full_dnode_to_inode(c, f, new_metadata);
 		inode->i_size = iattr->ia_size;
+		inode->i_blocks = (inode->i_size + 511) >> 9;
 		f->metadata = NULL;
 	} else {
 		f->metadata = new_metadata;
@@ -167,8 +173,10 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	   We are protected from a simultaneous write() extending i_size
 	   back past iattr->ia_size, because do_truncate() holds the
 	   generic inode semaphore. */
-	if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size)
-		vmtruncate(inode, iattr->ia_size);
+	if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
+		vmtruncate(inode, iattr->ia_size);	
+		inode->i_blocks = (inode->i_size + 511) >> 9;
+	}	
 
 	return 0;
 }
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 0b78fdc9773b..a841f4973a74 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -15,7 +15,7 @@
 #include <linux/version.h>
 #include <linux/rbtree.h>
 #include <linux/posix_acl.h>
-#include <asm/semaphore.h>
+#include <linux/semaphore.h>
 
 struct jffs2_inode_info {
 	/* We need an internal mutex similar to inode->i_mutex.
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 3a2197f3c812..18fca2b9e531 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -16,7 +16,7 @@
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
-#include <asm/semaphore.h>
+#include <linux/semaphore.h>
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/list.h>
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index e1985066b1c6..2bc7d8aa5740 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -2172,7 +2172,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 	}
 
 	/* update the free count for this dmap */
-	dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
+	le32_add_cpu(&dp->nfree, -nblocks);
 
 	BMAP_LOCK(bmp);
 
@@ -2316,7 +2316,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 
 	/* update the free count for this dmap.
 	 */
-	dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
+	le32_add_cpu(&dp->nfree, nblocks);
 
 	BMAP_LOCK(bmp);
 
@@ -3226,7 +3226,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
 	}
 
 	/* update the free count for this dmap */
-	dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
+	le32_add_cpu(&dp->nfree, -nblocks);
 
 	/* reconstruct summary tree */
 	dbInitDmapTree(dp);
@@ -3660,9 +3660,8 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
 			goto initTree;
 		}
 	} else {
-		dp->nblocks =
-		    cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks);
-		dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
+		le32_add_cpu(&dp->nblocks, nblocks);
+		le32_add_cpu(&dp->nfree, nblocks);
 	}
 
 	/* word number containing start block number */
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 11e6d471b364..1a6eb41569bc 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -61,7 +61,7 @@
  * determine the maximum free string for four (lower level) nodes
  * of the tree.
  */
-static __inline signed char TREEMAX(signed char *cp)
+static inline signed char TREEMAX(signed char *cp)
 {
 	signed char tmp1, tmp2;
 
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 9bf29f771737..734ec916beaf 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -1019,8 +1019,7 @@ int diFree(struct inode *ip)
 		/* update the free inode counts at the iag, ag and
 		 * map level.
 		 */
-		iagp->nfreeinos =
-		    cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1);
+		le32_add_cpu(&iagp->nfreeinos, 1);
 		imap->im_agctl[agno].numfree += 1;
 		atomic_inc(&imap->im_numfree);
 
@@ -1219,9 +1218,8 @@ int diFree(struct inode *ip)
 	/* update the number of free inodes and number of free extents
 	 * for the iag.
 	 */
-	iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) -
-				      (INOSPEREXT - 1));
-	iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1);
+	le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
+	le32_add_cpu(&iagp->nfreeexts, 1);
 
 	/* update the number of free inodes and backed inodes
 	 * at the ag and inode map level.
@@ -2124,7 +2122,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 	/* update the free inode count at the iag, ag, inode
 	 * map levels.
 	 */
-	iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1);
+	le32_add_cpu(&iagp->nfreeinos, -1);
 	imap->im_agctl[agno].numfree -= 1;
 	atomic_dec(&imap->im_numfree);
 
@@ -2378,9 +2376,8 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 	/* update the free inode and free extent counts for the
 	 * iag.
 	 */
-	iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) +
-				      (INOSPEREXT - 1));
-	iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1);
+	le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
+	le32_add_cpu(&iagp->nfreeexts, -1);
 
 	/* update the free and backed inode counts for the ag.
 	 */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index a000aaa75136..5a61ebf2cbcc 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -905,8 +905,7 @@ int xtInsert(tid_t tid,		/* transaction id */
 	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
 
 	/* advance next available entry index */
-	p->header.nextindex =
-	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
+	le16_add_cpu(&p->header.nextindex, 1);
 
 	/* Don't log it if there are no links to the file */
 	if (!test_cflag(COMMIT_Nolink, ip)) {
@@ -997,8 +996,7 @@ xtSplitUp(tid_t tid,
 			    split->addr);
 
 		/* advance next available entry index */
-		sp->header.nextindex =
-		    cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);
+		le16_add_cpu(&sp->header.nextindex, 1);
 
 		/* Don't log it if there are no links to the file */
 		if (!test_cflag(COMMIT_Nolink, ip)) {
@@ -1167,9 +1165,7 @@ xtSplitUp(tid_t tid,
 				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
 
 			/* advance next available entry index. */
-			sp->header.nextindex =
-			    cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
-					1);
+			le16_add_cpu(&sp->header.nextindex, 1);
 
 			/* Don't log it if there are no links to the file */
 			if (!test_cflag(COMMIT_Nolink, ip)) {
@@ -1738,8 +1734,7 @@ int xtExtend(tid_t tid,		/* transaction id */
 		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
 
 		/* advance next available entry index */
-		p->header.nextindex =
-		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
+		le16_add_cpu(&p->header.nextindex, 1);
 	}
 
 	/* get back old entry */
@@ -1905,8 +1900,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
 		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
 
 		/* advance next available entry index */
-		p->header.nextindex =
-		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
+		le16_add_cpu(&p->header.nextindex, 1);
 	}
 
 	/* get back old XAD */
@@ -2567,8 +2561,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
 
 	/* advance next available entry index */
-	p->header.nextindex =
-	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
+	le16_add_cpu(&p->header.nextindex, 1);
 
 	xtlck->lwm.offset =
 	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
@@ -2631,8 +2624,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
 	 * delete the entry from the leaf page
 	 */
 	nextindex = le16_to_cpu(p->header.nextindex);
-	p->header.nextindex =
-	    cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);
+	le16_add_cpu(&p->header.nextindex, -1);
 
 	/*
 	 * if the leaf page bocome empty, free the page
@@ -2795,9 +2787,7 @@ xtDeleteUp(tid_t tid, struct inode *ip,
 					(nextindex - index -
 					 1) << L2XTSLOTSIZE);
 
-			p->header.nextindex =
-			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
-					1);
+			le16_add_cpu(&p->header.nextindex, -1);
 			jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
 				 (ulong) parent->bn, index);
 		}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f1ef49fff118..c7854791898f 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -19,12 +19,11 @@
 
 
 #define NLMDBG_FACILITY		NLMDBG_HOSTCACHE
-#define NLM_HOST_MAX		64
 #define NLM_HOST_NRHASH		32
 #define NLM_ADDRHASH(addr)	(ntohl(addr) & (NLM_HOST_NRHASH-1))
 #define NLM_HOST_REBIND		(60 * HZ)
-#define NLM_HOST_EXPIRE		((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
-#define NLM_HOST_COLLECT	((nrhosts > NLM_HOST_MAX)? 120 * HZ :  60 * HZ)
+#define NLM_HOST_EXPIRE		(300 * HZ)
+#define NLM_HOST_COLLECT	(120 * HZ)
 
 static struct hlist_head	nlm_hosts[NLM_HOST_NRHASH];
 static unsigned long		next_gc;
@@ -142,9 +141,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 	INIT_LIST_HEAD(&host->h_granted);
 	INIT_LIST_HEAD(&host->h_reclaim);
 
-	if (++nrhosts > NLM_HOST_MAX)
-		next_gc = 0;
-
+	nrhosts++;
 out:
 	mutex_unlock(&nlm_host_mutex);
 	return host;
@@ -460,7 +457,7 @@ nlm_gc_hosts(void)
  * Manage NSM handles
  */
 static LIST_HEAD(nsm_handles);
-static DEFINE_MUTEX(nsm_mutex);
+static DEFINE_SPINLOCK(nsm_lock);
 
 static struct nsm_handle *
 __nsm_find(const struct sockaddr_in *sin,
@@ -468,7 +465,7 @@ __nsm_find(const struct sockaddr_in *sin,
 		int create)
 {
 	struct nsm_handle *nsm = NULL;
-	struct list_head *pos;
+	struct nsm_handle *pos;
 
 	if (!sin)
 		return NULL;
@@ -482,38 +479,43 @@ __nsm_find(const struct sockaddr_in *sin,
 		return NULL;
 	}
 
-	mutex_lock(&nsm_mutex);
-	list_for_each(pos, &nsm_handles) {
-		nsm = list_entry(pos, struct nsm_handle, sm_link);
+retry:
+	spin_lock(&nsm_lock);
+	list_for_each_entry(pos, &nsm_handles, sm_link) {
 
 		if (hostname && nsm_use_hostnames) {
-			if (strlen(nsm->sm_name) != hostname_len
-			 || memcmp(nsm->sm_name, hostname, hostname_len))
+			if (strlen(pos->sm_name) != hostname_len
+			 || memcmp(pos->sm_name, hostname, hostname_len))
 				continue;
-		} else if (!nlm_cmp_addr(&nsm->sm_addr, sin))
+		} else if (!nlm_cmp_addr(&pos->sm_addr, sin))
 			continue;
-		atomic_inc(&nsm->sm_count);
-		goto out;
+		atomic_inc(&pos->sm_count);
+		kfree(nsm);
+		nsm = pos;
+		goto found;
 	}
-
-	if (!create) {
-		nsm = NULL;
-		goto out;
+	if (nsm) {
+		list_add(&nsm->sm_link, &nsm_handles);
+		goto found;
 	}
+	spin_unlock(&nsm_lock);
+
+	if (!create)
+		return NULL;
 
 	nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
-	if (nsm != NULL) {
-		nsm->sm_addr = *sin;
-		nsm->sm_name = (char *) (nsm + 1);
-		memcpy(nsm->sm_name, hostname, hostname_len);
-		nsm->sm_name[hostname_len] = '\0';
-		atomic_set(&nsm->sm_count, 1);
+	if (nsm == NULL)
+		return NULL;
 
-		list_add(&nsm->sm_link, &nsm_handles);
-	}
+	nsm->sm_addr = *sin;
+	nsm->sm_name = (char *) (nsm + 1);
+	memcpy(nsm->sm_name, hostname, hostname_len);
+	nsm->sm_name[hostname_len] = '\0';
+	atomic_set(&nsm->sm_count, 1);
+	goto retry;
 
-out:
-	mutex_unlock(&nsm_mutex);
+found:
+	spin_unlock(&nsm_lock);
 	return nsm;
 }
 
@@ -532,12 +534,9 @@ nsm_release(struct nsm_handle *nsm)
 {
 	if (!nsm)
 		return;
-	if (atomic_dec_and_test(&nsm->sm_count)) {
-		mutex_lock(&nsm_mutex);
-		if (atomic_read(&nsm->sm_count) == 0) {
-			list_del(&nsm->sm_link);
-			kfree(nsm);
-		}
-		mutex_unlock(&nsm_mutex);
+	if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
+		list_del(&nsm->sm_link);
+		spin_unlock(&nsm_lock);
+		kfree(nsm);
 	}
 }
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1ed8bd4de941..66b5c98c7ff5 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -25,6 +25,7 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/mutex.h>
+#include <linux/kthread.h>
 #include <linux/freezer.h>
 
 #include <linux/sunrpc/types.h>
@@ -48,14 +49,11 @@ EXPORT_SYMBOL(nlmsvc_ops);
 
 static DEFINE_MUTEX(nlmsvc_mutex);
 static unsigned int		nlmsvc_users;
-static pid_t			nlmsvc_pid;
+static struct task_struct	*nlmsvc_task;
 static struct svc_serv		*nlmsvc_serv;
 int				nlmsvc_grace_period;
 unsigned long			nlmsvc_timeout;
 
-static DECLARE_COMPLETION(lockd_start_done);
-static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
-
 /*
  * These can be set at insmod time (useful for NFS as root filesystem),
  * and also changed through the sysctl interface.  -- Jamie Lokier, Aug 2003
@@ -111,35 +109,30 @@ static inline void clear_grace_period(void)
 /*
  * This is the lockd kernel thread
  */
-static void
-lockd(struct svc_rqst *rqstp)
+static int
+lockd(void *vrqstp)
 {
 	int		err = 0;
+	struct svc_rqst *rqstp = vrqstp;
 	unsigned long grace_period_expire;
 
-	/* Lock module and set up kernel thread */
-	/* lockd_up is waiting for us to startup, so will
-	 * be holding a reference to this module, so it
-	 * is safe to just claim another reference
-	 */
-	__module_get(THIS_MODULE);
-	lock_kernel();
-
-	/*
-	 * Let our maker know we're running.
-	 */
-	nlmsvc_pid = current->pid;
-	nlmsvc_serv = rqstp->rq_server;
-	complete(&lockd_start_done);
-
-	daemonize("lockd");
+	/* try_to_freeze() is called from svc_recv() */
 	set_freezable();
 
-	/* Process request with signals blocked, but allow SIGKILL.  */
+	/* Allow SIGKILL to tell lockd to drop all of its locks */
 	allow_signal(SIGKILL);
 
 	dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
 
+	/*
+	 * FIXME: it would be nice if lockd didn't spend its entire life
+	 * running under the BKL. At the very least, it would be good to
+	 * have someone clarify what it's intended to protect here. I've
+	 * seen some handwavy posts about posix locking needing to be
+	 * done under the BKL, but it's far from clear.
+	 */
+	lock_kernel();
+
 	if (!nlm_timeout)
 		nlm_timeout = LOCKD_DFLT_TIMEO;
 	nlmsvc_timeout = nlm_timeout * HZ;
@@ -148,10 +141,9 @@ lockd(struct svc_rqst *rqstp)
 
 	/*
 	 * The main request loop. We don't terminate until the last
-	 * NFS mount or NFS daemon has gone away, and we've been sent a
-	 * signal, or else another process has taken over our job.
+	 * NFS mount or NFS daemon has gone away.
 	 */
-	while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) {
+	while (!kthread_should_stop()) {
 		long timeout = MAX_SCHEDULE_TIMEOUT;
 		RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 
@@ -161,6 +153,7 @@ lockd(struct svc_rqst *rqstp)
 				nlmsvc_invalidate_all();
 				grace_period_expire = set_grace_period();
 			}
+			continue;
 		}
 
 		/*
@@ -195,28 +188,19 @@ lockd(struct svc_rqst *rqstp)
 	}
 
 	flush_signals(current);
+	if (nlmsvc_ops)
+		nlmsvc_invalidate_all();
+	nlm_shutdown_hosts();
 
-	/*
-	 * Check whether there's a new lockd process before
-	 * shutting down the hosts and clearing the slot.
-	 */
-	if (!nlmsvc_pid || current->pid == nlmsvc_pid) {
-		if (nlmsvc_ops)
-			nlmsvc_invalidate_all();
-		nlm_shutdown_hosts();
-		nlmsvc_pid = 0;
-		nlmsvc_serv = NULL;
-	} else
-		printk(KERN_DEBUG
-			"lockd: new process, skipping host shutdown\n");
-	wake_up(&lockd_exit);
+	unlock_kernel();
+
+	nlmsvc_task = NULL;
+	nlmsvc_serv = NULL;
 
 	/* Exit the RPC thread */
 	svc_exit_thread(rqstp);
 
-	/* Release module */
-	unlock_kernel();
-	module_put_and_exit(0);
+	return 0;
 }
 
 /*
@@ -261,14 +245,15 @@ static int make_socks(struct svc_serv *serv, int proto)
 int
 lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
 {
-	struct svc_serv *	serv;
-	int			error = 0;
+	struct svc_serv *serv;
+	struct svc_rqst *rqstp;
+	int		error = 0;
 
 	mutex_lock(&nlmsvc_mutex);
 	/*
 	 * Check whether we're already up and running.
 	 */
-	if (nlmsvc_pid) {
+	if (nlmsvc_serv) {
 		if (proto)
 			error = make_socks(nlmsvc_serv, proto);
 		goto out;
@@ -295,13 +280,28 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
 	/*
 	 * Create the kernel thread and wait for it to start.
 	 */
-	error = svc_create_thread(lockd, serv);
-	if (error) {
+	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
+	if (IS_ERR(rqstp)) {
+		error = PTR_ERR(rqstp);
 		printk(KERN_WARNING
-			"lockd_up: create thread failed, error=%d\n", error);
+			"lockd_up: svc_rqst allocation failed, error=%d\n",
+			error);
+		goto destroy_and_out;
+	}
+
+	svc_sock_update_bufs(serv);
+	nlmsvc_serv = rqstp->rq_server;
+
+	nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name);
+	if (IS_ERR(nlmsvc_task)) {
+		error = PTR_ERR(nlmsvc_task);
+		nlmsvc_task = NULL;
+		nlmsvc_serv = NULL;
+		printk(KERN_WARNING
+			"lockd_up: kthread_run failed, error=%d\n", error);
+		svc_exit_thread(rqstp);
 		goto destroy_and_out;
 	}
-	wait_for_completion(&lockd_start_done);
 
 	/*
 	 * Note: svc_serv structures have an initial use count of 1,
@@ -323,37 +323,21 @@ EXPORT_SYMBOL(lockd_up);
 void
 lockd_down(void)
 {
-	static int warned;
-
 	mutex_lock(&nlmsvc_mutex);
 	if (nlmsvc_users) {
 		if (--nlmsvc_users)
 			goto out;
-	} else
-		printk(KERN_WARNING "lockd_down: no users! pid=%d\n", nlmsvc_pid);
-
-	if (!nlmsvc_pid) {
-		if (warned++ == 0)
-			printk(KERN_WARNING "lockd_down: no lockd running.\n"); 
-		goto out;
+	} else {
+		printk(KERN_ERR "lockd_down: no users! task=%p\n",
+			nlmsvc_task);
+		BUG();
 	}
-	warned = 0;
 
-	kill_proc(nlmsvc_pid, SIGKILL, 1);
-	/*
-	 * Wait for the lockd process to exit, but since we're holding
-	 * the lockd semaphore, we can't wait around forever ...
-	 */
-	clear_thread_flag(TIF_SIGPENDING);
-	interruptible_sleep_on_timeout(&lockd_exit, HZ);
-	if (nlmsvc_pid) {
-		printk(KERN_WARNING 
-			"lockd_down: lockd failed to exit, clearing pid\n");
-		nlmsvc_pid = 0;
+	if (!nlmsvc_task) {
+		printk(KERN_ERR "lockd_down: no lockd running.\n");
+		BUG();
 	}
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	kthread_stop(nlmsvc_task);
 out:
 	mutex_unlock(&nlmsvc_mutex);
 }
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index fe9bdb4a220c..4da7c4c27064 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -29,6 +29,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/nlm.h>
 #include <linux/lockd/lockd.h>
+#include <linux/kthread.h>
 
 #define NLMDBG_FACILITY		NLMDBG_SVCLOCK
 
@@ -887,7 +888,7 @@ nlmsvc_retry_blocked(void)
 	unsigned long	timeout = MAX_SCHEDULE_TIMEOUT;
 	struct nlm_block *block;
 
-	while (!list_empty(&nlm_blocked)) {
+	while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
 		block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
 
 		if (block->b_when == NLM_NEVER)
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 068886de4dda..b0ae07008700 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -71,7 +71,8 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
 	struct nlm_share	*share, **shpp;
 	struct xdr_netobj	*oh = &argp->lock.oh;
 
-	for (shpp = &file->f_shares; (share = *shpp) != 0; shpp = &share->s_next) {
+	for (shpp = &file->f_shares; (share = *shpp) != NULL;
+					shpp = &share->s_next) {
 		if (share->s_host == host && nlm_cmp_owner(share, oh)) {
 			*shpp = share->s_next;
 			kfree(share);
diff --git a/fs/locks.c b/fs/locks.c
index f36f0e61558d..1100df6ddb9b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -127,7 +127,6 @@
 #include <linux/rcupdate.h>
 #include <linux/pid_namespace.h>
 
-#include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
diff --git a/fs/mpage.c b/fs/mpage.c
index 5df564366f36..235e4d3873a8 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -325,16 +325,12 @@ confused:
 }
 
 /**
- * mpage_readpages - populate an address space with some pages, and
- *                       start reads against them.
- *
+ * mpage_readpages - populate an address space with some pages & start reads against them
  * @mapping: the address_space
  * @pages: The address of a list_head which contains the target pages.  These
  *   pages have their ->index populated and are otherwise uninitialised.
- *
  *   The page at @pages->prev has the lowest file offset, and reads should be
  *   issued in @pages->prev to @pages->next order.
- *
  * @nr_pages: The number of pages at *@pages
  * @get_block: The filesystem's block mapper function.
  *
@@ -360,6 +356,7 @@ confused:
  * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
  * submitted in the following order:
  * 	12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
+ *
  * because the indirect block has to be read to get the mappings of blocks
  * 13,14,15,16.  Obviously, this impacts performance.
  *
@@ -656,9 +653,7 @@ out:
 }
 
 /**
- * mpage_writepages - walk the list of dirty pages of the given
- * address space and writepage() all of them.
- * 
+ * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
  * @mapping: address space structure to write
  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
  * @get_block: the filesystem's block mapper function.
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 66648dd92d97..2e5de77ff030 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -15,6 +15,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
+#include <linux/kthread.h>
 
 #include <net/inet_sock.h>
 
@@ -27,9 +28,7 @@
 struct nfs_callback_data {
 	unsigned int users;
 	struct svc_serv *serv;
-	pid_t pid;
-	struct completion started;
-	struct completion stopped;
+	struct task_struct *task;
 };
 
 static struct nfs_callback_data nfs_callback_info;
@@ -57,27 +56,20 @@ module_param_call(callback_tcpport, param_set_port, param_get_int,
 /*
  * This is the callback kernel thread.
  */
-static void nfs_callback_svc(struct svc_rqst *rqstp)
+static int
+nfs_callback_svc(void *vrqstp)
 {
 	int err;
+	struct svc_rqst *rqstp = vrqstp;
 
-	__module_get(THIS_MODULE);
-	lock_kernel();
-
-	nfs_callback_info.pid = current->pid;
-	daemonize("nfsv4-svc");
-	/* Process request with signals blocked, but allow SIGKILL.  */
-	allow_signal(SIGKILL);
 	set_freezable();
 
-	complete(&nfs_callback_info.started);
-
-	for(;;) {
-		if (signalled()) {
-			if (nfs_callback_info.users == 0)
-				break;
-			flush_signals(current);
-		}
+	/*
+	 * FIXME: do we really need to run this under the BKL? If so, please
+	 * add a comment about what it's intended to protect.
+	 */
+	lock_kernel();
+	while (!kthread_should_stop()) {
 		/*
 		 * Listen for a request on the socket
 		 */
@@ -92,13 +84,10 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 		}
 		svc_process(rqstp);
 	}
-
-	flush_signals(current);
-	svc_exit_thread(rqstp);
-	nfs_callback_info.pid = 0;
-	complete(&nfs_callback_info.stopped);
 	unlock_kernel();
-	module_put_and_exit(0);
+	nfs_callback_info.task = NULL;
+	svc_exit_thread(rqstp);
+	return 0;
 }
 
 /*
@@ -107,14 +96,13 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 int nfs_callback_up(void)
 {
 	struct svc_serv *serv = NULL;
+	struct svc_rqst *rqstp;
 	int ret = 0;
 
 	lock_kernel();
 	mutex_lock(&nfs_callback_mutex);
-	if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
+	if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
 		goto out;
-	init_completion(&nfs_callback_info.started);
-	init_completion(&nfs_callback_info.stopped);
 	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
 	ret = -ENOMEM;
 	if (!serv)
@@ -127,15 +115,28 @@ int nfs_callback_up(void)
 	nfs_callback_tcpport = ret;
 	dprintk("Callback port = 0x%x\n", nfs_callback_tcpport);
 
-	ret = svc_create_thread(nfs_callback_svc, serv);
-	if (ret < 0)
+	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
+	if (IS_ERR(rqstp)) {
+		ret = PTR_ERR(rqstp);
 		goto out_err;
+	}
+
+	svc_sock_update_bufs(serv);
 	nfs_callback_info.serv = serv;
-	wait_for_completion(&nfs_callback_info.started);
+
+	nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp,
+					     "nfsv4-svc");
+	if (IS_ERR(nfs_callback_info.task)) {
+		ret = PTR_ERR(nfs_callback_info.task);
+		nfs_callback_info.serv = NULL;
+		nfs_callback_info.task = NULL;
+		svc_exit_thread(rqstp);
+		goto out_err;
+	}
 out:
 	/*
 	 * svc_create creates the svc_serv with sv_nrthreads == 1, and then
-	 * svc_create_thread increments that. So we need to call svc_destroy
+	 * svc_prepare_thread increments that. So we need to call svc_destroy
 	 * on both success and failure so that the refcount is 1 when the
 	 * thread exits.
 	 */
@@ -159,12 +160,8 @@ void nfs_callback_down(void)
 	lock_kernel();
 	mutex_lock(&nfs_callback_mutex);
 	nfs_callback_info.users--;
-	do {
-		if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
-			break;
-		if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
-			break;
-	} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
+	if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL)
+		kthread_stop(nfs_callback_info.task);
 	mutex_unlock(&nfs_callback_mutex);
 	unlock_kernel();
 }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c5c0175898f6..06f064d8fbbe 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -170,6 +170,8 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 	BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
 		nfs_idmap_delete(clp);
+
+	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
 #endif
 }
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ae04892a5e5d..6cea7479c5b4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -710,6 +710,8 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 
+	if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags))
+		return 0;
 	if (nd != NULL) {
 		/* VFS wants an on-the-wire revalidation */
 		if (nd->flags & LOOKUP_REVAL)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 16844f98f50e..e44200579c8d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -280,6 +280,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		.rpc_client = NFS_CLIENT(inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs_read_direct_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	unsigned int pgbase;
@@ -323,7 +324,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->inode = inode;
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
-		data->args.context = ctx;
+		data->args.context = get_nfs_open_context(ctx);
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
@@ -446,6 +447,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
 		.callback_ops = &nfs_write_direct_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 
@@ -537,6 +539,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 		.rpc_message = &msg,
 		.callback_ops = &nfs_commit_direct_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 
@@ -546,6 +549,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->args.fh = NFS_FH(data->inode);
 	data->args.offset = 0;
 	data->args.count = 0;
+	data->args.context = get_nfs_open_context(dreq->ctx);
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
@@ -682,6 +686,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		.rpc_client = NFS_CLIENT(inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs_write_direct_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	size_t wsize = NFS_SERVER(inode)->wsize;
@@ -728,7 +733,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->inode = inode;
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
-		data->args.context = ctx;
+		data->args.context = get_nfs_open_context(ctx);
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 966a8850aa30..c49f6d8b42d2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -299,6 +299,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 				else
 					inode->i_op = &nfs_mountpoint_inode_operations;
 				inode->i_fop = NULL;
+				set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags);
 			}
 		} else if (S_ISLNK(inode->i_mode))
 			inode->i_op = &nfs_symlink_inode_operations;
@@ -521,8 +522,12 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 
 static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait)
 {
-	struct inode *inode = ctx->path.dentry->d_inode;
+	struct inode *inode;
 
+	if (ctx == NULL)
+		return;
+
+	inode = ctx->path.dentry->d_inode;
 	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
 		return;
 	list_del(&ctx->list);
@@ -1003,8 +1008,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	server = NFS_SERVER(inode);
 	/* Update the fsid? */
-	if (S_ISDIR(inode->i_mode)
-			&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+	if (S_ISDIR(inode->i_mode) &&
+			!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
+			!test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
 		server->fsid = fattr->fsid;
 
 	/*
@@ -1215,6 +1221,36 @@ static void nfs_destroy_inodecache(void)
 	kmem_cache_destroy(nfs_inode_cachep);
 }
 
+struct workqueue_struct *nfsiod_workqueue;
+
+/*
+ * start up the nfsiod workqueue
+ */
+static int nfsiod_start(void)
+{
+	struct workqueue_struct *wq;
+	dprintk("RPC:       creating workqueue nfsiod\n");
+	wq = create_singlethread_workqueue("nfsiod");
+	if (wq == NULL)
+		return -ENOMEM;
+	nfsiod_workqueue = wq;
+	return 0;
+}
+
+/*
+ * Destroy the nfsiod workqueue
+ */
+static void nfsiod_stop(void)
+{
+	struct workqueue_struct *wq;
+
+	wq = nfsiod_workqueue;
+	if (wq == NULL)
+		return;
+	nfsiod_workqueue = NULL;
+	destroy_workqueue(wq);
+}
+
 /*
  * Initialize NFS
  */
@@ -1222,6 +1258,10 @@ static int __init init_nfs_fs(void)
 {
 	int err;
 
+	err = nfsiod_start();
+	if (err)
+		goto out6;
+
 	err = nfs_fs_proc_init();
 	if (err)
 		goto out5;
@@ -1268,6 +1308,8 @@ out3:
 out4:
 	nfs_fs_proc_exit();
 out5:
+	nfsiod_stop();
+out6:
 	return err;
 }
 
@@ -1283,6 +1325,7 @@ static void __exit exit_nfs_fs(void)
 #endif
 	unregister_nfs_fs();
 	nfs_fs_proc_exit();
+	nfsiod_stop();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0f5619611b8d..4c1122a13844 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -3,6 +3,7 @@
  */
 
 #include <linux/mount.h>
+#include <linux/security.h>
 
 struct nfs_string;
 
@@ -57,6 +58,8 @@ struct nfs_parsed_mount_data {
 		char			*export_path;
 		int			protocol;
 	} nfs_server;
+
+	struct security_mnt_opts lsm_opts;
 };
 
 /* client.c */
@@ -143,6 +146,7 @@ extern struct rpc_procinfo nfs4_procedures[];
 extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
 
 /* inode.c */
+extern struct workqueue_struct *nfsiod_workqueue;
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
 extern int nfs_write_inode(struct inode *,int);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7ce07862c2fb..bbb0d58ee6ac 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
@@ -773,6 +774,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_open_confirm_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	int status;
@@ -910,6 +912,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_open_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	int status;
@@ -1315,6 +1318,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 		.rpc_client = server->client,
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_close_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	int status = -ENOMEM;
@@ -2761,10 +2765,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
+			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
 			nfs4_schedule_state_recovery(clp);
 			if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0)
-				rpc_wake_up_task(task);
+				rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
 			task->tk_status = 0;
 			return -EAGAIN;
 		case -NFS4ERR_DELAY:
@@ -3235,6 +3239,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		.rpc_client = NFS_CLIENT(lsp->ls_state->inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_locku_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 
@@ -3419,6 +3424,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 		.rpc_client = NFS_CLIENT(state->inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_lock_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	int ret;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b962397004c1..7775435ea7a5 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -292,8 +292,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	spin_unlock(&clp->cl_lock);
 	if (sp == new)
 		get_rpccred(cred);
-	else
+	else {
+		rpc_destroy_wait_queue(&new->so_sequence.wait);
 		kfree(new);
+	}
 	return sp;
 }
 
@@ -310,6 +312,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 		return;
 	nfs4_remove_state_owner(clp, sp);
 	spin_unlock(&clp->cl_lock);
+	rpc_destroy_wait_queue(&sp->so_sequence.wait);
 	put_rpccred(cred);
 	kfree(sp);
 }
@@ -529,6 +532,7 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
 	spin_lock(&clp->cl_lock);
 	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
 	spin_unlock(&clp->cl_lock);
+	rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
 	kfree(lsp);
 }
 
@@ -731,7 +735,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
 		list_add_tail(&seqid->list, &sequence->list);
 	if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
 		goto unlock;
-	rpc_sleep_on(&sequence->wait, task, NULL, NULL);
+	rpc_sleep_on(&sequence->wait, task, NULL);
 	status = -EAGAIN;
 unlock:
 	spin_unlock(&sequence->lock);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3d7d9631e125..be9e8270f4d7 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -58,22 +58,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 	return p;
 }
 
-static void nfs_readdata_rcu_free(struct rcu_head *head)
+static void nfs_readdata_free(struct nfs_read_data *p)
 {
-	struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_rdata_mempool);
 }
 
-static void nfs_readdata_free(struct nfs_read_data *rdata)
-{
-	call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
-}
-
 void nfs_readdata_release(void *data)
 {
-        nfs_readdata_free(data);
+	struct nfs_read_data *rdata = data;
+
+	put_nfs_open_context(rdata->args.context);
+	nfs_readdata_free(rdata);
 }
 
 static
@@ -174,6 +171,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 		.rpc_message = &msg,
 		.callback_ops = call_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC | swap_flags,
 	};
 
@@ -186,7 +184,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->args.pgbase = req->wb_pgbase + offset;
 	data->args.pages  = data->pagevec;
 	data->args.count  = count;
-	data->args.context = req->wb_context;
+	data->args.context = get_nfs_open_context(req->wb_context);
 
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 1fb381843650..fcf4b982c885 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -684,8 +684,9 @@ static void nfs_parse_server_address(char *value,
 static int nfs_parse_mount_options(char *raw,
 				   struct nfs_parsed_mount_data *mnt)
 {
-	char *p, *string;
+	char *p, *string, *secdata;
 	unsigned short port = 0;
+	int rc;
 
 	if (!raw) {
 		dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
@@ -693,6 +694,20 @@ static int nfs_parse_mount_options(char *raw,
 	}
 	dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
 
+	secdata = alloc_secdata();
+	if (!secdata)
+		goto out_nomem;
+
+	rc = security_sb_copy_data(raw, secdata);
+	if (rc)
+		goto out_security_failure;
+
+	rc = security_sb_parse_opts_str(secdata, &mnt->lsm_opts);
+	if (rc)
+		goto out_security_failure;
+
+	free_secdata(secdata);
+
 	while ((p = strsep(&raw, ",")) != NULL) {
 		substring_t args[MAX_OPT_ARGS];
 		int option, token;
@@ -1042,7 +1057,10 @@ static int nfs_parse_mount_options(char *raw,
 out_nomem:
 	printk(KERN_INFO "NFS: not enough memory to parse option\n");
 	return 0;
-
+out_security_failure:
+	free_secdata(secdata);
+	printk(KERN_INFO "NFS: security options invalid: %d\n", rc);
+	return 0;
 out_unrec_vers:
 	printk(KERN_INFO "NFS: unrecognized NFS version number\n");
 	return 0;
@@ -1214,6 +1232,33 @@ static int nfs_validate_mount_data(void *options,
 		args->namlen		= data->namlen;
 		args->bsize		= data->bsize;
 		args->auth_flavors[0]	= data->pseudoflavor;
+
+		/*
+		 * The legacy version 6 binary mount data from userspace has a
+		 * field used only to transport selinux information into the
+		 * the kernel.  To continue to support that functionality we
+		 * have a touch of selinux knowledge here in the NFS code. The
+		 * userspace code converted context=blah to just blah so we are
+		 * converting back to the full string selinux understands.
+		 */
+		if (data->context[0]){
+#ifdef CONFIG_SECURITY_SELINUX
+			int rc;
+			char *opts_str = kmalloc(sizeof(data->context) + 8, GFP_KERNEL);
+			if (!opts_str)
+				return -ENOMEM;
+			strcpy(opts_str, "context=");
+			data->context[NFS_MAX_CONTEXT_LEN] = '\0';
+			strcat(opts_str, &data->context[0]);
+			rc = security_sb_parse_opts_str(opts_str, &args->lsm_opts);
+			kfree(opts_str);
+			if (rc)
+				return rc;
+#else
+			return -EINVAL;
+#endif
+		}
+
 		break;
 	default: {
 		unsigned int len;
@@ -1476,6 +1521,8 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	};
 	int error;
 
+	security_init_mnt_opts(&data.lsm_opts);
+
 	/* Validate the mount data */
 	error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
 	if (error < 0)
@@ -1515,6 +1562,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 		goto error_splat_super;
 	}
 
+	error = security_sb_set_mnt_opts(s, &data.lsm_opts);
+	if (error)
+		goto error_splat_root;
+
 	s->s_flags |= MS_ACTIVE;
 	mnt->mnt_sb = s;
 	mnt->mnt_root = mntroot;
@@ -1523,12 +1574,15 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 out:
 	kfree(data.nfs_server.hostname);
 	kfree(data.mount_server.hostname);
+	security_free_mnt_opts(&data.lsm_opts);
 	return error;
 
 out_err_nosb:
 	nfs_free_server(server);
 	goto out;
 
+error_splat_root:
+	dput(mntroot);
 error_splat_super:
 	up_write(&s->s_umount);
 	deactivate_super(s);
@@ -1608,6 +1662,9 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	mnt->mnt_sb = s;
 	mnt->mnt_root = mntroot;
 
+	/* clone any lsm security options from the parent to the new sb */
+	security_sb_clone_mnt_opts(data->sb, s);
+
 	dprintk("<-- nfs_xdev_get_sb() = 0\n");
 	return 0;
 
@@ -1850,6 +1907,8 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	};
 	int error;
 
+	security_init_mnt_opts(&data.lsm_opts);
+
 	/* Validate the mount data */
 	error = nfs4_validate_mount_data(raw_data, &data, dev_name);
 	if (error < 0)
@@ -1898,6 +1957,7 @@ out:
 	kfree(data.client_address);
 	kfree(data.nfs_server.export_path);
 	kfree(data.nfs_server.hostname);
+	security_free_mnt_opts(&data.lsm_opts);
 	return error;
 
 out_free:
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 83e865a16ad1..412738dbfbc7 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -10,7 +10,6 @@
  *  nfs symlink handling code
  */
 
-#define NFS_NEED_XDR_TYPES
 #include <linux/time.h>
 #include <linux/errno.h>
 #include <linux/sunrpc/clnt.h>
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f55c437124a2..1667e3984418 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -58,19 +58,13 @@ struct nfs_write_data *nfs_commit_alloc(void)
 	return p;
 }
 
-static void nfs_commit_rcu_free(struct rcu_head *head)
+void nfs_commit_free(struct nfs_write_data *p)
 {
-	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 
-void nfs_commit_free(struct nfs_write_data *wdata)
-{
-	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
-}
-
 struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
@@ -92,21 +86,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 	return p;
 }
 
-static void nfs_writedata_rcu_free(struct rcu_head *head)
+static void nfs_writedata_free(struct nfs_write_data *p)
 {
-	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_wdata_mempool);
 }
 
-static void nfs_writedata_free(struct nfs_write_data *wdata)
+void nfs_writedata_release(void *data)
 {
-	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
-}
+	struct nfs_write_data *wdata = data;
 
-void nfs_writedata_release(void *wdata)
-{
+	put_nfs_open_context(wdata->args.context);
 	nfs_writedata_free(wdata);
 }
 
@@ -360,15 +351,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 /*
  * Insert a write request into an inode
  */
-static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int error;
 
 	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
-	BUG_ON(error == -EEXIST);
-	if (error)
-		return error;
+	BUG_ON(error);
 	if (!nfsi->npages) {
 		igrab(inode);
 		if (nfs_have_delegation(inode, FMODE_WRITE))
@@ -378,8 +367,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	set_page_private(req->wb_page, (unsigned long)req);
 	nfsi->npages++;
 	kref_get(&req->wb_kref);
-	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
-	return 0;
+	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
+				NFS_PAGE_TAG_LOCKED);
 }
 
 /*
@@ -591,6 +580,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 		/* Loop over all inode entries and see if we find
 		 * A request for the page we wish to update
 		 */
+		if (new) {
+			if (radix_tree_preload(GFP_NOFS)) {
+				nfs_release_request(new);
+				return ERR_PTR(-ENOMEM);
+			}
+		}
+
 		spin_lock(&inode->i_lock);
 		req = nfs_page_find_request_locked(page);
 		if (req) {
@@ -601,28 +597,27 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				error = nfs_wait_on_request(req);
 				nfs_release_request(req);
 				if (error < 0) {
-					if (new)
+					if (new) {
+						radix_tree_preload_end();
 						nfs_release_request(new);
+					}
 					return ERR_PTR(error);
 				}
 				continue;
 			}
 			spin_unlock(&inode->i_lock);
-			if (new)
+			if (new) {
+				radix_tree_preload_end();
 				nfs_release_request(new);
+			}
 			break;
 		}
 
 		if (new) {
-			int error;
 			nfs_lock_request_dontget(new);
-			error = nfs_inode_add_request(inode, new);
-			if (error) {
-				spin_unlock(&inode->i_lock);
-				nfs_unlock_request(new);
-				return ERR_PTR(error);
-			}
+			nfs_inode_add_request(inode, new);
 			spin_unlock(&inode->i_lock);
+			radix_tree_preload_end();
 			req = new;
 			goto zero_page;
 		}
@@ -734,7 +729,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 	 */
 	if (nfs_write_pageuptodate(page, inode) &&
 			inode->i_flock == NULL &&
-			!(file->f_mode & O_SYNC)) {
+			!(file->f_flags & O_SYNC)) {
 		count = max(count + offset, nfs_page_length(page));
 		offset = 0;
 	}
@@ -800,6 +795,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		.rpc_message = &msg,
 		.callback_ops = call_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = flags,
 		.priority = priority,
 	};
@@ -816,7 +812,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	data->args.pgbase = req->wb_pgbase + offset;
 	data->args.pages  = data->pagevec;
 	data->args.count  = count;
-	data->args.context = req->wb_context;
+	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.stable  = NFS_UNSTABLE;
 	if (how & FLUSH_STABLE) {
 		data->args.stable = NFS_DATA_SYNC;
@@ -1153,8 +1149,11 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-void nfs_commit_release(void *wdata)
+void nfs_commit_release(void *data)
 {
+	struct nfs_write_data *wdata = data;
+
+	put_nfs_open_context(wdata->args.context);
 	nfs_commit_free(wdata);
 }
 
@@ -1181,6 +1180,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 		.rpc_message = &msg,
 		.callback_ops = &nfs_commit_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = flags,
 		.priority = priority,
 	};
@@ -1197,6 +1197,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	/* Note: we always request a commit of the entire inode */
 	data->args.offset = 0;
 	data->args.count  = 0;
+	data->args.context = get_nfs_open_context(first->wb_context);
 	data->res.count   = 0;
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index d13403e33622..294992e9bf69 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -10,6 +10,7 @@
 #include <linux/sunrpc/svcauth.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/export.h>
+#include "auth.h"
 
 int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
 {
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8a6f7c924c75..33bfcf09db46 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -35,6 +35,7 @@
 #include <linux/lockd/bind.h>
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/gss_api.h>
+#include <net/ipv6.h>
 
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
@@ -1548,6 +1549,7 @@ exp_addclient(struct nfsctl_client *ncp)
 {
 	struct auth_domain	*dom;
 	int			i, err;
+	struct in6_addr addr6;
 
 	/* First, consistency check. */
 	err = -EINVAL;
@@ -1566,9 +1568,10 @@ exp_addclient(struct nfsctl_client *ncp)
 		goto out_unlock;
 
 	/* Insert client into hashtable. */
-	for (i = 0; i < ncp->cl_naddr; i++)
-		auth_unix_add_addr(ncp->cl_addrlist[i], dom);
-
+	for (i = 0; i < ncp->cl_naddr; i++) {
+		ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
+		auth_unix_add_addr(&addr6, dom);
+	}
 	auth_unix_forget_old(dom);
 	auth_domain_put(dom);
 
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 996bd88b75ba..5b398421b051 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -202,7 +202,7 @@ static struct cache_detail idtoname_cache = {
 	.alloc		= ent_alloc,
 };
 
-int
+static int
 idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
 {
 	struct ent ent, *res;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bcb97d8e8b8b..b98b12fcc3eb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1638,6 +1638,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	locks_init_lock(&fl);
 	fl.fl_lmops = &nfsd_lease_mng_ops;
 	fl.fl_flags = FL_LEASE;
+	fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
 	fl.fl_end = OFFSET_MAX;
 	fl.fl_owner =  (fl_owner_t)dp;
 	fl.fl_file = stp->st_vfs_file;
@@ -1646,8 +1647,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	/* vfs_setlease checks to see if delegation should be handed out.
 	 * the lock_manager callbacks fl_mylease and fl_change are used
 	 */
-	if ((status = vfs_setlease(stp->st_vfs_file,
-		flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
+	if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) {
 		dprintk("NFSD: setlease failed [%d], no delegation\n", status);
 		unhash_delegation(dp);
 		flag = NFS4_OPEN_DELEGATE_NONE;
@@ -1762,10 +1762,6 @@ out:
 	return status;
 }
 
-static struct workqueue_struct *laundry_wq;
-static void laundromat_main(struct work_struct *);
-static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
-
 __be32
 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    clientid_t *clid)
@@ -1873,7 +1869,11 @@ nfs4_laundromat(void)
 	return clientid_val;
 }
 
-void
+static struct workqueue_struct *laundry_wq;
+static void laundromat_main(struct work_struct *);
+static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
+
+static void
 laundromat_main(struct work_struct *not_used)
 {
 	time_t t;
@@ -1974,6 +1974,26 @@ io_during_grace_disallowed(struct inode *inode, int flags)
 		&& mandatory_lock(inode);
 }
 
+static int check_stateid_generation(stateid_t *in, stateid_t *ref)
+{
+	/* If the client sends us a stateid from the future, it's buggy: */
+	if (in->si_generation > ref->si_generation)
+		return nfserr_bad_stateid;
+	/*
+	 * The following, however, can happen.  For example, if the
+	 * client sends an open and some IO at the same time, the open
+	 * may bump si_generation while the IO is still in flight.
+	 * Thanks to hard links and renames, the client never knows what
+	 * file an open will affect.  So it could avoid that situation
+	 * only by serializing all opens and IO from the same open
+	 * owner.  To recover from the old_stateid error, the client
+	 * will just have to retry the IO:
+	 */
+	if (in->si_generation < ref->si_generation)
+		return nfserr_old_stateid;
+	return nfs_ok;
+}
+
 /*
 * Checks for stateid operations
 */
@@ -2022,12 +2042,8 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl
 			goto out;
 		stidp = &stp->st_stateid;
 	}
-	if (stateid->si_generation > stidp->si_generation)
-		goto out;
-
-	/* OLD STATEID */
-	status = nfserr_old_stateid;
-	if (stateid->si_generation < stidp->si_generation)
+	status = check_stateid_generation(stateid, stidp);
+	if (status)
 		goto out;
 	if (stp) {
 		if ((status = nfs4_check_openmode(stp,flags)))
@@ -2035,7 +2051,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl
 		renew_client(stp->st_stateowner->so_client);
 		if (filpp)
 			*filpp = stp->st_vfs_file;
-	} else if (dp) {
+	} else {
 		if ((status = nfs4_check_delegmode(dp, flags)))
 			goto out;
 		renew_client(dp->dl_client);
@@ -2064,6 +2080,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
 {
 	struct nfs4_stateid *stp;
 	struct nfs4_stateowner *sop;
+	__be32 status;
 
 	dprintk("NFSD: preprocess_seqid_op: seqid=%d " 
 			"stateid = (%08x/%08x/%08x/%08x)\n", seqid,
@@ -2126,7 +2143,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
                }
 	}
 
-	if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
+	if (nfs4_check_fh(current_fh, stp)) {
 		dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
 		return nfserr_bad_stateid;
 	}
@@ -2149,15 +2166,9 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
 				" confirmed yet!\n");
 		return nfserr_bad_stateid;
 	}
-	if (stateid->si_generation > stp->st_stateid.si_generation) {
-		dprintk("NFSD: preprocess_seqid_op: future stateid?!\n");
-		return nfserr_bad_stateid;
-	}
-
-	if (stateid->si_generation < stp->st_stateid.si_generation) {
-		dprintk("NFSD: preprocess_seqid_op: old stateid!\n");
-		return nfserr_old_stateid;
-	}
+	status = check_stateid_generation(stateid, &stp->st_stateid);
+	if (status)
+		return status;
 	renew_client(sop->so_client);
 	return nfs_ok;
 
@@ -2193,7 +2204,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 					oc->oc_seqid, &oc->oc_req_stateid,
-					CHECK_FH | CONFIRM | OPEN_STATE,
+					CONFIRM | OPEN_STATE,
 					&oc->oc_stateowner, &stp, NULL)))
 		goto out; 
 
@@ -2264,7 +2275,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 	if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 					od->od_seqid,
 					&od->od_stateid, 
-					CHECK_FH | OPEN_STATE, 
+					OPEN_STATE,
 					&od->od_stateowner, &stp, NULL)))
 		goto out; 
 
@@ -2317,7 +2328,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 					close->cl_seqid,
 					&close->cl_stateid, 
-					CHECK_FH | OPEN_STATE | CLOSE_STATE,
+					OPEN_STATE | CLOSE_STATE,
 					&close->cl_stateowner, &stp, NULL)))
 		goto out; 
 	status = nfs_ok;
@@ -2622,7 +2633,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 				        lock->lk_new_open_seqid,
 		                        &lock->lk_new_open_stateid,
-		                        CHECK_FH | OPEN_STATE,
+					OPEN_STATE,
 		                        &lock->lk_replay_owner, &open_stp,
 					lock);
 		if (status)
@@ -2649,7 +2660,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 				       lock->lk_old_lock_seqid, 
 				       &lock->lk_old_lock_stateid, 
-				       CHECK_FH | LOCK_STATE, 
+				       LOCK_STATE,
 				       &lock->lk_replay_owner, &lock_stp, lock);
 		if (status)
 			goto out;
@@ -2846,7 +2857,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 					locku->lu_seqid, 
 					&locku->lu_stateid, 
-					CHECK_FH | LOCK_STATE, 
+					LOCK_STATE,
 					&locku->lu_stateowner, &stp, NULL)))
 		goto out;
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0e6a179eccaf..1ba7ad981935 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1867,6 +1867,15 @@ out_serverfault:
 	goto out;
 }
 
+static inline int attributes_need_mount(u32 *bmval)
+{
+	if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME))
+		return 1;
+	if (bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID)
+		return 1;
+	return 0;
+}
+
 static __be32
 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 		const char *name, int namlen, __be32 *p, int *buflen)
@@ -1888,9 +1897,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 	 * we will not follow the cross mount and will fill the attribtutes
 	 * directly from the mountpoint dentry.
 	 */
-	if (d_mountpoint(dentry) &&
-	    (cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 &&
-	    (cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0)
+	if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval))
 		ignore_crossmnt = 1;
 	else if (d_mountpoint(dentry)) {
 		int err;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 8516137cdbb0..613bcb8171a5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -37,6 +37,7 @@
 #include <linux/nfsd/syscall.h>
 
 #include <asm/uaccess.h>
+#include <net/ipv6.h>
 
 /*
  *	We have a single directory with 9 nodes in it.
@@ -149,7 +150,6 @@ static const struct file_operations transaction_ops = {
 	.release	= simple_transaction_release,
 };
 
-extern struct seq_operations nfs_exports_op;
 static int exports_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &nfs_exports_op);
@@ -222,6 +222,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
 	struct auth_domain *clp;
 	int err = 0;
 	struct knfsd_fh *res;
+	struct in6_addr in6;
 
 	if (size < sizeof(*data))
 		return -EINVAL;
@@ -236,7 +237,11 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
 	res = (struct knfsd_fh*)buf;
 
 	exp_readlock();
-	if (!(clp = auth_unix_lookup(sin->sin_addr)))
+
+	ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
+
+	clp = auth_unix_lookup(&in6);
+	if (!clp)
 		err = -EPERM;
 	else {
 		err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen);
@@ -257,6 +262,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
 	int err = 0;
 	struct knfsd_fh fh;
 	char *res;
+	struct in6_addr in6;
 
 	if (size < sizeof(*data))
 		return -EINVAL;
@@ -271,7 +277,11 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
 	res = buf;
 	sin = (struct sockaddr_in *)&data->gd_addr;
 	exp_readlock();
-	if (!(clp = auth_unix_lookup(sin->sin_addr)))
+
+	ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
+
+	clp = auth_unix_lookup(&in6);
+	if (!clp)
 		err = -EPERM;
 	else {
 		err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE);
@@ -347,8 +357,6 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
 	return mesg - buf;	
 }
 
-extern int nfsd_nrthreads(void);
-
 static ssize_t write_threads(struct file *file, char *buf, size_t size)
 {
 	/* if size > 0, look for a number of threads and call nfsd_svc
@@ -371,10 +379,6 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
 	return strlen(buf);
 }
 
-extern int nfsd_nrpools(void);
-extern int nfsd_get_nrthreads(int n, int *);
-extern int nfsd_set_nrthreads(int n, int *);
-
 static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 {
 	/* if size > 0, look for an array of number of threads per node
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9647b0f7bc0c..941041f4b136 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -244,7 +244,6 @@ static int nfsd_init_socks(int port)
 	if (error < 0)
 		return error;
 
-#ifdef CONFIG_NFSD_TCP
 	error = lockd_up(IPPROTO_TCP);
 	if (error >= 0) {
 		error = svc_create_xprt(nfsd_serv, "tcp", port,
@@ -254,7 +253,6 @@ static int nfsd_init_socks(int port)
 	}
 	if (error < 0)
 		return error;
-#endif
 	return 0;
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 46f59d5365a0..17ac51b5df65 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -988,7 +988,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	 * flushing the data to disk is handled separately below.
 	 */
 
-	if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */
+	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
 	       stable = 2;
 	       *stablep = 2; /* FILE_SYNC */
 	}
@@ -1152,7 +1152,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 #endif /* CONFIG_NFSD_V3 */
 
-__be32
+static __be32
 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
 			struct iattr *iap)
 {
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 447206eb5c2e..41f84c92094f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
 	BUG_ON(!next_free);
 
 	/* The tree code before us didn't allow enough room in the leaf. */
-	if (el->l_next_free_rec == el->l_count && !has_empty)
-		BUG();
+	BUG_ON(el->l_next_free_rec == el->l_count && !has_empty);
 
 	/*
 	 * The easiest way to approach this is to just remove the
@@ -1450,6 +1449,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
  *   - When our insert into the right path leaf is at the leftmost edge
  *     and requires an update of the path immediately to it's left. This
  *     can occur at the end of some types of rotation and appending inserts.
+ *   - When we've adjusted the last extent record in the left path leaf and the
+ *     1st extent record in the right path leaf during cross extent block merge.
  */
 static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
 				       struct ocfs2_path *left_path,
@@ -2712,24 +2713,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
 	}
 }
 
+static int ocfs2_get_right_path(struct inode *inode,
+				struct ocfs2_path *left_path,
+				struct ocfs2_path **ret_right_path)
+{
+	int ret;
+	u32 right_cpos;
+	struct ocfs2_path *right_path = NULL;
+	struct ocfs2_extent_list *left_el;
+
+	*ret_right_path = NULL;
+
+	/* This function shouldn't be called for non-trees. */
+	BUG_ON(left_path->p_tree_depth == 0);
+
+	left_el = path_leaf_el(left_path);
+	BUG_ON(left_el->l_next_free_rec != left_el->l_count);
+
+	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
+					     &right_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* This function shouldn't be called for the rightmost leaf. */
+	BUG_ON(right_cpos == 0);
+
+	right_path = ocfs2_new_path(path_root_bh(left_path),
+				    path_root_el(left_path));
+	if (!right_path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_path(inode, right_path, right_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	*ret_right_path = right_path;
+out:
+	if (ret)
+		ocfs2_free_path(right_path);
+	return ret;
+}
+
 /*
  * Remove split_rec clusters from the record at index and merge them
- * onto the beginning of the record at index + 1.
+ * onto the beginning of the record "next" to it.
+ * For index < l_count - 1, the next means the extent rec at index + 1.
+ * For index == l_count - 1, the "next" means the 1st extent rec of the
+ * next extent block.
  */
-static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
-				handle_t *handle,
-				struct ocfs2_extent_rec *split_rec,
-				struct ocfs2_extent_list *el, int index)
+static int ocfs2_merge_rec_right(struct inode *inode,
+				 struct ocfs2_path *left_path,
+				 handle_t *handle,
+				 struct ocfs2_extent_rec *split_rec,
+				 int index)
 {
-	int ret;
+	int ret, next_free, i;
 	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
 	struct ocfs2_extent_rec *left_rec;
 	struct ocfs2_extent_rec *right_rec;
+	struct ocfs2_extent_list *right_el;
+	struct ocfs2_path *right_path = NULL;
+	int subtree_index = 0;
+	struct ocfs2_extent_list *el = path_leaf_el(left_path);
+	struct buffer_head *bh = path_leaf_bh(left_path);
+	struct buffer_head *root_bh = NULL;
 
 	BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
-
 	left_rec = &el->l_recs[index];
-	right_rec = &el->l_recs[index + 1];
+
+	if (index == le16_to_cpu(el->l_next_free_rec - 1) &&
+	    le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
+		/* we meet with a cross extent block merge. */
+		ret = ocfs2_get_right_path(inode, left_path, &right_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		right_el = path_leaf_el(right_path);
+		next_free = le16_to_cpu(right_el->l_next_free_rec);
+		BUG_ON(next_free <= 0);
+		right_rec = &right_el->l_recs[0];
+		if (ocfs2_is_empty_extent(right_rec)) {
+			BUG_ON(le16_to_cpu(next_free) <= 1);
+			right_rec = &right_el->l_recs[1];
+		}
+
+		BUG_ON(le32_to_cpu(left_rec->e_cpos) +
+		       le16_to_cpu(left_rec->e_leaf_clusters) !=
+		       le32_to_cpu(right_rec->e_cpos));
+
+		subtree_index = ocfs2_find_subtree_root(inode,
+							left_path, right_path);
+
+		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
+						      handle->h_buffer_credits,
+						      right_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		root_bh = left_path->p_node[subtree_index].bh;
+		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
+
+		ret = ocfs2_journal_access(handle, inode, root_bh,
+					   OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		for (i = subtree_index + 1;
+		     i < path_num_items(right_path); i++) {
+			ret = ocfs2_journal_access(handle, inode,
+						   right_path->p_node[i].bh,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			ret = ocfs2_journal_access(handle, inode,
+						   left_path->p_node[i].bh,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+	} else {
+		BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1);
+		right_rec = &el->l_recs[index + 1];
+	}
 
 	ret = ocfs2_journal_access(handle, inode, bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2751,30 +2875,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
 	if (ret)
 		mlog_errno(ret);
 
+	if (right_path) {
+		ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
+		if (ret)
+			mlog_errno(ret);
+
+		ocfs2_complete_edge_insert(inode, handle, left_path,
+					   right_path, subtree_index);
+	}
+out:
+	if (right_path)
+		ocfs2_free_path(right_path);
+	return ret;
+}
+
+static int ocfs2_get_left_path(struct inode *inode,
+			       struct ocfs2_path *right_path,
+			       struct ocfs2_path **ret_left_path)
+{
+	int ret;
+	u32 left_cpos;
+	struct ocfs2_path *left_path = NULL;
+
+	*ret_left_path = NULL;
+
+	/* This function shouldn't be called for non-trees. */
+	BUG_ON(right_path->p_tree_depth == 0);
+
+	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
+					    right_path, &left_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* This function shouldn't be called for the leftmost leaf. */
+	BUG_ON(left_cpos == 0);
+
+	left_path = ocfs2_new_path(path_root_bh(right_path),
+				   path_root_el(right_path));
+	if (!left_path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_path(inode, left_path, left_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	*ret_left_path = left_path;
 out:
+	if (ret)
+		ocfs2_free_path(left_path);
 	return ret;
 }
 
 /*
  * Remove split_rec clusters from the record at index and merge them
- * onto the tail of the record at index - 1.
+ * onto the tail of the record "before" it.
+ * For index > 0, the "before" means the extent rec at index - 1.
+ *
+ * For index == 0, the "before" means the last record of the previous
+ * extent block. And there is also a situation that we may need to
+ * remove the rightmost leaf extent block in the right_path and change
+ * the right path to indicate the new rightmost path.
  */
-static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
+static int ocfs2_merge_rec_left(struct inode *inode,
+				struct ocfs2_path *right_path,
 				handle_t *handle,
 				struct ocfs2_extent_rec *split_rec,
-				struct ocfs2_extent_list *el, int index)
+				struct ocfs2_cached_dealloc_ctxt *dealloc,
+				int index)
 {
-	int ret, has_empty_extent = 0;
+	int ret, i, subtree_index = 0, has_empty_extent = 0;
 	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
 	struct ocfs2_extent_rec *left_rec;
 	struct ocfs2_extent_rec *right_rec;
+	struct ocfs2_extent_list *el = path_leaf_el(right_path);
+	struct buffer_head *bh = path_leaf_bh(right_path);
+	struct buffer_head *root_bh = NULL;
+	struct ocfs2_path *left_path = NULL;
+	struct ocfs2_extent_list *left_el;
 
-	BUG_ON(index <= 0);
+	BUG_ON(index < 0);
 
-	left_rec = &el->l_recs[index - 1];
 	right_rec = &el->l_recs[index];
-	if (ocfs2_is_empty_extent(&el->l_recs[0]))
-		has_empty_extent = 1;
+	if (index == 0) {
+		/* we meet with a cross extent block merge. */
+		ret = ocfs2_get_left_path(inode, right_path, &left_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		left_el = path_leaf_el(left_path);
+		BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
+		       le16_to_cpu(left_el->l_count));
+
+		left_rec = &left_el->l_recs[
+				le16_to_cpu(left_el->l_next_free_rec) - 1];
+		BUG_ON(le32_to_cpu(left_rec->e_cpos) +
+		       le16_to_cpu(left_rec->e_leaf_clusters) !=
+		       le32_to_cpu(split_rec->e_cpos));
+
+		subtree_index = ocfs2_find_subtree_root(inode,
+							left_path, right_path);
+
+		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
+						      handle->h_buffer_credits,
+						      left_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		root_bh = left_path->p_node[subtree_index].bh;
+		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
+
+		ret = ocfs2_journal_access(handle, inode, root_bh,
+					   OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		for (i = subtree_index + 1;
+		     i < path_num_items(right_path); i++) {
+			ret = ocfs2_journal_access(handle, inode,
+						   right_path->p_node[i].bh,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			ret = ocfs2_journal_access(handle, inode,
+						   left_path->p_node[i].bh,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+	} else {
+		left_rec = &el->l_recs[index - 1];
+		if (ocfs2_is_empty_extent(&el->l_recs[0]))
+			has_empty_extent = 1;
+	}
 
 	ret = ocfs2_journal_access(handle, inode, bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2790,9 +3040,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
 		*left_rec = *split_rec;
 
 		has_empty_extent = 0;
-	} else {
+	} else
 		le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
-	}
 
 	le32_add_cpu(&right_rec->e_cpos, split_clusters);
 	le64_add_cpu(&right_rec->e_blkno,
@@ -2805,13 +3054,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
 	if (ret)
 		mlog_errno(ret);
 
+	if (left_path) {
+		ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
+		if (ret)
+			mlog_errno(ret);
+
+		/*
+		 * In the situation that the right_rec is empty and the extent
+		 * block is empty also,  ocfs2_complete_edge_insert can't handle
+		 * it and we need to delete the right extent block.
+		 */
+		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
+		    le16_to_cpu(el->l_next_free_rec) == 1) {
+
+			ret = ocfs2_remove_rightmost_path(inode, handle,
+							  right_path, dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			/* Now the rightmost extent block has been deleted.
+			 * So we use the new rightmost path.
+			 */
+			ocfs2_mv_path(right_path, left_path);
+			left_path = NULL;
+		} else
+			ocfs2_complete_edge_insert(inode, handle, left_path,
+						   right_path, subtree_index);
+	}
 out:
+	if (left_path)
+		ocfs2_free_path(left_path);
 	return ret;
 }
 
 static int ocfs2_try_to_merge_extent(struct inode *inode,
 				     handle_t *handle,
-				     struct ocfs2_path *left_path,
+				     struct ocfs2_path *path,
 				     int split_index,
 				     struct ocfs2_extent_rec *split_rec,
 				     struct ocfs2_cached_dealloc_ctxt *dealloc,
@@ -2819,7 +3099,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 
 {
 	int ret = 0;
-	struct ocfs2_extent_list *el = path_leaf_el(left_path);
+	struct ocfs2_extent_list *el = path_leaf_el(path);
 	struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
 
 	BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
@@ -2832,7 +3112,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * extents - having more than one in a leaf is
 		 * illegal.
 		 */
-		ret = ocfs2_rotate_tree_left(inode, handle, left_path,
+		ret = ocfs2_rotate_tree_left(inode, handle, path,
 					     dealloc);
 		if (ret) {
 			mlog_errno(ret);
@@ -2847,7 +3127,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * Left-right contig implies this.
 		 */
 		BUG_ON(!ctxt->c_split_covers_rec);
-		BUG_ON(split_index == 0);
 
 		/*
 		 * Since the leftright insert always covers the entire
@@ -2858,9 +3137,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * Since the adding of an empty extent shifts
 		 * everything back to the right, there's no need to
 		 * update split_index here.
+		 *
+		 * When the split_index is zero, we need to merge it to the
+		 * prevoius extent block. It is more efficient and easier
+		 * if we do merge_right first and merge_left later.
 		 */
-		ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
-					   handle, split_rec, el, split_index);
+		ret = ocfs2_merge_rec_right(inode, path,
+					    handle, split_rec,
+					    split_index);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2871,32 +3155,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 */
 		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
 
-		/*
-		 * The left merge left us with an empty extent, remove
-		 * it.
-		 */
-		ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc);
+		/* The merge left us with an empty extent, remove it. */
+		ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
-		split_index--;
+
 		rec = &el->l_recs[split_index];
 
 		/*
 		 * Note that we don't pass split_rec here on purpose -
-		 * we've merged it into the left side.
+		 * we've merged it into the rec already.
 		 */
-		ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path),
-					    handle, rec, el, split_index);
+		ret = ocfs2_merge_rec_left(inode, path,
+					   handle, rec,
+					   dealloc,
+					   split_index);
+
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
-
-		ret = ocfs2_rotate_tree_left(inode, handle, left_path,
+		ret = ocfs2_rotate_tree_left(inode, handle, path,
 					     dealloc);
 		/*
 		 * Error from this last rotate is not critical, so
@@ -2915,8 +3197,9 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 */
 		if (ctxt->c_contig_type == CONTIG_RIGHT) {
 			ret = ocfs2_merge_rec_left(inode,
-						   path_leaf_bh(left_path),
-						   handle, split_rec, el,
+						   path,
+						   handle, split_rec,
+						   dealloc,
 						   split_index);
 			if (ret) {
 				mlog_errno(ret);
@@ -2924,8 +3207,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 			}
 		} else {
 			ret = ocfs2_merge_rec_right(inode,
-						    path_leaf_bh(left_path),
-						    handle, split_rec, el,
+						    path,
+						    handle, split_rec,
 						    split_index);
 			if (ret) {
 				mlog_errno(ret);
@@ -2938,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 			 * The merge may have left an empty extent in
 			 * our leaf. Try to rotate it away.
 			 */
-			ret = ocfs2_rotate_tree_left(inode, handle, left_path,
+			ret = ocfs2_rotate_tree_left(inode, handle, path,
 						     dealloc);
 			if (ret)
 				mlog_errno(ret);
@@ -3498,20 +3781,57 @@ out:
 }
 
 static enum ocfs2_contig_type
-ocfs2_figure_merge_contig_type(struct inode *inode,
+ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			       struct ocfs2_extent_list *el, int index,
 			       struct ocfs2_extent_rec *split_rec)
 {
-	struct ocfs2_extent_rec *rec;
+	int status;
 	enum ocfs2_contig_type ret = CONTIG_NONE;
+	u32 left_cpos, right_cpos;
+	struct ocfs2_extent_rec *rec = NULL;
+	struct ocfs2_extent_list *new_el;
+	struct ocfs2_path *left_path = NULL, *right_path = NULL;
+	struct buffer_head *bh;
+	struct ocfs2_extent_block *eb;
+
+	if (index > 0) {
+		rec = &el->l_recs[index - 1];
+	} else if (path->p_tree_depth > 0) {
+		status = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
+						       path, &left_cpos);
+		if (status)
+			goto out;
+
+		if (left_cpos != 0) {
+			left_path = ocfs2_new_path(path_root_bh(path),
+						   path_root_el(path));
+			if (!left_path)
+				goto out;
+
+			status = ocfs2_find_path(inode, left_path, left_cpos);
+			if (status)
+				goto out;
+
+			new_el = path_leaf_el(left_path);
+
+			if (le16_to_cpu(new_el->l_next_free_rec) !=
+			    le16_to_cpu(new_el->l_count)) {
+				bh = path_leaf_bh(left_path);
+				eb = (struct ocfs2_extent_block *)bh->b_data;
+				OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
+								 eb);
+				goto out;
+			}
+			rec = &new_el->l_recs[
+				le16_to_cpu(new_el->l_next_free_rec) - 1];
+		}
+	}
 
 	/*
 	 * We're careful to check for an empty extent record here -
 	 * the merge code will know what to do if it sees one.
 	 */
-
-	if (index > 0) {
-		rec = &el->l_recs[index - 1];
+	if (rec) {
 		if (index == 1 && ocfs2_is_empty_extent(rec)) {
 			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
 				ret = CONTIG_RIGHT;
@@ -3520,10 +3840,45 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
 		}
 	}
 
-	if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) {
+	rec = NULL;
+	if (index < (le16_to_cpu(el->l_next_free_rec) - 1))
+		rec = &el->l_recs[index + 1];
+	else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
+		 path->p_tree_depth > 0) {
+		status = ocfs2_find_cpos_for_right_leaf(inode->i_sb,
+							path, &right_cpos);
+		if (status)
+			goto out;
+
+		if (right_cpos == 0)
+			goto out;
+
+		right_path = ocfs2_new_path(path_root_bh(path),
+					    path_root_el(path));
+		if (!right_path)
+			goto out;
+
+		status = ocfs2_find_path(inode, right_path, right_cpos);
+		if (status)
+			goto out;
+
+		new_el = path_leaf_el(right_path);
+		rec = &new_el->l_recs[0];
+		if (ocfs2_is_empty_extent(rec)) {
+			if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
+				bh = path_leaf_bh(right_path);
+				eb = (struct ocfs2_extent_block *)bh->b_data;
+				OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
+								 eb);
+				goto out;
+			}
+			rec = &new_el->l_recs[1];
+		}
+	}
+
+	if (rec) {
 		enum ocfs2_contig_type contig_type;
 
-		rec = &el->l_recs[index + 1];
 		contig_type = ocfs2_extent_contig(inode, rec, split_rec);
 
 		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
@@ -3532,6 +3887,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
 			ret = contig_type;
 	}
 
+out:
+	if (left_path)
+		ocfs2_free_path(left_path);
+	if (right_path)
+		ocfs2_free_path(right_path);
+
 	return ret;
 }
 
@@ -3994,7 +4355,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 		goto out;
 	}
 
-	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el,
+	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el,
 							    split_index,
 							    split_rec);
 
@@ -4788,6 +5149,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
 	status = ocfs2_flush_truncate_log(osb);
 	if (status < 0)
 		mlog_errno(status);
+	else
+		ocfs2_init_inode_steal_slot(osb);
 
 	mlog_exit(status);
 }
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 82243127eebf..90383ed61005 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -257,7 +257,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	BUG_ON(!PageLocked(page));
-	BUG_ON(!OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
+	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
 
 	ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
 			       OCFS2_BH_CACHED, inode);
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 0c095ce7723d..98429fd68499 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -57,6 +57,7 @@ static struct kset *o2cb_kset;
 void o2cb_sys_shutdown(void)
 {
 	mlog_sys_shutdown();
+	sysfs_remove_link(NULL, "o2cb");
 	kset_unregister(o2cb_kset);
 }
 
@@ -68,6 +69,14 @@ int o2cb_sys_init(void)
 	if (!o2cb_kset)
 		return -ENOMEM;
 
+	/*
+	 * Create this symlink for backwards compatibility with old
+	 * versions of ocfs2-tools which look for things in /sys/o2cb.
+	 */
+	ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb");
+	if (ret)
+		goto error;
+
 	ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group);
 	if (ret)
 		goto error;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ee50c9610e7f..4ea4b0a26975 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -399,8 +399,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 	mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid);
 	mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc);
 
-	/* we won't reconnect after our valid conn goes away for
-	 * this hb iteration.. here so it shows up in the logs */
 	if (was_valid && !valid && err == 0)
 		err = -ENOTCONN;
 
@@ -430,11 +428,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 
 	if (!was_valid && valid) {
 		o2quo_conn_up(o2net_num_from_nn(nn));
-		/* this is a bit of a hack.  we only try reconnecting
-		 * when heartbeating starts until we get a connection.
-		 * if that connection then dies we don't try reconnecting.
-		 * the only way to start connecting again is to down
-		 * heartbeat and bring it back up. */
 		cancel_delayed_work(&nn->nn_connect_expired);
 		printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
 		       o2nm_this_node() > sc->sc_node->nd_num ?
@@ -451,12 +444,24 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 		/* delay if we're withing a RECONNECT_DELAY of the
 		 * last attempt */
 		delay = (nn->nn_last_connect_attempt +
-			 msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
+			 msecs_to_jiffies(o2net_reconnect_delay(NULL)))
 			- jiffies;
-		if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
+		if (delay > msecs_to_jiffies(o2net_reconnect_delay(NULL)))
 			delay = 0;
 		mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
 		queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
+
+		/*
+		 * Delay the expired work after idle timeout.
+		 *
+		 * We might have lots of failed connection attempts that run
+		 * through here but we only cancel the connect_expired work when
+		 * a connection attempt succeeds.  So only the first enqueue of
+		 * the connect_expired work will do anything.  The rest will see
+		 * that it's already queued and do nothing.
+		 */
+		delay += msecs_to_jiffies(o2net_idle_timeout(NULL));
+		queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay);
 	}
 
 	/* keep track of the nn's sc ref for the caller */
@@ -1193,6 +1198,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
 	 * shut down already */
 	if (nn->nn_sc == sc) {
 		o2net_sc_reset_idle_timer(sc);
+		atomic_set(&nn->nn_timeout, 0);
 		o2net_set_nn_state(nn, sc, 1, 0);
 	}
 	spin_unlock(&nn->nn_lock);
@@ -1391,6 +1397,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
 static void o2net_idle_timer(unsigned long data)
 {
 	struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
+	struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
 	struct timeval now;
 
 	do_gettimeofday(&now);
@@ -1413,6 +1420,12 @@ static void o2net_idle_timer(unsigned long data)
 	     sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
 	     sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
 
+	/*
+	 * Initialize the nn_timeout so that the next connection attempt
+	 * will continue in o2net_start_connect.
+	 */
+	atomic_set(&nn->nn_timeout, 1);
+
 	o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
 }
 
@@ -1447,6 +1460,7 @@ static void o2net_start_connect(struct work_struct *work)
 	struct socket *sock = NULL;
 	struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
 	int ret = 0, stop;
+	unsigned int timeout;
 
 	/* if we're greater we initiate tx, otherwise we accept */
 	if (o2nm_this_node() <= o2net_num_from_nn(nn))
@@ -1466,8 +1480,17 @@ static void o2net_start_connect(struct work_struct *work)
 	}
 
 	spin_lock(&nn->nn_lock);
-	/* see if we already have one pending or have given up */
-	stop = (nn->nn_sc || nn->nn_persistent_error);
+	/*
+	 * see if we already have one pending or have given up.
+	 * For nn_timeout, it is set when we close the connection
+	 * because of the idle time out. So it means that we have
+	 * at least connected to that node successfully once,
+	 * now try to connect to it again.
+	 */
+	timeout = atomic_read(&nn->nn_timeout);
+	stop = (nn->nn_sc ||
+		(nn->nn_persistent_error &&
+		(nn->nn_persistent_error != -ENOTCONN || timeout == 0)));
 	spin_unlock(&nn->nn_lock);
 	if (stop)
 		goto out;
@@ -1552,12 +1575,11 @@ static void o2net_connect_expired(struct work_struct *work)
 
 	spin_lock(&nn->nn_lock);
 	if (!nn->nn_sc_valid) {
-		struct o2nm_node *node = nn->nn_sc->sc_node;
 		mlog(ML_ERROR, "no connection established with node %u after "
 		     "%u.%u seconds, giving up and returning errors.\n",
 		     o2net_num_from_nn(nn),
-		     o2net_idle_timeout(node) / 1000,
-		     o2net_idle_timeout(node) % 1000);
+		     o2net_idle_timeout(NULL) / 1000,
+		     o2net_idle_timeout(NULL) % 1000);
 
 		o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
 	}
@@ -1580,6 +1602,7 @@ void o2net_disconnect_node(struct o2nm_node *node)
 
 	/* don't reconnect until it's heartbeating again */
 	spin_lock(&nn->nn_lock);
+	atomic_set(&nn->nn_timeout, 0);
 	o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
 	spin_unlock(&nn->nn_lock);
 
@@ -1614,17 +1637,12 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
 		(msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
 
 	if (node_num != o2nm_this_node()) {
-		/* heartbeat doesn't work unless a local node number is
-		 * configured and doing so brings up the o2net_wq, so we can
-		 * use it.. */
-		queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
-		                   msecs_to_jiffies(o2net_idle_timeout(node)));
-
 		/* believe it or not, accept and node hearbeating testing
 		 * can succeed for this node before we got here.. so
 		 * only use set_nn_state to clear the persistent error
 		 * if that hasn't already happened */
 		spin_lock(&nn->nn_lock);
+		atomic_set(&nn->nn_timeout, 0);
 		if (nn->nn_persistent_error)
 			o2net_set_nn_state(nn, NULL, 0, 0);
 		spin_unlock(&nn->nn_lock);
@@ -1748,6 +1766,7 @@ static int o2net_accept_one(struct socket *sock)
 	new_sock = NULL;
 
 	spin_lock(&nn->nn_lock);
+	atomic_set(&nn->nn_timeout, 0);
 	o2net_set_nn_state(nn, sc, 0, 0);
 	spin_unlock(&nn->nn_lock);
 
@@ -1942,6 +1961,7 @@ int o2net_init(void)
 	for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) {
 		struct o2net_node *nn = o2net_nn_from_num(i);
 
+		atomic_set(&nn->nn_timeout, 0);
 		spin_lock_init(&nn->nn_lock);
 		INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect);
 		INIT_DELAYED_WORK(&nn->nn_connect_expired,
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index d25b9af28500..b4c5586f46ea 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -95,6 +95,8 @@ struct o2net_node {
 	unsigned			nn_sc_valid:1;
 	/* if this is set tx just returns it */
 	int				nn_persistent_error;
+	/* It is only set to 1 after the idle time out. */
+	atomic_t			nn_timeout;
 
 	/* threads waiting for an sc to arrive wait on the wq for generation
 	 * to increase.  it is increased when a connecting socket succeeds
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index e280833ceb9a..8a1875848080 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -390,9 +390,8 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
 				goto bail;
 			}
 			if (pde)
-				pde->rec_len =
-					cpu_to_le16(le16_to_cpu(pde->rec_len) +
-						    le16_to_cpu(de->rec_len));
+				le16_add_cpu(&pde->rec_len,
+						le16_to_cpu(de->rec_len));
 			else
 				de->inode = 0;
 			dir->i_version++;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9843ee17ea27..dc8ea666efdb 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -176,6 +176,7 @@ struct dlm_mig_lockres_priv
 {
 	struct dlm_lock_resource *lockres;
 	u8 real_master;
+	u8 extra_ref;
 };
 
 struct dlm_assert_master_priv
@@ -602,17 +603,19 @@ enum dlm_query_join_response_code {
 	JOIN_PROTOCOL_MISMATCH,
 };
 
+struct dlm_query_join_packet {
+	u8 code;	/* Response code.  dlm_minor and fs_minor
+			   are only valid if this is JOIN_OK */
+	u8 dlm_minor;	/* The minor version of the protocol the
+			   dlm is speaking. */
+	u8 fs_minor;	/* The minor version of the protocol the
+			   filesystem is speaking. */
+	u8 reserved;
+};
+
 union dlm_query_join_response {
 	u32 intval;
-	struct {
-		u8 code;	/* Response code.  dlm_minor and fs_minor
-				   are only valid if this is JOIN_OK */
-		u8 dlm_minor;	/* The minor version of the protocol the
-				   dlm is speaking. */
-		u8 fs_minor;	/* The minor version of the protocol the
-				   filesystem is speaking. */
-		u8 reserved;
-	} packet;
+	struct dlm_query_join_packet packet;
 };
 
 struct dlm_lock_request
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index ecb4d997221e..75997b4deaf3 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -487,7 +487,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
 			       "cookie=%u:%llu\n",
 		     dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)),
 		     dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie)));
-		__dlm_print_one_lock_resource(res);
+		dlm_print_one_lock_resource(res);
 		goto leave;
 	}
 
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 638d2ebb892b..0879d86113e3 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -713,14 +713,46 @@ static int dlm_query_join_proto_check(char *proto_type, int node,
 	return rc;
 }
 
+/*
+ * struct dlm_query_join_packet is made up of four one-byte fields.  They
+ * are effectively in big-endian order already.  However, little-endian
+ * machines swap them before putting the packet on the wire (because
+ * query_join's response is a status, and that status is treated as a u32
+ * on the wire).  Thus, a big-endian and little-endian machines will treat
+ * this structure differently.
+ *
+ * The solution is to have little-endian machines swap the structure when
+ * converting from the structure to the u32 representation.  This will
+ * result in the structure having the correct format on the wire no matter
+ * the host endian format.
+ */
+static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet,
+					  u32 *wire)
+{
+	union dlm_query_join_response response;
+
+	response.packet = *packet;
+	*wire = cpu_to_be32(response.intval);
+}
+
+static void dlm_query_join_wire_to_packet(u32 wire,
+					  struct dlm_query_join_packet *packet)
+{
+	union dlm_query_join_response response;
+
+	response.intval = cpu_to_be32(wire);
+	*packet = response.packet;
+}
+
 static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 				  void **ret_data)
 {
 	struct dlm_query_join_request *query;
-	union dlm_query_join_response response = {
-		.packet.code = JOIN_DISALLOW,
+	struct dlm_query_join_packet packet = {
+		.code = JOIN_DISALLOW,
 	};
 	struct dlm_ctxt *dlm = NULL;
+	u32 response;
 	u8 nodenum;
 
 	query = (struct dlm_query_join_request *) msg->buf;
@@ -737,11 +769,11 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 		mlog(0, "node %u is not in our live map yet\n",
 		     query->node_idx);
 
-		response.packet.code = JOIN_DISALLOW;
+		packet.code = JOIN_DISALLOW;
 		goto respond;
 	}
 
-	response.packet.code = JOIN_OK_NO_MAP;
+	packet.code = JOIN_OK_NO_MAP;
 
 	spin_lock(&dlm_domain_lock);
 	dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
@@ -760,7 +792,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 				mlog(0, "disallow join as node %u does not "
 				     "have node %u in its nodemap\n",
 				     query->node_idx, nodenum);
-				response.packet.code = JOIN_DISALLOW;
+				packet.code = JOIN_DISALLOW;
 				goto unlock_respond;
 			}
 		}
@@ -780,23 +812,23 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 			/*If this is a brand new context and we
 			 * haven't started our join process yet, then
 			 * the other node won the race. */
-			response.packet.code = JOIN_OK_NO_MAP;
+			packet.code = JOIN_OK_NO_MAP;
 		} else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			/* Disallow parallel joins. */
-			response.packet.code = JOIN_DISALLOW;
+			packet.code = JOIN_DISALLOW;
 		} else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
 			mlog(0, "node %u trying to join, but recovery "
 			     "is ongoing.\n", bit);
-			response.packet.code = JOIN_DISALLOW;
+			packet.code = JOIN_DISALLOW;
 		} else if (test_bit(bit, dlm->recovery_map)) {
 			mlog(0, "node %u trying to join, but it "
 			     "still needs recovery.\n", bit);
-			response.packet.code = JOIN_DISALLOW;
+			packet.code = JOIN_DISALLOW;
 		} else if (test_bit(bit, dlm->domain_map)) {
 			mlog(0, "node %u trying to join, but it "
 			     "is still in the domain! needs recovery?\n",
 			     bit);
-			response.packet.code = JOIN_DISALLOW;
+			packet.code = JOIN_DISALLOW;
 		} else {
 			/* Alright we're fully a part of this domain
 			 * so we keep some state as to who's joining
@@ -807,19 +839,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 			if (dlm_query_join_proto_check("DLM", bit,
 						       &dlm->dlm_locking_proto,
 						       &query->dlm_proto)) {
-				response.packet.code =
-					JOIN_PROTOCOL_MISMATCH;
+				packet.code = JOIN_PROTOCOL_MISMATCH;
 			} else if (dlm_query_join_proto_check("fs", bit,
 							      &dlm->fs_locking_proto,
 							      &query->fs_proto)) {
-				response.packet.code =
-					JOIN_PROTOCOL_MISMATCH;
+				packet.code = JOIN_PROTOCOL_MISMATCH;
 			} else {
-				response.packet.dlm_minor =
-					query->dlm_proto.pv_minor;
-				response.packet.fs_minor =
-					query->fs_proto.pv_minor;
-				response.packet.code = JOIN_OK;
+				packet.dlm_minor = query->dlm_proto.pv_minor;
+				packet.fs_minor = query->fs_proto.pv_minor;
+				packet.code = JOIN_OK;
 				__dlm_set_joining_node(dlm, query->node_idx);
 			}
 		}
@@ -830,9 +858,10 @@ unlock_respond:
 	spin_unlock(&dlm_domain_lock);
 
 respond:
-	mlog(0, "We respond with %u\n", response.packet.code);
+	mlog(0, "We respond with %u\n", packet.code);
 
-	return response.intval;
+	dlm_query_join_packet_to_wire(&packet, &response);
+	return response;
 }
 
 static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
@@ -937,7 +966,7 @@ static int dlm_send_join_cancels(struct dlm_ctxt *dlm,
 			 sizeof(unsigned long))) {
 		mlog(ML_ERROR,
 		     "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n",
-		     map_size, BITS_TO_LONGS(O2NM_MAX_NODES));
+		     map_size, (unsigned)BITS_TO_LONGS(O2NM_MAX_NODES));
 		return -EINVAL;
 	}
 
@@ -968,7 +997,8 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 {
 	int status;
 	struct dlm_query_join_request join_msg;
-	union dlm_query_join_response join_resp;
+	struct dlm_query_join_packet packet;
+	u32 join_resp;
 
 	mlog(0, "querying node %d\n", node);
 
@@ -984,11 +1014,12 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 
 	status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
 				    sizeof(join_msg), node,
-				    &join_resp.intval);
+				    &join_resp);
 	if (status < 0 && status != -ENOPROTOOPT) {
 		mlog_errno(status);
 		goto bail;
 	}
+	dlm_query_join_wire_to_packet(join_resp, &packet);
 
 	/* -ENOPROTOOPT from the net code means the other side isn't
 	    listening for our message type -- that's fine, it means
@@ -997,10 +1028,10 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 	if (status == -ENOPROTOOPT) {
 		status = 0;
 		*response = JOIN_OK_NO_MAP;
-	} else if (join_resp.packet.code == JOIN_DISALLOW ||
-		   join_resp.packet.code == JOIN_OK_NO_MAP) {
-		*response = join_resp.packet.code;
-	} else if (join_resp.packet.code == JOIN_PROTOCOL_MISMATCH) {
+	} else if (packet.code == JOIN_DISALLOW ||
+		   packet.code == JOIN_OK_NO_MAP) {
+		*response = packet.code;
+	} else if (packet.code == JOIN_PROTOCOL_MISMATCH) {
 		mlog(ML_NOTICE,
 		     "This node requested DLM locking protocol %u.%u and "
 		     "filesystem locking protocol %u.%u.  At least one of "
@@ -1012,14 +1043,12 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 		     dlm->fs_locking_proto.pv_minor,
 		     node);
 		status = -EPROTO;
-		*response = join_resp.packet.code;
-	} else if (join_resp.packet.code == JOIN_OK) {
-		*response = join_resp.packet.code;
+		*response = packet.code;
+	} else if (packet.code == JOIN_OK) {
+		*response = packet.code;
 		/* Use the same locking protocol as the remote node */
-		dlm->dlm_locking_proto.pv_minor =
-			join_resp.packet.dlm_minor;
-		dlm->fs_locking_proto.pv_minor =
-			join_resp.packet.fs_minor;
+		dlm->dlm_locking_proto.pv_minor = packet.dlm_minor;
+		dlm->fs_locking_proto.pv_minor = packet.fs_minor;
 		mlog(0,
 		     "Node %d responds JOIN_OK with DLM locking protocol "
 		     "%u.%u and fs locking protocol %u.%u\n",
@@ -1031,11 +1060,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 	} else {
 		status = -EINVAL;
 		mlog(ML_ERROR, "invalid response %d from node %u\n",
-		     join_resp.packet.code, node);
+		     packet.code, node);
 	}
 
 	mlog(0, "status %d, node %d response is %d\n", status, node,
-		  *response);
+	     *response);
 
 bail:
 	return status;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a54d33d95ada..ea6b89577860 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1663,7 +1663,12 @@ way_up_top:
 		dlm_put_mle(tmpmle);
 	}
 send_response:
-
+	/*
+	 * __dlm_lookup_lockres() grabbed a reference to this lockres.
+	 * The reference is released by dlm_assert_master_worker() under
+	 * the call to dlm_dispatch_assert_master().  If
+	 * dlm_assert_master_worker() isn't called, we drop it here.
+	 */
 	if (dispatch_assert) {
 		if (response != DLM_MASTER_RESP_YES)
 			mlog(ML_ERROR, "invalid response %d\n", response);
@@ -1678,7 +1683,11 @@ send_response:
 		if (ret < 0) {
 			mlog(ML_ERROR, "failed to dispatch assert master work\n");
 			response = DLM_MASTER_RESP_ERROR;
+			dlm_lockres_put(res);
 		}
+	} else {
+		if (res)
+			dlm_lockres_put(res);
 	}
 
 	dlm_put(dlm);
@@ -1695,9 +1704,9 @@ send_response:
  * can periodically run all locks owned by this node
  * and re-assert across the cluster...
  */
-int dlm_do_assert_master(struct dlm_ctxt *dlm,
-			 struct dlm_lock_resource *res,
-			 void *nodemap, u32 flags)
+static int dlm_do_assert_master(struct dlm_ctxt *dlm,
+				struct dlm_lock_resource *res,
+				void *nodemap, u32 flags)
 {
 	struct dlm_assert_master assert;
 	int to, tmpret;
@@ -2348,7 +2357,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 			mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
 		     	"but it is already dropped!\n", dlm->name,
 		     	res->lockname.len, res->lockname.name, node);
-			__dlm_print_one_lock_resource(res);
+			dlm_print_one_lock_resource(res);
 		}
 		ret = 0;
 		goto done;
@@ -2408,7 +2417,7 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
 		mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
 		     "but it is already dropped!\n", dlm->name,
 		     res->lockname.len, res->lockname.name, node);
-		__dlm_print_one_lock_resource(res);
+		dlm_print_one_lock_resource(res);
 	}
 
 	dlm_lockres_put(res);
@@ -2933,6 +2942,9 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
 				dlm_lockres_clear_refmap_bit(lock->ml.node, res);
 				list_del_init(&lock->list);
 				dlm_lock_put(lock);
+				/* In a normal unlock, we would have added a
+				 * DLM_UNLOCK_FREE_LOCK action. Force it. */
+				dlm_lock_put(lock);
 			}
 		}
 		queue++;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 91f747b8a538..bcb9260c3735 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -519,9 +519,9 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 	return 0;
 
 master_here:
-	mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
-	     task_pid_nr(dlm->dlm_reco_thread_task),
-	     dlm->name, dlm->reco.dead_node, dlm->node_num);
+	mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node "
+	     "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task),
+	     dlm->node_num, dlm->reco.dead_node, dlm->name);
 
 	status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
 	if (status < 0) {
@@ -1191,7 +1191,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
 			    (ml->type == LKM_EXMODE ||
 			     memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
 				mlog(ML_ERROR, "mismatched lvbs!\n");
-				__dlm_print_one_lock_resource(lock->lockres);
+				dlm_print_one_lock_resource(lock->lockres);
 				BUG();
 			}
 			memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
@@ -1327,6 +1327,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 		(struct dlm_migratable_lockres *)msg->buf;
 	int ret = 0;
 	u8 real_master;
+	u8 extra_refs = 0;
 	char *buf = NULL;
 	struct dlm_work_item *item = NULL;
 	struct dlm_lock_resource *res = NULL;
@@ -1404,16 +1405,28 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 		__dlm_insert_lockres(dlm, res);
 		spin_unlock(&dlm->spinlock);
 
+		/* Add an extra ref for this lock-less lockres lest the
+		 * dlm_thread purges it before we get the chance to add
+		 * locks to it */
+		dlm_lockres_get(res);
+
+		/* There are three refs that need to be put.
+		 * 1. Taken above.
+		 * 2. kref_init in dlm_new_lockres()->dlm_init_lockres().
+		 * 3. dlm_lookup_lockres()
+		 * The first one is handled at the end of this function. The
+		 * other two are handled in the worker thread after locks have
+		 * been attached. Yes, we don't wait for purge time to match
+		 * kref_init. The lockres will still have atleast one ref
+		 * added because it is in the hash __dlm_insert_lockres() */
+		extra_refs++;
+
 		/* now that the new lockres is inserted,
 		 * make it usable by other processes */
 		spin_lock(&res->spinlock);
 		res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 		spin_unlock(&res->spinlock);
 		wake_up(&res->wq);
-
-		/* add an extra ref for just-allocated lockres 
-		 * otherwise the lockres will be purged immediately */
-		dlm_lockres_get(res);
 	}
 
 	/* at this point we have allocated everything we need,
@@ -1443,12 +1456,17 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 	dlm_init_work_item(dlm, item, dlm_mig_lockres_worker, buf);
 	item->u.ml.lockres = res; /* already have a ref */
 	item->u.ml.real_master = real_master;
+	item->u.ml.extra_ref = extra_refs;
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
 	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
 
 leave:
+	/* One extra ref taken needs to be put here */
+	if (extra_refs)
+		dlm_lockres_put(res);
+
 	dlm_put(dlm);
 	if (ret < 0) {
 		if (buf)
@@ -1464,17 +1482,19 @@ leave:
 
 static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data)
 {
-	struct dlm_ctxt *dlm = data;
+	struct dlm_ctxt *dlm;
 	struct dlm_migratable_lockres *mres;
 	int ret = 0;
 	struct dlm_lock_resource *res;
 	u8 real_master;
+	u8 extra_ref;
 
 	dlm = item->dlm;
 	mres = (struct dlm_migratable_lockres *)data;
 
 	res = item->u.ml.lockres;
 	real_master = item->u.ml.real_master;
+	extra_ref = item->u.ml.extra_ref;
 
 	if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
 		/* this case is super-rare. only occurs if
@@ -1517,6 +1537,12 @@ again:
 	}
 
 leave:
+	/* See comment in dlm_mig_lockres_handler() */
+	if (res) {
+		if (extra_ref)
+			dlm_lockres_put(res);
+		dlm_lockres_put(res);
+	}
 	kfree(data);
 	mlog_exit(ret);
 }
@@ -1644,7 +1670,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
 				/* retry!? */
 				BUG();
 			}
-		}
+		} else /* put.. incase we are not the master */
+			dlm_lockres_put(res);
 		spin_unlock(&res->spinlock);
 	}
 	spin_unlock(&dlm->spinlock);
@@ -1921,6 +1948,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
 		     "Recovering res %s:%.*s, is already on recovery list!\n",
 		     dlm->name, res->lockname.len, res->lockname.name);
 		list_del_init(&res->recovering);
+		dlm_lockres_put(res);
 	}
 	/* We need to hold a reference while on the recovery list */
 	dlm_lockres_get(res);
@@ -2130,11 +2158,16 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&res->spinlock);
 
+	/* We do two dlm_lock_put(). One for removing from list and the other is
+	 * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
+
 	/* TODO: check pending_asts, pending_basts here */
 	list_for_each_entry_safe(lock, next, &res->granted, list) {
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
@@ -2142,6 +2175,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
@@ -2149,6 +2184,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index cebd089f8955..4060bb328bc8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -176,12 +176,14 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 	     res->lockname.name, master);
 
 	if (!master) {
+		/* drop spinlock...  retake below */
+		spin_unlock(&dlm->spinlock);
+
 		spin_lock(&res->spinlock);
 		/* This ensures that clear refmap is sent after the set */
 		__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
 		spin_unlock(&res->spinlock);
-		/* drop spinlock to do messaging, retake below */
-		spin_unlock(&dlm->spinlock);
+
 		/* clear our bit from the master's refmap, ignore errors */
 		ret = dlm_drop_lockres_ref(dlm, res);
 		if (ret < 0) {
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 351130c9b734..1f1873bf41fb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2409,7 +2409,7 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations ocfs2_dlm_seq_ops = {
+static const struct seq_operations ocfs2_dlm_seq_ops = {
 	.start =	ocfs2_dlm_seq_start,
 	.stop =		ocfs2_dlm_seq_stop,
 	.next =		ocfs2_dlm_seq_next,
@@ -3042,7 +3042,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
        	inode = ocfs2_lock_res_inode(lockres);
 	mapping = inode->i_mapping;
 
-	if (S_ISREG(inode->i_mode))
+	if (!S_ISREG(inode->i_mode))
 		goto out;
 
 	/*
@@ -3219,8 +3219,8 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
 	return UNBLOCK_CONTINUE_POST;
 }
 
-void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
-				struct ocfs2_lock_res *lockres)
+static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
+				       struct ocfs2_lock_res *lockres)
 {
 	int status;
 	struct ocfs2_unblock_ctl ctl = {0, 0,};
@@ -3356,7 +3356,7 @@ static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
 	return should_wake;
 }
 
-int ocfs2_downconvert_thread(void *arg)
+static int ocfs2_downconvert_thread(void *arg)
 {
 	int status = 0;
 	struct ocfs2_super *osb = arg;
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 1d5b0699d0a9..e3cf902404b4 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -109,8 +109,6 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
 			       struct ocfs2_lock_res *lockres);
 
 /* for the downconvert thread */
-void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
-				struct ocfs2_lock_res *lockres);
 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);
 
 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ed5d5232e85d..9154c82d3258 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2242,7 +2242,7 @@ const struct file_operations ocfs2_fops = {
 	.open		= ocfs2_file_open,
 	.aio_read	= ocfs2_file_aio_read,
 	.aio_write	= ocfs2_file_aio_write,
-	.ioctl		= ocfs2_ioctl,
+	.unlocked_ioctl	= ocfs2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = ocfs2_compat_ioctl,
 #endif
@@ -2258,7 +2258,7 @@ const struct file_operations ocfs2_dops = {
 	.fsync		= ocfs2_sync_file,
 	.release	= ocfs2_dir_release,
 	.open		= ocfs2_dir_open,
-	.ioctl		= ocfs2_ioctl,
+	.unlocked_ioctl	= ocfs2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = ocfs2_compat_ioctl,
 #endif
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index c0efd9489fe8..0758daf64da0 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -49,10 +49,15 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
 static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
 					      int bit);
 static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map);
-static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
-				 struct ocfs2_node_map *from);
-static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
-				 struct ocfs2_node_map *from);
+
+/* special case -1 for now
+ * TODO: should *really* make sure the calling func never passes -1!!  */
+static void ocfs2_node_map_init(struct ocfs2_node_map *map)
+{
+	map->num_nodes = OCFS2_NODE_MAP_MAX_NODES;
+	memset(map->map, 0, BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES) *
+	       sizeof(unsigned long));
+}
 
 void ocfs2_init_node_maps(struct ocfs2_super *osb)
 {
@@ -136,15 +141,6 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
 		mlog_errno(ret);
 }
 
-/* special case -1 for now
- * TODO: should *really* make sure the calling func never passes -1!!  */
-void ocfs2_node_map_init(struct ocfs2_node_map *map)
-{
-	map->num_nodes = OCFS2_NODE_MAP_MAX_NODES;
-	memset(map->map, 0, BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES) *
-	       sizeof(unsigned long));
-}
-
 static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
 					    int bit)
 {
@@ -216,6 +212,8 @@ int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
 	return ret;
 }
 
+#if 0
+
 static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
 				 struct ocfs2_node_map *from)
 {
@@ -254,6 +252,8 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
 		target->map[i] = from->map[i];
 }
 
+#endif  /*  0  */
+
 /* Returns whether the recovery bit was actually set - it may not be
  * if a node is still marked as needing recovery */
 int ocfs2_recovery_map_set(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h
index 56859211888a..eac63aed7611 100644
--- a/fs/ocfs2/heartbeat.h
+++ b/fs/ocfs2/heartbeat.h
@@ -33,7 +33,6 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb);
 
 /* node map functions - used to keep track of mounted and in-recovery
  * nodes. */
-void ocfs2_node_map_init(struct ocfs2_node_map *map);
 int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
 			    struct ocfs2_node_map *map);
 void ocfs2_node_map_set_bit(struct ocfs2_super *osb,
@@ -57,9 +56,5 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb,
 			   int num);
 void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
 			      int num);
-/* returns 1 if bit is the only bit set in target, 0 otherwise */
-int ocfs2_node_map_is_only(struct ocfs2_super *osb,
-			   struct ocfs2_node_map *target,
-			   int bit);
 
 #endif /* OCFS2_HEARTBEAT_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 5177fba5162b..16e389742762 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -112,9 +112,9 @@ bail:
 	return status;
 }
 
-int ocfs2_ioctl(struct inode * inode, struct file * filp,
-	unsigned int cmd, unsigned long arg)
+long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	unsigned int flags;
 	int new_clusters;
 	int status;
@@ -168,9 +168,6 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
 #ifdef CONFIG_COMPAT
 long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int ret;
-
 	switch (cmd) {
 	case OCFS2_IOC32_GETFLAGS:
 		cmd = OCFS2_IOC_GETFLAGS;
@@ -190,9 +187,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 		return -ENOIOCTLCMD;
 	}
 
-	lock_kernel();
-	ret = ocfs2_ioctl(inode, file, cmd, arg);
-	unlock_kernel();
-	return ret;
+	return ocfs2_ioctl(file, cmd, arg);
 }
 #endif
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
index 4d6c4f430d0d..cf9a5ee30fef 100644
--- a/fs/ocfs2/ioctl.h
+++ b/fs/ocfs2/ioctl.h
@@ -10,8 +10,7 @@
 #ifndef OCFS2_IOCTL_H
 #define OCFS2_IOCTL_H
 
-int ocfs2_ioctl(struct inode * inode, struct file * filp,
-	unsigned int cmd, unsigned long arg);
+long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
 #endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f31c7e8c19c3..b62d17e82704 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -586,8 +586,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
 
 	mlog_entry_void();
 
-	if (!journal)
-		BUG();
+	BUG_ON(!journal);
 
 	osb = journal->j_osb;
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index add1ffdc5c6c..ce0dc147602a 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -120,9 +120,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 
 	mlog_entry_void();
 
-	if (ocfs2_mount_local(osb))
-		goto bail;
-
 	if (osb->local_alloc_size == 0)
 		goto bail;
 
@@ -450,6 +447,8 @@ out_mutex:
 	iput(main_bm_inode);
 
 out:
+	if (!status)
+		ocfs2_init_inode_steal_slot(osb);
 	mlog_exit(status);
 	return status;
 }
@@ -526,6 +525,8 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 	}
 
 	ac->ac_inode = local_alloc_inode;
+	/* We should never use localalloc from another slot */
+	ac->ac_alloc_slot = osb->slot_num;
 	ac->ac_which = OCFS2_AC_USE_LOCAL;
 	get_bh(osb->local_alloc_bh);
 	ac->ac_bh = osb->local_alloc_bh;
@@ -588,8 +589,7 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 	while(bits_wanted--)
 		ocfs2_set_bit(start++, bitmap);
 
-	alloc->id1.bitmap1.i_used = cpu_to_le32(*num_bits +
-				le32_to_cpu(alloc->id1.bitmap1.i_used));
+	le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
 
 	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
 	if (status < 0) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ae9ad9587516..d5d808fe0140 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
 	fe->i_blkno = cpu_to_le64(fe_blkno);
 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
-	fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
+	fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
 	fe->i_uid = cpu_to_le32(current->fsuid);
 	if (dir->i_mode & S_ISGID) {
 		fe->i_gid = cpu_to_le32(dir->i_gid);
@@ -997,7 +997,7 @@ static int ocfs2_rename(struct inode *old_dir,
 	 *
 	 * And that's why, just like the VFS, we need a file system
 	 * rename lock. */
-	if (old_dentry != new_dentry) {
+	if (old_dir != new_dir && S_ISDIR(old_inode->i_mode)) {
 		status = ocfs2_rename_lock(osb);
 		if (status < 0) {
 			mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6546cef212e3..0273a55706e6 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -206,11 +206,14 @@ struct ocfs2_super
 	u32 s_feature_incompat;
 	u32 s_feature_ro_compat;
 
-	/* Protects s_next_generaion, osb_flags. Could protect more on
-	 * osb as it's very short lived. */
+	/* Protects s_next_generation, osb_flags and s_inode_steal_slot.
+	 * Could protect more on osb as it's very short lived.
+	 */
 	spinlock_t osb_lock;
 	u32 s_next_generation;
 	unsigned long osb_flags;
+	s16 s_inode_steal_slot;
+	atomic_t s_num_inodes_stolen;
 
 	unsigned long s_mount_opt;
 	unsigned int s_atime_quantum;
@@ -522,6 +525,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
 	return pages_per_cluster;
 }
 
+static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
+{
+	spin_lock(&osb->osb_lock);
+	osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
+	spin_unlock(&osb->osb_lock);
+	atomic_set(&osb->s_num_inodes_stolen, 0);
+}
+
+static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb,
+					      s16 slot)
+{
+	spin_lock(&osb->osb_lock);
+	osb->s_inode_steal_slot = slot;
+	spin_unlock(&osb->osb_lock);
+}
+
+static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
+{
+	s16 slot;
+
+	spin_lock(&osb->osb_lock);
+	slot = osb->s_inode_steal_slot;
+	spin_unlock(&osb->osb_lock);
+
+	return slot;
+}
+
 #define ocfs2_set_bit ext2_set_bit
 #define ocfs2_clear_bit ext2_clear_bit
 #define ocfs2_test_bit ext2_test_bit
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 37835ffcb039..8166968e9015 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -597,7 +597,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		memset(cr, 0, sizeof(struct ocfs2_chain_rec));
 	}
 
-	cr->c_blkno = le64_to_cpu(input->group);
+	cr->c_blkno = cpu_to_le64(input->group);
 	le32_add_cpu(&cr->c_total, input->clusters * cl_bpc);
 	le32_add_cpu(&cr->c_free, input->frees * cl_bpc);
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 72c198a004df..d2d278fb9819 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -46,6 +46,11 @@
 
 #include "buffer_head_io.h"
 
+#define NOT_ALLOC_NEW_GROUP		0
+#define ALLOC_NEW_GROUP			1
+
+#define OCFS2_MAX_INODES_TO_STEAL	1024
+
 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -106,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
 						u64 *bg_blkno,
 						u16 *bg_bit_off);
 
-void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
+static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
 {
 	struct inode *inode = ac->ac_inode;
 
@@ -117,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
 		mutex_unlock(&inode->i_mutex);
 
 		iput(inode);
+		ac->ac_inode = NULL;
 	}
-	if (ac->ac_bh)
+	if (ac->ac_bh) {
 		brelse(ac->ac_bh);
+		ac->ac_bh = NULL;
+	}
+}
+
+void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
+{
+	ocfs2_free_ac_resource(ac);
 	kfree(ac);
 }
 
@@ -391,7 +404,8 @@ bail:
 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 				       struct ocfs2_alloc_context *ac,
 				       int type,
-				       u32 slot)
+				       u32 slot,
+				       int alloc_new_group)
 {
 	int status;
 	u32 bits_wanted = ac->ac_bits_wanted;
@@ -420,6 +434,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 	}
 
 	ac->ac_inode = alloc_inode;
+	ac->ac_alloc_slot = slot;
 
 	fe = (struct ocfs2_dinode *) bh->b_data;
 	if (!OCFS2_IS_VALID_DINODE(fe)) {
@@ -446,6 +461,14 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 			goto bail;
 		}
 
+		if (alloc_new_group != ALLOC_NEW_GROUP) {
+			mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
+			     "and we don't alloc a new group for it.\n",
+			     slot, bits_wanted, free_bits);
+			status = -ENOSPC;
+			goto bail;
+		}
+
 		status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
 		if (status < 0) {
 			if (status != -ENOSPC)
@@ -490,7 +513,8 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
 	(*ac)->ac_group_search = ocfs2_block_group_search;
 
 	status = ocfs2_reserve_suballoc_bits(osb, (*ac),
-					     EXTENT_ALLOC_SYSTEM_INODE, slot);
+					     EXTENT_ALLOC_SYSTEM_INODE,
+					     slot, ALLOC_NEW_GROUP);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -508,10 +532,42 @@ bail:
 	return status;
 }
 
+static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
+					      struct ocfs2_alloc_context *ac)
+{
+	int i, status = -ENOSPC;
+	s16 slot = ocfs2_get_inode_steal_slot(osb);
+
+	/* Start to steal inodes from the first slot after ours. */
+	if (slot == OCFS2_INVALID_SLOT)
+		slot = osb->slot_num + 1;
+
+	for (i = 0; i < osb->max_slots; i++, slot++) {
+		if (slot == osb->max_slots)
+			slot = 0;
+
+		if (slot == osb->slot_num)
+			continue;
+
+		status = ocfs2_reserve_suballoc_bits(osb, ac,
+						     INODE_ALLOC_SYSTEM_INODE,
+						     slot, NOT_ALLOC_NEW_GROUP);
+		if (status >= 0) {
+			ocfs2_set_inode_steal_slot(osb, slot);
+			break;
+		}
+
+		ocfs2_free_ac_resource(ac);
+	}
+
+	return status;
+}
+
 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 			    struct ocfs2_alloc_context **ac)
 {
 	int status;
+	s16 slot = ocfs2_get_inode_steal_slot(osb);
 
 	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
@@ -525,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 
 	(*ac)->ac_group_search = ocfs2_block_group_search;
 
+	/*
+	 * slot is set when we successfully steal inode from other nodes.
+	 * It is reset in 3 places:
+	 * 1. when we flush the truncate log
+	 * 2. when we complete local alloc recovery.
+	 * 3. when we successfully allocate from our own slot.
+	 * After it is set, we will go on stealing inodes until we find the
+	 * need to check our slots to see whether there is some space for us.
+	 */
+	if (slot != OCFS2_INVALID_SLOT &&
+	    atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
+		goto inode_steal;
+
+	atomic_set(&osb->s_num_inodes_stolen, 0);
 	status = ocfs2_reserve_suballoc_bits(osb, *ac,
 					     INODE_ALLOC_SYSTEM_INODE,
-					     osb->slot_num);
+					     osb->slot_num, ALLOC_NEW_GROUP);
+	if (status >= 0) {
+		status = 0;
+
+		/*
+		 * Some inodes must be freed by us, so try to allocate
+		 * from our own next time.
+		 */
+		if (slot != OCFS2_INVALID_SLOT)
+			ocfs2_init_inode_steal_slot(osb);
+		goto bail;
+	} else if (status < 0 && status != -ENOSPC) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	ocfs2_free_ac_resource(*ac);
+
+inode_steal:
+	status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
+	atomic_inc(&osb->s_num_inodes_stolen);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -557,7 +647,8 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
 
 	status = ocfs2_reserve_suballoc_bits(osb, ac,
 					     GLOBAL_BITMAP_SYSTEM_INODE,
-					     OCFS2_INVALID_SLOT);
+					     OCFS2_INVALID_SLOT,
+					     ALLOC_NEW_GROUP);
 	if (status < 0 && status != -ENOSPC) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 8799033bb459..544c600662bd 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -36,6 +36,7 @@ typedef int (group_search_t)(struct inode *,
 struct ocfs2_alloc_context {
 	struct inode *ac_inode;    /* which bitmap are we allocating from? */
 	struct buffer_head *ac_bh; /* file entry bh */
+	u32    ac_alloc_slot;   /* which slot are we allocating from? */
 	u32    ac_bits_wanted;
 	u32    ac_bits_given;
 #define OCFS2_AC_USE_LOCAL 1
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index bec75aff3d9f..94c7a69929ab 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1376,6 +1376,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	INIT_LIST_HEAD(&osb->blocked_lock_list);
 	osb->blocked_lock_count = 0;
 	spin_lock_init(&osb->osb_lock);
+	ocfs2_init_inode_steal_slot(osb);
 
 	atomic_set(&osb->alloc_stats.moves, 0);
 	atomic_set(&osb->alloc_stats.local_data, 0);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 03f808c5b79d..6149e4b58c88 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -473,6 +473,10 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 		return 0;
 	if (IS_ERR(state))	/* I/O error reading the partition table */
 		return -EIO;
+
+	/* tell userspace that the media / partition table may have changed */
+	kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE);
+
 	for (p = 1; p < state->limit; p++) {
 		sector_t size = state->parts[p].size;
 		sector_t from = state->parts[p].from;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 96ee899d6502..9a4da0aae02e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,9 +314,12 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer)
 static int lstats_show_proc(struct seq_file *m, void *v)
 {
 	int i;
-	struct task_struct *task = m->private;
-	seq_puts(m, "Latency Top version : v0.1\n");
+	struct inode *inode = m->private;
+	struct task_struct *task = get_proc_task(inode);
 
+	if (!task)
+		return -ESRCH;
+	seq_puts(m, "Latency Top version : v0.1\n");
 	for (i = 0; i < 32; i++) {
 		if (task->latency_record[i].backtrace[0]) {
 			int q;
@@ -341,32 +344,24 @@ static int lstats_show_proc(struct seq_file *m, void *v)
 		}
 
 	}
+	put_task_struct(task);
 	return 0;
 }
 
 static int lstats_open(struct inode *inode, struct file *file)
 {
-	int ret;
-	struct seq_file *m;
-	struct task_struct *task = get_proc_task(inode);
-
-	ret = single_open(file, lstats_show_proc, NULL);
-	if (!ret) {
-		m = file->private_data;
-		m->private = task;
-	}
-	return ret;
+	return single_open(file, lstats_show_proc, inode);
 }
 
 static ssize_t lstats_write(struct file *file, const char __user *buf,
 			    size_t count, loff_t *offs)
 {
-	struct seq_file *m;
-	struct task_struct *task;
+	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
 
-	m = file->private_data;
-	task = m->private;
+	if (!task)
+		return -ESRCH;
 	clear_all_latency_tracing(task);
+	put_task_struct(task);
 
 	return count;
 }
@@ -2274,6 +2269,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("task",       S_IRUGO|S_IXUGO, task),
 	DIR("fd",         S_IRUSR|S_IXUSR, fd),
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, fdinfo),
+#ifdef CONFIG_NET
+	DIR("net",        S_IRUGO|S_IXUSR, net),
+#endif
 	REG("environ",    S_IRUSR, environ),
 	INF("auxv",       S_IRUSR, pid_auxv),
 	ONE("status",     S_IRUGO, pid_status),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 68971e66cd41..a36ad3c75cf4 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations =
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
  */
-struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
+		struct dentry *dentry)
 {
 	struct inode *inode = NULL;
-	struct proc_dir_entry * de;
 	int error = -ENOENT;
 
 	lock_kernel();
 	spin_lock(&proc_subdir_lock);
-	de = PDE(dir);
 	if (de) {
 		for (de = de->subdir; de ; de = de->next) {
 			if (de->namelen != dentry->d_name.len)
@@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
 			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
 				unsigned int ino;
 
-				if (de->shadow_proc)
-					de = de->shadow_proc(current, de);
 				ino = de->low_ino;
 				de_get(de);
 				spin_unlock(&proc_subdir_lock);
@@ -417,6 +414,12 @@ out_unlock:
 	return ERR_PTR(error);
 }
 
+struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
+		struct nameidata *nd)
+{
+	return proc_lookup_de(PDE(dir), dir, dentry);
+}
+
 /*
  * This returns non-zero if at EOF, so that the /proc
  * root directory can use this and check if it should
@@ -426,10 +429,9 @@ out_unlock:
  * value of the readdir() call, as long as it's non-negative
  * for success..
  */
-int proc_readdir(struct file * filp,
-	void * dirent, filldir_t filldir)
+int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+		filldir_t filldir)
 {
-	struct proc_dir_entry * de;
 	unsigned int ino;
 	int i;
 	struct inode *inode = filp->f_path.dentry->d_inode;
@@ -438,7 +440,6 @@ int proc_readdir(struct file * filp,
 	lock_kernel();
 
 	ino = inode->i_ino;
-	de = PDE(inode);
 	if (!de) {
 		ret = -EINVAL;
 		goto out;
@@ -499,6 +500,13 @@ out:	unlock_kernel();
 	return ret;	
 }
 
+int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+
+	return proc_readdir_de(PDE(inode), filp, dirent, filldir);
+}
+
 /*
  * These are the generic /proc directory operations. They
  * use the in-memory "struct proc_dir_entry" tree to parse
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1c81c8f1aeed..bc72f5c8c47d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations;
 extern const struct file_operations proc_smaps_operations;
 extern const struct file_operations proc_clear_refs_operations;
 extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_net_operations;
+extern const struct inode_operations proc_net_inode_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
 
@@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode)
 {
 	return PROC_I(inode)->fd;
 }
+
+struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
+		struct dentry *dentry);
+int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+		filldir_t filldir);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 468805d40e2b..2d563979cb02 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -32,6 +32,7 @@
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/genhd.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/module.h>
@@ -377,7 +378,6 @@ static int stram_read_proc(char *page, char **start, off_t off,
 #endif
 
 #ifdef CONFIG_BLOCK
-extern const struct seq_operations partitions_op;
 static int partitions_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &partitions_op);
@@ -389,7 +389,6 @@ static const struct file_operations proc_partitions_operations = {
 	.release	= seq_release,
 };
 
-extern const struct seq_operations diskstats_op;
 static int diskstats_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &diskstats_op);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 14e9b5aaf863..4caa5f774fb7 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -63,6 +63,82 @@ int seq_release_net(struct inode *ino, struct file *f)
 }
 EXPORT_SYMBOL_GPL(seq_release_net);
 
+static struct net *get_proc_task_net(struct inode *dir)
+{
+	struct task_struct *task;
+	struct nsproxy *ns;
+	struct net *net = NULL;
+
+	rcu_read_lock();
+	task = pid_task(proc_pid(dir), PIDTYPE_PID);
+	if (task != NULL) {
+		ns = task_nsproxy(task);
+		if (ns != NULL)
+			net = get_net(ns->net_ns);
+	}
+	rcu_read_unlock();
+
+	return net;
+}
+
+static struct dentry *proc_tgid_net_lookup(struct inode *dir,
+		struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *de;
+	struct net *net;
+
+	de = ERR_PTR(-ENOENT);
+	net = get_proc_task_net(dir);
+	if (net != NULL) {
+		de = proc_lookup_de(net->proc_net, dir, dentry);
+		put_net(net);
+	}
+	return de;
+}
+
+static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct net *net;
+
+	net = get_proc_task_net(inode);
+
+	generic_fillattr(inode, stat);
+
+	if (net != NULL) {
+		stat->nlink = net->proc_net->nlink;
+		put_net(net);
+	}
+
+	return 0;
+}
+
+const struct inode_operations proc_net_inode_operations = {
+	.lookup		= proc_tgid_net_lookup,
+	.getattr	= proc_tgid_net_getattr,
+};
+
+static int proc_tgid_net_readdir(struct file *filp, void *dirent,
+		filldir_t filldir)
+{
+	int ret;
+	struct net *net;
+
+	ret = -EINVAL;
+	net = get_proc_task_net(filp->f_path.dentry->d_inode);
+	if (net != NULL) {
+		ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
+		put_net(net);
+	}
+	return ret;
+}
+
+const struct file_operations proc_net_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_tgid_net_readdir,
+};
+
 
 struct proc_dir_entry *proc_net_fops_create(struct net *net,
 	const char *name, mode_t mode, const struct file_operations *fops)
@@ -83,14 +159,6 @@ struct net *get_proc_net(const struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(get_proc_net);
 
-static struct proc_dir_entry *shadow_pde;
-
-static struct proc_dir_entry *proc_net_shadow(struct task_struct *task,
-						struct proc_dir_entry *de)
-{
-	return task->nsproxy->net_ns->proc_net;
-}
-
 struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 		struct proc_dir_entry *parent)
 {
@@ -104,45 +172,39 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir);
 
 static __net_init int proc_net_ns_init(struct net *net)
 {
-	struct proc_dir_entry *root, *netd, *net_statd;
+	struct proc_dir_entry *netd, *net_statd;
 	int err;
 
 	err = -ENOMEM;
-	root = kzalloc(sizeof(*root), GFP_KERNEL);
-	if (!root)
+	netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+	if (!netd)
 		goto out;
 
-	err = -EEXIST;
-	netd = proc_net_mkdir(net, "net", root);
-	if (!netd)
-		goto free_root;
+	netd->data = net;
+	netd->nlink = 2;
+	netd->name = "net";
+	netd->namelen = 3;
+	netd->parent = &proc_root;
 
 	err = -EEXIST;
 	net_statd = proc_net_mkdir(net, "stat", netd);
 	if (!net_statd)
 		goto free_net;
 
-	root->data = net;
-
-	net->proc_net_root = root;
 	net->proc_net = netd;
 	net->proc_net_stat = net_statd;
-	err = 0;
+	return 0;
 
+free_net:
+	kfree(netd);
 out:
 	return err;
-free_net:
-	remove_proc_entry("net", root);
-free_root:
-	kfree(root);
-	goto out;
 }
 
 static __net_exit void proc_net_ns_exit(struct net *net)
 {
 	remove_proc_entry("stat", net->proc_net);
-	remove_proc_entry("net", net->proc_net_root);
-	kfree(net->proc_net_root);
+	kfree(net->proc_net);
 }
 
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
@@ -152,8 +214,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
 
 int __init proc_net_init(void)
 {
-	shadow_pde = proc_mkdir("net", NULL);
-	shadow_pde->shadow_proc = proc_net_shadow;
+	proc_symlink("net", NULL, "self/net");
 
 	return register_pernet_subsys(&proc_net_ns_ops);
 }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6dc0334815f7..4206454734e0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -640,17 +640,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 
 	ret = -EACCES;
 	if (!ptrace_may_attach(task))
-		goto out;
+		goto out_task;
 
 	ret = -EINVAL;
 	/* file position must be aligned */
 	if (*ppos % PM_ENTRY_BYTES)
-		goto out;
+		goto out_task;
 
 	ret = 0;
 	mm = get_task_mm(task);
 	if (!mm)
-		goto out;
+		goto out_task;
 
 	ret = -ENOMEM;
 	uaddr = (unsigned long)buf & PAGE_MASK;
@@ -658,7 +658,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
 	pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
 	if (!pages)
-		goto out_task;
+		goto out_mm;
 
 	down_read(&current->mm->mmap_sem);
 	ret = get_user_pages(current, current->mm, uaddr, pagecount,
@@ -668,6 +668,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (ret < 0)
 		goto out_free;
 
+	if (ret != pagecount) {
+		pagecount = ret;
+		ret = -EFAULT;
+		goto out_pages;
+	}
+
 	pm.out = buf;
 	pm.end = buf + count;
 
@@ -699,15 +705,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 			ret = pm.out - buf;
 	}
 
+out_pages:
 	for (; pagecount; pagecount--) {
 		page = pages[pagecount-1];
 		if (!PageReserved(page))
 			SetPageDirty(page);
 		page_cache_release(page);
 	}
-	mmput(mm);
 out_free:
 	kfree(pages);
+out_mm:
+	mmput(mm);
 out_task:
 	put_task_struct(task);
 out:
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index bb05a3e51b93..060eb3f598e7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -38,7 +38,7 @@
 #include <asm/system.h>
 
 #include <linux/time.h>
-#include <asm/semaphore.h>
+#include <linux/semaphore.h>
 
 #include <linux/vmalloc.h>
 #include <linux/reiserfs_fs.h>
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6841452e0dea..393cc22c1717 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2031,7 +2031,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		return -EXDEV;
 	}
 	/* We must not pack tails for quota files on reiserfs for quota IO to work */
-	if (!REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask) {
+	if (!(REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask)) {
 		reiserfs_warning(sb,
 				 "reiserfs: Quota file must have tail packing disabled.");
 		path_put(&nd.path);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index eba037b3338f..736bc47dd01b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -44,7 +44,6 @@
 #include <net/checksum.h>
 #include <linux/smp_lock.h>
 #include <linux/stat.h>
-#include <asm/semaphore.h>
 
 #define FL_READONLY 128
 #define FL_DIR_SEM_HELD 256
diff --git a/fs/splice.c b/fs/splice.c
index 9b559ee711a8..0670c915cd35 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1669,6 +1669,13 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		i++;
 	} while (len);
 
+	/*
+	 * return EAGAIN if we have the potential of some data in the
+	 * future, otherwise just return 0
+	 */
+	if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
+		ret = -EAGAIN;
+
 	inode_double_unlock(ipipe->inode, opipe->inode);
 
 	/*
@@ -1709,11 +1716,8 @@ static long do_tee(struct file *in, struct file *out, size_t len,
 		ret = link_ipipe_prep(ipipe, flags);
 		if (!ret) {
 			ret = link_opipe_prep(opipe, flags);
-			if (!ret) {
+			if (!ret)
 				ret = link_pipe(ipipe, opipe, len, flags);
-				if (!ret && (flags & SPLICE_F_NONBLOCK))
-					ret = -EAGAIN;
-			}
 		}
 	}
 
diff --git a/fs/super.c b/fs/super.c
index 88811f60c8de..010446d8c40a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -870,12 +870,12 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	if (!mnt)
 		goto out;
 
-	if (data) {
+	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
 		secdata = alloc_secdata();
 		if (!secdata)
 			goto out_mnt;
 
-		error = security_sb_copy_data(type, data, secdata);
+		error = security_sb_copy_data(data, secdata);
 		if (error)
 			goto out_free_secdata;
 	}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index a271c87c4472..6f5954afa49c 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -12,14 +12,23 @@
 
 #include <linux/module.h>
 #include <linux/kobject.h>
+#include <linux/kallsyms.h>
 #include <linux/namei.h>
 #include <linux/poll.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/limits.h>
 #include <asm/uaccess.h>
 
 #include "sysfs.h"
 
+/* used in crash dumps to help with debugging */
+static char last_sysfs_file[PATH_MAX];
+void sysfs_printk_last_file(void)
+{
+	printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
+}
+
 /*
  * There's one sysfs_buffer for each open file and one
  * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -86,7 +95,12 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
 	 * The code works fine with PAGE_SIZE return but it's likely to
 	 * indicate truncated result or overflow in normal use cases.
 	 */
-	BUG_ON(count >= (ssize_t)PAGE_SIZE);
+	if (count >= (ssize_t)PAGE_SIZE) {
+		print_symbol("fill_read_buffer: %s returned bad count\n",
+			(unsigned long)ops->show);
+		/* Try to struggle along */
+		count = PAGE_SIZE - 1;
+	}
 	if (count >= 0) {
 		buffer->needs_read_fill = 0;
 		buffer->count = count;
@@ -320,6 +334,11 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	struct sysfs_buffer *buffer;
 	struct sysfs_ops *ops;
 	int error = -EACCES;
+	char *p;
+
+	p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
+	if (p)
+		memmove(last_sysfs_file, p, strlen(p) + 1);
 
 	/* need attr_sd for attr and ops, its parent for kobj */
 	if (!sysfs_get_active_two(attr_sd))
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 74168266cd59..a3410d6b34a7 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -22,7 +22,7 @@
 /* Random magic number */
 #define SYSFS_MAGIC 0x62656572
 
-static struct vfsmount *sysfs_mount;
+struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
 struct kmem_cache *sysfs_dir_cachep;
 
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 5f66c4466151..817f5966edca 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -87,7 +87,14 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
 
 void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
-	sysfs_hash_and_remove(kobj->sd, name);
+	struct sysfs_dirent *parent_sd = NULL;
+
+	if (!kobj)
+		parent_sd = &sysfs_root;
+	else
+		parent_sd = kobj->sd;
+
+	sysfs_hash_and_remove(parent_sd, name);
 }
 
 static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ff17f8da9b43..52aaa8c11f92 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -91,6 +91,7 @@ struct sysfs_addrm_cxt {
 extern struct sysfs_dirent sysfs_root;
 extern struct super_block *sysfs_sb;
 extern struct kmem_cache *sysfs_dir_cachep;
+extern struct vfsmount *sysfs_mount;
 
 /*
  * dir.c
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index f855dcbbdfb8..1b809bd494bd 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -149,8 +149,7 @@ static bool udf_add_free_space(struct udf_sb_info *sbi,
 		return false;
 
 	lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
-	lvid->freeSpaceTable[partition] = cpu_to_le32(le32_to_cpu(
-					lvid->freeSpaceTable[partition]) + cnt);
+	le32_add_cpu(&lvid->freeSpaceTable[partition], cnt);
 	return true;
 }
 
@@ -589,10 +588,8 @@ static void udf_table_free_blocks(struct super_block *sb,
 					sptr = oepos.bh->b_data + epos.offset;
 					aed = (struct allocExtDesc *)
 						oepos.bh->b_data;
-					aed->lengthAllocDescs =
-						cpu_to_le32(le32_to_cpu(
-							aed->lengthAllocDescs) +
-								adsize);
+					le32_add_cpu(&aed->lengthAllocDescs,
+							adsize);
 				} else {
 					sptr = iinfo->i_ext.i_data +
 								epos.offset;
@@ -645,9 +642,7 @@ static void udf_table_free_blocks(struct super_block *sb,
 				mark_inode_dirty(table);
 			} else {
 				aed = (struct allocExtDesc *)epos.bh->b_data;
-				aed->lengthAllocDescs =
-					cpu_to_le32(le32_to_cpu(
-					    aed->lengthAllocDescs) + adsize);
+				le32_add_cpu(&aed->lengthAllocDescs, adsize);
 				udf_update_tag(epos.bh->b_data, epos.offset);
 				mark_buffer_dirty(epos.bh);
 			}
diff --git a/fs/udf/crc.c b/fs/udf/crc.c
index b1661296e786..f178c63686e0 100644
--- a/fs/udf/crc.c
+++ b/fs/udf/crc.c
@@ -23,7 +23,7 @@
 
 #include "udfdecl.h"
 
-static uint16_t crc_table[256] = {
+static const uint16_t crc_table[256] = {
 	0x0000U, 0x1021U, 0x2042U, 0x3063U, 0x4084U, 0x50a5U, 0x60c6U, 0x70e7U,
 	0x8108U, 0x9129U, 0xa14aU, 0xb16bU, 0xc18cU, 0xd1adU, 0xe1ceU, 0xf1efU,
 	0x1231U, 0x0210U, 0x3273U, 0x2252U, 0x52b5U, 0x4294U, 0x72f7U, 0x62d6U,
@@ -79,7 +79,7 @@ static uint16_t crc_table[256] = {
  *	July 21, 1997 - Andrew E. Mileski
  *	Adapted from OSTA-UDF(tm) 1.50 standard.
  */
-uint16_t udf_crc(uint8_t *data, uint32_t size, uint16_t crc)
+uint16_t udf_crc(const uint8_t *data, uint32_t size, uint16_t crc)
 {
 	while (size--)
 		crc = crc_table[(crc >> 8 ^ *(data++)) & 0xffU] ^ (crc << 8);
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 8d8643ada199..62dc270c69d1 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -39,13 +39,13 @@
 static int do_udf_readdir(struct inode *dir, struct file *filp,
 			  filldir_t filldir, void *dirent)
 {
-	struct udf_fileident_bh fibh;
+	struct udf_fileident_bh fibh = { .sbh = NULL, .ebh = NULL};
 	struct fileIdentDesc *fi = NULL;
 	struct fileIdentDesc cfi;
 	int block, iblock;
 	loff_t nf_pos = (filp->f_pos - 1) << 2;
 	int flen;
-	char fname[UDF_NAME_LEN];
+	char *fname = NULL;
 	char *nameptr;
 	uint16_t liu;
 	uint8_t lfi;
@@ -54,23 +54,32 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 	kernel_lb_addr eloc;
 	uint32_t elen;
 	sector_t offset;
-	int i, num;
+	int i, num, ret = 0;
 	unsigned int dt_type;
 	struct extent_position epos = { NULL, 0, {0, 0} };
 	struct udf_inode_info *iinfo;
 
 	if (nf_pos >= size)
-		return 0;
+		goto out;
+
+	fname = kmalloc(UDF_NAME_LEN, GFP_NOFS);
+	if (!fname) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	if (nf_pos == 0)
 		nf_pos = udf_ext0_offset(dir);
 
 	fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1);
 	iinfo = UDF_I(dir);
-	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
-		fibh.sbh = fibh.ebh = NULL;
-	} else if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits,
-			      &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) {
+	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+		if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits,
+		    &epos, &eloc, &elen, &offset)
+		    != (EXT_RECORDED_ALLOCATED >> 30)) {
+			ret = -ENOENT;
+			goto out;
+		}
 		block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
 		if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
 			if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
@@ -83,8 +92,8 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 		}
 
 		if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) {
-			brelse(epos.bh);
-			return -EIO;
+			ret = -EIO;
+			goto out;
 		}
 
 		if (!(offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9)) - 1))) {
@@ -105,9 +114,6 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 					brelse(bha[i]);
 			}
 		}
-	} else {
-		brelse(epos.bh);
-		return -ENOENT;
 	}
 
 	while (nf_pos < size) {
@@ -115,13 +121,8 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 
 		fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc,
 					&elen, &offset);
-		if (!fi) {
-			if (fibh.sbh != fibh.ebh)
-				brelse(fibh.ebh);
-			brelse(fibh.sbh);
-			brelse(epos.bh);
-			return 0;
-		}
+		if (!fi)
+			goto out;
 
 		liu = le16_to_cpu(cfi.lengthOfImpUse);
 		lfi = cfi.lengthFileIdent;
@@ -167,53 +168,23 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
 			dt_type = DT_UNKNOWN;
 		}
 
-		if (flen) {
-			if (filldir(dirent, fname, flen, filp->f_pos, iblock, dt_type) < 0) {
-				if (fibh.sbh != fibh.ebh)
-					brelse(fibh.ebh);
-				brelse(fibh.sbh);
-				brelse(epos.bh);
-	 			return 0;
-			}
-		}
+		if (flen && filldir(dirent, fname, flen, filp->f_pos,
+				    iblock, dt_type) < 0)
+			goto out;
 	} /* end while */
 
 	filp->f_pos = (nf_pos >> 2) + 1;
 
+out:
 	if (fibh.sbh != fibh.ebh)
 		brelse(fibh.ebh);
 	brelse(fibh.sbh);
 	brelse(epos.bh);
+	kfree(fname);
 
-	return 0;
+	return ret;
 }
 
-/*
- * udf_readdir
- *
- * PURPOSE
- *	Read a directory entry.
- *
- * DESCRIPTION
- *	Optional - sys_getdents() will return -ENOTDIR if this routine is not
- *	available.
- *
- *	Refer to sys_getdents() in fs/readdir.c
- *	sys_getdents() -> .
- *
- * PRE-CONDITIONS
- *	filp			Pointer to directory file.
- *	buf			Pointer to directory entry buffer.
- *	filldir			Pointer to filldir function.
- *
- * POST-CONDITIONS
- *	<return>		>=0 on success.
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
-
 static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct inode *dir = filp->f_path.dentry->d_inode;
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h
index 56387711589b..a0974df82b31 100644
--- a/fs/udf/ecma_167.h
+++ b/fs/udf/ecma_167.h
@@ -70,19 +70,6 @@ typedef struct {
 	uint8_t		microseconds;
 } __attribute__ ((packed)) timestamp;
 
-typedef struct {
-	uint16_t	typeAndTimezone;
-	int16_t		year;
-	uint8_t		month;
-	uint8_t		day;
-	uint8_t		hour;
-	uint8_t		minute;
-	uint8_t		second;
-	uint8_t		centiseconds;
-	uint8_t		hundredsOfMicroseconds;
-	uint8_t		microseconds;
-} __attribute__ ((packed)) kernel_timestamp;
-
 /* Type and Time Zone (ECMA 167r3 1/7.3.1) */
 #define TIMESTAMP_TYPE_MASK		0xF000
 #define TIMESTAMP_TYPE_CUT		0x0000
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 97c71ae7c689..0ed6e146a0d9 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -27,7 +27,6 @@
 
 #include "udfdecl.h"
 #include <linux/fs.h>
-#include <linux/udf_fs.h>
 #include <asm/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/string.h> /* memset */
@@ -144,40 +143,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	return retval;
 }
 
-/*
- * udf_ioctl
- *
- * PURPOSE
- *	Issue an ioctl.
- *
- * DESCRIPTION
- *	Optional - sys_ioctl() will return -ENOTTY if this routine is not
- *	available, and the ioctl cannot be handled without filesystem help.
- *
- *	sys_ioctl() handles these ioctls that apply only to regular files:
- *		FIBMAP [requires udf_block_map()], FIGETBSZ, FIONREAD
- *	These ioctls are also handled by sys_ioctl():
- *		FIOCLEX, FIONCLEX, FIONBIO, FIOASYNC
- *	All other ioctls are passed to the filesystem.
- *
- *	Refer to sys_ioctl() in fs/ioctl.c
- *	sys_ioctl() -> .
- *
- * PRE-CONDITIONS
- *	inode			Pointer to inode that ioctl was issued on.
- *	filp			Pointer to file that ioctl was issued on.
- *	cmd			The ioctl command.
- *	arg			The ioctl argument [can be interpreted as a
- *				user-space pointer if desired].
- *
- * POST-CONDITIONS
- *	<return>		Success (>=0) or an error code (<=0) that
- *				sys_ioctl() will return.
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
 int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 	      unsigned long arg)
 {
@@ -225,18 +190,6 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 	return result;
 }
 
-/*
- * udf_release_file
- *
- * PURPOSE
- *  Called when all references to the file are closed
- *
- * DESCRIPTION
- *  Discard prealloced blocks
- *
- * HISTORY
- *
- */
 static int udf_release_file(struct inode *inode, struct file *filp)
 {
 	if (filp->f_mode & FMODE_WRITE) {
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 84360315aca2..eb9cfa23dc3d 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -21,7 +21,6 @@
 #include "udfdecl.h"
 #include <linux/fs.h>
 #include <linux/quotaops.h>
-#include <linux/udf_fs.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 
@@ -47,11 +46,9 @@ void udf_free_inode(struct inode *inode)
 		struct logicalVolIntegrityDescImpUse *lvidiu =
 							udf_sb_lvidiu(sbi);
 		if (S_ISDIR(inode->i_mode))
-			lvidiu->numDirs =
-				cpu_to_le32(le32_to_cpu(lvidiu->numDirs) - 1);
+			le32_add_cpu(&lvidiu->numDirs, -1);
 		else
-			lvidiu->numFiles =
-				cpu_to_le32(le32_to_cpu(lvidiu->numFiles) - 1);
+			le32_add_cpu(&lvidiu->numFiles, -1);
 
 		mark_buffer_dirty(sbi->s_lvid_bh);
 	}
@@ -105,11 +102,9 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 		lvhd = (struct logicalVolHeaderDesc *)
 				(lvid->logicalVolContentsUse);
 		if (S_ISDIR(mode))
-			lvidiu->numDirs =
-				cpu_to_le32(le32_to_cpu(lvidiu->numDirs) + 1);
+			le32_add_cpu(&lvidiu->numDirs, 1);
 		else
-			lvidiu->numFiles =
-				cpu_to_le32(le32_to_cpu(lvidiu->numFiles) + 1);
+			le32_add_cpu(&lvidiu->numFiles, 1);
 		iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID);
 		if (!(++uniqueID & 0x00000000FFFFFFFFUL))
 			uniqueID += 16;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 24cfa55d0fdc..2362bf0c6900 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -66,22 +66,7 @@ static void udf_update_extents(struct inode *,
 			       struct extent_position *);
 static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
-/*
- * udf_delete_inode
- *
- * PURPOSE
- *	Clean-up before the specified inode is destroyed.
- *
- * DESCRIPTION
- *	This routine is called when the kernel destroys an inode structure
- *	ie. when iput() finds i_count == 0.
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- *
- *  Called at the last iput() if i_nlink is zero.
- */
+
 void udf_delete_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
@@ -323,9 +308,6 @@ static int udf_get_block(struct inode *inode, sector_t block,
 
 	lock_kernel();
 
-	if (block < 0)
-		goto abort_negative;
-
 	iinfo = UDF_I(inode);
 	if (block == iinfo->i_next_alloc_block + 1) {
 		iinfo->i_next_alloc_block++;
@@ -347,10 +329,6 @@ static int udf_get_block(struct inode *inode, sector_t block,
 abort:
 	unlock_kernel();
 	return err;
-
-abort_negative:
-	udf_warning(inode->i_sb, "udf_get_block", "block < 0");
-	goto abort;
 }
 
 static struct buffer_head *udf_getblk(struct inode *inode, long block,
@@ -1116,42 +1094,36 @@ static void __udf_read_inode(struct inode *inode)
 	fe = (struct fileEntry *)bh->b_data;
 
 	if (fe->icbTag.strategyType == cpu_to_le16(4096)) {
-		struct buffer_head *ibh = NULL, *nbh = NULL;
-		struct indirectEntry *ie;
+		struct buffer_head *ibh;
 
 		ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1,
 					&ident);
-		if (ident == TAG_IDENT_IE) {
-			if (ibh) {
-				kernel_lb_addr loc;
-				ie = (struct indirectEntry *)ibh->b_data;
-
-				loc = lelb_to_cpu(ie->indirectICB.extLocation);
-
-				if (ie->indirectICB.extLength &&
-				    (nbh = udf_read_ptagged(inode->i_sb, loc, 0,
-							    &ident))) {
-					if (ident == TAG_IDENT_FE ||
-					    ident == TAG_IDENT_EFE) {
-						memcpy(&iinfo->i_location,
-						       &loc,
-						       sizeof(kernel_lb_addr));
-						brelse(bh);
-						brelse(ibh);
-						brelse(nbh);
-						__udf_read_inode(inode);
-						return;
-					} else {
-						brelse(nbh);
-						brelse(ibh);
-					}
-				} else {
+		if (ident == TAG_IDENT_IE && ibh) {
+			struct buffer_head *nbh = NULL;
+			kernel_lb_addr loc;
+			struct indirectEntry *ie;
+
+			ie = (struct indirectEntry *)ibh->b_data;
+			loc = lelb_to_cpu(ie->indirectICB.extLocation);
+
+			if (ie->indirectICB.extLength &&
+				(nbh = udf_read_ptagged(inode->i_sb, loc, 0,
+							&ident))) {
+				if (ident == TAG_IDENT_FE ||
+					ident == TAG_IDENT_EFE) {
+					memcpy(&iinfo->i_location,
+						&loc,
+						sizeof(kernel_lb_addr));
+					brelse(bh);
 					brelse(ibh);
+					brelse(nbh);
+					__udf_read_inode(inode);
+					return;
 				}
+				brelse(nbh);
 			}
-		} else {
-			brelse(ibh);
 		}
+		brelse(ibh);
 	} else if (fe->icbTag.strategyType != cpu_to_le16(4)) {
 		printk(KERN_ERR "udf: unsupported strategy type: %d\n",
 		       le16_to_cpu(fe->icbTag.strategyType));
@@ -1168,8 +1140,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 {
 	struct fileEntry *fe;
 	struct extendedFileEntry *efe;
-	time_t convtime;
-	long convtime_usec;
 	int offset;
 	struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
 	struct udf_inode_info *iinfo = UDF_I(inode);
@@ -1257,29 +1227,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 		inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
 			(inode->i_sb->s_blocksize_bits - 9);
 
-		if (udf_stamp_to_time(&convtime, &convtime_usec,
-				      lets_to_cpu(fe->accessTime))) {
-			inode->i_atime.tv_sec = convtime;
-			inode->i_atime.tv_nsec = convtime_usec * 1000;
-		} else {
+		if (!udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime))
 			inode->i_atime = sbi->s_record_time;
-		}
 
-		if (udf_stamp_to_time(&convtime, &convtime_usec,
-				      lets_to_cpu(fe->modificationTime))) {
-			inode->i_mtime.tv_sec = convtime;
-			inode->i_mtime.tv_nsec = convtime_usec * 1000;
-		} else {
+		if (!udf_disk_stamp_to_time(&inode->i_mtime,
+					    fe->modificationTime))
 			inode->i_mtime = sbi->s_record_time;
-		}
 
-		if (udf_stamp_to_time(&convtime, &convtime_usec,
-				      lets_to_cpu(fe->attrTime))) {
-			inode->i_ctime.tv_sec = convtime;
-			inode->i_ctime.tv_nsec = convtime_usec * 1000;
-		} else {
+		if (!udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime))
 			inode->i_ctime = sbi->s_record_time;
-		}
 
 		iinfo->i_unique = le64_to_cpu(fe->uniqueID);
 		iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
@@ -1289,37 +1245,18 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 		inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
 		    (inode->i_sb->s_blocksize_bits - 9);
 
-		if (udf_stamp_to_time(&convtime, &convtime_usec,
-				      lets_to_cpu(efe->accessTime))) {
-			inode->i_atime.tv_sec = convtime;
-			inode->i_atime.tv_nsec = convtime_usec * 1000;
-		} else {
+		if (!udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime))
 			inode->i_atime = sbi->s_record_time;
-		}
 
-		if (udf_stamp_to_time(&convtime, &convtime_usec,
-				      lets_to_cpu(efe->modificationTime))) {
-			inode->i_mtime.tv_sec = convtime;
-			inode->i_mtime.tv_nsec = convtime_usec * 1000;
-		} else {
+		if (!udf_disk_stamp_to_time(&inode->i_mtime,
+					    efe->modificationTime))
 			inode->i_mtime = sbi->s_record_time;
-		}
 
-		if (udf_stamp_to_time(&convtime, &convtime_usec,
-				      lets_to_cpu(efe->createTime))) {
-			iinfo->i_crtime.tv_sec = convtime;
-			iinfo->i_crtime.tv_nsec = convtime_usec * 1000;
-		} else {
+		if (!udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime))
 			iinfo->i_crtime = sbi->s_record_time;
-		}
 
-		if (udf_stamp_to_time(&convtime, &convtime_usec,
-				      lets_to_cpu(efe->attrTime))) {
-			inode->i_ctime.tv_sec = convtime;
-			inode->i_ctime.tv_nsec = convtime_usec * 1000;
-		} else {
+		if (!udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime))
 			inode->i_ctime = sbi->s_record_time;
-		}
 
 		iinfo->i_unique = le64_to_cpu(efe->uniqueID);
 		iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
@@ -1416,21 +1353,6 @@ static mode_t udf_convert_permissions(struct fileEntry *fe)
 	return mode;
 }
 
-/*
- * udf_write_inode
- *
- * PURPOSE
- *	Write out the specified inode.
- *
- * DESCRIPTION
- *	This routine is called whenever an inode is synced.
- *	Currently this routine is just a placeholder.
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
-
 int udf_write_inode(struct inode *inode, int sync)
 {
 	int ret;
@@ -1455,7 +1377,6 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 	uint32_t udfperms;
 	uint16_t icbflags;
 	uint16_t crclen;
-	kernel_timestamp cpu_time;
 	int err = 0;
 	struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
 	unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
@@ -1558,12 +1479,9 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 			(inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >>
 			(blocksize_bits - 9));
 
-		if (udf_time_to_stamp(&cpu_time, inode->i_atime))
-			fe->accessTime = cpu_to_lets(cpu_time);
-		if (udf_time_to_stamp(&cpu_time, inode->i_mtime))
-			fe->modificationTime = cpu_to_lets(cpu_time);
-		if (udf_time_to_stamp(&cpu_time, inode->i_ctime))
-			fe->attrTime = cpu_to_lets(cpu_time);
+		udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
+		udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
+		udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime);
 		memset(&(fe->impIdent), 0, sizeof(regid));
 		strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER);
 		fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
@@ -1598,14 +1516,10 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 		     iinfo->i_crtime.tv_nsec > inode->i_ctime.tv_nsec))
 			iinfo->i_crtime = inode->i_ctime;
 
-		if (udf_time_to_stamp(&cpu_time, inode->i_atime))
-			efe->accessTime = cpu_to_lets(cpu_time);
-		if (udf_time_to_stamp(&cpu_time, inode->i_mtime))
-			efe->modificationTime = cpu_to_lets(cpu_time);
-		if (udf_time_to_stamp(&cpu_time, iinfo->i_crtime))
-			efe->createTime = cpu_to_lets(cpu_time);
-		if (udf_time_to_stamp(&cpu_time, inode->i_ctime))
-			efe->attrTime = cpu_to_lets(cpu_time);
+		udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime);
+		udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime);
+		udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime);
+		udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime);
 
 		memset(&(efe->impIdent), 0, sizeof(regid));
 		strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER);
@@ -1778,9 +1692,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
 
 			if (epos->bh) {
 				aed = (struct allocExtDesc *)epos->bh->b_data;
-				aed->lengthAllocDescs =
-					cpu_to_le32(le32_to_cpu(
-					aed->lengthAllocDescs) + adsize);
+				le32_add_cpu(&aed->lengthAllocDescs, adsize);
 			} else {
 				iinfo->i_lenAlloc += adsize;
 				mark_inode_dirty(inode);
@@ -1830,9 +1742,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
 		mark_inode_dirty(inode);
 	} else {
 		aed = (struct allocExtDesc *)epos->bh->b_data;
-		aed->lengthAllocDescs =
-			cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) +
-				    adsize);
+		le32_add_cpu(&aed->lengthAllocDescs, adsize);
 		if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
 				UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
 			udf_update_tag(epos->bh->b_data,
@@ -2046,9 +1956,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
 			mark_inode_dirty(inode);
 		} else {
 			aed = (struct allocExtDesc *)oepos.bh->b_data;
-			aed->lengthAllocDescs =
-				cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) -
-					    (2 * adsize));
+			le32_add_cpu(&aed->lengthAllocDescs, -(2 * adsize));
 			if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
 			    UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
 				udf_update_tag(oepos.bh->b_data,
@@ -2065,9 +1973,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
 			mark_inode_dirty(inode);
 		} else {
 			aed = (struct allocExtDesc *)oepos.bh->b_data;
-			aed->lengthAllocDescs =
-				cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) -
-					    adsize);
+			le32_add_cpu(&aed->lengthAllocDescs, -adsize);
 			if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
 			    UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
 				udf_update_tag(oepos.bh->b_data,
@@ -2095,11 +2001,6 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
 	int8_t etype;
 	struct udf_inode_info *iinfo;
 
-	if (block < 0) {
-		printk(KERN_ERR "udf: inode_bmap: block < 0\n");
-		return -1;
-	}
-
 	iinfo = UDF_I(inode);
 	pos->offset = 0;
 	pos->block = iinfo->i_location;
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 579bae71e67e..703843f30ffd 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -23,7 +23,6 @@
 #include <linux/cdrom.h>
 #include <asm/uaccess.h>
 
-#include <linux/udf_fs.h>
 #include "udf_sb.h"
 
 unsigned int udf_get_last_session(struct super_block *sb)
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index a1d6da0caf71..581b6e4cc591 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -23,7 +23,6 @@
 
 #include <linux/fs.h>
 #include <linux/string.h>
-#include <linux/udf_fs.h>
 #include <linux/buffer_head.h>
 
 #include "udf_i.h"
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 112a5fb0b27b..68686b79650a 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -149,7 +149,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
 	struct fileIdentDesc *fi = NULL;
 	loff_t f_pos;
 	int block, flen;
-	char fname[UDF_NAME_LEN];
+	char *fname = NULL;
 	char *nameptr;
 	uint8_t lfi;
 	uint16_t liu;
@@ -163,12 +163,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
 	size = udf_ext0_offset(dir) + dir->i_size;
 	f_pos = udf_ext0_offset(dir);
 
+	fibh->sbh = fibh->ebh = NULL;
 	fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1);
-	if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
-		fibh->sbh = fibh->ebh = NULL;
-	else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits,
-			      &epos, &eloc, &elen, &offset) ==
-					(EXT_RECORDED_ALLOCATED >> 30)) {
+	if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+		if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
+		    &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30))
+			goto out_err;
 		block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
 		if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
 			if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
@@ -179,25 +179,19 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
 			offset = 0;
 
 		fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block);
-		if (!fibh->sbh) {
-			brelse(epos.bh);
-			return NULL;
-		}
-	} else {
-		brelse(epos.bh);
-		return NULL;
+		if (!fibh->sbh)
+			goto out_err;
 	}
 
+	fname = kmalloc(UDF_NAME_LEN, GFP_NOFS);
+	if (!fname)
+		goto out_err;
+
 	while (f_pos < size) {
 		fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc,
 					&elen, &offset);
-		if (!fi) {
-			if (fibh->sbh != fibh->ebh)
-				brelse(fibh->ebh);
-			brelse(fibh->sbh);
-			brelse(epos.bh);
-			return NULL;
-		}
+		if (!fi)
+			goto out_err;
 
 		liu = le16_to_cpu(cfi->lengthOfImpUse);
 		lfi = cfi->lengthFileIdent;
@@ -237,53 +231,22 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
 
 		flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
 		if (flen && udf_match(flen, fname, dentry->d_name.len,
-				      dentry->d_name.name)) {
-			brelse(epos.bh);
-			return fi;
-		}
+				      dentry->d_name.name))
+			goto out_ok;
 	}
 
+out_err:
+	fi = NULL;
 	if (fibh->sbh != fibh->ebh)
 		brelse(fibh->ebh);
 	brelse(fibh->sbh);
+out_ok:
 	brelse(epos.bh);
+	kfree(fname);
 
-	return NULL;
+	return fi;
 }
 
-/*
- * udf_lookup
- *
- * PURPOSE
- *	Look-up the inode for a given name.
- *
- * DESCRIPTION
- *	Required - lookup_dentry() will return -ENOTDIR if this routine is not
- *	available for a directory. The filesystem is useless if this routine is
- *	not available for at least the filesystem's root directory.
- *
- *	This routine is passed an incomplete dentry - it must be completed by
- *	calling d_add(dentry, inode). If the name does not exist, then the
- *	specified inode must be set to null. An error should only be returned
- *	when the lookup fails for a reason other than the name not existing.
- *	Note that the directory inode semaphore is held during the call.
- *
- *	Refer to lookup_dentry() in fs/namei.c
- *	lookup_dentry() -> lookup() -> real_lookup() -> .
- *
- * PRE-CONDITIONS
- *	dir			Pointer to inode of parent directory.
- *	dentry			Pointer to dentry to complete.
- *	nd			Pointer to lookup nameidata
- *
- * POST-CONDITIONS
- *	<return>		Zero on success.
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
-
 static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
 				 struct nameidata *nd)
 {
@@ -336,11 +299,9 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 {
 	struct super_block *sb = dir->i_sb;
 	struct fileIdentDesc *fi = NULL;
-	char name[UDF_NAME_LEN], fname[UDF_NAME_LEN];
+	char *name = NULL;
 	int namelen;
 	loff_t f_pos;
-	int flen;
-	char *nameptr;
 	loff_t size = udf_ext0_offset(dir) + dir->i_size;
 	int nfidlen;
 	uint8_t lfi;
@@ -352,16 +313,23 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 	struct extent_position epos = {};
 	struct udf_inode_info *dinfo;
 
+	fibh->sbh = fibh->ebh = NULL;
+	name = kmalloc(UDF_NAME_LEN, GFP_NOFS);
+	if (!name) {
+		*err = -ENOMEM;
+		goto out_err;
+	}
+
 	if (dentry) {
 		if (!dentry->d_name.len) {
 			*err = -EINVAL;
-			return NULL;
+			goto out_err;
 		}
 		namelen = udf_put_filename(sb, dentry->d_name.name, name,
 						 dentry->d_name.len);
 		if (!namelen) {
 			*err = -ENAMETOOLONG;
-			return NULL;
+			goto out_err;
 		}
 	} else {
 		namelen = 0;
@@ -373,11 +341,14 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 
 	fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1);
 	dinfo = UDF_I(dir);
-	if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
-		fibh->sbh = fibh->ebh = NULL;
-	else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits,
-			      &epos, &eloc, &elen, &offset) ==
-					(EXT_RECORDED_ALLOCATED >> 30)) {
+	if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+		if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
+		    &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) {
+			block = udf_get_lb_pblock(dir->i_sb,
+					dinfo->i_location, 0);
+			fibh->soffset = fibh->eoffset = sb->s_blocksize;
+			goto add;
+		}
 		block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
 		if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
 			if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
@@ -389,17 +360,11 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 
 		fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block);
 		if (!fibh->sbh) {
-			brelse(epos.bh);
 			*err = -EIO;
-			return NULL;
+			goto out_err;
 		}
 
 		block = dinfo->i_location.logicalBlockNum;
-	} else {
-		block = udf_get_lb_pblock(dir->i_sb, dinfo->i_location, 0);
-		fibh->sbh = fibh->ebh = NULL;
-		fibh->soffset = fibh->eoffset = sb->s_blocksize;
-		goto add;
 	}
 
 	while (f_pos < size) {
@@ -407,41 +372,16 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 					&elen, &offset);
 
 		if (!fi) {
-			if (fibh->sbh != fibh->ebh)
-				brelse(fibh->ebh);
-			brelse(fibh->sbh);
-			brelse(epos.bh);
 			*err = -EIO;
-			return NULL;
+			goto out_err;
 		}
 
 		liu = le16_to_cpu(cfi->lengthOfImpUse);
 		lfi = cfi->lengthFileIdent;
 
-		if (fibh->sbh == fibh->ebh)
-			nameptr = fi->fileIdent + liu;
-		else {
-			int poffset;	/* Unpaded ending offset */
-
-			poffset = fibh->soffset + sizeof(struct fileIdentDesc) +
-					liu + lfi;
-
-			if (poffset >= lfi)
-				nameptr = (char *)(fibh->ebh->b_data +
-						   poffset - lfi);
-			else {
-				nameptr = fname;
-				memcpy(nameptr, fi->fileIdent + liu,
-					lfi - poffset);
-				memcpy(nameptr + lfi - poffset,
-					fibh->ebh->b_data, poffset);
-			}
-		}
-
 		if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
 			if (((sizeof(struct fileIdentDesc) +
 					liu + lfi + 3) & ~3) == nfidlen) {
-				brelse(epos.bh);
 				cfi->descTag.tagSerialNum = cpu_to_le16(1);
 				cfi->fileVersionNum = cpu_to_le16(1);
 				cfi->fileCharacteristics = 0;
@@ -449,27 +389,13 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 				cfi->lengthOfImpUse = cpu_to_le16(0);
 				if (!udf_write_fi(dir, cfi, fi, fibh, NULL,
 						  name))
-					return fi;
+					goto out_ok;
 				else {
 					*err = -EIO;
-					return NULL;
+					goto out_err;
 				}
 			}
 		}
-
-		if (!lfi || !dentry)
-			continue;
-
-		flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
-		if (flen && udf_match(flen, fname, dentry->d_name.len,
-				      dentry->d_name.name)) {
-			if (fibh->sbh != fibh->ebh)
-				brelse(fibh->ebh);
-			brelse(fibh->sbh);
-			brelse(epos.bh);
-			*err = -EEXIST;
-			return NULL;
-		}
 	}
 
 add:
@@ -496,7 +422,7 @@ add:
 		fibh->sbh = fibh->ebh =
 				udf_expand_dir_adinicb(dir, &block, err);
 		if (!fibh->sbh)
-			return NULL;
+			goto out_err;
 		epos.block = dinfo->i_location;
 		epos.offset = udf_file_entry_alloc_offset(dir);
 		/* Load extent udf_expand_dir_adinicb() has created */
@@ -537,11 +463,8 @@ add:
 						dir->i_sb->s_blocksize_bits);
 		fibh->ebh = udf_bread(dir,
 				f_pos >> dir->i_sb->s_blocksize_bits, 1, err);
-		if (!fibh->ebh) {
-			brelse(epos.bh);
-			brelse(fibh->sbh);
-			return NULL;
-		}
+		if (!fibh->ebh)
+			goto out_err;
 
 		if (!fibh->soffset) {
 			if (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
@@ -572,20 +495,25 @@ add:
 	cfi->lengthFileIdent = namelen;
 	cfi->lengthOfImpUse = cpu_to_le16(0);
 	if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) {
-		brelse(epos.bh);
 		dir->i_size += nfidlen;
 		if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
 			dinfo->i_lenAlloc += nfidlen;
 		mark_inode_dirty(dir);
-		return fi;
+		goto out_ok;
 	} else {
-		brelse(epos.bh);
-		if (fibh->sbh != fibh->ebh)
-			brelse(fibh->ebh);
-		brelse(fibh->sbh);
 		*err = -EIO;
-		return NULL;
+		goto out_err;
 	}
+
+out_err:
+	fi = NULL;
+	if (fibh->sbh != fibh->ebh)
+		brelse(fibh->ebh);
+	brelse(fibh->sbh);
+out_ok:
+	brelse(epos.bh);
+	kfree(name);
+	return fi;
 }
 
 static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
@@ -940,7 +868,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	char *ea;
 	int err;
 	int block;
-	char name[UDF_NAME_LEN];
+	char *name = NULL;
 	int namelen;
 	struct buffer_head *bh;
 	struct udf_inode_info *iinfo;
@@ -950,6 +878,12 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	if (!inode)
 		goto out;
 
+	name = kmalloc(UDF_NAME_LEN, GFP_NOFS);
+	if (!name) {
+		err = -ENOMEM;
+		goto out_no_entry;
+	}
+
 	iinfo = UDF_I(inode);
 	inode->i_mode = S_IFLNK | S_IRWXUGO;
 	inode->i_data.a_ops = &udf_symlink_aops;
@@ -1089,6 +1023,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	err = 0;
 
 out:
+	kfree(name);
 	unlock_kernel();
 	return err;
 
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index fc533345ab89..307c9c33d184 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -24,7 +24,6 @@
 
 #include <linux/fs.h>
 #include <linux/string.h>
-#include <linux/udf_fs.h>
 #include <linux/slab.h>
 #include <linux/buffer_head.h>
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index f3ac4abfc946..137153c5005b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -55,9 +55,9 @@
 #include <linux/errno.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
+#include <linux/bitmap.h>
 #include <asm/byteorder.h>
 
-#include <linux/udf_fs.h>
 #include "udf_sb.h"
 #include "udf_i.h"
 
@@ -100,6 +100,8 @@ static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
 static int udf_statfs(struct dentry *, struct kstatfs *);
 static int udf_show_options(struct seq_file *, struct vfsmount *);
+static void udf_error(struct super_block *sb, const char *function,
+		      const char *fmt, ...);
 
 struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
 {
@@ -587,44 +589,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
 	return 0;
 }
 
-/*
- * udf_set_blocksize
- *
- * PURPOSE
- *	Set the block size to be used in all transfers.
- *
- * DESCRIPTION
- *	To allow room for a DMA transfer, it is best to guess big when unsure.
- *	This routine picks 2048 bytes as the blocksize when guessing. This
- *	should be adequate until devices with larger block sizes become common.
- *
- *	Note that the Linux kernel can currently only deal with blocksizes of
- *	512, 1024, 2048, 4096, and 8192 bytes.
- *
- * PRE-CONDITIONS
- *	sb			Pointer to _locked_ superblock.
- *
- * POST-CONDITIONS
- *	sb->s_blocksize		Blocksize.
- *	sb->s_blocksize_bits	log2 of blocksize.
- *	<return>	0	Blocksize is valid.
- *	<return>	1	Blocksize is invalid.
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
-static int udf_set_blocksize(struct super_block *sb, int bsize)
-{
-	if (!sb_min_blocksize(sb, bsize)) {
-		udf_debug("Bad block size (%d)\n", bsize);
-		printk(KERN_ERR "udf: bad block size (%d)\n", bsize);
-		return 0;
-	}
-
-	return sb->s_blocksize;
-}
-
 static int udf_vrs(struct super_block *sb, int silent)
 {
 	struct volStructDesc *vsd = NULL;
@@ -863,18 +827,18 @@ static void udf_find_anchor(struct super_block *sb)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) {
-		if (sbi->s_anchor[i]) {
-			bh = udf_read_tagged(sb, sbi->s_anchor[i],
-					     sbi->s_anchor[i], &ident);
-			if (!bh)
+		if (!sbi->s_anchor[i])
+			continue;
+		bh = udf_read_tagged(sb, sbi->s_anchor[i],
+					sbi->s_anchor[i], &ident);
+		if (!bh)
+			sbi->s_anchor[i] = 0;
+		else {
+			brelse(bh);
+			if ((ident != TAG_IDENT_AVDP) &&
+				(i || (ident != TAG_IDENT_FE &&
+					ident != TAG_IDENT_EFE)))
 				sbi->s_anchor[i] = 0;
-			else {
-				brelse(bh);
-				if ((ident != TAG_IDENT_AVDP) &&
-				    (i || (ident != TAG_IDENT_FE &&
-					   ident != TAG_IDENT_EFE)))
-					sbi->s_anchor[i] = 0;
-			}
 		}
 	}
 
@@ -974,24 +938,20 @@ static int udf_find_fileset(struct super_block *sb,
 static void udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh)
 {
 	struct primaryVolDesc *pvoldesc;
-	time_t recording;
-	long recording_usec;
 	struct ustr instr;
 	struct ustr outstr;
 
 	pvoldesc = (struct primaryVolDesc *)bh->b_data;
 
-	if (udf_stamp_to_time(&recording, &recording_usec,
-			      lets_to_cpu(pvoldesc->recordingDateAndTime))) {
-		kernel_timestamp ts;
-		ts = lets_to_cpu(pvoldesc->recordingDateAndTime);
-		udf_debug("recording time %ld/%ld, %04u/%02u/%02u"
+	if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time,
+			      pvoldesc->recordingDateAndTime)) {
+#ifdef UDFFS_DEBUG
+		timestamp *ts = &pvoldesc->recordingDateAndTime;
+		udf_debug("recording time %04u/%02u/%02u"
 			  " %02u:%02u (%x)\n",
-			  recording, recording_usec,
-			  ts.year, ts.month, ts.day, ts.hour,
-			  ts.minute, ts.typeAndTimezone);
-		UDF_SB(sb)->s_record_time.tv_sec = recording;
-		UDF_SB(sb)->s_record_time.tv_nsec = recording_usec * 1000;
+			  le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
+			  ts->minute, le16_to_cpu(ts->typeAndTimezone));
+#endif
 	}
 
 	if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32))
@@ -1025,10 +985,9 @@ static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
 int udf_compute_nr_groups(struct super_block *sb, u32 partition)
 {
 	struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
-	return (map->s_partition_len +
-		(sizeof(struct spaceBitmapDesc) << 3) +
-		(sb->s_blocksize * 8) - 1) /
-		(sb->s_blocksize * 8);
+	return DIV_ROUND_UP(map->s_partition_len +
+			    (sizeof(struct spaceBitmapDesc) << 3),
+			    sb->s_blocksize * 8);
 }
 
 static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
@@ -1061,128 +1020,118 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
 
 static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
 {
-	struct partitionDesc *p;
-	int i;
+	struct partitionHeaderDesc *phd;
+	struct partitionDesc *p = (struct partitionDesc *)bh->b_data;
 	struct udf_part_map *map;
-	struct udf_sb_info *sbi;
-
-	p = (struct partitionDesc *)bh->b_data;
-	sbi = UDF_SB(sb);
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	bool found = false;
+	int i;
+	u16 partitionNumber = le16_to_cpu(p->partitionNumber);
 
 	for (i = 0; i < sbi->s_partitions; i++) {
 		map = &sbi->s_partmaps[i];
 		udf_debug("Searching map: (%d == %d)\n",
-			  map->s_partition_num,
-			  le16_to_cpu(p->partitionNumber));
-		if (map->s_partition_num ==
-				le16_to_cpu(p->partitionNumber)) {
-			map->s_partition_len =
-				le32_to_cpu(p->partitionLength); /* blocks */
-			map->s_partition_root =
-				le32_to_cpu(p->partitionStartingLocation);
-			if (p->accessType ==
-					cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY))
-				map->s_partition_flags |=
-						UDF_PART_FLAG_READ_ONLY;
-			if (p->accessType ==
-					cpu_to_le32(PD_ACCESS_TYPE_WRITE_ONCE))
-				map->s_partition_flags |=
-						UDF_PART_FLAG_WRITE_ONCE;
-			if (p->accessType ==
-					cpu_to_le32(PD_ACCESS_TYPE_REWRITABLE))
-				map->s_partition_flags |=
-						UDF_PART_FLAG_REWRITABLE;
-			if (p->accessType ==
-				    cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE))
-				map->s_partition_flags |=
-						UDF_PART_FLAG_OVERWRITABLE;
-
-			if (!strcmp(p->partitionContents.ident,
-				    PD_PARTITION_CONTENTS_NSR02) ||
-			    !strcmp(p->partitionContents.ident,
-				    PD_PARTITION_CONTENTS_NSR03)) {
-				struct partitionHeaderDesc *phd;
-
-				phd = (struct partitionHeaderDesc *)
-						(p->partitionContentsUse);
-				if (phd->unallocSpaceTable.extLength) {
-					kernel_lb_addr loc = {
-						.logicalBlockNum = le32_to_cpu(phd->unallocSpaceTable.extPosition),
-						.partitionReferenceNum = i,
-					};
-
-					map->s_uspace.s_table =
-						udf_iget(sb, loc);
-					if (!map->s_uspace.s_table) {
-						udf_debug("cannot load unallocSpaceTable (part %d)\n", i);
-						return 1;
-					}
-					map->s_partition_flags |=
-						UDF_PART_FLAG_UNALLOC_TABLE;
-					udf_debug("unallocSpaceTable (part %d) @ %ld\n",
-						  i, map->s_uspace.s_table->i_ino);
-				}
-				if (phd->unallocSpaceBitmap.extLength) {
-					struct udf_bitmap *bitmap =
-						udf_sb_alloc_bitmap(sb, i);
-					map->s_uspace.s_bitmap = bitmap;
-					if (bitmap != NULL) {
-						bitmap->s_extLength =
-							le32_to_cpu(phd->unallocSpaceBitmap.extLength);
-						bitmap->s_extPosition =
-							le32_to_cpu(phd->unallocSpaceBitmap.extPosition);
-						map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
-						udf_debug("unallocSpaceBitmap (part %d) @ %d\n",
-							  i, bitmap->s_extPosition);
-					}
-				}
-				if (phd->partitionIntegrityTable.extLength)
-					udf_debug("partitionIntegrityTable (part %d)\n", i);
-				if (phd->freedSpaceTable.extLength) {
-					kernel_lb_addr loc = {
-						.logicalBlockNum = le32_to_cpu(phd->freedSpaceTable.extPosition),
-						.partitionReferenceNum = i,
-					};
-
-					map->s_fspace.s_table =
-						udf_iget(sb, loc);
-					if (!map->s_fspace.s_table) {
-						udf_debug("cannot load freedSpaceTable (part %d)\n", i);
-						return 1;
-					}
-					map->s_partition_flags |=
-						UDF_PART_FLAG_FREED_TABLE;
-					udf_debug("freedSpaceTable (part %d) @ %ld\n",
-						  i, map->s_fspace.s_table->i_ino);
-				}
-				if (phd->freedSpaceBitmap.extLength) {
-					struct udf_bitmap *bitmap =
-						udf_sb_alloc_bitmap(sb, i);
-					map->s_fspace.s_bitmap = bitmap;
-					if (bitmap != NULL) {
-						bitmap->s_extLength =
-							le32_to_cpu(phd->freedSpaceBitmap.extLength);
-						bitmap->s_extPosition =
-							le32_to_cpu(phd->freedSpaceBitmap.extPosition);
-						map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
-						udf_debug("freedSpaceBitmap (part %d) @ %d\n",
-							  i, bitmap->s_extPosition);
-					}
-				}
-			}
+			  map->s_partition_num, partitionNumber);
+		found = map->s_partition_num == partitionNumber;
+		if (found)
 			break;
-		}
 	}
-	if (i == sbi->s_partitions)
+
+	if (!found) {
 		udf_debug("Partition (%d) not found in partition map\n",
-			  le16_to_cpu(p->partitionNumber));
-	else
-		udf_debug("Partition (%d:%d type %x) starts at physical %d, "
-			  "block length %d\n",
-			  le16_to_cpu(p->partitionNumber), i,
-			  map->s_partition_type,
-			  map->s_partition_root,
-			  map->s_partition_len);
+			  partitionNumber);
+		return 0;
+	}
+
+	map->s_partition_len = le32_to_cpu(p->partitionLength); /* blocks */
+	map->s_partition_root = le32_to_cpu(p->partitionStartingLocation);
+
+	if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY))
+		map->s_partition_flags |= UDF_PART_FLAG_READ_ONLY;
+	if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_WRITE_ONCE))
+		map->s_partition_flags |= UDF_PART_FLAG_WRITE_ONCE;
+	if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_REWRITABLE))
+		map->s_partition_flags |= UDF_PART_FLAG_REWRITABLE;
+	if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE))
+		map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE;
+
+	udf_debug("Partition (%d:%d type %x) starts at physical %d, "
+		  "block length %d\n", partitionNumber, i,
+		  map->s_partition_type, map->s_partition_root,
+		  map->s_partition_len);
+
+	if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) &&
+	    strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03))
+		return 0;
+
+	phd = (struct partitionHeaderDesc *)p->partitionContentsUse;
+	if (phd->unallocSpaceTable.extLength) {
+		kernel_lb_addr loc = {
+			.logicalBlockNum = le32_to_cpu(
+				phd->unallocSpaceTable.extPosition),
+			.partitionReferenceNum = i,
+		};
+
+		map->s_uspace.s_table = udf_iget(sb, loc);
+		if (!map->s_uspace.s_table) {
+			udf_debug("cannot load unallocSpaceTable (part %d)\n",
+					i);
+			return 1;
+		}
+		map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
+		udf_debug("unallocSpaceTable (part %d) @ %ld\n",
+				i, map->s_uspace.s_table->i_ino);
+	}
+
+	if (phd->unallocSpaceBitmap.extLength) {
+		struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, i);
+		map->s_uspace.s_bitmap = bitmap;
+		if (bitmap != NULL) {
+			bitmap->s_extLength = le32_to_cpu(
+				phd->unallocSpaceBitmap.extLength);
+			bitmap->s_extPosition = le32_to_cpu(
+				phd->unallocSpaceBitmap.extPosition);
+			map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
+			udf_debug("unallocSpaceBitmap (part %d) @ %d\n",
+					i, bitmap->s_extPosition);
+		}
+	}
+
+	if (phd->partitionIntegrityTable.extLength)
+		udf_debug("partitionIntegrityTable (part %d)\n", i);
+
+	if (phd->freedSpaceTable.extLength) {
+		kernel_lb_addr loc = {
+			.logicalBlockNum = le32_to_cpu(
+				phd->freedSpaceTable.extPosition),
+			.partitionReferenceNum = i,
+		};
+
+		map->s_fspace.s_table = udf_iget(sb, loc);
+		if (!map->s_fspace.s_table) {
+			udf_debug("cannot load freedSpaceTable (part %d)\n", i);
+			return 1;
+		}
+
+		map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
+		udf_debug("freedSpaceTable (part %d) @ %ld\n",
+				i, map->s_fspace.s_table->i_ino);
+	}
+
+	if (phd->freedSpaceBitmap.extLength) {
+		struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, i);
+		map->s_fspace.s_bitmap = bitmap;
+		if (bitmap != NULL) {
+			bitmap->s_extLength = le32_to_cpu(
+				phd->freedSpaceBitmap.extLength);
+			bitmap->s_extPosition = le32_to_cpu(
+				phd->freedSpaceBitmap.extPosition);
+			map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
+			udf_debug("freedSpaceBitmap (part %d) @ %d\n",
+					i, bitmap->s_extPosition);
+		}
+	}
+
 	return 0;
 }
 
@@ -1256,19 +1205,17 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh,
 					map->s_type_specific.s_sparing.
 							s_spar_map[j] = bh2;
 
-					if (bh2 != NULL) {
-						st = (struct sparingTable *)
-								bh2->b_data;
-						if (ident != 0 || strncmp(
-							st->sparingIdent.ident,
-							UDF_ID_SPARING,
-							strlen(UDF_ID_SPARING))) {
-							brelse(bh2);
-							map->s_type_specific.
-								s_sparing.
-								s_spar_map[j] =
-									NULL;
-						}
+					if (bh2 == NULL)
+						continue;
+
+					st = (struct sparingTable *)bh2->b_data;
+					if (ident != 0 || strncmp(
+						st->sparingIdent.ident,
+						UDF_ID_SPARING,
+						strlen(UDF_ID_SPARING))) {
+						brelse(bh2);
+						map->s_type_specific.s_sparing.
+							s_spar_map[j] = NULL;
 					}
 				}
 				map->s_partition_func = udf_get_pblock_spar15;
@@ -1345,7 +1292,7 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc)
  *	July 1, 1997 - Andrew E. Mileski
  *	Written, tested, and released.
  */
-static int udf_process_sequence(struct super_block *sb, long block,
+static noinline int udf_process_sequence(struct super_block *sb, long block,
 				long lastblock, kernel_lb_addr *fileset)
 {
 	struct buffer_head *bh = NULL;
@@ -1433,40 +1380,40 @@ static int udf_process_sequence(struct super_block *sb, long block,
 		brelse(bh);
 	}
 	for (i = 0; i < VDS_POS_LENGTH; i++) {
-		if (vds[i].block) {
-			bh = udf_read_tagged(sb, vds[i].block, vds[i].block,
-					     &ident);
-
-			if (i == VDS_POS_PRIMARY_VOL_DESC) {
-				udf_load_pvoldesc(sb, bh);
-			} else if (i == VDS_POS_LOGICAL_VOL_DESC) {
-				if (udf_load_logicalvol(sb, bh, fileset)) {
-					brelse(bh);
-					return 1;
-				}
-			} else if (i == VDS_POS_PARTITION_DESC) {
-				struct buffer_head *bh2 = NULL;
-				if (udf_load_partdesc(sb, bh)) {
-					brelse(bh);
-					return 1;
-				}
-				for (j = vds[i].block + 1;
-				     j <  vds[VDS_POS_TERMINATING_DESC].block;
-				     j++) {
-					bh2 = udf_read_tagged(sb, j, j, &ident);
-					gd = (struct generic_desc *)bh2->b_data;
-					if (ident == TAG_IDENT_PD)
-						if (udf_load_partdesc(sb,
-								      bh2)) {
-							brelse(bh);
-							brelse(bh2);
-							return 1;
-						}
-					brelse(bh2);
-				}
+		if (!vds[i].block)
+			continue;
+
+		bh = udf_read_tagged(sb, vds[i].block, vds[i].block,
+					&ident);
+
+		if (i == VDS_POS_PRIMARY_VOL_DESC)
+			udf_load_pvoldesc(sb, bh);
+		else if (i == VDS_POS_LOGICAL_VOL_DESC) {
+			if (udf_load_logicalvol(sb, bh, fileset)) {
+				brelse(bh);
+				return 1;
+			}
+		} else if (i == VDS_POS_PARTITION_DESC) {
+			struct buffer_head *bh2 = NULL;
+			if (udf_load_partdesc(sb, bh)) {
+				brelse(bh);
+				return 1;
+			}
+			for (j = vds[i].block + 1;
+				j <  vds[VDS_POS_TERMINATING_DESC].block;
+				j++) {
+				bh2 = udf_read_tagged(sb, j, j, &ident);
+				gd = (struct generic_desc *)bh2->b_data;
+				if (ident == TAG_IDENT_PD)
+					if (udf_load_partdesc(sb, bh2)) {
+						brelse(bh);
+						brelse(bh2);
+						return 1;
+					}
+				brelse(bh2);
 			}
-			brelse(bh);
 		}
+		brelse(bh);
 	}
 
 	return 0;
@@ -1478,6 +1425,7 @@ static int udf_process_sequence(struct super_block *sb, long block,
 static int udf_check_valid(struct super_block *sb, int novrs, int silent)
 {
 	long block;
+	struct udf_sb_info *sbi = UDF_SB(sb);
 
 	if (novrs) {
 		udf_debug("Validity check skipped because of novrs option\n");
@@ -1485,18 +1433,13 @@ static int udf_check_valid(struct super_block *sb, int novrs, int silent)
 	}
 	/* Check that it is NSR02 compliant */
 	/* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */
-	else {
-		block = udf_vrs(sb, silent);
-		if (block == -1) {
-			struct udf_sb_info *sbi = UDF_SB(sb);
-			udf_debug("Failed to read byte 32768. Assuming open "
-				  "disc. Skipping validity check\n");
-			if (!sbi->s_last_block)
-				sbi->s_last_block = udf_get_last_block(sb);
-			return 0;
-		} else
-			return !block;
-	}
+	block = udf_vrs(sb, silent);
+	if (block == -1)
+		udf_debug("Failed to read byte 32768. Assuming open "
+			  "disc. Skipping validity check\n");
+	if (block && !sbi->s_last_block)
+		sbi->s_last_block = udf_get_last_block(sb);
+	return !block;
 }
 
 static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
@@ -1515,6 +1458,7 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
 	for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) {
 		if (!sbi->s_anchor[i])
 			continue;
+
 		bh = udf_read_tagged(sb, sbi->s_anchor[i], sbi->s_anchor[i],
 				     &ident);
 		if (!bh)
@@ -1556,72 +1500,73 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
 	for (i = 0; i < sbi->s_partitions; i++) {
 		kernel_lb_addr uninitialized_var(ino);
 		struct udf_part_map *map = &sbi->s_partmaps[i];
-		switch (map->s_partition_type) {
-		case UDF_VIRTUAL_MAP15:
-		case UDF_VIRTUAL_MAP20:
-			if (!sbi->s_last_block) {
-				sbi->s_last_block = udf_get_last_block(sb);
-				udf_find_anchor(sb);
-			}
 
-			if (!sbi->s_last_block) {
-				udf_debug("Unable to determine Lastblock (For "
-					  "Virtual Partition)\n");
-				return 1;
-			}
+		if (map->s_partition_type != UDF_VIRTUAL_MAP15 &&
+			map->s_partition_type != UDF_VIRTUAL_MAP20)
+			continue;
 
-			for (j = 0; j < sbi->s_partitions; j++) {
-				struct udf_part_map *map2 = &sbi->s_partmaps[j];
-				if (j != i &&
-				    map->s_volumeseqnum ==
-						map2->s_volumeseqnum &&
-				    map->s_partition_num ==
-						map2->s_partition_num) {
-					ino.partitionReferenceNum = j;
-					ino.logicalBlockNum =
-						sbi->s_last_block -
-							map2->s_partition_root;
-					break;
-				}
+		if (!sbi->s_last_block) {
+			sbi->s_last_block = udf_get_last_block(sb);
+			udf_find_anchor(sb);
+		}
+
+		if (!sbi->s_last_block) {
+			udf_debug("Unable to determine Lastblock (For "
+					"Virtual Partition)\n");
+			return 1;
+		}
+
+		for (j = 0; j < sbi->s_partitions; j++) {
+			struct udf_part_map *map2 = &sbi->s_partmaps[j];
+			if (j != i &&
+				map->s_volumeseqnum ==
+					map2->s_volumeseqnum &&
+				map->s_partition_num ==
+					map2->s_partition_num) {
+				ino.partitionReferenceNum = j;
+				ino.logicalBlockNum =
+					sbi->s_last_block -
+						map2->s_partition_root;
+				break;
 			}
+		}
 
-			if (j == sbi->s_partitions)
-				return 1;
+		if (j == sbi->s_partitions)
+			return 1;
 
-			sbi->s_vat_inode = udf_iget(sb, ino);
-			if (!sbi->s_vat_inode)
-				return 1;
+		sbi->s_vat_inode = udf_iget(sb, ino);
+		if (!sbi->s_vat_inode)
+			return 1;
 
-			if (map->s_partition_type == UDF_VIRTUAL_MAP15) {
-				map->s_type_specific.s_virtual.s_start_offset =
-					udf_ext0_offset(sbi->s_vat_inode);
-				map->s_type_specific.s_virtual.s_num_entries =
-					(sbi->s_vat_inode->i_size - 36) >> 2;
-			} else if (map->s_partition_type == UDF_VIRTUAL_MAP20) {
-				uint32_t pos;
-				struct virtualAllocationTable20 *vat20;
-
-				pos = udf_block_map(sbi->s_vat_inode, 0);
-				bh = sb_bread(sb, pos);
-				if (!bh)
-					return 1;
-				vat20 = (struct virtualAllocationTable20 *)
-					bh->b_data +
-					udf_ext0_offset(sbi->s_vat_inode);
-				map->s_type_specific.s_virtual.s_start_offset =
-					le16_to_cpu(vat20->lengthHeader) +
-					udf_ext0_offset(sbi->s_vat_inode);
-				map->s_type_specific.s_virtual.s_num_entries =
-					(sbi->s_vat_inode->i_size -
-					 map->s_type_specific.s_virtual.
-							s_start_offset) >> 2;
-				brelse(bh);
-			}
-			map->s_partition_root = udf_get_pblock(sb, 0, i, 0);
-			map->s_partition_len =
-				sbi->s_partmaps[ino.partitionReferenceNum].
-								s_partition_len;
+		if (map->s_partition_type == UDF_VIRTUAL_MAP15) {
+			map->s_type_specific.s_virtual.s_start_offset =
+				udf_ext0_offset(sbi->s_vat_inode);
+			map->s_type_specific.s_virtual.s_num_entries =
+				(sbi->s_vat_inode->i_size - 36) >> 2;
+		} else if (map->s_partition_type == UDF_VIRTUAL_MAP20) {
+			uint32_t pos;
+			struct virtualAllocationTable20 *vat20;
+
+			pos = udf_block_map(sbi->s_vat_inode, 0);
+			bh = sb_bread(sb, pos);
+			if (!bh)
+				return 1;
+			vat20 = (struct virtualAllocationTable20 *)
+				bh->b_data +
+				udf_ext0_offset(sbi->s_vat_inode);
+			map->s_type_specific.s_virtual.s_start_offset =
+				le16_to_cpu(vat20->lengthHeader) +
+				udf_ext0_offset(sbi->s_vat_inode);
+			map->s_type_specific.s_virtual.s_num_entries =
+				(sbi->s_vat_inode->i_size -
+					map->s_type_specific.s_virtual.
+						s_start_offset) >> 2;
+			brelse(bh);
 		}
+		map->s_partition_root = udf_get_pblock(sb, 0, i, 0);
+		map->s_partition_len =
+			sbi->s_partmaps[ino.partitionReferenceNum].
+							s_partition_len;
 	}
 	return 0;
 }
@@ -1630,65 +1575,62 @@ static void udf_open_lvid(struct super_block *sb)
 {
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct buffer_head *bh = sbi->s_lvid_bh;
-	if (bh) {
-		kernel_timestamp cpu_time;
-		struct logicalVolIntegrityDesc *lvid =
-				(struct logicalVolIntegrityDesc *)bh->b_data;
-		struct logicalVolIntegrityDescImpUse *lvidiu =
-							udf_sb_lvidiu(sbi);
+	struct logicalVolIntegrityDesc *lvid;
+	struct logicalVolIntegrityDescImpUse *lvidiu;
+	if (!bh)
+		return;
 
-		lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
-		lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
-		if (udf_time_to_stamp(&cpu_time, CURRENT_TIME))
-			lvid->recordingDateAndTime = cpu_to_lets(cpu_time);
-		lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN;
+	lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
+	lvidiu = udf_sb_lvidiu(sbi);
 
-		lvid->descTag.descCRC = cpu_to_le16(
-			udf_crc((char *)lvid + sizeof(tag),
-				le16_to_cpu(lvid->descTag.descCRCLength),
-				0));
+	lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
+	lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
+	udf_time_to_disk_stamp(&lvid->recordingDateAndTime,
+				CURRENT_TIME);
+	lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN;
 
-		lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
-		mark_buffer_dirty(bh);
-	}
+	lvid->descTag.descCRC = cpu_to_le16(
+		udf_crc((char *)lvid + sizeof(tag),
+			le16_to_cpu(lvid->descTag.descCRCLength), 0));
+
+	lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
+	mark_buffer_dirty(bh);
 }
 
 static void udf_close_lvid(struct super_block *sb)
 {
-	kernel_timestamp cpu_time;
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct buffer_head *bh = sbi->s_lvid_bh;
 	struct logicalVolIntegrityDesc *lvid;
+	struct logicalVolIntegrityDescImpUse *lvidiu;
 
 	if (!bh)
 		return;
 
 	lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
 
-	if (lvid->integrityType == LVID_INTEGRITY_TYPE_OPEN) {
-		struct logicalVolIntegrityDescImpUse *lvidiu =
-							udf_sb_lvidiu(sbi);
-		lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
-		lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
-		if (udf_time_to_stamp(&cpu_time, CURRENT_TIME))
-			lvid->recordingDateAndTime = cpu_to_lets(cpu_time);
-		if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev))
-			lvidiu->maxUDFWriteRev =
-					cpu_to_le16(UDF_MAX_WRITE_VERSION);
-		if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev))
-			lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev);
-		if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev))
-			lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev);
-		lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
-
-		lvid->descTag.descCRC = cpu_to_le16(
-			udf_crc((char *)lvid + sizeof(tag),
-				le16_to_cpu(lvid->descTag.descCRCLength),
-				0));
-
-		lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
-		mark_buffer_dirty(bh);
-	}
+	if (lvid->integrityType != LVID_INTEGRITY_TYPE_OPEN)
+		return;
+
+	lvidiu = udf_sb_lvidiu(sbi);
+	lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
+	lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
+	udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME);
+	if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev))
+		lvidiu->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION);
+	if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev))
+		lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev);
+	if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev))
+		lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev);
+	lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
+
+	lvid->descTag.descCRC = cpu_to_le16(
+		udf_crc((char *)lvid + sizeof(tag),
+			le16_to_cpu(lvid->descTag.descCRCLength),
+			0));
+
+	lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
+	mark_buffer_dirty(bh);
 }
 
 static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
@@ -1708,22 +1650,6 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
 		vfree(bitmap);
 }
 
-/*
- * udf_read_super
- *
- * PURPOSE
- *	Complete the specified super block.
- *
- * PRE-CONDITIONS
- *	sb			Pointer to superblock to complete - never NULL.
- *	sb->s_dev		Device to read suberblock from.
- *	options			Pointer to mount options.
- *	silent			Silent flag.
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
 static int udf_fill_super(struct super_block *sb, void *options, int silent)
 {
 	int i;
@@ -1776,8 +1702,11 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	sbi->s_nls_map = uopt.nls_map;
 
 	/* Set the block size for all transfers */
-	if (!udf_set_blocksize(sb, uopt.blocksize))
+	if (!sb_min_blocksize(sb, uopt.blocksize)) {
+		udf_debug("Bad block size (%d)\n", uopt.blocksize);
+		printk(KERN_ERR "udf: bad block size (%d)\n", uopt.blocksize);
 		goto error_out;
+	}
 
 	if (uopt.session == 0xFFFFFFFF)
 		sbi->s_session = udf_get_last_session(sb);
@@ -1856,12 +1785,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	}
 
 	if (!silent) {
-		kernel_timestamp ts;
-		udf_time_to_stamp(&ts, sbi->s_record_time);
+		timestamp ts;
+		udf_time_to_disk_stamp(&ts, sbi->s_record_time);
 		udf_info("UDF: Mounting volume '%s', "
 			 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
-			 sbi->s_volume_ident, ts.year, ts.month, ts.day,
-			 ts.hour, ts.minute, ts.typeAndTimezone);
+			 sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day,
+			 ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone));
 	}
 	if (!(sb->s_flags & MS_RDONLY))
 		udf_open_lvid(sb);
@@ -1920,8 +1849,8 @@ error_out:
 	return -EINVAL;
 }
 
-void udf_error(struct super_block *sb, const char *function,
-	       const char *fmt, ...)
+static void udf_error(struct super_block *sb, const char *function,
+		      const char *fmt, ...)
 {
 	va_list args;
 
@@ -1948,19 +1877,6 @@ void udf_warning(struct super_block *sb, const char *function,
 	       sb->s_id, function, error_buf);
 }
 
-/*
- * udf_put_super
- *
- * PURPOSE
- *	Prepare for destruction of the superblock.
- *
- * DESCRIPTION
- *	Called before the filesystem is unmounted.
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
 static void udf_put_super(struct super_block *sb)
 {
 	int i;
@@ -1996,19 +1912,6 @@ static void udf_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-/*
- * udf_stat_fs
- *
- * PURPOSE
- *	Return info about the filesystem.
- *
- * DESCRIPTION
- *	Called by sys_statfs()
- *
- * HISTORY
- *	July 1, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
 static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
@@ -2035,10 +1938,6 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static unsigned char udf_bitmap_lookup[16] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
-};
-
 static unsigned int udf_count_free_bitmap(struct super_block *sb,
 					  struct udf_bitmap *bitmap)
 {
@@ -2048,7 +1947,6 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
 	int block = 0, newblock;
 	kernel_lb_addr loc;
 	uint32_t bytes;
-	uint8_t value;
 	uint8_t *ptr;
 	uint16_t ident;
 	struct spaceBitmapDesc *bm;
@@ -2074,13 +1972,10 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
 	ptr = (uint8_t *)bh->b_data;
 
 	while (bytes > 0) {
-		while ((bytes > 0) && (index < sb->s_blocksize)) {
-			value = ptr[index];
-			accum += udf_bitmap_lookup[value & 0x0f];
-			accum += udf_bitmap_lookup[value >> 4];
-			index++;
-			bytes--;
-		}
+		u32 cur_bytes = min_t(u32, bytes, sb->s_blocksize - index);
+		accum += bitmap_weight((const unsigned long *)(ptr + index),
+					cur_bytes * 8);
+		bytes -= cur_bytes;
 		if (bytes) {
 			brelse(bh);
 			newblock = udf_get_lb_pblock(sb, loc, ++block);
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 6ec99221e50c..c3265e1385d4 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -23,7 +23,6 @@
 #include <asm/uaccess.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
-#include <linux/udf_fs.h>
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/stat.h>
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index fe61be17cdab..65e19b4f9424 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -22,7 +22,6 @@
 #include "udfdecl.h"
 #include <linux/fs.h>
 #include <linux/mm.h>
-#include <linux/udf_fs.h>
 #include <linux/buffer_head.h>
 
 #include "udf_i.h"
@@ -180,6 +179,24 @@ void udf_discard_prealloc(struct inode *inode)
 	brelse(epos.bh);
 }
 
+static void udf_update_alloc_ext_desc(struct inode *inode,
+				      struct extent_position *epos,
+				      u32 lenalloc)
+{
+	struct super_block *sb = inode->i_sb;
+	struct udf_sb_info *sbi = UDF_SB(sb);
+
+	struct allocExtDesc *aed = (struct allocExtDesc *) (epos->bh->b_data);
+	int len = sizeof(struct allocExtDesc);
+
+	aed->lengthAllocDescs =	cpu_to_le32(lenalloc);
+	if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || sbi->s_udfrev >= 0x0201)
+		len += lenalloc;
+
+	udf_update_tag(epos->bh->b_data, len);
+	mark_buffer_dirty_inode(epos->bh, inode);
+}
+
 void udf_truncate_extents(struct inode *inode)
 {
 	struct extent_position epos;
@@ -187,7 +204,6 @@ void udf_truncate_extents(struct inode *inode)
 	uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc;
 	int8_t etype;
 	struct super_block *sb = inode->i_sb;
-	struct udf_sb_info *sbi = UDF_SB(sb);
 	sector_t first_block = inode->i_size >> sb->s_blocksize_bits, offset;
 	loff_t byte_offset;
 	int adsize;
@@ -224,35 +240,15 @@ void udf_truncate_extents(struct inode *inode)
 				if (indirect_ext_len) {
 					/* We managed to free all extents in the
 					 * indirect extent - free it too */
-					if (!epos.bh)
-						BUG();
+					BUG_ON(!epos.bh);
 					udf_free_blocks(sb, inode, epos.block,
 							0, indirect_ext_len);
-				} else {
-					if (!epos.bh) {
-						iinfo->i_lenAlloc =
-								lenalloc;
-						mark_inode_dirty(inode);
-					} else {
-						struct allocExtDesc *aed =
-							(struct allocExtDesc *)
-							(epos.bh->b_data);
-						int len =
-						    sizeof(struct allocExtDesc);
-
-						aed->lengthAllocDescs =
-						    cpu_to_le32(lenalloc);
-						if (!UDF_QUERY_FLAG(sb,
-							UDF_FLAG_STRICT) ||
-						    sbi->s_udfrev >= 0x0201)
-							len += lenalloc;
-
-						udf_update_tag(epos.bh->b_data,
-								len);
-						mark_buffer_dirty_inode(
-								epos.bh, inode);
-					}
-				}
+				} else if (!epos.bh) {
+					iinfo->i_lenAlloc = lenalloc;
+					mark_inode_dirty(inode);
+				} else
+					udf_update_alloc_ext_desc(inode,
+							&epos, lenalloc);
 				brelse(epos.bh);
 				epos.offset = sizeof(struct allocExtDesc);
 				epos.block = eloc;
@@ -272,29 +268,14 @@ void udf_truncate_extents(struct inode *inode)
 		}
 
 		if (indirect_ext_len) {
-			if (!epos.bh)
-				BUG();
+			BUG_ON(!epos.bh);
 			udf_free_blocks(sb, inode, epos.block, 0,
 					indirect_ext_len);
-		} else {
-			if (!epos.bh) {
-				iinfo->i_lenAlloc = lenalloc;
-				mark_inode_dirty(inode);
-			} else {
-				struct allocExtDesc *aed =
-				    (struct allocExtDesc *)(epos.bh->b_data);
-				aed->lengthAllocDescs = cpu_to_le32(lenalloc);
-				if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) ||
-				    sbi->s_udfrev >= 0x0201)
-					udf_update_tag(epos.bh->b_data,
-						lenalloc +
-						sizeof(struct allocExtDesc));
-				else
-					udf_update_tag(epos.bh->b_data,
-						sizeof(struct allocExtDesc));
-				mark_buffer_dirty_inode(epos.bh, inode);
-			}
-		}
+		} else if (!epos.bh) {
+			iinfo->i_lenAlloc = lenalloc;
+			mark_inode_dirty(inode);
+		} else
+			udf_update_alloc_ext_desc(inode, &epos, lenalloc);
 	} else if (inode->i_size) {
 		if (byte_offset) {
 			kernel_long_ad extent;
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index ccc52f16bf7d..4f86b1d98a5d 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -1,10 +1,32 @@
-#ifndef __LINUX_UDF_I_H
-#define __LINUX_UDF_I_H
+#ifndef _UDF_I_H
+#define _UDF_I_H
+
+struct udf_inode_info {
+	struct timespec		i_crtime;
+	/* Physical address of inode */
+	kernel_lb_addr		i_location;
+	__u64			i_unique;
+	__u32			i_lenEAttr;
+	__u32			i_lenAlloc;
+	__u64			i_lenExtents;
+	__u32			i_next_alloc_block;
+	__u32			i_next_alloc_goal;
+	unsigned		i_alloc_type : 3;
+	unsigned		i_efe : 1;	/* extendedFileEntry */
+	unsigned		i_use : 1;	/* unallocSpaceEntry */
+	unsigned		i_strat4096 : 1;
+	unsigned		reserved : 26;
+	union {
+		short_ad	*i_sad;
+		long_ad		*i_lad;
+		__u8		*i_data;
+	} i_ext;
+	struct inode vfs_inode;
+};
 
-#include <linux/udf_fs_i.h>
 static inline struct udf_inode_info *UDF_I(struct inode *inode)
 {
 	return list_entry(inode, struct udf_inode_info, vfs_inode);
 }
 
-#endif /* !defined(_LINUX_UDF_I_H) */
+#endif /* _UDF_I_H) */
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 737d1c604eea..fb2a6e9f8dac 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_UDF_SB_H
 #define __LINUX_UDF_SB_H
 
+#include <linux/mutex.h>
+
 /* Since UDF 2.01 is ISO 13346 based... */
 #define UDF_SUPER_MAGIC			0x15013346
 
@@ -38,6 +40,97 @@
 #define UDF_PART_FLAG_REWRITABLE	0x0040
 #define UDF_PART_FLAG_OVERWRITABLE	0x0080
 
+#define UDF_MAX_BLOCK_LOADED	8
+
+#define UDF_TYPE1_MAP15			0x1511U
+#define UDF_VIRTUAL_MAP15		0x1512U
+#define UDF_VIRTUAL_MAP20		0x2012U
+#define UDF_SPARABLE_MAP15		0x1522U
+
+#pragma pack(1) /* XXX(hch): Why?  This file just defines in-core structures */
+
+struct udf_sparing_data {
+	__u16	s_packet_len;
+	struct buffer_head *s_spar_map[4];
+};
+
+struct udf_virtual_data {
+	__u32	s_num_entries;
+	__u16	s_start_offset;
+};
+
+struct udf_bitmap {
+	__u32			s_extLength;
+	__u32			s_extPosition;
+	__u16			s_nr_groups;
+	struct buffer_head 	**s_block_bitmap;
+};
+
+struct udf_part_map {
+	union {
+		struct udf_bitmap	*s_bitmap;
+		struct inode		*s_table;
+	} s_uspace;
+	union {
+		struct udf_bitmap	*s_bitmap;
+		struct inode		*s_table;
+	} s_fspace;
+	__u32	s_partition_root;
+	__u32	s_partition_len;
+	__u16	s_partition_type;
+	__u16	s_partition_num;
+	union {
+		struct udf_sparing_data s_sparing;
+		struct udf_virtual_data s_virtual;
+	} s_type_specific;
+	__u32	(*s_partition_func)(struct super_block *, __u32, __u16, __u32);
+	__u16	s_volumeseqnum;
+	__u16	s_partition_flags;
+};
+
+#pragma pack()
+
+struct udf_sb_info {
+	struct udf_part_map	*s_partmaps;
+	__u8			s_volume_ident[32];
+
+	/* Overall info */
+	__u16			s_partitions;
+	__u16			s_partition;
+
+	/* Sector headers */
+	__s32			s_session;
+	__u32			s_anchor[4];
+	__u32			s_last_block;
+
+	struct buffer_head	*s_lvid_bh;
+
+	/* Default permissions */
+	mode_t			s_umask;
+	gid_t			s_gid;
+	uid_t			s_uid;
+
+	/* Root Info */
+	struct timespec		s_record_time;
+
+	/* Fileset Info */
+	__u16			s_serial_number;
+
+	/* highest UDF revision we have recorded to this media */
+	__u16			s_udfrev;
+
+	/* Miscellaneous flags */
+	__u32			s_flags;
+
+	/* Encoding info */
+	struct nls_table	*s_nls_map;
+
+	/* VAT inode */
+	struct inode		*s_vat_inode;
+
+	struct mutex		s_alloc_mutex;
+};
+
 static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 681dc2b66cdb..2cb2f5de4245 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -1,17 +1,37 @@
 #ifndef __UDF_DECL_H
 #define __UDF_DECL_H
 
-#include <linux/udf_fs.h>
 #include "ecma_167.h"
 #include "osta_udf.h"
 
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/udf_fs_i.h>
-#include <linux/udf_fs_sb.h>
 #include <linux/buffer_head.h>
+#include <linux/udf_fs_i.h>
 
+#include "udf_sb.h"
 #include "udfend.h"
+#include "udf_i.h"
+
+#define UDF_PREALLOCATE
+#define UDF_DEFAULT_PREALLOC_BLOCKS	8
+
+#undef UDFFS_DEBUG
+
+#ifdef UDFFS_DEBUG
+#define udf_debug(f, a...) \
+do { \
+	printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \
+		__FILE__, __LINE__, __func__); \
+	printk(f, ##a); \
+} while (0)
+#else
+#define udf_debug(f, a...) /**/
+#endif
+
+#define udf_info(f, a...) \
+	printk(KERN_INFO "UDF-fs INFO " f, ##a);
+
 
 #define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) )
 #define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) )
@@ -23,16 +43,24 @@
 #define UDF_NAME_LEN		256
 #define UDF_PATH_LEN		1023
 
-#define udf_file_entry_alloc_offset(inode)\
-	(UDF_I(inode)->i_use ?\
-		sizeof(struct unallocSpaceEntry) :\
-		((UDF_I(inode)->i_efe ?\
-			sizeof(struct extendedFileEntry) :\
-			sizeof(struct fileEntry)) + UDF_I(inode)->i_lenEAttr))
-
-#define udf_ext0_offset(inode)\
-	(UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB ?\
-		udf_file_entry_alloc_offset(inode) : 0)
+static inline size_t udf_file_entry_alloc_offset(struct inode *inode)
+{
+	struct udf_inode_info *iinfo = UDF_I(inode);
+	if (iinfo->i_use)
+		return sizeof(struct unallocSpaceEntry);
+	else if (iinfo->i_efe)
+		return sizeof(struct extendedFileEntry) + iinfo->i_lenEAttr;
+	else
+		return sizeof(struct fileEntry) + iinfo->i_lenEAttr;
+}
+
+static inline size_t udf_ext0_offset(struct inode *inode)
+{
+	if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+		return udf_file_entry_alloc_offset(inode);
+	else
+		return 0;
+}
 
 #define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset))
 
@@ -83,7 +111,6 @@ struct extent_position {
 };
 
 /* super.c */
-extern void udf_error(struct super_block *, const char *, const char *, ...);
 extern void udf_warning(struct super_block *, const char *, const char *, ...);
 
 /* namei.c */
@@ -157,7 +184,7 @@ extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int);
 extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *,
 			    int);
 extern int udf_build_ustr(struct ustr *, dstring *, int);
-extern int udf_CS0toUTF8(struct ustr *, struct ustr *);
+extern int udf_CS0toUTF8(struct ustr *, const struct ustr *);
 
 /* ialloc.c */
 extern void udf_free_inode(struct inode *);
@@ -192,10 +219,11 @@ extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int);
 extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int);
 
 /* crc.c */
-extern uint16_t udf_crc(uint8_t *, uint32_t, uint16_t);
+extern uint16_t udf_crc(const uint8_t *, uint32_t, uint16_t);
 
 /* udftime.c */
-extern time_t *udf_stamp_to_time(time_t *, long *, kernel_timestamp);
-extern kernel_timestamp *udf_time_to_stamp(kernel_timestamp *, struct timespec);
+extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest,
+						timestamp src);
+extern timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec src);
 
 #endif				/* __UDF_DECL_H */
diff --git a/fs/udf/udfend.h b/fs/udf/udfend.h
index c4bd1203f857..489f52fb428c 100644
--- a/fs/udf/udfend.h
+++ b/fs/udf/udfend.h
@@ -24,17 +24,6 @@ static inline lb_addr cpu_to_lelb(kernel_lb_addr in)
 	return out;
 }
 
-static inline kernel_timestamp lets_to_cpu(timestamp in)
-{
-	kernel_timestamp out;
-
-	memcpy(&out, &in, sizeof(timestamp));
-	out.typeAndTimezone = le16_to_cpu(in.typeAndTimezone);
-	out.year = le16_to_cpu(in.year);
-
-	return out;
-}
-
 static inline short_ad lesa_to_cpu(short_ad in)
 {
 	short_ad out;
@@ -85,15 +74,4 @@ static inline kernel_extent_ad leea_to_cpu(extent_ad in)
 	return out;
 }
 
-static inline timestamp cpu_to_lets(kernel_timestamp in)
-{
-	timestamp out;
-
-	memcpy(&out, &in, sizeof(timestamp));
-	out.typeAndTimezone = cpu_to_le16(in.typeAndTimezone);
-	out.year = cpu_to_le16(in.year);
-
-	return out;
-}
-
 #endif /* __UDF_ENDIAN_H */
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index ce595732ba6f..5f811655c9b5 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -85,39 +85,38 @@ extern struct timezone sys_tz;
 #define SECS_PER_HOUR	(60 * 60)
 #define SECS_PER_DAY	(SECS_PER_HOUR * 24)
 
-time_t *udf_stamp_to_time(time_t *dest, long *dest_usec, kernel_timestamp src)
+struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src)
 {
 	int yday;
-	uint8_t type = src.typeAndTimezone >> 12;
+	u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone);
+	u16 year = le16_to_cpu(src.year);
+	uint8_t type = typeAndTimezone >> 12;
 	int16_t offset;
 
 	if (type == 1) {
-		offset = src.typeAndTimezone << 4;
+		offset = typeAndTimezone << 4;
 		/* sign extent offset */
 		offset = (offset >> 4);
 		if (offset == -2047) /* unspecified offset */
 			offset = 0;
-	} else {
+	} else
 		offset = 0;
-	}
 
-	if ((src.year < EPOCH_YEAR) ||
-	    (src.year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) {
-		*dest = -1;
-		*dest_usec = -1;
+	if ((year < EPOCH_YEAR) ||
+	    (year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) {
 		return NULL;
 	}
-	*dest = year_seconds[src.year - EPOCH_YEAR];
-	*dest -= offset * 60;
+	dest->tv_sec = year_seconds[year - EPOCH_YEAR];
+	dest->tv_sec -= offset * 60;
 
-	yday = ((__mon_yday[__isleap(src.year)][src.month - 1]) + src.day - 1);
-	*dest += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second;
-	*dest_usec = src.centiseconds * 10000 +
-			src.hundredsOfMicroseconds * 100 + src.microseconds;
+	yday = ((__mon_yday[__isleap(year)][src.month - 1]) + src.day - 1);
+	dest->tv_sec += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second;
+	dest->tv_nsec = 1000 * (src.centiseconds * 10000 +
+			src.hundredsOfMicroseconds * 100 + src.microseconds);
 	return dest;
 }
 
-kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts)
+timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec ts)
 {
 	long int days, rem, y;
 	const unsigned short int *ip;
@@ -128,7 +127,7 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts)
 	if (!dest)
 		return NULL;
 
-	dest->typeAndTimezone = 0x1000 | (offset & 0x0FFF);
+	dest->typeAndTimezone = cpu_to_le16(0x1000 | (offset & 0x0FFF));
 
 	ts.tv_sec += offset * 60;
 	days = ts.tv_sec / SECS_PER_DAY;
@@ -151,7 +150,7 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts)
 			 - LEAPS_THRU_END_OF(y - 1));
 		y = yg;
 	}
-	dest->year = y;
+	dest->year = cpu_to_le16(y);
 	ip = __mon_yday[__isleap(y)];
 	for (y = 11; days < (long int)ip[y]; --y)
 		continue;
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index e533b11703bf..f5872ae325a3 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -23,7 +23,6 @@
 #include <linux/kernel.h>
 #include <linux/string.h>	/* for memset */
 #include <linux/nls.h>
-#include <linux/udf_fs.h>
 
 #include "udf_sb.h"
 
@@ -49,14 +48,16 @@ int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
 {
 	int usesize;
 
-	if ((!dest) || (!ptr) || (!size))
+	if (!dest || !ptr || !size)
 		return -1;
+	BUG_ON(size < 2);
 
-	memset(dest, 0, sizeof(struct ustr));
-	usesize = (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size;
+	usesize = min_t(size_t, ptr[size - 1], sizeof(dest->u_name));
+	usesize = min(usesize, size - 2);
 	dest->u_cmpID = ptr[0];
-	dest->u_len = ptr[size - 1];
-	memcpy(dest->u_name, ptr + 1, usesize - 1);
+	dest->u_len = usesize;
+	memcpy(dest->u_name, ptr + 1, usesize);
+	memset(dest->u_name + usesize, 0, sizeof(dest->u_name) - usesize);
 
 	return 0;
 }
@@ -83,9 +84,6 @@ static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
  * PURPOSE
  *	Convert OSTA Compressed Unicode to the UTF-8 equivalent.
  *
- * DESCRIPTION
- *	This routine is only called by udf_filldir().
- *
  * PRE-CONDITIONS
  *	utf			Pointer to UTF-8 output buffer.
  *	ocu			Pointer to OSTA Compressed Unicode input buffer
@@ -99,43 +97,39 @@ static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
  *	November 12, 1997 - Andrew E. Mileski
  *	Written, tested, and released.
  */
-int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i)
+int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
 {
-	uint8_t *ocu;
-	uint32_t c;
+	const uint8_t *ocu;
 	uint8_t cmp_id, ocu_len;
 	int i;
 
-	ocu = ocu_i->u_name;
-
 	ocu_len = ocu_i->u_len;
-	cmp_id = ocu_i->u_cmpID;
-	utf_o->u_len = 0;
-
 	if (ocu_len == 0) {
 		memset(utf_o, 0, sizeof(struct ustr));
-		utf_o->u_cmpID = 0;
-		utf_o->u_len = 0;
 		return 0;
 	}
 
-	if ((cmp_id != 8) && (cmp_id != 16)) {
+	cmp_id = ocu_i->u_cmpID;
+	if (cmp_id != 8 && cmp_id != 16) {
+		memset(utf_o, 0, sizeof(struct ustr));
 		printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n",
 		       cmp_id, ocu_i->u_name);
 		return 0;
 	}
 
+	ocu = ocu_i->u_name;
+	utf_o->u_len = 0;
 	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
 
 		/* Expand OSTA compressed Unicode to Unicode */
-		c = ocu[i++];
+		uint32_t c = ocu[i++];
 		if (cmp_id == 16)
 			c = (c << 8) | ocu[i++];
 
 		/* Compress Unicode to UTF-8 */
-		if (c < 0x80U) {
+		if (c < 0x80U)
 			utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
-		} else if (c < 0x800U) {
+		else if (c < 0x800U) {
 			utf_o->u_name[utf_o->u_len++] =
 						(uint8_t)(0xc0 | (c >> 6));
 			utf_o->u_name[utf_o->u_len++] =
@@ -255,35 +249,32 @@ error_out:
 }
 
 static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
-			struct ustr *ocu_i)
+			const struct ustr *ocu_i)
 {
-	uint8_t *ocu;
-	uint32_t c;
+	const uint8_t *ocu;
 	uint8_t cmp_id, ocu_len;
 	int i;
 
-	ocu = ocu_i->u_name;
 
 	ocu_len = ocu_i->u_len;
-	cmp_id = ocu_i->u_cmpID;
-	utf_o->u_len = 0;
-
 	if (ocu_len == 0) {
 		memset(utf_o, 0, sizeof(struct ustr));
-		utf_o->u_cmpID = 0;
-		utf_o->u_len = 0;
 		return 0;
 	}
 
-	if ((cmp_id != 8) && (cmp_id != 16)) {
+	cmp_id = ocu_i->u_cmpID;
+	if (cmp_id != 8 && cmp_id != 16) {
+		memset(utf_o, 0, sizeof(struct ustr));
 		printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n",
 		       cmp_id, ocu_i->u_name);
 		return 0;
 	}
 
+	ocu = ocu_i->u_name;
+	utf_o->u_len = 0;
 	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
 		/* Expand OSTA compressed Unicode to Unicode */
-		c = ocu[i++];
+		uint32_t c = ocu[i++];
 		if (cmp_id == 16)
 			c = (c << 8) | ocu[i++];
 
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
index 2009e6d922ce..3abe7e9ceb33 100644
--- a/fs/xfs/linux-2.6/sema.h
+++ b/fs/xfs/linux-2.6/sema.h
@@ -20,8 +20,8 @@
 
 #include <linux/time.h>
 #include <linux/wait.h>
+#include <linux/semaphore.h>
 #include <asm/atomic.h>
-#include <asm/semaphore.h>
 
 /*
  * sema_t structure just maps to struct semaphore in Linux kernel.
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index e0519529c26c..169e6c062794 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1532,9 +1532,9 @@ xfs_vm_bmap(
 	struct xfs_inode	*ip = XFS_I(inode);
 
 	xfs_itrace_entry(XFS_I(inode));
-	xfs_rwlock(ip, VRWLOCK_READ);
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
-	xfs_rwunlock(ip, VRWLOCK_READ);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index ca4f66c4de16..66a9a9e76cbe 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -66,7 +66,7 @@ xfs_fs_encode_fh(
 	int			len;
 
 	/* Directories don't need their parent encoded, they have ".." */
-	if (S_ISDIR(inode->i_mode))
+	if (S_ISDIR(inode->i_mode) || !connectable)
 		fileid_type = FILEID_INO32_GEN;
 	else
 		fileid_type = FILEID_INO32_GEN_PARENT;
@@ -213,17 +213,16 @@ xfs_fs_get_parent(
 	struct dentry		*child)
 {
 	int			error;
-	bhv_vnode_t		*cvp;
+	struct xfs_inode	*cip;
 	struct dentry		*parent;
 
-	cvp = NULL;
-	error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cvp);
+	error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cip);
 	if (unlikely(error))
 		return ERR_PTR(-error);
 
-	parent = d_alloc_anon(vn_to_inode(cvp));
+	parent = d_alloc_anon(cip->i_vnode);
 	if (unlikely(!parent)) {
-		VN_RELE(cvp);
+		iput(cip->i_vnode);
 		return ERR_PTR(-ENOMEM);
 	}
 	return parent;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index edab1ffbb163..05905246434d 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -469,16 +469,11 @@ xfs_file_open_exec(
 	struct inode	*inode)
 {
 	struct xfs_mount *mp = XFS_M(inode->i_sb);
+	struct xfs_inode *ip = XFS_I(inode);
 
-	if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI)) {
-		if (DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) {
-			bhv_vnode_t *vp = vn_from_inode(inode);
-
-			return -XFS_SEND_DATA(mp, DM_EVENT_READ,
-						vp, 0, 0, 0, NULL);
-		}
-	}
-
+	if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
+	             DM_EVENT_ENABLED(ip, DM_EVENT_READ))
+		return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
 	return 0;
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index ac6d34cc355d..1eefe61f0e10 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -17,18 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_vnodeops.h"
-
-/*
- * The following six includes are needed so that we can include
- * xfs_inode.h.  What a mess..
- */
 #include "xfs_bmap_btree.h"
-#include "xfs_inum.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-
 #include "xfs_inode.h"
 
 int  fs_noerr(void) { return 0; }
@@ -42,11 +31,10 @@ xfs_tosspages(
 	xfs_off_t	last,
 	int		fiopt)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
-	struct inode	*inode = vn_to_inode(vp);
+	struct address_space *mapping = ip->i_vnode->i_mapping;
 
-	if (VN_CACHED(vp))
-		truncate_inode_pages(inode->i_mapping, first);
+	if (mapping->nrpages)
+		truncate_inode_pages(mapping, first);
 }
 
 int
@@ -56,15 +44,14 @@ xfs_flushinval_pages(
 	xfs_off_t	last,
 	int		fiopt)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
-	struct inode	*inode = vn_to_inode(vp);
+	struct address_space *mapping = ip->i_vnode->i_mapping;
 	int		ret = 0;
 
-	if (VN_CACHED(vp)) {
+	if (mapping->nrpages) {
 		xfs_iflags_clear(ip, XFS_ITRUNCATED);
-		ret = filemap_write_and_wait(inode->i_mapping);
+		ret = filemap_write_and_wait(mapping);
 		if (!ret)
-			truncate_inode_pages(inode->i_mapping, first);
+			truncate_inode_pages(mapping, first);
 	}
 	return ret;
 }
@@ -77,17 +64,16 @@ xfs_flush_pages(
 	uint64_t	flags,
 	int		fiopt)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
-	struct inode	*inode = vn_to_inode(vp);
+	struct address_space *mapping = ip->i_vnode->i_mapping;
 	int		ret = 0;
 	int		ret2;
 
-	if (VN_DIRTY(vp)) {
+	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 		xfs_iflags_clear(ip, XFS_ITRUNCATED);
-		ret = filemap_fdatawrite(inode->i_mapping);
+		ret = filemap_fdatawrite(mapping);
 		if (flags & XFS_B_ASYNC)
 			return ret;
-		ret2 = filemap_fdatawait(inode->i_mapping);
+		ret2 = filemap_fdatawait(mapping);
 		if (!ret)
 			ret = ret2;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a9952e490ac9..7252963281db 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -651,314 +651,6 @@ xfs_attrmulti_by_handle(
 	return -error;
 }
 
-/* prototypes for a few of the stack-hungry cases that have
- * their own functions.  Functions are defined after their use
- * so gcc doesn't get fancy and inline them with -03 */
-
-STATIC int
-xfs_ioc_space(
-	struct xfs_inode	*ip,
-	struct inode		*inode,
-	struct file		*filp,
-	int			flags,
-	unsigned int		cmd,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_bulkstat(
-	xfs_mount_t		*mp,
-	unsigned int		cmd,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
-	xfs_mount_t		*mp,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_fsgeometry(
-	xfs_mount_t		*mp,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_xattr(
-	xfs_inode_t		*ip,
-	struct file		*filp,
-	unsigned int		cmd,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_fsgetxattr(
-	xfs_inode_t		*ip,
-	int			attr,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_getbmap(
-	struct xfs_inode	*ip,
-	int			flags,
-	unsigned int		cmd,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_getbmapx(
-	struct xfs_inode	*ip,
-	void			__user *arg);
-
-int
-xfs_ioctl(
-	xfs_inode_t		*ip,
-	struct file		*filp,
-	int			ioflags,
-	unsigned int		cmd,
-	void			__user *arg)
-{
-	struct inode		*inode = filp->f_path.dentry->d_inode;
-	xfs_mount_t		*mp = ip->i_mount;
-	int			error;
-
-	xfs_itrace_entry(XFS_I(inode));
-	switch (cmd) {
-
-	case XFS_IOC_ALLOCSP:
-	case XFS_IOC_FREESP:
-	case XFS_IOC_RESVSP:
-	case XFS_IOC_UNRESVSP:
-	case XFS_IOC_ALLOCSP64:
-	case XFS_IOC_FREESP64:
-	case XFS_IOC_RESVSP64:
-	case XFS_IOC_UNRESVSP64:
-		/*
-		 * Only allow the sys admin to reserve space unless
-		 * unwritten extents are enabled.
-		 */
-		if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
-		    !capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
-
-	case XFS_IOC_DIOINFO: {
-		struct dioattr	da;
-		xfs_buftarg_t	*target =
-			XFS_IS_REALTIME_INODE(ip) ?
-			mp->m_rtdev_targp : mp->m_ddev_targp;
-
-		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
-		if (copy_to_user(arg, &da, sizeof(da)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_FSBULKSTAT_SINGLE:
-	case XFS_IOC_FSBULKSTAT:
-	case XFS_IOC_FSINUMBERS:
-		return xfs_ioc_bulkstat(mp, cmd, arg);
-
-	case XFS_IOC_FSGEOMETRY_V1:
-		return xfs_ioc_fsgeometry_v1(mp, arg);
-
-	case XFS_IOC_FSGEOMETRY:
-		return xfs_ioc_fsgeometry(mp, arg);
-
-	case XFS_IOC_GETVERSION:
-		return put_user(inode->i_generation, (int __user *)arg);
-
-	case XFS_IOC_FSGETXATTR:
-		return xfs_ioc_fsgetxattr(ip, 0, arg);
-	case XFS_IOC_FSGETXATTRA:
-		return xfs_ioc_fsgetxattr(ip, 1, arg);
-	case XFS_IOC_GETXFLAGS:
-	case XFS_IOC_SETXFLAGS:
-	case XFS_IOC_FSSETXATTR:
-		return xfs_ioc_xattr(ip, filp, cmd, arg);
-
-	case XFS_IOC_FSSETDM: {
-		struct fsdmidata	dmi;
-
-		if (copy_from_user(&dmi, arg, sizeof(dmi)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
-				dmi.fsd_dmstate);
-		return -error;
-	}
-
-	case XFS_IOC_GETBMAP:
-	case XFS_IOC_GETBMAPA:
-		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
-
-	case XFS_IOC_GETBMAPX:
-		return xfs_ioc_getbmapx(ip, arg);
-
-	case XFS_IOC_FD_TO_HANDLE:
-	case XFS_IOC_PATH_TO_HANDLE:
-	case XFS_IOC_PATH_TO_FSHANDLE:
-		return xfs_find_handle(cmd, arg);
-
-	case XFS_IOC_OPEN_BY_HANDLE:
-		return xfs_open_by_handle(mp, arg, filp, inode);
-
-	case XFS_IOC_FSSETDM_BY_HANDLE:
-		return xfs_fssetdm_by_handle(mp, arg, inode);
-
-	case XFS_IOC_READLINK_BY_HANDLE:
-		return xfs_readlink_by_handle(mp, arg, inode);
-
-	case XFS_IOC_ATTRLIST_BY_HANDLE:
-		return xfs_attrlist_by_handle(mp, arg, inode);
-
-	case XFS_IOC_ATTRMULTI_BY_HANDLE:
-		return xfs_attrmulti_by_handle(mp, arg, inode);
-
-	case XFS_IOC_SWAPEXT: {
-		error = xfs_swapext((struct xfs_swapext __user *)arg);
-		return -error;
-	}
-
-	case XFS_IOC_FSCOUNTS: {
-		xfs_fsop_counts_t out;
-
-		error = xfs_fs_counts(mp, &out);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &out, sizeof(out)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_SET_RESBLKS: {
-		xfs_fsop_resblks_t inout;
-		__uint64_t	   in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&inout, arg, sizeof(inout)))
-			return -XFS_ERROR(EFAULT);
-
-		/* input parameter is passed in resblks field of structure */
-		in = inout.resblks;
-		error = xfs_reserve_blocks(mp, &in, &inout);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &inout, sizeof(inout)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_GET_RESBLKS: {
-		xfs_fsop_resblks_t out;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		error = xfs_reserve_blocks(mp, NULL, &out);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &out, sizeof(out)))
-			return -XFS_ERROR(EFAULT);
-
-		return 0;
-	}
-
-	case XFS_IOC_FSGROWFSDATA: {
-		xfs_growfs_data_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_data(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_FSGROWFSLOG: {
-		xfs_growfs_log_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_log(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_FSGROWFSRT: {
-		xfs_growfs_rt_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_rt(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_FREEZE:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (inode->i_sb->s_frozen == SB_UNFROZEN)
-			freeze_bdev(inode->i_sb->s_bdev);
-		return 0;
-
-	case XFS_IOC_THAW:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-		if (inode->i_sb->s_frozen != SB_UNFROZEN)
-			thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
-		return 0;
-
-	case XFS_IOC_GOINGDOWN: {
-		__uint32_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (get_user(in, (__uint32_t __user *)arg))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_fs_goingdown(mp, in);
-		return -error;
-	}
-
-	case XFS_IOC_ERROR_INJECTION: {
-		xfs_error_injection_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_errortag_add(in.errtag, mp);
-		return -error;
-	}
-
-	case XFS_IOC_ERROR_CLEARALL:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		error = xfs_errortag_clearall(mp, 1);
-		return -error;
-
-	default:
-		return -ENOTTY;
-	}
-}
-
 STATIC int
 xfs_ioc_space(
 	struct xfs_inode	*ip,
@@ -1332,3 +1024,257 @@ xfs_ioc_getbmapx(
 
 	return 0;
 }
+
+int
+xfs_ioctl(
+	xfs_inode_t		*ip,
+	struct file		*filp,
+	int			ioflags,
+	unsigned int		cmd,
+	void			__user *arg)
+{
+	struct inode		*inode = filp->f_path.dentry->d_inode;
+	xfs_mount_t		*mp = ip->i_mount;
+	int			error;
+
+	xfs_itrace_entry(XFS_I(inode));
+	switch (cmd) {
+
+	case XFS_IOC_ALLOCSP:
+	case XFS_IOC_FREESP:
+	case XFS_IOC_RESVSP:
+	case XFS_IOC_UNRESVSP:
+	case XFS_IOC_ALLOCSP64:
+	case XFS_IOC_FREESP64:
+	case XFS_IOC_RESVSP64:
+	case XFS_IOC_UNRESVSP64:
+		/*
+		 * Only allow the sys admin to reserve space unless
+		 * unwritten extents are enabled.
+		 */
+		if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
+		    !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
+
+	case XFS_IOC_DIOINFO: {
+		struct dioattr	da;
+		xfs_buftarg_t	*target =
+			XFS_IS_REALTIME_INODE(ip) ?
+			mp->m_rtdev_targp : mp->m_ddev_targp;
+
+		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
+
+		if (copy_to_user(arg, &da, sizeof(da)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_FSBULKSTAT_SINGLE:
+	case XFS_IOC_FSBULKSTAT:
+	case XFS_IOC_FSINUMBERS:
+		return xfs_ioc_bulkstat(mp, cmd, arg);
+
+	case XFS_IOC_FSGEOMETRY_V1:
+		return xfs_ioc_fsgeometry_v1(mp, arg);
+
+	case XFS_IOC_FSGEOMETRY:
+		return xfs_ioc_fsgeometry(mp, arg);
+
+	case XFS_IOC_GETVERSION:
+		return put_user(inode->i_generation, (int __user *)arg);
+
+	case XFS_IOC_FSGETXATTR:
+		return xfs_ioc_fsgetxattr(ip, 0, arg);
+	case XFS_IOC_FSGETXATTRA:
+		return xfs_ioc_fsgetxattr(ip, 1, arg);
+	case XFS_IOC_GETXFLAGS:
+	case XFS_IOC_SETXFLAGS:
+	case XFS_IOC_FSSETXATTR:
+		return xfs_ioc_xattr(ip, filp, cmd, arg);
+
+	case XFS_IOC_FSSETDM: {
+		struct fsdmidata	dmi;
+
+		if (copy_from_user(&dmi, arg, sizeof(dmi)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
+				dmi.fsd_dmstate);
+		return -error;
+	}
+
+	case XFS_IOC_GETBMAP:
+	case XFS_IOC_GETBMAPA:
+		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
+
+	case XFS_IOC_GETBMAPX:
+		return xfs_ioc_getbmapx(ip, arg);
+
+	case XFS_IOC_FD_TO_HANDLE:
+	case XFS_IOC_PATH_TO_HANDLE:
+	case XFS_IOC_PATH_TO_FSHANDLE:
+		return xfs_find_handle(cmd, arg);
+
+	case XFS_IOC_OPEN_BY_HANDLE:
+		return xfs_open_by_handle(mp, arg, filp, inode);
+
+	case XFS_IOC_FSSETDM_BY_HANDLE:
+		return xfs_fssetdm_by_handle(mp, arg, inode);
+
+	case XFS_IOC_READLINK_BY_HANDLE:
+		return xfs_readlink_by_handle(mp, arg, inode);
+
+	case XFS_IOC_ATTRLIST_BY_HANDLE:
+		return xfs_attrlist_by_handle(mp, arg, inode);
+
+	case XFS_IOC_ATTRMULTI_BY_HANDLE:
+		return xfs_attrmulti_by_handle(mp, arg, inode);
+
+	case XFS_IOC_SWAPEXT: {
+		error = xfs_swapext((struct xfs_swapext __user *)arg);
+		return -error;
+	}
+
+	case XFS_IOC_FSCOUNTS: {
+		xfs_fsop_counts_t out;
+
+		error = xfs_fs_counts(mp, &out);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &out, sizeof(out)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_SET_RESBLKS: {
+		xfs_fsop_resblks_t inout;
+		__uint64_t	   in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&inout, arg, sizeof(inout)))
+			return -XFS_ERROR(EFAULT);
+
+		/* input parameter is passed in resblks field of structure */
+		in = inout.resblks;
+		error = xfs_reserve_blocks(mp, &in, &inout);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &inout, sizeof(inout)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_GET_RESBLKS: {
+		xfs_fsop_resblks_t out;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		error = xfs_reserve_blocks(mp, NULL, &out);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &out, sizeof(out)))
+			return -XFS_ERROR(EFAULT);
+
+		return 0;
+	}
+
+	case XFS_IOC_FSGROWFSDATA: {
+		xfs_growfs_data_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_data(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_FSGROWFSLOG: {
+		xfs_growfs_log_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_log(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_FSGROWFSRT: {
+		xfs_growfs_rt_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_rt(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_FREEZE:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (inode->i_sb->s_frozen == SB_UNFROZEN)
+			freeze_bdev(inode->i_sb->s_bdev);
+		return 0;
+
+	case XFS_IOC_THAW:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (inode->i_sb->s_frozen != SB_UNFROZEN)
+			thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
+		return 0;
+
+	case XFS_IOC_GOINGDOWN: {
+		__uint32_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (get_user(in, (__uint32_t __user *)arg))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_fs_goingdown(mp, in);
+		return -error;
+	}
+
+	case XFS_IOC_ERROR_INJECTION: {
+		xfs_error_injection_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_errortag_add(in.errtag, mp);
+		return -error;
+	}
+
+	case XFS_IOC_ERROR_CLEARALL:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		error = xfs_errortag_clearall(mp, 1);
+		return -error;
+
+	default:
+		return -ENOTTY;
+	}
+}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cc4abd3daa49..53f8feb28e58 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -62,12 +62,11 @@ void
 xfs_synchronize_atime(
 	xfs_inode_t	*ip)
 {
-	bhv_vnode_t	*vp;
+	struct inode	*inode = ip->i_vnode;
 
-	vp = XFS_ITOV_NULL(ip);
-	if (vp) {
-		ip->i_d.di_atime.t_sec = (__int32_t)vp->i_atime.tv_sec;
-		ip->i_d.di_atime.t_nsec = (__int32_t)vp->i_atime.tv_nsec;
+	if (inode) {
+		ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+		ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
 	}
 }
 
@@ -80,11 +79,10 @@ void
 xfs_mark_inode_dirty_sync(
 	xfs_inode_t	*ip)
 {
-	bhv_vnode_t	*vp;
+	struct inode	*inode = ip->i_vnode;
 
-	vp = XFS_ITOV_NULL(ip);
-	if (vp)
-		mark_inode_dirty_sync(vn_to_inode(vp));
+	if (inode)
+		mark_inode_dirty_sync(inode);
 }
 
 /*
@@ -215,48 +213,36 @@ xfs_validate_fields(
  */
 STATIC int
 xfs_init_security(
-	bhv_vnode_t	*vp,
+	struct inode	*inode,
 	struct inode	*dir)
 {
-	struct inode	*ip = vn_to_inode(vp);
+	struct xfs_inode *ip = XFS_I(inode);
 	size_t		length;
 	void		*value;
 	char		*name;
 	int		error;
 
-	error = security_inode_init_security(ip, dir, &name, &value, &length);
+	error = security_inode_init_security(inode, dir, &name,
+					     &value, &length);
 	if (error) {
 		if (error == -EOPNOTSUPP)
 			return 0;
 		return -error;
 	}
 
-	error = xfs_attr_set(XFS_I(ip), name, value,
-			length, ATTR_SECURE);
+	error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
 	if (!error)
-		xfs_iflags_set(XFS_I(ip), XFS_IMODIFIED);
+		xfs_iflags_set(ip, XFS_IMODIFIED);
 
 	kfree(name);
 	kfree(value);
 	return error;
 }
 
-/*
- * Determine whether a process has a valid fs_struct (kernel daemons
- * like knfsd don't have an fs_struct).
- *
- * XXX(hch):  nfsd is broken, better fix it instead.
- */
-STATIC_INLINE int
-xfs_has_fs_struct(struct task_struct *task)
-{
-	return (task->fs != init_task.fs);
-}
-
 STATIC void
 xfs_cleanup_inode(
 	struct inode	*dir,
-	bhv_vnode_t	*vp,
+	struct inode	*inode,
 	struct dentry	*dentry,
 	int		mode)
 {
@@ -267,14 +253,14 @@ xfs_cleanup_inode(
 	 * xfs_init_security we must back out.
 	 * ENOSPC can hit here, among other things.
 	 */
-	teardown.d_inode = vn_to_inode(vp);
+	teardown.d_inode = inode;
 	teardown.d_name = dentry->d_name;
 
 	if (S_ISDIR(mode))
 		xfs_rmdir(XFS_I(dir), &teardown);
 	else
 		xfs_remove(XFS_I(dir), &teardown);
-	VN_RELE(vp);
+	iput(inode);
 }
 
 STATIC int
@@ -284,8 +270,8 @@ xfs_vn_mknod(
 	int		mode,
 	dev_t		rdev)
 {
-	struct inode	*ip;
-	bhv_vnode_t	*vp = NULL, *dvp = vn_from_inode(dir);
+	struct inode	*inode;
+	struct xfs_inode *ip = NULL;
 	xfs_acl_t	*default_acl = NULL;
 	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
 	int		error;
@@ -297,59 +283,65 @@ xfs_vn_mknod(
 	if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
 		return -EINVAL;
 
-	if (unlikely(test_default_acl && test_default_acl(dvp))) {
+	if (test_default_acl && test_default_acl(dir)) {
 		if (!_ACL_ALLOC(default_acl)) {
 			return -ENOMEM;
 		}
-		if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
+		if (!_ACL_GET_DEFAULT(dir, default_acl)) {
 			_ACL_FREE(default_acl);
 			default_acl = NULL;
 		}
 	}
 
-	if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current))
+	if (IS_POSIXACL(dir) && !default_acl)
 		mode &= ~current->fs->umask;
 
 	switch (mode & S_IFMT) {
-	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
 		rdev = sysv_encode_dev(rdev);
 	case S_IFREG:
-		error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL);
+		error = xfs_create(XFS_I(dir), dentry, mode, rdev, &ip, NULL);
 		break;
 	case S_IFDIR:
-		error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL);
+		error = xfs_mkdir(XFS_I(dir), dentry, mode, &ip, NULL);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 
-	if (unlikely(!error)) {
-		error = xfs_init_security(vp, dir);
-		if (error)
-			xfs_cleanup_inode(dir, vp, dentry, mode);
-	}
+	if (unlikely(error))
+		goto out_free_acl;
 
-	if (unlikely(default_acl)) {
-		if (!error) {
-			error = _ACL_INHERIT(vp, mode, default_acl);
-			if (!error)
-				xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED);
-			else
-				xfs_cleanup_inode(dir, vp, dentry, mode);
-		}
+	inode = ip->i_vnode;
+
+	error = xfs_init_security(inode, dir);
+	if (unlikely(error))
+		goto out_cleanup_inode;
+
+	if (default_acl) {
+		error = _ACL_INHERIT(inode, mode, default_acl);
+		if (unlikely(error))
+			goto out_cleanup_inode;
+		xfs_iflags_set(ip, XFS_IMODIFIED);
 		_ACL_FREE(default_acl);
 	}
 
-	if (likely(!error)) {
-		ASSERT(vp);
-		ip = vn_to_inode(vp);
 
-		if (S_ISDIR(mode))
-			xfs_validate_fields(ip);
-		d_instantiate(dentry, ip);
-		xfs_validate_fields(dir);
-	}
+	if (S_ISDIR(mode))
+		xfs_validate_fields(inode);
+	d_instantiate(dentry, inode);
+	xfs_validate_fields(dir);
+	return -error;
+
+ out_cleanup_inode:
+	xfs_cleanup_inode(dir, inode, dentry, mode);
+ out_free_acl:
+	if (default_acl)
+		_ACL_FREE(default_acl);
 	return -error;
 }
 
@@ -378,13 +370,13 @@ xfs_vn_lookup(
 	struct dentry	*dentry,
 	struct nameidata *nd)
 {
-	bhv_vnode_t	*cvp;
+	struct xfs_inode *cip;
 	int		error;
 
 	if (dentry->d_name.len >= MAXNAMELEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	error = xfs_lookup(XFS_I(dir), dentry, &cvp);
+	error = xfs_lookup(XFS_I(dir), dentry, &cip);
 	if (unlikely(error)) {
 		if (unlikely(error != ENOENT))
 			return ERR_PTR(-error);
@@ -392,7 +384,7 @@ xfs_vn_lookup(
 		return NULL;
 	}
 
-	return d_splice_alias(vn_to_inode(cvp), dentry);
+	return d_splice_alias(cip->i_vnode, dentry);
 }
 
 STATIC int
@@ -401,23 +393,22 @@ xfs_vn_link(
 	struct inode	*dir,
 	struct dentry	*dentry)
 {
-	struct inode	*ip;	/* inode of guy being linked to */
-	bhv_vnode_t	*vp;	/* vp of name being linked */
+	struct inode	*inode;	/* inode of guy being linked to */
 	int		error;
 
-	ip = old_dentry->d_inode;	/* inode being linked to */
-	vp = vn_from_inode(ip);
+	inode = old_dentry->d_inode;
 
-	VN_HOLD(vp);
-	error = xfs_link(XFS_I(dir), vp, dentry);
+	igrab(inode);
+	error = xfs_link(XFS_I(dir), XFS_I(inode), dentry);
 	if (unlikely(error)) {
-		VN_RELE(vp);
-	} else {
-		xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
-		xfs_validate_fields(ip);
-		d_instantiate(dentry, ip);
+		iput(inode);
+		return -error;
 	}
-	return -error;
+
+	xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
+	xfs_validate_fields(inode);
+	d_instantiate(dentry, inode);
+	return 0;
 }
 
 STATIC int
@@ -444,29 +435,33 @@ xfs_vn_symlink(
 	struct dentry	*dentry,
 	const char	*symname)
 {
-	struct inode	*ip;
-	bhv_vnode_t	*cvp;	/* used to lookup symlink to put in dentry */
+	struct inode	*inode;
+	struct xfs_inode *cip = NULL;
 	int		error;
 	mode_t		mode;
 
-	cvp = NULL;
-
 	mode = S_IFLNK |
 		(irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
 
 	error = xfs_symlink(XFS_I(dir), dentry, (char *)symname, mode,
-			    &cvp, NULL);
-	if (likely(!error && cvp)) {
-		error = xfs_init_security(cvp, dir);
-		if (likely(!error)) {
-			ip = vn_to_inode(cvp);
-			d_instantiate(dentry, ip);
-			xfs_validate_fields(dir);
-			xfs_validate_fields(ip);
-		} else {
-			xfs_cleanup_inode(dir, cvp, dentry, 0);
-		}
-	}
+			    &cip, NULL);
+	if (unlikely(error))
+		goto out;
+
+	inode = cip->i_vnode;
+
+	error = xfs_init_security(inode, dir);
+	if (unlikely(error))
+		goto out_cleanup_inode;
+
+	d_instantiate(dentry, inode);
+	xfs_validate_fields(dir);
+	xfs_validate_fields(inode);
+	return 0;
+
+ out_cleanup_inode:
+	xfs_cleanup_inode(dir, inode, dentry, 0);
+ out:
 	return -error;
 }
 
@@ -494,12 +489,9 @@ xfs_vn_rename(
 	struct dentry	*ndentry)
 {
 	struct inode	*new_inode = ndentry->d_inode;
-	bhv_vnode_t	*tvp;	/* target directory */
 	int		error;
 
-	tvp = vn_from_inode(ndir);
-
-	error = xfs_rename(XFS_I(odir), odentry, tvp, ndentry);
+	error = xfs_rename(XFS_I(odir), odentry, XFS_I(ndir), ndentry);
 	if (likely(!error)) {
 		if (new_inode)
 			xfs_validate_fields(new_inode);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 3ca39c4e5d2a..e5143323e71f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -99,7 +99,6 @@
 /*
  * Feature macros (disable/enable)
  */
-#undef  HAVE_REFCACHE	/* reference cache not needed for NFS in 2.6 */
 #define HAVE_SPLICE	/* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 166353388490..1d95dca96cfe 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -176,7 +176,6 @@ xfs_read(
 {
 	struct file		*file = iocb->ki_filp;
 	struct inode		*inode = file->f_mapping->host;
-	bhv_vnode_t		*vp = XFS_ITOV(ip);
 	xfs_mount_t		*mp = ip->i_mount;
 	size_t			size = 0;
 	ssize_t			ret = 0;
@@ -228,11 +227,11 @@ xfs_read(
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
-		bhv_vrwlock_t locktype = VRWLOCK_READ;
 		int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+		int iolock = XFS_IOLOCK_SHARED;
 
-		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *offset, size,
-					dmflags, &locktype);
+		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
+					dmflags, &iolock);
 		if (ret) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			if (unlikely(ioflags & IO_ISDIRECT))
@@ -242,7 +241,7 @@ xfs_read(
 	}
 
 	if (unlikely(ioflags & IO_ISDIRECT)) {
-		if (VN_CACHED(vp))
+		if (inode->i_mapping->nrpages)
 			ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
 						    -1, FI_REMAPF_LOCKED);
 		mutex_unlock(&inode->i_mutex);
@@ -276,7 +275,6 @@ xfs_splice_read(
 	int			flags,
 	int			ioflags)
 {
-	bhv_vnode_t		*vp = XFS_ITOV(ip);
 	xfs_mount_t		*mp = ip->i_mount;
 	ssize_t			ret;
 
@@ -287,11 +285,11 @@ xfs_splice_read(
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
-		bhv_vrwlock_t locktype = VRWLOCK_READ;
+		int iolock = XFS_IOLOCK_SHARED;
 		int error;
 
-		error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *ppos, count,
-					FILP_DELAY_FLAG(infilp), &locktype);
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
+					FILP_DELAY_FLAG(infilp), &iolock);
 		if (error) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			return -error;
@@ -317,7 +315,6 @@ xfs_splice_write(
 	int			flags,
 	int			ioflags)
 {
-	bhv_vnode_t		*vp = XFS_ITOV(ip);
 	xfs_mount_t		*mp = ip->i_mount;
 	ssize_t			ret;
 	struct inode		*inode = outfilp->f_mapping->host;
@@ -330,11 +327,11 @@ xfs_splice_write(
 	xfs_ilock(ip, XFS_IOLOCK_EXCL);
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
-		bhv_vrwlock_t locktype = VRWLOCK_WRITE;
+		int iolock = XFS_IOLOCK_EXCL;
 		int error;
 
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, *ppos, count,
-					FILP_DELAY_FLAG(outfilp), &locktype);
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
+					FILP_DELAY_FLAG(outfilp), &iolock);
 		if (error) {
 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return -error;
@@ -573,14 +570,12 @@ xfs_write(
 	struct file		*file = iocb->ki_filp;
 	struct address_space	*mapping = file->f_mapping;
 	struct inode		*inode = mapping->host;
-	bhv_vnode_t		*vp = XFS_ITOV(xip);
 	unsigned long		segs = nsegs;
 	xfs_mount_t		*mp;
 	ssize_t			ret = 0, error = 0;
 	xfs_fsize_t		isize, new_size;
 	int			iolock;
 	int			eventsent = 0;
-	bhv_vrwlock_t		locktype;
 	size_t			ocount = 0, count;
 	loff_t			pos;
 	int			need_i_mutex;
@@ -607,11 +602,9 @@ xfs_write(
 relock:
 	if (ioflags & IO_ISDIRECT) {
 		iolock = XFS_IOLOCK_SHARED;
-		locktype = VRWLOCK_WRITE_DIRECT;
 		need_i_mutex = 0;
 	} else {
 		iolock = XFS_IOLOCK_EXCL;
-		locktype = VRWLOCK_WRITE;
 		need_i_mutex = 1;
 		mutex_lock(&inode->i_mutex);
 	}
@@ -634,9 +627,8 @@ start:
 			dmflags |= DM_FLAGS_IMUX;
 
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
-		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
-				      pos, count,
-				      dmflags, &locktype);
+		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
+				      pos, count, dmflags, &iolock);
 		if (error) {
 			goto out_unlock_internal;
 		}
@@ -664,10 +656,9 @@ start:
 			return XFS_ERROR(-EINVAL);
 		}
 
-		if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) {
+		if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
 			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
 			iolock = XFS_IOLOCK_EXCL;
-			locktype = VRWLOCK_WRITE;
 			need_i_mutex = 1;
 			mutex_lock(&inode->i_mutex);
 			xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
@@ -727,7 +718,7 @@ retry:
 	current->backing_dev_info = mapping->backing_dev_info;
 
 	if ((ioflags & IO_ISDIRECT)) {
-		if (VN_CACHED(vp)) {
+		if (mapping->nrpages) {
 			WARN_ON(need_i_mutex == 0);
 			xfs_inval_cached_trace(xip, pos, -1,
 					(pos & PAGE_CACHE_MASK), -1);
@@ -744,7 +735,6 @@ retry:
 			mutex_unlock(&inode->i_mutex);
 
 			iolock = XFS_IOLOCK_SHARED;
-			locktype = VRWLOCK_WRITE_DIRECT;
 			need_i_mutex = 0;
 		}
 
@@ -781,15 +771,15 @@ retry:
 
 	if (ret == -ENOSPC &&
 	    DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
-		xfs_rwunlock(xip, locktype);
+		xfs_iunlock(xip, iolock);
 		if (need_i_mutex)
 			mutex_unlock(&inode->i_mutex);
-		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
-				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
+		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
+				DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
 				0, 0, 0); /* Delay flag intentionally  unused */
 		if (need_i_mutex)
 			mutex_lock(&inode->i_mutex);
-		xfs_rwlock(xip, locktype);
+		xfs_ilock(xip, iolock);
 		if (error)
 			goto out_unlock_internal;
 		pos = xip->i_size;
@@ -817,7 +807,8 @@ retry:
 	/* Handle various SYNC-type writes */
 	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
 		int error2;
-		xfs_rwunlock(xip, locktype);
+
+		xfs_iunlock(xip, iolock);
 		if (need_i_mutex)
 			mutex_unlock(&inode->i_mutex);
 		error2 = sync_page_range(inode, mapping, pos, ret);
@@ -825,7 +816,7 @@ retry:
 			error = error2;
 		if (need_i_mutex)
 			mutex_lock(&inode->i_mutex);
-		xfs_rwlock(xip, locktype);
+		xfs_ilock(xip, iolock);
 		error2 = xfs_write_sync_logforce(mp, xip);
 		if (!error)
 			error = error2;
@@ -846,7 +837,7 @@ retry:
 			xip->i_d.di_size = xip->i_size;
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
 	}
-	xfs_rwunlock(xip, locktype);
+	xfs_iunlock(xip, iolock);
  out_unlock_mutex:
 	if (need_i_mutex)
 		mutex_unlock(&inode->i_mutex);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 21dfc9da235e..cb9ce90d1deb 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -171,7 +171,7 @@ xfs_parseargs(
 	char			*this_char, *value, *eov;
 	int			dsunit, dswidth, vol_dsunit, vol_dswidth;
 	int			iosize;
-	int			ikeep = 0;
+	int			dmapi_implies_ikeep = 1;
 
 	args->flags |= XFSMNT_BARRIER;
 	args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
@@ -302,10 +302,10 @@ xfs_parseargs(
 		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
 			args->flags &= ~XFSMNT_BARRIER;
 		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-			ikeep = 1;
-			args->flags &= ~XFSMNT_IDELETE;
+			args->flags |= XFSMNT_IKEEP;
 		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
-			args->flags |= XFSMNT_IDELETE;
+			dmapi_implies_ikeep = 0;
+			args->flags &= ~XFSMNT_IKEEP;
 		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
 			args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
 		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
@@ -410,8 +410,8 @@ xfs_parseargs(
 	 * Note that if "ikeep" or "noikeep" mount options are
 	 * supplied, then they are honored.
 	 */
-	if (!(args->flags & XFSMNT_DMAPI) && !ikeep)
-		args->flags |= XFSMNT_IDELETE;
+	if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep)
+		args->flags |= XFSMNT_IKEEP;
 
 	if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
 		if (dsunit) {
@@ -446,6 +446,7 @@ xfs_showargs(
 {
 	static struct proc_xfs_info xfs_info_set[] = {
 		/* the few simple ones we can get from the mount struct */
+		{ XFS_MOUNT_IKEEP,		"," MNTOPT_IKEEP },
 		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
 		{ XFS_MOUNT_INO64,		"," MNTOPT_INO64 },
 		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
@@ -461,7 +462,6 @@ xfs_showargs(
 	};
 	static struct proc_xfs_info xfs_info_unset[] = {
 		/* the few simple ones we can get from the mount struct */
-		{ XFS_MOUNT_IDELETE,		"," MNTOPT_IKEEP },
 		{ XFS_MOUNT_COMPAT_IOSIZE,	"," MNTOPT_LARGEIO },
 		{ XFS_MOUNT_BARRIER,		"," MNTOPT_NOBARRIER },
 		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_64BITINODE },
@@ -896,7 +896,8 @@ xfs_fs_write_inode(
 	struct inode		*inode,
 	int			sync)
 {
-	int			error = 0, flags = FLUSH_INODE;
+	int			error = 0;
+	int			flags = 0;
 
 	xfs_itrace_entry(XFS_I(inode));
 	if (sync) {
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index b5ea418693b1..4ed5914adefb 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -46,18 +46,6 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
 }
 
 /*
- * Values for the vop_rwlock/rwunlock flags parameter.
- */
-typedef enum bhv_vrwlock {
-	VRWLOCK_NONE,
-	VRWLOCK_READ,
-	VRWLOCK_WRITE,
-	VRWLOCK_WRITE_DIRECT,
-	VRWLOCK_TRY_READ,
-	VRWLOCK_TRY_WRITE
-} bhv_vrwlock_t;
-
-/*
  * Return values for xfs_inactive.  A return value of
  * VN_INACTIVE_NOCACHE implies that the file system behavior
  * has disassociated its state and bhv_desc_t from the vnode.
@@ -73,12 +61,9 @@ typedef enum bhv_vrwlock {
 #define IO_INVIS	0x00020		/* don't update inode timestamps */
 
 /*
- * Flags for vop_iflush call
+ * Flags for xfs_inode_flush
  */
 #define FLUSH_SYNC		1	/* wait for flush to complete	*/
-#define FLUSH_INODE		2	/* flush the inode itself	*/
-#define FLUSH_LOG		4	/* force the last log entry for
-					 * this inode out to disk	*/
 
 /*
  * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
@@ -230,7 +215,7 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp)
  */
 #define VNAME(dentry)		((char *) (dentry)->d_name.name)
 #define VNAMELEN(dentry)	((dentry)->d_name.len)
-#define VNAME_TO_VNODE(dentry)	(vn_from_inode((dentry)->d_inode))
+#define VNAME_TO_INODE(dentry)	(XFS_I((dentry)->d_inode))
 
 /*
  * Dealing with bad inodes
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 1f3da5b8657b..8e9c5ae6504d 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1405,13 +1405,13 @@ xfs_qm_qino_alloc(
 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
 		unsigned oldv = mp->m_sb.sb_versionnum;
 #endif
-		ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
+		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
 		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
 				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
 		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
 			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
 
-		XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
+		xfs_sb_version_addquota(&mp->m_sb);
 		mp->m_sb.sb_uquotino = NULLFSINO;
 		mp->m_sb.sb_gquotino = NULLFSINO;
 
@@ -1954,7 +1954,7 @@ xfs_qm_init_quotainos(
 	/*
 	 * Get the uquota and gquota inodes
 	 */
-	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+	if (xfs_sb_version_hasquota(&mp->m_sb)) {
 		if (XFS_IS_UQUOTA_ON(mp) &&
 		    mp->m_sb.sb_uquotino != NULLFSINO) {
 			ASSERT(mp->m_sb.sb_uquotino > 0);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 97bb32937585..f4f6c4c861d7 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -118,7 +118,7 @@ xfs_qm_newmount(
 	*quotaflags = 0;
 	*needquotamount = B_FALSE;
 
-	quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+	quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
 				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
 
 	if (quotaondisk) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 2cc5886cfe85..d2b8be7e75f9 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -377,7 +377,7 @@ xfs_qm_scall_trunc_qfiles(
 	if (!capable(CAP_SYS_ADMIN))
 		return XFS_ERROR(EPERM);
 	error = 0;
-	if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
+	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
 		qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
 		return XFS_ERROR(EINVAL);
 	}
@@ -522,7 +522,7 @@ xfs_qm_scall_getqstat(
 	memset(out, 0, sizeof(fs_quota_stat_t));
 
 	out->qs_version = FS_QSTAT_VERSION;
-	if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+	if (!xfs_sb_version_hasquota(&mp->m_sb)) {
 		out->qs_uquota.qfs_ino = NULLFSINO;
 		out->qs_gquota.qfs_ino = NULLFSINO;
 		return (0);
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 129067cfcb86..0b75d302508f 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -24,7 +24,7 @@ static int          ktrace_zentries;
 void __init
 ktrace_init(int zentries)
 {
-	ktrace_zentries = zentries;
+	ktrace_zentries = roundup_pow_of_two(zentries);
 
 	ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t),
 					"ktrace_hdr");
@@ -47,13 +47,16 @@ ktrace_uninit(void)
  * ktrace_alloc()
  *
  * Allocate a ktrace header and enough buffering for the given
- * number of entries.
+ * number of entries. Round the number of entries up to a
+ * power of 2 so we can do fast masking to get the index from
+ * the atomic index counter.
  */
 ktrace_t *
 ktrace_alloc(int nentries, unsigned int __nocast sleep)
 {
 	ktrace_t        *ktp;
 	ktrace_entry_t  *ktep;
+	int		entries;
 
 	ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep);
 
@@ -70,11 +73,12 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
 	/*
 	 * Special treatment for buffers with the ktrace_zentries entries
 	 */
-	if (nentries == ktrace_zentries) {
+	entries = roundup_pow_of_two(nentries);
+	if (entries == ktrace_zentries) {
 		ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone,
 							    sleep);
 	} else {
-		ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)),
+		ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)),
 							    sleep | KM_LARGE);
 	}
 
@@ -91,8 +95,10 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
 	}
 
 	ktp->kt_entries  = ktep;
-	ktp->kt_nentries = nentries;
-	ktp->kt_index    = 0;
+	ktp->kt_nentries = entries;
+	ASSERT(is_power_of_2(entries));
+	ktp->kt_index_mask = entries - 1;
+	atomic_set(&ktp->kt_index, 0);
 	ktp->kt_rollover = 0;
 	return ktp;
 }
@@ -151,8 +157,6 @@ ktrace_enter(
 	void            *val14,
 	void            *val15)
 {
-	static DEFINE_SPINLOCK(wrap_lock);
-	unsigned long	flags;
 	int             index;
 	ktrace_entry_t  *ktep;
 
@@ -161,12 +165,8 @@ ktrace_enter(
 	/*
 	 * Grab an entry by pushing the index up to the next one.
 	 */
-	spin_lock_irqsave(&wrap_lock, flags);
-	index = ktp->kt_index;
-	if (++ktp->kt_index == ktp->kt_nentries)
-		ktp->kt_index = 0;
-	spin_unlock_irqrestore(&wrap_lock, flags);
-
+	index = atomic_add_return(1, &ktp->kt_index);
+	index = (index - 1) & ktp->kt_index_mask;
 	if (!ktp->kt_rollover && index == ktp->kt_nentries - 1)
 		ktp->kt_rollover = 1;
 
@@ -199,11 +199,12 @@ int
 ktrace_nentries(
 	ktrace_t        *ktp)
 {
-	if (ktp == NULL) {
+	int	index;
+	if (ktp == NULL)
 		return 0;
-	}
 
-	return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index);
+	index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
+	return (ktp->kt_rollover ? ktp->kt_nentries : index);
 }
 
 /*
@@ -228,7 +229,7 @@ ktrace_first(ktrace_t   *ktp, ktrace_snap_t     *ktsp)
 	int             nentries;
 
 	if (ktp->kt_rollover)
-		index = ktp->kt_index;
+		index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
 	else
 		index = 0;
 
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
index 56e72b40a859..741d6947ca60 100644
--- a/fs/xfs/support/ktrace.h
+++ b/fs/xfs/support/ktrace.h
@@ -30,7 +30,8 @@ typedef struct ktrace_entry {
  */
 typedef struct ktrace {
 	int		kt_nentries;	/* number of entries in trace buf */
-	int		kt_index;	/* current index in entries */
+	atomic_t	kt_index;	/* current index in entries */
+	unsigned int	kt_index_mask;
 	int		kt_rollover;
 	ktrace_entry_t	*kt_entries;	/* buffer of entries */
 } ktrace_t;
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index b08e2a2a8add..96ba6aa4ed8c 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -227,10 +227,10 @@ STATIC void
 xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
 {
 	if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
-	    !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
+	    !(xfs_sb_version_hasattr2(&mp->m_sb))) {
 		spin_lock(&mp->m_sb_lock);
-		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
-			XFS_SB_VERSION_ADDATTR2(&mp->m_sb);
+		if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
+			xfs_sb_version_addattr2(&mp->m_sb);
 			spin_unlock(&mp->m_sb_lock);
 			xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
 		} else
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 48228848f5ae..fab0b6d5a41b 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -25,6 +25,109 @@
  * XFS bit manipulation routines, used in non-realtime code.
  */
 
+#ifndef HAVE_ARCH_HIGHBIT
+/*
+ * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
+ */
+static const char xfs_highbit[256] = {
+       -1, 0, 1, 1, 2, 2, 2, 2,			/* 00 .. 07 */
+	3, 3, 3, 3, 3, 3, 3, 3,			/* 08 .. 0f */
+	4, 4, 4, 4, 4, 4, 4, 4,			/* 10 .. 17 */
+	4, 4, 4, 4, 4, 4, 4, 4,			/* 18 .. 1f */
+	5, 5, 5, 5, 5, 5, 5, 5,			/* 20 .. 27 */
+	5, 5, 5, 5, 5, 5, 5, 5,			/* 28 .. 2f */
+	5, 5, 5, 5, 5, 5, 5, 5,			/* 30 .. 37 */
+	5, 5, 5, 5, 5, 5, 5, 5,			/* 38 .. 3f */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 40 .. 47 */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 48 .. 4f */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 50 .. 57 */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 58 .. 5f */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 60 .. 67 */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 68 .. 6f */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 70 .. 77 */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 78 .. 7f */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* 80 .. 87 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* 88 .. 8f */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* 90 .. 97 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* 98 .. 9f */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* a0 .. a7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* a8 .. af */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* b0 .. b7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* b8 .. bf */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* c0 .. c7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* c8 .. cf */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* d0 .. d7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* d8 .. df */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* e0 .. e7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* e8 .. ef */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* f0 .. f7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* f8 .. ff */
+};
+#endif
+
+/*
+ * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
+ */
+inline int
+xfs_highbit32(
+	__uint32_t	v)
+{
+#ifdef HAVE_ARCH_HIGHBIT
+	return highbit32(v);
+#else
+	int		i;
+
+	if (v & 0xffff0000)
+		if (v & 0xff000000)
+			i = 24;
+		else
+			i = 16;
+	else if (v & 0x0000ffff)
+		if (v & 0x0000ff00)
+			i = 8;
+		else
+			i = 0;
+	else
+		return -1;
+	return i + xfs_highbit[(v >> i) & 0xff];
+#endif
+}
+
+/*
+ * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
+ */
+int
+xfs_lowbit64(
+	__uint64_t	v)
+{
+	__uint32_t	w = (__uint32_t)v;
+	int		n = 0;
+
+	if (w) {	/* lower bits */
+		n = ffs(w);
+	} else {	/* upper bits */
+		w = (__uint32_t)(v >> 32);
+		if (w && (n = ffs(w)))
+			n += 32;
+	}
+	return n - 1;
+}
+
+/*
+ * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
+ */
+int
+xfs_highbit64(
+	__uint64_t	v)
+{
+	__uint32_t	h = (__uint32_t)(v >> 32);
+
+	if (h)
+		return xfs_highbit32(h) + 32;
+	return xfs_highbit32((__uint32_t)v);
+}
+
+
 /*
  * Return whether bitmap is empty.
  * Size is number of words in the bitmap, which is padded to word boundary
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 325a007dec91..082641a9782c 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -47,30 +47,13 @@ static inline __uint64_t xfs_mask64lo(int n)
 }
 
 /* Get high bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_highbit32(__uint32_t v)
-{
-	return fls(v) - 1;
-}
-
-/* Get high bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_highbit64(__uint64_t v)
-{
-	return fls64(v) - 1;
-}
-
-/* Get low bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_lowbit32(__uint32_t v)
-{
-	__uint32_t t = v;
-	return (t) ? find_first_bit((unsigned long *)&t, 32) : -1;
-}
+extern int xfs_highbit32(__uint32_t v);
 
 /* Get low bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_lowbit64(__uint64_t v)
-{
-	__uint64_t t = v;
-	return (t) ? find_first_bit((unsigned long *)&t, 64) : -1;
-}
+extern int xfs_lowbit64(__uint64_t v);
+
+/* Get high bit set out of 64-bit argument, -1 if none set */
+extern int xfs_highbit64(__uint64_t);
 
 /* Return whether bitmap is empty (1 == empty) */
 extern int xfs_bitmap_empty(uint *map, uint size);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 1c0a5a585a82..bce8e3bd8ad1 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4047,17 +4047,17 @@ xfs_bmap_add_attrfork(
 		xfs_trans_log_inode(tp, ip, logflags);
 	if (error)
 		goto error2;
-	if (!XFS_SB_VERSION_HASATTR(&mp->m_sb) ||
-	   (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2)) {
+	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
+	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
 		__int64_t sbfields = 0;
 
 		spin_lock(&mp->m_sb_lock);
-		if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
-			XFS_SB_VERSION_ADDATTR(&mp->m_sb);
+		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
+			xfs_sb_version_addattr(&mp->m_sb);
 			sbfields |= XFS_SB_VERSIONNUM;
 		}
-		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2) {
-			XFS_SB_VERSION_ADDATTR2(&mp->m_sb);
+		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
+			xfs_sb_version_addattr2(&mp->m_sb);
 			sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
 		}
 		if (sbfields) {
@@ -5043,7 +5043,7 @@ xfs_bmapi(
 			 * A wasdelay extent has been initialized, so
 			 * shouldn't be flagged as unwritten.
 			 */
-			if (wr && XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+			if (wr && xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 				if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
 					got.br_state = XFS_EXT_UNWRITTEN;
 			}
@@ -5483,7 +5483,7 @@ xfs_bunmapi(
 			 * get rid of part of a realtime extent.
 			 */
 			if (del.br_state == XFS_EXT_UNWRITTEN ||
-			    !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+			    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 				/*
 				 * This piece is unwritten, or we're not
 				 * using unwritten extents.  Skip over it.
@@ -5535,7 +5535,7 @@ xfs_bunmapi(
 			} else if ((del.br_startoff == start &&
 				    (del.br_state == XFS_EXT_UNWRITTEN ||
 				     xfs_trans_get_block_res(tp) == 0)) ||
-				   !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+				   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 				/*
 				 * Can't make it unwritten.  There isn't
 				 * a full extent here so just skip it.
@@ -5772,7 +5772,6 @@ xfs_getbmap(
 	int			error;		/* return value */
 	__int64_t		fixlen;		/* length for -1 case */
 	int			i;		/* extent number */
-	bhv_vnode_t		*vp;		/* corresponding vnode */
 	int			lock;		/* lock state */
 	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
 	xfs_mount_t		*mp;		/* file system mount point */
@@ -5789,7 +5788,6 @@ xfs_getbmap(
 	int			bmapi_flags;	/* flags for xfs_bmapi */
 	__int32_t		oflags;		/* getbmapx bmv_oflags field */
 
-	vp = XFS_ITOV(ip);
 	mp = ip->i_mount;
 
 	whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
@@ -5811,7 +5809,7 @@ xfs_getbmap(
 	if ((interface & BMV_IF_NO_DMAPI_READ) == 0 &&
 	    DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
 	    whichfork == XFS_DATA_FORK) {
-		error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL);
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
 		if (error)
 			return XFS_ERROR(error);
 	}
@@ -5869,6 +5867,10 @@ xfs_getbmap(
 		/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
 		error = xfs_flush_pages(ip, (xfs_off_t)0,
 					       -1, 0, FI_REMAPF);
+		if (error) {
+			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+		return error;
+		}
 	}
 
 	ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 2d950e975918..cd0d4b4bb816 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -120,7 +120,7 @@ typedef enum {
  * Extent state and extent format macros.
  */
 #define XFS_EXTFMT_INODE(x)	\
-	(XFS_SB_VERSION_HASEXTFLGBIT(&((x)->i_mount->m_sb)) ? \
+	(xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \
 		XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
 #define ISUNWRITTEN(x)	((x)->br_state == XFS_EXT_UNWRITTEN)
 
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d16c1b971074..d5d1e60ee224 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -86,7 +86,7 @@ struct xfs_mount_args {
 #define XFSMNT_NOUUID		0x01000000	/* Ignore fs uuid */
 #define XFSMNT_DMAPI		0x02000000	/* enable dmapi/xdsm */
 #define XFSMNT_BARRIER		0x04000000	/* use write barriers */
-#define XFSMNT_IDELETE		0x08000000	/* inode cluster delete */
+#define XFSMNT_IKEEP		0x08000000	/* inode cluster delete */
 #define XFSMNT_SWALLOC		0x10000000	/* turn on stripe width
 						 * allocation */
 #define XFSMNT_DIRSYNC		0x40000000	/* sync creat,link,unlink,rename
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index be7c4251fa61..e92e73f0e6af 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -49,7 +49,7 @@ void
 xfs_dir_mount(
 	xfs_mount_t	*mp)
 {
-	ASSERT(XFS_SB_VERSION_HASDIRV2(&mp->m_sb));
+	ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb));
 	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
 	       XFS_MAX_BLOCKSIZE);
 	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index eadc1591c795..d3a0f538d6a6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -77,36 +77,36 @@ xfs_fs_geometry(
 	if (new_version >= 3) {
 		geo->version = XFS_FSOP_GEOM_VERSION;
 		geo->flags =
-			(XFS_SB_VERSION_HASATTR(&mp->m_sb) ?
+			(xfs_sb_version_hasattr(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
-			(XFS_SB_VERSION_HASNLINK(&mp->m_sb) ?
+			(xfs_sb_version_hasnlink(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_NLINK : 0) |
-			(XFS_SB_VERSION_HASQUOTA(&mp->m_sb) ?
+			(xfs_sb_version_hasquota(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
-			(XFS_SB_VERSION_HASALIGN(&mp->m_sb) ?
+			(xfs_sb_version_hasalign(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
-			(XFS_SB_VERSION_HASDALIGN(&mp->m_sb) ?
+			(xfs_sb_version_hasdalign(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
-			(XFS_SB_VERSION_HASSHARED(&mp->m_sb) ?
+			(xfs_sb_version_hasshared(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_SHARED : 0) |
-			(XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) ?
+			(xfs_sb_version_hasextflgbit(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
-			(XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
+			(xfs_sb_version_hasdirv2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
-			(XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
+			(xfs_sb_version_hassector(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
 			(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
-			(XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
+			(xfs_sb_version_hasattr2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
-		geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
+		geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
 				mp->m_sb.sb_logsectsize : BBSIZE;
 		geo->rtsectsize = mp->m_sb.sb_blocksize;
 		geo->dirblocksize = mp->m_dirblksize;
 	}
 	if (new_version >= 4) {
 		geo->flags |=
-			(XFS_SB_VERSION_HASLOGV2(&mp->m_sb) ?
+			(xfs_sb_version_haslogv2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_LOGV2 : 0);
 		geo->logsunit = mp->m_sb.sb_logsunit;
 	}
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index c5836b951d0c..5a146cb22980 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -191,7 +191,7 @@ xfs_ialloc_ag_alloc(
 			ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
 			args.alignment = args.mp->m_dalign;
 			isaligned = 1;
-		} else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+		} else if (xfs_sb_version_hasalign(&args.mp->m_sb) &&
 			   args.mp->m_sb.sb_inoalignmt >=
 			   XFS_B_TO_FSBT(args.mp,
 			  	XFS_INODE_CLUSTER_SIZE(args.mp)))
@@ -230,7 +230,7 @@ xfs_ialloc_ag_alloc(
 		args.agbno = be32_to_cpu(agi->agi_root);
 		args.fsbno = XFS_AGB_TO_FSB(args.mp,
 				be32_to_cpu(agi->agi_seqno), args.agbno);
-		if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+		if (xfs_sb_version_hasalign(&args.mp->m_sb) &&
 			args.mp->m_sb.sb_inoalignmt >=
 			XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
 				args.alignment = args.mp->m_sb.sb_inoalignmt;
@@ -271,7 +271,7 @@ xfs_ialloc_ag_alloc(
 	 * use the old version so that old kernels will continue to be
 	 * able to use the file system.
 	 */
-	if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
+	if (xfs_sb_version_hasnlink(&args.mp->m_sb))
 		version = XFS_DINODE_VERSION_2;
 	else
 		version = XFS_DINODE_VERSION_1;
@@ -1053,7 +1053,7 @@ xfs_difree(
 	/*
 	 * When an inode cluster is free, it becomes eligible for removal
 	 */
-	if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
+	if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
 	    (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
 
 		*delete = 1;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index f01b07687faf..e657c5128460 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -78,7 +78,6 @@ xfs_iget_core(
 	xfs_inode_t	*ip;
 	xfs_inode_t	*iq;
 	int		error;
-	xfs_icluster_t	*icl, *new_icl = NULL;
 	unsigned long	first_index, mask;
 	xfs_perag_t	*pag;
 	xfs_agino_t	agino;
@@ -229,29 +228,17 @@ finish_inode:
 	}
 
 	/*
-	 * This is a bit messy - we preallocate everything we _might_
-	 * need before we pick up the ici lock. That way we don't have to
-	 * juggle locks and go all the way back to the start.
+	 * Preload the radix tree so we can insert safely under the
+	 * write spinlock.
 	 */
-	new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
 	if (radix_tree_preload(GFP_KERNEL)) {
+		xfs_idestroy(ip);
 		delay(1);
 		goto again;
 	}
 	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
 	first_index = agino & mask;
 	write_lock(&pag->pag_ici_lock);
-
-	/*
-	 * Find the cluster if it exists
-	 */
-	icl = NULL;
-	if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
-							first_index, 1)) {
-		if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
-			icl = iq->i_cluster;
-	}
-
 	/*
 	 * insert the new inode
 	 */
@@ -266,30 +253,13 @@ finish_inode:
 	}
 
 	/*
-	 * These values _must_ be set before releasing ihlock!
+	 * These values _must_ be set before releasing the radix tree lock!
 	 */
 	ip->i_udquot = ip->i_gdquot = NULL;
 	xfs_iflags_set(ip, XFS_INEW);
 
-	ASSERT(ip->i_cluster == NULL);
-
-	if (!icl) {
-		spin_lock_init(&new_icl->icl_lock);
-		INIT_HLIST_HEAD(&new_icl->icl_inodes);
-		icl = new_icl;
-		new_icl = NULL;
-	} else {
-		ASSERT(!hlist_empty(&icl->icl_inodes));
-	}
-	spin_lock(&icl->icl_lock);
-	hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
-	ip->i_cluster = icl;
-	spin_unlock(&icl->icl_lock);
-
 	write_unlock(&pag->pag_ici_lock);
 	radix_tree_preload_end();
-	if (new_icl)
-		kmem_zone_free(xfs_icluster_zone, new_icl);
 
 	/*
 	 * Link ip to its mount and thread it on the mount's inode list.
@@ -528,18 +498,6 @@ xfs_iextract(
 	xfs_put_perag(mp, pag);
 
 	/*
-	 * Remove from cluster list
-	 */
-	mp = ip->i_mount;
-	spin_lock(&ip->i_cluster->icl_lock);
-	hlist_del(&ip->i_cnode);
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	/* was last inode in cluster? */
-	if (hlist_empty(&ip->i_cluster->icl_inodes))
-		kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
-
-	/*
 	 * Remove from mount's inode list.
 	 */
 	XFS_MOUNT_ILOCK(mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a550546a7083..d7514f8317df 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,7 +55,6 @@
 
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_icluster_zone;
 
 /*
  * Used in xfs_itruncate().  This is the maximum number of extents
@@ -126,6 +125,90 @@ xfs_inobp_check(
 #endif
 
 /*
+ * Find the buffer associated with the given inode map
+ * We do basic validation checks on the buffer once it has been
+ * retrieved from disk.
+ */
+STATIC int
+xfs_imap_to_bp(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_imap_t	*imap,
+	xfs_buf_t	**bpp,
+	uint		buf_flags,
+	uint		imap_flags)
+{
+	int		error;
+	int		i;
+	int		ni;
+	xfs_buf_t	*bp;
+
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+				   (int)imap->im_len, buf_flags, &bp);
+	if (error) {
+		if (error != EAGAIN) {
+			cmn_err(CE_WARN,
+				"xfs_imap_to_bp: xfs_trans_read_buf()returned "
+				"an error %d on %s.  Returning error.",
+				error, mp->m_fsname);
+		} else {
+			ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+		}
+		return error;
+	}
+
+	/*
+	 * Validate the magic number and version of every inode in the buffer
+	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+	 */
+#ifdef DEBUG
+	ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
+#else	/* usual case */
+	ni = 1;
+#endif
+
+	for (i = 0; i < ni; i++) {
+		int		di_ok;
+		xfs_dinode_t	*dip;
+
+		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+					(i << mp->m_sb.sb_inodelog));
+		di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
+			    XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
+		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+						XFS_ERRTAG_ITOBP_INOTOBP,
+						XFS_RANDOM_ITOBP_INOTOBP))) {
+			if (imap_flags & XFS_IMAP_BULKSTAT) {
+				xfs_trans_brelse(tp, bp);
+				return XFS_ERROR(EINVAL);
+			}
+			XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
+						XFS_ERRLEVEL_HIGH, mp, dip);
+#ifdef DEBUG
+			cmn_err(CE_PANIC,
+					"Device %s - bad inode magic/vsn "
+					"daddr %lld #%d (magic=%x)",
+				XFS_BUFTARG_NAME(mp->m_ddev_targp),
+				(unsigned long long)imap->im_blkno, i,
+				be16_to_cpu(dip->di_core.di_magic));
+#endif
+			xfs_trans_brelse(tp, bp);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+	}
+
+	xfs_inobp_check(mp, bp);
+
+	/*
+	 * Mark the buffer as an inode buffer now that it looks good
+	 */
+	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
+
+	*bpp = bp;
+	return 0;
+}
+
+/*
  * This routine is called to map an inode number within a file
  * system to the buffer containing the on-disk version of the
  * inode.  It returns a pointer to the buffer containing the
@@ -147,72 +230,19 @@ xfs_inotobp(
 	xfs_buf_t	**bpp,
 	int		*offset)
 {
-	int		di_ok;
 	xfs_imap_t	imap;
 	xfs_buf_t	*bp;
 	int		error;
-	xfs_dinode_t	*dip;
 
-	/*
-	 * Call the space management code to find the location of the
-	 * inode on disk.
-	 */
 	imap.im_blkno = 0;
 	error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
-	if (error != 0) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: xfs_imap()  returned an "
-	"error %d on %s.  Returning error.", error, mp->m_fsname);
+	if (error)
 		return error;
-	}
-
-	/*
-	 * If the inode number maps to a block outside the bounds of the
-	 * file system then return NULL rather than calling read_buf
-	 * and panicing when we get an error from the driver.
-	 */
-	if ((imap.im_blkno + imap.im_len) >
-	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds "
-	"of the file system %s.  Returning EINVAL.",
-			(unsigned long long)imap.im_blkno,
-			imap.im_len, mp->m_fsname);
-		return XFS_ERROR(EINVAL);
-	}
 
-	/*
-	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
-	 * default to just a read_buf() call.
-	 */
-	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
-				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
-
-	if (error) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: xfs_trans_read_buf()  returned an "
-	"error %d on %s.  Returning error.", error, mp->m_fsname);
+	error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0);
+	if (error)
 		return error;
-	}
-	dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0);
-	di_ok =
-		be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
-		XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
-	if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
-			XFS_RANDOM_ITOBP_INOTOBP))) {
-		XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip);
-		xfs_trans_brelse(tp, bp);
-		cmn_err(CE_WARN,
-	"xfs_inotobp: XFS_TEST_ERROR()  returned an "
-	"error on %s.  Returning EFSCORRUPTED.",  mp->m_fsname);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	xfs_inobp_check(mp, bp);
 
-	/*
-	 * Set *dipp to point to the on-disk inode in the buffer.
-	 */
 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
 	*bpp = bp;
 	*offset = imap.im_boffset;
@@ -248,46 +278,21 @@ xfs_itobp(
 	xfs_dinode_t	**dipp,
 	xfs_buf_t	**bpp,
 	xfs_daddr_t	bno,
-	uint		imap_flags)
+	uint		imap_flags,
+	uint		buf_flags)
 {
 	xfs_imap_t	imap;
 	xfs_buf_t	*bp;
 	int		error;
-	int		i;
-	int		ni;
 
 	if (ip->i_blkno == (xfs_daddr_t)0) {
-		/*
-		 * Call the space management code to find the location of the
-		 * inode on disk.
-		 */
 		imap.im_blkno = bno;
-		if ((error = xfs_imap(mp, tp, ip->i_ino, &imap,
-					XFS_IMAP_LOOKUP | imap_flags)))
+		error = xfs_imap(mp, tp, ip->i_ino, &imap,
+					XFS_IMAP_LOOKUP | imap_flags);
+		if (error)
 			return error;
 
 		/*
-		 * If the inode number maps to a block outside the bounds
-		 * of the file system then return NULL rather than calling
-		 * read_buf and panicing when we get an error from the
-		 * driver.
-		 */
-		if ((imap.im_blkno + imap.im_len) >
-		    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-#ifdef DEBUG
-			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
-					"(imap.im_blkno (0x%llx) "
-					"+ imap.im_len (0x%llx)) > "
-					" XFS_FSB_TO_BB(mp, "
-					"mp->m_sb.sb_dblocks) (0x%llx)",
-					(unsigned long long) imap.im_blkno,
-					(unsigned long long) imap.im_len,
-					XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
-#endif /* DEBUG */
-			return XFS_ERROR(EINVAL);
-		}
-
-		/*
 		 * Fill in the fields in the inode that will be used to
 		 * map the inode to its buffer from now on.
 		 */
@@ -305,76 +310,17 @@ xfs_itobp(
 	}
 	ASSERT(bno == 0 || bno == imap.im_blkno);
 
-	/*
-	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
-	 * default to just a read_buf() call.
-	 */
-	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
-				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
-	if (error) {
-#ifdef DEBUG
-		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
-				"xfs_trans_read_buf() returned error %d, "
-				"imap.im_blkno 0x%llx, imap.im_len 0x%llx",
-				error, (unsigned long long) imap.im_blkno,
-				(unsigned long long) imap.im_len);
-#endif /* DEBUG */
+	error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
+	if (error)
 		return error;
-	}
 
-	/*
-	 * Validate the magic number and version of every inode in the buffer
-	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
-	 * No validation is done here in userspace (xfs_repair).
-	 */
-#if !defined(__KERNEL__)
-	ni = 0;
-#elif defined(DEBUG)
-	ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
-#else	/* usual case */
-	ni = 1;
-#endif
-
-	for (i = 0; i < ni; i++) {
-		int		di_ok;
-		xfs_dinode_t	*dip;
-
-		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-					(i << mp->m_sb.sb_inodelog));
-		di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
-			    XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
-		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-						XFS_ERRTAG_ITOBP_INOTOBP,
-						XFS_RANDOM_ITOBP_INOTOBP))) {
-			if (imap_flags & XFS_IMAP_BULKSTAT) {
-				xfs_trans_brelse(tp, bp);
-				return XFS_ERROR(EINVAL);
-			}
-#ifdef DEBUG
-			cmn_err(CE_ALERT,
-					"Device %s - bad inode magic/vsn "
-					"daddr %lld #%d (magic=%x)",
-				XFS_BUFTARG_NAME(mp->m_ddev_targp),
-				(unsigned long long)imap.im_blkno, i,
-				be16_to_cpu(dip->di_core.di_magic));
-#endif
-			XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH,
-					     mp, dip);
-			xfs_trans_brelse(tp, bp);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
+	if (!bp) {
+		ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+		ASSERT(tp == NULL);
+		*bpp = NULL;
+		return EAGAIN;
 	}
 
-	xfs_inobp_check(mp, bp);
-
-	/*
-	 * Mark the buffer as an inode buffer now that it looks good
-	 */
-	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
-
-	/*
-	 * Set *dipp to point to the on-disk inode in the buffer.
-	 */
 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
 	*bpp = bp;
 	return 0;
@@ -878,7 +824,7 @@ xfs_iread(
 	 * return NULL as well.  Set i_blkno to 0 so that xfs_itobp() will
 	 * know that this is a new incore inode.
 	 */
-	error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags);
+	error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
 	if (error) {
 		kmem_zone_free(xfs_inode_zone, ip);
 		return error;
@@ -1147,7 +1093,7 @@ xfs_ialloc(
 	 * the inode version number now.  This way we only do the conversion
 	 * here rather than here and in the flush/logging code.
 	 */
-	if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) &&
+	if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
 	    ip->i_d.di_version == XFS_DINODE_VERSION_1) {
 		ip->i_d.di_version = XFS_DINODE_VERSION_2;
 		/*
@@ -1967,7 +1913,7 @@ xfs_iunlink(
 		 * Here we put the head pointer into our next pointer,
 		 * and then we fall through to point the head at us.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
 		if (error)
 			return error;
 
@@ -2075,7 +2021,7 @@ xfs_iunlink_remove(
 		 * of dealing with the buffer when there is no need to
 		 * change it.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
 		if (error) {
 			cmn_err(CE_WARN,
 				"xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2137,7 +2083,7 @@ xfs_iunlink_remove(
 		 * Now last_ibp points to the buffer previous to us on
 		 * the unlinked list.  Pull us from the list.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
 		if (error) {
 			cmn_err(CE_WARN,
 				"xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2172,13 +2118,6 @@ xfs_iunlink_remove(
 	return 0;
 }
 
-STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip)
-{
-	return (((ip->i_itemp == NULL) ||
-		!(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-		(ip->i_update_core == 0));
-}
-
 STATIC void
 xfs_ifree_cluster(
 	xfs_inode_t	*free_ip,
@@ -2400,7 +2339,7 @@ xfs_ifree(
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0);
+	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
 	if (error)
 		return error;
 
@@ -2678,14 +2617,31 @@ xfs_imap(
 	fsbno = imap->im_blkno ?
 		XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
 	error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
-	if (error != 0) {
+	if (error)
 		return error;
-	}
+
 	imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
 	imap->im_len = XFS_FSB_TO_BB(mp, len);
 	imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
 	imap->im_ioffset = (ushort)off;
 	imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
+
+	/*
+	 * If the inode number maps to a block outside the bounds
+	 * of the file system then return NULL rather than calling
+	 * read_buf and panicing when we get an error from the
+	 * driver.
+	 */
+	if ((imap->im_blkno + imap->im_len) >
+	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
+			"(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
+			" XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
+			(unsigned long long) imap->im_blkno,
+			(unsigned long long) imap->im_len,
+			XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
+		return EINVAL;
+	}
 	return 0;
 }
 
@@ -2826,38 +2782,41 @@ xfs_iunpin(
 }
 
 /*
- * This is called to wait for the given inode to be unpinned.
- * It will sleep until this happens.  The caller must have the
- * inode locked in at least shared mode so that the buffer cannot
- * be subsequently pinned once someone is waiting for it to be
- * unpinned.
+ * This is called to unpin an inode. It can be directed to wait or to return
+ * immediately without waiting for the inode to be unpinned.  The caller must
+ * have the inode locked in at least shared mode so that the buffer cannot be
+ * subsequently pinned once someone is waiting for it to be unpinned.
  */
 STATIC void
-xfs_iunpin_wait(
-	xfs_inode_t	*ip)
+__xfs_iunpin_wait(
+	xfs_inode_t	*ip,
+	int		wait)
 {
-	xfs_inode_log_item_t	*iip;
-	xfs_lsn_t	lsn;
+	xfs_inode_log_item_t	*iip = ip->i_itemp;
 
 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
-
-	if (atomic_read(&ip->i_pincount) == 0) {
+	if (atomic_read(&ip->i_pincount) == 0)
 		return;
-	}
 
-	iip = ip->i_itemp;
-	if (iip && iip->ili_last_lsn) {
-		lsn = iip->ili_last_lsn;
-	} else {
-		lsn = (xfs_lsn_t)0;
-	}
+	/* Give the log a push to start the unpinning I/O */
+	xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ?
+				iip->ili_last_lsn : 0, XFS_LOG_FORCE);
+	if (wait)
+		wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+}
 
-	/*
-	 * Give the log a push so we don't wait here too long.
-	 */
-	xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE);
+static inline void
+xfs_iunpin_wait(
+	xfs_inode_t	*ip)
+{
+	__xfs_iunpin_wait(ip, 1);
+}
 
-	wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+static inline void
+xfs_iunpin_nowait(
+	xfs_inode_t	*ip)
+{
+	__xfs_iunpin_wait(ip, 0);
 }
 
 
@@ -3027,6 +2986,145 @@ xfs_iflush_fork(
 	return 0;
 }
 
+STATIC int
+xfs_iflush_cluster(
+	xfs_inode_t	*ip,
+	xfs_buf_t	*bp)
+{
+	xfs_mount_t		*mp = ip->i_mount;
+	xfs_perag_t		*pag = xfs_get_perag(mp, ip->i_ino);
+	unsigned long		first_index, mask;
+	int			ilist_size;
+	xfs_inode_t		**ilist;
+	xfs_inode_t		*iq;
+	int			nr_found;
+	int			clcount = 0;
+	int			bufwasdelwri;
+	int			i;
+
+	ASSERT(pag->pagi_inodeok);
+	ASSERT(pag->pag_ici_init);
+
+	ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+	ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+	if (!ilist)
+		return 0;
+
+	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
+	read_lock(&pag->pag_ici_lock);
+	/* really need a gang lookup range call here */
+	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+					first_index,
+					XFS_INODE_CLUSTER_SIZE(mp));
+	if (nr_found == 0)
+		goto out_free;
+
+	for (i = 0; i < nr_found; i++) {
+		iq = ilist[i];
+		if (iq == ip)
+			continue;
+		/* if the inode lies outside this cluster, we're done. */
+		if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
+			break;
+		/*
+		 * Do an un-protected check to see if the inode is dirty and
+		 * is a candidate for flushing.  These checks will be repeated
+		 * later after the appropriate locks are acquired.
+		 */
+		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+			continue;
+
+		/*
+		 * Try to get locks.  If any are unavailable or it is pinned,
+		 * then this inode cannot be flushed and is skipped.
+		 */
+
+		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+			continue;
+		if (!xfs_iflock_nowait(iq)) {
+			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+			continue;
+		}
+		if (xfs_ipincount(iq)) {
+			xfs_ifunlock(iq);
+			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+			continue;
+		}
+
+		/*
+		 * arriving here means that this inode can be flushed.  First
+		 * re-check that it's dirty before flushing.
+		 */
+		if (!xfs_inode_clean(iq)) {
+			int	error;
+			error = xfs_iflush_int(iq, bp);
+			if (error) {
+				xfs_iunlock(iq, XFS_ILOCK_SHARED);
+				goto cluster_corrupt_out;
+			}
+			clcount++;
+		} else {
+			xfs_ifunlock(iq);
+		}
+		xfs_iunlock(iq, XFS_ILOCK_SHARED);
+	}
+
+	if (clcount) {
+		XFS_STATS_INC(xs_icluster_flushcnt);
+		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+	}
+
+out_free:
+	read_unlock(&pag->pag_ici_lock);
+	kmem_free(ilist, ilist_size);
+	return 0;
+
+
+cluster_corrupt_out:
+	/*
+	 * Corruption detected in the clustering loop.  Invalidate the
+	 * inode buffer and shut down the filesystem.
+	 */
+	read_unlock(&pag->pag_ici_lock);
+	/*
+	 * Clean up the buffer.  If it was B_DELWRI, just release it --
+	 * brelse can handle it with no problems.  If not, shut down the
+	 * filesystem before releasing the buffer.
+	 */
+	bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+	if (bufwasdelwri)
+		xfs_buf_relse(bp);
+
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+
+	if (!bufwasdelwri) {
+		/*
+		 * Just like incore_relse: if we have b_iodone functions,
+		 * mark the buffer as an error and call them.  Otherwise
+		 * mark it as stale and brelse.
+		 */
+		if (XFS_BUF_IODONE_FUNC(bp)) {
+			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+			XFS_BUF_UNDONE(bp);
+			XFS_BUF_STALE(bp);
+			XFS_BUF_SHUT(bp);
+			XFS_BUF_ERROR(bp,EIO);
+			xfs_biodone(bp);
+		} else {
+			XFS_BUF_STALE(bp);
+			xfs_buf_relse(bp);
+		}
+	}
+
+	/*
+	 * Unlocks the flush lock
+	 */
+	xfs_iflush_abort(iq);
+	kmem_free(ilist, ilist_size);
+	return XFS_ERROR(EFSCORRUPTED);
+}
+
 /*
  * xfs_iflush() will write a modified inode's changes out to the
  * inode's on disk home.  The caller must have the inode lock held
@@ -3046,11 +3144,7 @@ xfs_iflush(
 	xfs_dinode_t		*dip;
 	xfs_mount_t		*mp;
 	int			error;
-	/* REFERENCED */
-	xfs_inode_t		*iq;
-	int			clcount;	/* count of inodes clustered */
-	int			bufwasdelwri;
-	struct hlist_node	*entry;
+	int			noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
 	enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
 
 	XFS_STATS_INC(xs_iflush_count);
@@ -3067,8 +3161,7 @@ xfs_iflush(
 	 * If the inode isn't dirty, then just release the inode
 	 * flush lock and do nothing.
 	 */
-	if ((ip->i_update_core == 0) &&
-	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+	if (xfs_inode_clean(ip)) {
 		ASSERT((iip != NULL) ?
 			 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
 		xfs_ifunlock(ip);
@@ -3076,11 +3169,21 @@ xfs_iflush(
 	}
 
 	/*
-	 * We can't flush the inode until it is unpinned, so
-	 * wait for it.  We know noone new can pin it, because
-	 * we are holding the inode lock shared and you need
-	 * to hold it exclusively to pin the inode.
+	 * We can't flush the inode until it is unpinned, so wait for it if we
+	 * are allowed to block.  We know noone new can pin it, because we are
+	 * holding the inode lock shared and you need to hold it exclusively to
+	 * pin the inode.
+	 *
+	 * If we are not allowed to block, force the log out asynchronously so
+	 * that when we come back the inode will be unpinned. If other inodes
+	 * in the same cluster are dirty, they will probably write the inode
+	 * out for us if they occur after the log force completes.
 	 */
+	if (noblock && xfs_ipincount(ip)) {
+		xfs_iunpin_nowait(ip);
+		xfs_ifunlock(ip);
+		return EAGAIN;
+	}
 	xfs_iunpin_wait(ip);
 
 	/*
@@ -3097,15 +3200,6 @@ xfs_iflush(
 	}
 
 	/*
-	 * Get the buffer containing the on-disk inode.
-	 */
-	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
-	if (error) {
-		xfs_ifunlock(ip);
-		return error;
-	}
-
-	/*
 	 * Decide how buffer will be flushed out.  This is done before
 	 * the call to xfs_iflush_int because this field is zeroed by it.
 	 */
@@ -3121,6 +3215,7 @@ xfs_iflush(
 		case XFS_IFLUSH_DELWRI_ELSE_SYNC:
 			flags = 0;
 			break;
+		case XFS_IFLUSH_ASYNC_NOBLOCK:
 		case XFS_IFLUSH_ASYNC:
 		case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
 			flags = INT_ASYNC;
@@ -3140,6 +3235,7 @@ xfs_iflush(
 		case XFS_IFLUSH_DELWRI:
 			flags = INT_DELWRI;
 			break;
+		case XFS_IFLUSH_ASYNC_NOBLOCK:
 		case XFS_IFLUSH_ASYNC:
 			flags = INT_ASYNC;
 			break;
@@ -3154,89 +3250,36 @@ xfs_iflush(
 	}
 
 	/*
-	 * First flush out the inode that xfs_iflush was called with.
+	 * Get the buffer containing the on-disk inode.
 	 */
-	error = xfs_iflush_int(ip, bp);
-	if (error) {
-		goto corrupt_out;
+	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0,
+				noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
+	if (error || !bp) {
+		xfs_ifunlock(ip);
+		return error;
 	}
 
 	/*
-	 * inode clustering:
-	 * see if other inodes can be gathered into this write
+	 * First flush out the inode that xfs_iflush was called with.
 	 */
-	spin_lock(&ip->i_cluster->icl_lock);
-	ip->i_cluster->icl_buf = bp;
-
-	clcount = 0;
-	hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
-		if (iq == ip)
-			continue;
-
-		/*
-		 * Do an un-protected check to see if the inode is dirty and
-		 * is a candidate for flushing.  These checks will be repeated
-		 * later after the appropriate locks are acquired.
-		 */
-		iip = iq->i_itemp;
-		if ((iq->i_update_core == 0) &&
-		    ((iip == NULL) ||
-		     !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-		      xfs_ipincount(iq) == 0) {
-			continue;
-		}
-
-		/*
-		 * Try to get locks.  If any are unavailable,
-		 * then this inode cannot be flushed and is skipped.
-		 */
-
-		/* get inode locks (just i_lock) */
-		if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
-			/* get inode flush lock */
-			if (xfs_iflock_nowait(iq)) {
-				/* check if pinned */
-				if (xfs_ipincount(iq) == 0) {
-					/* arriving here means that
-					 * this inode can be flushed.
-					 * first re-check that it's
-					 * dirty
-					 */
-					iip = iq->i_itemp;
-					if ((iq->i_update_core != 0)||
-					    ((iip != NULL) &&
-					     (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
-						clcount++;
-						error = xfs_iflush_int(iq, bp);
-						if (error) {
-							xfs_iunlock(iq,
-								    XFS_ILOCK_SHARED);
-							goto cluster_corrupt_out;
-						}
-					} else {
-						xfs_ifunlock(iq);
-					}
-				} else {
-					xfs_ifunlock(iq);
-				}
-			}
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
-		}
-	}
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	if (clcount) {
-		XFS_STATS_INC(xs_icluster_flushcnt);
-		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
-	}
+	error = xfs_iflush_int(ip, bp);
+	if (error)
+		goto corrupt_out;
 
 	/*
-	 * If the buffer is pinned then push on the log so we won't
+	 * If the buffer is pinned then push on the log now so we won't
 	 * get stuck waiting in the write for too long.
 	 */
-	if (XFS_BUF_ISPINNED(bp)){
+	if (XFS_BUF_ISPINNED(bp))
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-	}
+
+	/*
+	 * inode clustering:
+	 * see if other inodes can be gathered into this write
+	 */
+	error = xfs_iflush_cluster(ip, bp);
+	if (error)
+		goto cluster_corrupt_out;
 
 	if (flags & INT_DELWRI) {
 		xfs_bdwrite(mp, bp);
@@ -3250,52 +3293,11 @@ xfs_iflush(
 corrupt_out:
 	xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-	xfs_iflush_abort(ip);
-	/*
-	 * Unlocks the flush lock
-	 */
-	return XFS_ERROR(EFSCORRUPTED);
-
 cluster_corrupt_out:
-	/* Corruption detected in the clustering loop.  Invalidate the
-	 * inode buffer and shut down the filesystem.
-	 */
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	/*
-	 * Clean up the buffer.  If it was B_DELWRI, just release it --
-	 * brelse can handle it with no problems.  If not, shut down the
-	 * filesystem before releasing the buffer.
-	 */
-	if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
-		xfs_buf_relse(bp);
-	}
-
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
-	if(!bufwasdelwri)  {
-		/*
-		 * Just like incore_relse: if we have b_iodone functions,
-		 * mark the buffer as an error and call them.  Otherwise
-		 * mark it as stale and brelse.
-		 */
-		if (XFS_BUF_IODONE_FUNC(bp)) {
-			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
-			XFS_BUF_UNDONE(bp);
-			XFS_BUF_STALE(bp);
-			XFS_BUF_SHUT(bp);
-			XFS_BUF_ERROR(bp,EIO);
-			xfs_biodone(bp);
-		} else {
-			XFS_BUF_STALE(bp);
-			xfs_buf_relse(bp);
-		}
-	}
-
-	xfs_iflush_abort(iq);
 	/*
 	 * Unlocks the flush lock
 	 */
+	xfs_iflush_abort(ip);
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
@@ -3325,8 +3327,7 @@ xfs_iflush_int(
 	 * If the inode isn't dirty, then just release the inode
 	 * flush lock and do nothing.
 	 */
-	if ((ip->i_update_core == 0) &&
-	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+	if (xfs_inode_clean(ip)) {
 		xfs_ifunlock(ip);
 		return 0;
 	}
@@ -3434,9 +3435,9 @@ xfs_iflush_int(
 	 * has been updated, then make the conversion permanent.
 	 */
 	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
-	       XFS_SB_VERSION_HASNLINK(&mp->m_sb));
+	       xfs_sb_version_hasnlink(&mp->m_sb));
 	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
-		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
 			/*
 			 * Convert it back.
 			 */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index bfcd72cbaeea..93c37697a72c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -133,19 +133,6 @@ typedef struct dm_attrs_s {
 } dm_attrs_t;
 
 /*
- * This is the xfs inode cluster structure.  This structure is used by
- * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
- * the same time.
- */
-typedef struct xfs_icluster {
-	struct hlist_head	icl_inodes;	/* list of inodes on cluster */
-	xfs_daddr_t		icl_blkno;	/* starting block number of
-						 * the cluster */
-	struct xfs_buf		*icl_buf;	/* the inode buffer */
-	spinlock_t		icl_lock;	/* inode list lock */
-} xfs_icluster_t;
-
-/*
  * This is the xfs in-core inode structure.
  * Most of the on-disk inode is embedded in the i_d field.
  *
@@ -240,10 +227,6 @@ typedef struct xfs_inode {
 	atomic_t		i_pincount;	/* inode pin count */
 	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
 	spinlock_t		i_flags_lock;	/* inode i_flags lock */
-#ifdef HAVE_REFCACHE
-	struct xfs_inode	**i_refcache;	/* ptr to entry in ref cache */
-	struct xfs_inode	*i_release;	/* inode to unref */
-#endif
 	/* Miscellaneous state. */
 	unsigned short		i_flags;	/* see defined flags below */
 	unsigned char		i_update_core;	/* timestamps/size is dirty */
@@ -252,8 +235,6 @@ typedef struct xfs_inode {
 	unsigned int		i_delayed_blks;	/* count of delay alloc blks */
 
 	xfs_icdinode_t		i_d;		/* most of ondisk inode */
-	xfs_icluster_t		*i_cluster;	/* cluster list header */
-	struct hlist_node	i_cnode;	/* cluster link node */
 
 	xfs_fsize_t		i_size;		/* in-memory size */
 	xfs_fsize_t		i_new_size;	/* size when write completes */
@@ -461,6 +442,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 #define	XFS_IFLUSH_SYNC			3
 #define	XFS_IFLUSH_ASYNC		4
 #define	XFS_IFLUSH_DELWRI		5
+#define	XFS_IFLUSH_ASYNC_NOBLOCK	6
 
 /*
  * Flags for xfs_itruncate_start().
@@ -515,7 +497,7 @@ int		xfs_finish_reclaim_all(struct xfs_mount *, int);
  */
 int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
 			  xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
-			  xfs_daddr_t, uint);
+			  xfs_daddr_t, uint, uint);
 int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			  xfs_inode_t **, xfs_daddr_t, uint);
 int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
@@ -597,7 +579,6 @@ void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #define	xfs_inobp_check(mp, bp)
 #endif /* DEBUG */
 
-extern struct kmem_zone	*xfs_icluster_zone;
 extern struct kmem_zone	*xfs_ifork_zone;
 extern struct kmem_zone	*xfs_inode_zone;
 extern struct kmem_zone	*xfs_ili_zone;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 034ca7202295..2c775b4ae9e6 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -296,9 +296,9 @@ xfs_inode_item_format(
 	 */
 	mp = ip->i_mount;
 	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
-	       XFS_SB_VERSION_HASNLINK(&mp->m_sb));
+	       xfs_sb_version_hasnlink(&mp->m_sb));
 	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
-		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
 			/*
 			 * Convert it back.
 			 */
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index bfe92ea17952..40513077ab36 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -168,6 +168,14 @@ static inline int xfs_ilog_fext(int w)
 	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
 }
 
+static inline int xfs_inode_clean(xfs_inode_t *ip)
+{
+	return (!ip->i_itemp ||
+		!(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
+	       !ip->i_update_core;
+}
+
+
 #ifdef __KERNEL__
 
 extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 658aab6b1bbf..45d8776408ef 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -45,7 +45,7 @@ xfs_internal_inum(
 	xfs_ino_t	ino)
 {
 	return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
-		(XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+		(xfs_sb_version_hasquota(&mp->m_sb) &&
 		 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
 }
 
@@ -614,7 +614,8 @@ xfs_bulkstat(
 							xfs_buf_relse(bp);
 						error = xfs_itobp(mp, NULL, ip,
 								&dip, &bp, bno,
-								XFS_IMAP_BULKSTAT);
+								XFS_IMAP_BULKSTAT,
+								XFS_BUF_LOCK);
 						if (!error)
 							clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
 						kmem_zone_free(xfs_inode_zone, ip);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a75edca1860f..1fa980933895 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 		xlog_state_want_sync(log, iclog);
 		(void) xlog_state_release_iclog(log, iclog);
@@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		 */
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 
 		xlog_state_want_sync(log, iclog);
@@ -1090,7 +1090,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 			size >>= 1;
 		}
 
-		if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
+		if (xfs_sb_version_haslogv2(&mp->m_sb)) {
 			/* # headers = size / 32K
 			 * one header holds cycles from 32K of data
 			 */
@@ -1186,13 +1186,13 @@ xlog_alloc_log(xfs_mount_t	*mp,
 	log->l_grant_reserve_cycle = 1;
 	log->l_grant_write_cycle = 1;
 
-	if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) {
+	if (xfs_sb_version_hassector(&mp->m_sb)) {
 		log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
 		ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
 		/* for larger sector sizes, must have v2 or external log */
 		ASSERT(log->l_sectbb_log == 0 ||
 			log->l_logBBstart == 0 ||
-			XFS_SB_VERSION_HASLOGV2(&mp->m_sb));
+			xfs_sb_version_haslogv2(&mp->m_sb));
 		ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
 	}
 	log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
@@ -1247,7 +1247,7 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		memset(head, 0, sizeof(xlog_rec_header_t));
 		head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
 		head->h_version = cpu_to_be32(
-			XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
+			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
 		head->h_size = cpu_to_be32(log->l_iclog_size);
 		/* new fields */
 		head->h_fmt = cpu_to_be32(XLOG_FMT);
@@ -1402,10 +1402,10 @@ xlog_sync(xlog_t		*log,
 	int		roundoff;       /* roundoff to BB or stripe */
 	int		split = 0;	/* split write into two regions */
 	int		error;
-	int		v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
+	int		v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
 
 	XFS_STATS_INC(xs_log_writes);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 
 	/* Add for LR header */
 	count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -2309,7 +2309,7 @@ xlog_state_done_syncing(
 
 	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
 	       iclog->ic_state == XLOG_STATE_IOERROR);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
 
 
@@ -2391,7 +2391,7 @@ restart:
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
 	head = &iclog->ic_header;
 
-	iclog->ic_refcnt++;			/* prevents sync */
+	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
 	log_offset = iclog->ic_offset;
 
 	/* On the 1st write to an iclog, figure out lsn.  This works
@@ -2423,12 +2423,12 @@ restart:
 		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
 
 		/* If I'm the only one writing to this iclog, sync it to disk */
-		if (iclog->ic_refcnt == 1) {
+		if (atomic_read(&iclog->ic_refcnt) == 1) {
 			spin_unlock(&log->l_icloglock);
 			if ((error = xlog_state_release_iclog(log, iclog)))
 				return error;
 		} else {
-			iclog->ic_refcnt--;
+			atomic_dec(&iclog->ic_refcnt);
 			spin_unlock(&log->l_icloglock);
 		}
 		goto restart;
@@ -2813,33 +2813,35 @@ xlog_state_put_ticket(xlog_t	    *log,
  *
  */
 STATIC int
-xlog_state_release_iclog(xlog_t		*log,
-			 xlog_in_core_t	*iclog)
+xlog_state_release_iclog(
+	xlog_t		*log,
+	xlog_in_core_t	*iclog)
 {
 	int		sync = 0;	/* do we sync? */
 
-	xlog_assign_tail_lsn(log->l_mp);
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		return XFS_ERROR(EIO);
 
-	spin_lock(&log->l_icloglock);
+	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
+	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
+		return 0;
 
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		spin_unlock(&log->l_icloglock);
 		return XFS_ERROR(EIO);
 	}
-
-	ASSERT(iclog->ic_refcnt > 0);
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
 	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
 
-	if (--iclog->ic_refcnt == 0 &&
-	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+		/* update tail before writing to iclog */
+		xlog_assign_tail_lsn(log->l_mp);
 		sync++;
 		iclog->ic_state = XLOG_STATE_SYNCING;
 		iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
 		xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
 		/* cycle incremented when incrementing curr_block */
 	}
-
 	spin_unlock(&log->l_icloglock);
 
 	/*
@@ -2849,11 +2851,9 @@ xlog_state_release_iclog(xlog_t		*log,
 	 * this iclog has consistent data, so we ignore IOERROR
 	 * flags after this point.
 	 */
-	if (sync) {
+	if (sync)
 		return xlog_sync(log, iclog);
-	}
 	return 0;
-
 }	/* xlog_state_release_iclog */
 
 
@@ -2881,7 +2881,7 @@ xlog_state_switch_iclogs(xlog_t		*log,
 	log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize);
 
 	/* Round up to next log-sunit */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
 	    log->l_mp->m_sb.sb_logsunit > 1) {
 		__uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
 		log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
@@ -2953,7 +2953,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 		 * previous iclog and go to sleep.
 		 */
 		if (iclog->ic_state == XLOG_STATE_DIRTY ||
-		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
+		    (atomic_read(&iclog->ic_refcnt) == 0
+		     && iclog->ic_offset == 0)) {
 			iclog = iclog->ic_prev;
 			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
 			    iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2961,14 +2962,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 			else
 				goto maybe_sleep;
 		} else {
-			if (iclog->ic_refcnt == 0) {
+			if (atomic_read(&iclog->ic_refcnt) == 0) {
 				/* We are the only one with access to this
 				 * iclog.  Flush it out now.  There should
 				 * be a roundoff of zero to show that someone
 				 * has already taken care of the roundoff from
 				 * the previous sync.
 				 */
-				iclog->ic_refcnt++;
+				atomic_inc(&iclog->ic_refcnt);
 				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 				xlog_state_switch_iclogs(log, iclog, 0);
 				spin_unlock(&log->l_icloglock);
@@ -3100,7 +3101,7 @@ try_again:
 			already_slept = 1;
 			goto try_again;
 		} else {
-			iclog->ic_refcnt++;
+			atomic_inc(&iclog->ic_refcnt);
 			xlog_state_switch_iclogs(log, iclog, 0);
 			spin_unlock(&log->l_icloglock);
 			if (xlog_state_release_iclog(log, iclog))
@@ -3334,7 +3335,7 @@ xlog_ticket_get(xlog_t		*log,
 	unit_bytes += sizeof(xlog_op_header_t) * num_headers;
 
 	/* for roundoff padding for transaction data and one for commit record */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
 	    log->l_mp->m_sb.sb_logsunit > 1) {
 		/* log su roundoff */
 		unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e008233ee249..01c63db25a1d 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -49,10 +49,10 @@ struct xfs_mount;
 #define XLOG_HEADER_SIZE	512
 
 #define XLOG_REC_SHIFT(log) \
-	BTOBB(1 << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \
+	BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
 	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 #define XLOG_TOTAL_REC_SHIFT(log) \
-	BTOBB(XLOG_MAX_ICLOGS << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \
+	BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
 	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 
 
@@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
 #endif
 	int			ic_size;
 	int			ic_offset;
-	int			ic_refcnt;
+	atomic_t		ic_refcnt;
 	int			ic_bwritecnt;
 	ushort_t		ic_state;
 	char			*ic_datap;	/* pointer to iclog data */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b82d5d4d2462..cd24711ae276 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -478,7 +478,7 @@ xlog_find_verify_log_record(
 	 * reset last_blk.  Only when last_blk points in the middle of a log
 	 * record do we update last_blk.
 	 */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		uint	h_size = be32_to_cpu(head->h_size);
 
 		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -888,7 +888,7 @@ xlog_find_tail(
 	 * unmount record if there is one, so we pass the lsn of the
 	 * unmount record rather than the block after it.
 	 */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		int	h_size = be32_to_cpu(rhead->h_size);
 		int	h_version = be32_to_cpu(rhead->h_version);
 
@@ -1101,7 +1101,7 @@ xlog_add_record(
 	recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
 	recp->h_cycle = cpu_to_be32(cycle);
 	recp->h_version = cpu_to_be32(
-			XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
+			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
 	recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
 	recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
 	recp->h_fmt = cpu_to_be32(XLOG_FMT);
@@ -3214,7 +3214,8 @@ xlog_recover_process_iunlinks(
 					 * next inode in the bucket.
 					 */
 					error = xfs_itobp(mp, NULL, ip, &dip,
-							&ibp, 0, 0);
+							&ibp, 0, 0,
+							XFS_BUF_LOCK);
 					ASSERT(error || (dip != NULL));
 				}
 
@@ -3348,7 +3349,7 @@ xlog_pack_data(
 		dp += BBSIZE;
 	}
 
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		xhdr = (xlog_in_core_2_t *)&iclog->ic_header;
 		for ( ; i < BTOBB(size); i++) {
 			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3388,7 +3389,7 @@ xlog_unpack_data_checksum(
 			    be32_to_cpu(rhead->h_chksum), chksum);
 		    cmn_err(CE_DEBUG,
 "XFS: Disregard message if filesystem was created with non-DEBUG kernel");
-		    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+		    if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 			    cmn_err(CE_DEBUG,
 				"XFS: LogR this is a LogV2 filesystem\n");
 		    }
@@ -3415,7 +3416,7 @@ xlog_unpack_data(
 		dp += BBSIZE;
 	}
 
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		xhdr = (xlog_in_core_2_t *)rhead;
 		for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
 			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3494,7 +3495,7 @@ xlog_do_recovery_pass(
 	 * Read the header of the tail block and get the iclog buffer size from
 	 * h_size.  Use this to tell how many sectors make up the log header.
 	 */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		/*
 		 * When using variable length iclogs, read first sector of
 		 * iclog header and extract the header size from it.  Get a
@@ -3838,7 +3839,7 @@ xlog_do_recover(
 	sbp = &log->l_mp->m_sb;
 	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
 	ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
-	ASSERT(XFS_SB_GOOD_VERSION(sbp));
+	ASSERT(xfs_sb_good_version(sbp));
 	xfs_buf_relse(bp);
 
 	/* We've re-read the superblock so re-initialize per-cpu counters */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6409b3762995..5be0328bbfbb 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -44,7 +44,7 @@
 #include "xfs_quota.h"
 #include "xfs_fsops.h"
 
-STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
+STATIC void	xfs_mount_log_sb(xfs_mount_t *, __int64_t);
 STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
@@ -119,6 +119,7 @@ static const struct {
     { offsetof(xfs_sb_t, sb_logsectsize),0 },
     { offsetof(xfs_sb_t, sb_logsunit),	 0 },
     { offsetof(xfs_sb_t, sb_features2),	 0 },
+    { offsetof(xfs_sb_t, sb_bad_features2), 0 },
     { sizeof(xfs_sb_t),			 0 }
 };
 
@@ -225,7 +226,7 @@ xfs_mount_validate_sb(
 		return XFS_ERROR(EWRONGFS);
 	}
 
-	if (!XFS_SB_GOOD_VERSION(sbp)) {
+	if (!xfs_sb_good_version(sbp)) {
 		xfs_fs_mount_cmn_err(flags, "bad version");
 		return XFS_ERROR(EWRONGFS);
 	}
@@ -300,7 +301,7 @@ xfs_mount_validate_sb(
 	/*
 	 * Version 1 directory format has never worked on Linux.
 	 */
-	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
+	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
 		xfs_fs_mount_cmn_err(flags,
 			"file system using version 1 directory format");
 		return XFS_ERROR(ENOSYS);
@@ -449,6 +450,7 @@ xfs_sb_from_disk(
 	to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
 	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
 	to->sb_features2 = be32_to_cpu(from->sb_features2);
+	to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
 }
 
 /*
@@ -781,7 +783,7 @@ xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
 		 * Update superblock with new values
 		 * and log changes
 		 */
-		if (XFS_SB_VERSION_HASDALIGN(sbp)) {
+		if (xfs_sb_version_hasdalign(sbp)) {
 			if (sbp->sb_unit != mp->m_dalign) {
 				sbp->sb_unit = mp->m_dalign;
 				*update_flags |= XFS_SB_UNIT;
@@ -792,7 +794,7 @@ xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
 			}
 		}
 	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
-		    XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
+		    xfs_sb_version_hasdalign(&mp->m_sb)) {
 			mp->m_dalign = sbp->sb_unit;
 			mp->m_swidth = sbp->sb_width;
 	}
@@ -869,7 +871,7 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
 STATIC void
 xfs_set_inoalignment(xfs_mount_t *mp)
 {
-	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
+	if (xfs_sb_version_hasalign(&mp->m_sb) &&
 	    mp->m_sb.sb_inoalignmt >=
 	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
 		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
@@ -962,14 +964,29 @@ xfs_mountfs(
 	int		uuid_mounted = 0;
 	int		error = 0;
 
-	if (mp->m_sb_bp == NULL) {
-		error = xfs_readsb(mp, mfsi_flags);
-		if (error)
-			return error;
-	}
 	xfs_mount_common(mp, sbp);
 
 	/*
+	 * Check for a bad features2 field alignment. This happened on
+	 * some platforms due to xfs_sb_t not being 64bit size aligned
+	 * when sb_features was added and hence the compiler put it in
+	 * the wrong place.
+	 *
+	 * If we detect a bad field, we or the set bits into the existing
+	 * features2 field in case it has already been modified and we
+	 * don't want to lose any features. Zero the bad one and mark
+	 * the two fields as needing updates once the transaction subsystem
+	 * is online.
+	 */
+	if (xfs_sb_has_bad_features2(sbp)) {
+		cmn_err(CE_WARN,
+			"XFS: correcting sb_features alignment problem");
+		sbp->sb_features2 |= sbp->sb_bad_features2;
+		sbp->sb_bad_features2 = 0;
+		update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;
+	}
+
+	/*
 	 * Check if sb_agblocks is aligned at stripe boundary
 	 * If sb_agblocks is NOT aligned turn off m_dalign since
 	 * allocator alignment is within an ag, therefore ag has
@@ -1159,11 +1176,10 @@ xfs_mountfs(
 	}
 
 	/*
-	 * If fs is not mounted readonly, then update the superblock
-	 * unit and width changes.
+	 * If fs is not mounted readonly, then update the superblock changes.
 	 */
 	if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY))
-		xfs_mount_log_sbunit(mp, update_flags);
+		xfs_mount_log_sb(mp, update_flags);
 
 	/*
 	 * Initialise the XFS quota management subsystem for this mount
@@ -1878,13 +1894,14 @@ xfs_uuid_unmount(
  * be altered by the mount options. Only the first superblock is updated.
  */
 STATIC void
-xfs_mount_log_sbunit(
+xfs_mount_log_sb(
 	xfs_mount_t	*mp,
 	__int64_t	fields)
 {
 	xfs_trans_t	*tp;
 
-	ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID));
+	ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
+			 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
 	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f7c620ec6e69..7b37fa009297 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,17 +66,17 @@ struct xfs_mru_cache;
  * Prototypes and functions for the Data Migration subsystem.
  */
 
-typedef int	(*xfs_send_data_t)(int, bhv_vnode_t *,
-			xfs_off_t, size_t, int, bhv_vrwlock_t *);
+typedef int	(*xfs_send_data_t)(int, struct xfs_inode *,
+			xfs_off_t, size_t, int, int *);
 typedef int	(*xfs_send_mmap_t)(struct vm_area_struct *, uint);
-typedef int	(*xfs_send_destroy_t)(bhv_vnode_t *, dm_right_t);
+typedef int	(*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
 typedef int	(*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
-			bhv_vnode_t *,
-			dm_right_t, bhv_vnode_t *, dm_right_t,
+			struct xfs_inode *, dm_right_t,
+			struct xfs_inode *, dm_right_t,
 			char *, char *, mode_t, int, int);
 typedef int	(*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
 			char *, char *);
-typedef void	(*xfs_send_unmount_t)(struct xfs_mount *, bhv_vnode_t *,
+typedef void	(*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
 			dm_right_t, mode_t, int, int);
 
 typedef struct xfs_dmops {
@@ -88,20 +88,20 @@ typedef struct xfs_dmops {
 	xfs_send_unmount_t	xfs_send_unmount;
 } xfs_dmops_t;
 
-#define XFS_SEND_DATA(mp, ev,vp,off,len,fl,lock) \
-	(*(mp)->m_dm_ops->xfs_send_data)(ev,vp,off,len,fl,lock)
+#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
+	(*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
 #define XFS_SEND_MMAP(mp, vma,fl) \
 	(*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl)
-#define XFS_SEND_DESTROY(mp, vp,right) \
-	(*(mp)->m_dm_ops->xfs_send_destroy)(vp,right)
+#define XFS_SEND_DESTROY(mp, ip,right) \
+	(*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
 #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
 	(*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
 #define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
 	(*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl)
 #define XFS_SEND_MOUNT(mp,right,path,name) \
 	(*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
-#define XFS_SEND_UNMOUNT(mp, vp,right,mode,rval,fl) \
-	(*(mp)->m_dm_ops->xfs_send_unmount)(mp,vp,right,mode,rval,fl)
+#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \
+	(*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
 
 
 /*
@@ -366,7 +366,7 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_SMALL_INUMS	(1ULL << 15)	/* users wants 32bit inodes */
 #define XFS_MOUNT_NOUUID	(1ULL << 16)	/* ignore uuid during mount */
 #define XFS_MOUNT_BARRIER	(1ULL << 17)
-#define XFS_MOUNT_IDELETE	(1ULL << 18)	/* delete empty inode clusters*/
+#define XFS_MOUNT_IKEEP		(1ULL << 18)	/* keep empty inode clusters*/
 #define XFS_MOUNT_SWALLOC	(1ULL << 19)	/* turn on stripe width
 						 * allocation */
 #define XFS_MOUNT_RDONLY	(1ULL << 20)	/* read-only fs */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 7eb157a59f9e..c4d0bac56a5a 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -36,7 +36,6 @@
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
-#include "xfs_refcache.h"
 #include "xfs_utils.h"
 #include "xfs_trans_space.h"
 #include "xfs_vnodeops.h"
@@ -94,7 +93,8 @@ xfs_lock_for_rename(
 	xfs_inode_t	**i_tab,/* array of inode returned, sorted */
 	int		*num_inodes)  /* number of inodes in array */
 {
-	xfs_inode_t		*ip1, *ip2, *temp;
+	xfs_inode_t		*ip1 = VNAME_TO_INODE(vname1);
+	xfs_inode_t		*ip2, *temp;
 	xfs_ino_t		inum1, inum2;
 	int			error;
 	int			i, j;
@@ -110,16 +110,11 @@ xfs_lock_for_rename(
 	 * to see if we still have the right inodes, directories, etc.
 	 */
 	lock_mode = xfs_ilock_map_shared(dp1);
-	error = xfs_get_dir_entry(vname1, &ip1);
-	if (error) {
-		xfs_iunlock_map_shared(dp1, lock_mode);
-		return error;
-	}
+	IHOLD(ip1);
+	xfs_itrace_ref(ip1);
 
 	inum1 = ip1->i_ino;
 
-	ASSERT(ip1);
-	xfs_itrace_ref(ip1);
 
 	/*
 	 * Unlock dp1 and lock dp2 if they are different.
@@ -224,12 +219,11 @@ int
 xfs_rename(
 	xfs_inode_t	*src_dp,
 	bhv_vname_t	*src_vname,
-	bhv_vnode_t	*target_dir_vp,
+	xfs_inode_t	*target_dp,
 	bhv_vname_t	*target_vname)
 {
-	bhv_vnode_t	*src_dir_vp = XFS_ITOV(src_dp);
 	xfs_trans_t	*tp;
-	xfs_inode_t	*target_dp, *src_ip, *target_ip;
+	xfs_inode_t	*src_ip, *target_ip;
 	xfs_mount_t	*mp = src_dp->i_mount;
 	int		new_parent;		/* moving to a new dir */
 	int		src_is_directory;	/* src_name is a directory */
@@ -249,22 +243,13 @@ xfs_rename(
 	int		target_namelen = VNAMELEN(target_vname);
 
 	xfs_itrace_entry(src_dp);
-	xfs_itrace_entry(xfs_vtoi(target_dir_vp));
-
-	/*
-	 * Find the XFS behavior descriptor for the target directory
-	 * vnode since it was not handed to us.
-	 */
-	target_dp = xfs_vtoi(target_dir_vp);
-	if (target_dp == NULL) {
-		return XFS_ERROR(EXDEV);
-	}
+	xfs_itrace_entry(target_dp);
 
 	if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) ||
 	    DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
-					src_dir_vp, DM_RIGHT_NULL,
-					target_dir_vp, DM_RIGHT_NULL,
+					src_dp, DM_RIGHT_NULL,
+					target_dp, DM_RIGHT_NULL,
 					src_name, target_name,
 					0, 0, 0);
 		if (error) {
@@ -365,10 +350,10 @@ xfs_rename(
 	 * them when they unlock the inodes.  Also, we need to be careful
 	 * not to add an inode to the transaction more than once.
 	 */
-	VN_HOLD(src_dir_vp);
+	IHOLD(src_dp);
 	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
 	if (new_parent) {
-		VN_HOLD(target_dir_vp);
+		IHOLD(target_dp);
 		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
 	}
 	if ((src_ip != src_dp) && (src_ip != target_dp)) {
@@ -580,10 +565,8 @@ xfs_rename(
 	 * the vnode references.
 	 */
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	if (target_ip != NULL) {
-		xfs_refcache_purge_ip(target_ip);
+	if (target_ip != NULL)
 		IRELE(target_ip);
-	}
 	/*
 	 * Let interposed file systems know about removed links.
 	 */
@@ -598,8 +581,8 @@ std_return:
 	if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) ||
 	    DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) {
 		(void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
-					src_dir_vp, DM_RIGHT_NULL,
-					target_dir_vp, DM_RIGHT_NULL,
+					src_dp, DM_RIGHT_NULL,
+					target_dp, DM_RIGHT_NULL,
 					src_name, target_name,
 					0, error, 0);
 	}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ca83ddf72af4..47082c01872d 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -73,6 +73,18 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int,
  */
 
 /*
+ * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
+ */
+STATIC int
+xfs_lowbit32(
+	__uint32_t	v)
+{
+	if (v)
+		return ffs(v) - 1;
+	return -1;
+}
+
+/*
  * Allocate space to the bitmap or summary file, and zero it, for growfs.
  */
 STATIC int				/* error */
@@ -432,7 +444,6 @@ xfs_rtallocate_extent_near(
 	}
 	bbno = XFS_BITTOBLOCK(mp, bno);
 	i = 0;
-	ASSERT(minlen != 0);
 	log2len = xfs_highbit32(minlen);
 	/*
 	 * Loop over all bitmap blocks (bbno + i is current block).
@@ -601,8 +612,6 @@ xfs_rtallocate_extent_size(
 	xfs_suminfo_t	sum;		/* summary information for extents */
 
 	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
-	ASSERT(maxlen != 0);
-
 	/*
 	 * Loop over all the levels starting with maxlen.
 	 * At each level, look at all the bitmap blocks, to see if there
@@ -660,9 +669,6 @@ xfs_rtallocate_extent_size(
 		*rtblock = NULLRTBLOCK;
 		return 0;
 	}
-	ASSERT(minlen != 0);
-	ASSERT(maxlen != 0);
-
 	/*
 	 * Loop over sizes, from maxlen down to minlen.
 	 * This time, when we do the allocations, allow smaller ones
@@ -1948,7 +1954,6 @@ xfs_growfs_rt(
 				  nsbp->sb_blocksize * nsbp->sb_rextsize);
 		nsbp->sb_rextents = nsbp->sb_rblocks;
 		do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
-		ASSERT(nsbp->sb_rextents != 0);
 		nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
 		nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
 		nrsumsize =
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 94660b1a6ccc..b1a83f8ec044 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -89,6 +89,7 @@ struct xfs_mount;
 
 /*
  * Superblock - in core version.  Must match the ondisk version below.
+ * Must be padded to 64 bit alignment.
  */
 typedef struct xfs_sb {
 	__uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */
@@ -145,10 +146,21 @@ typedef struct xfs_sb {
 	__uint16_t	sb_logsectsize;	/* sector size for the log, bytes */
 	__uint32_t	sb_logsunit;	/* stripe unit size for the log */
 	__uint32_t	sb_features2;	/* additional feature bits */
+
+	/*
+	 * bad features2 field as a result of failing to pad the sb
+	 * structure to 64 bits. Some machines will be using this field
+	 * for features2 bits. Easiest just to mark it bad and not use
+	 * it for anything else.
+	 */
+	__uint32_t	sb_bad_features2;
+
+	/* must be padded to 64 bit alignment */
 } xfs_sb_t;
 
 /*
- * Superblock - on disk version.  Must match the in core version below.
+ * Superblock - on disk version.  Must match the in core version above.
+ * Must be padded to 64 bit alignment.
  */
 typedef struct xfs_dsb {
 	__be32		sb_magicnum;	/* magic number == XFS_SB_MAGIC */
@@ -205,6 +217,15 @@ typedef struct xfs_dsb {
 	__be16		sb_logsectsize;	/* sector size for the log, bytes */
 	__be32		sb_logsunit;	/* stripe unit size for the log */
 	__be32		sb_features2;	/* additional feature bits */
+	/*
+	 * bad features2 field as a result of failing to pad the sb
+	 * structure to 64 bits. Some machines will be using this field
+	 * for features2 bits. Easiest just to mark it bad and not use
+	 * it for anything else.
+	 */
+	__be32	sb_bad_features2;
+
+	/* must be padded to 64 bit alignment */
 } xfs_dsb_t;
 
 /*
@@ -223,7 +244,7 @@ typedef enum {
 	XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
 	XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
 	XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
-	XFS_SBS_FEATURES2,
+	XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2,
 	XFS_SBS_FIELDCOUNT
 } xfs_sb_field_t;
 
@@ -248,13 +269,15 @@ typedef enum {
 #define XFS_SB_IFREE		XFS_SB_MVAL(IFREE)
 #define XFS_SB_FDBLOCKS		XFS_SB_MVAL(FDBLOCKS)
 #define XFS_SB_FEATURES2	XFS_SB_MVAL(FEATURES2)
+#define XFS_SB_BAD_FEATURES2	XFS_SB_MVAL(BAD_FEATURES2)
 #define	XFS_SB_NUM_BITS		((int)XFS_SBS_FIELDCOUNT)
 #define	XFS_SB_ALL_BITS		((1LL << XFS_SB_NUM_BITS) - 1)
 #define	XFS_SB_MOD_BITS		\
 	(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
 	 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
 	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-	 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
+	 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
+	 XFS_SB_BAD_FEATURES2)
 
 
 /*
@@ -271,7 +294,6 @@ typedef enum {
 
 #define	XFS_SB_VERSION_NUM(sbp)	((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
 
-#define	XFS_SB_GOOD_VERSION(sbp)	xfs_sb_good_version(sbp)
 #ifdef __KERNEL__
 static inline int xfs_sb_good_version(xfs_sb_t *sbp)
 {
@@ -297,7 +319,14 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
 }
 #endif /* __KERNEL__ */
 
-#define	XFS_SB_VERSION_TONEW(v)	xfs_sb_version_tonew(v)
+/*
+ * Detect a bad features2 field
+ */
+static inline int xfs_sb_has_bad_features2(xfs_sb_t *sbp)
+{
+	return (sbp->sb_bad_features2 != 0);
+}
+
 static inline unsigned xfs_sb_version_tonew(unsigned v)
 {
 	return ((((v) == XFS_SB_VERSION_1) ? \
@@ -308,7 +337,6 @@ static inline unsigned xfs_sb_version_tonew(unsigned v)
 		XFS_SB_VERSION_4);
 }
 
-#define	XFS_SB_VERSION_TOOLD(v)	xfs_sb_version_toold(v)
 static inline unsigned xfs_sb_version_toold(unsigned v)
 {
 	return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \
@@ -320,7 +348,6 @@ static inline unsigned xfs_sb_version_toold(unsigned v)
 				XFS_SB_VERSION_1)));
 }
 
-#define	XFS_SB_VERSION_HASATTR(sbp)	xfs_sb_version_hasattr(sbp)
 static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
 {
 	return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \
@@ -329,7 +356,6 @@ static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
 		  ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
 }
 
-#define	XFS_SB_VERSION_ADDATTR(sbp)	xfs_sb_version_addattr(sbp)
 static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
 {
 	(sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \
@@ -339,7 +365,6 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
 			(XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT)));
 }
 
-#define	XFS_SB_VERSION_HASNLINK(sbp)	xfs_sb_version_hasnlink(sbp)
 static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
 {
 	return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
@@ -347,7 +372,6 @@ static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
 		  ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
 }
 
-#define	XFS_SB_VERSION_ADDNLINK(sbp)	xfs_sb_version_addnlink(sbp)
 static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
 {
 	(sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \
@@ -355,115 +379,63 @@ static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
 		((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT));
 }
 
-#define	XFS_SB_VERSION_HASQUOTA(sbp)	xfs_sb_version_hasquota(sbp)
 static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
 }
 
-#define	XFS_SB_VERSION_ADDQUOTA(sbp)	xfs_sb_version_addquota(sbp)
 static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
 {
 	(sbp)->sb_versionnum = \
 		 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \
 			((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \
-			(XFS_SB_VERSION_TONEW((sbp)->sb_versionnum) | \
+			(xfs_sb_version_tonew((sbp)->sb_versionnum) | \
 			 XFS_SB_VERSION_QUOTABIT));
 }
 
-#define	XFS_SB_VERSION_HASALIGN(sbp)	xfs_sb_version_hasalign(sbp)
 static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT);
 }
 
-#define	XFS_SB_VERSION_SUBALIGN(sbp)	xfs_sb_version_subalign(sbp)
-static inline void xfs_sb_version_subalign(xfs_sb_t *sbp)
-{
-	(sbp)->sb_versionnum = \
-	 XFS_SB_VERSION_TOOLD((sbp)->sb_versionnum & ~XFS_SB_VERSION_ALIGNBIT);
-}
-
-#define XFS_SB_VERSION_HASDALIGN(sbp)	xfs_sb_version_hasdalign(sbp)
 static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
 }
 
-#define XFS_SB_VERSION_ADDDALIGN(sbp)	xfs_sb_version_adddalign(sbp)
-static inline int xfs_sb_version_adddalign(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_DALIGNBIT);
-}
-
-#define XFS_SB_VERSION_HASSHARED(sbp)	xfs_sb_version_hasshared(sbp)
 static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT);
 }
 
-#define XFS_SB_VERSION_ADDSHARED(sbp)	xfs_sb_version_addshared(sbp)
-static inline int xfs_sb_version_addshared(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_SHAREDBIT);
-}
-
-#define XFS_SB_VERSION_SUBSHARED(sbp)	xfs_sb_version_subshared(sbp)
-static inline int xfs_sb_version_subshared(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum & ~XFS_SB_VERSION_SHAREDBIT);
-}
-
-#define XFS_SB_VERSION_HASDIRV2(sbp)	xfs_sb_version_hasdirv2(sbp)
 static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
 }
 
-#define XFS_SB_VERSION_HASLOGV2(sbp)   xfs_sb_version_haslogv2(sbp)
 static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
 }
 
-#define XFS_SB_VERSION_HASEXTFLGBIT(sbp)	xfs_sb_version_hasextflgbit(sbp)
 static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
 }
 
-#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp)	xfs_sb_version_addextflgbit(sbp)
-static inline int xfs_sb_version_addextflgbit(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_EXTFLGBIT);
-}
-
-#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp)	xfs_sb_version_subextflgbit(sbp)
-static inline int xfs_sb_version_subextflgbit(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum & ~XFS_SB_VERSION_EXTFLGBIT);
-}
-
-#define XFS_SB_VERSION_HASSECTOR(sbp)   xfs_sb_version_hassector(sbp)
 static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
 }
 
-#define XFS_SB_VERSION_HASMOREBITS(sbp)	xfs_sb_version_hasmorebits(sbp)
 static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -476,24 +448,22 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
  * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro:
  *
  * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp)
- *	((XFS_SB_VERSION_HASMOREBITS(sbp) &&
+ *	((xfs_sb_version_hasmorebits(sbp) &&
  *	 ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
  */
 
 static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
 {
-	return (XFS_SB_VERSION_HASMOREBITS(sbp) &&	\
+	return (xfs_sb_version_hasmorebits(sbp) &&	\
 		((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
 }
 
-#define XFS_SB_VERSION_HASATTR2(sbp)	xfs_sb_version_hasattr2(sbp)
 static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
 {
-	return (XFS_SB_VERSION_HASMOREBITS(sbp)) &&	\
+	return (xfs_sb_version_hasmorebits(sbp)) &&	\
 		((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT);
 }
 
-#define XFS_SB_VERSION_ADDATTR2(sbp)	xfs_sb_version_addattr2(sbp)
 static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
 {
 	((sbp)->sb_versionnum =	\
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7f40628d85c7..b5effce00089 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -341,7 +341,6 @@ typedef struct xfs_trans {
 	unsigned int		t_rtx_res;	/* # of rt extents resvd */
 	unsigned int		t_rtx_res_used;	/* # of resvd rt extents used */
 	xfs_log_ticket_t	t_ticket;	/* log mgr ticket */
-	sema_t			t_sema;		/* sema for commit completion */
 	xfs_lsn_t		t_lsn;		/* log seq num of start of
 						 * transaction. */
 	xfs_lsn_t		t_commit_lsn;	/* log seq num of end of
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 4d6330eddc8d..76d470d8a1e6 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -261,16 +261,19 @@ xfsaild_push(
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
 	}
 
-	/*
-	 * We reached the target so wait a bit longer for I/O to complete and
-	 * remove pushed items from the AIL before we start the next scan from
-	 * the start of the AIL.
-	 */
-	if ((XFS_LSN_CMP(lsn, target) >= 0)) {
+	if (!count) {
+		/* We're past our target or empty, so idle */
+		tout = 1000;
+	} else if (XFS_LSN_CMP(lsn, target) >= 0) {
+		/*
+		 * We reached the target so wait a bit longer for I/O to
+		 * complete and remove pushed items from the AIL before we
+		 * start the next scan from the start of the AIL.
+		 */
 		tout += 20;
 		last_pushed_lsn = 0;
 	} else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) ||
-		   (count && ((stuck * 100) / count > 90))) {
+		   ((stuck * 100) / count > 90)) {
 		/*
 		 * Either there is a lot of contention on the AIL or we
 		 * are stuck due to operations in progress. "Stuck" in this
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022b..4e5c010f5040 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -304,7 +304,8 @@ xfs_trans_read_buf(
 	if (tp == NULL) {
 		bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
 		if (!bp)
-			return XFS_ERROR(ENOMEM);
+			return (flags & XFS_BUF_TRYLOCK) ?
+					EAGAIN : XFS_ERROR(ENOMEM);
 
 		if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
 			xfs_ioerror_alert("xfs_trans_read_buf", mp,
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 45d740df53b7..47c45ff4a067 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -40,28 +40,6 @@
 #include "xfs_itable.h"
 #include "xfs_utils.h"
 
-/*
- * xfs_get_dir_entry is used to get a reference to an inode given
- * its parent directory inode and the name of the file.	 It does
- * not lock the child inode, and it unlocks the directory before
- * returning.  The directory's generation number is returned for
- * use by a later call to xfs_lock_dir_and_entry.
- */
-int
-xfs_get_dir_entry(
-	bhv_vname_t	*dentry,
-	xfs_inode_t	**ipp)
-{
-	bhv_vnode_t	*vp;
-
-	vp = VNAME_TO_VNODE(dentry);
-
-	*ipp = xfs_vtoi(vp);
-	if (!*ipp)
-		return XFS_ERROR(ENOENT);
-	VN_HOLD(vp);
-	return 0;
-}
 
 int
 xfs_dir_lookup_int(
@@ -339,10 +317,10 @@ xfs_bump_ino_vers2(
 	ip->i_d.di_onlink = 0;
 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 	mp = tp->t_mountp;
-	if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
 		spin_lock(&mp->m_sb_lock);
-		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
-			XFS_SB_VERSION_ADDNLINK(&mp->m_sb);
+		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
+			xfs_sb_version_addnlink(&mp->m_sb);
 			spin_unlock(&mp->m_sb_lock);
 			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
 		} else {
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index f857fcccb723..c4c4a6aa6549 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,7 +21,6 @@
 #define IRELE(ip)	VN_RELE(XFS_ITOV(ip))
 #define IHOLD(ip)	VN_HOLD(XFS_ITOV(ip))
 
-extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **);
 extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *,
 				xfs_inode_t **);
 extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 413587f02155..4c132a87d437 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -43,7 +43,6 @@
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_rw.h"
-#include "xfs_refcache.h"
 #include "xfs_buf_item.h"
 #include "xfs_log_priv.h"
 #include "xfs_dir2_trace.h"
@@ -113,9 +112,6 @@ xfs_init(void)
 	xfs_ili_zone =
 		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
 					KM_ZONE_SPREAD, NULL);
-	xfs_icluster_zone =
-		kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
-					KM_ZONE_SPREAD, NULL);
 
 	/*
 	 * Allocate global trace buffers.
@@ -153,11 +149,9 @@ xfs_cleanup(void)
 	extern kmem_zone_t	*xfs_inode_zone;
 	extern kmem_zone_t	*xfs_efd_zone;
 	extern kmem_zone_t	*xfs_efi_zone;
-	extern kmem_zone_t	*xfs_icluster_zone;
 
 	xfs_cleanup_procfs();
 	xfs_sysctl_unregister();
-	xfs_refcache_destroy();
 	xfs_filestream_uninit();
 	xfs_mru_cache_uninit();
 	xfs_acl_zone_destroy(xfs_acl_zone);
@@ -189,7 +183,6 @@ xfs_cleanup(void)
 	kmem_zone_destroy(xfs_efi_zone);
 	kmem_zone_destroy(xfs_ifork_zone);
 	kmem_zone_destroy(xfs_ili_zone);
-	kmem_zone_destroy(xfs_icluster_zone);
 }
 
 /*
@@ -281,8 +274,8 @@ xfs_start_flags(
 		mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
 	}
 
-	if (ap->flags & XFSMNT_IDELETE)
-		mp->m_flags |= XFS_MOUNT_IDELETE;
+	if (ap->flags & XFSMNT_IKEEP)
+		mp->m_flags |= XFS_MOUNT_IKEEP;
 	if (ap->flags & XFSMNT_DIRSYNC)
 		mp->m_flags |= XFS_MOUNT_DIRSYNC;
 	if (ap->flags & XFSMNT_ATTR2)
@@ -330,7 +323,7 @@ xfs_finish_flags(
 	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
 
 	/* Fail a mount where the logbuf is smaller then the log stripe */
-	if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
 		if ((ap->logbufsize <= 0) &&
 		    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
 			mp->m_logbsize = mp->m_sb.sb_logsunit;
@@ -349,9 +342,8 @@ xfs_finish_flags(
 		}
 	}
 
-	if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+	if (xfs_sb_version_hasattr2(&mp->m_sb))
 		mp->m_flags |= XFS_MOUNT_ATTR2;
-	}
 
 	/*
 	 * prohibit r/w mounts of read-only filesystems
@@ -366,7 +358,7 @@ xfs_finish_flags(
 	 * check for shared mount.
 	 */
 	if (ap->flags & XFSMNT_SHARED) {
-		if (!XFS_SB_VERSION_HASSHARED(&mp->m_sb))
+		if (!xfs_sb_version_hasshared(&mp->m_sb))
 			return XFS_ERROR(EINVAL);
 
 		/*
@@ -512,7 +504,7 @@ xfs_mount(
 	if (!error && logdev && logdev != ddev) {
 		unsigned int	log_sector_size = BBSIZE;
 
-		if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb))
+		if (xfs_sb_version_hassector(&mp->m_sb))
 			log_sector_size = mp->m_sb.sb_logsectsize;
 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
 					    mp->m_sb.sb_blocksize,
@@ -574,7 +566,7 @@ xfs_unmount(
 #ifdef HAVE_DMAPI
 	if (mp->m_flags & XFS_MOUNT_DMAPI) {
 		error = XFS_SEND_PREUNMOUNT(mp,
-				rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL,
+				rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
 				NULL, NULL, 0, 0,
 				(mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
 					0:DM_FLAGS_UNWANTED);
@@ -585,11 +577,6 @@ xfs_unmount(
 					0 : DM_FLAGS_UNWANTED;
 	}
 #endif
-	/*
-	 * First blow any referenced inode from this file system
-	 * out of the reference cache, and delete the timer.
-	 */
-	xfs_refcache_purge_mp(mp);
 
 	/*
 	 * Blow away any referenced inode in the filestreams cache.
@@ -630,7 +617,7 @@ out:
 		/* Note: mp structure must still exist for
 		 * XFS_SEND_UNMOUNT() call.
 		 */
-		XFS_SEND_UNMOUNT(mp, error == 0 ? rvp : NULL,
+		XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
 			DM_RIGHT_NULL, 0, error, unmount_event_flags);
 	}
 	if (xfs_unmountfs_needed) {
@@ -653,7 +640,6 @@ xfs_quiesce_fs(
 {
 	int			count = 0, pincount;
 
-	xfs_refcache_purge_mp(mp);
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
 	xfs_finish_reclaim_all(mp, 0);
 
@@ -1324,18 +1310,6 @@ xfs_syncsub(
 	}
 
 	/*
-	 * If this is the periodic sync, then kick some entries out of
-	 * the reference cache.  This ensures that idle entries are
-	 * eventually kicked out of the cache.
-	 */
-	if (flags & SYNC_REFCACHE) {
-		if (flags & SYNC_WAIT)
-			xfs_refcache_purge_mp(mp);
-		else
-			xfs_refcache_purge_some(mp);
-	}
-
-	/*
 	 * If asked, update the disk superblock with incore counter values if we
 	 * are using non-persistent counters so that they don't get too far out
 	 * of sync if we crash or get a forced shutdown. We don't want to force
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 51305242ff8c..3418c94bcf17 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -48,7 +48,6 @@
 #include "xfs_quota.h"
 #include "xfs_utils.h"
 #include "xfs_rtalloc.h"
-#include "xfs_refcache.h"
 #include "xfs_trans_space.h"
 #include "xfs_log_priv.h"
 #include "xfs_filestream.h"
@@ -327,7 +326,7 @@ xfs_setattr(
 		if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
 		    !(flags & ATTR_DMI)) {
 			int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
-			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
+			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
 				vap->va_size, 0, dmflags, NULL);
 			if (code) {
 				lock_flags = 0;
@@ -634,6 +633,15 @@ xfs_setattr(
 	 * Truncate file.  Must have write permission and not be a directory.
 	 */
 	if (mask & XFS_AT_SIZE) {
+		/*
+		 * Only change the c/mtime if we are changing the size
+		 * or we are explicitly asked to change it. This handles
+		 * the semantic difference between truncate() and ftruncate()
+		 * as implemented in the VFS.
+		 */
+		if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME))
+			timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+
 		if (vap->va_size > ip->i_size) {
 			xfs_igrow_finish(tp, ip, vap->va_size,
 			    !(flags & ATTR_DMI));
@@ -662,10 +670,6 @@ xfs_setattr(
 			 */
 			xfs_iflags_set(ip, XFS_ITRUNCATED);
 		}
-		/*
-		 * Have to do this even if the file's size doesn't change.
-		 */
-		timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
 	}
 
 	/*
@@ -877,7 +881,7 @@ xfs_setattr(
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
 	    !(flags & ATTR_DMI)) {
-		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL, NULL, NULL,
 					0, 0, AT_DELAY_FLAG(flags));
 	}
@@ -1520,12 +1524,6 @@ xfs_release(
 			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
 	}
 
-#ifdef HAVE_REFCACHE
-	/* If we are in the NFS reference cache then don't do this now */
-	if (ip->i_refcache)
-		return 0;
-#endif
-
 	if (ip->i_d.di_nlink != 0) {
 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
 		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -1588,9 +1586,8 @@ xfs_inactive(
 
 	mp = ip->i_mount;
 
-	if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) {
-		(void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
-	}
+	if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
+		XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);
 
 	error = 0;
 
@@ -1766,7 +1763,7 @@ int
 xfs_lookup(
 	xfs_inode_t		*dp,
 	bhv_vname_t		*dentry,
-	bhv_vnode_t		**vpp)
+	xfs_inode_t		**ipp)
 {
 	xfs_inode_t		*ip;
 	xfs_ino_t		e_inum;
@@ -1781,7 +1778,7 @@ xfs_lookup(
 	lock_mode = xfs_ilock_map_shared(dp);
 	error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip);
 	if (!error) {
-		*vpp = XFS_ITOV(ip);
+		*ipp = ip;
 		xfs_itrace_ref(ip);
 	}
 	xfs_iunlock_map_shared(dp, lock_mode);
@@ -1794,14 +1791,12 @@ xfs_create(
 	bhv_vname_t		*dentry,
 	mode_t			mode,
 	xfs_dev_t		rdev,
-	bhv_vnode_t		**vpp,
+	xfs_inode_t		**ipp,
 	cred_t			*credp)
 {
 	char			*name = VNAME(dentry);
 	xfs_mount_t	        *mp = dp->i_mount;
-	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
 	xfs_inode_t		*ip;
-	bhv_vnode_t	        *vp = NULL;
 	xfs_trans_t		*tp;
 	int                     error;
 	xfs_bmap_free_t		free_list;
@@ -1815,14 +1810,14 @@ xfs_create(
 	uint			resblks;
 	int			namelen;
 
-	ASSERT(!*vpp);
+	ASSERT(!*ipp);
 	xfs_itrace_entry(dp);
 
 	namelen = VNAMELEN(dentry);
 
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
-				dir_vp, DM_RIGHT_NULL, NULL,
+				dp, DM_RIGHT_NULL, NULL,
 				DM_RIGHT_NULL, name, NULL,
 				mode, 0, 0);
 
@@ -1914,7 +1909,7 @@ xfs_create(
 	 * the transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	VN_HOLD(dir_vp);
+	IHOLD(dp);
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = B_FALSE;
 
@@ -1952,7 +1947,6 @@ xfs_create(
 	 * vnode to the caller, we bump the vnode ref count now.
 	 */
 	IHOLD(ip);
-	vp = XFS_ITOV(ip);
 
 	error = xfs_bmap_finish(&tp, &free_list, &committed);
 	if (error) {
@@ -1970,16 +1964,16 @@ xfs_create(
 	XFS_QM_DQRELE(mp, udqp);
 	XFS_QM_DQRELE(mp, gdqp);
 
-	*vpp = vp;
+	*ipp = ip;
 
 	/* Fallthrough to std_return with error = 0  */
 
 std_return:
-	if ((*vpp || (error != 0 && dm_event_sent != 0)) &&
+	if ((*ipp || (error != 0 && dm_event_sent != 0)) &&
 	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
-			dir_vp, DM_RIGHT_NULL,
-			*vpp ? vp:NULL,
+			dp, DM_RIGHT_NULL,
+			*ipp ? ip : NULL,
 			DM_RIGHT_NULL, name, NULL,
 			mode, error, 0);
 	}
@@ -2274,44 +2268,32 @@ xfs_remove(
 	xfs_inode_t             *dp,
 	bhv_vname_t		*dentry)
 {
-	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
 	char			*name = VNAME(dentry);
 	xfs_mount_t		*mp = dp->i_mount;
-	xfs_inode_t             *ip;
+	xfs_inode_t             *ip = VNAME_TO_INODE(dentry);
+	int			namelen = VNAMELEN(dentry);
 	xfs_trans_t             *tp = NULL;
 	int                     error = 0;
 	xfs_bmap_free_t         free_list;
 	xfs_fsblock_t           first_block;
 	int			cancel_flags;
 	int			committed;
-	int			dm_di_mode = 0;
 	int			link_zero;
 	uint			resblks;
-	int			namelen;
 
 	xfs_itrace_entry(dp);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	namelen = VNAMELEN(dentry);
-
-	if (!xfs_get_dir_entry(dentry, &ip)) {
-	        dm_di_mode = ip->i_d.di_mode;
-		IRELE(ip);
-	}
-
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
-		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp,
 					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
-					name, NULL, dm_di_mode, 0, 0);
+					name, NULL, ip->i_d.di_mode, 0, 0);
 		if (error)
 			return error;
 	}
 
-	/* From this point on, return through std_return */
-	ip = NULL;
-
 	/*
 	 * We need to get a reference to ip before we get our log
 	 * reservation. The reason for this is that we cannot call
@@ -2324,13 +2306,7 @@ xfs_remove(
 	 * when we call xfs_iget.  Instead we get an unlocked reference
 	 * to the inode before getting our log reservation.
 	 */
-	error = xfs_get_dir_entry(dentry, &ip);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
-		goto std_return;
-	}
-
-	dm_di_mode = ip->i_d.di_mode;
+	IHOLD(ip);
 
 	xfs_itrace_entry(ip);
 	xfs_itrace_ref(ip);
@@ -2449,14 +2425,6 @@ xfs_remove(
 	}
 
 	/*
-	 * Before we drop our extra reference to the inode, purge it
-	 * from the refcache if it is there.  By waiting until afterwards
-	 * to do the IRELE, we ensure that we won't go inactive in the
-	 * xfs_refcache_purge_ip routine (although that would be OK).
-	 */
-	xfs_refcache_purge_ip(ip);
-
-	/*
 	 * If we are using filestreams, kill the stream association.
 	 * If the file is still open it may get a new one but that
 	 * will get killed on last close in xfs_close() so we don't
@@ -2472,9 +2440,9 @@ xfs_remove(
  std_return:
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-				dir_vp, DM_RIGHT_NULL,
+				dp, DM_RIGHT_NULL,
 				NULL, DM_RIGHT_NULL,
-				name, NULL, dm_di_mode, error, 0);
+				name, NULL, ip->i_d.di_mode, error, 0);
 	}
 	return error;
 
@@ -2495,14 +2463,6 @@ xfs_remove(
 	cancel_flags |= XFS_TRANS_ABORT;
 	xfs_trans_cancel(tp, cancel_flags);
 
-	/*
-	 * Before we drop our extra reference to the inode, purge it
-	 * from the refcache if it is there.  By waiting until afterwards
-	 * to do the IRELE, we ensure that we won't go inactive in the
-	 * xfs_refcache_purge_ip routine (although that would be OK).
-	 */
-	xfs_refcache_purge_ip(ip);
-
 	IRELE(ip);
 
 	goto std_return;
@@ -2511,12 +2471,10 @@ xfs_remove(
 int
 xfs_link(
 	xfs_inode_t		*tdp,
-	bhv_vnode_t		*src_vp,
+	xfs_inode_t		*sip,
 	bhv_vname_t		*dentry)
 {
-	bhv_vnode_t		*target_dir_vp = XFS_ITOV(tdp);
 	xfs_mount_t		*mp = tdp->i_mount;
-	xfs_inode_t		*sip = xfs_vtoi(src_vp);
 	xfs_trans_t		*tp;
 	xfs_inode_t		*ips[2];
 	int			error;
@@ -2529,18 +2487,18 @@ xfs_link(
 	int			target_namelen;
 
 	xfs_itrace_entry(tdp);
-	xfs_itrace_entry(xfs_vtoi(src_vp));
+	xfs_itrace_entry(sip);
 
 	target_namelen = VNAMELEN(dentry);
-	ASSERT(!VN_ISDIR(src_vp));
+	ASSERT(!S_ISDIR(sip->i_d.di_mode));
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
 	if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
-					target_dir_vp, DM_RIGHT_NULL,
-					src_vp, DM_RIGHT_NULL,
+					tdp, DM_RIGHT_NULL,
+					sip, DM_RIGHT_NULL,
 					target_name, NULL, 0, 0, 0);
 		if (error)
 			return error;
@@ -2584,8 +2542,8 @@ xfs_link(
 	 * xfs_trans_cancel will both unlock the inodes and
 	 * decrement the associated ref counts.
 	 */
-	VN_HOLD(src_vp);
-	VN_HOLD(target_dir_vp);
+	IHOLD(sip);
+	IHOLD(tdp);
 	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
 
@@ -2650,8 +2608,8 @@ xfs_link(
 std_return:
 	if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
-				target_dir_vp, DM_RIGHT_NULL,
-				src_vp, DM_RIGHT_NULL,
+				tdp, DM_RIGHT_NULL,
+				sip, DM_RIGHT_NULL,
 				target_name, NULL, 0, error, 0);
 	}
 	return error;
@@ -2671,15 +2629,13 @@ xfs_mkdir(
 	xfs_inode_t             *dp,
 	bhv_vname_t		*dentry,
 	mode_t			mode,
-	bhv_vnode_t		**vpp,
+	xfs_inode_t		**ipp,
 	cred_t			*credp)
 {
-	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
 	char			*dir_name = VNAME(dentry);
 	int			dir_namelen = VNAMELEN(dentry);
 	xfs_mount_t		*mp = dp->i_mount;
 	xfs_inode_t		*cdp;	/* inode of created dir */
-	bhv_vnode_t		*cvp;	/* vnode of created dir */
 	xfs_trans_t		*tp;
 	int			cancel_flags;
 	int			error;
@@ -2700,7 +2656,7 @@ xfs_mkdir(
 
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
-					dir_vp, DM_RIGHT_NULL, NULL,
+					dp, DM_RIGHT_NULL, NULL,
 					DM_RIGHT_NULL, dir_name, NULL,
 					mode, 0, 0);
 		if (error)
@@ -2786,7 +2742,7 @@ xfs_mkdir(
 	 * from here on will result in the transaction cancel
 	 * unlocking dp so don't do it explicitly in the error path.
 	 */
-	VN_HOLD(dir_vp);
+	IHOLD(dp);
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = B_FALSE;
 
@@ -2817,11 +2773,9 @@ xfs_mkdir(
 	if (error)
 		goto error2;
 
-	cvp = XFS_ITOV(cdp);
-
 	created = B_TRUE;
 
-	*vpp = cvp;
+	*ipp = cdp;
 	IHOLD(cdp);
 
 	/*
@@ -2858,8 +2812,8 @@ std_return:
 	if ((created || (error != 0 && dm_event_sent != 0)) &&
 	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
-					dir_vp, DM_RIGHT_NULL,
-					created ? XFS_ITOV(cdp):NULL,
+					dp, DM_RIGHT_NULL,
+					created ? cdp : NULL,
 					DM_RIGHT_NULL,
 					dir_name, NULL,
 					mode, error, 0);
@@ -2891,14 +2845,13 @@ xfs_rmdir(
 	char			*name = VNAME(dentry);
 	int			namelen = VNAMELEN(dentry);
 	xfs_mount_t		*mp = dp->i_mount;
-  	xfs_inode_t             *cdp;   /* child directory */
+  	xfs_inode_t             *cdp = VNAME_TO_INODE(dentry);
 	xfs_trans_t             *tp;
 	int                     error;
 	xfs_bmap_free_t         free_list;
 	xfs_fsblock_t           first_block;
 	int			cancel_flags;
 	int			committed;
-	int			dm_di_mode = S_IFDIR;
 	int			last_cdp_link;
 	uint			resblks;
 
@@ -2907,24 +2860,15 @@ xfs_rmdir(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	if (!xfs_get_dir_entry(dentry, &cdp)) {
-	        dm_di_mode = cdp->i_d.di_mode;
-		IRELE(cdp);
-	}
-
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
-					dir_vp, DM_RIGHT_NULL,
+					dp, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL,
-					name, NULL, dm_di_mode, 0, 0);
+					name, NULL, cdp->i_d.di_mode, 0, 0);
 		if (error)
 			return XFS_ERROR(error);
 	}
 
-	/* Return through std_return after this point. */
-
-	cdp = NULL;
-
 	/*
 	 * We need to get a reference to cdp before we get our log
 	 * reservation.  The reason for this is that we cannot call
@@ -2937,13 +2881,7 @@ xfs_rmdir(
 	 * when we call xfs_iget.  Instead we get an unlocked reference
 	 * to the inode before getting our log reservation.
 	 */
-	error = xfs_get_dir_entry(dentry, &cdp);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
-		goto std_return;
-	}
-	mp = dp->i_mount;
-	dm_di_mode = cdp->i_d.di_mode;
+	IHOLD(cdp);
 
 	/*
 	 * Get the dquots for the inodes.
@@ -3098,9 +3036,9 @@ xfs_rmdir(
  std_return:
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-					dir_vp, DM_RIGHT_NULL,
+					dp, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL,
-					name, NULL, dm_di_mode,
+					name, NULL, cdp->i_d.di_mode,
 					error, 0);
 	}
 	return error;
@@ -3121,10 +3059,9 @@ xfs_symlink(
 	bhv_vname_t		*dentry,
 	char			*target_path,
 	mode_t			mode,
-	bhv_vnode_t		**vpp,
+	xfs_inode_t		**ipp,
 	cred_t			*credp)
 {
-	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
 	xfs_mount_t		*mp = dp->i_mount;
 	xfs_trans_t		*tp;
 	xfs_inode_t		*ip;
@@ -3150,7 +3087,7 @@ xfs_symlink(
 	char			*link_name = VNAME(dentry);
 	int			link_namelen;
 
-	*vpp = NULL;
+	*ipp = NULL;
 	error = 0;
 	ip = NULL;
 	tp = NULL;
@@ -3195,7 +3132,7 @@ xfs_symlink(
 	}
 
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
-		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
 					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
 					link_name, target_path, 0, 0, 0);
 		if (error)
@@ -3289,7 +3226,7 @@ xfs_symlink(
 	 * transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	VN_HOLD(dir_vp);
+	IHOLD(dp);
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = B_FALSE;
 
@@ -3399,19 +3336,14 @@ xfs_symlink(
 std_return:
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
-					dir_vp, DM_RIGHT_NULL,
-					error ? NULL : XFS_ITOV(ip),
+					dp, DM_RIGHT_NULL,
+					error ? NULL : ip,
 					DM_RIGHT_NULL, link_name, target_path,
 					0, error, 0);
 	}
 
-	if (!error) {
-		bhv_vnode_t *vp;
-
-		ASSERT(ip);
-		vp = XFS_ITOV(ip);
-		*vpp = vp;
-	}
+	if (!error)
+		*ipp = ip;
 	return error;
 
  error2:
@@ -3431,60 +3363,11 @@ std_return:
 }
 
 int
-xfs_rwlock(
-	xfs_inode_t	*ip,
-	bhv_vrwlock_t	locktype)
-{
-	if (S_ISDIR(ip->i_d.di_mode))
-		return 1;
-	if (locktype == VRWLOCK_WRITE) {
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-	} else if (locktype == VRWLOCK_TRY_READ) {
-		return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED);
-	} else if (locktype == VRWLOCK_TRY_WRITE) {
-		return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL);
-	} else {
-		ASSERT((locktype == VRWLOCK_READ) ||
-		       (locktype == VRWLOCK_WRITE_DIRECT));
-		xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	}
-
-	return 1;
-}
-
-
-void
-xfs_rwunlock(
-	xfs_inode_t     *ip,
-	bhv_vrwlock_t	locktype)
-{
- 	if (S_ISDIR(ip->i_d.di_mode))
-  		return;
-	if (locktype == VRWLOCK_WRITE) {
-		/*
-		 * In the write case, we may have added a new entry to
-		 * the reference cache.  This might store a pointer to
-		 * an inode to be released in this inode.  If it is there,
-		 * clear the pointer and release the inode after unlocking
-		 * this one.
-		 */
-		xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL);
-	} else {
-		ASSERT((locktype == VRWLOCK_READ) ||
-		       (locktype == VRWLOCK_WRITE_DIRECT));
-		xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-	}
-	return;
-}
-
-
-int
 xfs_inode_flush(
 	xfs_inode_t	*ip,
 	int		flags)
 {
 	xfs_mount_t	*mp = ip->i_mount;
-	xfs_inode_log_item_t *iip = ip->i_itemp;
 	int		error = 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -3494,33 +3377,9 @@ xfs_inode_flush(
 	 * Bypass inodes which have already been cleaned by
 	 * the inode flush clustering code inside xfs_iflush
 	 */
-	if ((ip->i_update_core == 0) &&
-	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)))
+	if (xfs_inode_clean(ip))
 		return 0;
 
-	if (flags & FLUSH_LOG) {
-		if (iip && iip->ili_last_lsn) {
-			xlog_t		*log = mp->m_log;
-			xfs_lsn_t	sync_lsn;
-			int		log_flags = XFS_LOG_FORCE;
-
-			spin_lock(&log->l_grant_lock);
-			sync_lsn = log->l_last_sync_lsn;
-			spin_unlock(&log->l_grant_lock);
-
-			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
-				if (flags & FLUSH_SYNC)
-					log_flags |= XFS_LOG_SYNC;
-				error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
-				if (error)
-					return error;
-			}
-
-			if (ip->i_update_core == 0)
-				return 0;
-		}
-	}
-
 	/*
 	 * We make this non-blocking if the inode is contended,
 	 * return EAGAIN to indicate to the caller that they
@@ -3528,30 +3387,22 @@ xfs_inode_flush(
 	 * blocking on inodes inside another operation right
 	 * now, they get caught later by xfs_sync.
 	 */
-	if (flags & FLUSH_INODE) {
-		int	flush_flags;
-
-		if (flags & FLUSH_SYNC) {
-			xfs_ilock(ip, XFS_ILOCK_SHARED);
-			xfs_iflock(ip);
-		} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-			if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
-				xfs_iunlock(ip, XFS_ILOCK_SHARED);
-				return EAGAIN;
-			}
-		} else {
+	if (flags & FLUSH_SYNC) {
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		xfs_iflock(ip);
+	} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 			return EAGAIN;
 		}
-
-		if (flags & FLUSH_SYNC)
-			flush_flags = XFS_IFLUSH_SYNC;
-		else
-			flush_flags = XFS_IFLUSH_ASYNC;
-
-		error = xfs_iflush(ip, flush_flags);
-		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	} else {
+		return EAGAIN;
 	}
 
+	error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
+						    : XFS_IFLUSH_ASYNC_NOBLOCK);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
 	return error;
 }
 
@@ -3694,12 +3545,12 @@ xfs_finish_reclaim(
 	 * We get the flush lock regardless, though, just to make sure
 	 * we don't free it while it is being flushed.
 	 */
-	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		if (!locked) {
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
-			xfs_iflock(ip);
-		}
+	if (!locked) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_iflock(ip);
+	}
 
+	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		if (ip->i_update_core ||
 		    ((ip->i_itemp != NULL) &&
 		     (ip->i_itemp->ili_format.ilf_fields != 0))) {
@@ -3719,17 +3570,11 @@ xfs_finish_reclaim(
 		ASSERT(ip->i_update_core == 0);
 		ASSERT(ip->i_itemp == NULL ||
 		       ip->i_itemp->ili_format.ilf_fields == 0);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	} else if (locked) {
-		/*
-		 * We are not interested in doing an iflush if we're
-		 * in the process of shutting down the filesystem forcibly.
-		 * So, just reclaim the inode.
-		 */
-		xfs_ifunlock(ip);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
+	xfs_ifunlock(ip);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
  reclaim:
 	xfs_ireclaim(ip);
 	return 0;
@@ -3845,9 +3690,8 @@ xfs_alloc_file_space(
 		end_dmi_offset = offset+len;
 		if (end_dmi_offset > ip->i_size)
 			end_dmi_offset = ip->i_size;
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
-			offset, end_dmi_offset - offset,
-			0, NULL);
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset,
+				      end_dmi_offset - offset, 0, NULL);
 		if (error)
 			return error;
 	}
@@ -3956,8 +3800,8 @@ dmapi_enospc_check:
 	if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 &&
 	    DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
-				XFS_ITOV(ip), DM_RIGHT_NULL,
-				XFS_ITOV(ip), DM_RIGHT_NULL,
+				ip, DM_RIGHT_NULL,
+				ip, DM_RIGHT_NULL,
 				NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
 		if (error == 0)
 			goto retry;	/* Maybe DMAPI app. has made space */
@@ -4102,7 +3946,7 @@ xfs_free_file_space(
 	    DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
 		if (end_dmi_offset > ip->i_size)
 			end_dmi_offset = ip->i_size;
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip,
 				offset, end_dmi_offset - offset,
 				AT_DELAY_FLAG(attr_flags), NULL);
 		if (error)
@@ -4132,7 +3976,7 @@ xfs_free_file_space(
 	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
 	 * will take care of it for us.
 	 */
-	if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 		nimap = 1;
 		error = xfs_bmapi(NULL, ip, startoffset_fsb,
 			1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 4e3970f0e5e3..12e581865bdf 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -24,22 +24,20 @@ int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start,
 int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, bhv_vname_t *dentry,
-		bhv_vnode_t **vpp);
+		struct xfs_inode **ipp);
 int xfs_create(struct xfs_inode *dp, bhv_vname_t *dentry, mode_t mode,
-		xfs_dev_t rdev, bhv_vnode_t **vpp, struct cred *credp);
+		xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
 int xfs_remove(struct xfs_inode *dp, bhv_vname_t	*dentry);
-int xfs_link(struct xfs_inode *tdp, bhv_vnode_t *src_vp,
+int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
 		bhv_vname_t *dentry);
 int xfs_mkdir(struct xfs_inode *dp, bhv_vname_t *dentry,
-		mode_t mode, bhv_vnode_t **vpp, struct cred *credp);
+		mode_t mode, struct xfs_inode **ipp, struct cred *credp);
 int xfs_rmdir(struct xfs_inode *dp, bhv_vname_t *dentry);
 int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize,
 		       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry,
-		char *target_path, mode_t mode, bhv_vnode_t **vpp,
+		char *target_path, mode_t mode, struct xfs_inode **ipp,
 		struct cred *credp);
-int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
-void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 int xfs_inode_flush(struct xfs_inode *ip, int flags);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_reclaim(struct xfs_inode *ip);
@@ -47,7 +45,7 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
 		xfs_flock64_t *bf, xfs_off_t offset,
 		struct cred *credp, int	attr_flags);
 int xfs_rename(struct xfs_inode *src_dp, bhv_vname_t *src_vname,
-		bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname);
+		struct xfs_inode *target_dp, bhv_vname_t *target_vname);
 int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
 		int *valuelenp, int flags, cred_t *cred);
 int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,