Merge branch 'akpm-current/current'

author: Stephen Rothwell <sfr@canb.auug.org.au> 2013-07-19 13:01:49 +1000
committer: Stephen Rothwell <sfr@canb.auug.org.au> 2013-07-19 13:01:49 +1000
commit: a25f6f2e88b6f2bb492f22b8b91028604d51bfb2 (patch)
tree: 2b6744bfab0871da7942bb9864458d989e89df1e
parent: d03792f9db9b892f494d3aa19d767ddf0365d1ff (diff)
parent: 1f02e5b30607cbef3c8b4d1376cba3689c399288 (diff)
131 files changed, 4218 insertions, 1422 deletions
diff --git a/.gitignore b/.gitignore
index 3b8b9b33be38..7e9932e55475 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,7 @@ modules.builtin
 *.bz2
 *.lzma
 *.xz
+*.lz4
 *.lzo
 *.patch
 *.gcno
diff --git a/Documentation/development-process/2.Process b/Documentation/development-process/2.Process
index 4823577c6509..2e0617936e8f 100644
--- a/Documentation/development-process/2.Process
+++ b/Documentation/development-process/2.Process
@@ -276,7 +276,7 @@ mainline get there via -mm.
 The current -mm patch is available in the "mmotm" (-mm of the moment)
 directory at:
 
-	http://userweb.kernel.org/~akpm/mmotm/
+	http://www.ozlabs.org/~akpm/mmotm/
 
 Use of the MMOTM tree is likely to be a frustrating experience, though;
 there is a definite chance that it will not even compile.
@@ -287,7 +287,7 @@ the mainline is expected to look like after the next merge window closes.
 Linux-next trees are announced on the linux-kernel and linux-next mailing
 lists when they are assembled; they can be downloaded from:
 
-	http://www.kernel.org/pub/linux/kernel/people/sfr/linux-next/
+	http://www.kernel.org/pub/linux/kernel/next/
 
 Some information about linux-next has been gathered at:
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 5d3facfd7899..3034c1fdd4e8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2871,7 +2871,7 @@ F:	drivers/media/usb/dvb-usb-v2/dvb_usb*
 F:	drivers/media/usb/dvb-usb-v2/usb_urb.c
 
 DYNAMIC DEBUG
-M:	Jason Baron <jbaron@redhat.com>
+M:	Jason Baron <jbaron@akamai.com>
 S:	Maintained
 F:	lib/dynamic_debug.c
 F:	include/linux/dynamic_debug.h
@@ -7642,6 +7642,11 @@ W:	http://tifmxx.berlios.de/
 S:	Maintained
 F:	drivers/memstick/host/tifm_ms.c
 
+SONY MEMORYSTICK STANDARD SUPPORT
+M:	Maxim Levitsky <maximlevitsky@gmail.com>
+S:	Maintained
+F:	drivers/memstick/core/ms_block.*
+
 SOUND
 M:	Jaroslav Kysela <perex@perex.cz>
 M:	Takashi Iwai <tiwai@suse.de>
diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c
index 40736da9bea8..ffb19b7da999 100644
--- a/arch/alpha/lib/csum_partial_copy.c
+++ b/arch/alpha/lib/csum_partial_copy.c
@@ -338,6 +338,11 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
 	unsigned long doff = 7 & (unsigned long) dst;
 
 	if (len) {
+		if (!access_ok(VERIFY_READ, src, len)) {
+			*errp = -EFAULT;
+			memset(dst, 0, len);
+			return sum;
+		}
 		if (!doff) {
 			if (!soff)
 				checksum = csum_partial_cfu_aligned(
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index 3d0ddbc005fe..71368850dfc0 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -169,10 +169,10 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig,
 		new_ka.ka_restorer = restorer;
 		ret = get_user(u_handler, &act->sa_handler);
 		new_ka.sa.sa_handler =  compat_ptr(u_handler);
-		ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t));
+		ret |= copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t));
 		sigset_from_compat(&new_ka.sa.sa_mask, &set32);
-		ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		ret |= __get_user(u_restorer, &act->sa_restorer);
+		ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		ret |= get_user(u_restorer, &act->sa_restorer);
 		new_ka.sa.sa_restorer = compat_ptr(u_restorer);
                 if (ret)
                 	return -EFAULT;
@@ -183,9 +183,9 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig,
 	if (!ret && oact) {
 		sigset_to_compat(&set32, &old_ka.sa.sa_mask);
 		ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler);
-		ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t));
-		ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		ret |= __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer);
+		ret |= copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t));
+		ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer);
 		if (ret)
 			ret = -EFAULT;
         }
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index cccd07fa5e3a..b8e9224f0b45 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -17,6 +17,8 @@ extern unsigned long pci_mem_start;
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern int e820_all_mapped(u64 start, u64 end, unsigned type);
 extern void e820_add_region(u64 start, u64 size, int type);
+extern void e820_add_limit_region(u64 start, u64 size, int type);
+extern void e820_adjust_region(u64 *start, u64 *size);
 extern void e820_print_map(char *who);
 extern int
 sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d32abeabbda5..0d5bb689649a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -47,6 +47,7 @@ unsigned long pci_mem_start = 0xaeedbabe;
 #ifdef CONFIG_PCI
 EXPORT_SYMBOL(pci_mem_start);
 #endif
+static u64 mem_limit = ~0ULL;
 
 /*
  * This function checks if any part of the range <start,end> is mapped
@@ -108,7 +109,7 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
  * Add a memory region to the kernel e820 map.
  */
 static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
-					 int type)
+					 int type, bool limited)
 {
 	int x = e820x->nr_map;
 
@@ -119,6 +120,22 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
 		return;
 	}
 
+	if (limited) {
+		if (start >= mem_limit) {
+			printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n",
+			       (unsigned long long)start,
+			       (unsigned long long)(start + size - 1));
+			return;
+		}
+
+		if (mem_limit - start < size) {
+			printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n",
+			       (unsigned long long)mem_limit,
+			       (unsigned long long)(start + size - 1));
+			size = mem_limit - start;
+		}
+	}
+
 	e820x->map[x].addr = start;
 	e820x->map[x].size = size;
 	e820x->map[x].type = type;
@@ -127,7 +144,37 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
 
 void __init e820_add_region(u64 start, u64 size, int type)
 {
-	__e820_add_region(&e820, start, size, type);
+	__e820_add_region(&e820, start, size, type, false);
+}
+
+/*
+ * Called by do_add_efi_memmap().
+ *
+ * Note: BOOT_SERVICES_{CODE,DATA} regions on some EFI machines are marked
+ * as E820_RAM, and they need to be mapped. Please use e820_add_region()
+ * to add BOOT_SERVICES_{CODE,DATA} regions.
+ */
+void __init e820_add_limit_region(u64 start, u64 size, int type)
+{
+	/*
+	 * efi_init() is called after finish_e820_parsing(), so we should
+	 * check whether [start, start + size) contains addresses above
+	 * mem_limit if the type is E820_RAM.
+	 */
+	__e820_add_region(&e820, start, size, type, type == E820_RAM);
+}
+
+void __init e820_adjust_region(u64 *start, u64 *size)
+{
+	if (*start >= mem_limit) {
+		*size = 0;
+		return;
+	}
+
+	if (mem_limit - *start < *size)
+		*size = mem_limit - *start;
+
+	return;
 }
 
 static void __init e820_print_type(u32 type)
@@ -455,8 +502,9 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
 
 		/* new range is totally covered? */
 		if (ei->addr < start && ei_end > end) {
-			__e820_add_region(e820x, start, size, new_type);
-			__e820_add_region(e820x, end, ei_end - end, ei->type);
+			__e820_add_region(e820x, start, size, new_type, false);
+			__e820_add_region(e820x, end, ei_end - end, ei->type,
+					  false);
 			ei->size = start - ei->addr;
 			real_updated_size += size;
 			continue;
@@ -469,7 +517,7 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
 			continue;
 
 		__e820_add_region(e820x, final_start, final_end - final_start,
-				  new_type);
+				  new_type, false);
 
 		real_updated_size += final_end - final_start;
 
@@ -809,7 +857,7 @@ static int userdef __initdata;
 /* "mem=nopentium" disables the 4MB page tables. */
 static int __init parse_memopt(char *p)
 {
-	u64 mem_size;
+	char *oldp;
 
 	if (!p)
 		return -EINVAL;
@@ -825,11 +873,11 @@ static int __init parse_memopt(char *p)
 	}
 
 	userdef = 1;
-	mem_size = memparse(p, &p);
+	oldp = p;
+	mem_limit = memparse(p, &p);
 	/* don't remove all of memory when handling "mem={invalid}" param */
-	if (mem_size == 0)
+	if (mem_limit == 0 || p == oldp)
 		return -EINVAL;
-	e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 
 	return 0;
 }
@@ -895,6 +943,12 @@ early_param("memmap", parse_memmap_opt);
 
 void __init finish_e820_parsing(void)
 {
+	if (mem_limit != ~0ULL) {
+		userdef = 1;
+		e820_remove_range(mem_limit, ULLONG_MAX - mem_limit,
+				  E820_RAM, 1);
+	}
+
 	if (userdef) {
 		u32 nr = e820.nr_map;
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 282375f13c7e..f030cbe669a5 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -103,6 +103,7 @@ static void flush_tlb_func(void *info)
 	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
 		return;
 
+	count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
 		if (f->flush_end == TLB_FLUSH_ALL)
 			local_flush_tlb();
@@ -130,6 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 	info.flush_start = start;
 	info.flush_end = end;
 
+	count_vm_event(NR_TLB_REMOTE_FLUSH);
 	if (is_uv_system()) {
 		unsigned int cpu;
 
@@ -149,6 +151,7 @@ void flush_tlb_current_task(void)
 
 	preempt_disable();
 
+	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
 	local_flush_tlb();
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
@@ -211,16 +214,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
 
 	/* tlb_flushall_shift is on balance point, details in commit log */
-	if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
+	if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) {
+		count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
 		local_flush_tlb();
-	else {
+	} else {
 		if (has_large_page(mm, start, end)) {
 			local_flush_tlb();
 			goto flush_all;
 		}
 		/* flush range by one by one 'invlpg' */
-		for (addr = start; addr < end;	addr += PAGE_SIZE)
+		for (addr = start; addr < end;	addr += PAGE_SIZE) {
+			count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
 			__flush_tlb_single(addr);
+		}
 
 		if (cpumask_any_but(mm_cpumask(mm),
 				smp_processor_id()) < nr_cpu_ids)
@@ -256,6 +262,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 
 static void do_flush_tlb_all(void *info)
 {
+	count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	__flush_tlb_all();
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 		leave_mm(smp_processor_id());
@@ -263,6 +270,7 @@ static void do_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
+	count_vm_event(NR_TLB_REMOTE_FLUSH);
 	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
@@ -272,8 +280,10 @@ static void do_kernel_range_flush(void *info)
 	unsigned long addr;
 
 	/* flush range by one by one 'invlpg' */
-	for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
+	for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE) {
+		count_vm_event(NR_TLB_LOCAL_FLUSH_ONE_KERNEL);
 		__flush_tlb_single(addr);
+	}
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 643b8b5eee86..8244f5ec2f4c 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/reboot.h>
 #include <linux/serial_reg.h>
 #include <linux/serial_8250.h>
 #include <linux/reboot.h>
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 90f6ed127096..f0d732ce9134 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -343,10 +343,17 @@ static void __init do_add_efi_memmap(void)
 		int e820_type;
 
 		switch (md->type) {
-		case EFI_LOADER_CODE:
-		case EFI_LOADER_DATA:
 		case EFI_BOOT_SERVICES_CODE:
 		case EFI_BOOT_SERVICES_DATA:
+			/* EFI_BOOT_SERVICES_{CODE,DATA} need to be mapped */
+			if (md->attribute & EFI_MEMORY_WB)
+				e820_type = E820_RAM;
+			else
+				e820_type = E820_RESERVED;
+			e820_add_region(start, size, e820_type);
+			continue;
+		case EFI_LOADER_CODE:
+		case EFI_LOADER_DATA:
 		case EFI_CONVENTIONAL_MEMORY:
 			if (md->attribute & EFI_MEMORY_WB)
 				e820_type = E820_RAM;
@@ -371,7 +378,7 @@ static void __init do_add_efi_memmap(void)
 			e820_type = E820_RESERVED;
 			break;
 		}
-		e820_add_region(start, size, e820_type);
+		e820_add_limit_region(start, size, e820_type);
 	}
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 }
@@ -479,6 +486,8 @@ void __init efi_free_boot_services(void)
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
 
+		e820_adjust_region(&start, &size);
+
 		/* Could not reserve boot area */
 		if (!size)
 			continue;
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7e5d474dc6ba..fbd5a67cb773 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -70,7 +70,7 @@ static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev,
 		return ret;
 
 	ret = copy_to_user(ugeo, &geo, 4);
-	ret |= __put_user(geo.start, &ugeo->start);
+	ret |= put_user(geo.start, &ugeo->start);
 	if (ret)
 		ret = -EFAULT;
 
diff --git a/block/genhd.c b/block/genhd.c
index dadf42b454a3..01b481363711 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v)
 	char buf[BDEVNAME_SIZE];
 
 	/* Don't show non-partitionable removeable devices or empty devices */
-	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+	if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
 				   (sgp->flags & GENHD_FL_REMOVABLE)))
 		return 0;
 	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c
index d21167bfc865..dc34a5b8bcee 100644
--- a/drivers/accessibility/braille/braille_console.c
+++ b/drivers/accessibility/braille/braille_console.c
@@ -359,6 +359,9 @@ int braille_register_console(struct console *console, int index,
 		char *console_options, char *braille_options)
 {
 	int ret;
+
+	if (!(console->flags & CON_BRL))
+		return 0;
 	if (!console_options)
 		/* Only support VisioBraille for now */
 		console_options = "57600o8";
@@ -374,15 +377,17 @@ int braille_register_console(struct console *console, int index,
 	braille_co = console;
 	register_keyboard_notifier(&keyboard_notifier_block);
 	register_vt_notifier(&vt_notifier_block);
-	return 0;
+	return 1;
 }
 
 int braille_unregister_console(struct console *console)
 {
 	if (braille_co != console)
 		return -EINVAL;
+	if (!(console->flags & CON_BRL))
+		return 0;
 	unregister_keyboard_notifier(&keyboard_notifier_block);
 	unregister_vt_notifier(&vt_notifier_block);
 	braille_co = NULL;
-	return 0;
+	return 1;
 }
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 507362a76a73..80f9743f596a 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -2872,15 +2872,4 @@ static struct pci_driver he_driver = {
 	.id_table =	he_pci_tbl,
 };
 
-static int __init he_init(void)
-{
-	return pci_register_driver(&he_driver);
-}
-
-static void __exit he_cleanup(void)
-{
-	pci_unregister_driver(&he_driver);
-}
-
-module_init(he_init);
-module_exit(he_cleanup);
+module_pci_driver(he_driver);
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 7616a77ca322..bc9f43bf7e29 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -125,13 +125,7 @@ static ssize_t node_read_meminfo(struct device *dev,
 		       nid, K(node_page_state(nid, NR_WRITEBACK)),
 		       nid, K(node_page_state(nid, NR_FILE_PAGES)),
 		       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		       nid, K(node_page_state(nid, NR_ANON_PAGES)
-			+ node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
-			HPAGE_PMD_NR),
-#else
 		       nid, K(node_page_state(nid, NR_ANON_PAGES)),
-#endif
 		       nid, K(node_page_state(nid, NR_SHMEM)),
 		       nid, node_page_state(nid, NR_KERNEL_STACK) *
 				THREAD_SIZE / 1024,
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index eb760a218da4..fa0affb699b4 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -14,7 +14,7 @@
  * of and an antecedent to, SMBIOS, which stands for System
  * Management BIOS.  See further: http://www.dmtf.org/standards
  */
-static char dmi_empty_string[] = "        ";
+static const char dmi_empty_string[] = "        ";
 
 static u16 __initdata dmi_ver;
 /*
@@ -49,7 +49,7 @@ static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)
 	return "";
 }
 
-static char * __init dmi_string(const struct dmi_header *dm, u8 s)
+static const char * __init dmi_string(const struct dmi_header *dm, u8 s)
 {
 	const char *bp = dmi_string_nosave(dm, s);
 	char *str;
@@ -62,8 +62,6 @@ static char * __init dmi_string(const struct dmi_header *dm, u8 s)
 	str = dmi_alloc(len);
 	if (str != NULL)
 		strcpy(str, bp);
-	else
-		printk(KERN_ERR "dmi_string: cannot allocate %Zu bytes.\n", len);
 
 	return str;
 }
@@ -133,17 +131,18 @@ static int __init dmi_checksum(const u8 *buf, u8 len)
 	return sum == 0;
 }
 
-static char *dmi_ident[DMI_STRING_MAX];
+static const char *dmi_ident[DMI_STRING_MAX];
 static LIST_HEAD(dmi_devices);
 int dmi_available;
 
 /*
  *	Save a DMI string
  */
-static void __init dmi_save_ident(const struct dmi_header *dm, int slot, int string)
+static void __init dmi_save_ident(const struct dmi_header *dm, int slot,
+		int string)
 {
-	const char *d = (const char*) dm;
-	char *p;
+	const char *d = (const char *) dm;
+	const char *p;
 
 	if (dmi_ident[slot])
 		return;
@@ -155,9 +154,10 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot, int str
 	dmi_ident[slot] = p;
 }
 
-static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int index)
+static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
+		int index)
 {
-	const u8 *d = (u8*) dm + index;
+	const u8 *d = (u8 *) dm + index;
 	char *s;
 	int is_ff = 1, is_00 = 1, i;
 
@@ -188,12 +188,13 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde
 	else
 		sprintf(s, "%pUB", d);
 
-        dmi_ident[slot] = s;
+	dmi_ident[slot] = s;
 }
 
-static void __init dmi_save_type(const struct dmi_header *dm, int slot, int index)
+static void __init dmi_save_type(const struct dmi_header *dm, int slot,
+		int index)
 {
-	const u8 *d = (u8*) dm + index;
+	const u8 *d = (u8 *) dm + index;
 	char *s;
 
 	if (dmi_ident[slot])
@@ -216,10 +217,8 @@ static void __init dmi_save_one_device(int type, const char *name)
 		return;
 
 	dev = dmi_alloc(sizeof(*dev) + strlen(name) + 1);
-	if (!dev) {
-		printk(KERN_ERR "dmi_save_one_device: out of memory.\n");
+	if (!dev)
 		return;
-	}
 
 	dev->type = type;
 	strcpy((char *)(dev + 1), name);
@@ -249,17 +248,14 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
 	struct dmi_device *dev;
 
 	for (i = 1; i <= count; i++) {
-		char *devname = dmi_string(dm, i);
+		const char *devname = dmi_string(dm, i);
 
 		if (devname == dmi_empty_string)
 			continue;
 
 		dev = dmi_alloc(sizeof(*dev));
-		if (!dev) {
-			printk(KERN_ERR
-			   "dmi_save_oem_strings_devices: out of memory.\n");
+		if (!dev)
 			break;
-		}
 
 		dev->type = DMI_DEV_TYPE_OEM_STRING;
 		dev->name = devname;
@@ -272,21 +268,17 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
 static void __init dmi_save_ipmi_device(const struct dmi_header *dm)
 {
 	struct dmi_device *dev;
-	void * data;
+	void *data;
 
 	data = dmi_alloc(dm->length);
-	if (data == NULL) {
-		printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
+	if (data == NULL)
 		return;
-	}
 
 	memcpy(data, dm, dm->length);
 
 	dev = dmi_alloc(sizeof(*dev));
-	if (!dev) {
-		printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
+	if (!dev)
 		return;
-	}
 
 	dev->type = DMI_DEV_TYPE_IPMI;
 	dev->name = "IPMI controller";
@@ -301,10 +293,9 @@ static void __init dmi_save_dev_onboard(int instance, int segment, int bus,
 	struct dmi_dev_onboard *onboard_dev;
 
 	onboard_dev = dmi_alloc(sizeof(*onboard_dev) + strlen(name) + 1);
-	if (!onboard_dev) {
-		printk(KERN_ERR "dmi_save_dev_onboard: out of memory.\n");
+	if (!onboard_dev)
 		return;
-	}
+
 	onboard_dev->instance = instance;
 	onboard_dev->segment = segment;
 	onboard_dev->bus = bus;
@@ -320,7 +311,7 @@ static void __init dmi_save_dev_onboard(int instance, int segment, int bus,
 
 static void __init dmi_save_extended_devices(const struct dmi_header *dm)
 {
-	const u8 *d = (u8*) dm + 5;
+	const u8 *d = (u8 *) dm + 5;
 
 	/* Skip disabled device */
 	if ((*d & 0x80) == 0)
@@ -338,7 +329,7 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm)
  */
 static void __init dmi_decode(const struct dmi_header *dm, void *dummy)
 {
-	switch(dm->type) {
+	switch (dm->type) {
 	case 0:		/* BIOS Information */
 		dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
 		dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
@@ -419,6 +410,13 @@ static void __init dmi_format_ids(char *buf, size_t len)
 			    dmi_get_system_info(DMI_BIOS_DATE));
 }
 
+/*
+ * Check for DMI/SMBIOS headers in the system firmware image.  Any
+ * SMBIOS header must start 16 bytes before the DMI header, so take a
+ * 32 byte buffer and check for DMI at offset 16 and SMBIOS at offset
+ * 0.  If the DMI header is present, set dmi_ver accordingly (SMBIOS
+ * takes precedence) and return 0.  Otherwise return 1.
+ */
 static int __init dmi_present(const u8 *buf)
 {
 	int smbios_ver;
@@ -495,17 +493,18 @@ void __init dmi_scan_machine(void)
 			dmi_available = 1;
 			goto out;
 		}
-	}
-	else {
-		/*
-		 * no iounmap() for that ioremap(); it would be a no-op, but
-		 * it's so early in setup that sucker gets confused into doing
-		 * what it shouldn't if we actually call it.
-		 */
+	} else {
 		p = dmi_ioremap(0xF0000, 0x10000);
 		if (p == NULL)
 			goto error;
 
+		/*
+		 * Iterate over all possible DMI header addresses q.
+		 * Maintain the 32 bytes around q in buf.  On the
+		 * first iteration, substitute zero for the
+		 * out-of-range bytes so there is no chance of falsely
+		 * detecting an SMBIOS header.
+		 */
 		memset(buf, 0, 16);
 		for (q = p; q < p + 0x10000; q += 16) {
 			memcpy_fromio(buf + 16, q, 16);
@@ -519,7 +518,7 @@ void __init dmi_scan_machine(void)
 		dmi_iounmap(p, 0x10000);
 	}
  error:
-	printk(KERN_INFO "DMI not present or invalid.\n");
+	pr_info("DMI not present or invalid.\n");
  out:
 	dmi_initialized = 1;
 }
@@ -655,7 +654,7 @@ int dmi_name_in_serial(const char *str)
 
 /**
  *	dmi_name_in_vendors - Check if string is in the DMI system or board vendor name
- *	@str: 	Case sensitive Name
+ *	@str: Case sensitive Name
  */
 int dmi_name_in_vendors(const char *str)
 {
@@ -682,13 +681,13 @@ EXPORT_SYMBOL(dmi_name_in_vendors);
  *	A new search is initiated by passing %NULL as the @from argument.
  *	If @from is not %NULL, searches continue from next device.
  */
-const struct dmi_device * dmi_find_device(int type, const char *name,
+const struct dmi_device *dmi_find_device(int type, const char *name,
 				    const struct dmi_device *from)
 {
 	const struct list_head *head = from ? &from->list : &dmi_devices;
 	struct list_head *d;
 
-	for(d = head->next; d != &dmi_devices; d = d->next) {
+	for (d = head->next; d != &dmi_devices; d = d->next) {
 		const struct dmi_device *dev =
 			list_entry(d, struct dmi_device, list);
 
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index 60685b21cc36..379a47ea99f6 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -273,8 +273,8 @@ static int cirrus_crtc_mode_set(struct drm_crtc *crtc,
 		sr07 |= 0x11;
 		break;
 	case 16:
-		sr07 |= 0xc1;
-		hdr = 0xc0;
+		sr07 |= 0x17;
+		hdr = 0xc1;
 		break;
 	case 24:
 		sr07 |= 0x15;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 3d13ca6e257f..f6f6cc7fc133 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -416,6 +416,14 @@ static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode)
 		return;
 
 	/*
+	 * fbdev->blank can be called from irq context in case of a panic.
+	 * Since we already have our own special panic handler which will
+	 * restore the fbdev console mode completely, just bail out early.
+	 */
+	if (oops_in_progress)
+		return;
+
+	/*
 	 * For each CRTC in this fb, turn the connectors on/off.
 	 */
 	drm_modeset_lock_all(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index d97f20069d3e..4d70fb73d41b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -46,6 +46,7 @@ bool nouveau_is_v1_dsm(void) {
 #define NOUVEAU_DSM_HAS_MUX 0x1
 #define NOUVEAU_DSM_HAS_OPT 0x2
 
+#ifdef CONFIG_VGA_SWITCHEROO
 static const char nouveau_dsm_muid[] = {
 	0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
 	0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
@@ -337,6 +338,10 @@ void nouveau_unregister_dsm_handler(void)
 	if (nouveau_dsm_priv.optimus_detected || nouveau_dsm_priv.dsm_detected)
 		vga_switcheroo_unregister_handler();
 }
+#else
+void nouveau_register_dsm_handler(void) {}
+void nouveau_unregister_dsm_handler(void) {}
+#endif
 
 /* retrieve the ROM in 4k blocks */
 static int nouveau_rom_call(acpi_handle rom_handle, uint8_t *bios,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 784b97cb05b0..f2ef7ef0f36f 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -383,14 +383,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
 	int id;
-	static int next_id;
 
 	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&cm.lock, flags);
 
-	id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT);
-	if (id >= 0)
-		next_id = max(id + 1, 0);
+	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
 
 	spin_unlock_irqrestore(&cm.lock, flags);
 	idr_preload_end();
diff --git a/drivers/isdn/hisax/amd7930_fn.c b/drivers/isdn/hisax/amd7930_fn.c
index 1063babe1d3a..36817e0a0b94 100644
--- a/drivers/isdn/hisax/amd7930_fn.c
+++ b/drivers/isdn/hisax/amd7930_fn.c
@@ -314,7 +314,7 @@ Amd7930_empty_Dfifo(struct IsdnCardState *cs, int flag)
 
 							t += sprintf(t, "Amd7930: empty_Dfifo cnt: %d |", cs->rcvidx);
 							QuickHex(t, cs->rcvbuf, cs->rcvidx);
-							debugl1(cs, cs->dlog);
+							debugl1(cs, "%s", cs->dlog);
 						}
 						/* moves received data in sk-buffer */
 						memcpy(skb_put(skb, cs->rcvidx), cs->rcvbuf, cs->rcvidx);
@@ -406,7 +406,7 @@ Amd7930_fill_Dfifo(struct IsdnCardState *cs)
 
 		t += sprintf(t, "Amd7930: fill_Dfifo cnt: %d |", count);
 		QuickHex(t, deb_ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 	/* AMD interrupts on */
 	AmdIrqOn(cs);
diff --git a/drivers/isdn/hisax/avm_pci.c b/drivers/isdn/hisax/avm_pci.c
index ee9b9a03cffa..d1427bd6452d 100644
--- a/drivers/isdn/hisax/avm_pci.c
+++ b/drivers/isdn/hisax/avm_pci.c
@@ -285,7 +285,7 @@ hdlc_empty_fifo(struct BCState *bcs, int count)
 		t += sprintf(t, "hdlc_empty_fifo %c cnt %d",
 			     bcs->channel ? 'B' : 'A', count);
 		QuickHex(t, p, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
@@ -345,7 +345,7 @@ hdlc_fill_fifo(struct BCState *bcs)
 		t += sprintf(t, "hdlc_fill_fifo %c cnt %d",
 			     bcs->channel ? 'B' : 'A', count);
 		QuickHex(t, p, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index bf04d2a3cf4a..b33f53b3ca93 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -1896,7 +1896,7 @@ static void EChannel_proc_rcv(struct hisax_d_if *d_if)
 				ptr--;
 				*ptr++ = '\n';
 				*ptr = 0;
-				HiSax_putstatus(cs, NULL, cs->dlog);
+				HiSax_putstatus(cs, NULL, "%s", cs->dlog);
 			} else
 				HiSax_putstatus(cs, "LogEcho: ",
 						"warning Frame too big (%d)",
diff --git a/drivers/isdn/hisax/diva.c b/drivers/isdn/hisax/diva.c
index 8d0cf6e4dc00..4fc90de68d18 100644
--- a/drivers/isdn/hisax/diva.c
+++ b/drivers/isdn/hisax/diva.c
@@ -427,7 +427,7 @@ Memhscx_empty_fifo(struct BCState *bcs, int count)
 		t += sprintf(t, "hscx_empty_fifo %c cnt %d",
 			     bcs->hw.hscx.hscx ? 'B' : 'A', count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
@@ -469,7 +469,7 @@ Memhscx_fill_fifo(struct BCState *bcs)
 		t += sprintf(t, "hscx_fill_fifo %c cnt %d",
 			     bcs->hw.hscx.hscx ? 'B' : 'A', count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
diff --git a/drivers/isdn/hisax/elsa.c b/drivers/isdn/hisax/elsa.c
index 1df6f9a56ca2..2be1c8a3bb5f 100644
--- a/drivers/isdn/hisax/elsa.c
+++ b/drivers/isdn/hisax/elsa.c
@@ -535,7 +535,7 @@ check_arcofi(struct IsdnCardState *cs)
 		t = tmp;
 		t += sprintf(tmp, "Arcofi data");
 		QuickHex(t, p, cs->dc.isac.mon_rxp);
-		debugl1(cs, tmp);
+		debugl1(cs, "%s", tmp);
 		if ((cs->dc.isac.mon_rxp == 2) && (cs->dc.isac.mon_rx[0] == 0xa0)) {
 			switch (cs->dc.isac.mon_rx[1]) {
 			case 0x80:
diff --git a/drivers/isdn/hisax/elsa_ser.c b/drivers/isdn/hisax/elsa_ser.c
index d4c98d330bfe..3f84dd8f1757 100644
--- a/drivers/isdn/hisax/elsa_ser.c
+++ b/drivers/isdn/hisax/elsa_ser.c
@@ -344,7 +344,7 @@ static inline void receive_chars(struct IsdnCardState *cs,
 
 		t += sprintf(t, "modem read cnt %d", cs->hw.elsa.rcvcnt);
 		QuickHex(t, cs->hw.elsa.rcvbuf, cs->hw.elsa.rcvcnt);
-		debugl1(cs, tmp);
+		debugl1(cs, "%s", tmp);
 	}
 	cs->hw.elsa.rcvcnt = 0;
 }
diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c
index 3ccd724ff8c2..497bd026c237 100644
--- a/drivers/isdn/hisax/hfc_pci.c
+++ b/drivers/isdn/hisax/hfc_pci.c
@@ -901,7 +901,7 @@ Begin:
 					ptr--;
 					*ptr++ = '\n';
 					*ptr = 0;
-					HiSax_putstatus(cs, NULL, cs->dlog);
+					HiSax_putstatus(cs, NULL, "%s", cs->dlog);
 				} else
 					HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", total - 3);
 			}
diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c
index dc4574f735ef..fa1fefd711cd 100644
--- a/drivers/isdn/hisax/hfc_sx.c
+++ b/drivers/isdn/hisax/hfc_sx.c
@@ -674,7 +674,7 @@ receive_emsg(struct IsdnCardState *cs)
 					ptr--;
 					*ptr++ = '\n';
 					*ptr = 0;
-					HiSax_putstatus(cs, NULL, cs->dlog);
+					HiSax_putstatus(cs, NULL, "%s", cs->dlog);
 				} else
 					HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", skb->len);
 			}
diff --git a/drivers/isdn/hisax/hscx_irq.c b/drivers/isdn/hisax/hscx_irq.c
index f398d4838937..a8d6188402c6 100644
--- a/drivers/isdn/hisax/hscx_irq.c
+++ b/drivers/isdn/hisax/hscx_irq.c
@@ -75,7 +75,7 @@ hscx_empty_fifo(struct BCState *bcs, int count)
 		t += sprintf(t, "hscx_empty_fifo %c cnt %d",
 			     bcs->hw.hscx.hscx ? 'B' : 'A', count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
@@ -115,7 +115,7 @@ hscx_fill_fifo(struct BCState *bcs)
 		t += sprintf(t, "hscx_fill_fifo %c cnt %d",
 			     bcs->hw.hscx.hscx ? 'B' : 'A', count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
diff --git a/drivers/isdn/hisax/icc.c b/drivers/isdn/hisax/icc.c
index db5321f6379b..51dae9167238 100644
--- a/drivers/isdn/hisax/icc.c
+++ b/drivers/isdn/hisax/icc.c
@@ -134,7 +134,7 @@ icc_empty_fifo(struct IsdnCardState *cs, int count)
 
 		t += sprintf(t, "icc_empty_fifo cnt %d", count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 }
 
@@ -176,7 +176,7 @@ icc_fill_fifo(struct IsdnCardState *cs)
 
 		t += sprintf(t, "icc_fill_fifo cnt %d", count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 }
 
diff --git a/drivers/isdn/hisax/ipacx.c b/drivers/isdn/hisax/ipacx.c
index 74feb5c83067..5faa5de24305 100644
--- a/drivers/isdn/hisax/ipacx.c
+++ b/drivers/isdn/hisax/ipacx.c
@@ -260,7 +260,7 @@ dch_empty_fifo(struct IsdnCardState *cs, int count)
 
 		t += sprintf(t, "dch_empty_fifo() cnt %d", count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 }
 
@@ -307,7 +307,7 @@ dch_fill_fifo(struct IsdnCardState *cs)
 
 		t += sprintf(t, "dch_fill_fifo() cnt %d", count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 }
 
@@ -539,7 +539,7 @@ bch_empty_fifo(struct BCState *bcs, int count)
 
 		t += sprintf(t, "bch_empty_fifo() B-%d cnt %d", hscx, count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
@@ -582,7 +582,7 @@ bch_fill_fifo(struct BCState *bcs)
 
 		t += sprintf(t, "chb_fill_fifo() B-%d cnt %d", hscx, count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
diff --git a/drivers/isdn/hisax/isac.c b/drivers/isdn/hisax/isac.c
index a365ccc1c99c..7fdf78f46433 100644
--- a/drivers/isdn/hisax/isac.c
+++ b/drivers/isdn/hisax/isac.c
@@ -137,7 +137,7 @@ isac_empty_fifo(struct IsdnCardState *cs, int count)
 
 		t += sprintf(t, "isac_empty_fifo cnt %d", count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 }
 
@@ -179,7 +179,7 @@ isac_fill_fifo(struct IsdnCardState *cs)
 
 		t += sprintf(t, "isac_fill_fifo cnt %d", count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 }
 
diff --git a/drivers/isdn/hisax/isar.c b/drivers/isdn/hisax/isar.c
index 7fdf34704fe5..f4956c73aa11 100644
--- a/drivers/isdn/hisax/isar.c
+++ b/drivers/isdn/hisax/isar.c
@@ -74,7 +74,7 @@ sendmsg(struct IsdnCardState *cs, u_char his, u_char creg, u_char len,
 				t = tmp;
 				t += sprintf(t, "sendmbox cnt %d", len);
 				QuickHex(t, &msg[len-i], (i > 64) ? 64 : i);
-				debugl1(cs, tmp);
+				debugl1(cs, "%s", tmp);
 				i -= 64;
 			}
 		}
@@ -105,7 +105,7 @@ rcv_mbox(struct IsdnCardState *cs, struct isar_reg *ireg, u_char *msg)
 				t = tmp;
 				t += sprintf(t, "rcv_mbox cnt %d", ireg->clsb);
 				QuickHex(t, &msg[ireg->clsb - i], (i > 64) ? 64 : i);
-				debugl1(cs, tmp);
+				debugl1(cs, "%s", tmp);
 				i -= 64;
 			}
 		}
@@ -1248,7 +1248,7 @@ isar_int_main(struct IsdnCardState *cs)
 			tp += sprintf(debbuf, "msg iis(%x) msb(%x)",
 				      ireg->iis, ireg->cmsb);
 			QuickHex(tp, (u_char *)ireg->par, ireg->clsb);
-			debugl1(cs, debbuf);
+			debugl1(cs, "%s", debbuf);
 		}
 		break;
 	case ISAR_IIS_INVMSG:
diff --git a/drivers/isdn/hisax/jade.c b/drivers/isdn/hisax/jade.c
index f946c58d8ab1..e2ae7871a209 100644
--- a/drivers/isdn/hisax/jade.c
+++ b/drivers/isdn/hisax/jade.c
@@ -81,10 +81,7 @@ modejade(struct BCState *bcs, int mode, int bc)
 	int jade = bcs->hw.hscx.hscx;
 
 	if (cs->debug & L1_DEB_HSCX) {
-		char tmp[40];
-		sprintf(tmp, "jade %c mode %d ichan %d",
-			'A' + jade, mode, bc);
-		debugl1(cs, tmp);
+		debugl1(cs, "jade %c mode %d ichan %d", 'A' + jade, mode, bc);
 	}
 	bcs->mode = mode;
 	bcs->channel = bc;
@@ -257,23 +254,18 @@ void
 clear_pending_jade_ints(struct IsdnCardState *cs)
 {
 	int val;
-	char tmp[64];
 
 	cs->BC_Write_Reg(cs, 0, jade_HDLC_IMR, 0x00);
 	cs->BC_Write_Reg(cs, 1, jade_HDLC_IMR, 0x00);
 
 	val = cs->BC_Read_Reg(cs, 1, jade_HDLC_ISR);
-	sprintf(tmp, "jade B ISTA %x", val);
-	debugl1(cs, tmp);
+	debugl1(cs, "jade B ISTA %x", val);
 	val = cs->BC_Read_Reg(cs, 0, jade_HDLC_ISR);
-	sprintf(tmp, "jade A ISTA %x", val);
-	debugl1(cs, tmp);
+	debugl1(cs, "jade A ISTA %x", val);
 	val = cs->BC_Read_Reg(cs, 1, jade_HDLC_STAR);
-	sprintf(tmp, "jade B STAR %x", val);
-	debugl1(cs, tmp);
+	debugl1(cs, "jade B STAR %x", val);
 	val = cs->BC_Read_Reg(cs, 0, jade_HDLC_STAR);
-	sprintf(tmp, "jade A STAR %x", val);
-	debugl1(cs, tmp);
+	debugl1(cs, "jade A STAR %x", val);
 	/* Unmask ints */
 	cs->BC_Write_Reg(cs, 0, jade_HDLC_IMR, 0xF8);
 	cs->BC_Write_Reg(cs, 1, jade_HDLC_IMR, 0xF8);
diff --git a/drivers/isdn/hisax/jade_irq.c b/drivers/isdn/hisax/jade_irq.c
index f521fc83dc76..b930da9b5aa6 100644
--- a/drivers/isdn/hisax/jade_irq.c
+++ b/drivers/isdn/hisax/jade_irq.c
@@ -65,7 +65,7 @@ jade_empty_fifo(struct BCState *bcs, int count)
 		t += sprintf(t, "jade_empty_fifo %c cnt %d",
 			     bcs->hw.hscx.hscx ? 'B' : 'A', count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
@@ -105,7 +105,7 @@ jade_fill_fifo(struct BCState *bcs)
 		t += sprintf(t, "jade_fill_fifo %c cnt %d",
 			     bcs->hw.hscx.hscx ? 'B' : 'A', count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
diff --git a/drivers/isdn/hisax/l3_1tr6.c b/drivers/isdn/hisax/l3_1tr6.c
index 4c1bca5caa1d..875402e76d0a 100644
--- a/drivers/isdn/hisax/l3_1tr6.c
+++ b/drivers/isdn/hisax/l3_1tr6.c
@@ -63,7 +63,7 @@ l3_1tr6_error(struct l3_process *pc, u_char *msg, struct sk_buff *skb)
 {
 	dev_kfree_skb(skb);
 	if (pc->st->l3.debug & L3_DEB_WARN)
-		l3_debug(pc->st, msg);
+		l3_debug(pc->st, "%s", msg);
 	l3_1tr6_release_req(pc, 0, NULL);
 }
 
@@ -161,7 +161,6 @@ l3_1tr6_setup(struct l3_process *pc, u_char pr, void *arg)
 {
 	u_char *p;
 	int bcfound = 0;
-	char tmp[80];
 	struct sk_buff *skb = arg;
 
 	/* Channel Identification */
@@ -214,10 +213,9 @@ l3_1tr6_setup(struct l3_process *pc, u_char pr, void *arg)
 	/* Signal all services, linklevel takes care of Service-Indicator */
 	if (bcfound) {
 		if ((pc->para.setup.si1 != 7) && (pc->st->l3.debug & L3_DEB_WARN)) {
-			sprintf(tmp, "non-digital call: %s -> %s",
+			l3_debug(pc->st, "non-digital call: %s -> %s",
 				pc->para.setup.phone,
 				pc->para.setup.eazmsn);
-			l3_debug(pc->st, tmp);
 		}
 		newl3state(pc, 6);
 		pc->st->l3.l3l4(pc->st, CC_SETUP | INDICATION, pc);
@@ -301,7 +299,7 @@ l3_1tr6_info(struct l3_process *pc, u_char pr, void *arg)
 {
 	u_char *p;
 	int i, tmpcharge = 0;
-	char a_charge[8], tmp[32];
+	char a_charge[8];
 	struct sk_buff *skb = arg;
 
 	p = skb->data;
@@ -316,8 +314,8 @@ l3_1tr6_info(struct l3_process *pc, u_char pr, void *arg)
 			pc->st->l3.l3l4(pc->st, CC_CHARGE | INDICATION, pc);
 		}
 		if (pc->st->l3.debug & L3_DEB_CHARGE) {
-			sprintf(tmp, "charging info %d", pc->para.chargeinfo);
-			l3_debug(pc->st, tmp);
+			l3_debug(pc->st, "charging info %d",
+				 pc->para.chargeinfo);
 		}
 	} else if (pc->st->l3.debug & L3_DEB_CHARGE)
 		l3_debug(pc->st, "charging info not found");
@@ -399,7 +397,7 @@ l3_1tr6_disc(struct l3_process *pc, u_char pr, void *arg)
 	struct sk_buff *skb = arg;
 	u_char *p;
 	int i, tmpcharge = 0;
-	char a_charge[8], tmp[32];
+	char a_charge[8];
 
 	StopAllL3Timer(pc);
 	p = skb->data;
@@ -414,8 +412,8 @@ l3_1tr6_disc(struct l3_process *pc, u_char pr, void *arg)
 			pc->st->l3.l3l4(pc->st, CC_CHARGE | INDICATION, pc);
 		}
 		if (pc->st->l3.debug & L3_DEB_CHARGE) {
-			sprintf(tmp, "charging info %d", pc->para.chargeinfo);
-			l3_debug(pc->st, tmp);
+			l3_debug(pc->st, "charging info %d",
+				 pc->para.chargeinfo);
 		}
 	} else if (pc->st->l3.debug & L3_DEB_CHARGE)
 		l3_debug(pc->st, "charging info not found");
@@ -746,7 +744,6 @@ up1tr6(struct PStack *st, int pr, void *arg)
 	int i, mt, cr;
 	struct l3_process *proc;
 	struct sk_buff *skb = arg;
-	char tmp[80];
 
 	switch (pr) {
 	case (DL_DATA | INDICATION):
@@ -762,26 +759,23 @@ up1tr6(struct PStack *st, int pr, void *arg)
 	}
 	if (skb->len < 4) {
 		if (st->l3.debug & L3_DEB_PROTERR) {
-			sprintf(tmp, "up1tr6 len only %d", skb->len);
-			l3_debug(st, tmp);
+			l3_debug(st, "up1tr6 len only %d", skb->len);
 		}
 		dev_kfree_skb(skb);
 		return;
 	}
 	if ((skb->data[0] & 0xfe) != PROTO_DIS_N0) {
 		if (st->l3.debug & L3_DEB_PROTERR) {
-			sprintf(tmp, "up1tr6%sunexpected discriminator %x message len %d",
+			l3_debug(st, "up1tr6%sunexpected discriminator %x message len %d",
 				(pr == (DL_DATA | INDICATION)) ? " " : "(broadcast) ",
 				skb->data[0], skb->len);
-			l3_debug(st, tmp);
 		}
 		dev_kfree_skb(skb);
 		return;
 	}
 	if (skb->data[1] != 1) {
 		if (st->l3.debug & L3_DEB_PROTERR) {
-			sprintf(tmp, "up1tr6 CR len not 1");
-			l3_debug(st, tmp);
+			l3_debug(st, "up1tr6 CR len not 1");
 		}
 		dev_kfree_skb(skb);
 		return;
@@ -791,9 +785,8 @@ up1tr6(struct PStack *st, int pr, void *arg)
 	if (skb->data[0] == PROTO_DIS_N0) {
 		dev_kfree_skb(skb);
 		if (st->l3.debug & L3_DEB_STATE) {
-			sprintf(tmp, "up1tr6%s N0 mt %x unhandled",
+			l3_debug(st, "up1tr6%s N0 mt %x unhandled",
 				(pr == (DL_DATA | INDICATION)) ? " " : "(broadcast) ", mt);
-			l3_debug(st, tmp);
 		}
 	} else if (skb->data[0] == PROTO_DIS_N1) {
 		if (!(proc = getl3proc(st, cr))) {
@@ -801,8 +794,7 @@ up1tr6(struct PStack *st, int pr, void *arg)
 				if (cr < 128) {
 					if (!(proc = new_l3_process(st, cr))) {
 						if (st->l3.debug & L3_DEB_PROTERR) {
-							sprintf(tmp, "up1tr6 no roc mem");
-							l3_debug(st, tmp);
+							l3_debug(st, "up1tr6 no roc mem");
 						}
 						dev_kfree_skb(skb);
 						return;
@@ -821,8 +813,7 @@ up1tr6(struct PStack *st, int pr, void *arg)
 			} else {
 				if (!(proc = new_l3_process(st, cr))) {
 					if (st->l3.debug & L3_DEB_PROTERR) {
-						sprintf(tmp, "up1tr6 no roc mem");
-						l3_debug(st, tmp);
+						l3_debug(st, "up1tr6 no roc mem");
 					}
 					dev_kfree_skb(skb);
 					return;
@@ -837,18 +828,16 @@ up1tr6(struct PStack *st, int pr, void *arg)
 		if (i == ARRAY_SIZE(datastln1)) {
 			dev_kfree_skb(skb);
 			if (st->l3.debug & L3_DEB_STATE) {
-				sprintf(tmp, "up1tr6%sstate %d mt %x unhandled",
+				l3_debug(st, "up1tr6%sstate %d mt %x unhandled",
 					(pr == (DL_DATA | INDICATION)) ? " " : "(broadcast) ",
 					proc->state, mt);
-				l3_debug(st, tmp);
 			}
 			return;
 		} else {
 			if (st->l3.debug & L3_DEB_STATE) {
-				sprintf(tmp, "up1tr6%sstate %d mt %x",
+				l3_debug(st, "up1tr6%sstate %d mt %x",
 					(pr == (DL_DATA | INDICATION)) ? " " : "(broadcast) ",
 					proc->state, mt);
-				l3_debug(st, tmp);
 			}
 			datastln1[i].rout(proc, pr, skb);
 		}
@@ -861,7 +850,6 @@ down1tr6(struct PStack *st, int pr, void *arg)
 	int i, cr;
 	struct l3_process *proc;
 	struct Channel *chan;
-	char tmp[80];
 
 	if ((DL_ESTABLISH | REQUEST) == pr) {
 		l3_msg(st, pr, NULL);
@@ -888,15 +876,13 @@ down1tr6(struct PStack *st, int pr, void *arg)
 			break;
 	if (i == ARRAY_SIZE(downstl)) {
 		if (st->l3.debug & L3_DEB_STATE) {
-			sprintf(tmp, "down1tr6 state %d prim %d unhandled",
+			l3_debug(st, "down1tr6 state %d prim %d unhandled",
 				proc->state, pr);
-			l3_debug(st, tmp);
 		}
 	} else {
 		if (st->l3.debug & L3_DEB_STATE) {
-			sprintf(tmp, "down1tr6 state %d prim %d",
+			l3_debug(st, "down1tr6 state %d prim %d",
 				proc->state, pr);
-			l3_debug(st, tmp);
 		}
 		downstl[i].rout(proc, pr, arg);
 	}
diff --git a/drivers/isdn/hisax/netjet.c b/drivers/isdn/hisax/netjet.c
index b646eed379df..233e432e06f6 100644
--- a/drivers/isdn/hisax/netjet.c
+++ b/drivers/isdn/hisax/netjet.c
@@ -176,7 +176,7 @@ static void printframe(struct IsdnCardState *cs, u_char *buf, int count, char *s
 		else
 			j = i;
 		QuickHex(t, p, j);
-		debugl1(cs, tmp);
+		debugl1(cs, "%s", tmp);
 		p += j;
 		i -= j;
 		t = tmp;
diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c
index 041bf52d9d0a..af1b020a81f1 100644
--- a/drivers/isdn/hisax/q931.c
+++ b/drivers/isdn/hisax/q931.c
@@ -1179,7 +1179,7 @@ LogFrame(struct IsdnCardState *cs, u_char *buf, int size)
 		dp--;
 		*dp++ = '\n';
 		*dp = 0;
-		HiSax_putstatus(cs, NULL, cs->dlog);
+		HiSax_putstatus(cs, NULL, "%s", cs->dlog);
 	} else
 		HiSax_putstatus(cs, "LogFrame: ", "warning Frame too big (%d)", size);
 }
@@ -1246,7 +1246,7 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir)
 	}
 	if (finish) {
 		*dp = 0;
-		HiSax_putstatus(cs, NULL, cs->dlog);
+		HiSax_putstatus(cs, NULL, "%s", cs->dlog);
 		return;
 	}
 	if ((0xfe & buf[0]) == PROTO_DIS_N0) {	/* 1TR6 */
@@ -1509,5 +1509,5 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir)
 		dp += sprintf(dp, "Unknown protocol %x!", buf[0]);
 	}
 	*dp = 0;
-	HiSax_putstatus(cs, NULL, cs->dlog);
+	HiSax_putstatus(cs, NULL, "%s", cs->dlog);
 }
diff --git a/drivers/isdn/hisax/w6692.c b/drivers/isdn/hisax/w6692.c
index d8cac6935818..a85895585d90 100644
--- a/drivers/isdn/hisax/w6692.c
+++ b/drivers/isdn/hisax/w6692.c
@@ -154,7 +154,7 @@ W6692_empty_fifo(struct IsdnCardState *cs, int count)
 
 		t += sprintf(t, "W6692_empty_fifo cnt %d", count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 }
 
@@ -196,7 +196,7 @@ W6692_fill_fifo(struct IsdnCardState *cs)
 
 		t += sprintf(t, "W6692_fill_fifo cnt %d", count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, cs->dlog);
+		debugl1(cs, "%s", cs->dlog);
 	}
 }
 
@@ -226,7 +226,7 @@ W6692B_empty_fifo(struct BCState *bcs, int count)
 		t += sprintf(t, "W6692B_empty_fifo %c cnt %d",
 			     bcs->channel + '1', count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
@@ -264,7 +264,7 @@ W6692B_fill_fifo(struct BCState *bcs)
 		t += sprintf(t, "W6692B_fill_fifo %c cnt %d",
 			     bcs->channel + '1', count);
 		QuickHex(t, ptr, count);
-		debugl1(cs, bcs->blog);
+		debugl1(cs, "%s", bcs->blog);
 	}
 }
 
diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c
index da30c5cb9609..174aa3f17afb 100644
--- a/drivers/isdn/mISDN/core.c
+++ b/drivers/isdn/mISDN/core.c
@@ -21,10 +21,14 @@
 #include "core.h"
 
 static u_int debug;
+static u_int gid;
+kgid_t misdn_permitted_gid;
 
 MODULE_AUTHOR("Karsten Keil");
 MODULE_LICENSE("GPL");
 module_param(debug, uint, S_IRUGO | S_IWUSR);
+module_param(gid, uint, 0);
+MODULE_PARM_DESC(gid, "Unix group for accessing misdn socket (default 0)");
 
 static u64		device_ids;
 #define MAX_DEVICE_ID	63
@@ -372,6 +376,8 @@ mISDNInit(void)
 {
 	int	err;
 
+	misdn_permitted_gid = make_kgid(current_user_ns(), gid);
+
 	printk(KERN_INFO "Modular ISDN core version %d.%d.%d\n",
 	       MISDN_MAJOR_VERSION, MISDN_MINOR_VERSION, MISDN_RELEASE);
 	mISDN_init_clock(&debug);
diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h
index 52695bb81ee7..5f509bf93c00 100644
--- a/drivers/isdn/mISDN/core.h
+++ b/drivers/isdn/mISDN/core.h
@@ -17,6 +17,7 @@
 
 extern struct mISDNdevice	*get_mdevice(u_int);
 extern int			get_mdevice_count(void);
+extern kgid_t misdn_permitted_gid;
 
 /* stack status flag */
 #define mISDN_STACK_ACTION_MASK		0x0000ffff
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index e47dcb9d1e91..8dcef368e9da 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -612,6 +612,11 @@ data_sock_create(struct net *net, struct socket *sock, int protocol)
 {
 	struct sock *sk;
 
+	if (!capable(CAP_SYS_ADMIN) &&
+			!gid_eq(misdn_permitted_gid, current_gid()) &&
+			!in_group_p(misdn_permitted_gid))
+		return -EPERM;
+
 	if (sock->type != SOCK_DGRAM)
 		return -ESOCKTNOSUPPORT;
 
@@ -694,6 +699,10 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case IMSETDEVNAME:
 	{
 		struct mISDN_devrename dn;
+		if (!capable(CAP_SYS_ADMIN) &&
+				!gid_eq(misdn_permitted_gid, current_gid()) &&
+				!in_group_p(misdn_permitted_gid))
+			return -EPERM;
 		if (copy_from_user(&dn, (void __user *)arg,
 				   sizeof(dn))) {
 			err = -EFAULT;
diff --git a/drivers/memstick/core/Kconfig b/drivers/memstick/core/Kconfig
index 95f1814b5368..1d389491d5fd 100644
--- a/drivers/memstick/core/Kconfig
+++ b/drivers/memstick/core/Kconfig
@@ -24,3 +24,15 @@ config MSPRO_BLOCK
 	  support. This provides a block device driver, which you can use
 	  to mount the filesystem. Almost everyone wishing MemoryStick
 	  support should say Y or M here.
+
+config MS_BLOCK
+	tristate "MemoryStick Standard device driver"
+	depends on BLOCK
+	help
+	  Say Y here to enable the MemoryStick Standard device driver
+	  support. This provides a block device driver, which you can use
+	  to mount the filesystem.
+	  This driver works with old (bulky) MemoryStick and MemoryStick Duo
+	  but not PRO. Say Y if you have such card.
+	  Driver is new and not yet well tested, thus it can damage your card
+	  (even permanently)
diff --git a/drivers/memstick/core/Makefile b/drivers/memstick/core/Makefile
index ecd029937738..0d7f90c0ff25 100644
--- a/drivers/memstick/core/Makefile
+++ b/drivers/memstick/core/Makefile
@@ -3,5 +3,5 @@
 #
 
 obj-$(CONFIG_MEMSTICK)		+= memstick.o
-
+obj-$(CONFIG_MS_BLOCK)		+= ms_block.o
 obj-$(CONFIG_MSPRO_BLOCK)	+= mspro_block.o
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
new file mode 100644
index 000000000000..08e70232062f
--- /dev/null
+++ b/drivers/memstick/core/ms_block.c
@@ -0,0 +1,2385 @@
+/*
+ *  ms_block.c - Sony MemoryStick (legacy) storage support
+
+ *  Copyright (C) 2013 Maxim Levitsky <maximlevitsky@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Minor portions of the driver were copied from mspro_block.c which is
+ * Copyright (C) 2007 Alex Dubov <oakad@yahoo.com>
+ *
+ */
+#define DRIVER_NAME "ms_block"
+#define pr_fmt(fmt) DRIVER_NAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/memstick.h>
+#include <linux/idr.h>
+#include <linux/hdreg.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/bitmap.h>
+#include <linux/scatterlist.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include "ms_block.h"
+
+static int debug;
+static int cache_flush_timeout = 1000;
+static bool verify_writes;
+
+/*
+ * Maps the section of 'sg_from' starting at offset 'offset' and of length
+ * 'len' into another scatterlist 'sg_to' of 'to_nents' entries (no data copy)
+ */
+static size_t msb_sg_copy(struct scatterlist *sg_from,
+	struct scatterlist *sg_to, int to_nents, size_t offset, size_t len)
+{
+	size_t copied = 0;
+
+	while (offset > 0) {
+		if (offset >= sg_from->length) {
+			if (sg_is_last(sg_from))
+				return 0;
+
+			offset -= sg_from->length;
+			sg_from = sg_next(sg_from);
+			continue;
+		}
+
+		copied = min(len, sg_from->length - offset);
+		sg_set_page(sg_to, sg_page(sg_from),
+			copied, sg_from->offset + offset);
+
+		len -= copied;
+		offset = 0;
+
+		if (sg_is_last(sg_from) || !len)
+			goto out;
+
+		sg_to = sg_next(sg_to);
+		to_nents--;
+		sg_from = sg_next(sg_from);
+	}
+
+	while (len > sg_from->length && to_nents--) {
+		len -= sg_from->length;
+		copied += sg_from->length;
+
+		sg_set_page(sg_to, sg_page(sg_from),
+				sg_from->length, sg_from->offset);
+
+		if (sg_is_last(sg_from) || !len)
+			goto out;
+
+		sg_from = sg_next(sg_from);
+		sg_to = sg_next(sg_to);
+	}
+
+	if (len && to_nents) {
+		sg_set_page(sg_to, sg_page(sg_from), len, sg_from->offset);
+		copied += len;
+	}
+out:
+	sg_mark_end(sg_to);
+	return copied;
+}
+
+/*
+ * Compares the section of 'sg' starting at offset 'offset' and of length 'len'
+ * to the linear buffer of length 'len' at address 'buffer'.
+ * Returns 0 if equal and -1 otherwise
+ */
+static int msb_sg_compare_to_buffer(struct scatterlist *sg,
+					size_t offset, u8 *buffer, size_t len)
+{
+	int retval = 0, cmplen;
+	struct sg_mapping_iter miter;
+
+	sg_miter_start(&miter, sg, sg_nents(sg),
+					SG_MITER_ATOMIC | SG_MITER_FROM_SG);
+
+	while (sg_miter_next(&miter) && len > 0) {
+		if (offset >= miter.length) {
+			offset -= miter.length;
+			continue;
+		}
+
+		cmplen = min(miter.length - offset, len);
+		retval = memcmp(miter.addr + offset, buffer, cmplen) ? -1 : 0;
+		if (retval)
+			break;
+
+		buffer += cmplen;
+		len -= cmplen;
+		offset = 0;
+	}
+
+	if (!retval && len)
+		retval = -1;
+
+	sg_miter_stop(&miter);
+	return retval;
+}
+
+
+/* Get the zone in which the block with logical address 'lba' lives.
+ * Flash is broken into zones.
+ * Each zone consists of 512 eraseblocks, of which 494 are used in the
+ * first zone and 496 in each of the following zones.
+ * Therefore zone #0 hosts blocks 0-493, zone #1 blocks 494-989, etc...
+*/
+static int msb_get_zone_from_lba(int lba)
+{
+	if (lba < 494)
+		return 0;
+	return ((lba - 494) / 496) + 1;
+}
+
+/* Get zone of physical block. Trivial */
+static int msb_get_zone_from_pba(int pba)
+{
+	return pba / MS_BLOCKS_IN_ZONE;
+}
+
+/* Debug test: summed per-zone free counts must match the used-blocks bitmap */
+static int msb_validate_used_block_bitmap(struct msb_data *msb)
+{
+	int total_free_blocks = 0;
+	int i;
+
+	if (!debug)	/* the check only runs when 'debug' is non-zero */
+		return 0;
+
+	for (i = 0; i < msb->zone_count; i++)
+		total_free_blocks += msb->free_block_count[i];
+
+	if (msb->block_count - bitmap_weight(msb->used_blocks_bitmap,
+					msb->block_count) == total_free_blocks)
+		return 0;
+
+	pr_err("BUG: free block counts don't match the bitmap\n");
+	msb->read_only = true;	/* accounting is inconsistent: flag read-only */
+	return -EINVAL;
+}
+
+/* Mark physical block as used; on inconsistency flag read-only and bail out */
+static void msb_mark_block_used(struct msb_data *msb, int pba)
+{
+	int zone = msb_get_zone_from_pba(pba);
+
+	if (test_bit(pba, msb->used_blocks_bitmap)) {
+		pr_err("BUG: attempt to mark already used pba %d as used\n",
+		       pba);
+		msb->read_only = true;
+		return;
+	}
+
+	if (msb_validate_used_block_bitmap(msb))
+		return;
+
+	/* No races because all IO is single threaded */
+	__set_bit(pba, msb->used_blocks_bitmap);
+	msb->free_block_count[zone]--;
+}
+
+/* Mark physical block as free; on inconsistency flag read-only and bail out */
+static void msb_mark_block_unused(struct msb_data *msb, int pba)
+{
+	int zone = msb_get_zone_from_pba(pba);
+
+	if (!test_bit(pba, msb->used_blocks_bitmap)) {
+		pr_err("BUG: attempt to mark already unused pba %d as unused\n", pba);
+		msb->read_only = true;
+		return;
+	}
+
+	if (msb_validate_used_block_bitmap(msb))
+		return;
+
+	/* No races because all IO is single threaded */
+	__clear_bit(pba, msb->used_blocks_bitmap);
+	msb->free_block_count[zone]++;
+}
+
+/* Invalidate current register window */
+static void msb_invalidate_reg_window(struct msb_data *msb)
+{
+	msb->reg_addr.w_offset = offsetof(struct ms_register, id);
+	msb->reg_addr.w_length = sizeof(struct ms_id_register);
+	msb->reg_addr.r_offset = offsetof(struct ms_register, id);
+	msb->reg_addr.r_length = sizeof(struct ms_id_register);
+	msb->addr_valid = false;
+}
+
+/* Start a state machine */
+static int msb_run_state_machine(struct msb_data *msb, int   (*state_func)
+		(struct memstick_dev *card, struct memstick_request **req))
+{
+	struct memstick_dev *card = msb->card;
+
+	WARN_ON(msb->state != -1);
+	msb->int_polling = false;
+	msb->state = 0;
+	msb->exit_error = 0;
+
+	memset(&card->current_mrq, 0, sizeof(card->current_mrq));
+
+	card->next_request = state_func;
+	memstick_new_req(card->host);
+	wait_for_completion(&card->mrq_complete);
+
+	WARN_ON(msb->state != -1);
+	return msb->exit_error;
+}
+
+/* State machines call that to exit */
+static int msb_exit_state_machine(struct msb_data *msb, int error)
+{
+	WARN_ON(msb->state == -1);
+
+	msb->state = -1;
+	msb->exit_error = error;
+	msb->card->next_request = h_msb_default_bad;
+
+	/* Invalidate reg window on errors */
+	if (error)
+		msb_invalidate_reg_window(msb);
+
+	complete(&msb->card->mrq_complete);
+	return -ENXIO;
+}
+
+/* read INT register */
+static int msb_read_int_reg(struct msb_data *msb, long timeout)
+{
+	struct memstick_request *mrq = &msb->card->current_mrq;
+
+	WARN_ON(msb->state == -1);
+
+	if (!msb->int_polling) {
+		msb->int_timeout = jiffies +
+			msecs_to_jiffies(timeout == -1 ? 500 : timeout);
+		msb->int_polling = true;
+	} else if (time_after(jiffies, msb->int_timeout)) {
+		mrq->data[0] = MEMSTICK_INT_CMDNAK;
+		return 0;
+	}
+
+	if ((msb->caps & MEMSTICK_CAP_AUTO_GET_INT) &&
+				mrq->need_card_int && !mrq->error) {
+		mrq->data[0] = mrq->int_reg;
+		mrq->need_card_int = false;
+		return 0;
+	} else {
+		memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1);
+		return 1;
+	}
+}
+
+/* Read a register */
+static int msb_read_regs(struct msb_data *msb, int offset, int len)
+{
+	struct memstick_request *req = &msb->card->current_mrq;
+
+	if (msb->reg_addr.r_offset != offset ||
+	    msb->reg_addr.r_length != len || !msb->addr_valid) {
+
+		msb->reg_addr.r_offset = offset;
+		msb->reg_addr.r_length = len;
+		msb->addr_valid = true;
+
+		memstick_init_req(req, MS_TPC_SET_RW_REG_ADRS,
+			&msb->reg_addr, sizeof(msb->reg_addr));
+		return 0;
+	}
+
+	memstick_init_req(req, MS_TPC_READ_REG, NULL, len);
+	return 1;
+}
+
+/* Write a card register */
+static int msb_write_regs(struct msb_data *msb, int offset, int len, void *buf)
+{
+	struct memstick_request *req = &msb->card->current_mrq;
+
+	if (msb->reg_addr.w_offset != offset ||
+		msb->reg_addr.w_length != len  || !msb->addr_valid) {
+
+		msb->reg_addr.w_offset = offset;
+		msb->reg_addr.w_length = len;
+		msb->addr_valid = true;
+
+		memstick_init_req(req, MS_TPC_SET_RW_REG_ADRS,
+			&msb->reg_addr, sizeof(msb->reg_addr));
+		return 0;
+	}
+
+	memstick_init_req(req, MS_TPC_WRITE_REG, buf, len);
+	return 1;
+}
+
+/* Handler for absence of IO */
+static int h_msb_default_bad(struct memstick_dev *card,
+						struct memstick_request **mrq)
+{
+	return -ENXIO;
+}
+
+/*
+ * Handler for reads of one page: data goes to msb->current_sg (at
+ * current_sg_offset), sector address is taken from msb->regs.param.
+ * With regs.param.cp == MEMSTICK_CP_EXTRA only the extra data is read.
+ */
+static int h_msb_read_page(struct memstick_dev *card,
+					struct memstick_request **out_mrq)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct memstick_request *mrq = *out_mrq = &card->current_mrq;
+	struct scatterlist sg[2];
+	u8 command, intreg;
+
+	if (mrq->error) {
+		dbg("read_page, unknown error");
+		return msb_exit_state_machine(msb, mrq->error);
+	}
+again:
+	switch (msb->state) {
+	case MSB_RP_SEND_BLOCK_ADDRESS:
+		/* msb_write_regs sometimes "fails" because it needs to update
+			the reg window, and thus it returns request for that.
+			Then we stay in this state and retry */
+		if (!msb_write_regs(msb,
+			offsetof(struct ms_register, param),
+			sizeof(struct ms_param_register),
+			(unsigned char *)&msb->regs.param))
+			return 0;
+
+		msb->state = MSB_RP_SEND_READ_COMMAND;
+		return 0;
+
+	case MSB_RP_SEND_READ_COMMAND:
+		command = MS_CMD_BLOCK_READ;
+		memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1);
+		msb->state = MSB_RP_SEND_INT_REQ;
+		return 0;
+
+	case MSB_RP_SEND_INT_REQ:
+		msb->state = MSB_RP_RECEIVE_INT_REQ_RESULT;
+		/* If we don't actually need to send the int read request (only
+			in serial mode), then just fall through */
+		if (msb_read_int_reg(msb, -1))
+			return 0;
+		/* fallthrough */
+
+	case MSB_RP_RECEIVE_INT_REQ_RESULT:
+		intreg = mrq->data[0];
+		msb->regs.status.interrupt = intreg;
+
+		if (intreg & MEMSTICK_INT_CMDNAK)
+			return msb_exit_state_machine(msb, -EIO);
+
+		if (!(intreg & MEMSTICK_INT_CED)) {
+			msb->state = MSB_RP_SEND_INT_REQ;
+			goto again;
+		}
+
+		msb->int_polling = false;
+		msb->state = (intreg & MEMSTICK_INT_ERR) ?
+			MSB_RP_SEND_READ_STATUS_REG : MSB_RP_SEND_OOB_READ;
+		goto again;
+
+	case MSB_RP_SEND_READ_STATUS_REG:
+		/* read the status register to understand source of the INT_ERR */
+		if (!msb_read_regs(msb,
+			offsetof(struct ms_register, status),
+			sizeof(struct ms_status_register)))
+			return 0;
+		/* the reply must be parsed as status, not as extra data */
+		msb->state = MSB_RP_RECIVE_STATUS_REG;
+		return 0;
+
+	case MSB_RP_RECIVE_STATUS_REG:
+		msb->regs.status = *(struct ms_status_register *)mrq->data;
+		msb->state = MSB_RP_SEND_OOB_READ;
+		/* fallthrough */
+
+	case MSB_RP_SEND_OOB_READ:
+		if (!msb_read_regs(msb,
+			offsetof(struct ms_register, extra_data),
+			sizeof(struct ms_extra_data_register)))
+			return 0;
+
+		msb->state = MSB_RP_RECEIVE_OOB_READ;
+		return 0;
+
+	case MSB_RP_RECEIVE_OOB_READ:
+		msb->regs.extra_data =
+			*(struct ms_extra_data_register *) mrq->data;
+		msb->state = MSB_RP_SEND_READ_DATA;
+		/* fallthrough */
+
+	case MSB_RP_SEND_READ_DATA:
+		/* Skip that state if we only read the oob */
+		if (msb->regs.param.cp == MEMSTICK_CP_EXTRA) {
+			msb->state = MSB_RP_RECEIVE_READ_DATA;
+			goto again;
+		}
+
+		sg_init_table(sg, ARRAY_SIZE(sg));
+		msb_sg_copy(msb->current_sg, sg, ARRAY_SIZE(sg),
+			msb->current_sg_offset,
+			msb->page_size);
+
+		memstick_init_req_sg(mrq, MS_TPC_READ_LONG_DATA, sg);
+		msb->state = MSB_RP_RECEIVE_READ_DATA;
+		return 0;
+
+	case MSB_RP_RECEIVE_READ_DATA:
+		if (!(msb->regs.status.interrupt & MEMSTICK_INT_ERR)) {
+			msb->current_sg_offset += msb->page_size;
+			return msb_exit_state_machine(msb, 0);
+		}
+
+		if (msb->regs.status.status1 & MEMSTICK_UNCORR_ERROR) {
+			dbg("read_page: uncorrectable error");
+			return msb_exit_state_machine(msb, -EBADMSG);
+		}
+
+		if (msb->regs.status.status1 & MEMSTICK_CORR_ERROR) {
+			dbg("read_page: correctable error");
+			msb->current_sg_offset += msb->page_size;
+			return msb_exit_state_machine(msb, -EUCLEAN);
+		} else {
+			dbg("read_page: INT error, but no status error bits");
+			return msb_exit_state_machine(msb, -EIO);
+		}
+	}
+
+	BUG();
+}
+
+/*
+ * Handler for writes of exactly one block.
+ * Takes address from msb->regs.param.
+ * Writes same extra data to blocks, also taken
+ * from msb->regs.extra_data
+ * Returns -EBADMSG if write fails due to uncorrectable error, or -EIO if
+ * device refuses to take the command or something else
+ */
+static int h_msb_write_block(struct memstick_dev *card,
+					struct memstick_request **out_mrq)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct memstick_request *mrq = *out_mrq = &card->current_mrq;
+	struct scatterlist sg[2];
+	u8 intreg, command;
+
+	if (mrq->error)
+		return msb_exit_state_machine(msb, mrq->error);
+
+again:
+	switch (msb->state) {
+
+	/* HACK: Jmicon handling of TPCs between 8 and
+	 *	sizeof(memstick_request.data) is broken due to hardware
+	 *	bug in PIO mode that is used for these TPCs
+	 *	Therefore split the write
+	 */
+
+	case MSB_WB_SEND_WRITE_PARAMS:
+		if (!msb_write_regs(msb,
+			offsetof(struct ms_register, param),
+			sizeof(struct ms_param_register),
+			&msb->regs.param))
+			return 0;
+
+		msb->state = MSB_WB_SEND_WRITE_OOB;
+		return 0;
+
+	case MSB_WB_SEND_WRITE_OOB:
+		if (!msb_write_regs(msb,
+			offsetof(struct ms_register, extra_data),
+			sizeof(struct ms_extra_data_register),
+			&msb->regs.extra_data))
+			return 0;
+		msb->state = MSB_WB_SEND_WRITE_COMMAND;
+		return 0;
+
+
+	case MSB_WB_SEND_WRITE_COMMAND:
+		command = MS_CMD_BLOCK_WRITE;
+		memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1);
+		msb->state = MSB_WB_SEND_INT_REQ;
+		return 0;
+
+	case MSB_WB_SEND_INT_REQ:
+		msb->state = MSB_WB_RECEIVE_INT_REQ;
+		if (msb_read_int_reg(msb, -1))
+			return 0;
+		/* fallthrough */
+
+	case MSB_WB_RECEIVE_INT_REQ:
+		intreg = mrq->data[0];
+		msb->regs.status.interrupt = intreg;
+
+		/* errors mean out of here, and fast... */
+		if (intreg & (MEMSTICK_INT_CMDNAK))
+			return msb_exit_state_machine(msb, -EIO);
+
+		if (intreg & MEMSTICK_INT_ERR)
+			return msb_exit_state_machine(msb, -EBADMSG);
+
+
+		/* for last page we need to poll CED */
+		if (msb->current_page == msb->pages_in_block) {
+			if (intreg & MEMSTICK_INT_CED)
+				return msb_exit_state_machine(msb, 0);
+			msb->state = MSB_WB_SEND_INT_REQ;
+			goto again;
+
+		}
+
+		/* for non-last page we need BREQ before writing next chunk */
+		if (!(intreg & MEMSTICK_INT_BREQ)) {
+			msb->state = MSB_WB_SEND_INT_REQ;
+			goto again;
+		}
+
+		msb->int_polling = false;
+		msb->state = MSB_WB_SEND_WRITE_DATA;
+		/* fallthrough */
+
+	case MSB_WB_SEND_WRITE_DATA:
+		sg_init_table(sg, ARRAY_SIZE(sg));
+
+		if (msb_sg_copy(msb->current_sg, sg, ARRAY_SIZE(sg),
+			msb->current_sg_offset,
+			msb->page_size) < msb->page_size)
+			return msb_exit_state_machine(msb, -EIO);
+
+		memstick_init_req_sg(mrq, MS_TPC_WRITE_LONG_DATA, sg);
+		mrq->need_card_int = 1;
+		msb->state = MSB_WB_RECEIVE_WRITE_CONFIRMATION;
+		return 0;
+
+	case MSB_WB_RECEIVE_WRITE_CONFIRMATION:
+		msb->current_page++;
+		msb->current_sg_offset += msb->page_size;
+		msb->state = MSB_WB_SEND_INT_REQ;
+		goto again;
+	default:
+		BUG();
+	}
+
+	return 0;
+}
+
+/*
+ * This function is used to send simple IO requests to device that consist
+ * of register write + command
+ *
+ * The command byte is msb->command_value. The param registers are written
+ * from msb->regs.param; the extra data registers are written from
+ * msb->regs.extra_data only when msb->command_need_oob is set.
+ * The state machine is left with 0 once the INT register shows CED, with
+ * -EIO on CMDNAK and with -EBADMSG on ERR.
+ */
+static int h_msb_send_command(struct memstick_dev *card,
+					struct memstick_request **out_mrq)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct memstick_request *mrq = *out_mrq = &card->current_mrq;
+	u8 intreg;
+
+	if (mrq->error) {
+		dbg("send_command: unknown error");
+		return msb_exit_state_machine(msb, mrq->error);
+	}
+again:
+	switch (msb->state) {
+
+	/* HACK: see h_msb_write_block */
+	case MSB_SC_SEND_WRITE_PARAMS: /* write param register*/
+		if (!msb_write_regs(msb,
+			offsetof(struct ms_register, param),
+			sizeof(struct ms_param_register),
+			&msb->regs.param))
+			return 0;
+		msb->state = MSB_SC_SEND_WRITE_OOB;
+		return 0;
+
+	case MSB_SC_SEND_WRITE_OOB:
+		if (!msb->command_need_oob) { /* no extra data: skip this step */
+			msb->state = MSB_SC_SEND_COMMAND;
+			goto again;
+		}
+
+		if (!msb_write_regs(msb,
+			offsetof(struct ms_register, extra_data),
+			sizeof(struct ms_extra_data_register),
+			&msb->regs.extra_data))
+			return 0;
+
+		msb->state = MSB_SC_SEND_COMMAND;
+		return 0;
+
+	case MSB_SC_SEND_COMMAND:
+		memstick_init_req(mrq, MS_TPC_SET_CMD, &msb->command_value, 1);
+		msb->state = MSB_SC_SEND_INT_REQ;
+		return 0;
+
+	case MSB_SC_SEND_INT_REQ:
+		msb->state = MSB_SC_RECEIVE_INT_REQ;
+		if (msb_read_int_reg(msb, -1))
+			return 0;
+		/* fallthrough - NOTE(review): taken when msb_read_int_reg()
+		 * returns 0; presumably the INT value is already available in
+		 * mrq->data[0] then - confirm against the helper */
+
+	case MSB_SC_RECEIVE_INT_REQ:
+		intreg = mrq->data[0];
+
+		if (intreg & MEMSTICK_INT_CMDNAK)
+			return msb_exit_state_machine(msb, -EIO);
+		if (intreg & MEMSTICK_INT_ERR)
+			return msb_exit_state_machine(msb, -EBADMSG);
+		/* command still running: read the INT register again */
+		if (!(intreg & MEMSTICK_INT_CED)) {
+			msb->state = MSB_SC_SEND_INT_REQ;
+			goto again;
+		}
+
+		return msb_exit_state_machine(msb, 0);
+	}
+	/* any state not handled above is a driver bug */
+	BUG();
+}
+
+/*
+ * State machine handler that resets the card.
+ * MSB_RS_SEND queues the MS_CMD_RESET command (without waiting for a card
+ * interrupt), MSB_RS_CONFIRM finishes the state machine successfully.
+ */
+static int h_msb_reset(struct memstick_dev *card,
+					struct memstick_request **out_mrq)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct memstick_request *req = &card->current_mrq;
+	u8 reset_cmd = MS_CMD_RESET;
+
+	*out_mrq = req;
+
+	/* the previous request failed: give up with its error */
+	if (req->error)
+		return msb_exit_state_machine(msb, req->error);
+
+	if (msb->state == MSB_RS_CONFIRM)
+		return msb_exit_state_machine(msb, 0);
+
+	/* only the two states above/below are valid for this handler */
+	if (msb->state != MSB_RS_SEND)
+		BUG();
+
+	memstick_init_req(req, MS_TPC_SET_CMD, &reset_cmd, 1);
+	req->need_card_int = 0;
+	msb->state = MSB_RS_CONFIRM;
+	return 0;
+}
+
+/* This handler is used to do serial->parallel switch
+ *
+ * It sets MEMSTICK_SYS_PAM in the cached system parameter and writes it to
+ * the card, switches the host to MEMSTICK_PAR4 and then issues a single
+ * MS_TPC_GET_INT request to see if the card responds.
+ * If any request fails, the PAM bit is cleared again in
+ * msb->regs.param.system before the state machine is left with the error.
+ */
+static int h_msb_parallel_switch(struct memstick_dev *card,
+					struct memstick_request **out_mrq)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct memstick_request *mrq = *out_mrq = &card->current_mrq;
+	struct memstick_host *host = card->host;
+
+	if (mrq->error) {
+		dbg("parallel_switch: error");
+		msb->regs.param.system &= ~MEMSTICK_SYS_PAM;
+		return msb_exit_state_machine(msb, mrq->error);
+	}
+
+	switch (msb->state) {
+	case MSB_PS_SEND_SWITCH_COMMAND:
+		/* Set the parallel interface on memstick side */
+		msb->regs.param.system |= MEMSTICK_SYS_PAM;
+
+		if (!msb_write_regs(msb,
+			offsetof(struct ms_register, param),
+			1, /* one byte only; presumably 'system' is the first byte of param - confirm */
+			(unsigned char *)&msb->regs.param))
+			return 0;
+
+		msb->state = MSB_PS_SWICH_HOST;
+		return 0;
+
+	case MSB_PS_SWICH_HOST:
+		 /* Set parallel interface on our side + send a dummy request
+			to see if card responds */
+		host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_PAR4);
+		memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1);
+		msb->state = MSB_PS_CONFIRM;
+		return 0;
+
+	case MSB_PS_CONFIRM: /* the dummy request completed without error */
+		return msb_exit_state_machine(msb, 0);
+	}
+
+	BUG();
+}
+
+static int msb_switch_to_parallel(struct msb_data *msb);
+
+/*
+ * Reset the card, to guard against hw errors being treated as bad blocks.
+ *
+ * With @full set, the host is power cycled and put back into serial mode
+ * before the reset command is sent. If the card was in parallel mode when
+ * this was called, a switch back to parallel mode is attempted afterwards.
+ * Returns 0 on success, -EFAULT if the host could not be reset and -ENODEV
+ * if the card reset failed; in both failure cases the device is made
+ * read only.
+ */
+static int msb_reset(struct msb_data *msb, bool full)
+{
+	struct memstick_dev *card = msb->card;
+	struct memstick_host *host = card->host;
+	bool was_parallel = msb->regs.param.system & MEMSTICK_SYS_PAM;
+	int error;
+
+	/* Reset the card */
+	msb->regs.param.system = MEMSTICK_SYS_BAMD;
+
+	if (full) {
+		/* each step is only attempted when the previous one succeeded */
+		error = host->set_param(host,
+					MEMSTICK_POWER, MEMSTICK_POWER_OFF);
+		if (!error) {
+			msb_invalidate_reg_window(msb);
+			error = host->set_param(host,
+					MEMSTICK_POWER, MEMSTICK_POWER_ON);
+		}
+		if (!error)
+			error = host->set_param(host,
+					MEMSTICK_INTERFACE, MEMSTICK_SERIAL);
+
+		if (error) {
+			dbg("Failed to reset the host controller");
+			msb->read_only = true;
+			return -EFAULT;
+		}
+	}
+
+	if (msb_run_state_machine(msb, h_msb_reset)) {
+		dbg("Failed to reset the card");
+		msb->read_only = true;
+		return -ENODEV;
+	}
+
+	/* Set parallel mode */
+	if (was_parallel)
+		msb_switch_to_parallel(msb);
+	return 0;
+}
+
+/*
+ * Attempts to switch interface to parallel mode.
+ * On failure the parallel flag is dropped from the cached system parameter,
+ * the card gets a full reset and -EFAULT is returned.
+ */
+static int msb_switch_to_parallel(struct msb_data *msb)
+{
+	int rc = msb_run_state_machine(msb, h_msb_parallel_switch);
+
+	if (!rc) {
+		msb->caps |= MEMSTICK_CAP_AUTO_GET_INT;
+		return 0;
+	}
+
+	pr_err("Switch to parallel failed");
+	msb->regs.param.system &= ~MEMSTICK_SYS_PAM;
+	msb_reset(msb, true);
+	return -EFAULT;
+}
+
+/*
+ * Changes overwrite flag on a page.
+ * Returns -EROFS on a read only device, otherwise the result of the
+ * h_msb_send_command state machine.
+ */
+static int msb_set_overwrite_flag(struct msb_data *msb,
+						u16 pba, u8 page, u8 flag)
+{
+	if (msb->read_only)
+		return -EROFS;
+
+	/* what to send: a block write that carries the extra data registers */
+	msb->command_value = MS_CMD_BLOCK_WRITE;
+	msb->command_need_oob = true;
+	msb->regs.extra_data.overwrite_flag = flag;
+
+	/* where to send it */
+	msb->regs.param.cp = MEMSTICK_CP_OVERWRITE;
+	msb->regs.param.page_address = page;
+	msb->regs.param.block_address = cpu_to_be16(pba);
+
+	dbg_verbose("changing overwrite flag to %02x for sector %d, page %d",
+							flag, pba, page);
+	return msb_run_state_machine(msb, h_msb_send_command);
+}
+
+/* Fully resets the card, then clears the block status bit of @pba */
+static int msb_mark_bad(struct msb_data *msb, int pba)
+{
+	int rc;
+
+	pr_notice("marking pba %d as bad", pba);
+	msb_reset(msb, true);
+
+	rc = msb_set_overwrite_flag(msb, pba, 0,
+				0xFF & ~MEMSTICK_OVERWRITE_BKST);
+	return rc;
+}
+
+/* Fully resets the card, then clears the PGST0 page status bit of @page */
+static int msb_mark_page_bad(struct msb_data *msb, int pba, int page)
+{
+	int rc;
+
+	dbg("marking page %d of pba %d as bad", page, pba);
+	msb_reset(msb, true);
+
+	rc = msb_set_overwrite_flag(msb, pba, page, ~MEMSTICK_OVERWRITE_PGST0);
+	return rc;
+}
+
+/*
+ * Erases one physical block.
+ *
+ * The erase is attempted up to two times with a full card reset in
+ * between. On success the block is marked unused and recorded in
+ * msb->erased_blocks_bitmap. On failure the block is marked bad on the
+ * media and is NOT touched in the in-memory bitmaps, so a block that could
+ * not be erased is never treated as free or as trusted-erased.
+ *
+ * Returns 0 on success, -EROFS on a read only device, or the error of the
+ * last erase attempt.
+ */
+static int msb_erase_block(struct msb_data *msb, u16 pba)
+{
+	int error, try;
+
+	if (msb->read_only)
+		return -EROFS;
+
+	dbg_verbose("erasing pba %d", pba);
+
+	for (try = 1; try < 3; try++) {
+		msb->regs.param.block_address = cpu_to_be16(pba);
+		msb->regs.param.page_address = 0;
+		msb->regs.param.cp = MEMSTICK_CP_BLOCK;
+		msb->command_value = MS_CMD_BLOCK_ERASE;
+		msb->command_need_oob = false;
+
+		error = msb_run_state_machine(msb, h_msb_send_command);
+		/* stop on success, or when the card can't even be reset */
+		if (!error || msb_reset(msb, true))
+			break;
+	}
+
+	if (error) {
+		pr_err("erase failed, marking pba %d as bad", pba);
+		msb_mark_bad(msb, pba);
+		/* don't fall through: the block is neither free nor erased */
+		return error;
+	}
+
+	dbg_verbose("erase success, marking pba %d as unused", pba);
+	msb_mark_block_unused(msb, pba);
+	__set_bit(pba, msb->erased_blocks_bitmap);
+	return 0;
+}
+
+/* Reads one page from device
+ *
+ * The page data is stored into @sg starting @offset bytes into it; when
+ * @extra is not NULL it receives the extra data registers of the page.
+ * For pba == MS_BLOCK_INVALID no IO is done: the page (and @extra) is
+ * filled with 0xFF.
+ * The read is attempted up to two times with a full card reset in between;
+ * -EUCLEAN from the state machine is logged and treated as success.
+ * Returns 0 on success, -EINVAL for a pba beyond the card, -EFAULT if
+ * @offset lies beyond the end of @sg (unmapped case), -EBADMSG on an
+ * uncorrectable error, or another error from the state machine.
+ */
+static int msb_read_page(struct msb_data *msb,
+	u16 pba, u8 page, struct ms_extra_data_register *extra,
+					struct scatterlist *sg,  int offset)
+{
+	int try, error;
+
+	if (pba == MS_BLOCK_INVALID) {
+		unsigned long flags;
+		struct sg_mapping_iter miter;
+		size_t len = msb->page_size;
+
+		dbg_verbose("read unmapped sector. returning 0xFF");
+
+		local_irq_save(flags);
+		sg_miter_start(&miter, sg, sg_nents(sg),
+				SG_MITER_ATOMIC | SG_MITER_TO_SG);
+
+		while (sg_miter_next(&miter) && len > 0) {
+
+			int chunklen;
+			/* skip whole chunks that lie before the offset */
+			if (offset && offset >= miter.length) {
+				offset -= miter.length;
+				continue;
+			}
+
+			chunklen = min(miter.length - offset, len);
+			memset(miter.addr + offset, 0xFF, chunklen);
+			len -= chunklen;
+			offset = 0;
+		}
+
+		sg_miter_stop(&miter);
+		local_irq_restore(flags);
+		/* offset still set: the sg list ended before reaching it */
+		if (offset)
+			return -EFAULT;
+
+		if (extra)
+			memset(extra, 0xFF, sizeof(*extra));
+		return 0;
+	}
+
+	if (pba >= msb->block_count) {
+		pr_err("BUG: attempt to read beyond the end of the card at pba %d", pba);
+		return -EINVAL;
+	}
+
+	for (try = 1; try < 3; try++) {
+		msb->regs.param.block_address = cpu_to_be16(pba);
+		msb->regs.param.page_address = page;
+		msb->regs.param.cp = MEMSTICK_CP_PAGE;
+
+		msb->current_sg = sg;
+		msb->current_sg_offset = offset;
+		error = msb_run_state_machine(msb, h_msb_read_page);
+
+
+		if (error == -EUCLEAN) {
+			pr_notice("correctable error on pba %d, page %d",
+				pba, page);
+			error = 0;
+		}
+
+		if (!error && extra)
+			*extra = msb->regs.extra_data;
+		/* stop on success, or when the card can't even be reset */
+		if (!error || msb_reset(msb, true))
+			break;
+
+	}
+
+	/* Mark bad pages
+	 * (only when the PGST0 bit is still set in the cached overwrite
+	 * flag, i.e. the page isn't flagged yet) */
+	if (error == -EBADMSG) {
+		pr_err("uncorrectable error on read of pba %d, page %d",
+			pba, page);
+
+		if (msb->regs.extra_data.overwrite_flag &
+					MEMSTICK_OVERWRITE_PGST0)
+			msb_mark_page_bad(msb, pba, page);
+		return -EBADMSG;
+	}
+
+	if (error)
+		pr_err("read of pba %d, page %d failed with error %d",
+			pba, page, error);
+	return error;
+}
+
+/*
+ * Reads oob of page only.
+ *
+ * Runs the page read state machine with cp set to MEMSTICK_CP_EXTRA and
+ * copies the resulting extra data registers to @extra (which must not be
+ * NULL). @extra is filled from the cached registers even when the read
+ * fails.
+ * Returns 0 on success (a correctable error, -EUCLEAN, is logged and
+ * counts as success), -EINVAL if @pba is not a block of this card, or the
+ * error of the state machine.
+ */
+static int msb_read_oob(struct msb_data *msb, u16 pba, u16 page,
+	struct ms_extra_data_register *extra)
+{
+	int error;
+
+	BUG_ON(!extra);
+	msb->regs.param.block_address = cpu_to_be16(pba);
+	msb->regs.param.page_address = page;
+	msb->regs.param.cp = MEMSTICK_CP_EXTRA;
+
+	/* valid blocks are 0 .. block_count - 1, same bound as msb_read_page */
+	if (pba >= msb->block_count) {
+		pr_err("BUG: attempt to read beyond the end of card at pba %d", pba);
+		return -EINVAL;
+	}
+
+	error = msb_run_state_machine(msb, h_msb_read_page);
+	*extra = msb->regs.extra_data;
+
+	if (error == -EUCLEAN) {
+		pr_notice("correctable error on pba %d, page %d",
+			pba, page);
+		return 0;
+	}
+
+	return error;
+}
+
+/*
+ * Reads a block and compares it with data contained in scatterlist orig_sg.
+ * The block is read page by page into msb->block_buffer. Returns the read
+ * error if a page can't be read, -EIO if the contents differ from @orig_sg
+ * at @offset, and 0 when they match.
+ */
+static int msb_verify_block(struct msb_data *msb, u16 pba,
+				struct scatterlist *orig_sg,  int offset)
+{
+	struct scatterlist readback_sg;
+	int page, rc;
+
+	sg_init_one(&readback_sg, msb->block_buffer, msb->block_size);
+
+	for (page = 0; page < msb->pages_in_block; page++) {
+		rc = msb_read_page(msb, pba, page, NULL, &readback_sg,
+						page * msb->page_size);
+		if (rc)
+			return rc;
+	}
+
+	rc = msb_sg_compare_to_buffer(orig_sg, offset,
+				msb->block_buffer, msb->block_size);
+	return rc ? -EIO : 0;
+}
+
+/* Writes exactly one block + oob
+ *
+ * Writes the block found at @offset in @sg to physical block @pba and
+ * records @lba as its logical address in the extra data.
+ * Returns 0 on success, -EROFS on a read only device, -EINVAL for
+ * arguments that indicate a driver bug (invalid or out of range blocks,
+ * zone mismatch, boot blocks), or the error of the write, verify or erase
+ * step. The write is retried once, after a card reset and an erase of
+ * @pba.
+ */
+static int msb_write_block(struct msb_data *msb,
+			u16 pba, u32 lba, struct scatterlist *sg, int offset)
+{
+	int error, current_try = 1;
+	BUG_ON(sg->length < msb->page_size);
+
+	if (msb->read_only)
+		return -EROFS;
+
+	if (pba == MS_BLOCK_INVALID) {
+		pr_err(
+			"BUG: write: attempt to write MS_BLOCK_INVALID block");
+		return -EINVAL;
+	}
+
+	if (pba >= msb->block_count || lba >= msb->logical_block_count) {
+		pr_err(
+		"BUG: write: attempt to write beyond the end of device");
+		return -EINVAL;
+	}
+
+	if (msb_get_zone_from_lba(lba) != msb_get_zone_from_pba(pba)) {
+		pr_err("BUG: write: lba zone mismatch");
+		return -EINVAL;
+	}
+
+	if (pba == msb->boot_block_locations[0] ||
+		pba == msb->boot_block_locations[1]) {
+		pr_err("BUG: write: attempt to write to boot blocks!");
+		return -EINVAL;
+	}
+
+	while (1) {
+		/* re-checked each pass: a failed reset makes the device read only */
+		if (msb->read_only)
+			return -EROFS;
+
+		msb->regs.param.cp = MEMSTICK_CP_BLOCK;
+		msb->regs.param.page_address = 0;
+		msb->regs.param.block_address = cpu_to_be16(pba);
+
+		msb->regs.extra_data.management_flag = 0xFF;
+		msb->regs.extra_data.overwrite_flag = 0xF8;
+		msb->regs.extra_data.logical_address = cpu_to_be16(lba);
+
+		msb->current_sg = sg;
+		msb->current_sg_offset = offset;
+		msb->current_page = 0;
+
+		error = msb_run_state_machine(msb, h_msb_write_block);
+
+		/* Sector we just wrote to is assumed erased since its pba
+			was erased. If it wasn't erased, write will succeed
+			and will just clear the bits that were set in the block
+			thus test that what we have written,
+			matches what we expect.
+			We do trust the blocks that we erased */
+		if (!error && (verify_writes ||
+				!test_bit(pba, msb->erased_blocks_bitmap)))
+			error = msb_verify_block(msb, pba, sg, offset);
+
+		if (!error)
+			break;
+		/* give up after the second attempt or when the reset fails */
+		if (current_try > 1 || msb_reset(msb, true))
+			break;
+
+		pr_err("write failed, trying to erase the pba %d", pba);
+		error = msb_erase_block(msb, pba);
+		if (error)
+			break;
+
+		current_try++;
+	}
+	return error;
+}
+
+/* Finds a free block for write replacement
+ *
+ * Picks a random one among the free blocks counted for @zone, walking the
+ * zero bits of msb->used_blocks_bitmap from the first block of the zone,
+ * and marks the chosen block used.
+ * Returns the pba, or MS_BLOCK_INVALID (after switching the device to read
+ * only) if the zone has no free block or the scan leaves the zone.
+ */
+static u16 msb_get_free_block(struct msb_data *msb, int zone)
+{
+	u16 pos;
+	int pba = zone * MS_BLOCKS_IN_ZONE;
+	int i;
+
+	get_random_bytes(&pos, sizeof(pos));
+
+	if (!msb->free_block_count[zone]) {
+		pr_err("NO free blocks in the zone %d, to use for a write, (media is WORN out) switching to RO mode", zone);
+		msb->read_only = true;
+		return MS_BLOCK_INVALID;
+	}
+	/* reduce the random value to an index among the free blocks */
+	pos %= msb->free_block_count[zone];
+
+	dbg_verbose("have %d choices for a free block, selected randomally: %d",
+		msb->free_block_count[zone], pos);
+	/* first free block at or after the zone start, then skip pos more */
+	pba = find_next_zero_bit(msb->used_blocks_bitmap,
+							msb->block_count, pba);
+	for (i = 0; i < pos; ++i)
+		pba = find_next_zero_bit(msb->used_blocks_bitmap,
+						msb->block_count, pba + 1);
+
+	dbg_verbose("result of the free blocks scan: pba %d", pba);
+	/* the counter and the bitmap disagree if the scan left the zone */
+	if (pba == msb->block_count || (msb_get_zone_from_pba(pba)) != zone) {
+		pr_err("BUG: cant get a free block");
+		msb->read_only = true;
+		return MS_BLOCK_INVALID;
+	}
+
+	msb_mark_block_used(msb, pba);
+	return pba;
+}
+
+/*
+ * Replaces the contents of logical block @lba with the block found at
+ * @offset in @sg.
+ *
+ * The data is written to a freshly picked free block of the same zone; up
+ * to three candidate blocks are tried, and a candidate that fails with
+ * -EBADMSG is marked bad. Once a write succeeds, the block previously
+ * mapped to @lba (if there was one) is erased and the mapping is switched
+ * to the new block.
+ *
+ * Returns 0 on success. On any failure the device is switched to read only
+ * mode and the error is returned (-EIO when no free block is available).
+ */
+static int msb_update_block(struct msb_data *msb, u16 lba,
+	struct scatterlist *sg, int offset)
+{
+	u16 pba, new_pba;
+	int error, try;
+
+	pba = msb->lba_to_pba_table[lba];
+	dbg_verbose("start of a block update at lba  %d, pba %d", lba, pba);
+
+	if (pba != MS_BLOCK_INVALID) {
+		dbg_verbose("setting the update flag on the block");
+		/* best effort: a failure here doesn't stop the update */
+		msb_set_overwrite_flag(msb, pba, 0,
+				0xFF & ~MEMSTICK_OVERWRITE_UDST);
+	}
+
+	for (try = 0; try < 3; try++) {
+		new_pba = msb_get_free_block(msb,
+			msb_get_zone_from_lba(lba));
+
+		if (new_pba == MS_BLOCK_INVALID) {
+			error = -EIO;
+			goto out;
+		}
+
+		dbg_verbose("block update: writing updated block to the pba %d",
+								new_pba);
+		error = msb_write_block(msb, new_pba, lba, sg, offset);
+		if (error == -EBADMSG) {
+			msb_mark_bad(msb, new_pba);
+			continue;
+		}
+
+		if (error)
+			goto out;
+
+		/*
+		 * A previously unmapped lba has no old block. Erasing
+		 * MS_BLOCK_INVALID would send a bogus erase to the card and
+		 * set a bit past the end of the block bitmaps.
+		 */
+		if (pba != MS_BLOCK_INVALID) {
+			dbg_verbose("block update: erasing the old block");
+			msb_erase_block(msb, pba);
+		}
+		msb->lba_to_pba_table[lba] = new_pba;
+		return 0;
+	}
+out:
+	if (error) {
+		pr_err("block update error after %d tries,  switching to r/o mode", try);
+		msb->read_only = true;
+	}
+	return error;
+}
+
+/*
+ * Converts the multi-byte fields of a boot page from big endian to the
+ * CPU byte order, in place, for easy use.
+ */
+static void msb_fix_boot_page_endianness(struct ms_boot_page *p)
+{
+	/* header */
+	p->header.block_id = be16_to_cpu(p->header.block_id);
+	p->header.format_reserved = be16_to_cpu(p->header.format_reserved);
+
+	/* system entries: 32 bit start addresses and sizes */
+	p->entry.disabled_block.start_addr =
+		be32_to_cpu(p->entry.disabled_block.start_addr);
+	p->entry.disabled_block.data_size =
+		be32_to_cpu(p->entry.disabled_block.data_size);
+	p->entry.cis_idi.start_addr =
+		be32_to_cpu(p->entry.cis_idi.start_addr);
+	p->entry.cis_idi.data_size =
+		be32_to_cpu(p->entry.cis_idi.data_size);
+
+	/* attributes: media layout */
+	p->attr.block_size = be16_to_cpu(p->attr.block_size);
+	p->attr.page_size = be16_to_cpu(p->attr.page_size);
+	p->attr.number_of_blocks = be16_to_cpu(p->attr.number_of_blocks);
+	p->attr.number_of_effective_blocks =
+		be16_to_cpu(p->attr.number_of_effective_blocks);
+	p->attr.implemented_capacity =
+		be16_to_cpu(p->attr.implemented_capacity);
+
+	/* attributes: memory and controller identification */
+	p->attr.memory_manufacturer_code =
+		be16_to_cpu(p->attr.memory_manufacturer_code);
+	p->attr.memory_device_code = be16_to_cpu(p->attr.memory_device_code);
+	p->attr.controller_number = be16_to_cpu(p->attr.controller_number);
+	p->attr.controller_function = be16_to_cpu(p->attr.controller_function);
+}
+/*
+ * Scans the first MS_BLOCK_MAX_BOOT_ADDR blocks for up to two boot blocks.
+ *
+ * Page 0 of each found boot block is stored, converted to CPU byte order,
+ * in msb->boot_page[] (room for two pages, allocated here on first use),
+ * and its pba in msb->boot_block_locations[]. msb->block_count is set to
+ * MS_BLOCK_MAX_BOOT_ADDR for the duration of the scan.
+ * Returns 0 if at least one boot block was found, -ENOMEM if the buffer
+ * can't be allocated and -EIO if the media has no boot block.
+ */
+static int msb_read_boot_blocks(struct msb_data *msb)
+{
+	int pba = 0;
+	struct scatterlist sg;
+	struct ms_extra_data_register extra;
+	struct ms_boot_page *page;
+
+	msb->boot_block_locations[0] = MS_BLOCK_INVALID;
+	msb->boot_block_locations[1] = MS_BLOCK_INVALID;
+	msb->boot_block_count = 0;
+
+	dbg_verbose("Start of a scan for the boot blocks");
+
+	if (!msb->boot_page) {
+		page = kmalloc(sizeof(struct ms_boot_page)*2, GFP_KERNEL);
+		if (!page)
+			return -ENOMEM;
+
+		msb->boot_page = page;
+	} else
+		page = msb->boot_page;
+	/* real block count is unknown yet; allow reads of the scanned range */
+	msb->block_count = MS_BLOCK_MAX_BOOT_ADDR;
+
+	for (pba = 0; pba < MS_BLOCK_MAX_BOOT_ADDR; pba++) {
+
+		sg_init_one(&sg, page, sizeof(*page));
+		if (msb_read_page(msb, pba, 0, &extra, &sg, 0)) {
+			dbg("boot scan: can't read pba %d", pba);
+			continue;
+		}
+		/* a set SYSFLG bit means this is not a boot block */
+		if (extra.management_flag & MEMSTICK_MANAGEMENT_SYSFLG) {
+			dbg("managment flag doesn't indicate boot block %d",
+									pba);
+			continue;
+		}
+
+		if (be16_to_cpu(page->header.block_id) != MS_BLOCK_BOOT_ID) {
+			dbg("the pba at %d doesn' contain boot block ID", pba);
+			continue;
+		}
+
+		msb_fix_boot_page_endianness(page);
+		msb->boot_block_locations[msb->boot_block_count] = pba;
+		/* the next boot block found goes into the second buffer slot */
+		page++;
+		msb->boot_block_count++;
+
+		if (msb->boot_block_count == 2)
+			break;
+	}
+
+	if (!msb->boot_block_count) {
+		pr_err("media doesn't contain master page, aborting");
+		return -EIO;
+	}
+
+	dbg_verbose("End of scan for boot blocks");
+	return 0;
+}
+
+/*
+ * Reads the factory bad block table referenced by boot block @block_nr
+ * (0 or 1) and marks every block listed there as used.
+ *
+ * The table location comes from the boot page: it starts
+ * entry.disabled_block.start_addr bytes after the boot page and is
+ * entry.disabled_block.data_size bytes long. It consists of 16 bit big
+ * endian block numbers. Whole pages covering the table are read into a
+ * temporary buffer, so the table itself starts page_offset bytes into it.
+ *
+ * Returns 0 on success (including an empty table), -EINVAL if the boot
+ * block wasn't found, -ENOMEM if the buffer can't be allocated, or the
+ * error of a failed page read.
+ */
+static int msb_read_bad_block_table(struct msb_data *msb, int block_nr)
+{
+	struct ms_boot_page *boot_block;
+	struct scatterlist sg;
+	u8 *buffer = NULL;
+	const u8 *table;
+	int offset = 0;
+	int i, error = 0;
+	int data_size, data_offset, page, page_offset, size_to_read;
+	u16 pba;
+
+	BUG_ON(block_nr > 1);
+	boot_block = &msb->boot_page[block_nr];
+	pba = msb->boot_block_locations[block_nr];
+
+	if (msb->boot_block_locations[block_nr] == MS_BLOCK_INVALID)
+		return -EINVAL;
+
+	data_size = boot_block->entry.disabled_block.data_size;
+	data_offset = sizeof(struct ms_boot_page) +
+			boot_block->entry.disabled_block.start_addr;
+	if (!data_size)
+		return 0;
+
+	/* first page holding the table and where the table starts in it */
+	page = data_offset / msb->page_size;
+	page_offset = data_offset % msb->page_size;
+	size_to_read =
+		DIV_ROUND_UP(data_size + page_offset, msb->page_size) *
+			msb->page_size;
+
+	dbg("reading bad block of boot block at pba %d, offset %d len %d",
+		pba, data_offset, data_size);
+
+	buffer = kzalloc(size_to_read, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	/* Read the buffer */
+	sg_init_one(&sg, buffer, size_to_read);
+
+	while (offset < size_to_read) {
+		error = msb_read_page(msb, pba, page, NULL, &sg, offset);
+		if (error)
+			goto out;
+
+		page++;
+		offset += msb->page_size;
+
+		if (page == msb->pages_in_block) {
+			pr_err(
+			"bad block table extends beyond the boot block");
+			break;
+		}
+	}
+
+	/*
+	 * Process the bad block table.
+	 * page_offset is a byte offset, so the table is addressed through a
+	 * byte pointer; the entries are assembled byte by byte, which also
+	 * copes with a table that isn't 16 bit aligned in the buffer.
+	 */
+	table = buffer + page_offset;
+	for (i = 0; i < data_size / sizeof(u16); i++) {
+
+		u16 bad_block = (table[2 * i] << 8) | table[2 * i + 1];
+
+		if (bad_block >= msb->block_count) {
+			dbg("bad block table contains invalid block %d",
+								bad_block);
+			continue;
+		}
+
+		if (test_bit(bad_block, msb->used_blocks_bitmap))  {
+			dbg("duplicate bad block %d in the table",
+				bad_block);
+			continue;
+		}
+
+		dbg("block %d is marked as factory bad", bad_block);
+		msb_mark_block_used(msb, bad_block);
+	}
+out:
+	kfree(buffer);
+	return error;
+}
+
+/*
+ * Allocates and initializes the in-memory FTL tables: the used and erased
+ * block bitmaps (all clear), the lba -> pba table (every entry
+ * MS_BLOCK_INVALID) and the per zone free block counters (every block
+ * free). Does nothing if the tables were already set up.
+ *
+ * Returns 0 on success or -ENOMEM; on failure nothing stays allocated and
+ * the table pointers are reset to NULL.
+ */
+static int msb_ftl_initialize(struct msb_data *msb)
+{
+	size_t bitmap_bytes;
+	int i;
+
+	if (msb->ftl_initialized)
+		return 0;
+
+	msb->zone_count = msb->block_count / MS_BLOCKS_IN_ZONE;
+	msb->logical_block_count = msb->zone_count * 496 - 2;
+
+	/*
+	 * The bitmap helpers access whole longs, so size the bitmaps in
+	 * longs rather than in (rounded down) bytes.
+	 */
+	bitmap_bytes = BITS_TO_LONGS(msb->block_count) * sizeof(unsigned long);
+
+	msb->used_blocks_bitmap = kzalloc(bitmap_bytes, GFP_KERNEL);
+	msb->erased_blocks_bitmap = kzalloc(bitmap_bytes, GFP_KERNEL);
+	msb->lba_to_pba_table =
+		kmalloc(msb->logical_block_count * sizeof(u16), GFP_KERNEL);
+
+	if (!msb->used_blocks_bitmap || !msb->lba_to_pba_table ||
+						!msb->erased_blocks_bitmap) {
+		kfree(msb->used_blocks_bitmap);
+		kfree(msb->lba_to_pba_table);
+		kfree(msb->erased_blocks_bitmap);
+		/* don't leave dangling pointers behind for a later free */
+		msb->used_blocks_bitmap = NULL;
+		msb->lba_to_pba_table = NULL;
+		msb->erased_blocks_bitmap = NULL;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < msb->zone_count; i++)
+		msb->free_block_count[i] = MS_BLOCKS_IN_ZONE;
+
+	/* explicit per entry store instead of a byte wise memset() */
+	for (i = 0; i < msb->logical_block_count; i++)
+		msb->lba_to_pba_table[i] = MS_BLOCK_INVALID;
+
+	dbg("initial FTL tables created. Zone count = %d, Logical block count = %d",
+		msb->zone_count, msb->logical_block_count);
+
+	msb->ftl_initialized = true;
+	return 0;
+}
+/*
+ * Scans the extra data of page 0 of every block and builds the lba -> pba
+ * table and the used block bitmap from it.
+ *
+ * Boot blocks and blocks already marked used (factory bad) are skipped.
+ * Blocks with an unreadable oob, temporary tables and blocks whose lba
+ * doesn't fit their zone are erased. When two blocks claim the same lba,
+ * one of them is erased (see the collision handling at the end of the
+ * loop).
+ * Returns 0 on success, -ENOMEM, or the error of an oob read that failed
+ * with something other than -EBADMSG.
+ */
+static int msb_ftl_scan(struct msb_data *msb)
+{
+	u16 pba, lba, other_block;
+	u8 overwrite_flag, managment_flag, other_overwrite_flag;
+	int error;
+	struct ms_extra_data_register extra;
+	u8 *overwrite_flags = kzalloc(msb->block_count, GFP_KERNEL);
+
+	if (!overwrite_flags)
+		return -ENOMEM;
+
+	dbg("Start of media scanning");
+	for (pba = 0; pba < msb->block_count; pba++) {
+
+		if (pba == msb->boot_block_locations[0] ||
+			pba == msb->boot_block_locations[1]) {
+			dbg_verbose("pba %05d -> [boot block]", pba);
+			msb_mark_block_used(msb, pba);
+			continue;
+		}
+
+		if (test_bit(pba, msb->used_blocks_bitmap)) {
+			dbg_verbose("pba %05d -> [factory bad]", pba);
+			continue;
+		}
+
+		memset(&extra, 0, sizeof(extra));
+		error = msb_read_oob(msb, pba, 0, &extra);
+
+		/* can't trust the page if we can't read the oob */
+		if (error == -EBADMSG) {
+			pr_notice(
+			"oob of pba %d damaged, will try to erase it", pba);
+			msb_mark_block_used(msb, pba);
+			msb_erase_block(msb, pba);
+			continue;
+		} else if (error) {
+			pr_err("unknown error %d on read of oob of pba %d - aborting",
+				error, pba);
+
+			kfree(overwrite_flags);
+			return error;
+		}
+
+		lba = be16_to_cpu(extra.logical_address);
+		managment_flag = extra.management_flag;
+		overwrite_flag = extra.overwrite_flag;
+		overwrite_flags[pba] = overwrite_flag; /* kept for collision handling */
+
+		/* Skip bad blocks */
+		if (!(overwrite_flag & MEMSTICK_OVERWRITE_BKST)) {
+			dbg("pba %05d -> [BAD]", pba);
+			msb_mark_block_used(msb, pba);
+			continue;
+		}
+
+		/* Skip system/drm blocks */
+		if ((managment_flag & MEMSTICK_MANAGMENT_FLAG_NORMAL) !=
+			MEMSTICK_MANAGMENT_FLAG_NORMAL) {
+			dbg("pba %05d -> [reserved managment flag %02x]",
+							pba, managment_flag);
+			msb_mark_block_used(msb, pba);
+			continue;
+		}
+
+		/* Erase temporary tables */
+		if (!(managment_flag & MEMSTICK_MANAGEMENT_ATFLG)) {
+			dbg("pba %05d -> [temp table] - will erase", pba);
+
+			msb_mark_block_used(msb, pba);
+			msb_erase_block(msb, pba);
+			continue;
+		}
+		/* no logical address assigned: the block stays free */
+		if (lba == MS_BLOCK_INVALID) {
+			dbg_verbose("pba %05d -> [free]", pba);
+			continue;
+		}
+
+		msb_mark_block_used(msb, pba);
+
+		/* Block has LBA not according to zoning*/
+		if (msb_get_zone_from_lba(lba) != msb_get_zone_from_pba(pba)) {
+			pr_notice("pba %05d -> [bad lba %05d] - will erase",
+								pba, lba);
+			msb_erase_block(msb, pba);
+			continue;
+		}
+
+		/* No collisions - great */
+		if (msb->lba_to_pba_table[lba] == MS_BLOCK_INVALID) {
+			dbg_verbose("pba %05d -> [lba %05d]", pba, lba);
+			msb->lba_to_pba_table[lba] = pba;
+			continue;
+		}
+		/* collision: another block was already mapped to this lba */
+		other_block = msb->lba_to_pba_table[lba];
+		other_overwrite_flag = overwrite_flags[other_block];
+
+		pr_notice("Collision between pba %d and pba %d",
+			pba, other_block);
+		/* a cleared UDST bit marks the block as stable: prefer it */
+		if (!(overwrite_flag & MEMSTICK_OVERWRITE_UDST)) {
+			pr_notice("pba %d is marked as stable, use it", pba);
+			msb_erase_block(msb, other_block);
+			msb->lba_to_pba_table[lba] = pba;
+			continue;
+		}
+
+		if (!(other_overwrite_flag & MEMSTICK_OVERWRITE_UDST)) {
+			pr_notice("pba %d is marked as stable, use it",
+								other_block);
+			msb_erase_block(msb, pba);
+			continue;
+		}
+
+		pr_notice("collision between blocks %d and %d, without stable flag set on both, erasing pba %d",
+				pba, other_block, other_block);
+
+		msb_erase_block(msb, other_block);
+		msb->lba_to_pba_table[lba] = pba;
+	}
+
+	dbg("End of media scanning");
+	kfree(overwrite_flags);
+	return 0;
+}
+
+/*
+ * Timer callback: requests a flush of the write cache and schedules the IO
+ * work to carry it out. @data is the struct msb_data pointer that was
+ * registered with the timer.
+ */
+static void msb_cache_flush_timer(unsigned long data)
+{
+	struct msb_data *card_data = (struct msb_data *)data;
+
+	card_data->need_flush_cache = true;
+	queue_work(card_data->io_queue, &card_data->io_work);
+}
+
+
+/*
+ * Drops the contents of the write cache without writing them out.
+ * Does nothing when the cache holds no block.
+ */
+static void msb_cache_discard(struct msb_data *msb)
+{
+	if (msb->cache_block_lba != MS_BLOCK_INVALID) {
+		/* no flush must be triggered for a cache that is going away */
+		del_timer_sync(&msb->cache_flush_timer);
+
+		dbg_verbose("Discarding the write cache");
+		msb->cache_block_lba = MS_BLOCK_INVALID;
+		bitmap_zero(&msb->valid_cache_bitmap, msb->pages_in_block);
+	}
+}
+
+/*
+ * Sets up the cache flush timer and the one block large cache buffer
+ * (allocated only if it doesn't exist yet), then empties the cache.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int msb_cache_init(struct msb_data *msb)
+{
+	setup_timer(&msb->cache_flush_timer, msb_cache_flush_timer,
+		(unsigned long)msb);
+
+	if (!msb->cache) {
+		msb->cache = kzalloc(msb->block_size, GFP_KERNEL);
+		if (!msb->cache)
+			return -ENOMEM;
+	}
+
+	msb_cache_discard(msb);
+	return 0;
+}
+/*
+ * Writes the cached block back to the media.
+ *
+ * Pages of the block that are not present in the cache are first read from
+ * the media; the completed block is then written with msb_update_block().
+ * Pages that could not be read (-EBADMSG) or that are not marked normal on
+ * the media are flagged on the new block after a successful write.
+ * The cache is discarded afterwards, whether or not the write succeeded.
+ * Returns 0 on success or if the cache is empty, -EROFS on a read only
+ * device, or the error of the failed read / block update.
+ */
+static int msb_cache_flush(struct msb_data *msb)
+{
+	struct scatterlist sg;
+	struct ms_extra_data_register extra;
+	int page, offset, error;
+	u16 pba, lba;
+
+	if (msb->read_only)
+		return -EROFS;
+
+	if (msb->cache_block_lba == MS_BLOCK_INVALID)
+		return 0;
+
+	lba = msb->cache_block_lba;
+	pba = msb->lba_to_pba_table[lba];
+
+	dbg_verbose("Flushing the write cache of pba %d (LBA %d)",
+						pba, msb->cache_block_lba);
+
+	sg_init_one(&sg, msb->cache , msb->block_size);
+
+	/* Read all missing pages in cache */
+	for (page = 0; page < msb->pages_in_block; page++) {
+
+		if (test_bit(page, &msb->valid_cache_bitmap))
+			continue;
+
+		offset = page * msb->page_size;
+
+		dbg_verbose("reading non-present sector %d of cache block %d",
+			page, lba);
+		error = msb_read_page(msb, pba, page, &extra, &sg, offset);
+
+		/* Bad pages are copied with 00 page status */
+		if (error == -EBADMSG) {
+			pr_err("read error on sector %d, contents probably damaged", page);
+			continue;
+		}
+
+		if (error)
+			return error;
+
+		if ((extra.overwrite_flag & MEMSTICK_OV_PG_NORMAL) !=
+							MEMSTICK_OV_PG_NORMAL) {
+			dbg("page %d is marked as bad", page);
+			continue;
+		}
+
+		set_bit(page, &msb->valid_cache_bitmap);
+	}
+
+	/* Write the cache now */
+	error = msb_update_block(msb, msb->cache_block_lba, &sg, 0);
+	pba = msb->lba_to_pba_table[msb->cache_block_lba]; /* re-read: the update remaps the lba */
+
+	/* Mark invalid pages */
+	if (!error) {
+		for (page = 0; page < msb->pages_in_block; page++) {
+
+			if (test_bit(page, &msb->valid_cache_bitmap))
+				continue;
+
+			dbg("marking page %d as containing damaged data",
+				page);
+			msb_set_overwrite_flag(msb,
+				pba , page, 0xFF & ~MEMSTICK_OV_PG_NORMAL);
+		}
+	}
+
+	msb_cache_discard(msb);
+	return error;
+}
+
+/*
+ * Stores one page, taken from @sg at @offset, in the write cache as page
+ * @page of logical block @lba.
+ *
+ * With @add_to_cache_only set, the page is only stored when the cache
+ * already holds @lba; otherwise nothing happens and 0 is returned.
+ * Without it, a cache holding a different block is flushed first and an
+ * empty cache is claimed for @lba, which also arms the flush timer.
+ * Returns 0 on success, -EROFS on a read only device or the flush error.
+ */
+static int msb_cache_write(struct msb_data *msb, int lba,
+	int page, bool add_to_cache_only, struct scatterlist *sg, int offset)
+{
+	struct scatterlist page_sg[10];
+	int rc;
+
+	if (msb->read_only)
+		return -EROFS;
+
+	if (add_to_cache_only &&
+		(msb->cache_block_lba == MS_BLOCK_INVALID ||
+					lba != msb->cache_block_lba))
+		return 0;
+
+	/* If we need to write different block */
+	if (msb->cache_block_lba != MS_BLOCK_INVALID &&
+						lba != msb->cache_block_lba) {
+		dbg_verbose("first flush the cache");
+		rc = msb_cache_flush(msb);
+		if (rc)
+			return rc;
+	}
+
+	/* an empty cache is taken over by this block */
+	if (msb->cache_block_lba  == MS_BLOCK_INVALID) {
+		msb->cache_block_lba  = lba;
+		mod_timer(&msb->cache_flush_timer,
+			jiffies + msecs_to_jiffies(cache_flush_timeout));
+	}
+
+	dbg_verbose("Write of LBA %d page %d to cache ", lba, page);
+
+	/* describe the source page with a temporary sg list and copy it */
+	sg_init_table(page_sg, ARRAY_SIZE(page_sg));
+	msb_sg_copy(sg, page_sg, ARRAY_SIZE(page_sg), offset, msb->page_size);
+
+	sg_copy_to_buffer(page_sg, sg_nents(page_sg),
+		msb->cache + page * msb->page_size, msb->page_size);
+
+	set_bit(page, &msb->valid_cache_bitmap);
+	return 0;
+}
+
+/*
+ * Reads page @page of logical block @lba into @sg at @offset.
+ * The page is taken from the write cache when it is present there;
+ * otherwise it is read from the media and then offered to the cache
+ * (it is only stored if the cache already holds @lba).
+ * Returns 0 on success or the error of the media read.
+ */
+static int msb_cache_read(struct msb_data *msb, int lba,
+				int page, struct scatterlist *sg, int offset)
+{
+	int pba = msb->lba_to_pba_table[lba];
+	bool in_cache = lba == msb->cache_block_lba &&
+			test_bit(page, &msb->valid_cache_bitmap);
+	struct scatterlist page_sg[10];
+	int rc;
+
+	if (!in_cache) {
+		dbg_verbose("Read of LBA %d (pba %d) sector %d from device",
+							lba, pba, page);
+
+		rc = msb_read_page(msb, pba, page, NULL, sg, offset);
+		if (rc)
+			return rc;
+
+		msb_cache_write(msb, lba, page, true, sg, offset);
+		return 0;
+	}
+
+	dbg_verbose("Read of LBA %d (pba %d) sector %d from cache",
+							lba, pba, page);
+
+	/* describe the destination page with a temporary sg list */
+	sg_init_table(page_sg, ARRAY_SIZE(page_sg));
+	msb_sg_copy(sg, page_sg, ARRAY_SIZE(page_sg),
+		offset, msb->page_size);
+	sg_copy_from_buffer(page_sg, sg_nents(page_sg),
+		msb->cache + msb->page_size * page,
+						msb->page_size);
+	return 0;
+}
+
+/* Emulated geometry table
+ * This table content isn't that important,
+ * One could put here different values, providing that they still
+ * cover whole disk.
+ * 64 MB entry is what windows reports for my 64M memstick
+ * The table is terminated by an entry with size 0. */
+
+static const struct chs_entry chs_table[] = {
+/*        size sectors cylinders  heads */
+	{ 4,    16,    247,       2  },
+	{ 8,    16,    495,       2  },
+	{ 16,   16,    495,       4  },
+	{ 32,   16,    991,       4  },
+	{ 64,   16,    991,       8  },
+	{128,   16,    991,       16 },
+	{ 0 }
+};
+
+/*
+ * Load information about the card.
+ *
+ * Resets the card, switches to parallel mode if the host supports it,
+ * reads the boot blocks and takes the media layout (block count, page
+ * size, pages per block) from the first one. Then the block buffer, the
+ * write cache and the FTL tables are set up, the factory bad block table
+ * is read (from the backup boot block if the primary one fails) and the
+ * media is scanned.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int msb_init_card(struct memstick_dev *card)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct memstick_host *host = card->host;
+	struct ms_boot_page *boot_block;
+	int error = 0, i, raw_size_in_megs;
+
+	msb->caps = 0;
+
+	if (card->id.class >= MEMSTICK_CLASS_ROM &&
+				card->id.class <= MEMSTICK_CLASS_ROM)
+		msb->read_only = true;
+
+	msb->state = -1;
+	error = msb_reset(msb, false);
+	if (error)
+		return error;
+
+	/* Due to a bug in Jmicron driver written by Alex Dubov,
+	 its serial mode barely works,
+	 so we switch to parallel mode right away */
+	if (host->caps & MEMSTICK_CAP_PAR4)
+		msb_switch_to_parallel(msb);
+
+	/* the real page size is unknown until the boot page is read */
+	msb->page_size = sizeof(struct ms_boot_page);
+
+	/* Read the boot page */
+	error = msb_read_boot_blocks(msb);
+	if (error)
+		return -EIO;
+
+	boot_block = &msb->boot_page[0];
+
+	/* Save interesting attributes from boot page */
+	msb->block_count = boot_block->attr.number_of_blocks;
+	msb->page_size = boot_block->attr.page_size;
+
+	msb->pages_in_block = boot_block->attr.block_size * 2;
+	msb->block_size = msb->page_size * msb->pages_in_block;
+
+	/*
+	 * The page size comes from the media: reject 0 as well, it is used
+	 * as a divisor when the bad block table is located.
+	 */
+	if (!msb->page_size || msb->page_size > PAGE_SIZE) {
+		/* this isn't supported by linux at all, anyway*/
+		dbg("device page %d size isn't supported", msb->page_size);
+		return -EINVAL;
+	}
+
+	msb->block_buffer = kzalloc(msb->block_size, GFP_KERNEL);
+	if (!msb->block_buffer)
+		return -ENOMEM;
+
+	raw_size_in_megs = (msb->block_size * msb->block_count) >> 20;
+
+	/* pick the emulated geometry that matches the raw media size */
+	for (i = 0; chs_table[i].size; i++) {
+
+		if (chs_table[i].size != raw_size_in_megs)
+			continue;
+
+		msb->geometry.cylinders = chs_table[i].cyl;
+		msb->geometry.heads = chs_table[i].head;
+		msb->geometry.sectors = chs_table[i].sec;
+		break;
+	}
+
+	if (boot_block->attr.transfer_supporting == 1)
+		msb->caps |= MEMSTICK_CAP_PAR4;
+
+	if (boot_block->attr.device_type & 0x03)
+		msb->read_only = true;
+
+	dbg("Total block count = %d", msb->block_count);
+	dbg("Each block consists of %d pages", msb->pages_in_block);
+	dbg("Page size = %d bytes", msb->page_size);
+	dbg("Parallel mode supported: %d", !!(msb->caps & MEMSTICK_CAP_PAR4));
+	dbg("Read only: %d", msb->read_only);
+
+#if 0
+	/* Now we can switch the interface */
+	if (host->caps & msb->caps & MEMSTICK_CAP_PAR4)
+		msb_switch_to_parallel(msb);
+#endif
+
+	error = msb_cache_init(msb);
+	if (error)
+		return error;
+
+	error = msb_ftl_initialize(msb);
+	if (error)
+		return error;
+
+
+	/* Read the bad block table */
+	error = msb_read_bad_block_table(msb, 0);
+
+	/* an allocation failure wouldn't be cured by the backup copy */
+	if (error && error != -ENOMEM) {
+		dbg("failed to read bad block table from primary boot block, trying from backup");
+		error = msb_read_bad_block_table(msb, 1);
+	}
+
+	if (error)
+		return error;
+
+	/* *drum roll* Scan the media */
+	error = msb_ftl_scan(msb);
+	if (error) {
+		pr_err("Scan of media failed");
+		return error;
+	}
+
+	return 0;
+
+}
+
+static int msb_do_write_request(struct msb_data *msb, int lba,
+	int page, struct scatterlist *sg, size_t len, int *sucessfuly_written)
+{
+	int error = 0;
+	off_t offset = 0;
+	*sucessfuly_written = 0;
+
+	while (offset < len) {
+		if (page == 0 && len - offset >= msb->block_size) {
+
+			if (msb->cache_block_lba == lba)
+				msb_cache_discard(msb);
+
+			dbg_verbose("Writing whole lba %d", lba);
+			error = msb_update_block(msb, lba, sg, offset);
+			if (error)
+				return error;
+
+			offset += msb->block_size;
+			*sucessfuly_written += msb->block_size;
+			lba++;
+			continue;
+		}
+
+		error = msb_cache_write(msb, lba, page, false, sg, offset);
+		if (error)
+			return error;
+
+		offset += msb->page_size;
+		*sucessfuly_written += msb->page_size;
+
+		page++;
+		if (page == msb->pages_in_block) {
+			page = 0;
+			lba++;
+		}
+	}
+	return 0;
+}
+
+static int msb_do_read_request(struct msb_data *msb, int lba,
+		int page, struct scatterlist *sg, int len, int *sucessfuly_read)
+{
+	int error = 0;
+	int offset = 0;
+	*sucessfuly_read = 0;
+
+	while (offset < len) {
+
+		error = msb_cache_read(msb, lba, page, sg, offset);
+		if (error)
+			return error;
+
+		offset += msb->page_size;
+		*sucessfuly_read += msb->page_size;
+
+		page++;
+		if (page == msb->pages_in_block) {
+			page = 0;
+			lba++;
+		}
+	}
+	return 0;
+}
+
+/* Work item: flush the cache if requested, then serve queued requests */
+static void msb_io_work(struct work_struct *work)
+{
+	struct msb_data *msb = container_of(work, struct msb_data, io_work);
+	int page, error, len;
+	sector_t lba;
+	unsigned long flags;
+	struct scatterlist *sg = msb->prealloc_sg;
+
+	dbg_verbose("IO: work started");
+
+	while (1) {
+		spin_lock_irqsave(&msb->q_lock, flags);
+		if (msb->need_flush_cache) {
+			msb->need_flush_cache = false;
+			spin_unlock_irqrestore(&msb->q_lock, flags);
+			msb_cache_flush(msb);
+			continue;
+		}
+
+		if (!msb->req) {
+			msb->req = blk_fetch_request(msb->queue);
+			if (!msb->req) {
+				dbg_verbose("IO: no more requests exiting");
+				spin_unlock_irqrestore(&msb->q_lock, flags);
+				return;
+			}
+		}
+
+		spin_unlock_irqrestore(&msb->q_lock, flags);
+
+		/* If card was removed meanwhile */
+		if (!msb->req)
+			return;
+
+		/* process the request */
+		dbg_verbose("IO: processing new request");
+		blk_rq_map_sg(msb->queue, msb->req, sg);
+
+		/* sector_t may be 32-bit: use sector_div(), not do_div() */
+		lba = blk_rq_pos(msb->req);
+		sector_div(lba, msb->page_size / 512);
+		page = sector_div(lba, msb->pages_in_block);
+
+		if (rq_data_dir(msb->req) == READ)
+			error = msb_do_read_request(msb, lba, page, sg,
+				blk_rq_bytes(msb->req), &len);
+		else
+			error = msb_do_write_request(msb, lba, page, sg,
+				blk_rq_bytes(msb->req), &len);
+
+		spin_lock_irqsave(&msb->q_lock, flags);
+
+		if (len)
+			if (!__blk_end_request(msb->req, 0, len))
+				msb->req = NULL;
+
+		if (error && msb->req) {
+			dbg_verbose("IO: ending one sector of the request with error");
+			if (!__blk_end_request(msb->req, error, msb->page_size))
+				msb->req = NULL;
+		}
+
+		if (msb->req)
+			dbg_verbose("IO: request still pending");
+
+		spin_unlock_irqrestore(&msb->q_lock, flags);
+	}
+}
+
+static DEFINE_IDR(msb_disk_idr); /*set of used disk numbers */
+static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */
+
+static int msb_bd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	struct msb_data *msb = disk->private_data;
+
+	dbg_verbose("block device open");
+
+	mutex_lock(&msb_disk_lock);
+
+	if (msb && msb->card)
+		msb->usage_count++;
+
+	mutex_unlock(&msb_disk_lock);
+	return 0;
+}
+
+static void msb_data_clear(struct msb_data *msb)
+{
+	kfree(msb->boot_page);
+	kfree(msb->used_blocks_bitmap);
+	kfree(msb->lba_to_pba_table);
+	kfree(msb->cache);
+	msb->card = NULL;
+}
+
+static int msb_disk_release(struct gendisk *disk)
+{
+	struct msb_data *msb = disk->private_data;
+
+	dbg_verbose("block device release");
+	mutex_lock(&msb_disk_lock);
+
+	if (msb) {
+		if (msb->usage_count)
+			msb->usage_count--;
+
+		if (!msb->usage_count) {
+			disk->private_data = NULL;
+			idr_remove(&msb_disk_idr, msb->disk_id);
+			put_disk(disk);
+			kfree(msb);
+		}
+	}
+	mutex_unlock(&msb_disk_lock);
+	return 0;
+}
+
+static void msb_bd_release(struct gendisk *disk, fmode_t mode)
+{
+	msb_disk_release(disk);
+}
+
+static int msb_bd_getgeo(struct block_device *bdev,
+				 struct hd_geometry *geo)
+{
+	struct msb_data *msb = bdev->bd_disk->private_data;
+	*geo = msb->geometry;
+	return 0;
+}
+
+static int msb_prepare_req(struct request_queue *q, struct request *req)
+{
+	if (req->cmd_type != REQ_TYPE_FS &&
+				req->cmd_type != REQ_TYPE_BLOCK_PC) {
+		blk_dump_rq_flags(req, "MS unsupported request");
+		return BLKPREP_KILL;
+	}
+	req->cmd_flags |= REQ_DONTPREP;
+	return BLKPREP_OK;
+}
+
+static void msb_submit_req(struct request_queue *q)
+{
+	struct memstick_dev *card = q->queuedata;
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct request *req = NULL;
+
+	dbg_verbose("Submit request");
+
+	if (msb->card_dead) {
+		dbg("Refusing requests on removed card");
+
+		WARN_ON(!msb->io_queue_stopped);
+
+		while ((req = blk_fetch_request(q)) != NULL)
+			__blk_end_request_all(req, -ENODEV);
+		return;
+	}
+
+	if (msb->req)
+		return;
+
+	if (!msb->io_queue_stopped)
+		queue_work(msb->io_queue, &msb->io_work);
+}
+
+static int msb_check_card(struct memstick_dev *card)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	return (msb->card_dead == 0);
+}
+
+static void msb_stop(struct memstick_dev *card)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	unsigned long flags;
+
+	dbg("Stopping all msblock IO");
+
+	spin_lock_irqsave(&msb->q_lock, flags);
+	blk_stop_queue(msb->queue);
+	msb->io_queue_stopped = true;
+	spin_unlock_irqrestore(&msb->q_lock, flags);
+
+	del_timer_sync(&msb->cache_flush_timer);
+	flush_workqueue(msb->io_queue);
+
+	if (msb->req) {
+		spin_lock_irqsave(&msb->q_lock, flags);
+		blk_requeue_request(msb->queue, msb->req);
+		msb->req = NULL;
+		spin_unlock_irqrestore(&msb->q_lock, flags);
+	}
+
+}
+
+static void msb_start(struct memstick_dev *card)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	unsigned long flags;
+
+	dbg("Resuming IO from msblock");
+
+	msb_invalidate_reg_window(msb);
+
+	spin_lock_irqsave(&msb->q_lock, flags);
+	if (!msb->io_queue_stopped || msb->card_dead) {
+		spin_unlock_irqrestore(&msb->q_lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&msb->q_lock, flags);
+
+	/* Kick cache flush anyway, it's harmless */
+	msb->need_flush_cache = true;
+	msb->io_queue_stopped = false;
+
+	spin_lock_irqsave(&msb->q_lock, flags);
+	blk_start_queue(msb->queue);
+	spin_unlock_irqrestore(&msb->q_lock, flags);
+
+	queue_work(msb->io_queue, &msb->io_work);
+
+}
+
+static const struct block_device_operations msb_bdops = {
+	.open    = msb_bd_open,
+	.release = msb_bd_release,
+	.getgeo  = msb_bd_getgeo,
+	.owner   = THIS_MODULE
+};
+
+/* Registers the block device; returns 0 or a negative errno */
+static int msb_init_disk(struct memstick_dev *card)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct memstick_host *host = card->host;
+	int rc = -ENOMEM;	/* reported by every out_* error path */
+	u64 limit = BLK_BOUNCE_HIGH;
+	unsigned long capacity;
+
+	if (host->dev.dma_mask && *(host->dev.dma_mask))
+		limit = *(host->dev.dma_mask);
+
+	mutex_lock(&msb_disk_lock);
+	msb->disk_id = idr_alloc(&msb_disk_idr, card, 0, 256, GFP_KERNEL);
+	mutex_unlock(&msb_disk_lock);
+	if (msb->disk_id  < 0)
+		return msb->disk_id;
+
+	msb->disk = alloc_disk(0);
+	if (!msb->disk)
+		goto out_release_id;
+
+	msb->queue = blk_init_queue(msb_submit_req, &msb->q_lock);
+	if (!msb->queue)
+		goto out_put_disk;
+
+	/* IO is dispatched with queue_work() on this, so it must exist */
+	msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM);
+	if (!msb->io_queue)
+		goto out_cleanup_queue;
+
+	msb->queue->queuedata = card;
+	blk_queue_prep_rq(msb->queue, msb_prepare_req);
+	blk_queue_bounce_limit(msb->queue, limit);
+	blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
+	blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS);
+	blk_queue_max_segment_size(msb->queue,
+				   MS_BLOCK_MAX_PAGES * msb->page_size);
+	blk_queue_logical_block_size(msb->queue, msb->page_size);
+
+	sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id);
+	msb->disk->fops = &msb_bdops;
+	msb->disk->private_data = msb;
+	msb->disk->queue = msb->queue;
+	msb->disk->driverfs_dev = &card->dev;
+	msb->disk->flags |= GENHD_FL_EXT_DEVT;
+
+	capacity = msb->pages_in_block * msb->logical_block_count;
+	capacity *= (msb->page_size / 512);
+	set_capacity(msb->disk, capacity);
+	dbg("Set total disk size to %lu sectors", capacity);
+
+	msb->usage_count = 1;
+	INIT_WORK(&msb->io_work, msb_io_work);
+	sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1);
+
+	if (msb->read_only)
+		set_disk_ro(msb->disk, 1);
+
+	msb_start(card);
+	add_disk(msb->disk);
+	dbg("Disk added");
+	return 0;
+
+out_cleanup_queue:
+	blk_cleanup_queue(msb->queue);
+out_put_disk:
+	put_disk(msb->disk);
+out_release_id:
+	mutex_lock(&msb_disk_lock);
+	idr_remove(&msb_disk_idr, msb->disk_id);
+	mutex_unlock(&msb_disk_lock);
+	return rc;
+}
+
+static int msb_probe(struct memstick_dev *card)
+{
+	struct msb_data *msb;
+	int rc = 0;
+
+	msb = kzalloc(sizeof(struct msb_data), GFP_KERNEL);
+	if (!msb)
+		return -ENOMEM;
+	memstick_set_drvdata(card, msb);
+	msb->card = card;
+	spin_lock_init(&msb->q_lock);
+
+	rc = msb_init_card(card);
+	if (rc)
+		goto out_free;
+
+	rc = msb_init_disk(card);
+	if (!rc) {
+		card->check = msb_check_card;
+		card->stop = msb_stop;
+		card->start = msb_start;
+		return 0;
+	}
+out_free:
+	memstick_set_drvdata(card, NULL);
+	msb_data_clear(msb);
+	kfree(msb);
+	return rc;
+}
+
+static void msb_remove(struct memstick_dev *card)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	unsigned long flags;
+
+	if (!msb->io_queue_stopped)
+		msb_stop(card);
+
+	dbg("Removing the disk device");
+
+	/* Take care of unhandled + new requests from now on */
+	spin_lock_irqsave(&msb->q_lock, flags);
+	msb->card_dead = true;
+	blk_start_queue(msb->queue);
+	spin_unlock_irqrestore(&msb->q_lock, flags);
+
+	/* Remove the disk */
+	del_gendisk(msb->disk);
+	blk_cleanup_queue(msb->queue);
+	msb->queue = NULL;
+
+	mutex_lock(&msb_disk_lock);
+	msb_data_clear(msb);
+	mutex_unlock(&msb_disk_lock);
+
+	msb_disk_release(msb->disk);
+	memstick_set_drvdata(card, NULL);
+}
+
+#ifdef CONFIG_PM
+
+static int msb_suspend(struct memstick_dev *card, pm_message_t state)
+{
+	msb_stop(card);
+	return 0;
+}
+
+static int msb_resume(struct memstick_dev *card)
+{
+	struct msb_data *msb = memstick_get_drvdata(card);
+	struct msb_data *new_msb = NULL;
+	bool card_dead = true;
+
+#ifndef CONFIG_MEMSTICK_UNSAFE_RESUME
+	msb->card_dead = true;
+	return 0;
+#endif
+	mutex_lock(&card->host->lock);
+
+	new_msb = kzalloc(sizeof(struct msb_data), GFP_KERNEL);
+	if (!new_msb)
+		goto out;
+
+	new_msb->card = card;
+	memstick_set_drvdata(card, new_msb);
+	spin_lock_init(&new_msb->q_lock);
+	sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1);
+
+	if (msb_init_card(card))
+		goto out;
+
+	if (msb->block_size != new_msb->block_size)
+		goto out;
+
+	if (memcmp(msb->boot_page, new_msb->boot_page,
+					sizeof(struct ms_boot_page)))
+		goto out;
+
+	if (msb->logical_block_count != new_msb->logical_block_count ||
+		memcmp(msb->lba_to_pba_table, new_msb->lba_to_pba_table,
+						msb->logical_block_count))
+		goto out;
+
+	if (msb->block_count != new_msb->block_count ||
+		memcmp(msb->used_blocks_bitmap, new_msb->used_blocks_bitmap,
+							msb->block_count / 8))
+		goto out;
+
+	card_dead = false;
+out:
+	if (card_dead)
+		dbg("Card was removed/replaced during suspend");
+
+	msb->card_dead = card_dead;
+	memstick_set_drvdata(card, msb);
+
+	if (new_msb) {
+		msb_data_clear(new_msb);
+		kfree(new_msb);
+	}
+
+	msb_start(card);
+	mutex_unlock(&card->host->lock);
+	return 0;
+}
+#else
+
+#define msb_suspend NULL
+#define msb_resume NULL
+
+#endif /* CONFIG_PM */
+
+static struct memstick_device_id msb_id_tbl[] = {
+	{MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE,
+	 MEMSTICK_CLASS_FLASH},
+
+	{MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE,
+	 MEMSTICK_CLASS_ROM},
+
+	{MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE,
+	 MEMSTICK_CLASS_RO},
+
+	{MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE,
+	 MEMSTICK_CLASS_WP},
+
+	{MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_DUO, MEMSTICK_CATEGORY_STORAGE_DUO,
+	 MEMSTICK_CLASS_DUO},
+	{}
+};
+MODULE_DEVICE_TABLE(memstick, msb_id_tbl);
+
+
+static struct memstick_driver msb_driver = {
+	.driver = {
+		.name  = DRIVER_NAME,
+		.owner = THIS_MODULE
+	},
+	.id_table = msb_id_tbl,
+	.probe    = msb_probe,
+	.remove   = msb_remove,
+	.suspend  = msb_suspend,
+	.resume   = msb_resume
+};
+
+static int major;
+
+static int __init msb_init(void)
+{
+	int rc = register_blkdev(0, DRIVER_NAME);
+
+	if (rc < 0) {
+		pr_err("failed to register major (error %d)\n", rc);
+		return rc;
+	}
+
+	major = rc;
+	rc = memstick_register_driver(&msb_driver);
+	if (rc) {
+		unregister_blkdev(major, DRIVER_NAME);
+		pr_err("failed to register memstick driver (error %d)\n", rc);
+	}
+
+	return rc;
+}
+
+static void __exit msb_exit(void)
+{
+	memstick_unregister_driver(&msb_driver);
+	unregister_blkdev(major, DRIVER_NAME);
+	idr_destroy(&msb_disk_idr);
+}
+
+module_init(msb_init);
+module_exit(msb_exit);
+
+module_param(cache_flush_timeout, int, S_IRUGO);
+MODULE_PARM_DESC(cache_flush_timeout,
+				"Cache flush timeout in msec (1000 default)");
+module_param(debug, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Debug level (0-2)");
+
+module_param(verify_writes, bool, S_IRUGO);
+MODULE_PARM_DESC(verify_writes, "Read back and check all data that is written");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Maxim Levitsky");
+MODULE_DESCRIPTION("Sony MemoryStick block device driver");
diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h
new file mode 100644
index 000000000000..96e637550988
--- /dev/null
+++ b/drivers/memstick/core/ms_block.h
@@ -0,0 +1,290 @@
+/*
+ *  ms_block.h - Sony MemoryStick (legacy) storage support
+ *
+ *  Copyright (C) 2013 Maxim Levitsky <maximlevitsky@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Minor portions of the driver are copied from mspro_block.c which is
+ * Copyright (C) 2007 Alex Dubov <oakad@yahoo.com>
+ *
+ * Also ms structures were copied from old broken driver by same author
+ * These probably come from MS spec
+ *
+ */
+
+#ifndef MS_BLOCK_NEW_H
+#define MS_BLOCK_NEW_H
+
+#define MS_BLOCK_MAX_SEGS      32
+#define MS_BLOCK_MAX_PAGES     ((2 << 16) - 1)
+
+#define MS_BLOCK_MAX_BOOT_ADDR 0x000c
+#define MS_BLOCK_BOOT_ID       0x0001
+#define MS_BLOCK_INVALID       0xffff
+#define MS_MAX_ZONES           16
+#define MS_BLOCKS_IN_ZONE      512
+
+#define MS_BLOCK_MAP_LINE_SZ   16
+#define MS_BLOCK_PART_SHIFT    3
+
+
+#define MEMSTICK_UNCORR_ERROR (MEMSTICK_STATUS1_UCFG | \
+		MEMSTICK_STATUS1_UCEX | MEMSTICK_STATUS1_UCDT)
+
+#define MEMSTICK_CORR_ERROR (MEMSTICK_STATUS1_FGER | MEMSTICK_STATUS1_EXER | \
+	MEMSTICK_STATUS1_DTER)
+
+#define MEMSTICK_INT_ERROR (MEMSTICK_INT_CMDNAK | MEMSTICK_INT_ERR)
+
+#define MEMSTICK_OVERWRITE_FLAG_NORMAL \
+	(MEMSTICK_OVERWRITE_PGST1 | \
+	MEMSTICK_OVERWRITE_PGST0  | \
+	MEMSTICK_OVERWRITE_BKST)
+
+#define MEMSTICK_OV_PG_NORMAL \
+	(MEMSTICK_OVERWRITE_PGST1 | MEMSTICK_OVERWRITE_PGST0)
+
+#define MEMSTICK_MANAGMENT_FLAG_NORMAL \
+	(MEMSTICK_MANAGEMENT_SYSFLG |  \
+	MEMSTICK_MANAGEMENT_SCMS1   |  \
+	MEMSTICK_MANAGEMENT_SCMS0)
+
+struct ms_boot_header {
+	unsigned short block_id;
+	unsigned short format_reserved;
+	unsigned char  reserved0[184];
+	unsigned char  data_entry;
+	unsigned char  reserved1[179];
+} __packed;
+
+
+struct ms_system_item {
+	unsigned int  start_addr;
+	unsigned int  data_size;
+	unsigned char data_type_id;
+	unsigned char reserved[3];
+} __packed;
+
+struct ms_system_entry {
+	struct ms_system_item disabled_block;
+	struct ms_system_item cis_idi;
+	unsigned char         reserved[24];
+} __packed;
+
+struct ms_boot_attr_info {
+	unsigned char      memorystick_class;
+	unsigned char      format_unique_value1;
+	unsigned short     block_size;
+	unsigned short     number_of_blocks;
+	unsigned short     number_of_effective_blocks;
+	unsigned short     page_size;
+	unsigned char      extra_data_size;
+	unsigned char      format_unique_value2;
+	unsigned char      assembly_time[8];
+	unsigned char      format_unique_value3;
+	unsigned char      serial_number[3];
+	unsigned char      assembly_manufacturer_code;
+	unsigned char      assembly_model_code[3];
+	unsigned short     memory_manufacturer_code;
+	unsigned short     memory_device_code;
+	unsigned short     implemented_capacity;
+	unsigned char      format_unique_value4[2];
+	unsigned char      vcc;
+	unsigned char      vpp;
+	unsigned short     controller_number;
+	unsigned short     controller_function;
+	unsigned char      reserved0[9];
+	unsigned char      transfer_supporting;
+	unsigned short     format_unique_value5;
+	unsigned char      format_type;
+	unsigned char      memorystick_application;
+	unsigned char      device_type;
+	unsigned char      reserved1[22];
+	unsigned char      format_uniqure_value6[2];
+	unsigned char      reserved2[15];
+} __packed;
+
+struct ms_cis_idi {
+	unsigned short general_config;
+	unsigned short logical_cylinders;
+	unsigned short reserved0;
+	unsigned short logical_heads;
+	unsigned short track_size;
+	unsigned short page_size;
+	unsigned short pages_per_track;
+	unsigned short msw;
+	unsigned short lsw;
+	unsigned short reserved1;
+	unsigned char  serial_number[20];
+	unsigned short buffer_type;
+	unsigned short buffer_size_increments;
+	unsigned short long_command_ecc;
+	unsigned char  firmware_version[28];
+	unsigned char  model_name[18];
+	unsigned short reserved2[5];
+	unsigned short pio_mode_number;
+	unsigned short dma_mode_number;
+	unsigned short field_validity;
+	unsigned short current_logical_cylinders;
+	unsigned short current_logical_heads;
+	unsigned short current_pages_per_track;
+	unsigned int   current_page_capacity;
+	unsigned short mutiple_page_setting;
+	unsigned int   addressable_pages;
+	unsigned short single_word_dma;
+	unsigned short multi_word_dma;
+	unsigned char  reserved3[128];
+} __packed;
+
+
+struct ms_boot_page {
+	struct ms_boot_header    header;
+	struct ms_system_entry   entry;
+	struct ms_boot_attr_info attr;
+} __packed;
+
+struct msb_data {
+	unsigned int			usage_count;
+	struct memstick_dev		*card;
+	struct gendisk			*disk;
+	struct request_queue		*queue;
+	spinlock_t			q_lock;
+	struct hd_geometry		geometry;
+	struct attribute_group		attr_group;
+	struct request			*req;
+	int				caps;
+	int				disk_id;
+
+	/* IO */
+	struct workqueue_struct		*io_queue;
+	bool				io_queue_stopped;
+	struct work_struct		io_work;
+	bool				card_dead;
+
+	/* Media properties */
+	struct ms_boot_page		*boot_page;
+	u16				boot_block_locations[2];
+	int				boot_block_count;
+
+	bool				read_only;
+	unsigned short			page_size;
+	int				block_size;
+	int				pages_in_block;
+	int				zone_count;
+	int				block_count;
+	int				logical_block_count;
+
+	/* FTL tables */
+	unsigned long			*used_blocks_bitmap;
+	unsigned long			*erased_blocks_bitmap;
+	u16				*lba_to_pba_table;
+	int				free_block_count[MS_MAX_ZONES];
+	bool				ftl_initialized;
+
+	/* Cache */
+	unsigned char			*cache;
+	unsigned long			valid_cache_bitmap;
+	int				cache_block_lba;
+	bool				need_flush_cache;
+	struct timer_list		cache_flush_timer;
+
+	/* Preallocated buffers */
+	unsigned char			*block_buffer;
+	struct scatterlist		prealloc_sg[MS_BLOCK_MAX_SEGS+1];
+
+
+	/* handler's local data */
+	struct ms_register_addr		reg_addr;
+	bool				addr_valid;
+
+	u8				command_value;
+	bool				command_need_oob;
+	struct scatterlist		*current_sg;
+	int				current_sg_offset;
+
+	struct ms_register		regs;
+	int				current_page;
+
+	int				state;
+	int				exit_error;
+	bool				int_polling;
+	unsigned long			int_timeout;
+
+};
+
+enum msb_readpage_states {
+	MSB_RP_SEND_BLOCK_ADDRESS = 0,
+	MSB_RP_SEND_READ_COMMAND,
+
+	MSB_RP_SEND_INT_REQ,
+	MSB_RP_RECEIVE_INT_REQ_RESULT,
+
+	MSB_RP_SEND_READ_STATUS_REG,
+	MSB_RP_RECIVE_STATUS_REG,
+
+	MSB_RP_SEND_OOB_READ,
+	MSB_RP_RECEIVE_OOB_READ,
+
+	MSB_RP_SEND_READ_DATA,
+	MSB_RP_RECEIVE_READ_DATA,
+};
+
+enum msb_write_block_states {
+	MSB_WB_SEND_WRITE_PARAMS = 0,
+	MSB_WB_SEND_WRITE_OOB,
+	MSB_WB_SEND_WRITE_COMMAND,
+
+	MSB_WB_SEND_INT_REQ,
+	MSB_WB_RECEIVE_INT_REQ,
+
+	MSB_WB_SEND_WRITE_DATA,
+	MSB_WB_RECEIVE_WRITE_CONFIRMATION,
+};
+
+enum msb_send_command_states {
+	MSB_SC_SEND_WRITE_PARAMS,
+	MSB_SC_SEND_WRITE_OOB,
+	MSB_SC_SEND_COMMAND,
+
+	MSB_SC_SEND_INT_REQ,
+	MSB_SC_RECEIVE_INT_REQ,
+
+};
+
+enum msb_reset_states {
+	MSB_RS_SEND,
+	MSB_RS_CONFIRM,
+};
+
+enum msb_par_switch_states {
+	MSB_PS_SEND_SWITCH_COMMAND,
+	MSB_PS_SWICH_HOST,
+	MSB_PS_CONFIRM,
+};
+
+struct chs_entry {
+	unsigned long size;
+	unsigned char sec;
+	unsigned short cyl;
+	unsigned char head;
+};
+
+static int msb_reset(struct msb_data *msb, bool full);
+
+static int h_msb_default_bad(struct memstick_dev *card,
+						struct memstick_request **mrq);
+
+#define __dbg(level, format, ...) \
+	do { \
+		if (debug >= level) \
+			pr_err(format "\n", ## __VA_ARGS__); \
+	} while (0)
+
+
+#define dbg(format, ...)		__dbg(1, format, ## __VA_ARGS__)
+#define dbg_verbose(format, ...)	__dbg(2, format, ## __VA_ARGS__)
+
+#endif
diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c
index 74dbb6bcf488..ffb36ba8a6e0 100644
--- a/drivers/mtd/chips/gen_probe.c
+++ b/drivers/mtd/chips/gen_probe.c
@@ -211,9 +211,7 @@ static inline struct mtd_info *cfi_cmdset_unknown(struct map_info *map,
 
 	probe_function = __symbol_get(probename);
 	if (!probe_function) {
-		char modname[sizeof("cfi_cmdset_%4.4X")];
-		sprintf(modname, "cfi_cmdset_%4.4X", type);
-		request_module(modname);
+		request_module("cfi_cmdset_%4.4X", type);
 		probe_function = __symbol_get(probename);
 	}
 
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 35853b43d66e..741f11edf341 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -102,6 +102,19 @@ static int ehea_probe_adapter(struct platform_device *dev);
 
 static int ehea_remove(struct platform_device *dev);
 
+static struct of_device_id ehea_module_device_table[] = {
+	{
+		.name = "lhea",
+		.compatible = "IBM,lhea",
+	},
+	{
+		.type = "network",
+		.compatible = "IBM,lhea-ethernet",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, ehea_module_device_table);
+
 static struct of_device_id ehea_device_table[] = {
 	{
 		.name = "lhea",
@@ -109,7 +122,6 @@ static struct of_device_id ehea_device_table[] = {
 	},
 	{},
 };
-MODULE_DEVICE_TABLE(of, ehea_device_table);
 
 static struct platform_driver ehea_driver = {
 	.driver = {
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 510b9c8d23a9..c6bfc4ad8b50 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
 	.resume		= toshoboe_wakeup 
 };
 
-static int __init
-donauboe_init (void)
-{
-  return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
-  pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
diff --git a/drivers/pcmcia/pd6729.c b/drivers/pcmcia/pd6729.c
index a4c16ee5c718..622dd6fe7347 100644
--- a/drivers/pcmcia/pd6729.c
+++ b/drivers/pcmcia/pd6729.c
@@ -777,15 +777,4 @@ static struct pci_driver pd6729_pci_driver = {
 	.remove		= pd6729_pci_remove,
 };
 
-static int pd6729_module_init(void)
-{
-	return pci_register_driver(&pd6729_pci_driver);
-}
-
-static void pd6729_module_exit(void)
-{
-	pci_unregister_driver(&pd6729_pci_driver);
-}
-
-module_init(pd6729_module_init);
-module_exit(pd6729_module_exit);
+module_pci_driver(pd6729_pci_driver);
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index 6b4ff099fb13..dc18a3a5e010 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -1439,20 +1439,6 @@ static struct pci_driver yenta_cardbus_driver = {
 	.driver.pm	= YENTA_PM_OPS,
 };
 
-
-static int __init yenta_socket_init(void)
-{
-	return pci_register_driver(&yenta_cardbus_driver);
-}
-
-
-static void __exit yenta_socket_exit(void)
-{
-	pci_unregister_driver(&yenta_cardbus_driver);
-}
-
-
-module_init(yenta_socket_init);
-module_exit(yenta_socket_exit);
+module_pci_driver(yenta_cardbus_driver);
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index f4f30af2df68..2e8a20cac588 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1715,11 +1715,13 @@ int rio_unregister_scan(int mport_id, struct rio_scan *scan_ops)
 		    (mport_id == RIO_MPORT_ANY && port->nscan == scan_ops))
 			port->nscan = NULL;
 
-	list_for_each_entry(scan, &rio_scans, node)
+	list_for_each_entry(scan, &rio_scans, node) {
 		if (scan->mport_id == mport_id) {
 			list_del(&scan->node);
 			kfree(scan);
+			break;
 		}
+	}
 
 	mutex_unlock(&rio_mport_list_lock);
 
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index 7273b0139e5c..b5a2874b15ef 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -23,10 +23,6 @@
 #include <linux/iio/iio.h>
 #include <linux/rtc.h>
 
-/* Format: HID-SENSOR-usage_id_in_hex */
-/* Usage ID from spec for Time: 0x2000A0 */
-#define DRIVER_NAME "HID-SENSOR-2000a0" /* must be lowercase */
-
 enum hid_time_channel {
 	CHANNEL_SCAN_INDEX_YEAR,
 	CHANNEL_SCAN_INDEX_MONTH,
@@ -300,9 +296,19 @@ static int hid_time_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static struct platform_device_id hid_time_ids[] = {
+	{
+		/* Format: HID-SENSOR-usage_id_in_hex_lowercase */
+		.name = "HID-SENSOR-2000a0",
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, hid_time_ids);
+
 static struct platform_driver hid_time_platform_driver = {
+	.id_table = hid_time_ids,
 	.driver = {
-		.name	= DRIVER_NAME,
+		.name	= KBUILD_MODNAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= hid_time_probe,
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index 0163457c12bb..db3710ff1b68 100644
--- a/drivers/scsi/a100u2w.c
+++ b/drivers/scsi/a100u2w.c
@@ -1227,19 +1227,9 @@ static struct pci_driver inia100_pci_driver = {
 	.remove		= inia100_remove_one,
 };
 
-static int __init inia100_init(void)
-{
-	return pci_register_driver(&inia100_pci_driver);
-}
-
-static void __exit inia100_exit(void)
-{
-	pci_unregister_driver(&inia100_pci_driver);
-}
+module_pci_driver(inia100_pci_driver);
 
 MODULE_DESCRIPTION("Initio A100U2W SCSI driver");
 MODULE_AUTHOR("Initio Corporation");
 MODULE_LICENSE("Dual BSD/GPL");
 
-module_init(inia100_init);
-module_exit(inia100_exit);
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index 694e13c45dfd..e73445bd53a8 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -4882,29 +4882,7 @@ static struct pci_driver dc395x_driver = {
 	.remove         = dc395x_remove_one,
 };
 
-
-/**
- * dc395x_module_init - Module initialization function
- *
- * Used by both module and built-in driver to initialise this driver.
- **/
-static int __init dc395x_module_init(void)
-{
-	return pci_register_driver(&dc395x_driver);
-}
-
-
-/**
- * dc395x_module_exit - Module cleanup function.
- **/
-static void __exit dc395x_module_exit(void)
-{
-	pci_unregister_driver(&dc395x_driver);
-}
-
-
-module_init(dc395x_module_init);
-module_exit(dc395x_module_exit);
+module_pci_driver(dc395x_driver);
 
 MODULE_AUTHOR("C.L. Huang / Erich Chen / Kurt Garloff");
 MODULE_DESCRIPTION("SCSI host adapter driver for Tekram TRM-S1040 based adapters: Tekram DC395 and DC315 series");
diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c
index 4b0dd8c56707..bb28062a3237 100644
--- a/drivers/scsi/dmx3191d.c
+++ b/drivers/scsi/dmx3191d.c
@@ -153,18 +153,7 @@ static struct pci_driver dmx3191d_pci_driver = {
 	.remove		= dmx3191d_remove_one,
 };
 
-static int __init dmx3191d_init(void)
-{
-	return pci_register_driver(&dmx3191d_pci_driver);
-}
-
-static void __exit dmx3191d_exit(void)
-{
-	pci_unregister_driver(&dmx3191d_pci_driver);
-}
-
-module_init(dmx3191d_init);
-module_exit(dmx3191d_exit);
+module_pci_driver(dmx3191d_pci_driver);
 
 MODULE_AUTHOR("Massimo Piccioni <dafastidio@libero.it>");
 MODULE_DESCRIPTION("Domex DMX3191D SCSI driver");
diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c
index 280d5af113d1..1befc26fd95e 100644
--- a/drivers/scsi/initio.c
+++ b/drivers/scsi/initio.c
@@ -2995,19 +2995,8 @@ static struct pci_driver initio_pci_driver = {
 	.remove		= initio_remove_one,
 };
 
-static int __init initio_init_driver(void)
-{
-	return pci_register_driver(&initio_pci_driver);
-}
-
-static void __exit initio_exit_driver(void)
-{
-	pci_unregister_driver(&initio_pci_driver);
-}
+module_pci_driver(initio_pci_driver);
 
 MODULE_DESCRIPTION("Initio INI-9X00U/UW SCSI device driver");
 MODULE_AUTHOR("Initio Corporation");
 MODULE_LICENSE("GPL");
-
-module_init(initio_init_driver);
-module_exit(initio_exit_driver);
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index c3601b57a80c..d0b6a0320d94 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -2735,22 +2735,4 @@ static struct pci_driver mvumi_pci_driver = {
 #endif
 };
 
-/**
- * mvumi_init - Driver load entry point
- */
-static int __init mvumi_init(void)
-{
-	return pci_register_driver(&mvumi_pci_driver);
-}
-
-/**
- * mvumi_exit - Driver unload entry point
- */
-static void __exit mvumi_exit(void)
-{
-
-	pci_unregister_driver(&mvumi_pci_driver);
-}
-
-module_init(mvumi_init);
-module_exit(mvumi_exit);
+module_pci_driver(mvumi_pci_driver);
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
index 042a2bc432be..2af15d41e77a 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
@@ -63,9 +63,9 @@
 #if BITS_PER_LONG == 32
 /* limit to lowmem on 32-bit systems */
 #define NUM_CACHEPAGES \
-	min(num_physpages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4)
+	min(totalram_pages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4)
 #else
-#define NUM_CACHEPAGES num_physpages
+#define NUM_CACHEPAGES totalram_pages
 #endif
 
 /*
@@ -79,42 +79,4 @@
 	do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
 #define MMSPACE_CLOSE	       set_fs(__oldfs)
 
-/*
- * Shrinker
- */
-
-# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)  \
-		       struct shrinker *shrinker, \
-		       struct shrink_control *sc
-# define shrink_param(sc, var) ((sc)->var)
-
-typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask));
-
-static inline
-struct shrinker *set_shrinker(int seek, shrinker_t func)
-{
-	struct shrinker *s;
-
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
-	if (s == NULL)
-		return (NULL);
-
-	s->shrink = func;
-	s->seeks = seek;
-
-	register_shrinker(s);
-
-	return s;
-}
-
-static inline
-void remove_shrinker(struct shrinker *shrinker)
-{
-	if (shrinker == NULL)
-		return;
-
-	unregister_shrinker(shrinker);
-	kfree(shrinker);
-}
-
 #endif /* __LINUX_CFS_MEM_H__ */
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
index 42df53072dc3..aace5342b76a 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
@@ -339,8 +339,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	cli->cl_avail_grant = 0;
 	/* FIXME: Should limit this for the sum of all cl_dirty_max. */
 	cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
-	if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > num_physpages / 8)
-		cli->cl_dirty_max = num_physpages << (PAGE_CACHE_SHIFT - 3);
+	if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8)
+		cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3);
 	INIT_LIST_HEAD(&cli->cl_cache_waiters);
 	INIT_LIST_HEAD(&cli->cl_loi_ready_list);
 	INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
@@ -388,11 +388,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 
 	if (!strcmp(name, LUSTRE_MDC_NAME)) {
 		cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
-	} else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) {
+	} else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) {
 		cli->cl_max_rpcs_in_flight = 2;
-	} else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) {
+	} else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) {
 		cli->cl_max_rpcs_in_flight = 3;
-	} else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) {
+	} else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) {
 		cli->cl_max_rpcs_in_flight = 4;
 	} else {
 		if (osc_on_mdt(obddev->obd_name))
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index b3b60288e5f5..4c41e02604f5 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -524,7 +524,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
 				int nr, unsigned int gfp_mask)
 {
 	struct ldlm_namespace *ns;
-	int canceled = 0, unused;
+	int unused;
 
 	ns = ldlm_pl2ns(pl);
 
@@ -543,14 +543,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
 	unused = ns->ns_nr_unused;
 	spin_unlock(&ns->ns_lock);
 
-	if (nr) {
-		canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC,
-					   LDLM_CANCEL_SHRINK);
-	}
-	/*
-	 * Return the number of potentially reclaimable locks.
-	 */
-	return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
+	if (nr == 0)
+		return (unused / 100) * sysctl_vfs_cache_pressure;
+	else
+		return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK);
 }
 
 struct ldlm_pool_ops ldlm_srv_pool_ops = {
@@ -605,9 +601,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
 }
 EXPORT_SYMBOL(ldlm_pool_recalc);
 
-/**
+/*
  * Pool shrink wrapper. Will call either client or server pool recalc callback
- * depending what pool \a pl is used.
+ * depending what pool pl is used. When nr == 0, just return the number of
+ * freeable locks. Otherwise, return the number of canceled locks.
  */
 int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
 		     unsigned int gfp_mask)
@@ -1025,28 +1022,23 @@ static int ldlm_pool_granted(struct ldlm_pool *pl)
 }
 
 static struct ptlrpc_thread *ldlm_pools_thread;
-static struct shrinker *ldlm_pools_srv_shrinker;
-static struct shrinker *ldlm_pools_cli_shrinker;
 static struct completion ldlm_pools_comp;
 
 /*
- * Cancel \a nr locks from all namespaces (if possible). Returns number of
- * cached locks after shrink is finished. All namespaces are asked to
- * cancel approximately equal amount of locks to keep balancing.
+ * Count locks from all namespaces (if possible). Returns the number of
+ * cached locks.
  */
-static int ldlm_pools_shrink(ldlm_side_t client, int nr,
-			     unsigned int gfp_mask)
+static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask)
 {
-	int total = 0, cached = 0, nr_ns;
+	unsigned long total = 0, nr_ns;
 	struct ldlm_namespace *ns;
 	void *cookie;
 
-	if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
-	    !(gfp_mask & __GFP_FS))
-		return -1;
+	if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
+		return 0;
 
-	CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n",
-	       nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
+	CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n",
+	       client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
 
 	cookie = cl_env_reenter();
 
@@ -1070,16 +1062,26 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
 		ldlm_namespace_put(ns);
 	}
 
-	if (nr == 0 || total == 0) {
-		cl_env_reexit(cookie);
-		return total;
-	}
+	cl_env_reexit(cookie);
+	return total;
+}
+
+static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask)
+{
+	unsigned long freed = 0;
+        int tmp, nr_ns;
+	struct ldlm_namespace *ns;
+	void *cookie;
 
+	if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
+		return -1;
+
+	cookie = cl_env_reenter();
 	/*
 	 * Shrink at least ldlm_namespace_nr(client) namespaces.
 	 */
-	for (nr_ns = atomic_read(ldlm_namespace_nr(client));
-	     nr_ns > 0; nr_ns--)
+	for (tmp = nr_ns = atomic_read(ldlm_namespace_nr(client));
+	     tmp > 0; tmp--)
 	{
 		int cancel, nr_locks;
 
@@ -1089,12 +1091,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
 		mutex_lock(ldlm_namespace_lock(client));
 		if (list_empty(ldlm_namespace_list(client))) {
 			mutex_unlock(ldlm_namespace_lock(client));
-			/*
-			 * If list is empty, we can't return any @cached > 0,
-			 * that probably would cause needless shrinker
-			 * call.
-			 */
-			cached = 0;
 			break;
 		}
 		ns = ldlm_namespace_first_locked(client);
@@ -1103,29 +1099,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
 		mutex_unlock(ldlm_namespace_lock(client));
 
 		nr_locks = ldlm_pool_granted(&ns->ns_pool);
-		cancel = 1 + nr_locks * nr / total;
-		ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
-		cached += ldlm_pool_granted(&ns->ns_pool);
+		/*
+		 * We used to shrink proportionally, but with the new shrinker
+		 * API we no longer know the total number of freeable locks.
+		 */
+		cancel = 1 + min_t(int, nr_locks, nr / nr_ns);
+		freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
 		ldlm_namespace_put(ns);
 	}
 	cl_env_reexit(cookie);
-	/* we only decrease the SLV in server pools shrinker, return -1 to
-	 * kernel to avoid needless loop. LU-1128 */
-	return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached;
+	/*
+	 * we only decrease the SLV in server pools shrinker, return
+	 * SHRINK_STOP to kernel to avoid needless loop. LU-1128
+	 */
+	return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed;
+}
+
+static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc)
+{
+	return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask);
 }
 
-static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc)
 {
-	return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER,
-				 shrink_param(sc, nr_to_scan),
-				 shrink_param(sc, gfp_mask));
+	return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan,
+			       sc->gfp_mask);
 }
 
-static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc)
 {
-	return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT,
-				 shrink_param(sc, nr_to_scan),
-				 shrink_param(sc, gfp_mask));
+	return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask);
+}
+
+static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc)
+{
+	return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan,
+			       sc->gfp_mask);
 }
 
 void ldlm_pools_recalc(ldlm_side_t client)
@@ -1351,6 +1360,18 @@ static void ldlm_pools_thread_stop(void)
 	EXIT;
 }
 
+static struct shrinker ldlm_pools_srv_shrinker = {
+	.count_objects	= ldlm_pools_srv_count,
+	.scan_objects	= ldlm_pools_srv_scan,
+	.seeks		= DEFAULT_SEEKS,
+};
+
+static struct shrinker ldlm_pools_cli_shrinker = {
+	.count_objects	= ldlm_pools_cli_count,
+	.scan_objects	= ldlm_pools_cli_scan,
+	.seeks		= DEFAULT_SEEKS,
+};
+
 int ldlm_pools_init(void)
 {
 	int rc;
@@ -1358,12 +1379,8 @@ int ldlm_pools_init(void)
 
 	rc = ldlm_pools_thread_start();
 	if (rc == 0) {
-		ldlm_pools_srv_shrinker =
-			set_shrinker(DEFAULT_SEEKS,
-					 ldlm_pools_srv_shrink);
-		ldlm_pools_cli_shrinker =
-			set_shrinker(DEFAULT_SEEKS,
-					 ldlm_pools_cli_shrink);
+		register_shrinker(&ldlm_pools_srv_shrinker);
+		register_shrinker(&ldlm_pools_cli_shrinker);
 	}
 	RETURN(rc);
 }
@@ -1371,14 +1388,8 @@ EXPORT_SYMBOL(ldlm_pools_init);
 
 void ldlm_pools_fini(void)
 {
-	if (ldlm_pools_srv_shrinker != NULL) {
-		remove_shrinker(ldlm_pools_srv_shrinker);
-		ldlm_pools_srv_shrinker = NULL;
-	}
-	if (ldlm_pools_cli_shrinker != NULL) {
-		remove_shrinker(ldlm_pools_cli_shrinker);
-		ldlm_pools_cli_shrinker = NULL;
-	}
+	unregister_shrinker(&ldlm_pools_srv_shrinker);
+	unregister_shrinker(&ldlm_pools_cli_shrinker);
 	ldlm_pools_thread_stop();
 }
 EXPORT_SYMBOL(ldlm_pools_fini);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c
index 6f563436a255..a500a0b5ba9f 100644
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c
@@ -269,7 +269,7 @@ void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
 
 int cfs_trace_max_debug_mb(void)
 {
-	int  total_mb = (num_physpages >> (20 - PAGE_SHIFT));
+	int  total_mb = (totalram_pages >> (20 - PAGE_SHIFT));
 
 	return MAX(512, (total_mb * 80)/100);
 }
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 6a82505c7933..a30c411b583b 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -243,9 +243,9 @@ static ssize_t ll_max_readahead_mb_seq_write(struct file *file, const char *buff
 	if (rc)
 		return rc;
 
-	if (pages_number < 0 || pages_number > num_physpages / 2) {
+	if (pages_number < 0 || pages_number > totalram_pages / 2) {
 		CERROR("can't set file readahead more than %lu MB\n",
-		       num_physpages >> (20 - PAGE_CACHE_SHIFT + 1)); /*1/2 of RAM*/
+		       totalram_pages >> (20 - PAGE_CACHE_SHIFT + 1)); /*1/2 of RAM*/
 		return -ERANGE;
 	}
 
@@ -388,10 +388,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, const char *buffer,
 	if (rc)
 		RETURN(rc);
 
-	if (pages_number < 0 || pages_number > num_physpages) {
+	if (pages_number < 0 || pages_number > totalram_pages) {
 		CERROR("%s: can't set max cache more than %lu MB\n",
 		       ll_get_fsname(sb, NULL, 0),
-		       num_physpages >> (20 - PAGE_CACHE_SHIFT));
+		       totalram_pages >> (20 - PAGE_CACHE_SHIFT));
 		RETURN(-ERANGE);
 	}
 
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index af1c2d09c47b..0715cf2af3ce 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -558,10 +558,10 @@ static int __init init_obdclass(void)
 	/* Default the dirty page cache cap to 1/2 of system memory.
 	 * For clients with less memory, a larger fraction is needed
 	 * for other purposes (mostly for BGL). */
-	if (num_physpages <= 512 << (20 - PAGE_CACHE_SHIFT))
-		obd_max_dirty_pages = num_physpages / 4;
+	if (totalram_pages <= 512 << (20 - PAGE_CACHE_SHIFT))
+		obd_max_dirty_pages = totalram_pages / 4;
 	else
-		obd_max_dirty_pages = num_physpages / 2;
+		obd_max_dirty_pages = totalram_pages / 2;
 
 	err = obd_init_caches();
 	if (err)
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
index 46aad6813cab..7b94cb7b58e0 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
@@ -202,12 +202,12 @@ int LL_PROC_PROTO(proc_max_dirty_pages_in_mb)
 					       1 << (20 - PAGE_CACHE_SHIFT));
 		/* Don't allow them to let dirty pages exceed 90% of system
 		 * memory and set a hard minimum of 4MB. */
-		if (obd_max_dirty_pages > ((num_physpages / 10) * 9)) {
+		if (obd_max_dirty_pages > ((totalram_pages / 10) * 9)) {
 			CERROR("Refusing to set max dirty pages to %u, which "
 			       "is more than 90%% of available RAM; setting "
 			       "to %lu\n", obd_max_dirty_pages,
-			       ((num_physpages / 10) * 9));
-			obd_max_dirty_pages = ((num_physpages / 10) * 9);
+			       ((totalram_pages / 10) * 9));
+			obd_max_dirty_pages = ((totalram_pages / 10) * 9);
 		} else if (obd_max_dirty_pages < 4 << (20 - PAGE_CACHE_SHIFT)) {
 			obd_max_dirty_pages = 4 << (20 - PAGE_CACHE_SHIFT);
 		}
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index fdf0ed367693..1a37c2c60add 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -849,7 +849,7 @@ static int lu_htable_order(void)
 	 *
 	 * Size of lu_object is (arbitrary) taken as 1K (together with inode).
 	 */
-	cache_size = num_physpages;
+	cache_size = totalram_pages;
 
 #if BITS_PER_LONG == 32
 	/* limit hashtable size for lowmem systems to low RAM */
@@ -1783,7 +1783,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags,
 }
 EXPORT_SYMBOL(lu_env_refill_by_tags);
 
-static struct shrinker *lu_site_shrinker = NULL;
 
 typedef struct lu_site_stats{
 	unsigned	lss_populated;
@@ -1839,61 +1838,68 @@ static void lu_site_stats_get(cfs_hash_t *hs,
  * objects without taking the  lu_sites_guard lock, but this is not
  * possible in the current implementation.
  */
-static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+static unsigned long lu_cache_shrink_count(struct shrinker *sk,
+					   struct shrink_control *sc)
 {
 	lu_site_stats_t stats;
 	struct lu_site *s;
 	struct lu_site *tmp;
-	int cached = 0;
-	int remain = shrink_param(sc, nr_to_scan);
-	LIST_HEAD(splice);
-
-	if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) {
-		if (remain != 0)
-			return -1;
-		else
-			/* We must not take the lu_sites_guard lock when
-			 * __GFP_FS is *not* set because of the deadlock
-			 * possibility detailed above. Additionally,
-			 * since we cannot determine the number of
-			 * objects in the cache without taking this
-			 * lock, we're in a particularly tough spot. As
-			 * a result, we'll just lie and say our cache is
-			 * empty. This _should_ be ok, as we can't
-			 * reclaim objects when __GFP_FS is *not* set
-			 * anyways.
-			 */
-			return 0;
-	}
+	unsigned long cached = 0;
 
-	CDEBUG(D_INODE, "Shrink %d objects\n", remain);
+	if (!(sc->gfp_mask & __GFP_FS))
+		return 0;
 
 	mutex_lock(&lu_sites_guard);
 	list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
-		if (shrink_param(sc, nr_to_scan) != 0) {
-			remain = lu_site_purge(&lu_shrink_env, s, remain);
-			/*
-			 * Move just shrunk site to the tail of site list to
-			 * assure shrinking fairness.
-			 */
-			list_move_tail(&s->ls_linkage, &splice);
-		}
-
 		memset(&stats, 0, sizeof(stats));
 		lu_site_stats_get(s->ls_obj_hash, &stats, 0);
 		cached += stats.lss_total - stats.lss_busy;
-		if (shrink_param(sc, nr_to_scan) && remain <= 0)
-			break;
 	}
-	list_splice(&splice, lu_sites.prev);
 	mutex_unlock(&lu_sites_guard);
 
 	cached = (cached / 100) * sysctl_vfs_cache_pressure;
-	if (shrink_param(sc, nr_to_scan) == 0)
-		CDEBUG(D_INODE, "%d objects cached\n", cached);
+	CDEBUG(D_INODE, "%lu objects cached\n", cached);
 	return cached;
 }
 
+static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
+					  struct shrink_control *sc)
+{
+	struct lu_site *s;
+	struct lu_site *tmp;
+	unsigned long remain = sc->nr_to_scan, freed = 0;
+	LIST_HEAD(splice);
+
+	if (!(sc->gfp_mask & __GFP_FS))
+		/* We must not take the lu_sites_guard lock when
+		 * __GFP_FS is *not* set because of the deadlock
+		 * possibility detailed above. Purging requires
+		 * that lock, so nothing can be reclaimed from
+		 * this allocation context; return SHRINK_STOP
+		 * rather than a count. The mask test must be
+		 * parenthesised before negation: '!' binds
+		 * tighter than '&', and "!mask & flag" would
+		 * never be true for a flag other than bit 0,
+		 * silently disabling this guard.
+		 */
+		return SHRINK_STOP;
+
+	mutex_lock(&lu_sites_guard);
+	list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
+		freed = lu_site_purge(&lu_shrink_env, s, remain);
+		remain -= freed;
+		/*
+		 * Move just shrunk site to the tail of site list to
+		 * assure shrinking fairness.
+		 */
+		list_move_tail(&s->ls_linkage, &splice);
+	}
+	list_splice(&splice, lu_sites.prev);
+	mutex_unlock(&lu_sites_guard);
+
+	return sc->nr_to_scan - remain;
+}
+
 /*
  * Debugging stuff.
  */
@@ -1917,6 +1923,12 @@ int lu_printk_printer(const struct lu_env *env,
 	return 0;
 }
 
+static struct shrinker lu_site_shrinker = {
+	.count_objects	= lu_cache_shrink_count,
+	.scan_objects	= lu_cache_shrink_scan,
+	.seeks 		= DEFAULT_SEEKS,
+};
+
 /**
  * Initialization of global lu_* data.
  */
@@ -1951,9 +1963,7 @@ int lu_global_init(void)
 	 * inode, one for ea. Unfortunately setting this high value results in
 	 * lu_object/inode cache consuming all the memory.
 	 */
-	lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink);
-	if (lu_site_shrinker == NULL)
-		return -ENOMEM;
+	register_shrinker(&lu_site_shrinker);
 
 	return result;
 }
@@ -1963,11 +1973,7 @@ int lu_global_init(void)
  */
 void lu_global_fini(void)
 {
-	if (lu_site_shrinker != NULL) {
-		remove_shrinker(lu_site_shrinker);
-		lu_site_shrinker = NULL;
-	}
-
+	unregister_shrinker(&lu_site_shrinker);
 	lu_context_key_degister(&lu_global_key);
 
 	/*
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
index 198cf3ba1374..d2f219877927 100644
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -146,7 +146,7 @@ static ssize_t osc_max_dirty_mb_seq_write(struct file *file, const char *buffer,
 
 	if (pages_number <= 0 ||
 	    pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_CACHE_SHIFT) ||
-	    pages_number > num_physpages / 4) /* 1/4 of RAM */
+	    pages_number > totalram_pages / 4) /* 1/4 of RAM */
 		return -ERANGE;
 
 	client_obd_list_lock(&cli->cl_loi_list_lock);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
index 3e7325499d01..2bd0d985bf1d 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -302,7 +302,7 @@ ptlrpc_lprocfs_req_history_max_seq_write(struct file *file, const char *buffer,
 	 * hose a kernel by allowing the request history to grow too
 	 * far. */
 	bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (val > num_physpages/(2 * bufpages))
+	if (val > totalram_pages / (2 * bufpages))
 		return -ERANGE;
 
 	spin_lock(&svc->srv_lock);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index bf53f1bc1742..e90c8fb7da6a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -121,13 +121,6 @@ static struct ptlrpc_enc_page_pool {
 } page_pools;
 
 /*
- * memory shrinker
- */
-const int pools_shrinker_seeks = DEFAULT_SEEKS;
-static struct shrinker *pools_shrinker = NULL;
-
-
-/*
  * /proc/fs/lustre/sptlrpc/encrypt_page_pools
  */
 int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
@@ -156,7 +149,7 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
 		      "max waitqueue depth:     %u\n"
 		      "max wait time:	   "CFS_TIME_T"/%u\n"
 		      ,
-		      num_physpages,
+		      totalram_pages,
 		      PAGES_PER_POOL,
 		      page_pools.epp_max_pages,
 		      page_pools.epp_max_pools,
@@ -226,30 +219,46 @@ static void enc_pools_release_free_pages(long npages)
 }
 
 /*
- * could be called frequently for query (@nr_to_scan == 0).
  * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
  */
-static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+static unsigned long enc_pools_shrink_count(struct shrinker *s,
+					    struct shrink_control *sc)
 {
-	if (unlikely(shrink_param(sc, nr_to_scan) != 0)) {
+	/*
+	 * if no pool access for a long time, we consider it's fully idle.
+	 * a little race here is fine.
+	 */
+	if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
+		     CACHE_QUIESCENT_PERIOD)) {
 		spin_lock(&page_pools.epp_lock);
-		shrink_param(sc, nr_to_scan) = min_t(unsigned long,
-						   shrink_param(sc, nr_to_scan),
-						   page_pools.epp_free_pages -
-						   PTLRPC_MAX_BRW_PAGES);
-		if (shrink_param(sc, nr_to_scan) > 0) {
-			enc_pools_release_free_pages(shrink_param(sc,
-								  nr_to_scan));
-			CDEBUG(D_SEC, "released %ld pages, %ld left\n",
-			       (long)shrink_param(sc, nr_to_scan),
-			       page_pools.epp_free_pages);
-
-			page_pools.epp_st_shrinks++;
-			page_pools.epp_last_shrink = cfs_time_current_sec();
-		}
+		page_pools.epp_idle_idx = IDLE_IDX_MAX;
 		spin_unlock(&page_pools.epp_lock);
 	}
 
+	LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
+	return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
+		(IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
+}
+
+/*
+ * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
+ */
+static unsigned long enc_pools_shrink_scan(struct shrinker *s,
+					   struct shrink_control *sc)
+{
+	spin_lock(&page_pools.epp_lock);
+	sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
+			      page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
+	if (sc->nr_to_scan > 0) {
+		enc_pools_release_free_pages(sc->nr_to_scan);
+		CDEBUG(D_SEC, "released %ld pages, %ld left\n",
+		       (long)sc->nr_to_scan, page_pools.epp_free_pages);
+
+		page_pools.epp_st_shrinks++;
+		page_pools.epp_last_shrink = cfs_time_current_sec();
+	}
+	spin_unlock(&page_pools.epp_lock);
+
 	/*
 	 * if no pool access for a long time, we consider it's fully idle.
 	 * a little race here is fine.
@@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
 	}
 
 	LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
-	return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
-		(IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
+	return sc->nr_to_scan;
 }
 
 static inline
@@ -699,13 +707,19 @@ static inline void enc_pools_free(void)
 		       sizeof(*page_pools.epp_pools));
 }
 
+static struct shrinker pools_shrinker = {
+	.count_objects	= enc_pools_shrink_count,
+	.scan_objects	= enc_pools_shrink_scan,
+	.seeks		= DEFAULT_SEEKS,
+};
+
 int sptlrpc_enc_pool_init(void)
 {
 	/*
 	 * maximum capacity is 1/8 of total physical memory.
 	 * is the 1/8 a good number?
 	 */
-	page_pools.epp_max_pages = num_physpages / 8;
+	page_pools.epp_max_pages = totalram_pages / 8;
 	page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
 
 	init_waitqueue_head(&page_pools.epp_waitq);
@@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void)
 	if (page_pools.epp_pools == NULL)
 		return -ENOMEM;
 
-	pools_shrinker = set_shrinker(pools_shrinker_seeks,
-					  enc_pools_shrink);
-	if (pools_shrinker == NULL) {
-		enc_pools_free();
-		return -ENOMEM;
-	}
+	register_shrinker(&pools_shrinker);
 
 	return 0;
 }
@@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void)
 {
 	unsigned long cleaned, npools;
 
-	LASSERT(pools_shrinker);
 	LASSERT(page_pools.epp_pools);
 	LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
 
-	remove_shrinker(pools_shrinker);
+	unregister_shrinker(&pools_shrinker);
 
 	npools = npages_to_npools(page_pools.epp_total_pages);
 	cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 810143a6aa33..f36950e4134f 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -599,7 +599,6 @@ static int __init pkg_temp_thermal_init(void)
 	return 0;
 
 err_ret:
-	get_online_cpus();
 	for_each_online_cpu(i)
 		put_core_offline(i);
 	put_online_cpus();
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c
index 6488a7351a60..7e8346ec9cdc 100644
--- a/drivers/video/acornfb.c
+++ b/drivers/video/acornfb.c
@@ -38,14 +38,6 @@
 #include "acornfb.h"
 
 /*
- * VIDC machines can't do 16 or 32BPP modes.
- */
-#ifdef HAS_VIDC
-#undef FBCON_HAS_CFB16
-#undef FBCON_HAS_CFB32
-#endif
-
-/*
  * Default resolution.
  * NOTE that it has to be supported in the table towards
  * the end of this file.
@@ -106,238 +98,6 @@ static struct vidc_timing current_vidc;
 
 extern unsigned int vram_size;	/* set by setup.c */
 
-#ifdef HAS_VIDC
-
-#define MAX_SIZE	480*1024
-
-/* CTL     VIDC	Actual
- * 24.000  0	 8.000
- * 25.175  0	 8.392
- * 36.000  0	12.000
- * 24.000  1	12.000
- * 25.175  1	12.588
- * 24.000  2	16.000
- * 25.175  2	16.783
- * 36.000  1	18.000
- * 24.000  3	24.000
- * 36.000  2	24.000
- * 25.175  3	25.175
- * 36.000  3	36.000
- */
-struct pixclock {
-	u_long	min_clock;
-	u_long	max_clock;
-	u_int	vidc_ctl;
-	u_int	vid_ctl;
-};
-
-static struct pixclock arc_clocks[] = {
-	/* we allow +/-1% on these */
-	{ 123750, 126250, VIDC_CTRL_DIV3,   VID_CTL_24MHz },	/*  8.000MHz */
-	{  82500,  84167, VIDC_CTRL_DIV2,   VID_CTL_24MHz },	/* 12.000MHz */
-	{  61875,  63125, VIDC_CTRL_DIV1_5, VID_CTL_24MHz },	/* 16.000MHz */
-	{  41250,  42083, VIDC_CTRL_DIV1,   VID_CTL_24MHz },	/* 24.000MHz */
-};
-
-static struct pixclock *
-acornfb_valid_pixrate(struct fb_var_screeninfo *var)
-{
-	u_long pixclock = var->pixclock;
-	u_int i;
-
-	if (!var->pixclock)
-		return NULL;
-
-	for (i = 0; i < ARRAY_SIZE(arc_clocks); i++)
-		if (pixclock > arc_clocks[i].min_clock &&
-		    pixclock < arc_clocks[i].max_clock)
-			return arc_clocks + i;
-
-	return NULL;
-}
-
-/* VIDC Rules:
- * hcr  : must be even (interlace, hcr/2 must be even)
- * hswr : must be even
- * hdsr : must be odd
- * hder : must be odd
- *
- * vcr  : must be odd
- * vswr : >= 1
- * vdsr : >= 1
- * vder : >= vdsr
- * if interlaced, then hcr/2 must be even
- */
-static void
-acornfb_set_timing(struct fb_var_screeninfo *var)
-{
-	struct pixclock *pclk;
-	struct vidc_timing vidc;
-	u_int horiz_correction;
-	u_int sync_len, display_start, display_end, cycle;
-	u_int is_interlaced;
-	u_int vid_ctl, vidc_ctl;
-	u_int bandwidth;
-
-	memset(&vidc, 0, sizeof(vidc));
-
-	pclk = acornfb_valid_pixrate(var);
-	vidc_ctl = pclk->vidc_ctl;
-	vid_ctl  = pclk->vid_ctl;
-
-	bandwidth = var->pixclock * 8 / var->bits_per_pixel;
-	/* 25.175, 4bpp = 79.444ns per byte, 317.776ns per word: fifo = 2,6 */
-	if (bandwidth > 143500)
-		vidc_ctl |= VIDC_CTRL_FIFO_3_7;
-	else if (bandwidth > 71750)
-		vidc_ctl |= VIDC_CTRL_FIFO_2_6;
-	else if (bandwidth > 35875)
-		vidc_ctl |= VIDC_CTRL_FIFO_1_5;
-	else
-		vidc_ctl |= VIDC_CTRL_FIFO_0_4;
-
-	switch (var->bits_per_pixel) {
-	case 1:
-		horiz_correction = 19;
-		vidc_ctl |= VIDC_CTRL_1BPP;
-		break;
-
-	case 2:
-		horiz_correction = 11;
-		vidc_ctl |= VIDC_CTRL_2BPP;
-		break;
-
-	case 4:
-		horiz_correction = 7;
-		vidc_ctl |= VIDC_CTRL_4BPP;
-		break;
-
-	default:
-	case 8:
-		horiz_correction = 5;
-		vidc_ctl |= VIDC_CTRL_8BPP;
-		break;
-	}
-
-	if (var->sync & FB_SYNC_COMP_HIGH_ACT) /* should be FB_SYNC_COMP */
-		vidc_ctl |= VIDC_CTRL_CSYNC;
-	else {
-		if (!(var->sync & FB_SYNC_HOR_HIGH_ACT))
-			vid_ctl |= VID_CTL_HS_NHSYNC;
-
-		if (!(var->sync & FB_SYNC_VERT_HIGH_ACT))
-			vid_ctl |= VID_CTL_VS_NVSYNC;
-	}
-
-	sync_len	= var->hsync_len;
-	display_start	= sync_len + var->left_margin;
-	display_end	= display_start + var->xres;
-	cycle		= display_end + var->right_margin;
-
-	/* if interlaced, then hcr/2 must be even */
-	is_interlaced = (var->vmode & FB_VMODE_MASK) == FB_VMODE_INTERLACED;
-
-	if (is_interlaced) {
-		vidc_ctl |= VIDC_CTRL_INTERLACE;
-		if (cycle & 2) {
-			cycle += 2;
-			var->right_margin += 2;
-		}
-	}
-
-	vidc.h_cycle		= (cycle - 2) / 2;
-	vidc.h_sync_width	= (sync_len - 2) / 2;
-	vidc.h_border_start	= (display_start - 1) / 2;
-	vidc.h_display_start	= (display_start - horiz_correction) / 2;
-	vidc.h_display_end	= (display_end - horiz_correction) / 2;
-	vidc.h_border_end	= (display_end - 1) / 2;
-	vidc.h_interlace	= (vidc.h_cycle + 1) / 2;
-
-	sync_len	= var->vsync_len;
-	display_start	= sync_len + var->upper_margin;
-	display_end	= display_start + var->yres;
-	cycle		= display_end + var->lower_margin;
-
-	if (is_interlaced)
-		cycle = (cycle - 3) / 2;
-	else
-		cycle = cycle - 1;
-
-	vidc.v_cycle		= cycle;
-	vidc.v_sync_width	= sync_len - 1;
-	vidc.v_border_start	= display_start - 1;
-	vidc.v_display_start	= vidc.v_border_start;
-	vidc.v_display_end	= display_end - 1;
-	vidc.v_border_end	= vidc.v_display_end;
-
-	if (machine_is_a5k())
-		__raw_writeb(vid_ctl, IOEB_VID_CTL);
-
-	if (memcmp(&current_vidc, &vidc, sizeof(vidc))) {
-		current_vidc = vidc;
-
-		vidc_writel(0xe0000000 | vidc_ctl);
-		vidc_writel(0x80000000 | (vidc.h_cycle << 14));
-		vidc_writel(0x84000000 | (vidc.h_sync_width << 14));
-		vidc_writel(0x88000000 | (vidc.h_border_start << 14));
-		vidc_writel(0x8c000000 | (vidc.h_display_start << 14));
-		vidc_writel(0x90000000 | (vidc.h_display_end << 14));
-		vidc_writel(0x94000000 | (vidc.h_border_end << 14));
-		vidc_writel(0x98000000);
-		vidc_writel(0x9c000000 | (vidc.h_interlace << 14));
-		vidc_writel(0xa0000000 | (vidc.v_cycle << 14));
-		vidc_writel(0xa4000000 | (vidc.v_sync_width << 14));
-		vidc_writel(0xa8000000 | (vidc.v_border_start << 14));
-		vidc_writel(0xac000000 | (vidc.v_display_start << 14));
-		vidc_writel(0xb0000000 | (vidc.v_display_end << 14));
-		vidc_writel(0xb4000000 | (vidc.v_border_end << 14));
-		vidc_writel(0xb8000000);
-		vidc_writel(0xbc000000);
-	}
-#ifdef DEBUG_MODE_SELECTION
-	printk(KERN_DEBUG "VIDC registers for %dx%dx%d:\n", var->xres,
-	       var->yres, var->bits_per_pixel);
-	printk(KERN_DEBUG " H-cycle          : %d\n", vidc.h_cycle);
-	printk(KERN_DEBUG " H-sync-width     : %d\n", vidc.h_sync_width);
-	printk(KERN_DEBUG " H-border-start   : %d\n", vidc.h_border_start);
-	printk(KERN_DEBUG " H-display-start  : %d\n", vidc.h_display_start);
-	printk(KERN_DEBUG " H-display-end    : %d\n", vidc.h_display_end);
-	printk(KERN_DEBUG " H-border-end     : %d\n", vidc.h_border_end);
-	printk(KERN_DEBUG " H-interlace      : %d\n", vidc.h_interlace);
-	printk(KERN_DEBUG " V-cycle          : %d\n", vidc.v_cycle);
-	printk(KERN_DEBUG " V-sync-width     : %d\n", vidc.v_sync_width);
-	printk(KERN_DEBUG " V-border-start   : %d\n", vidc.v_border_start);
-	printk(KERN_DEBUG " V-display-start  : %d\n", vidc.v_display_start);
-	printk(KERN_DEBUG " V-display-end    : %d\n", vidc.v_display_end);
-	printk(KERN_DEBUG " V-border-end     : %d\n", vidc.v_border_end);
-	printk(KERN_DEBUG " VIDC Ctrl (E)    : 0x%08X\n", vidc_ctl);
-	printk(KERN_DEBUG " IOEB Ctrl        : 0x%08X\n", vid_ctl);
-#endif
-}
-
-static int
-acornfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
-		  u_int trans, struct fb_info *info)
-{
-	union palette pal;
-
-	if (regno >= current_par.palette_size)
-		return 1;
-
-	pal.p = 0;
-	pal.vidc.reg   = regno;
-	pal.vidc.red   = red >> 12;
-	pal.vidc.green = green >> 12;
-	pal.vidc.blue  = blue >> 12;
-
-	current_par.palette[regno] = pal;
-
-	vidc_writel(pal.p);
-
-	return 0;
-}
-#endif
-
 #ifdef HAS_VIDC20
 #include <mach/acornfb.h>
 
@@ -634,16 +394,7 @@ acornfb_adjust_timing(struct fb_info *info, struct fb_var_screeninfo *var, u_int
 	/* hsync_len must be even */
 	var->hsync_len = (var->hsync_len + 1) & ~1;
 
-#ifdef HAS_VIDC
-	/* left_margin must be odd */
-	if ((var->left_margin & 1) == 0) {
-		var->left_margin -= 1;
-		var->right_margin += 1;
-	}
-
-	/* right_margin must be odd */
-	var->right_margin |= 1;
-#elif defined(HAS_VIDC20)
+#if defined(HAS_VIDC20)
 	/* left_margin must be even */
 	if (var->left_margin & 1) {
 		var->left_margin += 1;
@@ -787,11 +538,7 @@ static int acornfb_set_par(struct fb_info *info)
 		break;
 	case 8:
 		current_par.palette_size = VIDC_PALETTE_SIZE;
-#ifdef HAS_VIDC
-		info->fix.visual = FB_VISUAL_STATIC_PSEUDOCOLOR;
-#else
 		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
-#endif
 		break;
 #ifdef HAS_VIDC20
 	case 16:
@@ -971,9 +718,6 @@ static void acornfb_init_fbinfo(void)
 #if defined(HAS_VIDC20)
 	fb_info.var.red.length	   = 8;
 	fb_info.var.transp.length  = 4;
-#elif defined(HAS_VIDC)
-	fb_info.var.red.length	   = 4;
-	fb_info.var.transp.length  = 1;
 #endif
 	fb_info.var.green	   = fb_info.var.red;
 	fb_info.var.blue	   = fb_info.var.red;
@@ -1310,14 +1054,6 @@ static int acornfb_probe(struct platform_device *dev)
 		fb_info.fix.smem_start = handle;
 	}
 #endif
-#if defined(HAS_VIDC)
-	/*
-	 * Archimedes/A5000 machines use a fixed address for their
-	 * framebuffers.  Free unused pages
-	 */
-	free_unused_pages(PAGE_OFFSET + size, PAGE_OFFSET + MAX_SIZE);
-#endif
-
 	fb_info.fix.smem_len = size;
 	current_par.palette_size   = VIDC_PALETTE_SIZE;
 
diff --git a/drivers/video/acornfb.h b/drivers/video/acornfb.h
index fb2a7fffe506..175c8ff3367c 100644
--- a/drivers/video/acornfb.h
+++ b/drivers/video/acornfb.h
@@ -13,10 +13,6 @@
 #include <asm/hardware/iomd.h>
 #define VIDC_PALETTE_SIZE	256
 #define VIDC_NAME		"VIDC20"
-#elif defined(HAS_VIDC)
-#include <asm/hardware/memc.h>
-#define VIDC_PALETTE_SIZE	16
-#define VIDC_NAME		"VIDC"
 #endif
 
 #define EXTEND8(x) ((x)|(x)<<8)
@@ -101,31 +97,6 @@ struct modex_params {
 	const struct modey_params *modey;
 };
 
-#ifdef HAS_VIDC
-
-#define VID_CTL_VS_NVSYNC	(1 << 3)
-#define VID_CTL_HS_NHSYNC	(1 << 2)
-#define VID_CTL_24MHz		(0)
-#define VID_CTL_25MHz		(1)
-#define VID_CTL_36MHz		(2)
-
-#define VIDC_CTRL_CSYNC		(1 << 7)
-#define VIDC_CTRL_INTERLACE	(1 << 6)
-#define VIDC_CTRL_FIFO_0_4	(0 << 4)
-#define VIDC_CTRL_FIFO_1_5	(1 << 4)
-#define VIDC_CTRL_FIFO_2_6	(2 << 4)
-#define VIDC_CTRL_FIFO_3_7	(3 << 4)
-#define VIDC_CTRL_1BPP		(0 << 2)
-#define VIDC_CTRL_2BPP		(1 << 2)
-#define VIDC_CTRL_4BPP		(2 << 2)
-#define VIDC_CTRL_8BPP		(3 << 2)
-#define VIDC_CTRL_DIV3		(0 << 0)
-#define VIDC_CTRL_DIV2		(1 << 0)
-#define VIDC_CTRL_DIV1_5	(2 << 0)
-#define VIDC_CTRL_DIV1		(3 << 0)
-
-#endif
-
 #ifdef HAS_VIDC20
 /*
  * VIDC20 registers
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 57886787ead0..e78d9f2233b8 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -518,6 +518,9 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)
 	cyber2000_grphw(0xb9, 0x00, cfb);
 	spin_unlock(&cfb->reg_b0_lock);
 
+	/* wait (for the PLL?) to avoid palette corruption at higher clocks */
+	msleep(1000);
+
 	cfb->ramdac_ctrl = hw->ramdac;
 	cyber2000fb_write_ramdac_ctrl(cfb);
 
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3f1128b37e46..16d3288c808d 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -104,7 +104,7 @@ struct autofs_sb_info {
 	u32 magic;
 	int pipefd;
 	struct file *pipe;
-	pid_t oz_pgrp;
+	struct pid *oz_pgrp;
 	int catatonic;
 	int version;
 	int sub_version;
@@ -139,7 +139,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
+	return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 743c7c2c949d..91838211b66d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -346,6 +346,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 {
 	int pipefd;
 	int err = 0;
+	struct pid *new_pid = NULL;
 
 	if (param->setpipefd.pipefd == -1)
 		return -EINVAL;
@@ -357,7 +358,17 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 		mutex_unlock(&sbi->wq_mutex);
 		return -EBUSY;
 	} else {
-		struct file *pipe = fget(pipefd);
+		struct file *pipe;
+
+		new_pid = get_task_pid(current, PIDTYPE_PGID);
+
+		if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) {
+			AUTOFS_WARN("Not allowed to change PID namespace");
+			err = -EINVAL;
+			goto out;
+		}
+
+		pipe = fget(pipefd);
 		if (!pipe) {
 			err = -EBADF;
 			goto out;
@@ -367,12 +378,13 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 			fput(pipe);
 			goto out;
 		}
-		sbi->oz_pgrp = task_pgrp_nr(current);
+		swap(sbi->oz_pgrp, new_pid);
 		sbi->pipefd = pipefd;
 		sbi->pipe = pipe;
 		sbi->catatonic = 0;
 	}
 out:
+	put_pid(new_pid);
 	mutex_unlock(&sbi->wq_mutex);
 	return err;
 }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index b104726e2d0a..1b045ecfcea2 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -62,6 +62,8 @@ void autofs4_kill_sb(struct super_block *sb)
 	/* Free wait queues, close pipe */
 	autofs4_catatonic_mode(sbi);
 
+	put_pid(sbi->oz_pgrp);
+
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 
@@ -85,7 +87,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
 	if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
 		seq_printf(m, ",gid=%u",
 			from_kgid_munged(&init_user_ns, root_inode->i_gid));
-	seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
+	seq_printf(m, ",pgrp=%d", pid_vnr(sbi->oz_pgrp));
 	seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
 	seq_printf(m, ",minproto=%d", sbi->min_proto);
 	seq_printf(m, ",maxproto=%d", sbi->max_proto);
@@ -129,7 +131,8 @@ static const match_table_t tokens = {
 };
 
 static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
-		pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
+			 int *pgrp, bool *pgrp_set, unsigned int *type,
+			 int *minproto, int *maxproto)
 {
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
@@ -137,7 +140,6 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
 
 	*uid = current_uid();
 	*gid = current_gid();
-	*pgrp = task_pgrp_nr(current);
 
 	*minproto = AUTOFS_MIN_PROTO_VERSION;
 	*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -176,6 +178,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
 			if (match_int(args, &option))
 				return 1;
 			*pgrp = option;
+			*pgrp_set = true;
 			break;
 		case Opt_minproto:
 			if (match_int(args, &option))
@@ -211,6 +214,8 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	int pipefd;
 	struct autofs_sb_info *sbi;
 	struct autofs_info *ino;
+	int pgrp;
+	bool pgrp_set = false;
 
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
@@ -223,7 +228,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->pipe = NULL;
 	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = task_pgrp_nr(current);
+	sbi->oz_pgrp = NULL;
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
@@ -260,12 +265,23 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 
 	/* Can this call block? */
 	if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
-				&sbi->oz_pgrp, &sbi->type, &sbi->min_proto,
-				&sbi->max_proto)) {
+			  &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto,
+			  &sbi->max_proto)) {
 		printk("autofs: called with bogus options\n");
 		goto fail_dput;
 	}
 
+	if (pgrp_set) {
+		sbi->oz_pgrp = find_get_pid(pgrp);
+		if (!sbi->oz_pgrp) {
+			pr_warn("autofs: could not find process group %d\n",
+				pgrp);
+			goto fail_dput;
+		}
+	} else {
+		sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID);
+	}
+
 	if (autofs_type_trigger(sbi->type))
 		__managed_dentry_set_managed(root);
 
@@ -289,9 +305,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 		sbi->version = sbi->max_proto;
 	sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
 
-	DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
+	DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp));
 	pipe = fget(pipefd);
-	
+
 	if (!pipe) {
 		printk("autofs: could not open pipe file descriptor\n");
 		goto fail_dput;
@@ -321,6 +337,7 @@ fail_dput:
 fail_ino:
 	kfree(ino);
 fail_free:
+	put_pid(sbi->oz_pgrp);
 	kfree(sbi);
 	s->s_fs_info = NULL;
 fail_unlock:
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 3db70dae40d3..309ca6bcbb09 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -353,11 +353,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	struct qstr qstr;
 	char *name;
 	int status, ret, type;
+	pid_t pid;
+	pid_t tgid;
 
 	/* In catatonic mode, we don't wait for nobody */
 	if (sbi->catatonic)
 		return -ENOENT;
 
+	/*
+	 * Try translating pids to the namespace of the daemon.
+	 *
+	 * Zero means failure: we are in an unrelated pid namespace.
+	 */
+	pid = task_pid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
+	tgid = task_tgid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
+	if (pid == 0 || tgid == 0)
+		return -ENOENT;
+
 	if (!dentry->d_inode) {
 		/*
 		 * A wait for a negative dentry is invalid for certain
@@ -423,8 +435,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->ino = autofs4_get_ino(sbi);
 		wq->uid = current_uid();
 		wq->gid = current_gid();
-		wq->pid = current->pid;
-		wq->tgid = current->tgid;
+		wq->pid = pid;
+		wq->tgid = tgid;
 		wq->status = -EINTR; /* Status return if interrupted */
 		wq->wait_ctr = 2;
 		mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7d863a4de5a5..dc8227940b64 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -140,6 +140,25 @@ static int padzero(unsigned long elf_bss)
 #define ELF_BASE_PLATFORM NULL
 #endif
 
+/*
+ * Fill @buf with @nbytes bytes for AT_RANDOM, drawn from get_random_int()
+ * one unsigned int at a time so that the entropy pool is not depleted.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+	unsigned char *dst = buf;
+	size_t left = nbytes;
+
+	while (left) {
+		unsigned int word = get_random_int();
+		size_t chunk = min(left, sizeof(word));
+
+		memcpy(dst, &word, chunk);
+		dst += chunk;
+		left -= chunk;
+	}
+}
+
 static int
 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 		unsigned long load_addr, unsigned long interp_load_addr)
@@ -201,7 +220,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	/*
 	 * Generate 16 random bytes for userspace PRNG seeding.
 	 */
-	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+	get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
 	u_rand_bytes = (elf_addr_t __user *)
 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9ad17b15b454..d5e7de07901f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1966,23 +1966,23 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 		size_t, sigsetsize)
 {
 	int error;
-	sigset_t ksigmask, sigsaved;
-
 	/*
 	 * If the caller wants a certain signal mask to be set during the wait,
 	 * we apply it here.
 	 */
 	if (sigmask) {
+		sigset_t ksigmask;
+
 		if (sigsetsize != sizeof(sigset_t))
 			return -EINVAL;
 		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
 			return -EFAULT;
-		sigsaved = current->blocked;
+
+		current->saved_sigmask = current->blocked;
 		set_current_blocked(&ksigmask);
 	}
 
 	error = sys_epoll_wait(epfd, events, maxevents, timeout);
-
 	/*
 	 * If we changed the signal mask, we need to restore the original one.
 	 * In case we've got a signal while waiting, we do not restore the
@@ -1990,12 +1990,10 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 	 * the way back to userspace, before the signal mask is restored.
 	 */
 	if (sigmask) {
-		if (error == -EINTR) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
+		if (error == -EINTR)
 			set_restore_sigmask();
-		} else
-			set_current_blocked(&sigsaved);
+		else
+			__set_current_blocked(&current->saved_sigmask);
 	}
 
 	return error;
@@ -2009,25 +2007,25 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 			compat_size_t, sigsetsize)
 {
 	long err;
-	compat_sigset_t csigmask;
-	sigset_t ksigmask, sigsaved;
-
 	/*
 	 * If the caller wants a certain signal mask to be set during the wait,
 	 * we apply it here.
 	 */
 	if (sigmask) {
+		compat_sigset_t csigmask;
+		sigset_t ksigmask;
+
 		if (sigsetsize != sizeof(compat_sigset_t))
 			return -EINVAL;
 		if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
 			return -EFAULT;
 		sigset_from_compat(&ksigmask, &csigmask);
-		sigsaved = current->blocked;
+
+		current->saved_sigmask = current->blocked;
 		set_current_blocked(&ksigmask);
 	}
 
 	err = sys_epoll_wait(epfd, events, maxevents, timeout);
-
 	/*
 	 * If we changed the signal mask, we need to restore the original one.
 	 * In case we've got a signal while waiting, we do not restore the
@@ -2035,12 +2033,10 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 	 * the way back to userspace, before the signal mask is restored.
 	 */
 	if (sigmask) {
-		if (err == -EINTR) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
+		if (err == -EINTR)
 			set_restore_sigmask();
-		} else
-			set_current_blocked(&sigsaved);
+		else
+			__set_current_blocked(&current->saved_sigmask);
 	}
 
 	return err;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 9b104f543056..00b5810212da 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -17,8 +17,11 @@
 #include <linux/blkdev.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
+#include <linux/falloc.h>
 #include "fat.h"
 
+static long fat_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t len);
 static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
 {
 	u32 attr;
@@ -148,6 +151,22 @@ static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd,
 
 static int fat_file_release(struct inode *inode, struct file *filp)
 {
+
+	struct super_block *sb = inode->i_sb;
+	loff_t mmu_private_ideal;
+
+	/*
+	 * Release unwritten fallocated blocks on file release.
+	 * Do this only when the last open file descriptor is closed.
+	 */
+	mutex_lock(&inode->i_mutex);
+	mmu_private_ideal = round_up(inode->i_size, sb->s_blocksize);
+
+	if (mmu_private_ideal < MSDOS_I(inode)->mmu_private &&
+	    filp->f_dentry->d_count == 1)
+		fat_truncate_blocks(inode, inode->i_size);
+	mutex_unlock(&inode->i_mutex);
+
 	if ((filp->f_mode & FMODE_WRITE) &&
 	     MSDOS_SB(inode->i_sb)->options.flush) {
 		fat_flush_inodes(inode->i_sb, inode, NULL);
@@ -182,6 +201,7 @@ const struct file_operations fat_file_operations = {
 #endif
 	.fsync		= fat_file_fsync,
 	.splice_read	= generic_file_splice_read,
+	.fallocate      = fat_fallocate,
 };
 
 static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -220,6 +240,88 @@ out:
 	return err;
 }
 
+/*
+ * Preallocate len bytes at offset: fat's fallocate file operation, called
+ * from the sys_fallocate system call. With FALLOC_FL_KEEP_SIZE clusters are
+ * chained on without being zeroed, otherwise an expanding truncate allocates
+ * and zeroes them. Sizes are widened to loff_t before shifting by
+ * cluster_bits (int overflows from 2GiB). Unwritten clusters are freed in
+ * fat_file_release(). Returns 0 on success, otherwise an error code.
+ */
+static long fat_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t len)
+{
+	int cluster, fclus, dclus;
+	int nr_cluster; /* Number of clusters to be allocated */
+	loff_t nr_bytes; /* Number of bytes to be allocated */
+	loff_t free_bytes; /* Unused bytes in the last cluster of file */
+	struct inode *inode = file->f_mapping->host;
+	struct super_block *sb = inode->i_sb;
+	struct msdos_sb_info *sbi = MSDOS_SB(sb);
+	int err = 0;
+
+	/* No support for hole punch or other fallocate flags. */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&inode->i_mutex);
+	if ((offset + len) <= MSDOS_I(inode)->mmu_private) {
+		fat_msg(sb, KERN_ERR,
+			"fat_fallocate(): Blocks already allocated");
+		err = -EINVAL;
+		goto error;
+	}
+
+	if (mode & FALLOC_FL_KEEP_SIZE) {
+		/* First compute the number of clusters to be allocated */
+		if (inode->i_size > 0) {
+			err = fat_get_cluster(inode, FAT_ENT_EOF,
+					      &fclus, &dclus);
+			if (err < 0) {
+				fat_msg(sb, KERN_ERR,
+					"fat_fallocate(): fat_get_cluster() error");
+				goto error;
+			}
+			MSDOS_I(inode)->mmu_private = ((loff_t)fclus + 1) <<
+						      sbi->cluster_bits;
+			free_bytes = MSDOS_I(inode)->mmu_private -
+				     inode->i_size;
+			nr_bytes = offset + len - inode->i_size - free_bytes;
+		} else
+			nr_bytes = offset + len - inode->i_size;
+
+		nr_cluster = (nr_bytes + (sbi->cluster_size - 1)) >>
+			     sbi->cluster_bits;
+
+		/* Start the allocation. We are not zeroing out the clusters */
+		while (nr_cluster-- > 0) {
+			err = fat_alloc_clusters(inode, &cluster, 1);
+			if (err) {
+				fat_msg(sb, KERN_ERR,
+					"fat_fallocate(): fat_alloc_clusters() error");
+				goto error;
+			}
+			err = fat_chain_add(inode, cluster, 1);
+			if (err) {
+				fat_free_clusters(inode, cluster);
+				goto error;
+			}
+			MSDOS_I(inode)->mmu_private += sbi->cluster_size;
+		}
+	} else {
+		/* This is just an expanding truncate */
+		err = fat_cont_expand(inode, (offset + len));
+		if (err) {
+			fat_msg(sb, KERN_ERR,
+				"fat_fallocate(): fat_cont_expand() error");
+		}
+	}
+
+error:
+	mutex_unlock(&inode->i_mutex);
+	return err;
+}
+
 /* Free all clusters after the skip'th cluster. */
 static int fat_free(struct inode *inode, int skip)
 {
@@ -386,6 +488,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	struct inode *inode = dentry->d_inode;
 	unsigned int ia_valid;
 	int error;
+	loff_t mmu_private_ideal;
+
+	mmu_private_ideal = round_up(inode->i_size, dentry->d_sb->s_blocksize);
 
 	/* Check for setting the inode time. */
 	ia_valid = attr->ia_valid;
@@ -411,7 +516,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	if (attr->ia_valid & ATTR_SIZE) {
 		inode_dio_wait(inode);
 
-		if (attr->ia_size > inode->i_size) {
+		if (attr->ia_size > inode->i_size &&
+		    MSDOS_I(inode)->mmu_private <= mmu_private_ideal) {
 			error = fat_cont_expand(inode, attr->ia_size);
 			if (error || attr->ia_valid == ATTR_SIZE)
 				goto out;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 11b51bb55b42..fc6b37d66533 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -152,11 +152,65 @@ static void fat_write_failed(struct address_space *mapping, loff_t to)
 	}
 }
 
+/* Write zeroes from the current i_size up to pos, one page per step. */
+static int fat_zero_falloc_area(struct file *file,
+				struct address_space *mapping, loff_t pos)
+{
+	loff_t cur = i_size_read(mapping->host);
+	size_t left = pos - cur;
+	int err;
+
+	do {
+		unsigned offset = cur & (PAGE_CACHE_SIZE - 1);
+		size_t bytes = PAGE_CACHE_SIZE - offset;
+		struct page *page;
+		void *fsdata;
+
+		if (bytes > left)
+			bytes = left;
+
+		err = pagecache_write_begin(NULL, mapping, cur, bytes,
+					AOP_FLAG_UNINTERRUPTIBLE,
+					&page, &fsdata);
+		if (err)
+			break;
+
+		zero_user(page, offset, bytes);
+
+		err = pagecache_write_end(NULL, mapping, cur, bytes, bytes,
+					page, fsdata);
+		if (err < 0)
+			break;
+		/* A non-negative write_end result is success: report 0. */
+		err = 0;
+		cur += bytes;
+		left -= bytes;
+	} while (left);
+
+	return err;
+}
+
 static int fat_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
 	int err;
+	loff_t mmu_private_ideal, mmu_private_actual;
+	loff_t size;
+	struct inode *inode = mapping->host;
+	struct super_block *sb = inode->i_sb;
+
+	size = i_size_read(inode);
+	mmu_private_actual = MSDOS_I(inode)->mmu_private;
+	mmu_private_ideal = round_up(size, sb->s_blocksize);
+	if ((mmu_private_actual > mmu_private_ideal) && (pos > size)) {
+		err = fat_zero_falloc_area(file, mapping, pos);
+		if (err) {
+			fat_msg(sb, KERN_ERR,
+				"Error (%d) zeroing fallocated area", err);
+			return err;
+		}
+	}
 
 	*pagep = NULL;
 	err = cont_write_begin(file, mapping, pos, len, flags,
diff --git a/fs/file_table.c b/fs/file_table.c
index b44e4c559786..7528930dab1c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -311,8 +311,7 @@ void fput(struct file *file)
 				return;
 			/*
 			 * After this task has run exit_task_work(),
-			 * task_work_add() will fail.  free_ipc_ns()->
-			 * shm_destroy() can do this.  Fall through to delayed
+			 * task_work_add() will fail.  Fall through to delayed
 			 * fput to avoid leaking *file.
 			 */
 		}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 17e6bdde96c5..2fc6779655e6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7126,7 +7126,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
 	if (end > i_size_read(inode))
 		end = i_size_read(inode);
 
-	BUG_ON(start >= end);
+	BUG_ON(start > end);
 
 	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
 	    !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41000f223ca4..b0415c2a887b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -474,11 +474,6 @@ static int ocfs2_truncate_file(struct inode *inode,
 		goto bail;
 	}
 
-	/* lets handle the simple truncate cases before doing any more
-	 * cluster locking. */
-	if (new_i_size == le64_to_cpu(fe->i_size))
-		goto bail;
-
 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
 	ocfs2_resv_discard(&osb->osb_la_resmap,
@@ -723,7 +718,8 @@ leave:
  * While a write will already be ordering the data, a truncate will not.
  * Thus, we need to explicitly order the zeroed pages.
  */
-static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
+						struct buffer_head *di_bh)
 {
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	handle_t *handle = NULL;
@@ -740,7 +736,14 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
 	}
 
 	ret = ocfs2_jbd2_file_inode(handle, inode);
-	if (ret < 0)
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret)
 		mlog_errno(ret);
 
 out:
@@ -756,7 +759,7 @@ out:
  * to be too fragile to do exactly what we need without us having to
  * worry about recursive locking in ->write_begin() and ->write_end(). */
 static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
-				 u64 abs_to)
+				 u64 abs_to, struct buffer_head *di_bh)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
@@ -764,6 +767,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
 	handle_t *handle = NULL;
 	int ret = 0;
 	unsigned zero_from, zero_to, block_start, block_end;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 
 	BUG_ON(abs_from >= abs_to);
 	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
@@ -806,7 +810,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
 		}
 
 		if (!handle) {
-			handle = ocfs2_zero_start_ordered_transaction(inode);
+			handle = ocfs2_zero_start_ordered_transaction(inode,
+								      di_bh);
 			if (IS_ERR(handle)) {
 				ret = PTR_ERR(handle);
 				handle = NULL;
@@ -823,8 +828,22 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
 			ret = 0;
 	}
 
-	if (handle)
+	if (handle) {
+		/*
+		 * fs-writeback will release dirty pages without the page lock
+		 * whose offsets are beyond the inode size; the release happens
+		 * in block_write_full_page_endio().
+		 */
+		i_size_write(inode, abs_to);
+		inode->i_blocks = ocfs2_inode_sector_count(inode);
+		di->i_size = cpu_to_le64((u64)i_size_read(inode));
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+		di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+		di->i_mtime_nsec = di->i_ctime_nsec;
+		ocfs2_journal_dirty(handle, di_bh);
 		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+	}
 
 out_unlock:
 	unlock_page(page);
@@ -920,7 +939,7 @@ out:
  * has made sure that the entire range needs zeroing.
  */
 static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
-				   u64 range_end)
+				   u64 range_end, struct buffer_head *di_bh)
 {
 	int rc = 0;
 	u64 next_pos;
@@ -936,7 +955,7 @@ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
 		next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
 		if (next_pos > range_end)
 			next_pos = range_end;
-		rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+		rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
 		if (rc < 0) {
 			mlog_errno(rc);
 			break;
@@ -982,7 +1001,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
 			range_end = zero_to_size;
 
 		ret = ocfs2_zero_extend_range(inode, range_start,
-					      range_end);
+					      range_end, di_bh);
 		if (ret) {
 			mlog_errno(ret);
 			break;
@@ -1150,14 +1169,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		goto bail_unlock_rw;
 	}
 
-	if (size_change && attr->ia_size != i_size_read(inode)) {
+	if (size_change) {
 		status = inode_newsize_ok(inode, attr->ia_size);
 		if (status)
 			goto bail_unlock;
 
 		inode_dio_wait(inode);
 
-		if (i_size_read(inode) > attr->ia_size) {
+		if (i_size_read(inode) >= attr->ia_size) {
 			if (ocfs2_should_order_data(inode)) {
 				status = ocfs2_begin_ordered_truncate(inode,
 								      attr->ia_size);
@@ -2626,7 +2645,16 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
 	case SEEK_SET:
 		break;
 	case SEEK_END:
-		offset += inode->i_size;
+		/* SEEK_END requires the OCFS2 inode lock for the file
+		 * because it references the file's size.
+		 */
+		ret = ocfs2_inode_lock(inode, NULL, 0);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out;
+		}
+		offset += i_size_read(inode);
+		ocfs2_inode_unlock(inode, 0);
 		break;
 	case SEEK_CUR:
 		if (offset == 0) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index be3f8676a438..cec861d9d480 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2101,17 +2101,17 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	/* remove it from the orphan directory */
-	status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(orphan_dir_inode),
+					 orphan_dir_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
-	status = ocfs2_journal_access_di(handle,
-					 INODE_CACHE(orphan_dir_inode),
-					 orphan_dir_bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	/* remove it from the orphan directory */
+	status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 998b17eda09d..9f6b96a09615 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2965,6 +2965,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 			to = map_end & (PAGE_CACHE_SIZE - 1);
 
 		page = find_or_create_page(mapping, page_index, GFP_NOFS);
+		if (!page) {
+			ret = -ENOMEM;
+			mlog_errno(ret);
+			break;
+		}
 
 		/*
 		 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 5aa847a603c0..59d85d608898 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -132,13 +132,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(i.freeswap),
 		K(global_page_state(NR_FILE_DIRTY)),
 		K(global_page_state(NR_WRITEBACK)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		K(global_page_state(NR_ANON_PAGES)
-		  + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
-		  HPAGE_PMD_NR),
-#else
 		K(global_page_state(NR_ANON_PAGES)),
-#endif
 		K(global_page_state(NR_FILE_MAPPED)),
 		K(global_page_state(NR_SHMEM)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE) +
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 05bcc0903766..ecc8e01f492a 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -94,6 +94,11 @@
  */
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
+/*
+ * Are we in NMI context, IRQ context, or serving a softirq?
+ */
+#define in_serving_irq() (in_nmi() || in_irq() || in_serving_softirq())
+
 #if defined(CONFIG_PREEMPT_COUNT)
 # define PREEMPT_CHECK_OFFSET 1
 #else
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b60de92e2edc..3935428c57cf 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -96,9 +96,6 @@ extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			  pmd_t *dst_pmd, pmd_t *src_pmd,
 			  struct vm_area_struct *vma,
 			  unsigned long addr, unsigned long end);
-extern int handle_pte_fault(struct mm_struct *mm,
-			    struct vm_area_struct *vma, unsigned long address,
-			    pte_t *pte, pmd_t *pmd, unsigned int flags);
 extern int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5fa5afeeb759..1a311e08b5ef 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -344,16 +344,6 @@ static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long
 /* IRQ wakeup (PM) control: */
 extern int irq_set_irq_wake(unsigned int irq, unsigned int on);
 
-static inline int enable_irq_wake(unsigned int irq)
-{
-	return irq_set_irq_wake(irq, 1);
-}
-
-static inline int disable_irq_wake(unsigned int irq)
-{
-	return irq_set_irq_wake(irq, 0);
-}
-
 #else /* !CONFIG_GENERIC_HARDIRQS */
 /*
  * NOTE: non-genirq architectures, if they want to support the lock
@@ -370,16 +360,23 @@ static inline int disable_irq_wake(unsigned int irq)
 						enable_irq(irq)
 # endif
 
-static inline int enable_irq_wake(unsigned int irq)
+/* IRQ wakeup (PM) control: */
+static inline int irq_set_irq_wake(unsigned int irq, unsigned int on)
 {
 	return 0;
 }
 
+#endif /* CONFIG_GENERIC_HARDIRQS */
+
+static inline int enable_irq_wake(unsigned int irq)
+{
+	return irq_set_irq_wake(irq, 1);
+}
+
 static inline int disable_irq_wake(unsigned int irq)
 {
-	return 0;
+	return irq_set_irq_wake(irq, 0);
 }
-#endif /* CONFIG_GENERIC_HARDIRQS */
 
 
 #ifdef CONFIG_IRQ_FORCED_THREADING
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index c4d870b0d5e6..19c19a5eee29 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -22,7 +22,7 @@ struct ipc_ids {
 	int in_use;
 	unsigned short seq;
 	unsigned short seq_max;
-	struct rw_semaphore rw_mutex;
+	struct rw_semaphore rwsem;
 	struct idr ipcs_idr;
 	int next_id;
 };
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0d7df39a5885..b2f897789838 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -91,7 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
 }
 
 #define vma_policy(vma) ((vma)->vm_policy)
-#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol))
 
 static inline void mpol_get(struct mempolicy *pol)
 {
@@ -126,6 +125,7 @@ struct shared_policy {
 	spinlock_t lock;
 };
 
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst);
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
 int mpol_set_shared_policy(struct shared_policy *info,
 				struct vm_area_struct *vma,
@@ -240,7 +240,12 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 }
 
 #define vma_policy(vma) NULL
-#define vma_set_policy(vma, pol) do {} while(0)
+
+static inline int
+vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+	return 0;
+}
 
 static inline void numa_policy_init(void)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0224608d15e..d5c82dc33805 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -884,11 +884,12 @@ static inline int page_mapped(struct page *page)
 #define VM_FAULT_NOPAGE	0x0100	/* ->fault installed the pte, not return page */
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
+#define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
 
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
 #define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
-			 VM_FAULT_HWPOISON_LARGE)
+			 VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE)
 
 /* Encode hstate index for a hwpoisoned large page */
 #define VM_FAULT_SET_HINDEX(x) ((x) << 12)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6d53675c2b54..f1a5b5937be4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
 TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
 PAGEFLAG(MappedToDisk, mappedtodisk)
 
-/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
+/* PG_readahead is only used for reads; PG_reclaim is only for writes */
 PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
-PAGEFLAG(Readahead, reclaim)		/* Reminder to do async read-ahead */
+PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim)
 
 #ifdef CONFIG_HIGHMEM
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d04b92ceda..fc09d217d870 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2167,15 +2167,15 @@ static inline bool thread_group_leader(struct task_struct *p)
  * all we care about is that we have a task with the appropriate
  * pid, we don't actually care if we have the right task.
  */
-static inline int has_group_leader_pid(struct task_struct *p)
+static inline bool has_group_leader_pid(struct task_struct *p)
 {
-	return p->pid == p->tgid;
+	return task_pid(p) == p->signal->leader_pid;
 }
 
 static inline
-int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
 {
-	return p1->tgid == p2->tgid;
+	return p1->signal == p2->signal;
 }
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index c181399f2c20..c8488763277f 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -11,6 +11,7 @@
 #include <linux/list.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
+#include <linux/irqflags.h>
 
 extern void cpu_idle(void);
 
@@ -139,14 +140,17 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)
 }
 #define smp_call_function(func, info, wait) \
 			(up_smp_call_function(func, info))
-#define on_each_cpu(func, info, wait)		\
-	({					\
-		unsigned long __flags;		\
-		local_irq_save(__flags);	\
-		func(info);			\
-		local_irq_restore(__flags);	\
-		0;				\
-	})
+
+static inline int on_each_cpu(smp_call_func_t func, void *info, int wait)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	func(info);
+	local_irq_restore(flags);
+	return 0;
+}
+
 /*
  * Note we still need to test the mask even for UP
  * because we actually can get an empty mask from
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index bd6cf61142be..dc2cdf07ac14 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -70,6 +70,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
+		NR_TLB_REMOTE_FLUSH,	/* cpu tried to flush others' tlbs */
+		NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
+		NR_TLB_LOCAL_FLUSH_ALL,
+		NR_TLB_LOCAL_FLUSH_ONE,
+		NR_TLB_LOCAL_FLUSH_ONE_KERNEL,
 		NR_VM_EVENT_ITEMS
 };
 
diff --git a/ipc/msg.c b/ipc/msg.c
index bd60d7e159e8..14d64f8023f2 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -70,8 +70,6 @@ struct msg_sender {
 
 #define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])
 
-#define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)
-
 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
 static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
@@ -172,7 +170,7 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
  * @ns: namespace
  * @params: ptr to the structure that contains the key and msgflg
  *
- * Called with msg_ids.rw_mutex held (writer)
+ * Called with msg_ids.rwsem held (writer)
  */
 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
@@ -259,8 +257,8 @@ static void expunge_all(struct msg_queue *msq, int res)
  * removes the message queue from message queue ID IDR, and cleans up all the
  * messages associated with this queue.
  *
- * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
- * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
+ * msg_ids.rwsem (writer) and the spinlock for this message queue are held
+ * before freeque() is called. msg_ids.rwsem remains locked on exit.
  */
 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
@@ -270,7 +268,8 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
 	msg_rmid(ns, msq);
-	msg_unlock(msq);
+	ipc_unlock_object(&msq->q_perm);
+	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
 		atomic_dec(&ns->msg_hdrs);
@@ -282,7 +281,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 }
 
 /*
- * Called with msg_ids.rw_mutex and ipcp locked.
+ * Called with msg_ids.rwsem and ipcp locked.
  */
 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
 {
@@ -386,9 +385,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 }
 
 /*
- * This function handles some msgctl commands which require the rw_mutex
+ * This function handles some msgctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 		       struct msqid_ds __user *buf, int version)
@@ -403,7 +402,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 			return -EFAULT;
 	}
 
-	down_write(&msg_ids(ns).rw_mutex);
+	down_write(&msg_ids(ns).rwsem);
 	rcu_read_lock();
 
 	ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
@@ -459,7 +458,7 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&msg_ids(ns).rw_mutex);
+	up_write(&msg_ids(ns).rwsem);
 	return err;
 }
 
@@ -494,7 +493,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
 		msginfo.msgmnb = ns->msg_ctlmnb;
 		msginfo.msgssz = MSGSSZ;
 		msginfo.msgseg = MSGSEG;
-		down_read(&msg_ids(ns).rw_mutex);
+		down_read(&msg_ids(ns).rwsem);
 		if (cmd == MSG_INFO) {
 			msginfo.msgpool = msg_ids(ns).in_use;
 			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
@@ -505,7 +504,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
 			msginfo.msgtql = MSGTQL;
 		}
 		max_id = ipc_get_maxid(&msg_ids(ns));
-		up_read(&msg_ids(ns).rw_mutex);
+		up_read(&msg_ids(ns).rwsem);
 		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0 : max_id;
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7ee61bf44933..67dc744974c6 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 	int next_id;
 	int total, in_use;
 
-	down_write(&ids->rw_mutex);
+	down_write(&ids->rwsem);
 
 	in_use = ids->in_use;
 
@@ -93,7 +93,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 		free(ns, perm);
 		total++;
 	}
-	up_write(&ids->rw_mutex);
+	up_write(&ids->rwsem);
 }
 
 static void free_ipc_ns(struct ipc_namespace *ns)
diff --git a/ipc/sem.c b/ipc/sem.c
index 41088899783d..69b6a21f3844 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -322,7 +322,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
 }
 
 /*
- * sem_lock_(check_) routines are called in the paths where the rw_mutex
+ * sem_lock_(check_) routines are called in the paths where the rwsem
  * is not held.
  *
  * The caller holds the RCU read lock.
@@ -426,7 +426,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  * @ns: namespace
  * @params: ptr to the structure that contains key, semflg and nsems
  *
- * Called with sem_ids.rw_mutex held (as a writer)
+ * Called with sem_ids.rwsem held (as a writer)
  */
 
 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
@@ -492,7 +492,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 
 
 /*
- * Called with sem_ids.rw_mutex and ipcp locked.
+ * Called with sem_ids.rwsem and ipcp locked.
  */
 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 {
@@ -503,7 +503,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 }
 
 /*
- * Called with sem_ids.rw_mutex and ipcp locked.
+ * Called with sem_ids.rwsem and ipcp locked.
  */
 static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
 				struct ipc_params *params)
@@ -994,8 +994,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	return semzcnt;
 }
 
-/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
- * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
+/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
+ * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
  * remains locked on exit.
  */
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
@@ -1116,7 +1116,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 		seminfo.semmnu = SEMMNU;
 		seminfo.semmap = SEMMAP;
 		seminfo.semume = SEMUME;
-		down_read(&sem_ids(ns).rw_mutex);
+		down_read(&sem_ids(ns).rwsem);
 		if (cmd == SEM_INFO) {
 			seminfo.semusz = sem_ids(ns).in_use;
 			seminfo.semaem = ns->used_sems;
@@ -1125,7 +1125,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 			seminfo.semaem = SEMAEM;
 		}
 		max_id = ipc_get_maxid(&sem_ids(ns));
-		up_read(&sem_ids(ns).rw_mutex);
+		up_read(&sem_ids(ns).rwsem);
 		if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 
 			return -EFAULT;
 		return (max_id < 0) ? 0: max_id;
@@ -1431,9 +1431,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
 }
 
 /*
- * This function handles some semctl commands which require the rw_mutex
+ * This function handles some semctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int semctl_down(struct ipc_namespace *ns, int semid,
 		       int cmd, int version, void __user *p)
@@ -1448,7 +1448,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
 			return -EFAULT;
 	}
 
-	down_write(&sem_ids(ns).rw_mutex);
+	down_write(&sem_ids(ns).rwsem);
 	rcu_read_lock();
 
 	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
@@ -1487,7 +1487,7 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&sem_ids(ns).rw_mutex);
+	up_write(&sem_ids(ns).rwsem);
 	return err;
 }
 
diff --git a/ipc/shm.c b/ipc/shm.c
index c6b4ad5ce3b7..59f2194481ce 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -19,6 +19,9 @@
  * namespaces support
  * OpenVZ, SWsoft Inc.
  * Pavel Emelianov <xemul@openvz.org>
+ *
+ * Better ipc lock (kern_ipc_perm.lock) handling
+ * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
  */
 
 #include <linux/slab.h>
@@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns)
 }
 
 /*
- * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
- * Only shm_ids.rw_mutex remains locked on exit.
+ * Called with shm_ids.rwsem (writer) and the shp structure locked.
+ * Only shm_ids.rwsem remains locked on exit.
  */
 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
@@ -124,8 +127,28 @@ void __init shm_init (void)
 				IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
+static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);
+
+	if (IS_ERR(ipcp))
+		return ERR_CAST(ipcp);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
+
+	if (IS_ERR(ipcp))
+		return ERR_CAST(ipcp);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
 /*
- * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * shm_lock_(check_) routines are called in the paths where the rwsem
  * is not necessarily held.
  */
 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
@@ -182,7 +205,7 @@ static void shm_open(struct vm_area_struct *vma)
  * @ns: namespace
  * @shp: struct to free
  *
- * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
+ * It has to be called with shp and shm_ids.rwsem (writer) locked,
  * but returns with shp unlocked and freed.
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -230,7 +253,7 @@ static void shm_close(struct vm_area_struct *vma)
 	struct shmid_kernel *shp;
 	struct ipc_namespace *ns = sfd->ns;
 
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	/* remove from the list of attaches of the shm segment */
 	shp = shm_lock(ns, sfd->id);
 	BUG_ON(IS_ERR(shp));
@@ -241,10 +264,10 @@ static void shm_close(struct vm_area_struct *vma)
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_current(int id, void *p, void *data)
 {
 	struct ipc_namespace *ns = data;
@@ -275,7 +298,7 @@ static int shm_try_destroy_current(int id, void *p, void *data)
 	return 0;
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_orphaned(int id, void *p, void *data)
 {
 	struct ipc_namespace *ns = data;
@@ -286,7 +309,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 	 * We want to destroy segments without users and with already
 	 * exit'ed originating process.
 	 *
-	 * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
+	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
 	 */
 	if (shp->shm_creator != NULL)
 		return 0;
@@ -300,10 +323,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 
 void shm_destroy_orphaned(struct ipc_namespace *ns)
 {
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	if (shm_ids(ns).in_use)
 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
 
@@ -315,10 +338,10 @@ void exit_shm(struct task_struct *task)
 		return;
 
 	/* Destroy all already created segments, but not mapped yet */
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	if (shm_ids(ns).in_use)
 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -452,7 +475,7 @@ static const struct vm_operations_struct shm_vm_ops = {
  * @ns: namespace
  * @params: ptr to the structure that contains key, size and shmflg
  *
- * Called with shm_ids.rw_mutex held as a writer.
+ * Called with shm_ids.rwsem held as a writer.
  */
 
 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
@@ -560,7 +583,7 @@ no_file:
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
  */
 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
@@ -571,7 +594,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
  */
 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
 				struct ipc_params *params)
@@ -684,7 +707,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 
 /*
  * Calculate and add used RSS and swap pages of a shm.
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
  */
 static void shm_add_rss_swap(struct shmid_kernel *shp,
 	unsigned long *rss_add, unsigned long *swp_add)
@@ -711,7 +734,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
 }
 
 /*
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
  */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 		unsigned long *swp)
@@ -740,9 +763,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 }
 
 /*
- * This function handles some shmctl commands which require the rw_mutex
+ * This function handles some shmctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 		       struct shmid_ds __user *buf, int version)
@@ -757,14 +780,13 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 			return -EFAULT;
 	}
 
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	rcu_read_lock();
 
-	ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
-			       &shmid64.shm_perm, 0);
+	ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
+				      &shmid64.shm_perm, 0);
 	if (IS_ERR(ipcp)) {
 		err = PTR_ERR(ipcp);
-		/* the ipc lock is not held upon failure */
 		goto out_unlock1;
 	}
 
@@ -772,14 +794,16 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 
 	err = security_shm_shmctl(shp, cmd);
 	if (err)
-		goto out_unlock0;
+		goto out_unlock1;
 
 	switch (cmd) {
 	case IPC_RMID:
+		ipc_lock_object(&shp->shm_perm);
 		/* do_shm_rmid unlocks the ipc object and rcu */
 		do_shm_rmid(ns, ipcp);
 		goto out_up;
 	case IPC_SET:
+		ipc_lock_object(&shp->shm_perm);
 		err = ipc_update_perm(&shmid64.shm_perm, ipcp);
 		if (err)
 			goto out_unlock0;
@@ -787,6 +811,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 		break;
 	default:
 		err = -EINVAL;
+		goto out_unlock1;
 	}
 
 out_unlock0:
@@ -794,33 +819,28 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 	return err;
 }
 
-SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
+			 int cmd, int version, void __user *buf)
 {
+	int err;
 	struct shmid_kernel *shp;
-	int err, version;
-	struct ipc_namespace *ns;
 
-	if (cmd < 0 || shmid < 0) {
-		err = -EINVAL;
-		goto out;
+	/* preliminary security checks for *_INFO */
+	if (cmd == IPC_INFO || cmd == SHM_INFO) {
+		err = security_shm_shmctl(NULL, cmd);
+		if (err)
+			return err;
 	}
 
-	version = ipc_parse_version(&cmd);
-	ns = current->nsproxy->ipc_ns;
-
-	switch (cmd) { /* replace with proc interface ? */
+	switch (cmd) {
 	case IPC_INFO:
 	{
 		struct shminfo64 shminfo;
 
-		err = security_shm_shmctl(NULL, cmd);
-		if (err)
-			return err;
-
 		memset(&shminfo, 0, sizeof(shminfo));
 		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
 		shminfo.shmmax = ns->shm_ctlmax;
@@ -830,9 +850,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		if(copy_shminfo_to_user (buf, &shminfo, version))
 			return -EFAULT;
 
-		down_read(&shm_ids(ns).rw_mutex);
+		down_read(&shm_ids(ns).rwsem);
 		err = ipc_get_maxid(&shm_ids(ns));
-		up_read(&shm_ids(ns).rw_mutex);
+		up_read(&shm_ids(ns).rwsem);
 
 		if(err<0)
 			err = 0;
@@ -842,19 +862,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 	{
 		struct shm_info shm_info;
 
-		err = security_shm_shmctl(NULL, cmd);
-		if (err)
-			return err;
-
 		memset(&shm_info, 0, sizeof(shm_info));
-		down_read(&shm_ids(ns).rw_mutex);
+		down_read(&shm_ids(ns).rwsem);
 		shm_info.used_ids = shm_ids(ns).in_use;
 		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
 		shm_info.shm_tot = ns->shm_tot;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
 		err = ipc_get_maxid(&shm_ids(ns));
-		up_read(&shm_ids(ns).rw_mutex);
+		up_read(&shm_ids(ns).rwsem);
 		if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
 			err = -EFAULT;
 			goto out;
@@ -869,27 +885,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		struct shmid64_ds tbuf;
 		int result;
 
+		rcu_read_lock();
 		if (cmd == SHM_STAT) {
-			shp = shm_lock(ns, shmid);
+			shp = shm_obtain_object(ns, shmid);
 			if (IS_ERR(shp)) {
 				err = PTR_ERR(shp);
-				goto out;
+				goto out_unlock;
 			}
 			result = shp->shm_perm.id;
 		} else {
-			shp = shm_lock_check(ns, shmid);
+			shp = shm_obtain_object_check(ns, shmid);
 			if (IS_ERR(shp)) {
 				err = PTR_ERR(shp);
-				goto out;
+				goto out_unlock;
 			}
 			result = 0;
 		}
+
 		err = -EACCES;
 		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
 			goto out_unlock;
+
 		err = security_shm_shmctl(shp, cmd);
 		if (err)
 			goto out_unlock;
+
 		memset(&tbuf, 0, sizeof(tbuf));
 		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
 		tbuf.shm_segsz	= shp->shm_segsz;
@@ -899,43 +919,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		tbuf.shm_cpid	= shp->shm_cprid;
 		tbuf.shm_lpid	= shp->shm_lprid;
 		tbuf.shm_nattch	= shp->shm_nattch;
-		shm_unlock(shp);
-		if(copy_shmid_to_user (buf, &tbuf, version))
+		rcu_read_unlock();
+
+		if (copy_shmid_to_user(buf, &tbuf, version))
 			err = -EFAULT;
 		else
 			err = result;
 		goto out;
 	}
+	default:
+		return -EINVAL;
+	}
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	return err;
+}
+
+SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+{
+	struct shmid_kernel *shp;
+	int err, version;
+	struct ipc_namespace *ns;
+
+	if (cmd < 0 || shmid < 0)
+		return -EINVAL;
+
+	version = ipc_parse_version(&cmd);
+	ns = current->nsproxy->ipc_ns;
+
+	switch (cmd) {
+	case IPC_INFO:
+	case SHM_INFO:
+	case SHM_STAT:
+	case IPC_STAT:
+		return shmctl_nolock(ns, shmid, cmd, version, buf);
+	case IPC_RMID:
+	case IPC_SET:
+		return shmctl_down(ns, shmid, cmd, buf, version);
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
 		struct file *shm_file;
 
-		shp = shm_lock_check(ns, shmid);
+		rcu_read_lock();
+		shp = shm_obtain_object_check(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
-			goto out;
+			goto out_unlock1;
 		}
 
 		audit_ipc_obj(&(shp->shm_perm));
+		err = security_shm_shmctl(shp, cmd);
+		if (err)
+			goto out_unlock1;
 
+		ipc_lock_object(&shp->shm_perm);
 		if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
 			kuid_t euid = current_euid();
 			err = -EPERM;
 			if (!uid_eq(euid, shp->shm_perm.uid) &&
 			    !uid_eq(euid, shp->shm_perm.cuid))
-				goto out_unlock;
+				goto out_unlock0;
 			if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
-				goto out_unlock;
+				goto out_unlock0;
 		}
 
-		err = security_shm_shmctl(shp, cmd);
-		if (err)
-			goto out_unlock;
-
 		shm_file = shp->shm_file;
 		if (is_file_hugepages(shm_file))
-			goto out_unlock;
+			goto out_unlock0;
 
 		if (cmd == SHM_LOCK) {
 			struct user_struct *user = current_user();
@@ -944,32 +997,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 				shp->shm_perm.mode |= SHM_LOCKED;
 				shp->mlock_user = user;
 			}
-			goto out_unlock;
+			goto out_unlock0;
 		}
 
 		/* SHM_UNLOCK */
 		if (!(shp->shm_perm.mode & SHM_LOCKED))
-			goto out_unlock;
+			goto out_unlock0;
 		shmem_lock(shm_file, 0, shp->mlock_user);
 		shp->shm_perm.mode &= ~SHM_LOCKED;
 		shp->mlock_user = NULL;
 		get_file(shm_file);
-		shm_unlock(shp);
+		ipc_unlock_object(&shp->shm_perm);
+		rcu_read_unlock();
 		shmem_unlock_mapping(shm_file->f_mapping);
+
 		fput(shm_file);
-		goto out;
-	}
-	case IPC_RMID:
-	case IPC_SET:
-		err = shmctl_down(ns, shmid, cmd, buf, version);
 		return err;
+	}
 	default:
 		return -EINVAL;
 	}
 
-out_unlock:
-	shm_unlock(shp);
-out:
+out_unlock0:
+	ipc_unlock_object(&shp->shm_perm);
+out_unlock1:
+	rcu_read_unlock();
 	return err;
 }
 
@@ -1037,7 +1089,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 	 * additional creator id...
 	 */
 	ns = current->nsproxy->ipc_ns;
-	shp = shm_lock_check(ns, shmid);
+	rcu_read_lock();
+	shp = shm_obtain_object_check(ns, shmid);
 	if (IS_ERR(shp)) {
 		err = PTR_ERR(shp);
 		goto out;
@@ -1051,24 +1104,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 	if (err)
 		goto out_unlock;
 
+	ipc_lock_object(&shp->shm_perm);
 	path = shp->shm_file->f_path;
 	path_get(&path);
 	shp->shm_nattch++;
 	size = i_size_read(path.dentry->d_inode);
-	shm_unlock(shp);
+	ipc_unlock_object(&shp->shm_perm);
+	rcu_read_unlock();
 
 	err = -ENOMEM;
 	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
-	if (!sfd)
-		goto out_put_dentry;
+	if (!sfd) {
+		path_put(&path);
+		goto out_nattch;
+	}
 
 	file = alloc_file(&path, f_mode,
 			  is_file_hugepages(shp->shm_file) ?
 				&shm_file_operations_huge :
 				&shm_file_operations);
 	err = PTR_ERR(file);
-	if (IS_ERR(file))
-		goto out_free;
+	if (IS_ERR(file)) {
+		kfree(sfd);
+		path_put(&path);
+		goto out_nattch;
+	}
 
 	file->private_data = sfd;
 	file->f_mapping = shp->shm_file->f_mapping;
@@ -1094,7 +1154,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 		    addr > current->mm->start_stack - size - PAGE_SIZE * 5)
 			goto invalid;
 	}
-		
+
 	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
 	*raddr = addr;
 	err = 0;
@@ -1109,7 +1169,7 @@ out_fput:
 	fput(file);
 
 out_nattch:
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	shp = shm_lock(ns, shmid);
 	BUG_ON(IS_ERR(shp));
 	shp->shm_nattch--;
@@ -1117,20 +1177,13 @@ out_nattch:
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	up_write(&shm_ids(ns).rw_mutex);
-
-out:
+	up_write(&shm_ids(ns).rwsem);
 	return err;
 
 out_unlock:
-	shm_unlock(shp);
-	goto out;
-
-out_free:
-	kfree(sfd);
-out_put_dentry:
-	path_put(&path);
-	goto out_nattch;
+	rcu_read_unlock();
+out:
+	return err;
 }
 
 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
diff --git a/ipc/util.c b/ipc/util.c
index 4704223bfad4..1ddadcf9a2ab 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -15,6 +15,14 @@
  * Jun 2006 - namespaces ssupport
  *            OpenVZ, SWsoft Inc.
  *            Pavel Emelianov <xemul@openvz.org>
+ *
+ * General sysv ipc locking scheme:
+ *  when doing ipc id lookups, take the ids->rwsem
+ *      rcu_read_lock()
+ *          obtain the ipc object (kern_ipc_perm)
+ *          perform security, capabilities, auditing and permission checks, etc.
+ *          acquire the ipc lock (kern_ipc_perm.lock) through ipc_lock_object()
+ *             perform data updates (ie: SET, RMID, LOCK/UNLOCK commands)
  */
 
 #include <linux/mm.h>
@@ -119,7 +127,7 @@ __initcall(ipc_init);
  
 void ipc_init_ids(struct ipc_ids *ids)
 {
-	init_rwsem(&ids->rw_mutex);
+	init_rwsem(&ids->rwsem);
 
 	ids->in_use = 0;
 	ids->seq = 0;
@@ -174,7 +182,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
  *	@ids: Identifier set
  *	@key: The key to find
  *	
- *	Requires ipc_ids.rw_mutex locked.
+ *	Requires ipc_ids.rwsem locked.
  *	Returns the LOCKED pointer to the ipc structure if found or NULL
  *	if not.
  *	If key is found ipc points to the owning ipc structure
@@ -208,7 +216,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
  *	ipc_get_maxid 	-	get the last assigned id
  *	@ids: IPC identifier set
  *
- *	Called with ipc_ids.rw_mutex held.
+ *	Called with ipc_ids.rwsem held.
  */
 
 int ipc_get_maxid(struct ipc_ids *ids)
@@ -246,7 +254,7 @@ int ipc_get_maxid(struct ipc_ids *ids)
  *	is returned. The 'new' entry is returned in a locked state on success.
  *	On failure the entry is not locked and a negative err-code is returned.
  *
- *	Called with writer ipc_ids.rw_mutex held.
+ *	Called with writer ipc_ids.rwsem held.
  */
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 {
@@ -312,9 +320,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
 {
 	int err;
 
-	down_write(&ids->rw_mutex);
+	down_write(&ids->rwsem);
 	err = ops->getnew(ns, params);
-	up_write(&ids->rw_mutex);
+	up_write(&ids->rwsem);
 	return err;
 }
 
@@ -331,7 +339,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
  *
  *	On success, the IPC id is returned.
  *
- *	It is called with ipc_ids.rw_mutex and ipcp->lock held.
+ *	It is called with ipc_ids.rwsem and ipcp->lock held.
  */
 static int ipc_check_perms(struct ipc_namespace *ns,
 			   struct kern_ipc_perm *ipcp,
@@ -376,7 +384,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
 	 * Take the lock as a writer since we are potentially going to add
 	 * a new entry + read locks are not "upgradable"
 	 */
-	down_write(&ids->rw_mutex);
+	down_write(&ids->rwsem);
 	ipcp = ipc_findkey(ids, params->key);
 	if (ipcp == NULL) {
 		/* key not used */
@@ -402,7 +410,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
 		}
 		ipc_unlock(ipcp);
 	}
-	up_write(&ids->rw_mutex);
+	up_write(&ids->rwsem);
 
 	return err;
 }
@@ -413,7 +421,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
  *	@ids: IPC identifier set
  *	@ipcp: ipc perm structure containing the identifier to remove
  *
- *	ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
+ *	ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
  *	before this function is called, and remain locked on the exit.
  */
  
@@ -621,7 +629,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
 }
 
 /**
- * ipc_lock - Lock an ipc structure without rw_mutex held
+ * ipc_lock - Lock an ipc structure without rwsem held
  * @ids: IPC identifier set
  * @id: ipc id to look for
  *
@@ -733,7 +741,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
 }
 
 /**
- * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd
+ * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
  * @ns:  the ipc namespace
  * @ids:  the table of ids where to look for the ipc
  * @id:   the id of the ipc to retrieve
@@ -746,29 +754,13 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
  * It must be called without any lock held and
  *  - retrieves the ipc with the given id in the given table.
  *  - performs some audit and permission check, depending on the given cmd
- *  - returns the ipc with the ipc lock held in case of success
- *    or an err-code without any lock held otherwise.
+ *  - returns a pointer to the ipc object or otherwise, the corresponding error.
  *
- * Call holding the both the rw_mutex and the rcu read lock.
+ * Call holding both the rwsem and the rcu read lock.
  */
-struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
-				      struct ipc_ids *ids, int id, int cmd,
-				      struct ipc64_perm *perm, int extra_perm)
-{
-	struct kern_ipc_perm *ipcp;
-
-	ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm);
-	if (IS_ERR(ipcp))
-		goto out;
-
-	spin_lock(&ipcp->lock);
-out:
-	return ipcp;
-}
-
 struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
-					     struct ipc_ids *ids, int id, int cmd,
-					     struct ipc64_perm *perm, int extra_perm)
+					struct ipc_ids *ids, int id, int cmd,
+					struct ipc64_perm *perm, int extra_perm)
 {
 	kuid_t euid;
 	int err = -EPERM;
@@ -884,7 +876,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 	 * Take the lock - this will be released by the corresponding
 	 * call to stop().
 	 */
-	down_read(&ids->rw_mutex);
+	down_read(&ids->rwsem);
 
 	/* pos < 0 is invalid */
 	if (*pos < 0)
@@ -911,7 +903,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
 
 	ids = &iter->ns->ids[iface->ids];
 	/* Release the lock we took in start() */
-	up_read(&ids->rw_mutex);
+	up_read(&ids->rwsem);
 }
 
 static int sysvipc_proc_show(struct seq_file *s, void *it)
diff --git a/ipc/util.h b/ipc/util.h
index b6a6a88f3002..0a362ffca972 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -94,10 +94,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
 #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
 
-/* must be called with ids->rw_mutex acquired for writing */
+/* must be called with ids->rwsem acquired for writing */
 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
 
-/* must be called with ids->rw_mutex acquired for reading */
+/* must be called with ids->rwsem acquired for reading */
 int ipc_get_maxid(struct ipc_ids *);
 
 /* must be called with both locks acquired. */
@@ -131,9 +131,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
 struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
 					     struct ipc_ids *ids, int id, int cmd,
 					     struct ipc64_perm *perm, int extra_perm);
-struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
-				      struct ipc_ids *ids, int id, int cmd,
-				      struct ipc64_perm *perm, int extra_perm);
 
 #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
   /* On IA-64, we always use the "64-bit version" of the IPC structures.  */ 
diff --git a/kernel/Makefile b/kernel/Makefile
index 470839d1a30e..35ef1185e359 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = fork.o exec_domain.o panic.o printk.o \
+obj-y     = fork.o exec_domain.o panic.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
@@ -24,6 +24,7 @@ endif
 
 obj-y += sched/
 obj-y += power/
+obj-y += printk/
 obj-y += cpu/
 
 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
diff --git a/kernel/fork.c b/kernel/fork.c
index 403d2bb8a968..9d1a5af6f132 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -351,7 +351,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	struct rb_node **rb_link, *rb_parent;
 	int retval;
 	unsigned long charge;
-	struct mempolicy *pol;
 
 	uprobe_start_dup_mmap();
 	down_write(&oldmm->mmap_sem);
@@ -400,11 +399,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			goto fail_nomem;
 		*tmp = *mpnt;
 		INIT_LIST_HEAD(&tmp->anon_vma_chain);
-		pol = mpol_dup(vma_policy(mpnt));
-		retval = PTR_ERR(pol);
-		if (IS_ERR(pol))
+		retval = vma_dup_policy(mpnt, tmp);
+		if (retval)
 			goto fail_nomem_policy;
-		vma_set_policy(tmp, pol);
 		tmp->vm_mm = mm;
 		if (anon_vma_fork(tmp, mpnt))
 			goto fail_nomem_anon_vma_fork;
@@ -472,7 +469,7 @@ out:
 	uprobe_end_dup_mmap();
 	return retval;
 fail_nomem_anon_vma_fork:
-	mpol_put(pol);
+	mpol_put(vma_policy(tmp));
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 383319bae3f7..deb3ad944fd1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1399,6 +1399,8 @@ retry:
 		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
 	else
 		expires_next = ktime_add(now, delta);
+	if (expires_next.tv64 < 0)
+		expires_next.tv64 = KTIME_MAX;
 	tick_program_event(expires_next, 1);
 	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
 		    ktime_to_ns(delta));
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
new file mode 100644
index 000000000000..85405bdcf2b3
--- /dev/null
+++ b/kernel/printk/Makefile
@@ -0,0 +1,2 @@
+obj-y	= printk.o
+obj-$(CONFIG_A11Y_BRAILLE_CONSOLE)	+= braille.o
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
new file mode 100644
index 000000000000..b51087fb9ace
--- /dev/null
+++ b/kernel/printk/braille.c
@@ -0,0 +1,76 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/string.h>
+
+#include "console_cmdline.h"
+#include "braille.h"
+
+/*
+ * Strip an optional "brl," or "brl=<options>," prefix from a console=
+ * argument.  On success *str is advanced past the prefix and, when a
+ * prefix was present, *brl_options is set to the braille options.
+ *
+ * Returns NULL when parsing may continue.  Returns the (non-NULL,
+ * unmodified) string when "brl=" is not followed by a port name, so the
+ * caller's "if (_braille_console_setup(...)) return 1;" bails out.
+ */
+char *_braille_console_setup(char **str, char **brl_options)
+{
+	char *comma;
+
+	if (!strncmp(*str, "brl,", 4)) {
+		*brl_options = "";
+		*str += 4;
+	} else if (!strncmp(*str, "brl=", 4)) {
+		comma = strchr(*str + 4, ',');
+		if (!comma) {
+			pr_err("need port name after brl=\n");
+			return *str;
+		}
+		*brl_options = *str + 4;
+		/* Terminate the braille options and step past the comma. */
+		*comma = 0;
+		*str = comma + 1;
+	}
+
+	return NULL;
+}
+
+/*
+ * Register @console with the braille driver when its command line entry
+ * @c carries braille options.
+ *
+ * Returns non-zero when the console was handed to the braille driver, in
+ * which case the caller must not register it as a regular console, and 0
+ * when @c has no braille options.
+ */
+int
+_braille_register_console(struct console *console, struct console_cmdline *c)
+{
+	if (!c->brl_options)
+		return 0;
+
+	console->flags |= CON_BRL;
+	braille_register_console(console, c->index, c->options,
+				 c->brl_options);
+	return 1;
+}
+
+/*
+ * Unregister @console from the braille driver if it carries CON_BRL.
+ * Returns the braille driver's result, or 0 for a non-braille console.
+ *
+ * NOTE(review): the caller only stops on a non-zero result, so a
+ * successful braille unregister presumably falls through to the regular
+ * unregister path -- confirm against braille_unregister_console().
+ */
+int
+_braille_unregister_console(struct console *console)
+{
+	if (console->flags & CON_BRL)
+		return braille_unregister_console(console);
+
+	return 0;
+}
diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h
new file mode 100644
index 000000000000..769d771145c8
--- /dev/null
+++ b/kernel/printk/braille.h
@@ -0,0 +1,48 @@
+#ifndef _PRINTK_BRAILLE_H
+#define _PRINTK_BRAILLE_H
+
+#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
+
+static inline void
+braille_set_options(struct console_cmdline *c, char *brl_options)
+{
+	c->brl_options = brl_options;
+}
+
+char *
+_braille_console_setup(char **str, char **brl_options);
+
+int
+_braille_register_console(struct console *console, struct console_cmdline *c);
+
+int
+_braille_unregister_console(struct console *console);
+
+#else /* !CONFIG_A11Y_BRAILLE_CONSOLE: no-op stubs */
+
+static inline void
+braille_set_options(struct console_cmdline *c, char *brl_options)
+{
+}
+
+static inline char *
+_braille_console_setup(char **str, char **brl_options)
+{
+	return NULL;
+}
+
+static inline int
+_braille_register_console(struct console *console, struct console_cmdline *c)
+{
+	return 0;
+}
+
+static inline int
+_braille_unregister_console(struct console *console)
+{
+	return 0;
+}
+
+#endif /* CONFIG_A11Y_BRAILLE_CONSOLE */
+
+#endif /* _PRINTK_BRAILLE_H */
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
new file mode 100644
index 000000000000..cbd69d842341
--- /dev/null
+++ b/kernel/printk/console_cmdline.h
@@ -0,0 +1,14 @@
+#ifndef _CONSOLE_CMDLINE_H
+#define _CONSOLE_CMDLINE_H
+
+struct console_cmdline
+{
+	char	name[8];			/* Name of the driver	    */
+	int	index;				/* Minor dev. to use	    */
+	char	*options;			/* Options for the driver   */
+#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
+	char	*brl_options;			/* Options for braille driver */
+#endif /* CONFIG_A11Y_BRAILLE_CONSOLE */
+};
+
+#endif /* _CONSOLE_CMDLINE_H */
diff --git a/kernel/printk.c b/kernel/printk/printk.c
index a9cb8976e8c1..49739db37b92 100644
--- a/kernel/printk.c
+++ b/kernel/printk/printk.c
@@ -51,6 +51,9 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/printk.h>
 
+#include "console_cmdline.h"
+#include "braille.h"
+
 /* printk's without a loglevel use this.. */
 #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
 
@@ -105,19 +108,11 @@ static struct console *exclusive_console;
 /*
  *	Array of consoles built from command line options (console=)
  */
-struct console_cmdline
-{
-	char	name[8];			/* Name of the driver	    */
-	int	index;				/* Minor dev. to use	    */
-	char	*options;			/* Options for the driver   */
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-	char	*brl_options;			/* Options for braille driver */
-#endif
-};
 
 #define MAX_CMDLINECONSOLES 8
 
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
+
 static int selected_console = -1;
 static int preferred_console = -1;
 int console_set_on_cmdline;
@@ -178,7 +173,7 @@ static int console_may_schedule;
  *         67                           "g"
  *   0032     00 00 00                  padding to next message header
  *
- * The 'struct log' buffer header must never be directly exported to
+ * The 'struct printk_log' buffer header must never be directly exported to
  * userspace, it is a kernel-private implementation detail that might
  * need to be changed in the future, when the requirements change.
  *
@@ -200,7 +195,7 @@ enum log_flags {
 	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
 };
 
-struct log {
+struct printk_log {
 	u64 ts_nsec;		/* timestamp in nanoseconds */
 	u16 len;		/* length of entire record */
 	u16 text_len;		/* length of text buffer */
@@ -248,7 +243,7 @@ static u32 clear_idx;
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 #define LOG_ALIGN 4
 #else
-#define LOG_ALIGN __alignof__(struct log)
+#define LOG_ALIGN __alignof__(struct printk_log)
 #endif
 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -259,35 +254,35 @@ static u32 log_buf_len = __LOG_BUF_LEN;
 static volatile unsigned int logbuf_cpu = UINT_MAX;
 
 /* human readable text of the record */
-static char *log_text(const struct log *msg)
+static char *log_text(const struct printk_log *msg)
 {
-	return (char *)msg + sizeof(struct log);
+	return (char *)msg + sizeof(struct printk_log);
 }
 
 /* optional key/value pair dictionary attached to the record */
-static char *log_dict(const struct log *msg)
+static char *log_dict(const struct printk_log *msg)
 {
-	return (char *)msg + sizeof(struct log) + msg->text_len;
+	return (char *)msg + sizeof(struct printk_log) + msg->text_len;
 }
 
 /* get record by index; idx must point to valid msg */
-static struct log *log_from_idx(u32 idx)
+static struct printk_log *log_from_idx(u32 idx)
 {
-	struct log *msg = (struct log *)(log_buf + idx);
+	struct printk_log *msg = (struct printk_log *)(log_buf + idx);
 
 	/*
 	 * A length == 0 record is the end of buffer marker. Wrap around and
 	 * read the message at the start of the buffer.
 	 */
 	if (!msg->len)
-		return (struct log *)log_buf;
+		return (struct printk_log *)log_buf;
 	return msg;
 }
 
 /* get next record; idx must point to valid msg */
 static u32 log_next(u32 idx)
 {
-	struct log *msg = (struct log *)(log_buf + idx);
+	struct printk_log *msg = (struct printk_log *)(log_buf + idx);
 
 	/* length == 0 indicates the end of the buffer; wrap */
 	/*
@@ -296,7 +291,7 @@ static u32 log_next(u32 idx)
 	 * return the one after that.
 	 */
 	if (!msg->len) {
-		msg = (struct log *)log_buf;
+		msg = (struct printk_log *)log_buf;
 		return msg->len;
 	}
 	return idx + msg->len;
@@ -308,11 +303,11 @@ static void log_store(int facility, int level,
 		      const char *dict, u16 dict_len,
 		      const char *text, u16 text_len)
 {
-	struct log *msg;
+	struct printk_log *msg;
 	u32 size, pad_len;
 
 	/* number of '\0' padding bytes to next message */
-	size = sizeof(struct log) + text_len + dict_len;
+	size = sizeof(struct printk_log) + text_len + dict_len;
 	pad_len = (-size) & (LOG_ALIGN - 1);
 	size += pad_len;
 
@@ -324,7 +319,7 @@ static void log_store(int facility, int level,
 		else
 			free = log_first_idx - log_next_idx;
 
-		if (free > size + sizeof(struct log))
+		if (free > size + sizeof(struct printk_log))
 			break;
 
 		/* drop old messages until we have enough contiuous space */
@@ -332,18 +327,18 @@ static void log_store(int facility, int level,
 		log_first_seq++;
 	}
 
-	if (log_next_idx + size + sizeof(struct log) >= log_buf_len) {
+	if (log_next_idx + size + sizeof(struct printk_log) >= log_buf_len) {
 		/*
 		 * This message + an additional empty header does not fit
 		 * at the end of the buffer. Add an empty header with len == 0
 		 * to signify a wrap around.
 		 */
-		memset(log_buf + log_next_idx, 0, sizeof(struct log));
+		memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
 		log_next_idx = 0;
 	}
 
 	/* fill message */
-	msg = (struct log *)(log_buf + log_next_idx);
+	msg = (struct printk_log *)(log_buf + log_next_idx);
 	memcpy(log_text(msg), text, text_len);
 	msg->text_len = text_len;
 	memcpy(log_dict(msg), dict, dict_len);
@@ -356,7 +351,7 @@ static void log_store(int facility, int level,
 	else
 		msg->ts_nsec = local_clock();
 	memset(log_dict(msg) + dict_len, 0, pad_len);
-	msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
+	msg->len = sizeof(struct printk_log) + text_len + dict_len + pad_len;
 
 	/* insert message */
 	log_next_idx += msg->len;
@@ -479,7 +474,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
 	struct devkmsg_user *user = file->private_data;
-	struct log *msg;
+	struct printk_log *msg;
 	u64 ts_usec;
 	size_t i;
 	char cont = '-';
@@ -724,14 +719,14 @@ void log_buf_kexec_setup(void)
 	VMCOREINFO_SYMBOL(log_first_idx);
 	VMCOREINFO_SYMBOL(log_next_idx);
 	/*
-	 * Export struct log size and field offsets. User space tools can
+	 * Export struct printk_log size and field offsets. User space tools can
 	 * parse it and detect any changes to structure down the line.
 	 */
-	VMCOREINFO_STRUCT_SIZE(log);
-	VMCOREINFO_OFFSET(log, ts_nsec);
-	VMCOREINFO_OFFSET(log, len);
-	VMCOREINFO_OFFSET(log, text_len);
-	VMCOREINFO_OFFSET(log, dict_len);
+	VMCOREINFO_STRUCT_SIZE(printk_log);
+	VMCOREINFO_OFFSET(printk_log, ts_nsec);
+	VMCOREINFO_OFFSET(printk_log, len);
+	VMCOREINFO_OFFSET(printk_log, text_len);
+	VMCOREINFO_OFFSET(printk_log, dict_len);
 }
 #endif
 
@@ -884,7 +879,7 @@ static size_t print_time(u64 ts, char *buf)
 		       (unsigned long)ts, rem_nsec / 1000);
 }
 
-static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
+static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf)
 {
 	size_t len = 0;
 	unsigned int prefix = (msg->facility << 3) | msg->level;
@@ -907,7 +902,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
 	return len;
 }
 
-static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
 			     bool syslog, char *buf, size_t size)
 {
 	const char *text = log_text(msg);
@@ -969,7 +964,7 @@ static size_t msg_print_text(const struct log *msg, enum log_flags prev,
 static int syslog_print(char __user *buf, int size)
 {
 	char *text;
-	struct log *msg;
+	struct printk_log *msg;
 	int len = 0;
 
 	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
@@ -1060,7 +1055,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		idx = clear_idx;
 		prev = 0;
 		while (seq < log_next_seq) {
-			struct log *msg = log_from_idx(idx);
+			struct printk_log *msg = log_from_idx(idx);
 
 			len += msg_print_text(msg, prev, true, NULL, 0);
 			prev = msg->flags;
@@ -1073,7 +1068,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		idx = clear_idx;
 		prev = 0;
 		while (len > size && seq < log_next_seq) {
-			struct log *msg = log_from_idx(idx);
+			struct printk_log *msg = log_from_idx(idx);
 
 			len -= msg_print_text(msg, prev, true, NULL, 0);
 			prev = msg->flags;
@@ -1087,7 +1082,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		len = 0;
 		prev = 0;
 		while (len >= 0 && seq < next_seq) {
-			struct log *msg = log_from_idx(idx);
+			struct printk_log *msg = log_from_idx(idx);
 			int textlen;
 
 			textlen = msg_print_text(msg, prev, true, text,
@@ -1233,7 +1228,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 
 			error = 0;
 			while (seq < log_next_seq) {
-				struct log *msg = log_from_idx(idx);
+				struct printk_log *msg = log_from_idx(idx);
 
 				error += msg_print_text(msg, prev, true, NULL, 0);
 				idx = log_next(idx);
@@ -1720,10 +1715,10 @@ static struct cont {
 	u8 level;
 	bool flushed:1;
 } cont;
-static struct log *log_from_idx(u32 idx) { return NULL; }
+static struct printk_log *log_from_idx(u32 idx) { return NULL; }
 static u32 log_next(u32 idx) { return 0; }
 static void call_console_drivers(int level, const char *text, size_t len) {}
-static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
 			     bool syslog, char *buf, size_t size) { return 0; }
 static size_t cont_print_text(char *text, size_t size) { return 0; }
 
@@ -1762,23 +1757,23 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	 *	See if this tty is not yet registered, and
 	 *	if we have a slot free.
 	 */
-	for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
-		if (strcmp(console_cmdline[i].name, name) == 0 &&
-			  console_cmdline[i].index == idx) {
-				if (!brl_options)
-					selected_console = i;
-				return 0;
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
+		if (strcmp(c->name, name) == 0 && c->index == idx) {
+			if (!brl_options)
+				selected_console = i;
+			return 0;
 		}
+	}
 	if (i == MAX_CMDLINECONSOLES)
 		return -E2BIG;
 	if (!brl_options)
 		selected_console = i;
-	c = &console_cmdline[i];
 	strlcpy(c->name, name, sizeof(c->name));
 	c->options = options;
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-	c->brl_options = brl_options;
-#endif
+	braille_set_options(c, brl_options);
+
 	c->index = idx;
 	return 0;
 }
@@ -1791,20 +1786,8 @@ static int __init console_setup(char *str)
 	char *s, *options, *brl_options = NULL;
 	int idx;
 
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-	if (!memcmp(str, "brl,", 4)) {
-		brl_options = "";
-		str += 4;
-	} else if (!memcmp(str, "brl=", 4)) {
-		brl_options = str + 4;
-		str = strchr(brl_options, ',');
-		if (!str) {
-			printk(KERN_ERR "need port name after brl=\n");
-			return 1;
-		}
-		*(str++) = 0;
-	}
-#endif
+	if (_braille_console_setup(&str, &brl_options))
+		return 1;
 
 	/*
 	 * Decode str into name, index, options.
@@ -1859,15 +1842,15 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
 	struct console_cmdline *c;
 	int i;
 
-	for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
-		if (strcmp(console_cmdline[i].name, name) == 0 &&
-			  console_cmdline[i].index == idx) {
-				c = &console_cmdline[i];
-				strlcpy(c->name, name_new, sizeof(c->name));
-				c->name[sizeof(c->name) - 1] = 0;
-				c->options = options;
-				c->index = idx_new;
-				return i;
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++)
+		if (strcmp(c->name, name) == 0 && c->index == idx) {
+			strlcpy(c->name, name_new, sizeof(c->name));
+			c->name[sizeof(c->name) - 1] = 0;
+			c->options = options;
+			c->index = idx_new;
+			return i;
 		}
 	/* not found */
 	return -1;
@@ -2047,7 +2030,7 @@ void console_unlock(void)
 	console_cont_flush(text, sizeof(text));
 again:
 	for (;;) {
-		struct log *msg;
+		struct printk_log *msg;
 		size_t len;
 		int level;
 
@@ -2242,6 +2225,7 @@ void register_console(struct console *newcon)
 	int i;
 	unsigned long flags;
 	struct console *bcon = NULL;
+	struct console_cmdline *c;
 
 	/*
 	 * before we register a new CON_BOOT console, make sure we don't
@@ -2289,30 +2273,25 @@ void register_console(struct console *newcon)
 	 *	See if this console matches one we selected on
 	 *	the command line.
 	 */
-	for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0];
-			i++) {
-		if (strcmp(console_cmdline[i].name, newcon->name) != 0)
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
+		if (strcmp(c->name, newcon->name) != 0)
 			continue;
 		if (newcon->index >= 0 &&
-		    newcon->index != console_cmdline[i].index)
+		    newcon->index != c->index)
 			continue;
 		if (newcon->index < 0)
-			newcon->index = console_cmdline[i].index;
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-		if (console_cmdline[i].brl_options) {
-			newcon->flags |= CON_BRL;
-			braille_register_console(newcon,
-					console_cmdline[i].index,
-					console_cmdline[i].options,
-					console_cmdline[i].brl_options);
+			newcon->index = c->index;
+
+		if (_braille_register_console(newcon, c))
 			return;
-		}
-#endif
+
 		if (newcon->setup &&
 		    newcon->setup(newcon, console_cmdline[i].options) != 0)
 			break;
 		newcon->flags |= CON_ENABLED;
-		newcon->index = console_cmdline[i].index;
+		newcon->index = c->index;
 		if (i == selected_console) {
 			newcon->flags |= CON_CONSDEV;
 			preferred_console = selected_console;
@@ -2395,13 +2374,13 @@ EXPORT_SYMBOL(register_console);
 int unregister_console(struct console *console)
 {
         struct console *a, *b;
-	int res = 1;
+	int res;
 
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-	if (console->flags & CON_BRL)
-		return braille_unregister_console(console);
-#endif
+	res = _braille_unregister_console(console);
+	if (res)
+		return res;
 
+	res = 1;
 	console_lock();
 	if (console_drivers == console) {
 		console_drivers=console->next;
@@ -2667,7 +2646,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
 			       char *line, size_t size, size_t *len)
 {
-	struct log *msg;
+	struct printk_log *msg;
 	size_t l = 0;
 	bool ret = false;
 
@@ -2779,7 +2758,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	idx = dumper->cur_idx;
 	prev = 0;
 	while (seq < dumper->next_seq) {
-		struct log *msg = log_from_idx(idx);
+		struct printk_log *msg = log_from_idx(idx);
 
 		l += msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
@@ -2792,7 +2771,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	idx = dumper->cur_idx;
 	prev = 0;
 	while (l > size && seq < dumper->next_seq) {
-		struct log *msg = log_from_idx(idx);
+		struct printk_log *msg = log_from_idx(idx);
 
 		l -= msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
@@ -2807,7 +2786,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	l = 0;
 	prev = 0;
 	while (seq < dumper->next_seq) {
-		struct log *msg = log_from_idx(idx);
+		struct printk_log *msg = log_from_idx(idx);
 
 		l += msg_print_text(msg, prev, syslog, buf + l, size - l);
 		idx = log_next(idx);
diff --git a/kernel/relay.c b/kernel/relay.c
index 5001c9887db1..08148fe25b13 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -339,6 +339,10 @@ static void wakeup_readers(unsigned long data)
 {
 	struct rchan_buf *buf = (struct rchan_buf *)data;
 	wake_up_interruptible(&buf->read_wait);
+	/*
+	 * Stupid polling for now: re-arm to wake readers again in HZ / 10.
+	 */
+	mod_timer(&buf->timer, jiffies + HZ / 10);
 }
 
 /**
@@ -356,6 +360,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
 		init_waitqueue_head(&buf->read_wait);
 		kref_init(&buf->kref);
 		setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
+		mod_timer(&buf->timer, jiffies + HZ / 10); /* absolute expiry */
 	} else
 		del_timer_sync(&buf->timer);
 
@@ -739,15 +744,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
 		else
 			buf->early_bytes += buf->chan->subbuf_size -
 					    buf->padding[old_subbuf];
-		smp_mb();
-		if (waitqueue_active(&buf->read_wait))
-			/*
-			 * Calling wake_up_interruptible() from here
-			 * will deadlock if we happen to be logging
-			 * from the scheduler (trying to re-grab
-			 * rq->lock), so defer it.
-			 */
-			mod_timer(&buf->timer, jiffies + 1);
 	}
 
 	old = buf->data;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bb456f44b7b1..9565645e3202 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -851,7 +851,7 @@ void task_numa_fault(int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
 
-	if (!sched_feat_numa(NUMA))
+	if (!numabalancing_enabled)
 		return;
 
 	/* FIXME: Allocate task-specific structure for placement policy here */
@@ -5786,7 +5786,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		entity_tick(cfs_rq, se, queued);
 	}
 
-	if (sched_feat_numa(NUMA))
+	if (numabalancing_enabled)
 		task_tick_numa(rq, curr);
 
 	update_rq_runnable_avg(rq, 1);
diff --git a/kernel/signal.c b/kernel/signal.c
index 50e41075ac77..ded28b91fa53 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3394,7 +3394,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
 		new_ka.sa.sa_restorer = compat_ptr(restorer);
 #endif
 		ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask));
-		ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
 		if (ret)
 			return -EFAULT;
 		sigset_from_compat(&new_ka.sa.sa_mask, &mask);
@@ -3406,7 +3406,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
 		ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), 
 			       &oact->sa_handler);
 		ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask));
-		ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
 #ifdef __ARCH_HAS_SA_RESTORER
 		ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer),
 				&oact->sa_restorer);
diff --git a/kernel/smp.c b/kernel/smp.c
index fe9f773d7114..2a3a01784bc8 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -12,6 +12,7 @@
 #include <linux/gfp.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
+#include <linux/hardirq.h>
 
 #include "smpboot.h"
 
@@ -48,10 +49,13 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 				cpu_to_node(cpu)))
 			return notifier_from_errno(-ENOMEM);
 		if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
-				cpu_to_node(cpu)))
+				cpu_to_node(cpu))) {
+			free_cpumask_var(cfd->cpumask);
 			return notifier_from_errno(-ENOMEM);
+		}
 		cfd->csd = alloc_percpu(struct call_single_data);
 		if (!cfd->csd) {
+			free_cpumask_var(cfd->cpumask_ipi);
 			free_cpumask_var(cfd->cpumask);
 			return notifier_from_errno(-ENOMEM);
 		}
@@ -240,8 +244,9 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 	 * send smp call function interrupt to this cpu and as such deadlocks
 	 * can't happen.
 	 */
-	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-		     && !oops_in_progress);
+	WARN_ON_ONCE(cpu_online(this_cpu)
+		&& (irqs_disabled() || in_serving_irq())
+		&& !oops_in_progress);
 
 	if (cpu == this_cpu) {
 		local_irq_save(flags);
@@ -378,8 +383,9 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * send smp call function interrupt to this cpu and as such deadlocks
 	 * can't happen.
 	 */
-	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-		     && !oops_in_progress && !early_boot_irqs_disabled);
+	WARN_ON_ONCE(cpu_online(this_cpu)
+		&& (irqs_disabled() || in_serving_irq())
+		&& !oops_in_progress && !early_boot_irqs_disabled);
 
 	/* Try to fastpath.  So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1241d8c91d5e..d24c4bbb2d7c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -239,10 +239,12 @@ static void watchdog_overflow_callback(struct perf_event *event,
 		if (__this_cpu_read(hard_watchdog_warn) == true)
 			return;
 
-		if (hardlockup_panic)
+		if (hardlockup_panic) {
+			trigger_all_cpu_backtrace();
 			panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
-		else
+		} else {
 			WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+		}
 
 		__this_cpu_write(hard_watchdog_warn, true);
 		return;
@@ -323,8 +325,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		else
 			dump_stack();
 
-		if (softlockup_panic)
+		if (softlockup_panic) {
+			trigger_all_cpu_backtrace();
 			panic("softlockup: hung tasks");
+		}
 		__this_cpu_write(soft_watchdog_warn, true);
 	} else
 		__this_cpu_write(soft_watchdog_warn, false);
diff --git a/mm/filemap.c b/mm/filemap.c
index 4b51ac1acae7..a6981feed8e7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -467,32 +467,34 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & GFP_RECLAIM_MASK);
 	if (error)
-		goto out;
+		return error;
 
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
-	if (error == 0) {
-		page_cache_get(page);
-		page->mapping = mapping;
-		page->index = offset;
-
-		spin_lock_irq(&mapping->tree_lock);
-		error = radix_tree_insert(&mapping->page_tree, offset, page);
-		if (likely(!error)) {
-			mapping->nrpages++;
-			__inc_zone_page_state(page, NR_FILE_PAGES);
-			spin_unlock_irq(&mapping->tree_lock);
-			trace_mm_filemap_add_to_page_cache(page);
-		} else {
-			page->mapping = NULL;
-			/* Leave page->index set: truncation relies upon it */
-			spin_unlock_irq(&mapping->tree_lock);
-			mem_cgroup_uncharge_cache_page(page);
-			page_cache_release(page);
-		}
-		radix_tree_preload_end();
-	} else
+	if (error) {
 		mem_cgroup_uncharge_cache_page(page);
-out:
+		return error;
+	}
+
+	page_cache_get(page);
+	page->mapping = mapping;
+	page->index = offset;
+
+	spin_lock_irq(&mapping->tree_lock);
+	error = radix_tree_insert(&mapping->page_tree, offset, page);
+	radix_tree_preload_end();
+	if (unlikely(error))
+		goto err_insert;
+	mapping->nrpages++;
+	__inc_zone_page_state(page, NR_FILE_PAGES);
+	spin_unlock_irq(&mapping->tree_lock);
+	trace_mm_filemap_add_to_page_cache(page);
+	return 0;
+err_insert:
+	page->mapping = NULL;
+	/* Leave page->index set: truncation relies upon it */
+	spin_unlock_irq(&mapping->tree_lock);
+	mem_cgroup_uncharge_cache_page(page);
+	page_cache_release(page);
 	return error;
 }
 EXPORT_SYMBOL(add_to_page_cache_locked);
@@ -637,6 +639,7 @@ void __lock_page(struct page *page)
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
+	VM_BUG_ON(PageTail(page));
 	__wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
 							TASK_UNINTERRUPTIBLE);
 }
@@ -646,6 +649,7 @@ int __lock_page_killable(struct page *page)
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
+	VM_BUG_ON(PageTail(page));
 	return __wait_on_bit_lock(page_waitqueue(page), &wait,
 					sleep_on_page_killable, TASK_KILLABLE);
 }
@@ -1088,7 +1092,6 @@ static void shrink_readahead_size_eio(struct file *filp,
  * @filp:	the file to read
  * @ppos:	current file position
  * @desc:	read_descriptor
- * @actor:	read method
  *
  * This is a generic file read routine, and uses the
  * mapping->a_ops->readpage() function for the actual low-level stuff.
@@ -1097,7 +1100,7 @@ static void shrink_readahead_size_eio(struct file *filp,
  * of the logic when it comes to error handling etc.
  */
 static void do_generic_file_read(struct file *filp, loff_t *ppos,
-		read_descriptor_t *desc, read_actor_t actor)
+		read_descriptor_t *desc)
 {
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
@@ -1198,13 +1201,14 @@ page_ok:
 		 * Ok, we have the page, and it's up-to-date, so
 		 * now we can copy it to user space...
 		 *
-		 * The actor routine returns how many bytes were actually used..
+		 * The file_read_actor routine returns how many bytes were
+		 * actually used..
 		 * NOTE! This may not be the same as how much of a user buffer
 		 * we filled up (we may be padding etc), so we can only update
 		 * "pos" here (the actor routine has to update the user buffer
 		 * pointers and the remaining count).
 		 */
-		ret = actor(desc, page, offset, nr);
+		ret = file_read_actor(desc, page, offset, nr);
 		offset += ret;
 		index += offset >> PAGE_CACHE_SHIFT;
 		offset &= ~PAGE_CACHE_MASK;
@@ -1477,7 +1481,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		if (desc.count == 0)
 			continue;
 		desc.error = 0;
-		do_generic_file_read(filp, ppos, &desc, file_read_actor);
+		do_generic_file_read(filp, ppos, &desc);
 		retval += desc.written;
 		if (desc.error) {
 			retval = retval ?: desc.error;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 243e710c6039..c3b8c9c16eae 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -690,11 +690,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 	return pmd;
 }
 
-static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma)
+static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 {
 	pmd_t entry;
-	entry = mk_pmd(page, vma->vm_page_prot);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = mk_pmd(page, prot);
 	entry = pmd_mkhuge(entry);
 	return entry;
 }
@@ -727,7 +726,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		pte_free(mm, pgtable);
 	} else {
 		pmd_t entry;
-		entry = mk_huge_pmd(page, vma);
+		entry = mk_huge_pmd(page, vma->vm_page_prot);
+		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		page_add_new_anon_rmap(page, vma, haddr);
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 		set_pmd_at(mm, haddr, pmd, entry);
@@ -783,77 +783,55 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page;
 	unsigned long haddr = address & HPAGE_PMD_MASK;
-	pte_t *pte;
 
-	if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
-		if (unlikely(anon_vma_prepare(vma)))
-			return VM_FAULT_OOM;
-		if (unlikely(khugepaged_enter(vma)))
+	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+		return VM_FAULT_FALLBACK;
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+	if (unlikely(khugepaged_enter(vma)))
+		return VM_FAULT_OOM;
+	if (!(flags & FAULT_FLAG_WRITE) &&
+			transparent_hugepage_use_zero_page()) {
+		pgtable_t pgtable;
+		struct page *zero_page;
+		bool set;
+		pgtable = pte_alloc_one(mm, haddr);
+		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
-		if (!(flags & FAULT_FLAG_WRITE) &&
-				transparent_hugepage_use_zero_page()) {
-			pgtable_t pgtable;
-			struct page *zero_page;
-			bool set;
-			pgtable = pte_alloc_one(mm, haddr);
-			if (unlikely(!pgtable))
-				return VM_FAULT_OOM;
-			zero_page = get_huge_zero_page();
-			if (unlikely(!zero_page)) {
-				pte_free(mm, pgtable);
-				count_vm_event(THP_FAULT_FALLBACK);
-				goto out;
-			}
-			spin_lock(&mm->page_table_lock);
-			set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
-					zero_page);
-			spin_unlock(&mm->page_table_lock);
-			if (!set) {
-				pte_free(mm, pgtable);
-				put_huge_zero_page();
-			}
-			return 0;
-		}
-		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					  vma, haddr, numa_node_id(), 0);
-		if (unlikely(!page)) {
+		zero_page = get_huge_zero_page();
+		if (unlikely(!zero_page)) {
+			pte_free(mm, pgtable);
 			count_vm_event(THP_FAULT_FALLBACK);
-			goto out;
-		}
-		count_vm_event(THP_FAULT_ALLOC);
-		if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
-			put_page(page);
-			goto out;
+			return VM_FAULT_FALLBACK;
 		}
-		if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
-							  page))) {
-			mem_cgroup_uncharge_page(page);
-			put_page(page);
-			goto out;
+		spin_lock(&mm->page_table_lock);
+		set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
+				zero_page);
+		spin_unlock(&mm->page_table_lock);
+		if (!set) {
+			pte_free(mm, pgtable);
+			put_huge_zero_page();
 		}
-
 		return 0;
 	}
-out:
-	/*
-	 * Use __pte_alloc instead of pte_alloc_map, because we can't
-	 * run pte_offset_map on the pmd, if an huge pmd could
-	 * materialize from under us from a different thread.
-	 */
-	if (unlikely(pmd_none(*pmd)) &&
-	    unlikely(__pte_alloc(mm, vma, pmd, address)))
-		return VM_FAULT_OOM;
-	/* if an huge pmd materialized from under us just retry later */
-	if (unlikely(pmd_trans_huge(*pmd)))
-		return 0;
-	/*
-	 * A regular pmd is established and it can't morph into a huge pmd
-	 * from under us anymore at this point because we hold the mmap_sem
-	 * read mode and khugepaged takes it in write mode. So now it's
-	 * safe to run pte_offset_map().
-	 */
-	pte = pte_offset_map(pmd, address);
-	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+	page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+			vma, haddr, numa_node_id(), 0);
+	if (unlikely(!page)) {
+		count_vm_event(THP_FAULT_FALLBACK);
+		return VM_FAULT_FALLBACK;
+	}
+	count_vm_event(THP_FAULT_ALLOC);
+	if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
+		put_page(page);
+		return VM_FAULT_FALLBACK;
+	}
+	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
+		mem_cgroup_uncharge_page(page);
+		put_page(page);
+		return VM_FAULT_FALLBACK;
+	}
+
+	return 0;
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1210,7 +1188,8 @@ alloc:
 		goto out_mn;
 	} else {
 		pmd_t entry;
-		entry = mk_huge_pmd(new_page, vma);
+		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
+		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
@@ -1620,7 +1599,9 @@ static void __split_huge_page_refcount(struct page *page,
 				     ((1L << PG_referenced) |
 				      (1L << PG_swapbacked) |
 				      (1L << PG_mlocked) |
-				      (1L << PG_uptodate)));
+				      (1L << PG_uptodate) |
+				      (1L << PG_active) |
+				      (1L << PG_unevictable)));
 		page_tail->flags |= (1L << PG_dirty);
 
 		/* clear PageTail before overwriting first_page */
@@ -1659,7 +1640,6 @@ static void __split_huge_page_refcount(struct page *page,
 	BUG_ON(atomic_read(&page->_count) <= 0);
 
 	__mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
-	__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
 
 	ClearPageCompound(page);
 	compound_unlock(page);
@@ -2355,7 +2335,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
 
-	_pmd = mk_huge_pmd(new_page, vma);
+	_pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
+	_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
 
 	/*
 	 * spin_lock() below is not the equivalent of smp_wmb(), so
diff --git a/mm/madvise.c b/mm/madvise.c
index 7055883e6e25..936799f042cc 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -42,11 +42,11 @@ static int madvise_need_mmap_write(int behavior)
  * We can potentially split a vm area into separate
  * areas, each area with its own behavior.
  */
-static long madvise_behavior(struct vm_area_struct * vma,
+static long madvise_behavior(struct vm_area_struct *vma,
 		     struct vm_area_struct **prev,
 		     unsigned long start, unsigned long end, int behavior)
 {
-	struct mm_struct * mm = vma->vm_mm;
+	struct mm_struct *mm = vma->vm_mm;
 	int error = 0;
 	pgoff_t pgoff;
 	unsigned long new_flags = vma->vm_flags;
@@ -215,8 +215,8 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 /*
  * Schedule all required I/O operations.  Do not wait for completion.
  */
-static long madvise_willneed(struct vm_area_struct * vma,
-			     struct vm_area_struct ** prev,
+static long madvise_willneed(struct vm_area_struct *vma,
+			     struct vm_area_struct **prev,
 			     unsigned long start, unsigned long end)
 {
 	struct file *file = vma->vm_file;
@@ -270,8 +270,8 @@ static long madvise_willneed(struct vm_area_struct * vma,
  * An interface that causes the system to free clean pages and flush
  * dirty pages is already available as msync(MS_INVALIDATE).
  */
-static long madvise_dontneed(struct vm_area_struct * vma,
-			     struct vm_area_struct ** prev,
+static long madvise_dontneed(struct vm_area_struct *vma,
+			     struct vm_area_struct **prev,
 			     unsigned long start, unsigned long end)
 {
 	*prev = vma;
@@ -459,7 +459,7 @@ madvise_behavior_valid(int behavior)
 SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
 	unsigned long end, tmp;
-	struct vm_area_struct * vma, *prev;
+	struct vm_area_struct *vma, *prev;
 	int unmapped_error = 0;
 	int error = -EINVAL;
 	int write;
diff --git a/mm/memory.c b/mm/memory.c
index 1ce2e2a734fc..f2ab2a8b39b2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3693,7 +3693,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-int handle_pte_fault(struct mm_struct *mm,
+static int handle_pte_fault(struct mm_struct *mm,
 		     struct vm_area_struct *vma, unsigned long address,
 		     pte_t *pte, pmd_t *pmd, unsigned int flags)
 {
@@ -3780,9 +3780,12 @@ retry:
 	if (!pmd)
 		return VM_FAULT_OOM;
 	if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
+		int ret = VM_FAULT_FALLBACK;
 		if (!vma->vm_ops)
-			return do_huge_pmd_anonymous_page(mm, vma, address,
-							  pmd, flags);
+			ret = do_huge_pmd_anonymous_page(mm, vma, address,
+					pmd, flags);
+		if (!(ret & VM_FAULT_FALLBACK))
+			return ret;
 	} else {
 		pmd_t orig_pmd = *pmd;
 		int ret;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 74310017296e..6b1d426731ae 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -732,7 +732,10 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 		if (prev) {
 			vma = prev;
 			next = vma->vm_next;
-			continue;
+			if (mpol_equal(vma_policy(vma), new_pol))
+				continue;
+			/* vma_merge() joined vma && vma->next, case 8 */
+			goto replace;
 		}
 		if (vma->vm_start != vmstart) {
 			err = split_vma(vma->vm_mm, vma, vmstart, 1);
@@ -744,6 +747,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 			if (err)
 				goto out;
 		}
+ replace:
 		err = vma_replace_policy(vma, new_pol);
 		if (err)
 			goto out;
@@ -2061,6 +2065,16 @@ retry_cpuset:
 }
 EXPORT_SYMBOL(alloc_pages_current);
 
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+	struct mempolicy *pol = mpol_dup(vma_policy(src));
+
+	if (IS_ERR(pol))
+		return PTR_ERR(pol);
+	dst->vm_policy = pol;
+	return 0;
+}
+
 /*
  * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
  * rebinds the mempolicy its copying by calling mpol_rebind_policy()
diff --git a/mm/mmap.c b/mm/mmap.c
index fbad7b091090..e8ca6e484e52 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -865,7 +865,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		if (next->anon_vma)
 			anon_vma_merge(vma, next);
 		mm->map_count--;
-		vma_set_policy(vma, vma_policy(next));
+		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
@@ -1202,7 +1202,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long *populate)
 {
 	struct mm_struct * mm = current->mm;
-	struct inode *inode;
 	vm_flags_t vm_flags;
 
 	*populate = 0;
@@ -1265,9 +1264,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			return -EAGAIN;
 	}
 
-	inode = file ? file_inode(file) : NULL;
-
 	if (file) {
+		struct inode *inode = file_inode(file);
+
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
 			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
@@ -1327,6 +1326,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		}
 	}
 
+	if ((vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) &&
+	    (file || (vm_flags & VM_MAYSHARE)))
+		return -EINVAL;	/* MAP_GROWS*: private anon only */
 	/*
 	 * Set 'VM_NORESERVE' if we should not account for the
 	 * memory use of this mapping.
@@ -1476,11 +1478,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
-	int correct_wcount = 0;
 	int error;
 	struct rb_node **rb_link, *rb_parent;
 	unsigned long charged = 0;
-	struct inode *inode =  file ? file_inode(file) : NULL;
 
 	/* Check against address space limit. */
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
@@ -1544,16 +1544,11 @@ munmap_back:
 	vma->vm_pgoff = pgoff;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
-	error = -EINVAL;	/* when rejecting VM_GROWSDOWN|VM_GROWSUP */
-
 	if (file) {
-		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
-			goto free_vma;
 		if (vm_flags & VM_DENYWRITE) {
 			error = deny_write_access(file);
 			if (error)
 				goto free_vma;
-			correct_wcount = 1;
 		}
 		vma->vm_file = get_file(file);
 		error = file->f_op->mmap(file, vma);
@@ -1573,8 +1568,6 @@ munmap_back:
 		pgoff = vma->vm_pgoff;
 		vm_flags = vma->vm_flags;
 	} else if (vm_flags & VM_SHARED) {
-		if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
-			goto free_vma;
 		error = shmem_zero_setup(vma);
 		if (error)
 			goto free_vma;
@@ -1596,11 +1589,10 @@ munmap_back:
 	}
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
-	file = vma->vm_file;
-
 	/* Once vma denies write, undo our temporary denial count */
-	if (correct_wcount)
-		atomic_inc(&inode->i_writecount);
+	if (vm_flags & VM_DENYWRITE)
+		allow_write_access(file);
+	file = vma->vm_file;
 out:
 	perf_event_mmap(vma);
 
@@ -1619,8 +1611,8 @@ out:
 	return addr;
 
 unmap_and_free_vma:
-	if (correct_wcount)
-		atomic_inc(&inode->i_writecount);
+	if (vm_flags & VM_DENYWRITE)
+		allow_write_access(file);
 	vma->vm_file = NULL;
 	fput(file);
 
@@ -2380,7 +2372,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	      unsigned long addr, int new_below)
 {
-	struct mempolicy *pol;
 	struct vm_area_struct *new;
 	int err = -ENOMEM;
 
@@ -2404,12 +2395,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
 	}
 
-	pol = mpol_dup(vma_policy(vma));
-	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
+	/* Don't assign err: it must stay -ENOMEM if anon_vma_clone() fails */
+	if (vma_dup_policy(vma, new))
 		goto out_free_vma;
-	}
-	vma_set_policy(new, pol);
 
 	if (anon_vma_clone(new, vma))
 		goto out_free_mpol;
@@ -2437,7 +2425,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		fput(new->vm_file);
 	unlink_anon_vmas(new);
  out_free_mpol:
-	mpol_put(pol);
+	mpol_put(vma_policy(new));
  out_free_vma:
 	kmem_cache_free(vm_area_cachep, new);
  out_err:
@@ -2780,7 +2768,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
-	struct mempolicy *pol;
 	bool faulted_in_anon_vma = true;
 
 	/*
@@ -2825,10 +2812,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			new_vma->vm_start = addr;
 			new_vma->vm_end = addr + len;
 			new_vma->vm_pgoff = pgoff;
-			pol = mpol_dup(vma_policy(vma));
-			if (IS_ERR(pol))
+			if (vma_dup_policy(vma, new_vma))
 				goto out_free_vma;
-			vma_set_policy(new_vma, pol);
 			INIT_LIST_HEAD(&new_vma->anon_vma_chain);
 			if (anon_vma_clone(new_vma, vma))
 				goto out_free_mempol;
@@ -2843,7 +2828,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	return new_vma;
 
  out_free_mempol:
-	mpol_put(pol);
+	mpol_put(vma_policy(new_vma));
  out_free_vma:
 	kmem_cache_free(vm_area_cachep, new_vma);
 	return NULL;
diff --git a/mm/rmap.c b/mm/rmap.c
index cd356df4f71a..706647014cc4 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1055,11 +1055,11 @@ void do_page_add_anon_rmap(struct page *page,
 {
 	int first = atomic_inc_and_test(&page->_mapcount);
 	if (first) {
-		if (!PageTransHuge(page))
-			__inc_zone_page_state(page, NR_ANON_PAGES);
-		else
+		if (PageTransHuge(page))
 			__inc_zone_page_state(page,
 					      NR_ANON_TRANSPARENT_HUGEPAGES);
+		__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+				hpage_nr_pages(page));
 	}
 	if (unlikely(PageKsm(page)))
 		return;
@@ -1088,10 +1088,10 @@ void page_add_new_anon_rmap(struct page *page,
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
-	if (!PageTransHuge(page))
-		__inc_zone_page_state(page, NR_ANON_PAGES);
-	else
+	if (PageTransHuge(page))
 		__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+			hpage_nr_pages(page));
 	__page_set_anon_rmap(page, vma, address, 1);
 	if (!mlocked_vma_newpage(vma, page)) {
 		SetPageActive(page);
@@ -1151,11 +1151,11 @@ void page_remove_rmap(struct page *page)
 		goto out;
 	if (anon) {
 		mem_cgroup_uncharge_page(page);
-		if (!PageTransHuge(page))
-			__dec_zone_page_state(page, NR_ANON_PAGES);
-		else
+		if (PageTransHuge(page))
 			__dec_zone_page_state(page,
 					      NR_ANON_TRANSPARENT_HUGEPAGES);
+		__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+				-hpage_nr_pages(page)); /* unmap: subtract */
 	} else {
 		__dec_zone_page_state(page, NR_FILE_MAPPED);
 		mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
diff --git a/mm/shmem.c b/mm/shmem.c
index a87990cf9f94..eb6ff2f37de0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1464,7 +1464,7 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 	return copied;
 }
 
-static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
+static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
 {
 	struct inode *inode = file_inode(filp);
 	struct address_space *mapping = inode->i_mapping;
@@ -1546,13 +1546,14 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
 		 * Ok, we have the page, and it's up-to-date, so
 		 * now we can copy it to user space...
 		 *
-		 * The actor routine returns how many bytes were actually used..
+		 * The file_read_actor routine returns how many bytes were actually
+		 * used..
 		 * NOTE! This may not be the same as how much of a user buffer
 		 * we filled up (we may be padding etc), so we can only update
-		 * "pos" here (the actor routine has to update the user buffer
+		 * "pos" here (file_read_actor has to update the user buffer
 		 * pointers and the remaining count).
 		 */
-		ret = actor(desc, page, offset, nr);
+		ret = file_read_actor(desc, page, offset, nr);
 		offset += ret;
 		index += offset >> PAGE_CACHE_SHIFT;
 		offset &= ~PAGE_CACHE_MASK;
@@ -1590,7 +1591,7 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
 		if (desc.count == 0)
 			continue;
 		desc.error = 0;
-		do_shmem_file_read(filp, ppos, &desc, file_read_actor);
+		do_shmem_file_read(filp, ppos, &desc);
 		retval += desc.written;
 		if (desc.error) {
 			retval = retval ?: desc.error;
diff --git a/mm/swap.c b/mm/swap.c
index 4a1d0d2c52fa..62b78a6e224f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -512,12 +512,7 @@ EXPORT_SYMBOL(__lru_cache_add);
  */
 void lru_cache_add(struct page *page)
 {
-	if (PageActive(page)) {
-		VM_BUG_ON(PageUnevictable(page));
-	} else if (PageUnevictable(page)) {
-		VM_BUG_ON(PageActive(page));
-	}
-
+	VM_BUG_ON(PageActive(page) && PageUnevictable(page));
 	VM_BUG_ON(PageLRU(page));
 	__lru_cache_add(page);
 }
@@ -539,6 +534,7 @@ void add_page_to_unevictable_list(struct page *page)
 
 	spin_lock_irq(&zone->lru_lock);
 	lruvec = mem_cgroup_page_lruvec(page, zone);
+	ClearPageActive(page);
 	SetPageUnevictable(page);
 	SetPageLRU(page);
 	add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
@@ -774,8 +770,6 @@ EXPORT_SYMBOL(__pagevec_release);
 void lru_add_page_tail(struct page *page, struct page *page_tail,
 		       struct lruvec *lruvec, struct list_head *list)
 {
-	int uninitialized_var(active);
-	enum lru_list lru;
 	const int file = 0;
 
 	VM_BUG_ON(!PageHead(page));
@@ -787,20 +781,6 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
 	if (!list)
 		SetPageLRU(page_tail);
 
-	if (page_evictable(page_tail)) {
-		if (PageActive(page)) {
-			SetPageActive(page_tail);
-			active = 1;
-			lru = LRU_ACTIVE_ANON;
-		} else {
-			active = 0;
-			lru = LRU_INACTIVE_ANON;
-		}
-	} else {
-		SetPageUnevictable(page_tail);
-		lru = LRU_UNEVICTABLE;
-	}
-
 	if (likely(PageLRU(page)))
 		list_add_tail(&page_tail->lru, &page->lru);
 	else if (list) {
@@ -816,13 +796,13 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
 		 * Use the standard add function to put page_tail on the list,
 		 * but then correct its position so they all end up in order.
 		 */
-		add_page_to_lru_list(page_tail, lruvec, lru);
+		add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
 		list_head = page_tail->lru.prev;
 		list_move_tail(&page_tail->lru, list_head);
 	}
 
 	if (!PageUnevictable(page))
-		update_page_reclaim_stat(lruvec, file, active);
+		update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -833,7 +813,6 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
 	int active = PageActive(page);
 	enum lru_list lru = page_lru(page);
 
-	VM_BUG_ON(PageUnevictable(page));
 	VM_BUG_ON(PageLRU(page));
 
 	SetPageLRU(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index f24ab0dff554..9a22fc65ffa4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void)
 	return ret;
 }
 
+static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
+
 void show_swap_cache_info(void)
 {
 	printk("%lu pages in swap cache\n", total_swapcache_pages());
@@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 
 	page = find_get_page(swap_address_space(entry), entry.val);
 
-	if (page)
+	if (page) {
 		INC_CACHE_INFO(find_success);
+		if (TestClearPageReadahead(page))
+			atomic_inc(&swapin_readahead_hits);
+	}
 
 	INC_CACHE_INFO(find_total);
 	return page;
@@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	return found_page;
 }
 
+static unsigned long swapin_nr_pages(unsigned long offset)
+{
+	static unsigned long prev_offset;
+	unsigned int pages, max_pages, last_ra;
+	static atomic_t last_readahead_pages;
+
+	max_pages = 1 << ACCESS_ONCE(page_cluster);
+	if (max_pages <= 1)
+		return 1;
+
+	/*
+	 * This heuristic has been found to work well on both sequential and
+	 * random loads, swapping to hard disk or to SSD: please don't ask
+	 * what the "+ 2" means, it just happens to work well, that's all.
+	 */
+	pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
+	if (pages == 2) {
+		/*
+		 * We can have no readahead hits to judge by: but must not get
+		 * stuck here forever, so check for an adjacent offset instead
+		 * (and don't even bother to check whether swap type is same).
+		 */
+		if (offset != prev_offset + 1 && offset != prev_offset - 1)
+			pages = 1;
+		prev_offset = offset;
+	} else {
+		unsigned int roundup = 4;
+		while (roundup < pages)
+			roundup <<= 1;
+		pages = roundup;
+	}
+
+	if (pages > max_pages)
+		pages = max_pages;
+
+	/* Don't shrink readahead too fast */
+	last_ra = atomic_read(&last_readahead_pages) / 2;
+	if (pages < last_ra)
+		pages = last_ra;
+	atomic_set(&last_readahead_pages, pages);
+
+	return pages;
+}
+
 /**
  * swapin_readahead - swap in pages in hope we need them soon
  * @entry: swap entry of this memory
@@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
-	unsigned long offset = swp_offset(entry);
+	unsigned long entry_offset = swp_offset(entry);
+	unsigned long offset = entry_offset;
 	unsigned long start_offset, end_offset;
-	unsigned long mask = (1UL << page_cluster) - 1;
+	unsigned long mask;
 	struct blk_plug plug;
 
+	mask = swapin_nr_pages(offset) - 1;
+	if (!mask)
+		goto skip;
+
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
 	end_offset = offset | mask;
@@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 						gfp_mask, vma, addr);
 		if (!page)
 			continue;
+		if (offset != entry_offset)
+			SetPageReadahead(page);
 		page_cache_release(page);
 	}
 	blk_finish_plug(&plug);
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
+skip:
 	return read_swap_cache_async(entry, gfp_mask, vma, addr);
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 36af6eeaa67e..4383db8ba8d0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -527,16 +527,16 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry)
 	return p;
 
 bad_free:
-	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
+	pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
 	goto out;
 bad_offset:
-	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
+	pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
 	goto out;
 bad_device:
-	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
+	pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
 	goto out;
 bad_nofile:
-	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
+	pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
 out:
 	return NULL;
 }
@@ -1911,9 +1911,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	int i;
 	unsigned long maxpages;
 	unsigned long swapfilepages;
+	unsigned long last_page;
 
 	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
-		printk(KERN_ERR "Unable to find swap-space signature\n");
+		pr_err("Unable to find swap-space signature\n");
 		return 0;
 	}
 
@@ -1927,9 +1928,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	}
 	/* Check the swap header's sub-version */
 	if (swap_header->info.version != 1) {
-		printk(KERN_WARNING
-		       "Unable to handle swap header version %d\n",
-		       swap_header->info.version);
+		pr_warn("Unable to handle swap header version %d\n",
+			swap_header->info.version);
 		return 0;
 	}
 
@@ -1953,8 +1953,14 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	 */
 	maxpages = swp_offset(pte_to_swp_entry(
 			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
-	if (maxpages > swap_header->info.last_page) {
-		maxpages = swap_header->info.last_page + 1;
+	last_page = swap_header->info.last_page;
+	if (last_page > maxpages) {
+		pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
+			maxpages << (PAGE_SHIFT - 10),
+			last_page << (PAGE_SHIFT - 10));
+	}
+	if (maxpages > last_page) {
+		maxpages = last_page + 1;
 		/* p->max is an unsigned int: don't overflow it */
 		if ((unsigned int)maxpages == 0)
 			maxpages = UINT_MAX;
@@ -1965,8 +1971,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 		return 0;
 	swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
 	if (swapfilepages && maxpages > swapfilepages) {
-		printk(KERN_WARNING
-		       "Swap area shorter than signature indicates\n");
+		pr_warn("Swap area shorter than signature indicates\n");
 		return 0;
 	}
 	if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
@@ -2009,7 +2014,7 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
 		nr_good_pages = p->pages;
 	}
 	if (!nr_good_pages) {
-		printk(KERN_WARNING "Empty swap-file\n");
+		pr_warn("Empty swap-file\n");
 		return -EINVAL;
 	}
 
@@ -2163,8 +2168,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 			if (p->flags & SWP_AREA_DISCARD) {
 				int err = discard_swap(p);
 				if (unlikely(err))
-					printk(KERN_ERR
-					       "swapon: discard_swap(%p): %d\n",
+					pr_err("swapon: discard_swap(%p): %d\n",
 						p, err);
 			}
 		}
@@ -2177,7 +2181,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		  (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
 	enable_swap_info(p, prio, swap_map, frontswap_map);
 
-	printk(KERN_INFO "Adding %uk swap on %s.  "
+	pr_info("Adding %uk swap on %s.  "
 			"Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
 		p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
 		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
@@ -2311,7 +2315,7 @@ out:
 	return err;
 
 bad_file:
-	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
+	pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val);
 	goto out;
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 20c2ef4458fa..00382c53f582 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -817,6 +817,11 @@ const char * const vmstat_text[] = {
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
 #endif
+	"nr_tlb_remote_flush",
+	"nr_tlb_remote_flush_received",
+	"nr_tlb_local_flush_all",
+	"nr_tlb_local_flush_one",
+	"nr_tlb_local_flush_one_kernel",
 
 #endif /* CONFIG_VM_EVENTS_COUNTERS */
 };
diff --git a/mm/zbud.c b/mm/zbud.c
index 9bb4710e3589..ad1e781284fd 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -257,7 +257,7 @@ int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
 
 	if (size <= 0 || gfp & __GFP_HIGHMEM)
 		return -EINVAL;
-	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED)
+	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 		return -ENOSPC;
 	chunks = size_to_chunks(size);
 	spin_lock(&pool->lock);
diff --git a/mm/zswap.c b/mm/zswap.c
index deda2b671e12..efed4c8b7f5b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -409,7 +409,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
 				struct page **retpage)
 {
 	struct page *found_page, *new_page = NULL;
-	struct address_space *swapper_space = &swapper_spaces[swp_type(entry)];
+	struct address_space *swapper_space = swap_address_space(entry);
 	int err;
 
 	*retpage = NULL;
diff --git a/net/socket.c b/net/socket.c
index 829b460acb87..c64d03f1772c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3072,12 +3072,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 
 	uifmap32 = &uifr32->ifr_ifru.ifru_map;
 	err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
-	err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
-	err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
-	err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
-	err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
-	err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
-	err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
+	err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
+	err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
+	err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
+	err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
+	err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
+	err |= get_user(ifr.ifr_map.port, &uifmap32->port);
 	if (err)
 		return -EFAULT;
 
@@ -3088,12 +3088,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 
 	if (cmd == SIOCGIFMAP && !err) {
 		err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
-		err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
-		err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
-		err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
-		err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
-		err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
-		err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
+		err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
+		err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
+		err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
+		err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
+		err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
+		err |= put_user(ifr.ifr_map.port, &uifmap32->port);
 		if (err)
 			err = -EFAULT;
 	}
@@ -3167,25 +3167,25 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 		struct in6_rtmsg32 __user *ur6 = argp;
 		ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
 			3 * sizeof(struct in6_addr));
-		ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
-		ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
-		ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
-		ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
-		ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
-		ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
-		ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
+		ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
+		ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
+		ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
+		ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
+		ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
+		ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
+		ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
 
 		r = (void *) &r6;
 	} else { /* ipv4 */
 		struct rtentry32 __user *ur4 = argp;
 		ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
 					3 * sizeof(struct sockaddr));
-		ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
-		ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
-		ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
-		ret |= __get_user(r4.rt_window, &(ur4->rt_window));
-		ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
-		ret |= __get_user(rtdev, &(ur4->rt_dev));
+		ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
+		ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
+		ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
+		ret |= get_user(r4.rt_window, &(ur4->rt_window));
+		ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
+		ret |= get_user(rtdev, &(ur4->rt_dev));
 		if (rtdev) {
 			ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
 			r4.rt_dev = (char __user __force *)devname;
author	Stephen Rothwell <sfr@canb.auug.org.au>	2013-07-19 13:01:49 +1000
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2013-07-19 13:01:49 +1000
commit	a25f6f2e88b6f2bb492f22b8b91028604d51bfb2 (patch)
tree	2b6744bfab0871da7942bb9864458d989e89df1e
parent	d03792f9db9b892f494d3aa19d767ddf0365d1ff (diff)
parent	1f02e5b30607cbef3c8b4d1376cba3689c399288 (diff)