17 files changed, 3627 insertions, 326 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 54aaf4feaf6c..8ade0a7a91e0 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -2,6 +2,9 @@
 # Library configuration
 #
 
+config BINARY_PRINTF
+	def_bool n
+
 menu "Library routines"
 
 config BITREVERSE
@@ -98,6 +101,20 @@ config LZO_DECOMPRESS
 	tristate
 
 #
+# These all provide a common interface (hence the apparent duplication with
+# ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.)
+#
+config DECOMPRESS_GZIP
+	select ZLIB_INFLATE
+	tristate
+
+config DECOMPRESS_BZIP2
+	tristate
+
+config DECOMPRESS_LZMA
+	tristate
+
+#
 # Generic allocator support is selected if needed
 #
 config GENERIC_ALLOCATOR
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28565111fcb3..ca22df88d1fc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -186,6 +186,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
 	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
 
+config DETECT_HUNG_TASK
+	bool "Detect Hung Tasks"
+	depends on DEBUG_KERNEL
+	default y
+	help
+	  Say Y here to enable the kernel to detect "hung tasks",
+	  which are bugs that cause the task to be stuck in
+	  uninterruptible "D" state indefinitely.
+
+	  When a hung task is detected, the kernel will print the
+	  current stack trace (which you should report), but the
+	  task will stay in uninterruptible state. If lockdep is
+	  enabled then all held locks will also be reported. This
+	  feature has negligible overhead.
+
+config BOOTPARAM_HUNG_TASK_PANIC
+	bool "Panic (Reboot) On Hung Tasks"
+	depends on DETECT_HUNG_TASK
+	help
+	  Say Y here to enable the kernel to panic on "hung tasks",
+	  which are bugs that cause the kernel to leave a task stuck
+	  in uninterruptible "D" state.
+
+	  The panic can be used in combination with panic_timeout,
+	  to cause the system to reboot automatically after a
+	  hung task has been detected. This feature is useful for
+	  high-availability systems that have uptime guarantees and
+	  where hung tasks must be resolved ASAP.
+
+	  Say N if unsure.
+
+config BOOTPARAM_HUNG_TASK_PANIC_VALUE
+	int
+	depends on DETECT_HUNG_TASK
+	range 0 1
+	default 0 if !BOOTPARAM_HUNG_TASK_PANIC
+	default 1 if BOOTPARAM_HUNG_TASK_PANIC
+
 config SCHED_DEBUG
 	bool "Collect scheduler debugging info"
 	depends on DEBUG_KERNEL && PROC_FS
@@ -422,7 +460,7 @@ config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select STACKTRACE
-	select FRAME_POINTER if !X86 && !MIPS && !PPC
+	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND
 	select KALLSYMS
 	select KALLSYMS_ALL
 
@@ -816,6 +854,7 @@ config SYSCTL_SYSCALL_CHECK
 	  to properly maintain and use. This enables checks that help
 	  you to keep things correct.
 
+source mm/Kconfig.debug
 source kernel/trace/Kconfig
 
 config PROVIDE_OHCI1394_DMA_INIT
@@ -932,6 +971,17 @@ config DYNAMIC_DEBUG
 
 	  See Documentation/dynamic-debug-howto.txt for additional information.
 
+config DMA_API_DEBUG
+	bool "Enable debugging of DMA-API usage"
+	depends on HAVE_DMA_API_DEBUG
+	help
+	  Enable this option to debug the use of the DMA API by device drivers.
+	  With this option you will be able to detect common bugs in device
+	  drivers like double-freeing of DMA mappings or freeing mappings that
+	  were never allocated.
+	  This option causes a performance degradation.  Use only if you want
+	  to debug device drivers. If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Makefile b/lib/Makefile
index 8bdc647e6d62..d6edd6753f40 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o
+	 is_single_threaded.o plist.o decompress.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -65,6 +65,10 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
 obj-$(CONFIG_LZO_COMPRESS) += lzo/
 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
 
+lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
+lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
+lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
+
 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
 obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
 obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
@@ -86,6 +90,8 @@ obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
 
 obj-$(CONFIG_NLATTR) += nlattr.o
 
+obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 5d99be1fd988..2755a3bd16a1 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -30,7 +30,7 @@ struct debug_bucket {
 
 static struct debug_bucket	obj_hash[ODEBUG_HASH_SIZE];
 
-static struct debug_obj		obj_static_pool[ODEBUG_POOL_SIZE];
+static struct debug_obj		obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
 
 static DEFINE_SPINLOCK(pool_lock);
 
@@ -50,12 +50,23 @@ static int			debug_objects_enabled __read_mostly
 
 static struct debug_obj_descr	*descr_test  __read_mostly;
 
+static void free_obj_work(struct work_struct *work);
+static DECLARE_WORK(debug_obj_work, free_obj_work);
+
 static int __init enable_object_debug(char *str)
 {
 	debug_objects_enabled = 1;
 	return 0;
 }
+
+static int __init disable_object_debug(char *str)
+{
+	debug_objects_enabled = 0;
+	return 0;
+}
+
 early_param("debug_objects", enable_object_debug);
+early_param("no_debug_objects", disable_object_debug);
 
 static const char *obj_states[ODEBUG_STATE_MAX] = {
 	[ODEBUG_STATE_NONE]		= "none",
@@ -146,25 +157,51 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 }
 
 /*
- * Put the object back into the pool or give it back to kmem_cache:
+ * workqueue function to free objects.
  */
-static void free_object(struct debug_obj *obj)
+static void free_obj_work(struct work_struct *work)
 {
-	unsigned long idx = (unsigned long)(obj - obj_static_pool);
+	struct debug_obj *obj;
 	unsigned long flags;
 
-	if (obj_pool_free < ODEBUG_POOL_SIZE || idx < ODEBUG_POOL_SIZE) {
-		spin_lock_irqsave(&pool_lock, flags);
-		hlist_add_head(&obj->node, &obj_pool);
-		obj_pool_free++;
-		obj_pool_used--;
-		spin_unlock_irqrestore(&pool_lock, flags);
-	} else {
-		spin_lock_irqsave(&pool_lock, flags);
-		obj_pool_used--;
+	spin_lock_irqsave(&pool_lock, flags);
+	while (obj_pool_free > ODEBUG_POOL_SIZE) {
+		obj = hlist_entry(obj_pool.first, typeof(*obj), node);
+		hlist_del(&obj->node);
+		obj_pool_free--;
+		/*
+		 * We release pool_lock across kmem_cache_free() to
+		 * avoid contention on pool_lock.
+		 */
 		spin_unlock_irqrestore(&pool_lock, flags);
 		kmem_cache_free(obj_cache, obj);
+		spin_lock_irqsave(&pool_lock, flags);
 	}
+	spin_unlock_irqrestore(&pool_lock, flags);
+}
+
+/*
+ * Put the object back into the pool and schedule work to free objects
+ * if necessary.
+ */
+static void free_object(struct debug_obj *obj)
+{
+	unsigned long flags;
+	int sched = 0;
+
+	spin_lock_irqsave(&pool_lock, flags);
+	/*
+	 * schedule work when the pool is filled and the cache is
+	 * initialized:
+	 */
+	if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache)
+		sched = !work_pending(&debug_obj_work);
+	hlist_add_head(&obj->node, &obj_pool);
+	obj_pool_free++;
+	obj_pool_used--;
+	spin_unlock_irqrestore(&pool_lock, flags);
+	if (sched)
+		schedule_work(&debug_obj_work);
 }
 
 /*
@@ -876,6 +913,63 @@ void __init debug_objects_early_init(void)
 }
 
 /*
+ * Convert the statically allocated objects to dynamic ones:
+ */
+static int debug_objects_replace_static_objects(void)
+{
+	struct debug_bucket *db = obj_hash;
+	struct hlist_node *node, *tmp;
+	struct debug_obj *obj, *new;
+	HLIST_HEAD(objects);
+	int i, cnt = 0;
+
+	for (i = 0; i < ODEBUG_POOL_SIZE; i++) {
+		obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL);
+		if (!obj)
+			goto free;
+		hlist_add_head(&obj->node, &objects);
+	}
+
+	/*
+	 * When debug_objects_mem_init() is called we know that only
+	 * one CPU is up, so disabling interrupts is enough
+	 * protection. This avoids the lockdep hell of lock ordering.
+	 */
+	local_irq_disable();
+
+	/* Remove the statically allocated objects from the pool */
+	hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node)
+		hlist_del(&obj->node);
+	/* Move the allocated objects to the pool */
+	hlist_move_list(&objects, &obj_pool);
+
+	/* Replace the active object references */
+	for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
+		hlist_move_list(&db->list, &objects);
+
+		hlist_for_each_entry(obj, node, &objects, node) {
+			new = hlist_entry(obj_pool.first, typeof(*obj), node);
+			hlist_del(&new->node);
+			/* copy object data */
+			*new = *obj;
+			hlist_add_head(&new->node, &db->list);
+			cnt++;
+		}
+	}
+
+	printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt,
+	       obj_pool_used);
+	local_irq_enable();
+	return 0;
+free:
+	hlist_for_each_entry_safe(obj, node, tmp, &objects, node) {
+		hlist_del(&obj->node);
+		kmem_cache_free(obj_cache, obj);
+	}
+	return -ENOMEM;
+}
+
+/*
  * Called after the kmem_caches are functional to setup a dedicated
  * cache pool, which has the SLAB_DEBUG_OBJECTS flag set. This flag
  * prevents that the debug code is called on kmem_cache_free() for the
@@ -890,8 +984,11 @@ void __init debug_objects_mem_init(void)
 				      sizeof (struct debug_obj), 0,
 				      SLAB_DEBUG_OBJECTS, NULL);
 
-	if (!obj_cache)
+	if (!obj_cache || debug_objects_replace_static_objects()) {
 		debug_objects_enabled = 0;
-	else
+		if (obj_cache)
+			kmem_cache_destroy(obj_cache);
+		printk(KERN_WARNING "ODEBUG: out of memory.\n");
+	} else
 		debug_objects_selftest();
 }
diff --git a/lib/decompress.c b/lib/decompress.c
new file mode 100644
index 000000000000..d2842f571674
--- /dev/null
+++ b/lib/decompress.c
@@ -0,0 +1,54 @@
+/*
+ * decompress.c
+ *
+ * Detect the decompression method based on magic number
+ */
+
+#include <linux/decompress/generic.h>
+
+#include <linux/decompress/bunzip2.h>
+#include <linux/decompress/unlzma.h>
+#include <linux/decompress/inflate.h>
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+#ifndef CONFIG_DECOMPRESS_GZIP
+# define gunzip NULL
+#endif
+#ifndef CONFIG_DECOMPRESS_BZIP2
+# define bunzip2 NULL
+#endif
+#ifndef CONFIG_DECOMPRESS_LZMA
+# define unlzma NULL
+#endif
+
+static const struct compress_format {
+	unsigned char magic[2];
+	const char *name;
+	decompress_fn decompressor;
+} compressed_formats[] = {
+	{ {037, 0213}, "gzip", gunzip },
+	{ {037, 0236}, "gzip", gunzip },
+	{ {0x42, 0x5a}, "bzip2", bunzip2 },
+	{ {0x5d, 0x00}, "lzma", unlzma },
+	{ {0, 0}, NULL, NULL }
+};
+
+decompress_fn decompress_method(const unsigned char *inbuf, int len,
+				const char **name)
+{
+	const struct compress_format *cf;
+
+	if (len < 2)
+		return NULL;	/* Need at least this much... */
+
+	for (cf = compressed_formats; cf->name; cf++) {
+		if (!memcmp(inbuf, cf->magic, 2))
+			break;
+
+	}
+	if (name)
+		*name = cf->name;
+	return cf->decompressor;
+}
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
new file mode 100644
index 000000000000..708e2a86d87b
--- /dev/null
+++ b/lib/decompress_bunzip2.c
@@ -0,0 +1,736 @@
+/* vi: set sw = 4 ts = 4: */
+/*	Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
+
+	Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
+	which also acknowledges contributions by Mike Burrows, David Wheeler,
+	Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
+	Robert Sedgewick, and Jon L. Bentley.
+
+	This code is licensed under the LGPLv2:
+		LGPL (http://www.gnu.org/copyleft/lgpl.html)
+*/
+
+/*
+	Size and speed optimizations by Manuel Novoa III  (mjn3@codepoet.org).
+
+	More efficient reading of Huffman codes, a streamlined read_bunzip()
+	function, and various other tweaks.  In (limited) tests, approximately
+	20% faster than bzcat on x86 and about 10% faster on arm.
+
+	Note that about 2/3 of the time is spent in read_bunzip() reversing
+	the Burrows-Wheeler transformation.  Much of that time is delay
+	resulting from cache misses.
+
+	I would ask that anyone benefiting from this work, especially those
+	using it in commercial products, consider making a donation to my local
+	non-profit hospice organization in the name of the woman I loved, who
+	passed away Feb. 12, 2003.
+
+		In memory of Toni W. Hagan
+
+		Hospice of Acadiana, Inc.
+		2600 Johnston St., Suite 200
+		Lafayette, LA 70503-3240
+
+		Phone (337) 232-1234 or 1-800-738-2226
+		Fax   (337) 232-1297
+
+		http://www.hospiceacadiana.com/
+
+	Manuel
+ */
+
+/*
+	Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
+*/
+
+
+#ifndef STATIC
+#include <linux/decompress/bunzip2.h>
+#endif /* !STATIC */
+
+#include <linux/decompress/mm.h>
+#include <linux/slab.h>
+
+#ifndef INT_MAX
+#define INT_MAX 0x7fffffff
+#endif
+
+/* Constants for Huffman coding */
+#define MAX_GROUPS		6
+#define GROUP_SIZE   		50	/* 64 would have been more efficient */
+#define MAX_HUFCODE_BITS 	20	/* Longest Huffman code allowed */
+#define MAX_SYMBOLS 		258	/* 256 literals + RUNA + RUNB */
+#define SYMBOL_RUNA		0
+#define SYMBOL_RUNB		1
+
+/* Status return values */
+#define RETVAL_OK			0
+#define RETVAL_LAST_BLOCK		(-1)
+#define RETVAL_NOT_BZIP_DATA		(-2)
+#define RETVAL_UNEXPECTED_INPUT_EOF	(-3)
+#define RETVAL_UNEXPECTED_OUTPUT_EOF	(-4)
+#define RETVAL_DATA_ERROR		(-5)
+#define RETVAL_OUT_OF_MEMORY		(-6)
+#define RETVAL_OBSOLETE_INPUT		(-7)
+
+/* Other housekeeping constants */
+#define BZIP2_IOBUF_SIZE		4096
+
+/* This is what we know about each Huffman coding group */
+struct group_data {
+	/* We have an extra slot at the end of limit[] for a sentinel value. */
+	int limit[MAX_HUFCODE_BITS+1];
+	int base[MAX_HUFCODE_BITS];
+	int permute[MAX_SYMBOLS];
+	int minLen, maxLen;
+};
+
+/* Structure holding all the housekeeping data, including IO buffers and
+   memory that persists between calls to bunzip */
+struct bunzip_data {
+	/* State for interrupting output loop */
+	int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
+	/* I/O tracking data (file handles, buffers, positions, etc.) */
+	int (*fill)(void*, unsigned int);
+	int inbufCount, inbufPos /*, outbufPos*/;
+	unsigned char *inbuf /*,*outbuf*/;
+	unsigned int inbufBitCount, inbufBits;
+	/* The CRC values stored in the block header and calculated from the
+	data */
+	unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
+	/* Intermediate buffer and its size (in bytes) */
+	unsigned int *dbuf, dbufSize;
+	/* These things are a bit too big to go on the stack */
+	unsigned char selectors[32768];		/* nSelectors = 15 bits */
+	struct group_data groups[MAX_GROUPS];	/* Huffman coding tables */
+	int io_error;			/* non-zero if we have IO error */
+};
+
+
+/* Return the next nnn bits of input.  All reads from the compressed input
+   are done through this function.  All reads are big endian */
+static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
+{
+	unsigned int bits = 0;
+
+	/* If we need to get more data from the byte buffer, do so.
+	   (Loop getting one byte at a time to enforce endianness and avoid
+	   unaligned access.) */
+	while (bd->inbufBitCount < bits_wanted) {
+		/* If we need to read more data from file into byte buffer, do
+		   so */
+		if (bd->inbufPos == bd->inbufCount) {
+			if (bd->io_error)
+				return 0;
+			bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
+			if (bd->inbufCount <= 0) {
+				bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
+				return 0;
+			}
+			bd->inbufPos = 0;
+		}
+		/* Avoid 32-bit overflow (dump bit buffer to top of output) */
+		if (bd->inbufBitCount >= 24) {
+			bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
+			bits_wanted -= bd->inbufBitCount;
+			bits <<= bits_wanted;
+			bd->inbufBitCount = 0;
+		}
+		/* Grab next 8 bits of input from buffer. */
+		bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
+		bd->inbufBitCount += 8;
+	}
+	/* Calculate result */
+	bd->inbufBitCount -= bits_wanted;
+	bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
+
+	return bits;
+}
+
+/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
+
+static int INIT get_next_block(struct bunzip_data *bd)
+{
+	struct group_data *hufGroup = NULL;
+	int *base = NULL;
+	int *limit = NULL;
+	int dbufCount, nextSym, dbufSize, groupCount, selector,
+		i, j, k, t, runPos, symCount, symTotal, nSelectors,
+		byteCount[256];
+	unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
+	unsigned int *dbuf, origPtr;
+
+	dbuf = bd->dbuf;
+	dbufSize = bd->dbufSize;
+	selectors = bd->selectors;
+
+	/* Read in header signature and CRC, then validate signature.
+	   (last block signature means CRC is for whole file, return now) */
+	i = get_bits(bd, 24);
+	j = get_bits(bd, 24);
+	bd->headerCRC = get_bits(bd, 32);
+	if ((i == 0x177245) && (j == 0x385090))
+		return RETVAL_LAST_BLOCK;
+	if ((i != 0x314159) || (j != 0x265359))
+		return RETVAL_NOT_BZIP_DATA;
+	/* We can add support for blockRandomised if anybody complains.
+	   There was some code for this in busybox 1.0.0-pre3, but nobody ever
+	   noticed that it didn't actually work. */
+	if (get_bits(bd, 1))
+		return RETVAL_OBSOLETE_INPUT;
+	origPtr = get_bits(bd, 24);
+	if (origPtr > dbufSize)
+		return RETVAL_DATA_ERROR;
+	/* mapping table: if some byte values are never used (encoding things
+	   like ascii text), the compression code removes the gaps to have fewer
+	   symbols to deal with, and writes a sparse bitfield indicating which
+	   values were present.  We make a translation table to convert the
+	   symbols back to the corresponding bytes. */
+	t = get_bits(bd, 16);
+	symTotal = 0;
+	for (i = 0; i < 16; i++) {
+		if (t&(1 << (15-i))) {
+			k = get_bits(bd, 16);
+			for (j = 0; j < 16; j++)
+				if (k&(1 << (15-j)))
+					symToByte[symTotal++] = (16*i)+j;
+		}
+	}
+	/* How many different Huffman coding groups does this block use? */
+	groupCount = get_bits(bd, 3);
+	if (groupCount < 2 || groupCount > MAX_GROUPS)
+		return RETVAL_DATA_ERROR;
+	/* nSelectors: Every GROUP_SIZE many symbols we select a new
+	   Huffman coding group.  Read in the group selector list,
+	   which is stored as MTF encoded bit runs.  (MTF = Move To
+	   Front, as each value is used it's moved to the start of the
+	   list.) */
+	nSelectors = get_bits(bd, 15);
+	if (!nSelectors)
+		return RETVAL_DATA_ERROR;
+	for (i = 0; i < groupCount; i++)
+		mtfSymbol[i] = i;
+	for (i = 0; i < nSelectors; i++) {
+		/* Get next value */
+		for (j = 0; get_bits(bd, 1); j++)
+			if (j >= groupCount)
+				return RETVAL_DATA_ERROR;
+		/* Decode MTF to get the next selector */
+		uc = mtfSymbol[j];
+		for (; j; j--)
+			mtfSymbol[j] = mtfSymbol[j-1];
+		mtfSymbol[0] = selectors[i] = uc;
+	}
+	/* Read the Huffman coding tables for each group, which code
+	   for symTotal literal symbols, plus two run symbols (RUNA,
+	   RUNB) */
+	symCount = symTotal+2;
+	for (j = 0; j < groupCount; j++) {
+		unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
+		int	minLen,	maxLen, pp;
+		/* Read Huffman code lengths for each symbol.  They're
+		   stored in a way similar to mtf; record a starting
+		   value for the first symbol, and an offset from the
+		   previous value for every symbol after that.
+		   (Subtracting 1 before the loop and then adding it
+		   back at the end is an optimization that makes the
+		   test inside the loop simpler: symbol length 0
+		   becomes negative, so an unsigned inequality catches
+		   it.) */
+		t = get_bits(bd, 5)-1;
+		for (i = 0; i < symCount; i++) {
+			for (;;) {
+				if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
+					return RETVAL_DATA_ERROR;
+
+				/* If first bit is 0, stop.  Else
+				   second bit indicates whether to
+				   increment or decrement the value.
+				   Optimization: grab 2 bits and unget
+				   the second if the first was 0. */
+
+				k = get_bits(bd, 2);
+				if (k < 2) {
+					bd->inbufBitCount++;
+					break;
+				}
+				/* Add one if second bit 1, else
+				 * subtract 1.  Avoids if/else */
+				t += (((k+1)&2)-1);
+			}
+			/* Correct for the initial -1, to get the
+			 * final symbol length */
+			length[i] = t+1;
+		}
+		/* Find largest and smallest lengths in this group */
+		minLen = maxLen = length[0];
+
+		for (i = 1; i < symCount; i++) {
+			if (length[i] > maxLen)
+				maxLen = length[i];
+			else if (length[i] < minLen)
+				minLen = length[i];
+		}
+
+		/* Calculate permute[], base[], and limit[] tables from
+		 * length[].
+		 *
+		 * permute[] is the lookup table for converting
+		 * Huffman coded symbols into decoded symbols.  base[]
+		 * is the amount to subtract from the value of a
+		 * Huffman symbol of a given length when using
+		 * permute[].
+		 *
+		 * limit[] indicates the largest numerical value a
+		 * symbol with a given number of bits can have.  This
+		 * is how the Huffman codes can vary in length: each
+		 * code with a value > limit[length] needs another
+		 * bit.
+		 */
+		hufGroup = bd->groups+j;
+		hufGroup->minLen = minLen;
+		hufGroup->maxLen = maxLen;
+		/* Note that minLen can't be smaller than 1, so we
+		   adjust the base and limit array pointers so we're
+		   not always wasting the first entry.  We do this
+		   again when using them (during symbol decoding).*/
+		base = hufGroup->base-1;
+		limit = hufGroup->limit-1;
+		/* Calculate permute[].  Concurrently, initialize
+		 * temp[] and limit[]. */
+		pp = 0;
+		for (i = minLen; i <= maxLen; i++) {
+			temp[i] = limit[i] = 0;
+			for (t = 0; t < symCount; t++)
+				if (length[t] == i)
+					hufGroup->permute[pp++] = t;
+		}
+		/* Count symbols coded for at each bit length */
+		for (i = 0; i < symCount; i++)
+			temp[length[i]]++;
+		/* Calculate limit[] (the largest symbol-coding value
+		 *at each bit length, which is (previous limit <<
+		 *1)+symbols at this level), and base[] (number of
+		 *symbols to ignore at each bit length, which is limit
+		 *minus the cumulative count of symbols coded for
+		 *already). */
+		pp = t = 0;
+		for (i = minLen; i < maxLen; i++) {
+			pp += temp[i];
+			/* We read the largest possible symbol size
+			   and then unget bits after determining how
+			   many we need, and those extra bits could be
+			   set to anything.  (They're noise from
+			   future symbols.)  At each level we're
+			   really only interested in the first few
+			   bits, so here we set all the trailing
+			   to-be-ignored bits to 1 so they don't
+			   affect the value > limit[length]
+			   comparison. */
+			limit[i] = (pp << (maxLen - i)) - 1;
+			pp <<= 1;
+			base[i+1] = pp-(t += temp[i]);
+		}
+		limit[maxLen+1] = INT_MAX; /* Sentinel value for
+					    * reading next sym. */
+		limit[maxLen] = pp+temp[maxLen]-1;
+		base[minLen] = 0;
+	}
+	/* We've finished reading and digesting the block header.  Now
+	   read this block's Huffman coded symbols from the file and
+	   undo the Huffman coding and run length encoding, saving the
+	   result into dbuf[dbufCount++] = uc */
+
+	/* Initialize symbol occurrence counters and symbol Move To
+	 * Front table */
+	for (i = 0; i < 256; i++) {
+		byteCount[i] = 0;
+		mtfSymbol[i] = (unsigned char)i;
+	}
+	/* Loop through compressed symbols. */
+	runPos = dbufCount = symCount = selector = 0;
+	for (;;) {
+		/* Determine which Huffman coding group to use. */
+		if (!(symCount--)) {
+			symCount = GROUP_SIZE-1;
+			if (selector >= nSelectors)
+				return RETVAL_DATA_ERROR;
+			hufGroup = bd->groups+selectors[selector++];
+			base = hufGroup->base-1;
+			limit = hufGroup->limit-1;
+		}
+		/* Read next Huffman-coded symbol. */
+		/* Note: It is far cheaper to read maxLen bits and
+		   back up than it is to read minLen bits and then an
+		   additional bit at a time, testing as we go.
+		   Because there is a trailing last block (with file
+		   CRC), there is no danger of the overread causing an
+		   unexpected EOF for a valid compressed file.  As a
+		   further optimization, we do the read inline
+		   (falling back to a call to get_bits if the buffer
+		   runs dry).  The following (up to got_huff_bits:) is
+		   equivalent to j = get_bits(bd, hufGroup->maxLen);
+		 */
+		while (bd->inbufBitCount < hufGroup->maxLen) {
+			if (bd->inbufPos == bd->inbufCount) {
+				j = get_bits(bd, hufGroup->maxLen);
+				goto got_huff_bits;
+			}
+			bd->inbufBits =
+				(bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
+			bd->inbufBitCount += 8;
+		};
+		bd->inbufBitCount -= hufGroup->maxLen;
+		j = (bd->inbufBits >> bd->inbufBitCount)&
+			((1 << hufGroup->maxLen)-1);
+got_huff_bits:
+		/* Figure out how many bits are in next symbol and
+		 * unget extras */
+		i = hufGroup->minLen;
+		while (j > limit[i])
+			++i;
+		bd->inbufBitCount += (hufGroup->maxLen - i);
+		/* Huffman decode value to get nextSym (with bounds checking) */
+		if ((i > hufGroup->maxLen)
+			|| (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
+				>= MAX_SYMBOLS))
+			return RETVAL_DATA_ERROR;
+		nextSym = hufGroup->permute[j];
+		/* We have now decoded the symbol, which indicates
+		   either a new literal byte, or a repeated run of the
+		   most recent literal byte.  First, check if nextSym
+		   indicates a repeated run, and if so loop collecting
+		   how many times to repeat the last literal. */
+		if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
+			/* If this is the start of a new run, zero out
+			 * counter */
+			if (!runPos) {
+				runPos = 1;
+				t = 0;
+			}
+			/* Neat trick that saves 1 symbol: instead of
+			   or-ing 0 or 1 at each bit position, add 1
+			   or 2 instead.  For example, 1011 is 1 << 0
+			   + 1 << 1 + 2 << 2.  1010 is 2 << 0 + 2 << 1
+			   + 1 << 2.  You can make any bit pattern
+			   that way using 1 less symbol than the basic
+			   or 0/1 method (except all bits 0, which
+			   would use no symbols, but a run of length 0
+			   doesn't mean anything in this context).
+			   Thus space is saved. */
+			t += (runPos << nextSym);
+			/* +runPos if RUNA; +2*runPos if RUNB */
+
+			runPos <<= 1;
+			continue;
+		}
+		/* When we hit the first non-run symbol after a run,
+		   we now know how many times to repeat the last
+		   literal, so append that many copies to our buffer
+		   of decoded symbols (dbuf) now.  (The last literal
+		   used is the one at the head of the mtfSymbol
+		   array.) */
+		if (runPos) {
+			runPos = 0;
+			if (dbufCount+t >= dbufSize)
+				return RETVAL_DATA_ERROR;
+
+			uc = symToByte[mtfSymbol[0]];
+			byteCount[uc] += t;
+			while (t--)
+				dbuf[dbufCount++] = uc;
+		}
+		/* Is this the terminating symbol? */
+		if (nextSym > symTotal)
+			break;
+		/* At this point, nextSym indicates a new literal
+		   character.  Subtract one to get the position in the
+		   MTF array at which this literal is currently to be
+		   found.  (Note that the result can't be -1 or 0,
+		   because 0 and 1 are RUNA and RUNB.  But another
+		   instance of the first symbol in the mtf array,
+		   position 0, would have been handled as part of a
+		   run above.  Therefore 1 unused mtf position minus 2
+		   non-literal nextSym values equals -1.) */
+		if (dbufCount >= dbufSize)
+			return RETVAL_DATA_ERROR;
+		i = nextSym - 1;
+		uc = mtfSymbol[i];
+		/* Adjust the MTF array.  Since we typically expect to
+		 *move only a small number of symbols, and are bound
+		 *by 256 in any case, using memmove here would
+		 *typically be bigger and slower due to function call
+		 *overhead and other assorted setup costs. */
+		do {
+			mtfSymbol[i] = mtfSymbol[i-1];
+		} while (--i);
+		mtfSymbol[0] = uc;
+		uc = symToByte[uc];
+		/* We have our literal byte.  Save it into dbuf. */
+		byteCount[uc]++;
+		dbuf[dbufCount++] = (unsigned int)uc;
+	}
+	/* At this point, we've read all the Huffman-coded symbols
+	   (and repeated runs) for this block from the input stream,
+	   and decoded them into the intermediate buffer.  There are
+	   dbufCount many decoded bytes in dbuf[].  Now undo the
+	   Burrows-Wheeler transform on dbuf.  See
+	   http://dogma.net/markn/articles/bwt/bwt.htm
+	 */
+	/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
+	j = 0;
+	for (i = 0; i < 256; i++) {
+		k = j+byteCount[i];
+		byteCount[i] = j;
+		j = k;
+	}
+	/* Figure out what order dbuf would be in if we sorted it. */
+	for (i = 0; i < dbufCount; i++) {
+		uc = (unsigned char)(dbuf[i] & 0xff);
+		dbuf[byteCount[uc]] |= (i << 8);
+		byteCount[uc]++;
+	}
+	/* Decode first byte by hand to initialize "previous" byte.
+	   Note that it doesn't get output, and if the first three
+	   characters are identical it doesn't qualify as a run (hence
+	   writeRunCountdown = 5). */
+	if (dbufCount) {
+		if (origPtr >= dbufCount)
+			return RETVAL_DATA_ERROR;
+		bd->writePos = dbuf[origPtr];
+		bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
+		bd->writePos >>= 8;
+		bd->writeRunCountdown = 5;
+	}
+	bd->writeCount = dbufCount;
+
+	return RETVAL_OK;
+}
+
+/* Undo burrows-wheeler transform on intermediate buffer to produce output.
+   If start_bunzip was initialized with out_fd =-1, then up to len bytes of
+   data are written to outbuf.  Return value is number of bytes written or
+   error (all errors are negative numbers).  If out_fd!=-1, outbuf and len
+   are ignored, data is written to out_fd and return is RETVAL_OK or error.
+*/
+
+static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
+{
+	const unsigned int *dbuf;
+	int pos, xcurrent, previous, gotcount;
+
+	/* If last read was short due to end of file, return last block now */
+	if (bd->writeCount < 0)
+		return bd->writeCount;
+
+	gotcount = 0;
+	dbuf = bd->dbuf;
+	pos = bd->writePos;
+	xcurrent = bd->writeCurrent;
+
+	/* We will always have pending decoded data to write into the output
+	   buffer unless this is the very first call (in which case we haven't
+	   Huffman-decoded a block into the intermediate buffer yet). */
+
+	if (bd->writeCopies) {
+		/* Inside the loop, writeCopies means extra copies (beyond 1) */
+		--bd->writeCopies;
+		/* Loop outputting bytes */
+		for (;;) {
+			/* If the output buffer is full, snapshot
+			 * state and return */
+			if (gotcount >= len) {
+				bd->writePos = pos;
+				bd->writeCurrent = xcurrent;
+				bd->writeCopies++;
+				return len;
+			}
+			/* Write next byte into output buffer, updating CRC */
+			outbuf[gotcount++] = xcurrent;
+			bd->writeCRC = (((bd->writeCRC) << 8)
+				^bd->crc32Table[((bd->writeCRC) >> 24)
+				^xcurrent]);
+			/* Loop now if we're outputting multiple
+			 * copies of this byte */
+			if (bd->writeCopies) {
+				--bd->writeCopies;
+				continue;
+			}
+decode_next_byte:
+			if (!bd->writeCount--)
+				break;
+			/* Follow sequence vector to undo
+			 * Burrows-Wheeler transform */
+			previous = xcurrent;
+			pos = dbuf[pos];
+			xcurrent = pos&0xff;
+			pos >>= 8;
+			/* After 3 consecutive copies of the same
+			   byte, the 4th is a repeat count.  We count
+			   down from 4 instead of counting up because
+			   testing for non-zero is faster */
+			if (--bd->writeRunCountdown) {
+				if (xcurrent != previous)
+					bd->writeRunCountdown = 4;
+			} else {
+				/* We have a repeated run, this byte
+				 * indicates the count */
+				bd->writeCopies = xcurrent;
+				xcurrent = previous;
+				bd->writeRunCountdown = 5;
+				/* Sometimes there are just 3 bytes
+				 * (run length 0) */
+				if (!bd->writeCopies)
+					goto decode_next_byte;
+				/* Subtract the 1 copy we'd output
+				 * anyway to get extras */
+				--bd->writeCopies;
+			}
+		}
+		/* Decompression of this block completed successfully */
+		bd->writeCRC = ~bd->writeCRC;
+		bd->totalCRC = ((bd->totalCRC << 1) |
+				(bd->totalCRC >> 31)) ^ bd->writeCRC;
+		/* If this block had a CRC error, force file level CRC error. */
+		if (bd->writeCRC != bd->headerCRC) {
+			bd->totalCRC = bd->headerCRC+1;
+			return RETVAL_LAST_BLOCK;
+		}
+	}
+
+	/* Refill the intermediate buffer by Huffman-decoding next
+	 * block of input */
+	/* (previous is just a convenient unused temp variable here) */
+	previous = get_next_block(bd);
+	if (previous) {
+		bd->writeCount = previous;
+		return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
+	}
+	bd->writeCRC = 0xffffffffUL;
+	pos = bd->writePos;
+	xcurrent = bd->writeCurrent;
+	goto decode_next_byte;
+}
+
+static int INIT nofill(void *buf, unsigned int len)
+{
+	return -1;
+}
+
+/* Allocate the structure, read file header.  If in_fd ==-1, inbuf must contain
+   a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
+   ignored, and data is read from file handle into temporary buffer. */
+static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
+			     int (*fill)(void*, unsigned int))
+{
+	struct bunzip_data *bd;
+	unsigned int i, j, c;
+	const unsigned int BZh0 =
+		(((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
+		+(((unsigned int)'h') << 8)+(unsigned int)'0';
+
+	/* Figure out how much data to allocate */
+	i = sizeof(struct bunzip_data);
+
+	/* Allocate bunzip_data.  Most fields initialize to zero. */
+	bd = *bdp = malloc(i);
+	memset(bd, 0, sizeof(struct bunzip_data));
+	/* Setup input buffer */
+	bd->inbuf = inbuf;
+	bd->inbufCount = len;
+	if (fill != NULL)
+		bd->fill = fill;
+	else
+		bd->fill = nofill;
+
+	/* Init the CRC32 table (big endian) */
+	for (i = 0; i < 256; i++) {
+		c = i << 24;
+		for (j = 8; j; j--)
+			c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);
+		bd->crc32Table[i] = c;
+	}
+
+	/* Ensure that file starts with "BZh['1'-'9']." */
+	i = get_bits(bd, 32);
+	if (((unsigned int)(i-BZh0-1)) >= 9)
+		return RETVAL_NOT_BZIP_DATA;
+
+	/* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
+	   uncompressed data.  Allocate intermediate buffer for block. */
+	bd->dbufSize = 100000*(i-BZh0);
+
+	bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
+	return RETVAL_OK;
+}
+
+/* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,
+   not end of file.) */
+STATIC int INIT bunzip2(unsigned char *buf, int len,
+			int(*fill)(void*, unsigned int),
+			int(*flush)(void*, unsigned int),
+			unsigned char *outbuf,
+			int *pos,
+			void(*error_fn)(char *x))
+{
+	struct bunzip_data *bd;
+	int i = -1;
+	unsigned char *inbuf;
+
+	set_error_fn(error_fn);
+	if (flush)
+		outbuf = malloc(BZIP2_IOBUF_SIZE);
+	else
+		len -= 4; /* Uncompressed size hack active in pre-boot
+			     environment */
+	if (!outbuf) {
+		error("Could not allocate output bufer");
+		return -1;
+	}
+	if (buf)
+		inbuf = buf;
+	else
+		inbuf = malloc(BZIP2_IOBUF_SIZE);
+	if (!inbuf) {
+		error("Could not allocate input bufer");
+		goto exit_0;
+	}
+	i = start_bunzip(&bd, inbuf, len, fill);
+	if (!i) {
+		for (;;) {
+			i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
+			if (i <= 0)
+				break;
+			if (!flush)
+				outbuf += i;
+			else
+				if (i != flush(outbuf, i)) {
+					i = RETVAL_UNEXPECTED_OUTPUT_EOF;
+					break;
+				}
+		}
+	}
+	/* Check CRC and release memory */
+	if (i == RETVAL_LAST_BLOCK) {
+		if (bd->headerCRC != bd->totalCRC)
+			error("Data integrity error when decompressing.");
+		else
+			i = RETVAL_OK;
+	} else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
+		error("Compressed file ends unexpectedly");
+	}
+	if (bd->dbuf)
+		large_free(bd->dbuf);
+	if (pos)
+		*pos = bd->inbufPos;
+	free(bd);
+	if (!buf)
+		free(inbuf);
+exit_0:
+	if (flush)
+		free(outbuf);
+	return i;
+}
+
+#define decompress bunzip2
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
new file mode 100644
index 000000000000..e36b296fc9f8
--- /dev/null
+++ b/lib/decompress_inflate.c
@@ -0,0 +1,168 @@
+#ifdef STATIC
+/* Pre-boot environment: included */
+
+/* define _LINUX_KERNEL_H so that the header it guards is skipped in the
+ * pre-boot environment: it causes lots of errors about console_printk on ARM */
+#define _LINUX_KERNEL_H
+
+#include "zlib_inflate/inftrees.c"
+#include "zlib_inflate/inffast.c"
+#include "zlib_inflate/inflate.c"
+
+#else /* STATIC */
+/* initramfs et al: linked */
+
+#include <linux/zutil.h>
+
+#include "zlib_inflate/inftrees.h"
+#include "zlib_inflate/inffast.h"
+#include "zlib_inflate/inflate.h"
+
+#include "zlib_inflate/infutil.h"
+
+#endif /* STATIC */
+
+#include <linux/decompress/mm.h>
+#include <linux/slab.h>
+
+#define INBUF_LEN (16*1024)
+
+/* Inflate a gzip stream from buf (or, when buf is NULL, from data pulled in
+ * with fill()) into out_buf, or into chunks handed to flush() when it is set.
+ * Returns 0 on success, -1 (or a zlib error code) on failure. */
+STATIC int INIT gunzip(unsigned char *buf, int len,
+		       int(*fill)(void*, unsigned int),
+		       int(*flush)(void*, unsigned int),
+		       unsigned char *out_buf,
+		       int *pos,
+		       void(*error_fn)(char *x)) {
+	u8 *zbuf;
+	struct z_stream_s *strm;
+	int rc;
+	size_t out_len;
+
+	set_error_fn(error_fn);
+	rc = -1;
+	if (flush) {
+		out_len = 0x8000; /* 32 K */
+		out_buf = malloc(out_len);
+	} else {
+		out_len = 0x7fffffff; /* no limit */
+	}
+	if (!out_buf) {
+		error("Out of memory while allocating output buffer");
+		goto gunzip_nomem1;
+	}
+
+	if (buf)
+		zbuf = buf;
+	else {
+		zbuf = malloc(INBUF_LEN);
+		len = 0;
+	}
+	if (!zbuf) {
+		error("Out of memory while allocating input buffer");
+		goto gunzip_nomem2;
+	}
+
+	strm = malloc(sizeof(*strm));
+	if (strm == NULL) {
+		error("Out of memory while allocating z_stream");
+		goto gunzip_nomem3;
+	}
+
+	strm->workspace = malloc(flush ? zlib_inflate_workspacesize() :
+				 sizeof(struct inflate_state));
+	if (strm->workspace == NULL) {
+		error("Out of memory while allocating workspace");
+		goto gunzip_nomem4;
+	}
+	/* nothing buffered yet: pull data in; without fill() that is a failure */
+	if (len == 0)
+		len = fill ? fill(zbuf, INBUF_LEN) : -1;
+
+	/* verify the gzip header */
+	if (len < 10 ||
+	   zbuf[0] != 0x1f || zbuf[1] != 0x8b || zbuf[2] != 0x08) {
+		if (pos)
+			*pos = 0;
+		error("Not a gzip file");
+		goto gunzip_5;
+	}
+	/* skip over the fixed gzip header (1f,8b,08... 10 bytes total) */
+	strm->next_in = zbuf + 10;
+	strm->avail_in = len - 10;
+	/* skip over asciz filename, never reading past the data we hold */
+	if (zbuf[3] & 0x8) {
+		do {
+			if (strm->avail_in == 0) {
+				error("header error");
+				goto gunzip_5;
+			}
+			--strm->avail_in;
+		} while (*strm->next_in++);
+	}
+	strm->next_out = out_buf;
+	strm->avail_out = out_len;
+	rc = zlib_inflateInit2(strm, -MAX_WBITS);
+	if (!flush) {
+		WS(strm)->inflate_state.wsize = 0;
+		WS(strm)->inflate_state.window = NULL;
+	}
+
+	while (rc == Z_OK) {
+		if (strm->avail_in == 0) {
+			/* TODO: handle case where both pos and fill are set */
+			len = fill ? fill(zbuf, INBUF_LEN) : -1;
+			if (len < 0) {
+				rc = -1;
+				error("read error");
+				break;
+			}
+			strm->next_in = zbuf;
+			strm->avail_in = len;
+		}
+		rc = zlib_inflate(strm, 0);
+
+		/* Write any data generated */
+		if (flush && strm->next_out > out_buf) {
+			int l = strm->next_out - out_buf;
+			if (l != flush(out_buf, l)) {
+				rc = -1;
+				error("write error");
+				break;
+			}
+			strm->next_out = out_buf;
+			strm->avail_out = out_len;
+		}
+
+		/* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */
+		if (rc == Z_STREAM_END) {
+			rc = 0;
+			break;
+		} else if (rc != Z_OK) {
+			error("uncompression error");
+			rc = -1;
+		}
+	}
+
+	zlib_inflateEnd(strm);
+	if (pos)
+		/* add + 8 to skip over trailer */
+		*pos = strm->next_in - zbuf+8;
+
+gunzip_5:
+	free(strm->workspace);
+gunzip_nomem4:
+	free(strm);
+gunzip_nomem3:
+	if (!buf)
+		free(zbuf);
+gunzip_nomem2:
+	if (flush)
+		free(out_buf);
+gunzip_nomem1:
+	return rc; /* returns Z_OK (0) if successful */
+}
+
+#define decompress gunzip
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
new file mode 100644
index 000000000000..32123a1340e6
--- /dev/null
+++ b/lib/decompress_unlzma.c
@@ -0,0 +1,648 @@
+/* Lzma decompressor for Linux kernel. Shamelessly snarfed
+ *from busybox 1.1.1
+ *
+ *Linux kernel adaptation
+ *Copyright (C) 2006  Alain < alain@knaff.lu >
+ *
+ *Based on small lzma deflate implementation/Small range coder
+ *implementation for lzma.
+ *Copyright (C) 2006  Aurelien Jacobs < aurel@gnuage.org >
+ *
+ *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ *Copyright (C) 1999-2005  Igor Pavlov
+ *
+ *Copyrights of the parts, see headers below.
+ *
+ *
+ *This program is free software; you can redistribute it and/or
+ *modify it under the terms of the GNU Lesser General Public
+ *License as published by the Free Software Foundation; either
+ *version 2.1 of the License, or (at your option) any later version.
+ *
+ *This program is distributed in the hope that it will be useful,
+ *but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *Lesser General Public License for more details.
+ *
+ *You should have received a copy of the GNU Lesser General Public
+ *License along with this library; if not, write to the Free Software
+ *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef STATIC
+#include <linux/decompress/unlzma.h>
+#endif /* STATIC */
+
+#include <linux/decompress/mm.h>
+#include <linux/slab.h>
+
+#define	MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+static long long INIT read_int(unsigned char *ptr, int size)
+{
+	int idx = size;	/* walk from the last byte down to ptr[0] */
+	long long value = 0;
+
+	while (idx-- > 0)	/* ptr[0] ends up least significant */
+		value = (value << 8) | ptr[idx];
+	return value;
+}
+
+#define ENDIAN_CONVERT(x) \
+  x = (typeof(x))read_int((unsigned char *)&x, sizeof(x))
+
+
+/* Small range coder implementation for lzma.
+ *Copyright (C) 2006  Aurelien Jacobs < aurel@gnuage.org >
+ *
+ *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ *Copyright (c) 1999-2005  Igor Pavlov
+ */
+
+#include <linux/compiler.h>
+
+#define LZMA_IOBUF_SIZE	0x10000
+
+struct rc {
+	int (*fill)(void*, unsigned int);	/* refill callback used by rc_read() */
+	uint8_t *ptr;		/* next input byte to consume */
+	uint8_t *buffer;	/* start of the input buffer */
+	uint8_t *buffer_end;	/* buffer + buffer_size */
+	int buffer_size;	/* set by rc_init() or from fill()'s return */
+	uint32_t code;		/* range decoder: bytes shifted in from input */
+	uint32_t range;		/* range decoder: current range */
+	uint32_t bound;		/* split point set by rc_is_bit_0_helper() */
+};
+
+
+#define RC_TOP_BITS 24
+#define RC_MOVE_BITS 5
+#define RC_MODEL_TOTAL_BITS 11
+
+
+/* Refill rc->buffer via rc->fill(); used at startup and by rc_do_normalize() */
+static void INIT rc_read(struct rc *rc)
+{
+	rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE);
+	if (rc->buffer_size <= 0)
+		error("unexpected EOF");	/* NOTE(review): falls through if error() returns */
+	rc->ptr = rc->buffer;	/* restart reading at the buffer start */
+	rc->buffer_end = rc->buffer + rc->buffer_size;
+}
+
+/* Point the range decoder at an input buffer and reset its coding state */
+static inline void INIT rc_init(struct rc *rc,
+				       int (*fill)(void*, unsigned int),
+				       char *buffer, int buffer_size)
+{
+	uint8_t *start = (uint8_t *)buffer;
+
+	rc->fill = fill;
+	rc->buffer_size = buffer_size;
+	rc->buffer = rc->ptr = start;
+	rc->buffer_end = start + buffer_size;
+	rc->code = 0;
+	rc->range = 0xFFFFFFFF;
+}
+
+static inline void INIT rc_init_code(struct rc *rc)
+{
+	int left = 5;	/* code is primed with the next five input bytes */
+
+	while (left--) {
+		if (rc->ptr >= rc->buffer_end)
+			rc_read(rc);	/* buffer drained: refill first */
+		rc->code = (rc->code << 8) | *rc->ptr++;
+	}
+}
+
+
+/* Free rc->buffer.  TODO: bb_maybe_free().  NOTE(review): looks unused here */
+static inline void INIT rc_free(struct rc *rc)
+{
+	free(rc->buffer);
+}
+
+/* Shift one more input byte into code and scale range by the same 8 bits */
+static void INIT rc_do_normalize(struct rc *rc)
+{
+	if (rc->buffer_end <= rc->ptr)
+		rc_read(rc);
+	rc->code = (rc->code << 8) | *rc->ptr++;
+	rc->range <<= 8;
+}
+static inline void INIT rc_normalize(struct rc *rc)
+{
+	if ((rc->range >> RC_TOP_BITS) == 0)	/* top byte empty: renormalize */
+		rc_do_normalize(rc);
+}
+
+/* Normalize the coder, then compute, store and return the bound that
+ * probability *p cuts out of the current range.
+ * This lives apart from rc_is_bit_0() so that the comparison
+ * (rc->code < rc->bound) is always exposed to the optimizer.
+ */
+static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p)
+{
+	rc_normalize(rc);
+	return rc->bound = (rc->range >> RC_MODEL_TOTAL_BITS) * *p;
+}
+static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p)
+{
+	uint32_t bound = rc_is_bit_0_helper(rc, p);
+	return bound > rc->code;	/* 1: the next bit decodes as 0 */
+}
+
+/* A 0 was decoded: shrink range to bound and nudge *p upwards */
+static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p)
+{
+	*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
+	rc->range = rc->bound;
+}
+static inline void INIT rc_update_bit_1(struct rc *rc, uint16_t *p)
+{
+	rc->range -= rc->bound;	/* a 1 was decoded: keep the part above bound */
+	rc->code -= rc->bound;
+	*p -= *p >> RC_MOVE_BITS;	/* nudge *p downwards */
+}
+
+/* Decode one bit with probability *p, update *p, shift the bit into
+ * *symbol and return it. */
+static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol)
+{
+	int bit = !rc_is_bit_0(rc, p);
+
+	if (bit)
+		rc_update_bit_1(rc, p);
+	else
+		rc_update_bit_0(rc, p);
+	*symbol = *symbol * 2 + bit;
+	return bit;
+}
+
+/* Decode one bit without a probability: halve range, test code against it */
+static inline int INIT rc_direct_bit(struct rc *rc)
+{
+	rc_normalize(rc);
+	rc->range >>= 1;
+	if (rc->code < rc->range)
+		return 0;
+
+	rc->code -= rc->range;
+	return 1;
+}
+
+/* Decode num_levels bits through the probabilities at p into *symbol */
+static inline void INIT
+rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol)
+{
+	int remaining;
+
+	*symbol = 1;
+	for (remaining = num_levels; remaining--; )
+		rc_get_bit(rc, p + *symbol, symbol);
+	*symbol -= 1 << num_levels;
+}
+
+
+/*
+ * Small lzma deflate implementation.
+ * Copyright (C) 2006  Aurelien Jacobs < aurel@gnuage.org >
+ *
+ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ * Copyright (C) 1999-2005  Igor Pavlov
+ */
+
+
+struct lzma_header {
+	uint8_t pos;		/* packed lc/lp/pb, checked against 9 * 5 * 5 */
+	uint32_t dict_size;	/* fixed up with ENDIAN_CONVERT in unlzma() */
+	uint64_t dst_size;	/* unlzma() decodes until this output position */
+} __attribute__ ((packed)) ;
+
+
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+#define LZMA_NUM_POS_BITS_MAX 4
+
+#define LZMA_LEN_NUM_LOW_BITS 3
+#define LZMA_LEN_NUM_MID_BITS 3
+#define LZMA_LEN_NUM_HIGH_BITS 8
+
+#define LZMA_LEN_CHOICE 0
+#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1)
+#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1)
+#define LZMA_LEN_MID (LZMA_LEN_LOW \
+		      + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS)))
+#define LZMA_LEN_HIGH (LZMA_LEN_MID \
+		       +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS)))
+#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS))
+
+#define LZMA_NUM_STATES 12
+#define LZMA_NUM_LIT_STATES 7
+
+#define LZMA_START_POS_MODEL_INDEX 4
+#define LZMA_END_POS_MODEL_INDEX 14
+#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1))
+
+#define LZMA_NUM_POS_SLOT_BITS 6
+#define LZMA_NUM_LEN_TO_POS_STATES 4
+
+#define LZMA_NUM_ALIGN_BITS 4
+
+#define LZMA_MATCH_MIN_LEN 2
+
+#define LZMA_IS_MATCH 0
+#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
+#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES)
+#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES)
+#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES)
+#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES)
+#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \
+		       + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
+#define LZMA_SPEC_POS (LZMA_POS_SLOT \
+		       +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS))
+#define LZMA_ALIGN (LZMA_SPEC_POS \
+		    + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX)
+#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS))
+#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS)
+#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS)
+
+
+struct writer {
+	uint8_t *buffer;	/* output buffer written by write_byte() */
+	uint8_t previous_byte;	/* last byte passed to write_byte() */
+	size_t buffer_pos;	/* next write index in buffer */
+	int bufsize;		/* set only when unlzma() allocates buffer */
+	size_t global_pos;	/* grows by dict_size on each flush in write_byte() */
+	int(*flush)(void*, unsigned int);	/* output callback, may be NULL */
+	struct lzma_header *header;	/* source of dict_size and dst_size */
+};
+
+struct cstate {
+	int state;	/* decoder state; indexes the LZMA_IS_* probability tables */
+	uint32_t rep0, rep1, rep2, rep3;	/* match distances, rep0 is the one copied from */
+};
+
+static inline size_t INIT get_pos(struct writer *wr)
+{
+	/* output position: global_pos plus the index within the buffer */
+	return wr->buffer_pos + wr->global_pos;
+}
+
+static inline uint8_t INIT peek_old_byte(struct writer *wr,
+						uint32_t offs)
+{
+	if (!wr->flush) {	/* no flush callback: write_byte() never wraps */
+		int32_t pos;
+		while (offs > wr->header->dict_size)
+			offs -= wr->header->dict_size;
+		pos = wr->buffer_pos - offs;	/* NOTE(review): not checked for pos < 0 */
+		return wr->buffer[pos];
+	} else {	/* write_byte() resets buffer_pos at dict_size */
+		uint32_t pos = wr->buffer_pos - offs;
+		while (pos >= wr->header->dict_size)
+			pos += wr->header->dict_size;	/* relies on unsigned wrap-around */
+		return wr->buffer[pos];
+	}
+
+}
+
+static inline void INIT write_byte(struct writer *wr, uint8_t byte)
+{
+	wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte;
+	if (wr->flush && wr->buffer_pos == wr->header->dict_size) {	/* buffer full */
+		wr->buffer_pos = 0;
+		wr->global_pos += wr->header->dict_size;	/* bytes handed to flush() */
+		wr->flush((char *)wr->buffer, wr->header->dict_size);	/* result ignored */
+	}
+}
+
+
+static inline void INIT copy_byte(struct writer *wr, uint32_t offs)
+{
+	write_byte(wr, peek_old_byte(wr, offs));	/* re-emit the byte offs back */
+}
+
+static inline void INIT copy_bytes(struct writer *wr,
+					 uint32_t rep0, int len)
+{
+	/* always copies one byte; stops early once dst_size is reached */
+	do {
+		copy_byte(wr, rep0);
+	} while (--len != 0 && wr->buffer_pos < wr->header->dst_size);
+}
+
+static inline void INIT process_bit0(struct writer *wr, struct rc *rc,
+				     struct cstate *cst, uint16_t *p,
+				     int pos_state, uint16_t *prob,
+				     int lc, uint32_t literal_pos_mask) {
+	int mi = 1;	/* symbol accumulator filled by rc_get_bit() */
+	rc_update_bit_0(rc, prob);	/* the caller decoded a 0 on prob */
+	prob = (p + LZMA_LITERAL +
+		(LZMA_LIT_SIZE
+		 * (((get_pos(wr) & literal_pos_mask) << lc)
+		    + (wr->previous_byte >> (8 - lc))))
+		);
+
+	if (cst->state >= LZMA_NUM_LIT_STATES) {
+		int match_byte = peek_old_byte(wr, cst->rep0);
+		do {	/* decode guided by match_byte until a bit differs */
+			int bit;
+			uint16_t *prob_lit;
+
+			match_byte <<= 1;
+			bit = match_byte & 0x100;
+			prob_lit = prob + 0x100 + bit + mi;
+			if (rc_get_bit(rc, prob_lit, &mi)) {
+				if (!bit)
+					break;
+			} else {
+				if (bit)
+					break;
+			}
+		} while (mi < 0x100);
+	}
+	while (mi < 0x100) {	/* decode the remaining bits of the byte */
+		uint16_t *prob_lit = prob + mi;
+		rc_get_bit(rc, prob_lit, &mi);
+	}
+	write_byte(wr, mi);	/* truncated to uint8_t by the parameter type */
+	if (cst->state < 4)
+		cst->state = 0;
+	else if (cst->state < 10)
+		cst->state -= 3;
+	else
+		cst->state -= 6;
+}
+
+static inline void INIT process_bit1(struct writer *wr, struct rc *rc,
+					    struct cstate *cst, uint16_t *p,
+					    int pos_state, uint16_t *prob) {
+  int offset;
+	uint16_t *prob_len;
+	int num_bits;
+	int len;
+
+	rc_update_bit_1(rc, prob);	/* the caller decoded a 1 on prob */
+	prob = p + LZMA_IS_REP + cst->state;
+	if (rc_is_bit_0(rc, prob)) {	/* a new distance is decoded further down */
+		rc_update_bit_0(rc, prob);
+		cst->rep3 = cst->rep2;
+		cst->rep2 = cst->rep1;
+		cst->rep1 = cst->rep0;
+		cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3;
+		prob = p + LZMA_LEN_CODER;
+	} else {	/* one of rep0..rep3 is reused as the distance */
+		rc_update_bit_1(rc, prob);
+		prob = p + LZMA_IS_REP_G0 + cst->state;
+		if (rc_is_bit_0(rc, prob)) {
+			rc_update_bit_0(rc, prob);
+			prob = (p + LZMA_IS_REP_0_LONG
+				+ (cst->state <<
+				   LZMA_NUM_POS_BITS_MAX) +
+				pos_state);
+			if (rc_is_bit_0(rc, prob)) {
+				rc_update_bit_0(rc, prob);
+
+				cst->state = cst->state < LZMA_NUM_LIT_STATES ?
+					9 : 11;
+				copy_byte(wr, cst->rep0);	/* single byte at distance rep0 */
+				return;
+			} else {
+				rc_update_bit_1(rc, prob);
+			}
+		} else {
+			uint32_t distance;
+
+			rc_update_bit_1(rc, prob);
+			prob = p + LZMA_IS_REP_G1 + cst->state;
+			if (rc_is_bit_0(rc, prob)) {
+				rc_update_bit_0(rc, prob);
+				distance = cst->rep1;
+			} else {
+				rc_update_bit_1(rc, prob);
+				prob = p + LZMA_IS_REP_G2 + cst->state;
+				if (rc_is_bit_0(rc, prob)) {
+					rc_update_bit_0(rc, prob);
+					distance = cst->rep2;
+				} else {
+					rc_update_bit_1(rc, prob);
+					distance = cst->rep3;
+					cst->rep3 = cst->rep2;
+				}
+				cst->rep2 = cst->rep1;
+			}
+			cst->rep1 = cst->rep0;
+			cst->rep0 = distance;	/* chosen distance moves to the front */
+		}
+		cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11;
+		prob = p + LZMA_REP_LEN_CODER;
+	}
+
+	prob_len = prob + LZMA_LEN_CHOICE;	/* length: low, mid or high range */
+	if (rc_is_bit_0(rc, prob_len)) {
+		rc_update_bit_0(rc, prob_len);
+		prob_len = (prob + LZMA_LEN_LOW
+			    + (pos_state <<
+			       LZMA_LEN_NUM_LOW_BITS));
+		offset = 0;
+		num_bits = LZMA_LEN_NUM_LOW_BITS;
+	} else {
+		rc_update_bit_1(rc, prob_len);
+		prob_len = prob + LZMA_LEN_CHOICE_2;
+		if (rc_is_bit_0(rc, prob_len)) {
+			rc_update_bit_0(rc, prob_len);
+			prob_len = (prob + LZMA_LEN_MID
+				    + (pos_state <<
+				       LZMA_LEN_NUM_MID_BITS));
+			offset = 1 << LZMA_LEN_NUM_LOW_BITS;
+			num_bits = LZMA_LEN_NUM_MID_BITS;
+		} else {
+			rc_update_bit_1(rc, prob_len);
+			prob_len = prob + LZMA_LEN_HIGH;
+			offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
+				  + (1 << LZMA_LEN_NUM_MID_BITS));
+			num_bits = LZMA_LEN_NUM_HIGH_BITS;
+		}
+	}
+
+	rc_bit_tree_decode(rc, prob_len, num_bits, &len);
+	len += offset;
+
+	if (cst->state < 4) {	/* only reached with state 0 or 3 set above */
+		int pos_slot;
+
+		cst->state += LZMA_NUM_LIT_STATES;
+		prob =
+			p + LZMA_POS_SLOT +
+			((len <
+			  LZMA_NUM_LEN_TO_POS_STATES ? len :
+			  LZMA_NUM_LEN_TO_POS_STATES - 1)
+			 << LZMA_NUM_POS_SLOT_BITS);
+		rc_bit_tree_decode(rc, prob,
+				   LZMA_NUM_POS_SLOT_BITS,
+				   &pos_slot);
+		if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
+			int i, mi;
+			num_bits = (pos_slot >> 1) - 1;
+			cst->rep0 = 2 | (pos_slot & 1);
+			if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
+				cst->rep0 <<= num_bits;
+				prob = p + LZMA_SPEC_POS +
+					cst->rep0 - pos_slot - 1;
+			} else {
+				num_bits -= LZMA_NUM_ALIGN_BITS;
+				while (num_bits--)
+					cst->rep0 = (cst->rep0 << 1) |
+						rc_direct_bit(rc);
+				prob = p + LZMA_ALIGN;
+				cst->rep0 <<= LZMA_NUM_ALIGN_BITS;
+				num_bits = LZMA_NUM_ALIGN_BITS;
+			}
+			i = 1;
+			mi = 1;
+			while (num_bits--) {	/* low bits arrive least significant first */
+				if (rc_get_bit(rc, prob + mi, &mi))
+					cst->rep0 |= i;
+				i <<= 1;
+			}
+		} else
+			cst->rep0 = pos_slot;
+		if (++(cst->rep0) == 0)	/* wrapped to 0: unlzma() stops on rep0 == 0 */
+			return;
+	}
+
+	len += LZMA_MATCH_MIN_LEN;
+
+	copy_bytes(wr, cst->rep0, len);
+}
+
+/* Decompress an LZMA stream from buf (allocated here when NULL), refilled via
+ * fill().  Output lands in output (or a buffer allocated here) and is handed
+ * to flush() when set.  Returns 0, or -1 on bad header/allocation failure. */
+STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
+			      int(*fill)(void*, unsigned int),
+			      int(*flush)(void*, unsigned int),
+			      unsigned char *output,
+			      int *posp,
+			      void(*error_fn)(char *x))
+{
+	struct lzma_header header;
+	int lc, pb, lp;
+	uint32_t pos_state_mask;
+	uint32_t literal_pos_mask;
+	uint16_t *p;
+	int num_probs;
+	struct rc rc;
+	int i, mi;
+	struct writer wr;
+	struct cstate cst;
+	unsigned char *inbuf;
+	int ret = -1;
+
+	set_error_fn(error_fn);
+	if (!flush)
+		in_len -= 4; /* Uncompressed size hack active in pre-boot env */
+	if (buf)
+		inbuf = buf;
+	else
+		inbuf = malloc(LZMA_IOBUF_SIZE);
+	if (!inbuf) {
+		error("Could not allocate input bufer");
+		goto exit_0;
+	}
+
+	cst.state = 0;
+	cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1;
+
+	wr.header = &header;
+	wr.flush = flush;
+	wr.global_pos = 0;
+	wr.previous_byte = 0;
+	wr.buffer_pos = 0;
+
+	rc_init(&rc, fill, inbuf, in_len);
+
+	for (i = 0; i < sizeof(header); i++) {
+		if (rc.ptr >= rc.buffer_end)
+			rc_read(&rc);
+		((unsigned char *)&header)[i] = *rc.ptr++;
+	}
+
+	if (header.pos >= (9 * 5 * 5)) {
+		error("bad header");
+		goto exit_1;	/* error() may return: lc/lp/pb would be bogus */
+	}
+
+	mi = 0;
+	lc = header.pos;
+	while (lc >= 9) {
+		mi++;
+		lc -= 9;
+	}
+	pb = 0;
+	lp = mi;
+	while (lp >= 5) {
+		pb++;
+		lp -= 5;
+	}
+	pos_state_mask = (1 << pb) - 1;
+	literal_pos_mask = (1 << lp) - 1;
+	ENDIAN_CONVERT(header.dict_size);
+	ENDIAN_CONVERT(header.dst_size);
+
+	if (header.dict_size == 0)
+		header.dict_size = 1;
+
+	if (output)
+		wr.buffer = output;
+	else {
+		wr.bufsize = MIN(header.dst_size, header.dict_size);
+		wr.buffer = large_malloc(wr.bufsize);
+	}
+	if (wr.buffer == NULL)
+		goto exit_1;
+
+	num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
+	p = (uint16_t *) large_malloc(num_probs * sizeof(*p));
+	if (p == 0)
+		goto exit_2;
+	num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
+	for (i = 0; i < num_probs; i++)
+		p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
+
+	rc_init_code(&rc);
+
+	while (get_pos(&wr) < header.dst_size) {
+		int pos_state =	get_pos(&wr) & pos_state_mask;
+		uint16_t *prob = p + LZMA_IS_MATCH +
+			(cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state;
+		if (rc_is_bit_0(&rc, prob))
+			process_bit0(&wr, &rc, &cst, p, pos_state, prob,
+				     lc, literal_pos_mask);
+		else {
+			process_bit1(&wr, &rc, &cst, p, pos_state, prob);
+			if (cst.rep0 == 0)
+				break;
+		}
+	}
+
+	if (posp)
+		*posp = rc.ptr-rc.buffer;
+	if (wr.flush)
+		wr.flush(wr.buffer, wr.buffer_pos);
+	ret = 0;
+	large_free(p);
+exit_2:
+	if (!output)
+		large_free(wr.buffer);
+exit_1:
+	if (!buf)
+		free(inbuf);
+exit_0:
+	return ret;
+}
+
+#define decompress unlzma
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
new file mode 100644
index 000000000000..d3da7edc034f
--- /dev/null
+++ b/lib/dma-debug.c
@@ -0,0 +1,955 @@
+/*
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
+ *
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/stacktrace.h>
+#include <linux/dma-debug.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include <asm/sections.h>
+
+#define HASH_SIZE       1024ULL
+#define HASH_FN_SHIFT   13
+#define HASH_FN_MASK    (HASH_SIZE - 1)
+
+enum {
+	dma_debug_single,
+	dma_debug_page,
+	dma_debug_sg,
+	dma_debug_coherent,
+};
+
+#define DMA_DEBUG_STACKTRACE_ENTRIES 5
+
+struct dma_debug_entry {
+	struct list_head list;		/* links into a hash bucket or free_entries */
+	struct device    *dev;		/* compared by hash_bucket_find() */
+	int              type;		/* indexes type2name[] */
+	phys_addr_t      paddr;
+	u64              dev_addr;	/* input to hash_fn() */
+	u64              size;
+	int              direction;	/* indexes dir2name[] */
+	int		 sg_call_ents;
+	int		 sg_mapped_ents;
+#ifdef CONFIG_STACKTRACE
+	struct		 stack_trace stacktrace;	/* saved in dma_entry_alloc() */
+	unsigned long	 st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
+#endif
+};
+
+struct hash_bucket {
+	struct list_head list;	/* dma_debug_entry structs hashing to this bucket */
+	spinlock_t lock;	/* held while list is walked or changed */
+} ____cacheline_aligned_in_smp;
+
+/* Hash list to save the allocated dma addresses */
+static struct hash_bucket dma_entry_hash[HASH_SIZE];
+/* List of pre-allocated dma_debug_entry's */
+static LIST_HEAD(free_entries);
+/* Lock for the list above */
+static DEFINE_SPINLOCK(free_entries_lock);
+
+/* Global disable flag - set on error; u32 as debugfs_create_bool needs */
+static u32 global_disable __read_mostly;
+
+/* Global error count */
+static u32 error_count;
+
+/* Global error show enable*/
+static u32 show_all_errors __read_mostly;
+/* Number of errors to show */
+static u32 show_num_errors = 1;
+
+static u32 num_free_entries;
+static u32 min_free_entries;
+
+/* number of preallocated entries requested by kernel cmdline */
+static u32 req_entries;
+
+/* debugfs dentry's for the stuff above */
+static struct dentry *dma_debug_dent        __read_mostly;
+static struct dentry *global_disable_dent   __read_mostly;
+static struct dentry *error_count_dent      __read_mostly;
+static struct dentry *show_all_errors_dent  __read_mostly;
+static struct dentry *show_num_errors_dent  __read_mostly;
+static struct dentry *num_free_entries_dent __read_mostly;
+static struct dentry *min_free_entries_dent __read_mostly;
+
+static const char *type2name[4] = { "single", "page",
+				    "scather-gather", "coherent" };
+
+static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
+				   "DMA_FROM_DEVICE", "DMA_NONE" };
+
+/*
+ * The access to some variables in this macro is racy. We can't use atomic_t
+ * here because all these variables are exported to debugfs. Some of them even
+ * writeable. This is also the reason why a lock won't help much. But anyway,
+ * the races are no big deal. Here is why:
+ *
+ *   error_count: the addition is racy, but the worst thing that can happen is
+ *                that we don't count some errors
+ *   show_num_errors: the subtraction is racy. Also no big deal because in
+ *                    worst case this will result in one warning more in the
+ *                    system log than the user configured. This variable is
+ *                    writeable via debugfs.
+ */
+static inline void dump_entry_trace(struct dma_debug_entry *entry)
+{
+#ifdef CONFIG_STACKTRACE
+	if (!entry)	/* callers pass NULL when there is no mapping to show */
+		return;
+	printk(KERN_WARNING "Mapped at:\n");
+	print_stack_trace(&entry->stacktrace, 0);
+#endif
+}
+
+#define err_printk(dev, entry, format, arg...) do {		\
+		error_count += 1;				\
+		if (show_all_errors || show_num_errors > 0) {	\
+			WARN(1, "%s %s: " format,		\
+			     dev_driver_string(dev),		\
+			     dev_name(dev) , ## arg);		\
+			dump_entry_trace(entry);		\
+		}						\
+		if (!show_all_errors && show_num_errors > 0)	\
+			show_num_errors -= 1;			\
+	} while (0) /* no ';' here: the caller supplies it */
+
+/*
+ * Hash related functions
+ *
+ * Every DMA-API request is saved into a struct dma_debug_entry. To
+ * have quick access to these structs they are stored into a hash.
+ */
+static int hash_fn(struct dma_debug_entry *entry)
+{
+	/*
+	 * Hash function is based on the dma address.
+	 * We use bits 13-22 here as the index into the hash
+	 */
+	return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
+}
+
+/*
+ * Lock the bucket @entry hashes to; the saved IRQ state goes into *flags.
+ */
+static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
+					   unsigned long *flags)
+{
+	struct hash_bucket *bucket = &dma_entry_hash[hash_fn(entry)];
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&bucket->lock, irq_state);
+	*flags = irq_state;
+	return bucket;
+}
+
+/*
+ * Unlock @bucket and restore the IRQ state stored in *flags
+ */
+static void put_hash_bucket(struct hash_bucket *bucket,
+			    unsigned long *flags)
+{
+	unsigned long irq_state = *flags;
+
+	spin_unlock_irqrestore(&bucket->lock, irq_state);
+}
+
+/*
+ * Return the first entry in @bucket with @ref's dev_addr and dev, or NULL
+ */
+static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
+						struct dma_debug_entry *ref)
+{
+	struct dma_debug_entry *cur;
+
+	list_for_each_entry(cur, &bucket->list, list) {
+		if (cur->dev_addr != ref->dev_addr)
+			continue;
+		if (cur->dev == ref->dev)
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Append @entry to the tail of @bucket's list; no locking is done here
+ */
+static void hash_bucket_add(struct hash_bucket *bucket,
+			    struct dma_debug_entry *entry)
+{
+	list_add_tail(&entry->list, &bucket->list);
+}
+
+/*
+ * Unlink @entry from the list it is on; no locking is done here
+ */
+static void hash_bucket_del(struct dma_debug_entry *entry)
+{
+	list_del(&entry->list);
+}
+
+/*
+ * Log every tracked mapping of @dev (of all devices when @dev is NULL)
+ */
+void debug_dma_dump_mappings(struct device *dev)
+{
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+	int idx;
+
+	for (idx = 0; idx < HASH_SIZE; idx++) {
+		bucket = &dma_entry_hash[idx];
+		spin_lock_irqsave(&bucket->lock, flags);
+
+		list_for_each_entry(entry, &bucket->list, list) {
+			if (dev && dev != entry->dev)
+				continue;
+			dev_info(entry->dev,
+				 "%s idx %d P=%Lx D=%Lx L=%Lx %s\n",
+				 type2name[entry->type], idx,
+				 (unsigned long long)entry->paddr,
+				 entry->dev_addr, entry->size,
+				 dir2name[entry->direction]);
+		}
+
+		spin_unlock_irqrestore(&bucket->lock, flags);
+	}
+}
+EXPORT_SYMBOL(debug_dma_dump_mappings);
+
+/*
+ * Insert @entry into the hash: locks the bucket it hashes to,
+ * appends the entry and unlocks again.
+ */
+static void add_dma_entry(struct dma_debug_entry *entry)
+{
+	unsigned long irq_state;
+	struct hash_bucket *target;
+
+	target = get_hash_bucket(entry, &irq_state);
+	hash_bucket_add(target, entry);
+	put_hash_bucket(target, &irq_state);
+}
+
+/* struct dma_entry allocator
+ *
+ * dma_entry_alloc() and dma_entry_free() below hand out and take back
+ * entries of the preallocated free_entries list.
+ */
+static struct dma_debug_entry *dma_entry_alloc(void)
+{
+	struct dma_debug_entry *entry = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&free_entries_lock, flags);
+
+	if (list_empty(&free_entries)) {
+		/* pool exhausted: report it and switch the checker off */
+		printk(KERN_ERR "DMA-API: debugging out of memory "
+				"- disabling\n");
+		global_disable = true;
+	} else {
+		/* detach the first free entry and hand it out zeroed */
+		entry = list_entry(free_entries.next,
+				   struct dma_debug_entry, list);
+		list_del(&entry->list);
+		memset(entry, 0, sizeof(*entry));
+#ifdef CONFIG_STACKTRACE
+		entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
+		entry->stacktrace.entries = entry->st_entries;
+		entry->stacktrace.skip = 2;
+		save_stack_trace(&entry->stacktrace);
+#endif
+		num_free_entries -= 1;
+		if (num_free_entries < min_free_entries)
+			min_free_entries = num_free_entries;
+	}
+
+	spin_unlock_irqrestore(&free_entries_lock, flags);
+
+	return entry;
+}
+
+static void dma_entry_free(struct dma_debug_entry *entry)
+{
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&free_entries_lock, irq_state);
+	/*
+	 * Put the entry at the head of the free list, so that the next
+	 * allocation reuses it while it is still likely to be cache hot.
+	 */
+	list_add(&entry->list, &free_entries);
+	num_free_entries++;
+	spin_unlock_irqrestore(&free_entries_lock, irq_state);
+}
+
+/*
+ * DMA-API debugging init code
+ *
+ * The init code does two things:
+ *   1. Initialize core data structures
+ *   2. Preallocate a given number of dma_debug_entry structs
+ */
+
+static int prealloc_memory(u32 num_entries)
+{
+	struct dma_debug_entry *entry, *tmp;
+	int i;
+
+	/* build the free list; a single failed allocation undoes it all */
+	for (i = 0; i < num_entries; ++i) {
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (entry == NULL)
+			goto out_err;
+		list_add_tail(&entry->list, &free_entries);
+	}
+
+	min_free_entries = num_entries;
+	num_free_entries = num_entries;
+
+	printk(KERN_INFO "DMA-API: preallocated %d debug entries\n",
+			num_entries);
+
+	return 0;
+
+out_err:
+	/* free every entry that made it onto the list so far */
+	list_for_each_entry_safe(entry, tmp, &free_entries, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	return -ENOMEM;
+}
+
+static int dma_debug_fs_init(void)
+{
+	dma_debug_dent = debugfs_create_dir("dma-api", NULL);	/* parent of all files below */
+	if (!dma_debug_dent) {
+		printk(KERN_ERR "DMA-API: can not create debugfs directory\n");
+		return -ENOMEM;
+	}
+
+	global_disable_dent = debugfs_create_bool("disabled", 0444,
+			dma_debug_dent,
+			(u32 *)&global_disable);	/* NOTE(review): accessed as a u32 */
+	if (!global_disable_dent)
+		goto out_err;
+
+	error_count_dent = debugfs_create_u32("error_count", 0444,
+			dma_debug_dent, &error_count);
+	if (!error_count_dent)
+		goto out_err;
+
+	show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
+			dma_debug_dent,
+			&show_all_errors);	/* mode 0644: writable */
+	if (!show_all_errors_dent)
+		goto out_err;
+
+	show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
+			dma_debug_dent,
+			&show_num_errors);	/* mode 0644: writable */
+	if (!show_num_errors_dent)
+		goto out_err;
+
+	num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
+			dma_debug_dent,
+			&num_free_entries);
+	if (!num_free_entries_dent)
+		goto out_err;
+
+	min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
+			dma_debug_dent,
+			&min_free_entries);
+	if (!min_free_entries_dent)
+		goto out_err;
+
+	return 0;
+
+out_err:
+	debugfs_remove_recursive(dma_debug_dent);	/* also drops files created so far */
+
+	return -ENOMEM;
+}
+
+static int device_dma_allocations(struct device *dev)
+{
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+	int idx, pending = 0;
+
+	for (idx = 0; idx < HASH_SIZE; ++idx) {
+		bucket = &dma_entry_hash[idx];
+		spin_lock_irqsave(&bucket->lock, flags);
+		list_for_each_entry(entry, &bucket->list, list)
+			pending += (entry->dev == dev);	/* count @dev's entries */
+		spin_unlock_irqrestore(&bucket->lock, flags);
+	}
+
+	return pending;
+}
+
+static int dma_debug_device_change(struct notifier_block *nb,
+				    unsigned long action, void *data)
+{
+	struct device *dev = data;
+	int count;
+
+	/*
+	 * Only BUS_NOTIFY_UNBIND_DRIVER is of interest: complain when
+	 * the device still has entries in the hash at that point.
+	 */
+	if (action != BUS_NOTIFY_UNBIND_DRIVER)
+		return 0;
+
+	count = device_dma_allocations(dev);
+	if (count != 0) {
+		err_printk(dev, NULL, "DMA-API: device driver has pending "
+				"DMA allocations while released from device "
+				"[count=%d]\n", count);
+	}
+
+	return 0;
+}
+
+void dma_debug_add_bus(struct bus_type *bus)
+{
+	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
+
+	if (!nb) {
+		printk(KERN_ERR "dma_debug_add_bus: out of memory\n");
+		return;
+	}
+
+	/* have dma_debug_device_change() called for events on this bus */
+	nb->notifier_call = dma_debug_device_change;
+
+	bus_register_notifier(bus, nb);
+}
+
+/*
+ * Let the architectures decide how many entries should be preallocated.
+ */
+void dma_debug_init(u32 num_entries)
+{
+	int i;
+
+	if (global_disable)
+		return;
+	/* SPIN_LOCK_UNLOCKED is deprecated; spin_lock_init() is the supported way */
+	for (i = 0; i < HASH_SIZE; ++i) {
+		INIT_LIST_HEAD(&dma_entry_hash[i].list);
+		spin_lock_init(&dma_entry_hash[i].lock);
+	}
+
+	if (dma_debug_fs_init() != 0) {
+		printk(KERN_ERR "DMA-API: error creating debugfs entries "
+				"- disabling\n");
+		global_disable = true;
+
+		return;
+	}
+	/* a value given with dma_debug_entries= overrides the caller's count */
+	if (req_entries)
+		num_entries = req_entries;
+
+	if (prealloc_memory(num_entries) != 0) {
+		printk(KERN_ERR "DMA-API: debugging out of memory error "
+				"- disabled\n");
+		global_disable = true;
+
+		return;
+	}
+
+	printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
+}
+
+static __init int dma_debug_cmdline(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (strncmp(str, "off", 3) == 0) {
+		printk(KERN_INFO "DMA-API: debugging disabled on kernel "
+				 "command line\n");
+		global_disable = true;
+	}
+	/* __setup handlers return 1 once they have consumed the option */
+	return 1;
+}
+
+static __init int dma_debug_entries_cmdline(char *str)
+{
+	int res;
+
+	if (!str)
+		return -EINVAL;
+
+	res = get_option(&str, &req_entries);
+	/* nothing parsed: clear the request so dma_debug_init() keeps its argument */
+	if (!res)
+		req_entries = 0;
+	/* __setup handlers return 1 once they have consumed the option */
+	return 1;
+}
+
+__setup("dma_debug=", dma_debug_cmdline);
+__setup("dma_debug_entries=", dma_debug_entries_cmdline);
+
+static void check_unmap(struct dma_debug_entry *ref)
+{
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+
+	if (dma_mapping_error(ref->dev, ref->dev_addr)) {
+		err_printk(ref->dev, NULL, "DMA-API: device driver tries "
+			   "to free an invalid DMA memory address\n");
+		return;
+	}
+	/* look the mapping up; the bucket is handed back at the out label */
+	bucket = get_hash_bucket(ref, &flags);
+	entry = hash_bucket_find(bucket, ref);
+
+	if (!entry) {
+		err_printk(ref->dev, NULL, "DMA-API: device driver tries "
+			   "to free DMA memory it has not allocated "
+			   "[device address=0x%016llx] [size=%llu bytes]\n",
+			   ref->dev_addr, ref->size);
+		goto out;
+	}
+	/* compare every recorded attribute of the mapping with the unmap request */
+	if (ref->size != entry->size) {
+		err_printk(ref->dev, entry, "DMA-API: device driver frees "
+			   "DMA memory with different size "
+			   "[device address=0x%016llx] [map size=%llu bytes] "
+			   "[unmap size=%llu bytes]\n",
+			   ref->dev_addr, entry->size, ref->size);
+	}
+
+	if (ref->type != entry->type) {
+		err_printk(ref->dev, entry, "DMA-API: device driver frees "
+			   "DMA memory with wrong function "
+			   "[device address=0x%016llx] [size=%llu bytes] "
+			   "[mapped as %s] [unmapped as %s]\n",
+			   ref->dev_addr, ref->size,
+			   type2name[entry->type], type2name[ref->type]);
+	} else if ((entry->type == dma_debug_coherent) &&
+		   (ref->paddr != entry->paddr)) {
+		err_printk(ref->dev, entry, "DMA-API: device driver frees "
+			   "DMA memory with different CPU address "
+			   "[device address=0x%016llx] [size=%llu bytes] "
+			   "[cpu alloc address=%p] [cpu free address=%p]\n",
+			   ref->dev_addr, ref->size,
+			   (void *)entry->paddr, (void *)ref->paddr);
+	}
+
+	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
+	    ref->sg_call_ents != entry->sg_call_ents) {
+		err_printk(ref->dev, entry, "DMA-API: device driver frees "
+			   "DMA sg list with different entry count "
+			   "[map count=%d] [unmap count=%d]\n",
+			   entry->sg_call_ents, ref->sg_call_ents);
+	}
+
+	/*
+	 * This may be no bug in reality - but most implementations of the
+	 * DMA API don't handle this properly, so check for it here
+	 */
+	if (ref->direction != entry->direction) {
+		err_printk(ref->dev, entry, "DMA-API: device driver frees "
+			   "DMA memory with different direction "
+			   "[device address=0x%016llx] [size=%llu bytes] "
+			   "[mapped with %s] [unmapped with %s]\n",
+			   ref->dev_addr, ref->size,
+			   dir2name[entry->direction],
+			   dir2name[ref->direction]);
+	}
+	/* the mapping is gone after this call, whether or not it was reported */
+	hash_bucket_del(entry);
+	dma_entry_free(entry);
+
+out:
+	put_hash_bucket(bucket, &flags);
+}
+
+static void check_for_stack(struct device *dev, void *addr)
+{
+	if (object_is_on_stack(addr))
+		err_printk(dev, NULL, "DMA-API: device driver maps memory from "
+				"stack [addr=%p]\n", addr);
+}
+
+static inline bool overlap(void *addr, u64 size, void *start, void *end)
+{
+	/* addr_end is one past the last byte, so [addr, addr_end) is half-open */
+	void *addr_end = (char *)addr + size;
+
+	/* half-open ranges intersect iff each one begins before the other ends */
+	return (addr < end) && (addr_end > start);
+}
+
+static void check_for_illegal_area(struct device *dev, void *addr, u64 size)
+{
+	if (overlap(addr, size, _text, _etext) ||
+	    overlap(addr, size, __start_rodata, __end_rodata))
+		err_printk(dev, NULL, "DMA-API: device driver maps "
+				"memory from kernel text or rodata "
+				"[addr=%p] [size=%llu]\n", addr, size);
+}
+
+static void check_sync(struct device *dev, dma_addr_t addr,
+		       u64 size, u64 offset, int direction, bool to_cpu)
+{
+	struct dma_debug_entry ref = {
+		.dev            = dev,
+		.dev_addr       = addr,
+		.size           = size,
+		.direction      = direction,
+	};
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+
+	bucket = get_hash_bucket(&ref, &flags);
+
+	entry = hash_bucket_find(bucket, &ref);
+
+	if (!entry) {
+		err_printk(dev, NULL, "DMA-API: device driver tries "
+				"to sync DMA memory it has not allocated "
+				"[device address=0x%016llx] [size=%llu bytes]\n",
+				(unsigned long long)addr, size);
+		goto out;
+	}
+
+	if ((offset + size) > entry->size) {
+		err_printk(dev, entry, "DMA-API: device driver syncs"
+				" DMA memory outside allocated range "
+				"[device address=0x%016llx] "
+				"[allocation size=%llu bytes] [sync offset=%llu] "
+				"[sync size=%llu]\n", entry->dev_addr, entry->size,
+				offset, size);
+	}
+
+	if (direction != entry->direction) {
+		err_printk(dev, entry, "DMA-API: device driver syncs "
+				"DMA memory with different direction "
+				"[device address=0x%016llx] [size=%llu bytes] "
+				"[mapped with %s] [synced with %s]\n",
+				(unsigned long long)addr, entry->size,
+				dir2name[entry->direction],
+				dir2name[direction]);
+	}
+
+	if (entry->direction == DMA_BIDIRECTIONAL)
+		goto out;
+
+	if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
+		      !(direction == DMA_TO_DEVICE))
+		err_printk(dev, entry, "DMA-API: device driver syncs "
+				"device read-only DMA memory for cpu "
+				"[device address=0x%016llx] [size=%llu bytes] "
+				"[mapped with %s] [synced with %s]\n",
+				(unsigned long long)addr, entry->size,
+				dir2name[entry->direction],
+				dir2name[direction]);
+
+	if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
+		       !(direction == DMA_FROM_DEVICE))
+		err_printk(dev, entry, "DMA-API: device driver syncs "
+				"device write-only DMA memory to device "
+				"[device address=0x%016llx] [size=%llu bytes] "
+				"[mapped with %s] [synced with %s]\n",
+				(unsigned long long)addr, entry->size,
+				dir2name[entry->direction],
+				dir2name[direction]);
+
+out:
+	put_hash_bucket(bucket, &flags);
+
+}
+
+void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
+			size_t size, int direction, dma_addr_t dma_addr,
+			bool map_single)
+{
+	struct dma_debug_entry *entry;
+
+	if (unlikely(global_disable))
+		return;
+
+	if (unlikely(dma_mapping_error(dev, dma_addr)))
+		return;
+
+	entry = dma_entry_alloc();
+	if (!entry)
+		return;
+
+	entry->dev       = dev;
+	entry->type      = dma_debug_page;
+	entry->paddr     = page_to_phys(page) + offset;
+	entry->dev_addr  = dma_addr;
+	entry->size      = size;
+	entry->direction = direction;
+
+	if (map_single)
+		entry->type = dma_debug_single;
+
+	if (!PageHighMem(page)) {
+		void *addr = ((char *)page_address(page)) + offset;
+		check_for_stack(dev, addr);
+		check_for_illegal_area(dev, addr, size);
+	}
+
+	add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_dma_map_page);
+
+void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+			  size_t size, int direction, bool map_single)
+{
+	struct dma_debug_entry ref = {
+		.type           = dma_debug_page,
+		.dev            = dev,
+		.dev_addr       = addr,
+		.size           = size,
+		.direction      = direction,
+	};
+
+	if (unlikely(global_disable))
+		return;
+
+	if (map_single)
+		ref.type = dma_debug_single;
+
+	check_unmap(&ref);
+}
+EXPORT_SYMBOL(debug_dma_unmap_page);
+
+void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
+		      int nents, int mapped_ents, int direction)
+{
+	struct dma_debug_entry *entry;
+	struct scatterlist *s;
+	int i;
+
+	if (unlikely(global_disable))
+		return;
+
+	for_each_sg(sg, s, mapped_ents, i) {
+		entry = dma_entry_alloc();
+		if (!entry)
+			return;
+
+		entry->type           = dma_debug_sg;
+		entry->dev            = dev;
+		entry->paddr          = sg_phys(s);
+		entry->size           = s->length;
+		entry->dev_addr       = s->dma_address;
+		entry->direction      = direction;
+		entry->sg_call_ents   = nents;
+		entry->sg_mapped_ents = mapped_ents;
+
+		if (!PageHighMem(sg_page(s))) {
+			check_for_stack(dev, sg_virt(s));
+			check_for_illegal_area(dev, sg_virt(s), s->length);
+		}
+
+		add_dma_entry(entry);
+	}
+}
+EXPORT_SYMBOL(debug_dma_map_sg);
+
+void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+			int nelems, int dir)
+{
+	struct dma_debug_entry *entry;
+	struct scatterlist *s;
+	int mapped_ents = 0, i;
+	unsigned long flags;
+
+	if (unlikely(global_disable))
+		return;
+
+	for_each_sg(sglist, s, nelems, i) {
+
+		struct dma_debug_entry ref = {
+			.type           = dma_debug_sg,
+			.dev            = dev,
+			.paddr          = sg_phys(s),
+			.dev_addr       = s->dma_address,
+			.size           = s->length,
+			.direction      = dir,
+			.sg_call_ents   = 0,
+		};
+
+		if (mapped_ents && i >= mapped_ents)
+			break;
+
+		if (mapped_ents == 0) {
+			struct hash_bucket *bucket;
+			ref.sg_call_ents = nelems;
+			bucket = get_hash_bucket(&ref, &flags);
+			entry = hash_bucket_find(bucket, &ref);
+			if (entry)
+				mapped_ents = entry->sg_mapped_ents;
+			put_hash_bucket(bucket, &flags);
+		}
+
+		check_unmap(&ref);
+	}
+}
+EXPORT_SYMBOL(debug_dma_unmap_sg);
+
+void debug_dma_alloc_coherent(struct device *dev, size_t size,
+			      dma_addr_t dma_addr, void *virt)
+{
+	struct dma_debug_entry *entry;
+
+	if (unlikely(global_disable))
+		return;
+
+	if (unlikely(virt == NULL))
+		return;
+
+	entry = dma_entry_alloc();
+	if (!entry)
+		return;
+
+	entry->type      = dma_debug_coherent;
+	entry->dev       = dev;
+	entry->paddr     = virt_to_phys(virt);
+	entry->size      = size;
+	entry->dev_addr  = dma_addr;
+	entry->direction = DMA_BIDIRECTIONAL;
+
+	add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_dma_alloc_coherent);
+
+void debug_dma_free_coherent(struct device *dev, size_t size,
+			 void *virt, dma_addr_t addr)
+{
+	struct dma_debug_entry ref = {
+		.type           = dma_debug_coherent,
+		.dev            = dev,
+		.paddr          = virt_to_phys(virt),
+		.dev_addr       = addr,
+		.size           = size,
+		.direction      = DMA_BIDIRECTIONAL,
+	};
+
+	if (unlikely(global_disable))
+		return;
+
+	check_unmap(&ref);
+}
+EXPORT_SYMBOL(debug_dma_free_coherent);
+
+void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+				   size_t size, int direction)
+{
+	if (unlikely(global_disable))
+		return;
+
+	check_sync(dev, dma_handle, size, 0, direction, true);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
+
+void debug_dma_sync_single_for_device(struct device *dev,
+				      dma_addr_t dma_handle, size_t size,
+				      int direction)
+{
+	if (unlikely(global_disable))
+		return;
+
+	check_sync(dev, dma_handle, size, 0, direction, false);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_for_device);
+
+void debug_dma_sync_single_range_for_cpu(struct device *dev,
+					 dma_addr_t dma_handle,
+					 unsigned long offset, size_t size,
+					 int direction)
+{
+	if (unlikely(global_disable))
+		return;
+
+	check_sync(dev, dma_handle, size, offset, direction, true);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
+
+void debug_dma_sync_single_range_for_device(struct device *dev,
+					    dma_addr_t dma_handle,
+					    unsigned long offset,
+					    size_t size, int direction)
+{
+	if (unlikely(global_disable))
+		return;
+
+	check_sync(dev, dma_handle, size, offset, direction, false);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
+
+void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			       int nelems, int direction)
+{
+	struct scatterlist *s;
+	int i;
+
+	if (unlikely(global_disable))
+		return;
+	/* go through the sg accessors: dma_length is not a field on every arch */
+	for_each_sg(sg, s, nelems, i) {
+		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
+				direction, true);
+	}
+}
+EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
+
+void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+				  int nelems, int direction)
+{
+	struct scatterlist *s;
+	int i;
+
+	if (unlikely(global_disable))
+		return;
+	/* go through the sg accessors: dma_length is not a field on every arch */
+	for_each_sg(sg, s, nelems, i) {
+		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
+				direction, false);
+	}
+}
+EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
+
diff --git a/lib/lmb.c b/lib/lmb.c
index 97e547037084..e4a6482d8b26 100644
--- a/lib/lmb.c
+++ b/lib/lmb.c
@@ -29,33 +29,33 @@ static int __init early_lmb(char *p)
 }
 early_param("lmb", early_lmb);
 
-void lmb_dump_all(void)
+static void lmb_dump(struct lmb_region *region, char *name)
 {
-	unsigned long i;
+	unsigned long long base, size;
+	int i;
+
+	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);
+
+	for (i = 0; i < region->cnt; i++) {
+		base = region->region[i].base;
+		size = region->region[i].size;
+
+		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
+		    name, i, base, base + size - 1, size);
+	}
+}
 
+void lmb_dump_all(void)
+{
 	if (!lmb_debug)
 		return;
 
-	pr_info("lmb_dump_all:\n");
-	pr_info("    memory.cnt		  = 0x%lx\n", lmb.memory.cnt);
-	pr_info("    memory.size		  = 0x%llx\n",
-	    (unsigned long long)lmb.memory.size);
-	for (i=0; i < lmb.memory.cnt ;i++) {
-		pr_info("    memory.region[0x%lx].base       = 0x%llx\n",
-		    i, (unsigned long long)lmb.memory.region[i].base);
-		pr_info("		      .size     = 0x%llx\n",
-		    (unsigned long long)lmb.memory.region[i].size);
-	}
+	pr_info("LMB configuration:\n");
+	pr_info(" rmo_size    = 0x%llx\n", (unsigned long long)lmb.rmo_size);
+	pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
 
-	pr_info("    reserved.cnt	  = 0x%lx\n", lmb.reserved.cnt);
-	pr_info("    reserved.size	  = 0x%llx\n",
-	    (unsigned long long)lmb.memory.size);
-	for (i=0; i < lmb.reserved.cnt ;i++) {
-		pr_info("    reserved.region[0x%lx].base       = 0x%llx\n",
-		    i, (unsigned long long)lmb.reserved.region[i].base);
-		pr_info("		      .size     = 0x%llx\n",
-		    (unsigned long long)lmb.reserved.region[i].size);
-	}
+	lmb_dump(&lmb.memory, "memory");
+	lmb_dump(&lmb.reserved, "reserved");
 }
 
 static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 280332c1827c..619313ed6c46 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -157,11 +157,11 @@ static void init_shared_classes(void)
 #define SOFTIRQ_ENTER()				\
 		local_bh_disable();		\
 		local_irq_disable();		\
-		trace_softirq_enter();		\
+		lockdep_softirq_enter();	\
 		WARN_ON(!in_softirq());
 
 #define SOFTIRQ_EXIT()				\
-		trace_softirq_exit();		\
+		lockdep_softirq_exit();		\
 		local_irq_enable();		\
 		local_bh_enable();
 
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 80009a24e21d..c4706eb98d3d 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -133,6 +133,32 @@ errout:
 }
 
 /**
+ * nla_policy_len - Determine the max. length of a policy
+ * @p: policy to use
+ * @n: number of policies
+ *
+ * Determines the max. length of the policy.  It is currently used
+ * to allocate Netlink buffers roughly the size of the actual
+ * message.
+ *
+ * Returns the combined nla_total_size() of all @n policy entries.
+ */
+int
+nla_policy_len(const struct nla_policy *p, int n)
+{
+	int i, len = 0;
+
+	for (i = 0; i < n; i++, p++) {
+		if (p->len)
+			len += nla_total_size(p->len);
+		else if (nla_attr_minlen[p->type])
+			len += nla_total_size(nla_attr_minlen[p->type]);
+	}
+
+	return len;
+}
+
+/**
  * nla_parse - Parse a stream of attributes into a tb buffer
  * @tb: destination array with maxtype+1 elements
  * @maxtype: maximum attribute type to be expected
@@ -467,6 +493,7 @@ EXPORT_SYMBOL(nla_append);
 #endif
 
 EXPORT_SYMBOL(nla_validate);
+EXPORT_SYMBOL(nla_policy_len);
 EXPORT_SYMBOL(nla_parse);
 EXPORT_SYMBOL(nla_find);
 EXPORT_SYMBOL(nla_strlcpy);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 9956b99649f0..f653659e0bc1 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -163,17 +163,14 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
 			{
 				if (!other->rb_right || rb_is_black(other->rb_right))
 				{
-					struct rb_node *o_left;
-					if ((o_left = other->rb_left))
-						rb_set_black(o_left);
+					rb_set_black(other->rb_left);
 					rb_set_red(other);
 					__rb_rotate_right(other, root);
 					other = parent->rb_right;
 				}
 				rb_set_color(other, rb_color(parent));
 				rb_set_black(parent);
-				if (other->rb_right)
-					rb_set_black(other->rb_right);
+				rb_set_black(other->rb_right);
 				__rb_rotate_left(parent, root);
 				node = root->rb_node;
 				break;
@@ -200,17 +197,14 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
 			{
 				if (!other->rb_left || rb_is_black(other->rb_left))
 				{
-					register struct rb_node *o_right;
-					if ((o_right = other->rb_right))
-						rb_set_black(o_right);
+					rb_set_black(other->rb_right);
 					rb_set_red(other);
 					__rb_rotate_left(other, root);
 					other = parent->rb_left;
 				}
 				rb_set_color(other, rb_color(parent));
 				rb_set_black(parent);
-				if (other->rb_left)
-					rb_set_black(other->rb_left);
+				rb_set_black(other->rb_left);
 				__rb_rotate_right(parent, root);
 				node = root->rb_node;
 				break;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1f991acc2a05..32e2bd3b1142 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -145,7 +145,7 @@ static void *swiotlb_bus_to_virt(dma_addr_t address)
 	return phys_to_virt(swiotlb_bus_to_phys(address));
 }
 
-int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
+int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
 {
 	return 0;
 }
@@ -315,9 +315,9 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
-static inline int range_needs_mapping(void *ptr, size_t size)
+static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
 {
-	return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size);
+	return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size);
 }
 
 static int is_swiotlb_buffer(char *addr)
@@ -636,11 +636,14 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
  * Once the device is given the dma address, the device owns this memory until
  * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
  */
-dma_addr_t
-swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
-			 int dir, struct dma_attrs *attrs)
-{
-	dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, ptr);
+dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
+			    unsigned long offset, size_t size,
+			    enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	phys_addr_t phys = page_to_phys(page) + offset;
+	void *ptr = page_address(page) + offset;
+	dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
 	void *map;
 
 	BUG_ON(dir == DMA_NONE);
@@ -649,37 +652,30 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(hwdev, dev_addr, size) &&
-	    !range_needs_mapping(ptr, size))
+	if (!address_needs_mapping(dev, dev_addr, size) &&
+	    !range_needs_mapping(virt_to_phys(ptr), size))
 		return dev_addr;
 
 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
 	 */
-	map = map_single(hwdev, virt_to_phys(ptr), size, dir);
+	map = map_single(dev, phys, size, dir);
 	if (!map) {
-		swiotlb_full(hwdev, size, dir, 1);
+		swiotlb_full(dev, size, dir, 1);
 		map = io_tlb_overflow_buffer;
 	}
 
-	dev_addr = swiotlb_virt_to_bus(hwdev, map);
+	dev_addr = swiotlb_virt_to_bus(dev, map);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (address_needs_mapping(hwdev, dev_addr, size))
+	if (address_needs_mapping(dev, dev_addr, size))
 		panic("map_single: bounce buffer is not DMA'ble");
 
 	return dev_addr;
 }
-EXPORT_SYMBOL(swiotlb_map_single_attrs);
-
-dma_addr_t
-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
-{
-	return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL);
-}
-EXPORT_SYMBOL(swiotlb_map_single);
+EXPORT_SYMBOL_GPL(swiotlb_map_page);
 
 /*
  * Unmap a single streaming mode DMA translation.  The dma_addr and size must
@@ -689,9 +685,9 @@ EXPORT_SYMBOL(swiotlb_map_single);
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
-void
-swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
-			   size_t size, int dir, struct dma_attrs *attrs)
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+			size_t size, enum dma_data_direction dir,
+			struct dma_attrs *attrs)
 {
 	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
@@ -701,15 +697,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
 }
-EXPORT_SYMBOL(swiotlb_unmap_single_attrs);
-
-void
-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
-		     int dir)
-{
-	return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
-}
-EXPORT_SYMBOL(swiotlb_unmap_single);
+EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 
 /*
  * Make physical memory consistent for a single streaming mode DMA translation
@@ -736,7 +724,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 
 void
 swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-			    size_t size, int dir)
+			    size_t size, enum dma_data_direction dir)
 {
 	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
 }
@@ -744,7 +732,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
 
 void
 swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-			       size_t size, int dir)
+			       size_t size, enum dma_data_direction dir)
 {
 	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
 }
@@ -769,7 +757,8 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
 
 void
 swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-				  unsigned long offset, size_t size, int dir)
+				  unsigned long offset, size_t size,
+				  enum dma_data_direction dir)
 {
 	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
 				  SYNC_FOR_CPU);
@@ -778,7 +767,8 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
 
 void
 swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
-				     unsigned long offset, size_t size, int dir)
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction dir)
 {
 	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
 				  SYNC_FOR_DEVICE);
@@ -803,7 +793,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
  */
 int
 swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		     int dir, struct dma_attrs *attrs)
+		     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -811,10 +801,10 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		void *addr = sg_virt(sg);
-		dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, addr);
+		phys_addr_t paddr = sg_phys(sg);
+		dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr);
 
-		if (range_needs_mapping(addr, sg->length) ||
+		if (range_needs_mapping(paddr, sg->length) ||
 		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, sg_phys(sg),
 					       sg->length, dir);
@@ -850,7 +840,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
  */
 void
 swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-		       int nelems, int dir, struct dma_attrs *attrs)
+		       int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -858,11 +848,11 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != swiotlb_virt_to_bus(hwdev, sg_virt(sg)))
+		if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
 			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				     sg->dma_length, dir);
 		else if (dir == DMA_FROM_DEVICE)
-			dma_mark_clean(sg_virt(sg), sg->dma_length);
+			dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
 	}
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -892,17 +882,17 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != swiotlb_virt_to_bus(hwdev, sg_virt(sg)))
+		if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
 			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir, target);
 		else if (dir == DMA_FROM_DEVICE)
-			dma_mark_clean(sg_virt(sg), sg->dma_length);
+			dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
 	}
 }
 
 void
 swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-			int nelems, int dir)
+			int nelems, enum dma_data_direction dir)
 {
 	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
 }
@@ -910,7 +900,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
 
 void
 swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-			   int nelems, int dir)
+			   int nelems, enum dma_data_direction dir)
 {
 	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
 }
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0fbd0121d91d..be3001f912e4 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -396,7 +396,38 @@ static noinline char* put_dec(char *buf, unsigned long long num)
 #define SMALL	32		/* Must be 32 == 0x20 */
 #define SPECIAL	64		/* 0x */
 
-static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type)
+enum format_type {
+	FORMAT_TYPE_NONE, /* Just a string part */
+	FORMAT_TYPE_WIDTH,
+	FORMAT_TYPE_PRECISION,
+	FORMAT_TYPE_CHAR,
+	FORMAT_TYPE_STR,
+	FORMAT_TYPE_PTR,
+	FORMAT_TYPE_PERCENT_CHAR,
+	FORMAT_TYPE_INVALID,
+	FORMAT_TYPE_LONG_LONG,
+	FORMAT_TYPE_ULONG,
+	FORMAT_TYPE_LONG,
+	FORMAT_TYPE_USHORT,
+	FORMAT_TYPE_SHORT,
+	FORMAT_TYPE_UINT,
+	FORMAT_TYPE_INT,
+	FORMAT_TYPE_NRCHARS,
+	FORMAT_TYPE_SIZE_T,
+	FORMAT_TYPE_PTRDIFF
+};
+
+struct printf_spec {
+	enum format_type	type;
+	int			flags;		/* flags to number() */
+	int			field_width;	/* width of output field */
+	int			base;
+	int			precision;	/* # of digits/chars */
+	int			qualifier;
+};
+
+static char *number(char *buf, char *end, unsigned long long num,
+			struct printf_spec spec)
 {
 	/* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
 	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -404,32 +435,32 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 	char tmp[66];
 	char sign;
 	char locase;
-	int need_pfx = ((type & SPECIAL) && base != 10);
+	int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
 	int i;
 
 	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
 	 * produces same digits or (maybe lowercased) letters */
-	locase = (type & SMALL);
-	if (type & LEFT)
-		type &= ~ZEROPAD;
+	locase = (spec.flags & SMALL);
+	if (spec.flags & LEFT)
+		spec.flags &= ~ZEROPAD;
 	sign = 0;
-	if (type & SIGN) {
+	if (spec.flags & SIGN) {
 		if ((signed long long) num < 0) {
 			sign = '-';
 			num = - (signed long long) num;
-			size--;
-		} else if (type & PLUS) {
+			spec.field_width--;
+		} else if (spec.flags & PLUS) {
 			sign = '+';
-			size--;
-		} else if (type & SPACE) {
+			spec.field_width--;
+		} else if (spec.flags & SPACE) {
 			sign = ' ';
-			size--;
+			spec.field_width--;
 		}
 	}
 	if (need_pfx) {
-		size--;
-		if (base == 16)
-			size--;
+		spec.field_width--;
+		if (spec.base == 16)
+			spec.field_width--;
 	}
 
 	/* generate full string in tmp[], in reverse order */
@@ -441,10 +472,10 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		tmp[i++] = (digits[do_div(num,base)] | locase);
 	} while (num != 0);
 	*/
-	else if (base != 10) { /* 8 or 16 */
-		int mask = base - 1;
+	else if (spec.base != 10) { /* 8 or 16 */
+		int mask = spec.base - 1;
 		int shift = 3;
-		if (base == 16) shift = 4;
+		if (spec.base == 16) shift = 4;
 		do {
 			tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
 			num >>= shift;
@@ -454,12 +485,12 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 	}
 
 	/* printing 100 using %2d gives "100", not "00" */
-	if (i > precision)
-		precision = i;
+	if (i > spec.precision)
+		spec.precision = i;
 	/* leading space padding */
-	size -= precision;
-	if (!(type & (ZEROPAD+LEFT))) {
-		while(--size >= 0) {
+	spec.field_width -= spec.precision;
+	if (!(spec.flags & (ZEROPAD+LEFT))) {
+		while(--spec.field_width >= 0) {
 			if (buf < end)
 				*buf = ' ';
 			++buf;
@@ -476,23 +507,23 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		if (buf < end)
 			*buf = '0';
 		++buf;
-		if (base == 16) {
+		if (spec.base == 16) {
 			if (buf < end)
 				*buf = ('X' | locase);
 			++buf;
 		}
 	}
 	/* zero or space padding */
-	if (!(type & LEFT)) {
-		char c = (type & ZEROPAD) ? '0' : ' ';
-		while (--size >= 0) {
+	if (!(spec.flags & LEFT)) {
+		char c = (spec.flags & ZEROPAD) ? '0' : ' ';
+		while (--spec.field_width >= 0) {
 			if (buf < end)
 				*buf = c;
 			++buf;
 		}
 	}
 	/* hmm even more zero padding? */
-	while (i <= --precision) {
+	while (i <= --spec.precision) {
 		if (buf < end)
 			*buf = '0';
 		++buf;
@@ -504,7 +535,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		++buf;
 	}
 	/* trailing space padding */
-	while (--size >= 0) {
+	while (--spec.field_width >= 0) {
 		if (buf < end)
 			*buf = ' ';
 		++buf;
@@ -512,17 +543,17 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 	return buf;
 }
 
-static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags)
+static char *string(char *buf, char *end, char *s, struct printf_spec spec)
 {
 	int len, i;
 
 	if ((unsigned long)s < PAGE_SIZE)
 		s = "<NULL>";
 
-	len = strnlen(s, precision);
+	len = strnlen(s, spec.precision);
 
-	if (!(flags & LEFT)) {
-		while (len < field_width--) {
+	if (!(spec.flags & LEFT)) {
+		while (len < spec.field_width--) {
 			if (buf < end)
 				*buf = ' ';
 			++buf;
@@ -533,7 +564,7 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
 			*buf = *s;
 		++buf; ++s;
 	}
-	while (len < field_width--) {
+	while (len < spec.field_width--) {
 		if (buf < end)
 			*buf = ' ';
 		++buf;
@@ -541,21 +572,24 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
 	return buf;
 }
 
-static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags)
+static char *symbol_string(char *buf, char *end, void *ptr,
+				struct printf_spec spec)
 {
 	unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
 	char sym[KSYM_SYMBOL_LEN];
 	sprint_symbol(sym, value);
-	return string(buf, end, sym, field_width, precision, flags);
+	return string(buf, end, sym, spec);
 #else
-	field_width = 2*sizeof(void *);
-	flags |= SPECIAL | SMALL | ZEROPAD;
-	return number(buf, end, value, 16, field_width, precision, flags);
+	spec.field_width = 2*sizeof(void *);
+	spec.flags |= SPECIAL | SMALL | ZEROPAD;
+	spec.base = 16;
+	return number(buf, end, value, spec);
 #endif
 }
 
-static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags)
+static char *resource_string(char *buf, char *end, struct resource *res,
+				struct printf_spec spec)
 {
 #ifndef IO_RSRC_PRINTK_SIZE
 #define IO_RSRC_PRINTK_SIZE	4
@@ -564,7 +598,11 @@ static char *resource_string(char *buf, char *end, struct resource *res, int fie
 #ifndef MEM_RSRC_PRINTK_SIZE
 #define MEM_RSRC_PRINTK_SIZE	8
 #endif
-
+	struct printf_spec num_spec = {
+		.base = 16,
+		.precision = -1,
+		.flags = SPECIAL | SMALL | ZEROPAD,
+	};
 	/* room for the actual numbers, the two "0x", -, [, ] and the final zero */
 	char sym[4*sizeof(resource_size_t) + 8];
 	char *p = sym, *pend = sym + sizeof(sym);
@@ -576,17 +614,18 @@ static char *resource_string(char *buf, char *end, struct resource *res, int fie
 		size = MEM_RSRC_PRINTK_SIZE;
 
 	*p++ = '[';
-	p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+	num_spec.field_width = size;
+	p = number(p, pend, res->start, num_spec);
 	*p++ = '-';
-	p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+	p = number(p, pend, res->end, num_spec);
 	*p++ = ']';
 	*p = 0;
 
-	return string(buf, end, sym, field_width, precision, flags);
+	return string(buf, end, sym, spec);
 }
 
-static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width,
-				int precision, int flags)
+static char *mac_address_string(char *buf, char *end, u8 *addr,
+				struct printf_spec spec)
 {
 	char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */
 	char *p = mac_addr;
@@ -594,16 +633,17 @@ static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width,
 
 	for (i = 0; i < 6; i++) {
 		p = pack_hex_byte(p, addr[i]);
-		if (!(flags & SPECIAL) && i != 5)
+		if (!(spec.flags & SPECIAL) && i != 5)
 			*p++ = ':';
 	}
 	*p = '\0';
+	spec.flags &= ~SPECIAL;
 
-	return string(buf, end, mac_addr, field_width, precision, flags & ~SPECIAL);
+	return string(buf, end, mac_addr, spec);
 }
 
-static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width,
-			 int precision, int flags)
+static char *ip6_addr_string(char *buf, char *end, u8 *addr,
+				struct printf_spec spec)
 {
 	char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */
 	char *p = ip6_addr;
@@ -612,16 +652,17 @@ static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width,
 	for (i = 0; i < 8; i++) {
 		p = pack_hex_byte(p, addr[2 * i]);
 		p = pack_hex_byte(p, addr[2 * i + 1]);
-		if (!(flags & SPECIAL) && i != 7)
+		if (!(spec.flags & SPECIAL) && i != 7)
 			*p++ = ':';
 	}
 	*p = '\0';
+	spec.flags &= ~SPECIAL;
 
-	return string(buf, end, ip6_addr, field_width, precision, flags & ~SPECIAL);
+	return string(buf, end, ip6_addr, spec);
 }
 
-static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
-			 int precision, int flags)
+static char *ip4_addr_string(char *buf, char *end, u8 *addr,
+				struct printf_spec spec)
 {
 	char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */
 	char temp[3];	/* hold each IP quad in reverse order */
@@ -637,8 +678,9 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
 			*p++ = '.';
 	}
 	*p = '\0';
+	spec.flags &= ~SPECIAL;
 
-	return string(buf, end, ip4_addr, field_width, precision, flags & ~SPECIAL);
+	return string(buf, end, ip4_addr, spec);
 }
 
 /*
@@ -663,41 +705,233 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
  * function pointers are really function descriptors, which contain a
  * pointer to the real address.
  */
-static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
+static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+			struct printf_spec spec)
 {
 	if (!ptr)
-		return string(buf, end, "(null)", field_width, precision, flags);
+		return string(buf, end, "(null)", spec);
 
 	switch (*fmt) {
 	case 'F':
 		ptr = dereference_function_descriptor(ptr);
 		/* Fallthrough */
 	case 'S':
-		return symbol_string(buf, end, ptr, field_width, precision, flags);
+		return symbol_string(buf, end, ptr, spec);
 	case 'R':
-		return resource_string(buf, end, ptr, field_width, precision, flags);
+		return resource_string(buf, end, ptr, spec);
 	case 'm':
-		flags |= SPECIAL;
+		spec.flags |= SPECIAL;
 		/* Fallthrough */
 	case 'M':
-		return mac_address_string(buf, end, ptr, field_width, precision, flags);
+		return mac_address_string(buf, end, ptr, spec);
 	case 'i':
-		flags |= SPECIAL;
+		spec.flags |= SPECIAL;
 		/* Fallthrough */
 	case 'I':
 		if (fmt[1] == '6')
-			return ip6_addr_string(buf, end, ptr, field_width, precision, flags);
+			return ip6_addr_string(buf, end, ptr, spec);
 		if (fmt[1] == '4')
-			return ip4_addr_string(buf, end, ptr, field_width, precision, flags);
-		flags &= ~SPECIAL;
+			return ip4_addr_string(buf, end, ptr, spec);
+		spec.flags &= ~SPECIAL;
+		break;
+	}
+	spec.flags |= SMALL;
+	if (spec.field_width == -1) {
+		spec.field_width = 2*sizeof(void *);
+		spec.flags |= ZEROPAD;
+	}
+	spec.base = 16;
+
+	return number(buf, end, (unsigned long) ptr, spec);
+}
+
+/*
+ * Helper function to decode printf style format.
+ * Each call decodes one token from the format and returns the
+ * number of characters read (i.e. the delta by which the caller
+ * should advance the format pointer before the next call).
+ * The decoded token is returned through the fields of @spec.
+ *
+ * 'h', 'l', or 'L' for integer fields
+ * 'z' support added 23/7/1999 S.H.
+ * 'z' changed to 'Z' --davidm 1/25/99
+ * 't' added for ptrdiff_t
+ *
+ * @fmt: the format string
+ * @type: type of the token returned
+ * @flags: various flags such as +, -, # tokens..
+ * @field_width: overwritten width
+ * @base: base of the number (octal, hex, ...)
+ * @precision: precision of a number
+ * @qualifier: qualifier of a number (long, size_t, ...)
+ */
+static int format_decode(const char *fmt, struct printf_spec *spec)
+{
+	const char *start = fmt;
+
+	/* we finished early by reading the field width */
+	if (spec->type == FORMAT_TYPE_WIDTH) {
+		if (spec->field_width < 0) {
+			spec->field_width = -spec->field_width;
+			spec->flags |= LEFT;
+		}
+		spec->type = FORMAT_TYPE_NONE;
+		goto precision;
+	}
+
+	/* we finished early by reading the precision */
+	if (spec->type == FORMAT_TYPE_PRECISION) {
+		if (spec->precision < 0)
+			spec->precision = 0;
+
+		spec->type = FORMAT_TYPE_NONE;
+		goto qualifier;
+	}
+
+	/* By default */
+	spec->type = FORMAT_TYPE_NONE;
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt == '%')
+			break;
+	}
+
+	/* Return the current non-format string */
+	if (fmt != start || !*fmt)
+		return fmt - start;
+
+	/* Process flags */
+	spec->flags = 0;
+
+	while (1) { /* this also skips first '%' */
+		bool found = true;
+
+		++fmt;
+
+		switch (*fmt) {
+		case '-': spec->flags |= LEFT;    break;
+		case '+': spec->flags |= PLUS;    break;
+		case ' ': spec->flags |= SPACE;   break;
+		case '#': spec->flags |= SPECIAL; break;
+		case '0': spec->flags |= ZEROPAD; break;
+		default:  found = false;
+		}
+
+		if (!found)
+			break;
+	}
+
+	/* get field width */
+	spec->field_width = -1;
+
+	if (isdigit(*fmt))
+		spec->field_width = skip_atoi(&fmt);
+	else if (*fmt == '*') {
+		/* it's the next argument */
+		spec->type = FORMAT_TYPE_WIDTH;
+		return ++fmt - start;
+	}
+
+precision:
+	/* get the precision */
+	spec->precision = -1;
+	if (*fmt == '.') {
+		++fmt;
+		if (isdigit(*fmt)) {
+			spec->precision = skip_atoi(&fmt);
+			if (spec->precision < 0)
+				spec->precision = 0;
+		} else if (*fmt == '*') {
+			/* it's the next argument */
+			spec->type = FORMAT_TYPE_PRECISION;
+			return ++fmt - start;
+		}
+	}
+
+qualifier:
+	/* get the conversion qualifier */
+	spec->qualifier = -1;
+	if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+	    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+		spec->qualifier = *fmt;
+		++fmt;
+		if (spec->qualifier == 'l' && *fmt == 'l') {
+			spec->qualifier = 'L';
+			++fmt;
+		}
+	}
+
+	/* default base */
+	spec->base = 10;
+	switch (*fmt) {
+	case 'c':
+		spec->type = FORMAT_TYPE_CHAR;
+		return ++fmt - start;
+
+	case 's':
+		spec->type = FORMAT_TYPE_STR;
+		return ++fmt - start;
+
+	case 'p':
+		spec->type = FORMAT_TYPE_PTR;
+		return fmt - start;
+		/* skip alnum */
+
+	case 'n':
+		spec->type = FORMAT_TYPE_NRCHARS;
+		return ++fmt - start;
+
+	case '%':
+		spec->type = FORMAT_TYPE_PERCENT_CHAR;
+		return ++fmt - start;
+
+	/* integer number formats - set up the flags and "break" */
+	case 'o':
+		spec->base = 8;
 		break;
+
+	case 'x':
+		spec->flags |= SMALL;
+
+	case 'X':
+		spec->base = 16;
+		break;
+
+	case 'd':
+	case 'i':
+		spec->flags |= SIGN;
+	case 'u':
+		break;
+
+	default:
+		spec->type = FORMAT_TYPE_INVALID;
+		return fmt - start;
 	}
-	flags |= SMALL;
-	if (field_width == -1) {
-		field_width = 2*sizeof(void *);
-		flags |= ZEROPAD;
+
+	if (spec->qualifier == 'L')
+		spec->type = FORMAT_TYPE_LONG_LONG;
+	else if (spec->qualifier == 'l') {
+		if (spec->flags & SIGN)
+			spec->type = FORMAT_TYPE_LONG;
+		else
+			spec->type = FORMAT_TYPE_ULONG;
+	} else if (spec->qualifier == 'Z' || spec->qualifier == 'z') {
+		spec->type = FORMAT_TYPE_SIZE_T;
+	} else if (spec->qualifier == 't') {
+		spec->type = FORMAT_TYPE_PTRDIFF;
+	} else if (spec->qualifier == 'h') {
+		if (spec->flags & SIGN)
+			spec->type = FORMAT_TYPE_SHORT;
+		else
+			spec->type = FORMAT_TYPE_USHORT;
+	} else {
+		if (spec->flags & SIGN)
+			spec->type = FORMAT_TYPE_INT;
+		else
+			spec->type = FORMAT_TYPE_UINT;
 	}
-	return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags);
+
+	return ++fmt - start;
 }
 
 /**
@@ -726,18 +960,9 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
 	unsigned long long num;
-	int base;
 	char *str, *end, c;
-
-	int flags;		/* flags to number() */
-
-	int field_width;	/* width of output field */
-	int precision;		/* min. # of digits for integers; max
-				   number of chars for from string */
-	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
-				/* 'z' support added 23/7/1999 S.H.    */
-				/* 'z' changed to 'Z' --davidm 1/25/99 */
-				/* 't' added for ptrdiff_t */
+	int read;
+	struct printf_spec spec = {0};
 
 	/* Reject out-of-range values early.  Large positive sizes are
 	   used for unknown buffer sizes. */
@@ -758,184 +983,144 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 		size = end - buf;
 	}
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%') {
-			if (str < end)
-				*str = *fmt;
-			++str;
-			continue;
-		}
+	while (*fmt) {
+		const char *old_fmt = fmt;
 
-		/* process flags */
-		flags = 0;
-		repeat:
-			++fmt;		/* this also skips first '%' */
-			switch (*fmt) {
-				case '-': flags |= LEFT; goto repeat;
-				case '+': flags |= PLUS; goto repeat;
-				case ' ': flags |= SPACE; goto repeat;
-				case '#': flags |= SPECIAL; goto repeat;
-				case '0': flags |= ZEROPAD; goto repeat;
-			}
+		read = format_decode(fmt, &spec);
 
-		/* get field width */
-		field_width = -1;
-		if (isdigit(*fmt))
-			field_width = skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			field_width = va_arg(args, int);
-			if (field_width < 0) {
-				field_width = -field_width;
-				flags |= LEFT;
-			}
-		}
+		fmt += read;
 
-		/* get the precision */
-		precision = -1;
-		if (*fmt == '.') {
-			++fmt;	
-			if (isdigit(*fmt))
-				precision = skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				precision = va_arg(args, int);
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE: {
+			int copy = read;
+			if (str < end) {
+				if (copy > end - str)
+					copy = end - str;
+				memcpy(str, old_fmt, copy);
 			}
-			if (precision < 0)
-				precision = 0;
+			str += read;
+			break;
 		}
 
-		/* get the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt =='Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WIDTH:
+			spec.field_width = va_arg(args, int);
+			break;
 
-		/* default base */
-		base = 10;
+		case FORMAT_TYPE_PRECISION:
+			spec.precision = va_arg(args, int);
+			break;
 
-		switch (*fmt) {
-			case 'c':
-				if (!(flags & LEFT)) {
-					while (--field_width > 0) {
-						if (str < end)
-							*str = ' ';
-						++str;
-					}
-				}
-				c = (unsigned char) va_arg(args, int);
-				if (str < end)
-					*str = c;
-				++str;
-				while (--field_width > 0) {
+		case FORMAT_TYPE_CHAR:
+			if (!(spec.flags & LEFT)) {
+				while (--spec.field_width > 0) {
 					if (str < end)
 						*str = ' ';
 					++str;
-				}
-				continue;
-
-			case 's':
-				str = string(str, end, va_arg(args, char *), field_width, precision, flags);
-				continue;
-
-			case 'p':
-				str = pointer(fmt+1, str, end,
-						va_arg(args, void *),
-						field_width, precision, flags);
-				/* Skip all alphanumeric pointer suffixes */
-				while (isalnum(fmt[1]))
-					fmt++;
-				continue;
-
-			case 'n':
-				/* FIXME:
-				* What does C99 say about the overflow case here? */
-				if (qualifier == 'l') {
-					long * ip = va_arg(args, long *);
-					*ip = (str - buf);
-				} else if (qualifier == 'Z' || qualifier == 'z') {
-					size_t * ip = va_arg(args, size_t *);
-					*ip = (str - buf);
-				} else {
-					int * ip = va_arg(args, int *);
-					*ip = (str - buf);
-				}
-				continue;
 
-			case '%':
+				}
+			}
+			c = (unsigned char) va_arg(args, int);
+			if (str < end)
+				*str = c;
+			++str;
+			while (--spec.field_width > 0) {
 				if (str < end)
-					*str = '%';
+					*str = ' ';
 				++str;
-				continue;
+			}
+			break;
 
-				/* integer number formats - set up the flags and "break" */
-			case 'o':
-				base = 8;
-				break;
+		case FORMAT_TYPE_STR:
+			str = string(str, end, va_arg(args, char *), spec);
+			break;
 
-			case 'x':
-				flags |= SMALL;
-			case 'X':
-				base = 16;
-				break;
+		case FORMAT_TYPE_PTR:
+			str = pointer(fmt+1, str, end, va_arg(args, void *),
+				      spec);
+			while (isalnum(*fmt))
+				fmt++;
+			break;
 
-			case 'd':
-			case 'i':
-				flags |= SIGN;
-			case 'u':
-				break;
+		case FORMAT_TYPE_PERCENT_CHAR:
+			if (str < end)
+				*str = '%';
+			++str;
+			break;
 
-			default:
+		case FORMAT_TYPE_INVALID:
+			if (str < end)
+				*str = '%';
+			++str;
+			if (*fmt) {
 				if (str < end)
-					*str = '%';
+					*str = *fmt;
 				++str;
-				if (*fmt) {
-					if (str < end)
-						*str = *fmt;
-					++str;
-				} else {
-					--fmt;
-				}
-				continue;
+			} else {
+				--fmt;
+			}
+			break;
+
+		case FORMAT_TYPE_NRCHARS: {
+			int qualifier = spec.qualifier;
+
+			if (qualifier == 'l') {
+				long *ip = va_arg(args, long *);
+				*ip = (str - buf);
+			} else if (qualifier == 'Z' ||
+					qualifier == 'z') {
+				size_t *ip = va_arg(args, size_t *);
+				*ip = (str - buf);
+			} else {
+				int *ip = va_arg(args, int *);
+				*ip = (str - buf);
+			}
+			break;
 		}
-		if (qualifier == 'L')
-			num = va_arg(args, long long);
-		else if (qualifier == 'l') {
-			num = va_arg(args, unsigned long);
-			if (flags & SIGN)
-				num = (signed long) num;
-		} else if (qualifier == 'Z' || qualifier == 'z') {
-			num = va_arg(args, size_t);
-		} else if (qualifier == 't') {
-			num = va_arg(args, ptrdiff_t);
-		} else if (qualifier == 'h') {
-			num = (unsigned short) va_arg(args, int);
-			if (flags & SIGN)
-				num = (signed short) num;
-		} else {
-			num = va_arg(args, unsigned int);
-			if (flags & SIGN)
-				num = (signed int) num;
+
+		default:
+			switch (spec.type) {
+			case FORMAT_TYPE_LONG_LONG:
+				num = va_arg(args, long long);
+				break;
+			case FORMAT_TYPE_ULONG:
+				num = va_arg(args, unsigned long);
+				break;
+			case FORMAT_TYPE_LONG:
+				num = va_arg(args, long);
+				break;
+			case FORMAT_TYPE_SIZE_T:
+				num = va_arg(args, size_t);
+				break;
+			case FORMAT_TYPE_PTRDIFF:
+				num = va_arg(args, ptrdiff_t);
+				break;
+			case FORMAT_TYPE_USHORT:
+				num = (unsigned short) va_arg(args, int);
+				break;
+			case FORMAT_TYPE_SHORT:
+				num = (short) va_arg(args, int);
+				break;
+			case FORMAT_TYPE_INT:
+				num = (int) va_arg(args, int);
+				break;
+			default:
+				num = va_arg(args, unsigned int);
+			}
+
+			str = number(str, end, num, spec);
 		}
-		str = number(str, end, num, base,
-				field_width, precision, flags);
 	}
+
 	if (size > 0) {
 		if (str < end)
 			*str = '\0';
 		else
 			end[-1] = '\0';
 	}
+
 	/* the trailing null byte doesn't count towards the total */
 	return str-buf;
+
 }
 EXPORT_SYMBOL(vsnprintf);
 
@@ -1058,6 +1243,372 @@ int sprintf(char * buf, const char *fmt, ...)
 }
 EXPORT_SYMBOL(sprintf);
 
+#ifdef CONFIG_BINARY_PRINTF
+/*
+ * bprintf service:
+ * vbin_printf() - VA arguments to binary data
+ * bstr_printf() - Binary data to text string
+ */
+
+/**
+ * vbin_printf - Parse a format string and place args' binary value in a buffer
+ * @bin_buf: The buffer to place args' binary value
+ * @size: The size of the buffer (in 32-bit words, not characters)
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The format follows C99 vsnprintf, except %n is ignored, and its argument
+ * is skipped.
+ *
+ * The return value is the number of words(32bits) which would be generated for
+ * the given input.
+ *
+ * NOTE:
+ * If the return value is greater than @size, the resulting bin_buf is NOT
+ * valid for bstr_printf().
+ */
+int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
+{
+	struct printf_spec spec = {0};
+	char *str, *end;
+	int read;
+
+	str = (char *)bin_buf;
+	end = (char *)(bin_buf + size);
+
+#define save_arg(type)							\
+do {									\
+	if (sizeof(type) == 8) {					\
+		unsigned long long value;				\
+		str = PTR_ALIGN(str, sizeof(u32));			\
+		value = va_arg(args, unsigned long long);		\
+		if (str + sizeof(type) <= end) {			\
+			*(u32 *)str = *(u32 *)&value;			\
+			*(u32 *)(str + 4) = *((u32 *)&value + 1);	\
+		}							\
+	} else {							\
+		unsigned long value;					\
+		str = PTR_ALIGN(str, sizeof(type));			\
+		value = va_arg(args, int);				\
+		if (str + sizeof(type) <= end)				\
+			*(typeof(type) *)str = (type)value;		\
+	}								\
+	str += sizeof(type);						\
+} while (0)
+
+
+	while (*fmt) {
+		read = format_decode(fmt, &spec);
+
+		fmt += read;
+
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE:
+			break;
+
+		case FORMAT_TYPE_WIDTH:
+		case FORMAT_TYPE_PRECISION:
+			save_arg(int);
+			break;
+
+		case FORMAT_TYPE_CHAR:
+			save_arg(char);
+			break;
+
+		case FORMAT_TYPE_STR: {
+			const char *save_str = va_arg(args, char *);
+			size_t len;
+			if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
+					|| (unsigned long)save_str < PAGE_SIZE)
+				save_str = "<NULL>";
+			len = strlen(save_str);
+			if (str + len + 1 < end)
+				memcpy(str, save_str, len + 1);
+			str += len + 1;
+			break;
+		}
+
+		case FORMAT_TYPE_PTR:
+			save_arg(void *);
+			/* skip all alphanumeric pointer suffixes */
+			while (isalnum(*fmt))
+				fmt++;
+			break;
+
+		case FORMAT_TYPE_PERCENT_CHAR:
+			break;
+
+		case FORMAT_TYPE_INVALID:
+			if (!*fmt)
+				--fmt;
+			break;
+
+		case FORMAT_TYPE_NRCHARS: {
+			/* skip %n 's argument */
+			int qualifier = spec.qualifier;
+			void *skip_arg;
+			if (qualifier == 'l')
+				skip_arg = va_arg(args, long *);
+			else if (qualifier == 'Z' || qualifier == 'z')
+				skip_arg = va_arg(args, size_t *);
+			else
+				skip_arg = va_arg(args, int *);
+			break;
+		}
+
+		default:
+			switch (spec.type) {
+
+			case FORMAT_TYPE_LONG_LONG:
+				save_arg(long long);
+				break;
+			case FORMAT_TYPE_ULONG:
+			case FORMAT_TYPE_LONG:
+				save_arg(unsigned long);
+				break;
+			case FORMAT_TYPE_SIZE_T:
+				save_arg(size_t);
+				break;
+			case FORMAT_TYPE_PTRDIFF:
+				save_arg(ptrdiff_t);
+				break;
+			case FORMAT_TYPE_USHORT:
+			case FORMAT_TYPE_SHORT:
+				save_arg(short);
+				break;
+			default:
+				save_arg(int);
+			}
+		}
+	}
+	return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
+
+#undef save_arg
+}
+EXPORT_SYMBOL_GPL(vbin_printf);
+
+/**
+ * bstr_printf - Format a string from binary arguments and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @bin_buf: Binary arguments for the format string
+ *
+ * This function is like C99 vsnprintf; the difference is that vsnprintf gets
+ * its arguments from the stack, while bstr_printf gets them from @bin_buf,
+ * a binary buffer that was generated by vbin_printf.
+ *
+ * The format follows C99 vsnprintf, but has some extensions:
+ * %pS output the name of a text symbol
+ * %pF output the name of a function pointer
+ * %pR output the address range in a struct resource
+ * %n is ignored
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf(). If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ */
+int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
+{
+	unsigned long long num;
+	char *str, *end, c;
+	const char *args = (const char *)bin_buf;
+
+	struct printf_spec spec = {0};
+
+	if (unlikely((int) size < 0)) {
+		/* There can be only one.. */
+		static char warn = 1;
+		WARN_ON(warn);
+		warn = 0;
+		return 0;
+	}
+
+	str = buf;
+	end = buf + size;
+
+#define get_arg(type)							\
+({									\
+	typeof(type) value;						\
+	if (sizeof(type) == 8) {					\
+		args = PTR_ALIGN(args, sizeof(u32));			\
+		*(u32 *)&value = *(u32 *)args;				\
+		*((u32 *)&value + 1) = *(u32 *)(args + 4);		\
+	} else {							\
+		args = PTR_ALIGN(args, sizeof(type));			\
+		value = *(typeof(type) *)args;				\
+	}								\
+	args += sizeof(type);						\
+	value;								\
+})
+
+	/* Make sure end is always >= buf */
+	if (end < buf) {
+		end = ((void *)-1);
+		size = end - buf;
+	}
+
+	while (*fmt) {
+		int read;
+		const char *old_fmt = fmt;
+
+		read = format_decode(fmt, &spec);
+
+		fmt += read;
+
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE: {
+			int copy = read;
+			if (str < end) {
+				if (copy > end - str)
+					copy = end - str;
+				memcpy(str, old_fmt, copy);
+			}
+			str += read;
+			break;
+		}
+
+		case FORMAT_TYPE_WIDTH:
+			spec.field_width = get_arg(int);
+			break;
+
+		case FORMAT_TYPE_PRECISION:
+			spec.precision = get_arg(int);
+			break;
+
+		case FORMAT_TYPE_CHAR:
+			if (!(spec.flags & LEFT)) {
+				while (--spec.field_width > 0) {
+					if (str < end)
+						*str = ' ';
+					++str;
+				}
+			}
+			c = (unsigned char) get_arg(char);
+			if (str < end)
+				*str = c;
+			++str;
+			while (--spec.field_width > 0) {
+				if (str < end)
+					*str = ' ';
+				++str;
+			}
+			break;
+
+		case FORMAT_TYPE_STR: {
+			const char *str_arg = args;
+			size_t len = strlen(str_arg);
+			args += len + 1;
+			str = string(str, end, (char *)str_arg, spec);
+			break;
+		}
+
+		case FORMAT_TYPE_PTR:
+			str = pointer(fmt+1, str, end, get_arg(void *), spec);
+			while (isalnum(*fmt))
+				fmt++;
+			break;
+
+		case FORMAT_TYPE_PERCENT_CHAR:
+			if (str < end)
+				*str = '%';
+			++str;
+			break;
+
+		case FORMAT_TYPE_INVALID:
+			if (str < end)
+				*str = '%';
+			++str;
+			if (*fmt) {
+				if (str < end)
+					*str = *fmt;
+				++str;
+			} else {
+				--fmt;
+			}
+			break;
+
+		case FORMAT_TYPE_NRCHARS:
+			/* skip */
+			break;
+
+		default:
+			switch (spec.type) {
+
+			case FORMAT_TYPE_LONG_LONG:
+				num = get_arg(long long);
+				break;
+			case FORMAT_TYPE_ULONG:
+				num = get_arg(unsigned long);
+				break;
+			case FORMAT_TYPE_LONG:
+				num = get_arg(unsigned long);
+				break;
+			case FORMAT_TYPE_SIZE_T:
+				num = get_arg(size_t);
+				break;
+			case FORMAT_TYPE_PTRDIFF:
+				num = get_arg(ptrdiff_t);
+				break;
+			case FORMAT_TYPE_USHORT:
+				num = get_arg(unsigned short);
+				break;
+			case FORMAT_TYPE_SHORT:
+				num = get_arg(short);
+				break;
+			case FORMAT_TYPE_UINT:
+				num = get_arg(unsigned int);
+				break;
+			default:
+				num = get_arg(int);
+			}
+
+			str = number(str, end, num, spec);
+		}
+	}
+
+	if (size > 0) {
+		if (str < end)
+			*str = '\0';
+		else
+			end[-1] = '\0';
+	}
+
+#undef get_arg
+
+	/* the trailing null byte doesn't count towards the total */
+	return str - buf;
+}
+EXPORT_SYMBOL_GPL(bstr_printf);
+
+/**
+ * bprintf - Parse a format string and place args' binary value in a buffer
+ * @bin_buf: The buffer to place args' binary value
+ * @size: The size of the buffer (in 32-bit words, not characters)
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The function returns the number of words(u32) that would be
+ * generated for the given input, as returned by vbin_printf().
+ */
+int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = vbin_printf(bin_buf, size, fmt, args);
+	va_end(args);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bprintf);
+
+#endif /* CONFIG_BINARY_PRINTF */
+
 /**
  * vsscanf - Unformat a buffer into a list of arguments
  * @buf:	input buffer
diff --git a/lib/zlib_inflate/inflate.h b/lib/zlib_inflate/inflate.h
index df8a6c92052d..3d17b3d1b21f 100644
--- a/lib/zlib_inflate/inflate.h
+++ b/lib/zlib_inflate/inflate.h
@@ -1,3 +1,6 @@
+#ifndef INFLATE_H
+#define INFLATE_H
+
 /* inflate.h -- internal inflate state definition
  * Copyright (C) 1995-2004 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
@@ -105,3 +108,4 @@ struct inflate_state {
     unsigned short work[288];   /* work area for code table building */
     code codes[ENOUGH];         /* space for code tables */
 };
+#endif
diff --git a/lib/zlib_inflate/inftrees.h b/lib/zlib_inflate/inftrees.h
index 5f5219b1240e..b70b4731ac7a 100644
--- a/lib/zlib_inflate/inftrees.h
+++ b/lib/zlib_inflate/inftrees.h
@@ -1,3 +1,6 @@
+#ifndef INFTREES_H
+#define INFTREES_H
+
 /* inftrees.h -- header to use inftrees.c
  * Copyright (C) 1995-2005 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
@@ -53,3 +56,4 @@ typedef enum {
 extern int zlib_inflate_table (codetype type, unsigned short *lens,
                              unsigned codes, code **table,
                              unsigned *bits, unsigned short *work);
+#endif