15 files changed, 408 insertions, 20 deletions
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index f508a8a27fea..5e7d84b48505 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -174,7 +174,7 @@
 		</para>
 		<programlisting>
 static struct mtd_info *board_mtd;
-static unsigned long baseaddr;
+static void __iomem *baseaddr;
 		</programlisting>
 		<para>
 			Static example
@@ -182,7 +182,7 @@ static unsigned long baseaddr;
 		<programlisting>
 static struct mtd_info board_mtd;
 static struct nand_chip board_chip;
-static unsigned long baseaddr;
+static void __iomem *baseaddr;
 		</programlisting>
 	</sect1>
 	<sect1 id="Partition_defines">
@@ -283,8 +283,8 @@ int __init board_init (void)
 	}
 
 	/* map physical address */
-	baseaddr = (unsigned long)ioremap(CHIP_PHYSICAL_ADDRESS, 1024);
-	if(!baseaddr){
+	baseaddr = ioremap(CHIP_PHYSICAL_ADDRESS, 1024);
+	if (!baseaddr) {
 		printk("Ioremap to access NAND chip failed\n");
 		err = -EIO;
 		goto out_mtd;
@@ -316,7 +316,7 @@ int __init board_init (void)
 	goto out;
 
 out_ior:
-	iounmap((void *)baseaddr);
+	iounmap(baseaddr);
 out_mtd:
 	kfree (board_mtd);
 out:
@@ -341,7 +341,7 @@ static void __exit board_cleanup (void)
 	nand_release (board_mtd);
 
 	/* unmap physical address */
-	iounmap((void *)baseaddr);
+	iounmap(baseaddr);
 	
 	/* Free the MTD device structure */
 	kfree (board_mtd);
diff --git a/Documentation/IO-mapping.txt b/Documentation/IO-mapping.txt
index 78a440695e11..1b5aa10df845 100644
--- a/Documentation/IO-mapping.txt
+++ b/Documentation/IO-mapping.txt
@@ -157,7 +157,7 @@ For such memory, you can do things like
 	 * access only the 640k-1MB area, so anything else
 	 * has to be remapped.
 	 */
-	char * baseptr = ioremap(0xFC000000, 1024*1024);
+	void __iomem *baseptr = ioremap(0xFC000000, 1024*1024);
 
 	/* write a 'A' to the offset 10 of the area */
 	writeb('A',baseptr+10);
diff --git a/Documentation/DMA-mapping.txt b/Documentation/PCI/PCI-DMA-mapping.txt
index ecad88d9fe59..ecad88d9fe59 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/PCI/PCI-DMA-mapping.txt
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 8d2158a1c6aa..6fab97ea7e6b 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address
 do not have a corresponding kernel virtual address space mapping) and
 low-memory pages.
 
-Note: Please refer to Documentation/DMA-mapping.txt for a discussion
+Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion
 on PCI high mem DMA aspects and mapping of scatter gather lists, and support
 for 64 bit PCI.
 
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index aed082f49d09..737988fca64d 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -145,8 +145,8 @@ show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT.
 up_threshold: defines what the average CPU usage between the samplings
 of 'sampling_rate' needs to be for the kernel to make a decision on
 whether it should increase the frequency.  For example when it is set
-to its default value of '80' it means that between the checking
-intervals the CPU needs to be on average more than 80% in use to then
+to its default value of '95' it means that between the checking
+intervals the CPU needs to be on average more than 95% in use to then
 decide that the CPU frequency needs to be increased.  
 
 ignore_nice_load: this parameter takes a value of '0' or '1'. When
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 3ad6acead949..d9bcffd59433 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -69,7 +69,6 @@ av_permissions.h
 bbootsect
 bin2c
 binkernel.spec
-binoffset
 bootsect
 bounds.h
 bsetup
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 875d49696b6e..5139b8c9d5af 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -62,6 +62,8 @@ jfs.txt
 	- info and mount options for the JFS filesystem.
 locks.txt
 	- info on file locking implementations, flock() vs. fcntl(), etc.
+logfs.txt
+	- info on the LogFS flash filesystem.
 mandatory-locking.txt
 	- info on the Linux implementation of Sys V mandatory file locking.
 ncpfs.txt
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
new file mode 100644
index 000000000000..6e03917316bd
--- /dev/null
+++ b/Documentation/filesystems/ceph.txt
@@ -0,0 +1,139 @@
+Ceph Distributed File System
+============================
+
+Ceph is a distributed network file system designed to provide good
+performance, reliability, and scalability.
+
+Basic features include:
+
+ * POSIX semantics
+ * Seamless scaling from 1 to many thousands of nodes
+ * High availability and reliability.  No single points of failure.
+ * N-way replication of data across storage nodes
+ * Fast recovery from node failures
+ * Automatic rebalancing of data on node addition/removal
+ * Easy deployment: most FS components are userspace daemons
+
+Also,
+ * Flexible snapshots (on any directory)
+ * Recursive accounting (nested files, directories, bytes)
+
+In contrast to cluster filesystems like GFS, OCFS2, and GPFS that rely
+on symmetric access by all clients to shared block devices, Ceph
+separates data and metadata management into independent server
+clusters, similar to Lustre.  Unlike Lustre, however, metadata and
+storage nodes run entirely as user space daemons.  Storage nodes
+utilize btrfs to store data objects, leveraging its advanced features
+(checksumming, metadata replication, etc.).  File data is striped
+across storage nodes in large chunks to distribute workload and
+facilitate high throughputs.  When storage nodes fail, data is
+re-replicated in a distributed fashion by the storage nodes themselves
+(with some minimal coordination from a cluster monitor), making the
+system extremely efficient and scalable.
+
+Metadata servers effectively form a large, consistent, distributed
+in-memory cache above the file namespace that is extremely scalable,
+dynamically redistributes metadata in response to workload changes,
+and can tolerate arbitrary (well, non-Byzantine) node failures.  The
+metadata server takes a somewhat unconventional approach to metadata
+storage to significantly improve performance for common workloads.  In
+particular, inodes with only a single link are embedded in
+directories, allowing entire directories of dentries and inodes to be
+loaded into its cache with a single I/O operation.  The contents of
+extremely large directories can be fragmented and managed by
+independent metadata servers, allowing scalable concurrent access.
+
+The system offers automatic data rebalancing/migration when scaling
+from a small cluster of just a few nodes to many hundreds, without
+requiring an administrator to carve the data set into static volumes or
+go through the tedious process of migrating data between servers.
+When the file system approaches full, new nodes can be easily added
+and things will "just work."
+
+Ceph includes a flexible snapshot mechanism that allows a user to create
+a snapshot on any subdirectory (and its nested contents) in the
+system.  Snapshot creation and deletion are as simple as 'mkdir
+.snap/foo' and 'rmdir .snap/foo'.
+
+Ceph also provides some recursive accounting on directories for nested
+files and bytes.  That is, a 'getfattr -d foo' on any directory in the
+system will reveal the total number of nested regular files and
+subdirectories, and a summation of all nested file sizes.  This makes
+the identification of large disk space consumers relatively quick, as
+no 'du' or similar recursive scan of the file system is required.
+
+
+Mount Syntax
+============
+
+The basic mount syntax is:
+
+ # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
+
+You only need to specify a single monitor, as the client will get the
+full list when it connects.  (However, if the monitor you specify
+happens to be down, the mount won't succeed.)  The port can be left
+off if the monitor is using the default.  So if the monitor is at
+1.2.3.4,
+
+ # mount -t ceph 1.2.3.4:/ /mnt/ceph
+
+is sufficient.  If /sbin/mount.ceph is installed, a hostname can be
+used instead of an IP address.
+
+
+
+Mount Options
+=============
+
+  ip=A.B.C.D[:N]
+	Specify the IP and/or port the client should bind to locally.
+	There is normally not much reason to do this.  If the IP is not
+	specified, the client's IP address is determined by looking at the
+	address its connection to the monitor originates from.
+
+  wsize=X
+	Specify the maximum write size in bytes.  By default there is no
+	maximum.  Ceph will normally size writes based on the file stripe
+	size.
+
+  rsize=X
+	Specify the maximum readahead.
+
+  mount_timeout=X
+	Specify the timeout value for mount (in seconds), in the case
+	of a non-responsive Ceph file system.  The default is 30
+	seconds.
+
+  rbytes
+	When stat() is called on a directory, set st_size to 'rbytes',
+	the summation of file sizes over all files nested beneath that
+	directory.  This is the default.
+
+  norbytes
+	When stat() is called on a directory, set st_size to the
+	number of entries in that directory.
+
+  nocrc
+	Disable CRC32C calculation for data writes.  If set, the OSD
+	must rely on TCP's error correction to detect data corruption
+	in the data payload.
+
+  noasyncreaddir
+	Disable client's use of its local cache to satisfy readdir
+	requests.  (This does not change correctness; the client uses
+	cached metadata only when a lease or capability ensures it is
+	valid.)
+
+
+More Information
+================
+
+For more information on Ceph, see the home page at
+	http://ceph.newdream.net/
+
+The Linux kernel client source tree is available at
+	git://ceph.newdream.net/linux-ceph-client.git
+
+and the source for the full system is at
+	git://ceph.newdream.net/ceph.git
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index af6885c3c821..e1def1786e50 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -196,7 +196,7 @@ nobarrier		This also requires an IO stack which can support
 			also be used to enable or disable barriers, for
 			consistency with other ext4 mount options.
 
-inode_readahead=n	This tuning parameter controls the maximum
+inode_readahead_blks=n	This tuning parameter controls the maximum
 			number of inode table blocks that ext4's inode
 			table readahead algorithm will pre-read into
 			the buffer cache.  The default value is 32 blocks.
diff --git a/Documentation/filesystems/logfs.txt b/Documentation/filesystems/logfs.txt
new file mode 100644
index 000000000000..e64c94ba401a
--- /dev/null
+++ b/Documentation/filesystems/logfs.txt
@@ -0,0 +1,241 @@
+
+The LogFS Flash Filesystem
+==========================
+
+Specification
+=============
+
+Superblocks
+-----------
+
+Two superblocks exist at the beginning and end of the filesystem.
+Each superblock is 256 Bytes large, with another 3840 Bytes reserved
+for future purposes, making a total of 4096 Bytes.
+
+Superblock locations may differ for MTD and block devices.  On MTD the
+first non-bad block contains a superblock in the first 4096 Bytes and
+the last non-bad block contains a superblock in the last 4096 Bytes.
+On block devices, the first 4096 Bytes of the device contain the first
+superblock and the last aligned 4096 Byte-block contains the second
+superblock.
+
+For the most part, the superblocks can be considered read-only.  They
+are written only to correct errors detected within the superblocks,
+move the journal and change the filesystem parameters through tunefs.
+As a result, the superblock does not contain any fields that require
+constant updates, like the amount of free space, etc.
+
+Segments
+--------
+
+The space in the device is split up into equal-sized segments.
+Segments are the primary write unit of LogFS.  Within each segment,
+writes happen from front (low addresses) to back (high addresses).  If
+only a partial segment has been written, the segment number, the
+current position within and optionally a write buffer are stored in
+the journal.
+
+Segments are erased as a whole.  Therefore Garbage Collection may be
+required to completely free a segment before doing so.
+
+Journal
+-------
+
+The journal contains all global information about the filesystem that
+is subject to frequent change.  At mount time, it has to be scanned
+for the most recent commit entry, which contains a list of pointers to
+all currently valid entries.
+
+Object Store
+------------
+
+All space except for the superblocks and journal is part of the object
+store.  Each segment contains a segment header and a number of
+objects, each consisting of the object header and the payload.
+Objects are either inodes, directory entries (dentries), file data
+blocks or indirect blocks.
+
+Levels
+------
+
+Garbage collection (GC) may fail if all data is written
+indiscriminately.  One requirement of GC is that data is separated
+roughly according to the distance between the tree root and the data.
+Effectively that means all file data is on level 0, indirect blocks
+are on levels 1, 2, 3, 4 or 5 for 1x, 2x, 3x, 4x or 5x indirect blocks,
+respectively.  Inode file data is on level 6 for the inodes and 7-11
+for indirect blocks.
+
+Each segment contains objects of a single level only.  As a result,
+each level requires its own separate segment to be open for writing.
+
+Inode File
+----------
+
+All inodes are stored in a special file, the inode file.  The single
+exception is the inode file's inode (master inode) which for obvious
+reasons is stored in the journal instead.  Instead of data blocks, the
+leaf nodes of the inode files are inodes.
+
+Aliases
+-------
+
+Writes in LogFS are done by means of a wandering tree.  A naïve
+implementation would require that for each write of a block, all
+parent blocks are written as well, since the block pointers have
+changed.  Such an implementation would not be very efficient.
+
+In LogFS, the block pointer changes are cached in the journal by means
+of alias entries.  Each alias consists of its logical address - inode
+number, block index, level and child number (index into block) - and
+the changed data.  Any 8-byte word can be changed in this manner.
+
+Currently aliases are used for block pointers, file size, file used
+bytes and the height of an inode's indirect tree.
+
+Segment Aliases
+---------------
+
+Related to regular aliases, these are used to handle bad blocks.
+Initially, bad blocks are handled by moving the affected segment
+content to a spare segment and noting this move in the journal with a
+segment alias, a simple (to, from) tuple.  GC will later empty this
+segment and the alias can be removed again.  This is used on MTD only.
+
+Vim
+---
+
+By cleverly predicting the life time of data, it is possible to
+separate long-living data from short-living data and thereby reduce
+the GC overhead later.  Each type of distinct life expectancy (vim) can
+have a separate segment open for writing.  Each (level, vim) tuple can
+be open just once.  If an open segment with unknown vim is encountered
+at mount time, it is closed and ignored henceforth.
+
+Indirect Tree
+-------------
+
+Inodes in LogFS are similar to FFS-style filesystems with direct and
+indirect block pointers.  One difference is that LogFS uses a single
+indirect pointer that can be either a 1x, 2x, etc. indirect pointer.
+A height field in the inode defines the height of the indirect tree
+and thereby the indirection of the pointer.
+
+Another difference is the addressing of indirect blocks.  In LogFS,
+the first 16 pointers in the first indirect block are left empty,
+corresponding to the 16 direct pointers in the inode.  In ext2 (maybe
+others as well) the first pointer in the first indirect block
+corresponds to logical block 12, skipping the 12 direct pointers.
+So where ext2 is using arithmetic to better utilize space, LogFS keeps
+arithmetic simple and uses compression to save space.
+
+Compression
+-----------
+
+Both file data and metadata can be compressed.  Compression for file
+data can be enabled with chattr +c and disabled with chattr -c.  Doing
+so has no effect on existing data, but new data will be stored
+accordingly.  New inodes will inherit the compression flag of the
+parent directory.
+
+Metadata is always compressed.  However, the space accounting ignores
+this and charges for the uncompressed size.  Failing to do so could
+result in GC failures when, after moving some data, indirect blocks
+compress worse than previously.  Even on a 100% full medium, GC may
+not consume any extra space, so the compression gains are lost space
+to the user.
+
+However, they are not lost space to the filesystem internals.  By
+cheating the user for those bytes, the filesystem gained some slack
+space and GC will run less often and faster.
+
+Garbage Collection and Wear Leveling
+------------------------------------
+
+Garbage collection is invoked whenever the number of free segments
+falls below a threshold.  The best (known) candidate is picked based
+on the least amount of valid data contained in the segment.  All
+remaining valid data is copied elsewhere, thereby invalidating it.
+
+The GC code also checks for aliases and writes them back if their
+number gets too large.
+
+Wear leveling is done by occasionally picking a suboptimal segment for
+garbage collection.  If a stale segment's erase count is significantly
+lower than the active segments' erase counts, it will be picked.  Wear
+leveling is rate limited, so it will never monopolize the device for
+more than one segment worth at a time.
+
+Values for "occasionally", "significantly lower" are compile time
+constants.
+
+Hashed directories
+------------------
+
+To satisfy efficient lookup(), directory entries are hashed and
+located based on the hash.  In order to both support large directories
+and not be overly inefficient for small directories, several hash
+tables of increasing size are used.  For each table, the hash value
+modulo the table size gives the table index.
+
+Table sizes are chosen to limit the number of indirect blocks with a
+fully populated table to 0, 1, 2 or 3 respectively.  So the first
+table contains 16 entries, the second 512-16, etc.
+
+The last table is special in several ways.  First its size depends on
+the effective 32bit limit on telldir/seekdir cookies.  Since logfs
+uses the upper half of the address space for indirect blocks, the size
+is limited to 2^31.  Secondly the table contains hash buckets with 16
+entries each.
+
+Using single-entry buckets would result in birthday "attacks".  At
+just 2^16 used entries, hash collisions would be likely (P >= 0.5).
+My math skills are insufficient to do the combinatorics for the 17x
+collisions necessary to overflow a bucket, but testing showed that in
+10,000 runs the lowest directory fill before a bucket overflow was
+188,057,130 entries with an average of 315,149,915 entries.  So for
+directory sizes of up to a million, bucket overflows should be
+virtually impossible under normal circumstances.
+
+With carefully chosen filenames, it is obviously possible to cause an
+overflow with just 21 entries (4 higher tables + 16 entries + 1).  So
+there may be a security concern if a malicious user has write access
+to a directory.
+
+Open For Discussion
+===================
+
+Device Address Space
+--------------------
+
+A device address space is used for caching.  Both block devices and
+MTD provide functions to either read a single page or write a segment.
+Partial segments may be written for data integrity, but where possible
+complete segments are written for performance on simple block device
+flash media.
+
+Meta Inodes
+-----------
+
+Inodes are stored in the inode file, which is just a regular file for
+most purposes.  At umount time, however, the inode file needs to
+remain open until all dirty inodes are written.  So
+generic_shutdown_super() may not close this inode, but shouldn't
+complain about remaining inodes due to the inode file either.  Same
+goes for the mapping inode of the device address space.
+
+Currently logfs uses a hack that essentially copies part of fs/inode.c
+code over.  A general solution would be preferred.
+
+Indirect block mapping
+----------------------
+
+With compression, the block device (or mapping inode) cannot be used
+to cache indirect blocks.  Some other place is required.  Currently
+logfs uses the top half of each inode's address space.  The low 8TB
+(on 32bit) are filled with file data, the high 8TB are used for
+indirect blocks.
+
+One problem is that 16TB files created on 64bit systems actually have
+data in the top 8TB.  But files >16TB would cause problems anyway, so
+only the limit has changed.
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp
index a7a18d453a51..6526eee525a6 100644
--- a/Documentation/hwmon/k10temp
+++ b/Documentation/hwmon/k10temp
@@ -3,8 +3,8 @@ Kernel driver k10temp
 
 Supported chips:
 * AMD Family 10h processors:
-  Socket F: Quad-Core/Six-Core/Embedded Opteron
-  Socket AM2+: Opteron, Phenom (II) X3/X4
+  Socket F: Quad-Core/Six-Core/Embedded Opteron (but see below)
+  Socket AM2+: Quad-Core Opteron, Phenom (II) X3/X4, Athlon X2 (but see below)
   Socket AM3: Quad-Core Opteron, Athlon/Phenom II X2/X3/X4, Sempron II
   Socket S1G3: Athlon II, Sempron, Turion II
 * AMD Family 11h processors:
@@ -36,10 +36,15 @@ Description
 This driver permits reading of the internal temperature sensor of AMD
 Family 10h and 11h processors.
 
-All these processors have a sensor, but on older revisions of Family 10h
-processors, the sensor may return inconsistent values (erratum 319). The
-driver will refuse to load on these revisions unless you specify the
-"force=1" module parameter.
+All these processors have a sensor, but on those for Socket F or AM2+,
+the sensor may return inconsistent values (erratum 319).  The driver
+will refuse to load on these revisions unless you specify the "force=1"
+module parameter.
+
+Due to technical reasons, the driver can detect only the mainboard's
+socket type, not the processor's actual capabilities.  Therefore, if you
+are using an AM3 processor on an AM2+ mainboard, you can safely use the
+"force=1" parameter.
 
 There is one temperature measurement value, available as temp1_input in
 sysfs. It is measured in degrees Celsius with a resolution of 1/8th degree.
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 947374977ca5..91cfdd76131e 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -182,6 +182,7 @@ Code	Seq#	Include File		Comments
 					<http://www.proximity.com.au/~brian/winradio/>
 0x90	00	drivers/cdrom/sbpcd.h
 0x93	60-7F	linux/auto_fs.h
+0x97	00-7F	fs/ceph/ioctl.h		Ceph file system
 0x99	00-0F				537-Addinboard driver
 					<mailto:buk@buks.ipn.de>
 0xA0	all	linux/sdp/sdp.h		Industrial Device Project
diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt
index 719a819f8cc2..07301de12cc4 100644
--- a/Documentation/sound/alsa/Procfile.txt
+++ b/Documentation/sound/alsa/Procfile.txt
@@ -95,7 +95,7 @@ card*/pcm*/xrun_debug
 	It takes an integer value, can be changed by writing to this
 	file, such as
 
-		 # cat 5 > /proc/asound/card0/pcm0p/xrun_debug
+		 # echo 5 > /proc/asound/card0/pcm0p/xrun_debug
 
 	The value consists of the following bit flags:
 	  bit 0 = Enable XRUN/jiffies debug messages
diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt
index 987f9b0a5ece..43a9b0694fdd 100644
--- a/Documentation/vgaarbiter.txt
+++ b/Documentation/vgaarbiter.txt
@@ -103,7 +103,7 @@ I.2 libpciaccess
 ----------------
 
 To use the vga arbiter char device it was implemented an API inside the
-libpciaccess library. One fieldd was added to struct pci_device (each device
+libpciaccess library. One field was added to struct pci_device (each device
 on the system):
 
     /* the type of resource decoded by the device */
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
index 1800a62cf135..98ee599b4eb8 100644
--- a/Documentation/video4linux/gspca.txt
+++ b/Documentation/video4linux/gspca.txt
@@ -142,6 +142,7 @@ sunplus		04fc:5360	Sunplus Generic
 spca500		04fc:7333	PalmPixDC85
 sunplus		04fc:ffff	Pure DigitalDakota
 spca501		0506:00df	3Com HomeConnect Lite
+sunplus		052b:1507	Megapixel 5 Pretec DC-1007
 sunplus		052b:1513	Megapix V4
 sunplus		052b:1803	MegaImage VI
 tv8532		0545:808b	Veo Stingray