diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/DocBook/mtdnand.tmpl | 12 | ||||
-rw-r--r-- | Documentation/IO-mapping.txt | 2 | ||||
-rw-r--r-- | Documentation/PCI/PCI-DMA-mapping.txt (renamed from Documentation/DMA-mapping.txt) | 0 | ||||
-rw-r--r-- | Documentation/block/biodoc.txt | 2 | ||||
-rw-r--r-- | Documentation/cpu-freq/governors.txt | 4 | ||||
-rw-r--r-- | Documentation/dontdiff | 1 | ||||
-rw-r--r-- | Documentation/filesystems/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/filesystems/ceph.txt | 139 | ||||
-rw-r--r-- | Documentation/filesystems/ext4.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/logfs.txt | 241 | ||||
-rw-r--r-- | Documentation/hwmon/k10temp | 17 | ||||
-rw-r--r-- | Documentation/ioctl/ioctl-number.txt | 1 | ||||
-rw-r--r-- | Documentation/sound/alsa/Procfile.txt | 2 | ||||
-rw-r--r-- | Documentation/vgaarbiter.txt | 2 | ||||
-rw-r--r-- | Documentation/video4linux/gspca.txt | 1 |
15 files changed, 408 insertions, 20 deletions
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index f508a8a27fea..5e7d84b48505 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -174,7 +174,7 @@ </para> <programlisting> static struct mtd_info *board_mtd; -static unsigned long baseaddr; +static void __iomem *baseaddr; </programlisting> <para> Static example @@ -182,7 +182,7 @@ static unsigned long baseaddr; <programlisting> static struct mtd_info board_mtd; static struct nand_chip board_chip; -static unsigned long baseaddr; +static void __iomem *baseaddr; </programlisting> </sect1> <sect1 id="Partition_defines"> @@ -283,8 +283,8 @@ int __init board_init (void) } /* map physical address */ - baseaddr = (unsigned long)ioremap(CHIP_PHYSICAL_ADDRESS, 1024); - if(!baseaddr){ + baseaddr = ioremap(CHIP_PHYSICAL_ADDRESS, 1024); + if (!baseaddr) { printk("Ioremap to access NAND chip failed\n"); err = -EIO; goto out_mtd; @@ -316,7 +316,7 @@ int __init board_init (void) goto out; out_ior: - iounmap((void *)baseaddr); + iounmap(baseaddr); out_mtd: kfree (board_mtd); out: @@ -341,7 +341,7 @@ static void __exit board_cleanup (void) nand_release (board_mtd); /* unmap physical address */ - iounmap((void *)baseaddr); + iounmap(baseaddr); /* Free the MTD device structure */ kfree (board_mtd); diff --git a/Documentation/IO-mapping.txt b/Documentation/IO-mapping.txt index 78a440695e11..1b5aa10df845 100644 --- a/Documentation/IO-mapping.txt +++ b/Documentation/IO-mapping.txt @@ -157,7 +157,7 @@ For such memory, you can do things like * access only the 640k-1MB area, so anything else * has to be remapped. 
*/ - char * baseptr = ioremap(0xFC000000, 1024*1024); + void __iomem *baseptr = ioremap(0xFC000000, 1024*1024); /* write a 'A' to the offset 10 of the area */ writeb('A',baseptr+10); diff --git a/Documentation/DMA-mapping.txt b/Documentation/PCI/PCI-DMA-mapping.txt index ecad88d9fe59..ecad88d9fe59 100644 --- a/Documentation/DMA-mapping.txt +++ b/Documentation/PCI/PCI-DMA-mapping.txt diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 8d2158a1c6aa..6fab97ea7e6b 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address do not have a corresponding kernel virtual address space mapping) and low-memory pages. -Note: Please refer to Documentation/DMA-mapping.txt for a discussion +Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion on PCI high mem DMA aspects and mapping of scatter gather lists, and support for 64 bit PCI. diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index aed082f49d09..737988fca64d 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -145,8 +145,8 @@ show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT. up_threshold: defines what the average CPU usage between the samplings of 'sampling_rate' needs to be for the kernel to make a decision on whether it should increase the frequency. For example when it is set -to its default value of '80' it means that between the checking -intervals the CPU needs to be on average more than 80% in use to then +to its default value of '95' it means that between the checking +intervals the CPU needs to be on average more than 95% in use to then decide that the CPU frequency needs to be increased. ignore_nice_load: this parameter takes a value of '0' or '1'. 
When diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 3ad6acead949..d9bcffd59433 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -69,7 +69,6 @@ av_permissions.h bbootsect bin2c binkernel.spec -binoffset bootsect bounds.h bsetup diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 875d49696b6e..5139b8c9d5af 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -62,6 +62,8 @@ jfs.txt - info and mount options for the JFS filesystem. locks.txt - info on file locking implementations, flock() vs. fcntl(), etc. +logfs.txt + - info on the LogFS flash filesystem. mandatory-locking.txt - info on the Linux implementation of Sys V mandatory file locking. ncpfs.txt diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt new file mode 100644 index 000000000000..6e03917316bd --- /dev/null +++ b/Documentation/filesystems/ceph.txt @@ -0,0 +1,139 @@ +Ceph Distributed File System +============================ + +Ceph is a distributed network file system designed to provide good +performance, reliability, and scalability. + +Basic features include: + + * POSIX semantics + * Seamless scaling from 1 to many thousands of nodes + * High availability and reliability. No single points of failure. + * N-way replication of data across storage nodes + * Fast recovery from node failures + * Automatic rebalancing of data on node addition/removal + * Easy deployment: most FS components are userspace daemons + +Also, + * Flexible snapshots (on any directory) + * Recursive accounting (nested files, directories, bytes) + +In contrast to cluster filesystems like GFS, OCFS2, and GPFS that rely +on symmetric access by all clients to shared block devices, Ceph +separates data and metadata management into independent server +clusters, similar to Lustre. Unlike Lustre, however, metadata and +storage nodes run entirely as user space daemons. 
Storage nodes +utilize btrfs to store data objects, leveraging its advanced features +(checksumming, metadata replication, etc.). File data is striped +across storage nodes in large chunks to distribute workload and +facilitate high throughputs. When storage nodes fail, data is +re-replicated in a distributed fashion by the storage nodes themselves +(with some minimal coordination from a cluster monitor), making the +system extremely efficient and scalable. + +Metadata servers effectively form a large, consistent, distributed +in-memory cache above the file namespace that is extremely scalable, +dynamically redistributes metadata in response to workload changes, +and can tolerate arbitrary (well, non-Byzantine) node failures. The +metadata server takes a somewhat unconventional approach to metadata +storage to significantly improve performance for common workloads. In +particular, inodes with only a single link are embedded in +directories, allowing entire directories of dentries and inodes to be +loaded into its cache with a single I/O operation. The contents of +extremely large directories can be fragmented and managed by +independent metadata servers, allowing scalable concurrent access. + +The system offers automatic data rebalancing/migration when scaling +from a small cluster of just a few nodes to many hundreds, without +requiring an administrator carve the data set into static volumes or +go through the tedious process of migrating data between servers. +When the file system approaches full, new nodes can be easily added +and things will "just work." + +Ceph includes flexible snapshot mechanism that allows a user to create +a snapshot on any subdirectory (and its nested contents) in the +system. Snapshot creation and deletion are as simple as 'mkdir +.snap/foo' and 'rmdir .snap/foo'. + +Ceph also provides some recursive accounting on directories for nested +files and bytes. 
That is, a 'getfattr -d foo' on any directory in the +system will reveal the total number of nested regular files and +subdirectories, and a summation of all nested file sizes. This makes +the identification of large disk space consumers relatively quick, as +no 'du' or similar recursive scan of the file system is required. + + +Mount Syntax +============ + +The basic mount syntax is: + + # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt + +You only need to specify a single monitor, as the client will get the +full list when it connects. (However, if the monitor you specify +happens to be down, the mount won't succeed.) The port can be left +off if the monitor is using the default. So if the monitor is at +1.2.3.4, + + # mount -t ceph 1.2.3.4:/ /mnt/ceph + +is sufficient. If /sbin/mount.ceph is installed, a hostname can be +used instead of an IP address. + + + +Mount Options +============= + + ip=A.B.C.D[:N] + Specify the IP and/or port the client should bind to locally. + There is normally not much reason to do this. If the IP is not + specified, the client's IP address is determined by looking at the + address its connection to the monitor originates from. + + wsize=X + Specify the maximum write size in bytes. By default there is no + maximum. Ceph will normally size writes based on the file stripe + size. + + rsize=X + Specify the maximum readahead. + + mount_timeout=X + Specify the timeout value for mount (in seconds), in the case + of a non-responsive Ceph file system. The default is 30 + seconds. + + rbytes + When stat() is called on a directory, set st_size to 'rbytes', + the summation of file sizes over all files nested beneath that + directory. This is the default. + + norbytes + When stat() is called on a directory, set st_size to the + number of entries in that directory. + + nocrc + Disable CRC32C calculation for data writes. If set, the OSD + must rely on TCP's error correction to detect data corruption + in the data payload.
+ + noasyncreaddir + Disable client's use of its local cache to satisfy readdir + requests. (This does not change correctness; the client uses + cached metadata only when a lease or capability ensures it is + valid.) + + +More Information +================ + +For more information on Ceph, see the home page at + http://ceph.newdream.net/ + +The Linux kernel client source tree is available at + git://ceph.newdream.net/linux-ceph-client.git + +and the source for the full system is at + git://ceph.newdream.net/ceph.git diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index af6885c3c821..e1def1786e50 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -196,7 +196,7 @@ nobarrier This also requires an IO stack which can support also be used to enable or disable barriers, for consistency with other ext4 mount options. -inode_readahead=n This tuning parameter controls the maximum +inode_readahead_blks=n This tuning parameter controls the maximum number of inode table blocks that ext4's inode table readahead algorithm will pre-read into the buffer cache. The default value is 32 blocks. diff --git a/Documentation/filesystems/logfs.txt b/Documentation/filesystems/logfs.txt new file mode 100644 index 000000000000..e64c94ba401a --- /dev/null +++ b/Documentation/filesystems/logfs.txt @@ -0,0 +1,241 @@ + +The LogFS Flash Filesystem +========================== + +Specification +============= + +Superblocks +----------- + +Two superblocks exist at the beginning and end of the filesystem. +Each superblock is 256 Bytes large, with another 3840 Bytes reserved +for future purposes, making a total of 4096 Bytes. + +Superblock locations may differ for MTD and block devices. On MTD the +first non-bad block contains a superblock in the first 4096 Bytes and +the last non-bad block contains a superblock in the last 4096 Bytes.
+On block devices, the first 4096 Bytes of the device contain the first +superblock and the last aligned 4096 Byte-block contains the second +superblock. + +For the most part, the superblocks can be considered read-only. They +are written only to correct errors detected within the superblocks, +move the journal and change the filesystem parameters through tunefs. +As a result, the superblock does not contain any fields that require +constant updates, like the amount of free space, etc. + +Segments +-------- + +The space in the device is split up into equal-sized segments. +Segments are the primary write unit of LogFS. Within each segment, +writes happen from front (low addresses) to back (high addresses). If +only a partial segment has been written, the segment number, the +current position within and optionally a write buffer are stored in +the journal. + +Segments are erased as a whole. Therefore Garbage Collection may be +required to completely free a segment before doing so. + +Journal +-------- + +The journal contains all global information about the filesystem that +is subject to frequent change. At mount time, it has to be scanned +for the most recent commit entry, which contains a list of pointers to +all currently valid entries. + +Object Store +------------ + +All space except for the superblocks and journal is part of the object +store. Each segment contains a segment header and a number of +objects, each consisting of the object header and the payload. +Objects are either inodes, directory entries (dentries), file data +blocks or indirect blocks. + +Levels +------ + +Garbage collection (GC) may fail if all data is written +indiscriminately. One requirement of GC is that data is separated +roughly according to the distance between the tree root and the data. +Effectively that means all file data is on level 0, indirect blocks +are on levels 1, 2, 3, 4 or 5 for 1x, 2x, 3x, 4x or 5x indirect blocks, +respectively.
Inode file data is on level 6 for the inodes and 7-11 +for indirect blocks. + +Each segment contains objects of a single level only. As a result, +each level requires its own separate segment to be open for writing. + +Inode File +---------- + +All inodes are stored in a special file, the inode file. Single +exception is the inode file's inode (master inode) which for obvious +reasons is stored in the journal instead. Instead of data blocks, the +leaf nodes of the inode files are inodes. + +Aliases +------- + +Writes in LogFS are done by means of a wandering tree. A naïve +implementation would require that for each write of a block, all +parent blocks are written as well, since the block pointers have +changed. Such an implementation would not be very efficient. + +In LogFS, the block pointer changes are cached in the journal by means +of alias entries. Each alias consists of its logical address - inode +number, block index, level and child number (index into block) - and +the changed data. Any 8-byte word can be changed in this manner. + +Currently aliases are used for block pointers, file size, file used +bytes and the height of an inode's indirect tree. + +Segment Aliases +--------------- + +Related to regular aliases, these are used to handle bad blocks. +Initially, bad blocks are handled by moving the affected segment +content to a spare segment and noting this move in the journal with a +segment alias, a simple (to, from) tuple. GC will later empty this +segment and the alias can be removed again. This is used on MTD only. + +Vim +--- + +By cleverly predicting the life time of data, it is possible to +separate long-living data from short-living data and thereby reduce +the GC overhead later. Each type of distinct life expectancy (vim) can +have a separate segment open for writing. Each (level, vim) tuple can +be open just once. If an open segment with unknown vim is encountered +at mount time, it is closed and ignored henceforth.
+ +Indirect Tree +------------- + +Inodes in LogFS are similar to FFS-style filesystems with direct and +indirect block pointers. One difference is that LogFS uses a single +indirect pointer that can be either a 1x, 2x, etc. indirect pointer. +A height field in the inode defines the height of the indirect tree +and thereby the indirection of the pointer. + +Another difference is the addressing of indirect blocks. In LogFS, +the first 16 pointers in the first indirect block are left empty, +corresponding to the 16 direct pointers in the inode. In ext2 (maybe +others as well) the first pointer in the first indirect block +corresponds to logical block 12, skipping the 12 direct pointers. +So where ext2 is using arithmetic to better utilize space, LogFS keeps +arithmetic simple and uses compression to save space. + +Compression +----------- + +Both file data and metadata can be compressed. Compression for file +data can be enabled with chattr +c and disabled with chattr -c. Doing +so has no effect on existing data, but new data will be stored +accordingly. New inodes will inherit the compression flag of the +parent directory. + +Metadata is always compressed. However, the space accounting ignores +this and charges for the uncompressed size. Failing to do so could +result in GC failures when, after moving some data, indirect blocks +compress worse than previously. Even on a 100% full medium, GC may +not consume any extra space, so the compression gains are lost space +to the user. + +However, they are not lost space to the filesystem internals. By +cheating the user for those bytes, the filesystem gained some slack +space and GC will run less often and faster. + +Garbage Collection and Wear Leveling +------------------------------------ + +Garbage collection is invoked whenever the number of free segments +falls below a threshold. The best (known) candidate is picked based +on the least amount of valid data contained in the segment. 
All +remaining valid data is copied elsewhere, thereby invalidating it. + +The GC code also checks for aliases and writes them back if their +number gets too large. + +Wear leveling is done by occasionally picking a suboptimal segment for +garbage collection. If a stale segment's erase count is significantly +lower than the active segments' erase counts, it will be picked. Wear +leveling is rate limited, so it will never monopolize the device for +more than one segment worth at a time. + +Values for "occasionally", "significantly lower" are compile time +constants. + +Hashed directories +------------------ + +To satisfy efficient lookup(), directory entries are hashed and +located based on the hash. In order to both support large directories +and not be overly inefficient for small directories, several hash +tables of increasing size are used. For each table, the hash value +modulo the table size gives the table index. + +Table sizes are chosen to limit the number of indirect blocks with a +fully populated table to 0, 1, 2 or 3 respectively. So the first +table contains 16 entries, the second 512-16, etc. + +The last table is special in several ways. First its size depends on +the effective 32bit limit on telldir/seekdir cookies. Since logfs +uses the upper half of the address space for indirect blocks, the size +is limited to 2^31. Secondly the table contains hash buckets with 16 +entries each. + +Using single-entry buckets would result in birthday "attacks". At +just 2^16 used entries, hash collisions would be likely (P >= 0.5). +My math skills are insufficient to do the combinatorics for the 17x +collisions necessary to overflow a bucket, but testing showed that in +10,000 runs the lowest directory fill before a bucket overflow was +188,057,130 entries with an average of 315,149,915 entries. So for +directory sizes of up to a million, bucket overflows should be +virtually impossible under normal circumstances.
+ +With carefully chosen filenames, it is obviously possible to cause an +overflow with just 21 entries (4 higher tables + 16 entries + 1). So +there may be a security concern if a malicious user has write access +to a directory. + +Open For Discussion +=================== + +Device Address Space +-------------------- + +A device address space is used for caching. Both block devices and +MTD provide functions to either read a single page or write a segment. +Partial segments may be written for data integrity, but where possible +complete segments are written for performance on simple block device +flash media. + +Meta Inodes +----------- + +Inodes are stored in the inode file, which is just a regular file for +most purposes. At umount time, however, the inode file needs to +remain open until all dirty inodes are written. So +generic_shutdown_super() may not close this inode, but shouldn't +complain about remaining inodes due to the inode file either. Same +goes for mapping inode of the device address space. + +Currently logfs uses a hack that essentially copies part of fs/inode.c +code over. A general solution would be preferred. + +Indirect block mapping +---------------------- + +With compression, the block device (or mapping inode) cannot be used +to cache indirect blocks. Some other place is required. Currently +logfs uses the top half of each inode's address space. The low 8TB +(on 32bit) are filled with file data, the high 8TB are used for +indirect blocks. + +One problem is that 16TB files created on 64bit systems actually have +data in the top 8TB. But files >16TB would cause problems anyway, so +only the limit has changed. 
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp index a7a18d453a51..6526eee525a6 100644 --- a/Documentation/hwmon/k10temp +++ b/Documentation/hwmon/k10temp @@ -3,8 +3,8 @@ Kernel driver k10temp Supported chips: * AMD Family 10h processors: - Socket F: Quad-Core/Six-Core/Embedded Opteron - Socket AM2+: Opteron, Phenom (II) X3/X4 + Socket F: Quad-Core/Six-Core/Embedded Opteron (but see below) + Socket AM2+: Quad-Core Opteron, Phenom (II) X3/X4, Athlon X2 (but see below) Socket AM3: Quad-Core Opteron, Athlon/Phenom II X2/X3/X4, Sempron II Socket S1G3: Athlon II, Sempron, Turion II * AMD Family 11h processors: @@ -36,10 +36,15 @@ Description This driver permits reading of the internal temperature sensor of AMD Family 10h and 11h processors. -All these processors have a sensor, but on older revisions of Family 10h -processors, the sensor may return inconsistent values (erratum 319). The -driver will refuse to load on these revisions unless you specify the -"force=1" module parameter. +All these processors have a sensor, but on those for Socket F or AM2+, +the sensor may return inconsistent values (erratum 319). The driver +will refuse to load on these revisions unless you specify the "force=1" +module parameter. + +Due to technical reasons, the driver can detect only the mainboard's +socket type, not the processor's actual capabilities. Therefore, if you +are using an AM3 processor on an AM2+ mainboard, you can safely use the +"force=1" parameter. There is one temperature measurement value, available as temp1_input in sysfs. It is measured in degrees Celsius with a resolution of 1/8th degree. 
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 947374977ca5..91cfdd76131e 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -182,6 +182,7 @@ Code Seq# Include File Comments <http://www.proximity.com.au/~brian/winradio/> 0x90 00 drivers/cdrom/sbpcd.h 0x93 60-7F linux/auto_fs.h +0x97 00-7F fs/ceph/ioctl.h Ceph file system 0x99 00-0F 537-Addinboard driver <mailto:buk@buks.ipn.de> 0xA0 all linux/sdp/sdp.h Industrial Device Project diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt index 719a819f8cc2..07301de12cc4 100644 --- a/Documentation/sound/alsa/Procfile.txt +++ b/Documentation/sound/alsa/Procfile.txt @@ -95,7 +95,7 @@ card*/pcm*/xrun_debug It takes an integer value, can be changed by writing to this file, such as - # cat 5 > /proc/asound/card0/pcm0p/xrun_debug + # echo 5 > /proc/asound/card0/pcm0p/xrun_debug The value consists of the following bit flags: bit 0 = Enable XRUN/jiffies debug messages diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt index 987f9b0a5ece..43a9b0694fdd 100644 --- a/Documentation/vgaarbiter.txt +++ b/Documentation/vgaarbiter.txt @@ -103,7 +103,7 @@ I.2 libpciaccess ---------------- To use the vga arbiter char device it was implemented an API inside the -libpciaccess library. One fieldd was added to struct pci_device (each device +libpciaccess library. 
One field was added to struct pci_device (each device on the system): /* the type of resource decoded by the device */ diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 1800a62cf135..98ee599b4eb8 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -142,6 +142,7 @@ sunplus 04fc:5360 Sunplus Generic spca500 04fc:7333 PalmPixDC85 sunplus 04fc:ffff Pure DigitalDakota spca501 0506:00df 3Com HomeConnect Lite +sunplus 052b:1507 Megapixel 5 Pretec DC-1007 sunplus 052b:1513 Megapix V4 sunplus 052b:1803 MegaImage VI tv8532 0545:808b Veo Stingray |