diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2013-04-19 17:41:48 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2013-04-19 17:41:48 +1000 |
commit | 4dbdb686ef7ac0633321a0a44af5ea83fe802f7c (patch) | |
tree | 7b88a6f22f6006e72d644dbb6b2e323934a1876f | |
parent | 80343d9e1b5fbf8920813c8b47cb2428bca086bf (diff) | |
parent | c9941b7ec7840ad33f5822c7f238157558d40132 (diff) |
Merge branch 'akpm/master'
761 files changed, 15079 insertions, 8716 deletions
@@ -761,6 +761,10 @@ S: Northampton S: NN1 3QT S: United Kingdom +N: Massimo Dal Zotto +E: dz@debian.org +D: i8k Dell laptop SMM driver + N: Uwe Dannowski E: Uwe.Dannowski@ira.uka.de W: http://i30www.ira.uka.de/~dannowsk/ diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 17b91e012de0..1178e2357acb 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -40,6 +40,7 @@ Features: - soft limit - moving (recharging) account at moving a task is selectable. - usage threshold notifier + - memory pressure notifier - oom-killer disable knob and oom-notifier - Root cgroup has no limit controls. @@ -65,11 +66,13 @@ Brief summary of control files. memory.stat # show various statistics memory.use_hierarchy # set/show hierarchical account enabled memory.force_empty # trigger forced move charge to parent + memory.pressure_level # set memory pressure notifications memory.swappiness # set/show swappiness parameter of vmscan (See sysctl's vm.swappiness) memory.move_charge_at_immigrate # set/show controls of moving charges memory.oom_control # set/show oom controls. memory.numa_stat # show the number of memory usage per numa node + memory.dangling_memcgs # show debugging information about dangling groups memory.kmem.limit_in_bytes # set/show hard limit for kernel memory memory.kmem.usage_in_bytes # show current kernel memory allocation @@ -577,6 +580,21 @@ unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ... And we have total = file + anon + unevictable. +5.7 dangling_memcgs + +This file will only be ever present in the root cgroup, if the option +CONFIG_MEMCG_DEBUG_ASYNC_DESTROY is set. When a memcg is destroyed, the memory +consumed by it may not be immediately freed. This is because when some +extensions are used, such as swap or kernel memory, objects can outlive the +group and hold a reference to it. + +If this is the case, the dangling_memcgs file will show information about what +are the memcgs still alive, and which references are still preventing it to be +freed. There is nothing wrong with that, but it is very useful when debugging, +to know where this memory is being held. This is a developer-oriented debugging +facility only, and no guarantees of interface stability will be given. The file +is read-only, and has the sole purpose of displaying information. + 6. Hierarchy support The memory controller supports a deep hierarchy and hierarchical accounting. @@ -762,7 +780,73 @@ At reading, current status of OOM is shown. under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may be stopped.) -11. TODO +11. Memory Pressure + +The pressure level notifications can be used to monitor the memory +allocation cost; based on the pressure, applications can implement +different strategies of managing their memory resources. The pressure +levels are defined as following: + +The "low" level means that the system is reclaiming memory for new +allocations. Monitoring this reclaiming activity might be useful for +maintaining cache level. Upon notification, the program (typically +"Activity Manager") might analyze vmstat and act in advance (i.e. +prematurely shutdown unimportant services). + +The "medium" level means that the system is experiencing medium memory +pressure, the system might be making swap, paging out active file caches, +etc. Upon this event applications may decide to further analyze +vmstat/zoneinfo/memcg or internal memory usage statistics and free any +resources that can be easily reconstructed or re-read from a disk. + +The "critical" level means that the system is actively thrashing, it is +about to out of memory (OOM) or even the in-kernel OOM killer is on its +way to trigger. Applications should do whatever they can to help the +system. It might be too late to consult with vmstat or any other +statistics, so it's advisable to take an immediate action. + +The events are propagated upward until the event is handled, i.e. the +events are not pass-through. Here is what this means: for example you have +three cgroups: A->B->C. Now you set up an event listener on cgroups A, B +and C, and suppose group C experiences some pressure. In this situation, +only group C will receive the notification, i.e. groups A and B will not +receive it. This is done to avoid excessive "broadcasting" of messages, +which disturbs the system and which is especially bad if we are low on +memory or thrashing. So, organize the cgroups wisely, or propagate the +events manually (or, ask us to implement the pass-through events, +explaining why would you need them.) + +The file memory.pressure_level is only used to setup an eventfd. To +register a notification, an application must: + +- create an eventfd using eventfd(2); +- open memory.pressure_level; +- write string like "<event_fd> <fd of memory.pressure_level> <level>" + to cgroup.event_control. + +Application will be notified through eventfd when memory pressure is at +the specific level (or higher). Read/write operations to +memory.pressure_level are no implemented. + +Test: + + Here is a small script example that makes a new cgroup, sets up a + memory limit, sets up a notification in the cgroup and then makes child + cgroup experience a critical pressure: + + # cd /sys/fs/cgroup/memory/ + # mkdir foo + # cd foo + # cgroup_event_listener memory.pressure_level low & + # echo 8000000 > memory.limit_in_bytes + # echo 8000000 > memory.memsw.limit_in_bytes + # echo $$ > tasks + # dd if=/dev/zero | read x + + (Expect a bunch of notifications, and eventually, the oom-killer will + trigger.) + +12. TODO 1. Add support for accounting huge pages (as a separate controller) 2. Make per-cgroup scanner reclaim not-shared pages first diff --git a/Documentation/devicetree/bindings/media/coda.txt b/Documentation/devicetree/bindings/media/coda.txt new file mode 100644 index 000000000000..2865d04e4030 --- /dev/null +++ b/Documentation/devicetree/bindings/media/coda.txt @@ -0,0 +1,30 @@ +Chips&Media Coda multi-standard codec IP +======================================== + +Coda codec IPs are present in i.MX SoCs in various versions, +called VPU (Video Processing Unit). + +Required properties: +- compatible : should be "fsl,<chip>-src" for i.MX SoCs: + (a) "fsl,imx27-vpu" for CodaDx6 present in i.MX27 + (b) "fsl,imx53-vpu" for CODA7541 present in i.MX53 + (c) "fsl,imx6q-vpu" for CODA960 present in i.MX6q +- reg: should be register base and length as documented in the + SoC reference manual +- interrupts : Should contain the VPU interrupt. For CODA960, + a second interrupt is needed for the MJPEG unit. +- clocks : Should contain the ahb and per clocks, in the order + determined by the clock-names property. +- clock-names : Should be "ahb", "per" +- iram : phandle pointing to the SRAM device node + +Example: + +vpu: vpu@63ff4000 { + compatible = "fsl,imx53-vpu"; + reg = <0x63ff4000 0x1000>; + interrupts = <9>; + clocks = <&clks 63>, <&clks 63>; + clock-names = "ahb", "per"; + iram = <&ocram>; +}; diff --git a/Documentation/devicetree/bindings/misc/sram.txt b/Documentation/devicetree/bindings/misc/sram.txt new file mode 100644 index 000000000000..4d0a00e453a8 --- /dev/null +++ b/Documentation/devicetree/bindings/misc/sram.txt @@ -0,0 +1,16 @@ +Generic on-chip SRAM + +Simple IO memory regions to be managed by the genalloc API. + +Required properties: + +- compatible : mmio-sram + +- reg : SRAM iomem address range + +Example: + +sram: sram@5c000000 { + compatible = "mmio-sram"; + reg = <0x5c000000 0x40000>; /* 256 KiB SRAM at address 0x5c000000 */ +}; diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt new file mode 100644 index 000000000000..2a3feabd3b22 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt @@ -0,0 +1,15 @@ +Atmel AT91RM9200 Real Time Clock + +Required properties: +- compatible: should be: "atmel,at91rm9200-rtc" +- reg: physical base address of the controller and length of memory mapped + region. +- interrupts: rtc alarm/event interrupt + +Example: + +rtc@fffffe00 { + compatible = "atmel,at91rm9200-rtc"; + reg = <0xfffffe00 0x100>; + interrupts = <1 4 7>; +}; diff --git a/Documentation/devicetree/bindings/video/simple-framebuffer.txt b/Documentation/devicetree/bindings/video/simple-framebuffer.txt new file mode 100644 index 000000000000..3ea460583111 --- /dev/null +++ b/Documentation/devicetree/bindings/video/simple-framebuffer.txt @@ -0,0 +1,25 @@ +Simple Framebuffer + +A simple frame-buffer describes a raw memory region that may be rendered to, +with the assumption that the display hardware has already been set up to scan +out from that buffer. + +Required properties: +- compatible: "simple-framebuffer" +- reg: Should contain the location and size of the framebuffer memory. +- width: The width of the framebuffer in pixels. +- height: The height of the framebuffer in pixels. +- stride: The number of bytes in each line of the framebuffer. +- format: The format of the framebuffer surface. Valid values are: + - r5g6b5 (16-bit pixels, d[15:11]=r, d[10:5]=g, d[4:0]=b). + +Example: + + framebuffer { + compatible = "simple-framebuffer"; + reg = <0x1d385000 (1600 * 1200 * 2)>; + width = <1600>; + height = <1200>; + stride = <(1600 * 2)>; + format = "r5g6b5"; + }; diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fd8d0d594fc7..488c0940f3d8 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -473,7 +473,8 @@ This file is only present if the CONFIG_MMU kernel configuration option is enabled. The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG -bits on both physical and virtual pages associated with a process. +bits on both physical and virtual pages associated with a process, and the +soft-dirty bit on pte (see Documentation/vm/soft-dirty.txt for details). To clear the bits for all the pages associated with the process > echo 1 > /proc/PID/clear_refs @@ -482,11 +483,17 @@ To clear the bits for the anonymous pages associated with the process To clear the bits for the file mapped pages associated with the process > echo 3 > /proc/PID/clear_refs + +To clear the soft-dirty bit + > echo 4 > /proc/PID/clear_refs + Any other value written to /proc/PID/clear_refs will have no effect. The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags using /proc/kpageflags and number of times a page is mapped using /proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. +(There's also a /proc/pid/pagemap2 file which is the 2nd version of the + pagemap one). 1.2 Kernel data --------------- diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index d230dd9c99b0..4a93e98b290a 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -150,12 +150,28 @@ discard -- If set, issues discard/TRIM commands to the block device when blocks are freed. This is useful for SSD devices and sparse/thinly-provisoned LUNs. -nfs -- This option maintains an index (cache) of directory - inodes by i_logstart which is used by the nfs-related code to - improve look-ups. +nfs=stale_rw|nostale_ro + Enable this only if you want to export the FAT filesystem + over NFS. + + stale_rw: This option maintains an index (cache) of directory + inodes by i_logstart which is used by the nfs-related code to + improve look-ups. Full file operations (read/write) over NFS is + supported but with cache eviction at NFS server, this could + result in ESTALE issues. + + nostale_ro: This option bases the inode number and filehandle + on the on-disk location of a file in the MS-DOS directory entry. + This ensures that ESTALE will not be returned after a file is + evicted from the inode cache. However, it means that operations + such as rename, create and unlink could cause filehandles that + previously pointed at one file to point at a different file, + potentially causing data corruption. For this reason, this + option also mounts the filesystem readonly. + + To maintain backward compatibility, '-o nfs' is also accepted, + defaulting to stale_rw - Enable this only if you want to export the FAT filesystem - over NFS <bool>: 0,1,yes,no,true,false diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 078701fdbd4d..a5717c38834a 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -18,6 +18,7 @@ files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: +- admin_reserve_kbytes - block_dump - compact_memory - dirty_background_bytes @@ -53,11 +54,41 @@ Currently, these files are in /proc/sys/vm: - percpu_pagelist_fraction - stat_interval - swappiness +- user_reserve_kbytes - vfs_cache_pressure - zone_reclaim_mode ============================================================== +admin_reserve_kbytes + +The amount of free memory in the system that should be reserved for users +with the capability cap_sys_admin. + +admin_reserve_kbytes defaults to min(3% of free pages, 8MB) + +That should provide enough for the admin to log in and kill a process, +if necessary, under the default overcommit 'guess' mode. + +Systems running under overcommit 'never' should increase this to account +for the full Virtual Memory Size of programs used to recover. Otherwise, +root may not be able to log in to recover the system. + +How do you calculate a minimum useful reserve? + +sshd or login + bash (or some other shell) + top (or ps, kill, etc.) + +For overcommit 'guess', we can sum resident set sizes (RSS). +On x86_64 this is about 8MB. + +For overcommit 'never', we can take the max of their virtual sizes (VSZ) +and add the sum of their RSS. +On x86_64 this is about 128MB. + +Changing this takes effect whenever an application requests memory. + +============================================================== + block_dump block_dump enables block I/O debugging when set to a nonzero value. More @@ -138,18 +169,39 @@ Setting this to zero disables periodic writeback altogether. drop_caches -Writing to this will cause the kernel to drop clean caches, dentries and -inodes from memory, causing that memory to become free. +Writing to this will cause the kernel to drop clean caches, as well as +reclaimable slab objects like dentries and inodes. Once dropped, their +memory becomes free. To free pagecache: echo 1 > /proc/sys/vm/drop_caches -To free dentries and inodes: +To free reclaimable slab objects (includes dentries and inodes): echo 2 > /proc/sys/vm/drop_caches -To free pagecache, dentries and inodes: +To free slab objects and pagecache: echo 3 > /proc/sys/vm/drop_caches -As this is a non-destructive operation and dirty objects are not freeable, the -user should run `sync' first. +This is a non-destructive operation and will not free any dirty objects. +To increase the number of objects freed by this operation, the user may run +`sync' prior to writing to /proc/sys/vm/drop_caches. This will minimize the +number of dirty objects on the system and create more candidates to be +dropped. + +This file is not a means to control the growth of the various kernel caches +(inodes, dentries, pagecache, etc...) These objects are automatically +reclaimed by the kernel when memory is needed elsewhere on the system. + +Use of this file can cause performance problems. Since it discards cached +objects, it may cost a significant amount of I/O and CPU to recreate the +dropped objects, especially if they were under heavy use. Because of this, +use outside of a testing or debugging environment is not recommended. + +You may see informational messages in your kernel log when this file is +used: + + cat (1234): dropped kernel caches: 3 + +These are informational only. They do not mean that anything is wrong +with your system. ============================================================== @@ -542,6 +594,7 @@ memory until it actually runs out. When this flag is 2, the kernel uses a "never overcommit" policy that attempts to prevent any overcommit of memory. +Note that user_reserve_kbytes affects this policy. This feature can be very useful because there are a lot of programs that malloc() huge amounts of memory "just-in-case" @@ -645,6 +698,24 @@ The default value is 60. ============================================================== +- user_reserve_kbytes + +When overcommit_memory is set to 2, "never overommit" mode, reserve +min(3% of current process size, user_reserve_kbytes) of free memory. +This is intended to prevent a user from starting a single memory hogging +process, such that they cannot recover (kill the hog). + +user_reserve_kbytes defaults to min(3% of the current process size, 128MB). + +If this is reduced to zero, then the user will be allowed to allocate +all free memory with a single process, minus admin_reserve_kbytes. +Any subsequent attempts to execute a command will result in +"fork: Cannot allocate memory". + +Changing this takes effect whenever an application requests memory. + +============================================================== + vfs_cache_pressure ------------------ diff --git a/Documentation/vm/overcommit-accounting b/Documentation/vm/overcommit-accounting index 706d7ed9d8d2..8eaa2fc4b8fa 100644 --- a/Documentation/vm/overcommit-accounting +++ b/Documentation/vm/overcommit-accounting @@ -8,7 +8,9 @@ The Linux kernel supports the following overcommit handling modes default. 1 - Always overcommit. Appropriate for some scientific - applications. + applications. Classic example is code using sparse arrays + and just relying on the virtual memory consisting almost + entirely of zero pages. 2 - Don't overcommit. The total address space commit for the system is not permitted to exceed swap + a @@ -18,6 +20,10 @@ The Linux kernel supports the following overcommit handling modes pages but will receive errors on memory allocation as appropriate. + Useful for applications that want to guarantee their + memory allocations will be available in the future + without having to initialize every page. + The overcommit policy is set via the sysctl `vm.overcommit_memory'. The overcommit percentage is set via `vm.overcommit_ratio'. diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt index 7587493c67f1..394cc03c8df6 100644 --- a/Documentation/vm/pagemap.txt +++ b/Documentation/vm/pagemap.txt @@ -30,6 +30,11 @@ There are three components to pagemap: determine which areas of memory are actually mapped and llseek to skip over unmapped regions. + * /proc/pid/pagemap2. This file provides the same info as the pagemap + does, but bits 56-60 are reserved for future use and thus zero + + Bit 55 means pte is soft-dirty (see Documentation/vm/soft-dirty.txt) + * /proc/kpagecount. This file contains a 64-bit count of the number of times each page is mapped, indexed by PFN. diff --git a/Documentation/vm/soft-dirty.txt b/Documentation/vm/soft-dirty.txt new file mode 100644 index 000000000000..9a12a5956bc0 --- /dev/null +++ b/Documentation/vm/soft-dirty.txt @@ -0,0 +1,36 @@ + SOFT-DIRTY PTEs + + The soft-dirty is a bit on a PTE which helps to track which pages a task +writes to. In order to do this tracking one should + + 1. Clear soft-dirty bits from the task's PTEs. + + This is done by writing "4" into the /proc/PID/clear_refs file of the + task in question. + + 2. Wait some time. + + 3. Read soft-dirty bits from the PTEs. + + This is done by reading from the /proc/PID/pagemap. The bit 55 of the + 64-bit qword is the soft-dirty one. If set, the respective PTE was + written to since step 1. + + + Internally, to do this tracking, the writable bit is cleared from PTEs +when the soft-dirty bit is cleared. So, after this, when the task tries to +modify a page at some virtual address the #PF occurs and the kernel sets +the soft-dirty bit on the respective PTE. + + Note, that although all the task's address space is marked as r/o after the +soft-dirty bits clear, the #PF-s that occur after that are processed fast. +This is so, since the pages are still mapped to physical memory, and thus all +the kernel does is finds this fact out and puts both writable and soft-dirty +bits on the PTE. + + + This feature is actively used by the checkpoint-restore project. You +can find more details about it on http://criu.org + + +-- Pavel Emelyanov, Apr 9, 2013 diff --git a/MAINTAINERS b/MAINTAINERS index 63adbfb5663c..6c0d68b64a13 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -90,6 +90,9 @@ Descriptions of section entries: F: drivers/net/* all files in drivers/net, but not below F: */net/* all files in "any top level directory"/net One pattern per line. Multiple F: lines acceptable. + N: Files and directories with regex patterns. + N: [^a-z]tegra all files whose path contains the word tegra + One pattern per line. Multiple N: lines acceptable. X: Files and directories that are NOT maintained, same rules as F: Files exclusions are tested before file matches. Can be useful for excluding a specific subdirectory, for instance: @@ -97,13 +100,12 @@ Descriptions of section entries: X: net/ipv6/ matches all files in and below net excluding net/ipv6/ K: Keyword perl extended regex pattern to match content in a - patch or file, or an affected filename. For instance: + patch or file. For instance: K: of_get_profile - matches patch or file content, or filenames, that contain - "of_get_profile" + matches patches or files that contain "of_get_profile" K: \b(printk|pr_(info|err))\b - matches patch or file content, or filenames, that contain one or - more of the words printk, pr_info or pr_err + matches patches or files that contain one or more of the words + printk, pr_info or pr_err One regex pattern per line. Multiple K: lines acceptable. Note: For the hard of thinking, this list is meant to remain in alphabetical @@ -2471,9 +2473,7 @@ S: Maintained F: drivers/platform/x86/dell-laptop.c DELL LAPTOP SMM DRIVER -M: Massimo Dal Zotto <dz@debian.org> -W: http://www.debian.org/~dz/i8k/ -S: Maintained +S: Orphan F: drivers/char/i8k.c F: include/uapi/linux/i8k.h @@ -7988,7 +7988,7 @@ L: linux-tegra@vger.kernel.org Q: http://patchwork.ozlabs.org/project/linux-tegra/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra.git S: Supported -K: (?i)[^a-z]tegra +N: [^a-z]tegra TEHUTI ETHERNET DRIVER M: Andy Gospodarek <andy@greyhouse.net> @@ -8474,9 +8474,10 @@ S: Maintained F: drivers/usb/serial/option.c USB PEGASUS DRIVER -M: Petko Manolov <petkan@users.sourceforge.net> +M: Petko Manolov <petkan@nucleusys.com> L: linux-usb@vger.kernel.org L: netdev@vger.kernel.org +T: git git://git.code.sf.net/p/pegasus2/git W: http://pegasus2.sourceforge.net/ S: Maintained F: drivers/net/usb/pegasus.* @@ -8496,9 +8497,10 @@ S: Supported F: drivers/usb/class/usblp.c USB RTL8150 DRIVER -M: Petko Manolov <petkan@users.sourceforge.net> +M: Petko Manolov <petkan@nucleusys.com> L: linux-usb@vger.kernel.org L: netdev@vger.kernel.org +T: git git://git.code.sf.net/p/pegasus2/git W: http://pegasus2.sourceforge.net/ S: Maintained F: drivers/net/usb/rtl8150.c diff --git a/arch/Kconfig b/arch/Kconfig index 5ac8ac6a8286..820b784dbb1e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -365,6 +365,9 @@ config HAVE_IRQ_TIME_ACCOUNTING config HAVE_ARCH_TRANSPARENT_HUGEPAGE bool +config HAVE_ARCH_SOFT_DIRTY + bool + config HAVE_MOD_ARCH_SPECIFIC bool help diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index a3fd8a29ccac..ab80a80d38a2 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -175,6 +175,7 @@ machine_power_off(void) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); dik_show_regs(regs, NULL); } diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 1383f8601a93..1d4aabfcf9a1 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -185,7 +185,6 @@ nautilus_machine_check(unsigned long vector, unsigned long la_ptr) mb(); } -extern void free_reserved_mem(void *, void *); extern void pcibios_claim_one_bus(struct pci_bus *); static struct resource irongate_io = { @@ -239,8 +238,8 @@ nautilus_init_pci(void) if (pci_mem < memtop) memtop = pci_mem; if (memtop > alpha_mv.min_mem_address) { - free_reserved_mem(__va(alpha_mv.min_mem_address), - __va(memtop)); + free_reserved_area((unsigned long)__va(alpha_mv.min_mem_address), + (unsigned long)__va(memtop), 0, NULL); printk("nautilus_init_pci: %ldk freed\n", (memtop - alpha_mv.min_mem_address) >> 10); } diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 4037461a6493..affccb959a9e 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -169,13 +169,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) dik_show_trace(sp); } -void dump_stack(void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) { diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 1ad6ca74bed2..0ba85ee4a466 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -31,6 +31,7 @@ #include <asm/console.h> #include <asm/tlb.h> #include <asm/setup.h> +#include <asm/sections.h> extern void die_if_kernel(char *,struct pt_regs *,long); @@ -281,8 +282,6 @@ printk_memory_info(void) { unsigned long codesize, reservedpages, datasize, initsize, tmp; extern int page_is_ram(unsigned long) __init; - extern char _text, _etext, _data, _edata; - extern char __init_begin, __init_end; /* printk all informations */ reservedpages = 0; @@ -318,32 +317,15 @@ mem_init(void) #endif /* CONFIG_DISCONTIGMEM */ void -free_reserved_mem(void *start, void *end) -{ - void *__start = start; - for (; __start < end; __start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(__start)); - init_page_count(virt_to_page(__start)); - free_page((long)__start); - totalram_pages++; - } -} - -void free_initmem(void) { - extern char __init_begin, __init_end; - - free_reserved_mem(&__init_begin, &__init_end); - printk ("Freeing unused kernel memory: %ldk freed\n", - (&__init_end - &__init_begin) >> 10); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_mem((void *)start, (void *)end); - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 3973ae395772..33885048fa36 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -17,6 +17,7 @@ #include <asm/hwrpb.h> #include <asm/pgalloc.h> +#include <asm/sections.h> pg_data_t node_data[MAX_NUMNODES]; EXPORT_SYMBOL(node_data); @@ -325,8 +326,6 @@ void __init mem_init(void) { unsigned long codesize, reservedpages, datasize, initsize, pfn; extern int page_is_ram(unsigned long) __init; - extern char _text, _etext, _data, _edata; - extern char __init_begin, __init_end; unsigned long nid, i; high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index a63ff842564b..ca0207b9d5b6 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -220,13 +220,6 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) show_stacktrace(tsk, NULL); } -/* Expected by Rest of kernel code */ -void dump_stack(void) -{ - show_stacktrace(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - /* Another API expected by schedular, shows up in "ps" as Wait Channel * Ofcourse just returning schedule( ) would be pointless so unwind until * the function is not in schedular code diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index d94fa120a9f7..c672c8380a3a 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -75,7 +75,7 @@ void print_task_path_n_nm(struct task_struct *tsk, char *buf) } done: - pr_info("%s, TGID %u\n", path_nm, tsk->tgid); + pr_info("Path: %s\n", path_nm); } EXPORT_SYMBOL(print_task_path_n_nm); @@ -171,6 +171,7 @@ void show_regs(struct pt_regs *regs) return; print_task_path_n_nm(tsk, buf); + show_regs_print_info(KERN_INFO); if (current->thread.cause_code) show_ecr_verbose(regs); diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 6634cf50e3b4..4a177365b2c4 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -141,37 +141,18 @@ void __init mem_init(void) PAGES_TO_KB(reserved_pages)); } -static void __init free_init_pages(const char *what, unsigned long begin, - unsigned long end) -{ - unsigned long addr; - - pr_info("Freeing %s: %ldk [%lx] to [%lx]\n", - what, TO_KB(end - begin), begin, end); - - /* need to check that the page we free is not a partial page */ - for (addr = begin; addr + PAGE_SIZE <= end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } -} - /* * free_initmem: Free all the __init memory. */ void __init_refok free_initmem(void) { - free_init_pages("unused kernel memory", - (unsigned long)__init_begin, - (unsigned long)__init_end); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", start, end); + free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7233c055710c..10c34665979d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -40,6 +40,7 @@ config ARM select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)) select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index f79a08efe000..47279aa96a6a 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -6,6 +6,7 @@ piggy.gzip piggy.lzo piggy.lzma piggy.xzkern +piggy.lz4 vmlinux vmlinux.lds diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 3580d57ea218..001a13a0cf6f 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -27,6 +27,9 @@ OBJS += misc.o decompress.o ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y) OBJS += debug.o endif +ifeq ($(CONFIG_KERNEL_LZ4),y) +CFLAGS_decompress.o := -Os +endif FONTC = $(srctree)/drivers/video/console/font_acorn_8x8.c # string library code (-Os is enforced to keep it much smaller) @@ -91,6 +94,7 @@ suffix_$(CONFIG_KERNEL_GZIP) = gzip suffix_$(CONFIG_KERNEL_LZO) = lzo suffix_$(CONFIG_KERNEL_LZMA) = lzma suffix_$(CONFIG_KERNEL_XZ) = xzkern +suffix_$(CONFIG_KERNEL_LZ4) = lz4 # Borrowed libfdt files for the ATAG compatibility mode @@ -115,7 +119,7 @@ targets := vmlinux vmlinux.lds \ font.o font.c head.o misc.o $(OBJS) # Make sure files are removed during clean -extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern \ +extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs) ifeq ($(CONFIG_FUNCTION_TRACER),y) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index 24b0475cb8bf..bd245d34952d 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -51,6 +51,10 @@ extern char * strstr(const char * s1, const char *s2); #include "../../../../lib/decompress_unxz.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { return decompress(input, len, NULL, NULL, output, NULL, error); diff --git a/arch/arm/boot/compressed/piggy.lz4.S b/arch/arm/boot/compressed/piggy.lz4.S new file mode 100644 index 000000000000..3d9a575618a3 --- /dev/null +++ b/arch/arm/boot/compressed/piggy.lz4.S @@ -0,0 +1,6 @@ + .section .piggydata,#alloc + .globl input_data +input_data: + .incbin "arch/arm/boot/compressed/piggy.lz4" + .globl input_data_end +input_data_end: diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 80d6fc4dbe4a..9bcd262a9008 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -61,6 +61,15 @@ extern void __pgd_error(const char *file, int line, pgd_t); #define FIRST_USER_ADDRESS PAGE_SIZE /* + * Use TASK_SIZE as the ceiling argument for free_pgtables() and + * free_pgd_range() to avoid freeing the modules pmd when LPAE is enabled (pmd + * page shared between user and kernel). + */ +#ifdef CONFIG_ARM_LPAE +#define USER_PGTABLES_CEILING TASK_SIZE +#endif + +/* * The pgprot_* and protection_map entries will be fixed up in runtime * to include the cachable and bufferable bits based on memory policy, * as well as any architecture dependent bits like global/ASID and SMP diff --git a/arch/arm/kernel/early_printk.c b/arch/arm/kernel/early_printk.c index 85aa2b292692..43076536965c 100644 --- a/arch/arm/kernel/early_printk.c +++ b/arch/arm/kernel/early_printk.c @@ -29,28 +29,17 @@ static void early_console_write(struct console *con, const char *s, unsigned n) early_write(s, n); } -static struct console early_console = { +static struct console early_console_dev = { .name = "earlycon", .write = early_console_write, .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; -asmlinkage void early_printk(const char *fmt, ...) -{ - char buf[512]; - int n; - va_list ap; - - va_start(ap, fmt); - n = vscnprintf(buf, sizeof(buf), fmt, ap); - early_write(buf, n); - va_end(ap); -} - static int __init setup_early_printk(char *buf) { - register_console(&early_console); + early_console = &early_console_dev; + register_console(&early_console_dev); return 0; } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f1895e49a11e..f21970316836 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -225,11 +225,8 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; - printk("CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); + print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->ARM_lr); printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n" @@ -284,7 +281,6 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); dump_stack(); } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index ec64571462d2..486e12a0f26a 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -204,13 +204,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) } #endif -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index ad722f1208a5..9a5cdc01fcdf 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -99,6 +99,9 @@ void show_mem(unsigned int filter) printk("Mem-info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; + for_each_bank (i, mi) { struct membank *bank = &mi->bank[i]; unsigned int pfn1, pfn2; @@ -424,24 +427,6 @@ void __init bootmem_init(void) max_pfn = max_high - PHYS_PFN_OFFSET; } -static inline int free_area(unsigned long pfn, unsigned long end, char *s) -{ - unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10); - - for (; pfn < end; pfn++) { - struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - pages++; - } - - if (size && s) - printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); - - return pages; -} - /* * Poison init memory with an undefined instruction (ARM) or a branch to an * undefined instruction (Thumb). @@ -534,6 +519,14 @@ static void __init free_unused_memmap(struct meminfo *mi) #endif } +#ifdef CONFIG_HIGHMEM +static inline void free_area_high(unsigned long pfn, unsigned long end) +{ + for (; pfn < end; pfn++) + free_highmem_page(pfn_to_page(pfn)); +} +#endif + static void __init free_highpages(void) { #ifdef CONFIG_HIGHMEM @@ -569,8 +562,7 @@ static void __init free_highpages(void) if (res_end > end) res_end = end; if (res_start != start) - totalhigh_pages += free_area(start, res_start, - NULL); + free_area_high(start, res_start); start = res_end; if (start == end) break; @@ -578,9 +570,8 @@ static void __init free_highpages(void) /* And now free anything which remains */ if (start < end) - totalhigh_pages += free_area(start, end, NULL); + free_area_high(start, end); } - totalram_pages += totalhigh_pages; #endif } @@ -609,8 +600,7 @@ void __init mem_init(void) #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - totalram_pages += free_area(PHYS_PFN_OFFSET, - __phys_to_pfn(__pa(swapper_pg_dir)), NULL); + free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL); #endif free_highpages(); @@ -738,16 +728,12 @@ void free_initmem(void) extern char __tcm_start, __tcm_end; poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); - totalram_pages += free_area(__phys_to_pfn(__pa(&__tcm_start)), - __phys_to_pfn(__pa(&__tcm_end)), - "TCM link"); + free_reserved_area(&__tcm_start, &__tcm_end, 0, "TCM link"); #endif poison_init_mem(__init_begin, __init_end - __init_begin); if (!machine_is_integrator() && !machine_is_cintegrator()) - totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)), - __phys_to_pfn(__pa(__init_end)), - "init"); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD @@ -758,9 +744,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - totalram_pages += free_area(__phys_to_pfn(__pa(start)), - __phys_to_pfn(__pa(end)), - "initrd"); + free_reserved_area(start, end, 0, "initrd"); } } diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 10062ceadd1c..0c6356255fe3 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -181,11 +181,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 145ec8846daf..23ae690ab07e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -145,11 +145,7 @@ void __show_regs(struct pt_regs *regs) { int i; - printk("CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->regs[30]); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", @@ -166,7 +162,6 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b3c5f628bdb4..61d7dd29f756 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -167,13 +167,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) } } -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 800aac306a08..f497ca77925a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -197,24 +197,6 @@ void __init bootmem_init(void) max_pfn = max_low_pfn = max; } -static inline int free_area(unsigned long pfn, unsigned long end, char *s) -{ - unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10); - - for (; pfn < end; pfn++) { - struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - pages++; - } - - if (size && s) - pr_info("Freeing %s memory: %dK\n", s, size); - - return pages; -} - /* * Poison init memory with an undefined instruction (0x0). */ @@ -405,9 +387,7 @@ void __init mem_init(void) void free_initmem(void) { poison_init_mem(__init_begin, __init_end - __init_begin); - totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)), - __phys_to_pfn(__pa(__init_end)), - "init"); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD @@ -418,9 +398,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - totalram_pages += free_area(__phys_to_pfn(__pa(start)), - __phys_to_pfn(__pa(end)), - "initrd"); + free_reserved_area(start, end, 0, "initrd"); } } diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 7c7be7855638..8ed6cb1a900f 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -90,11 +90,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 70b8cd4021c4..eeecc9c8ed68 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -391,17 +391,14 @@ int kern_addr_valid(unsigned long addr) } #ifdef CONFIG_SPARSEMEM_VMEMMAP #ifdef CONFIG_ARM64_64K_PAGES -int __meminit vmemmap_populate(struct page *start_page, - unsigned long size, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { - return vmemmap_populate_basepages(start_page, size, node); + return vmemmap_populate_basepages(start, end, node); } #else /* !CONFIG_ARM64_64K_PAGES */ -int __meminit vmemmap_populate(struct page *start_page, - unsigned long size, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { - unsigned long addr = (unsigned long)start_page; - unsigned long end = (unsigned long)(start_page + size); + unsigned long addr = start; unsigned long next; pgd_t *pgd; pud_t *pud; @@ -434,7 +431,7 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } #endif /* CONFIG_ARM64_64K_PAGES */ -void vmemmap_free(struct page *memmap, unsigned long nr_pages) +void vmemmap_free(unsigned long start, unsigned long end) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 073c3c2fa521..e7b61494c312 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -204,14 +204,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) show_stack_log_lvl(tsk, (unsigned long)stack, NULL, ""); } -void dump_stack(void) -{ - unsigned long stack; - - show_trace_log_lvl(current, &stack, NULL, ""); -} -EXPORT_SYMBOL(dump_stack); - static const char *cpu_modes[] = { "Application", "Supervisor", "Interrupt level 0", "Interrupt level 1", "Interrupt level 2", "Interrupt level 3", "Exception", "NMI" @@ -223,6 +215,8 @@ void show_regs_log_lvl(struct pt_regs *regs, const char *log_lvl) unsigned long lr = regs->lr; unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT; + show_regs_print_info(log_lvl); + if (!user_mode(regs)) { sp = (unsigned long)regs + FRAME_SIZE_FULL; @@ -260,9 +254,6 @@ void show_regs_log_lvl(struct pt_regs *regs, const char *log_lvl) regs->sr & SR_I0M ? '0' : '.', regs->sr & SR_GM ? 'G' : 'g'); printk("%sCPU Mode: %s\n", log_lvl, cpu_modes[mode]); - printk("%sProcess: %s [%d] (task: %p thread: %p)\n", - log_lvl, current->comm, current->pid, current, - task_thread_info(current)); } void show_regs(struct pt_regs *regs) diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index 2798c2d4a1cf..e66e8406f992 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -146,34 +146,14 @@ void __init mem_init(void) initsize >> 10); } -static inline void free_area(unsigned long addr, unsigned long end, char *s) -{ - unsigned int size = (end - addr) >> 10; - - for (; addr < end; addr += PAGE_SIZE) { - struct page *page = virt_to_page(addr); - ClearPageReserved(page); - init_page_count(page); - free_page(addr); - totalram_pages++; - } - - if (size && s) - printk(KERN_INFO "Freeing %s memory: %dK (%lx - %lx)\n", - s, size, end - (size << 10), end); -} - void free_initmem(void) { - free_area((unsigned long)__init_begin, (unsigned long)__init_end, - "init"); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD - void free_initrd_mem(unsigned long start, unsigned long end) { - free_area(start, end, "initrd"); + free_reserved_area(start, end, 0, "initrd"); } - #endif diff --git a/arch/blackfin/kernel/dumpstack.c b/arch/blackfin/kernel/dumpstack.c index 5cfbaa298211..95ba6d9e9a3d 100644 --- a/arch/blackfin/kernel/dumpstack.c +++ b/arch/blackfin/kernel/dumpstack.c @@ -168,6 +168,7 @@ void dump_stack(void) #endif trace_buffer_save(tflags); dump_bfin_trace_buffer(); + dump_stack_print_info(KERN_DEFAULT); show_stack(current, &stack); trace_buffer_restore(tflags); } diff --git a/arch/blackfin/kernel/early_printk.c b/arch/blackfin/kernel/early_printk.c index 84ed8375113c..61fbd2de993d 100644 --- a/arch/blackfin/kernel/early_printk.c +++ b/arch/blackfin/kernel/early_printk.c @@ -25,8 +25,6 @@ extern struct console *bfin_earlyserial_init(unsigned int port, extern struct console *bfin_jc_early_init(void); #endif -static struct console *early_console; - /* Default console */ #define DEFAULT_PORT 0 #define DEFAULT_CFLAG CS8|B57600 diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index f7f7a18abca9..c36efa0c7163 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -853,6 +853,8 @@ void show_regs(struct pt_regs *fp) unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); pr_notice("\n"); + show_regs_print_info(KERN_NOTICE); + if (CPUID != bfin_cpuid()) pr_notice("Compiled for cpu family 0x%04x (Rev %d), " "but running on:0x%04x (Rev %d)\n", diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index 9cb85537bd2b..82d01a71207f 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -103,7 +103,7 @@ void __init mem_init(void) max_mapnr = num_physpages = MAP_NR(high_memory); printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", num_physpages); - /* This will put all memory onto the freelists. */ + /* This will put all low memory onto the freelists. */ totalram_pages = free_all_bootmem(); reservedpages = 0; @@ -129,24 +129,11 @@ void __init mem_init(void) initk, codek, datak, DMA_UNCACHED_REGION >> 10, (reservedpages << (PAGE_SHIFT-10))); } -static void __init free_init_pages(const char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr; - /* next to check that the page we free is not a partial page */ - for (addr = begin; addr + PAGE_SIZE <= end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); -} - #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { #ifndef CONFIG_MPU - free_init_pages("initrd memory", start, end); + free_reserved_area(start, end, 0, "initrd"); #endif } #endif @@ -154,10 +141,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) void __init_refok free_initmem(void) { #if defined CONFIG_RAMKERNEL && !defined CONFIG_MPU - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); - + free_initmem_default(0); if (memory_start == (unsigned long)(&__init_end)) memory_start = (unsigned long)(&__init_begin); #endif diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index 1be74e5b4788..dcc2c2f6d67c 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -31,6 +31,7 @@ void __init trap_init(void) void show_regs(struct pt_regs *regs) { pr_err("\n"); + show_regs_print_info(KERN_ERR); pr_err("PC: %08lx SP: %08lx\n", regs->pc, regs->sp); pr_err("Status: %08lx ORIG_A4: %08lx\n", regs->csr, regs->orig_a4); pr_err("A0: %08lx B0: %08lx\n", regs->a0, regs->b0); @@ -67,15 +68,6 @@ void show_regs(struct pt_regs *regs) pr_err("A31: %08lx B31: %08lx\n", regs->a31, regs->b31); } -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - - void die(char *str, struct pt_regs *fp, int nr) { console_verbose(); diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index 89395f09648a..a9fcd89b251b 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -77,37 +77,11 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - int pages = 0; - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - pages++; - } - printk(KERN_INFO "Freeing initrd memory: %luk freed\n", - (pages * PAGE_SIZE) >> 10); + free_reserved_area(start, end, 0, "initrd"); } #endif void __init free_initmem(void) { - unsigned long addr; - - /* - * The following code should be cool even if these sections - * are not page aligned. - */ - addr = PAGE_ALIGN((unsigned long)(__init_begin)); - - /* next to check that the page we free is not a partial page */ - for (; addr + PAGE_SIZE < (unsigned long)(__init_end); - addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing unused kernel memory: %dK freed\n", - (int) ((addr - PAGE_ALIGN((long) &__init_begin)) >> 10)); + free_initmem_default(0); } diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 2ba23c13df68..753e9a03cf87 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -176,6 +176,9 @@ unsigned long get_wchan(struct task_struct *p) void show_regs(struct pt_regs * regs) { unsigned long usp = rdusp(); + + show_regs_print_info(KERN_DEFAULT); + printk("IRP: %08lx SRP: %08lx DCCR: %08lx USP: %08lx MOF: %08lx\n", regs->irp, regs->srp, regs->dccr, usp, regs->mof ); printk(" r0: %08lx r1: %08lx r2: %08lx r3: %08lx\n", diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 57451faa9b20..cebd32e2a8fb 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -164,6 +164,9 @@ get_wchan(struct task_struct *p) void show_regs(struct pt_regs * regs) { unsigned long usp = rdusp(); + + show_regs_print_info(KERN_DEFAULT); + printk("ERP: %08lx SRP: %08lx CCS: %08lx USP: %08lx MOF: %08lx\n", regs->erp, regs->srp, regs->ccs, usp, regs->mof); diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index a11ad3229f8c..0ffda73734f5 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -147,13 +147,6 @@ show_stack(void) #endif void -dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - -void set_nmi_handler(void (*handler)(struct pt_regs *)) { nmi_handler = handler; diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index d72ab58fd83e..9ac80946dada 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -12,12 +12,10 @@ #include <linux/init.h> #include <linux/bootmem.h> #include <asm/tlb.h> +#include <asm/sections.h> unsigned long empty_zero_page; -extern char _stext, _edata, _etext; /* From linkerscript */ -extern char __init_begin, __init_end; - void __init mem_init(void) { @@ -67,15 +65,5 @@ mem_init(void) void free_initmem(void) { - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk (KERN_INFO "Freeing unused kernel memory: %luk freed\n", - (unsigned long)((&__init_end - &__init_begin) >> 10)); + free_initmem_default(0); } diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 5cfd1420b091..4bff48c19d29 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -466,17 +466,6 @@ asmlinkage void compound_exception(unsigned long esfr1, BUG(); } /* end compound_exception() */ -/*****************************************************************************/ -/* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *task, unsigned long *sp) { } @@ -508,6 +497,7 @@ void show_regs(struct pt_regs *regs) int loop; printk("\n"); + show_regs_print_info(KERN_DEFAULT); printk("Frame: @%08lx [%s]\n", (unsigned long) regs, @@ -522,8 +512,6 @@ void show_regs(struct pt_regs *regs) else printk(" | "); } - - printk("Process %s (pid: %d)\n", current->comm, current->pid); } void die_if_kernel(const char *str, ...) diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 92e97b0894a6..dee354fa6b64 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -122,7 +122,7 @@ void __init mem_init(void) #endif int codek = 0, datak = 0; - /* this will put all memory onto the freelists */ + /* this will put all low memory onto the freelists */ totalram_pages = free_all_bootmem(); #ifdef CONFIG_MMU @@ -131,14 +131,8 @@ void __init mem_init(void) datapages++; #ifdef CONFIG_HIGHMEM - for (pfn = num_physpages - 1; pfn >= num_mappedpages; pfn--) { - struct page *page = &mem_map[pfn]; - - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalram_pages++; - } + for (pfn = num_physpages - 1; pfn >= num_mappedpages; pfn--) + free_highmem_page(&mem_map[pfn]); #endif codek = ((unsigned long) &_etext - (unsigned long) &_stext) >> 10; @@ -168,21 +162,7 @@ void __init mem_init(void) void free_initmem(void) { #if defined(CONFIG_RAMKERNEL) && !defined(CONFIG_PROTECT_KERNEL) - unsigned long start, end, addr; - - start = PAGE_ALIGN((unsigned long) &__init_begin); /* round up */ - end = ((unsigned long) &__init_end) & PAGE_MASK; /* round down */ - - /* next to check that the page we free is not a partial page */ - for (addr = start; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - - printk("Freeing unused kernel memory: %ldKiB freed (0x%lx - 0x%lx)\n", - (end - start) >> 10, start, end); + free_initmem_default(0); #endif } /* end free_initmem() */ @@ -193,14 +173,6 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - int pages = 0; - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - pages++; - } - printk("Freeing initrd memory: %dKiB freed\n", (pages * PAGE_SIZE) >> 10); + free_reserved_area(start, end, 0, "initrd"); } /* end free_initrd_mem() */ #endif diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index a17d2cd463d2..1a744ab7e7e5 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -83,6 +83,8 @@ void machine_power_off(void) void show_regs(struct pt_regs * regs) { + show_regs_print_info(KERN_DEFAULT); + printk("\nPC: %08lx Status: %02x", regs->pc, regs->ccr); printk("\nORIG_ER0: %08lx ER0: %08lx ER1: %08lx", diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 7833aa3e7c7d..cfe494dbe3da 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -164,10 +164,3 @@ void show_trace_task(struct task_struct *tsk) { show_stack(tsk,(unsigned long *)tsk->thread.esp0); } - -void dump_stack(void) -{ - show_stack(NULL,NULL); -} - -EXPORT_SYMBOL(dump_stack); diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index 981e25094b1a..ff349d70a29b 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -139,7 +139,7 @@ void __init mem_init(void) start_mem = PAGE_ALIGN(start_mem); max_mapnr = num_physpages = MAP_NR(high_memory); - /* this will put all memory onto the freelists */ + /* this will put all low memory onto the freelists */ totalram_pages = free_all_bootmem(); codek = (_etext - _stext) >> 10; @@ -161,15 +161,7 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - int pages = 0; - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - pages++; - } - printk ("Freeing initrd memory: %dk freed\n", pages); + free_reserved_area(start, end, 0, "initrd"); } #endif @@ -177,23 +169,7 @@ void free_initmem(void) { #ifdef CONFIG_RAMKERNEL - unsigned long addr; -/* - * the following code should be cool even if these sections - * are not page aligned. - */ - addr = PAGE_ALIGN((unsigned long)(__init_begin)); - /* next to check that the page we free is not a partial page */ - for (; addr + PAGE_SIZE < (unsigned long)__init_end; addr +=PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing unused kernel memory: %ldk freed (0x%x - 0x%x)\n", - (addr - PAGE_ALIGN((long) __init_begin)) >> 10, - (int)(PAGE_ALIGN((unsigned long)__init_begin)), - (int)(addr - PAGE_SIZE)); + free_initmem_default(0); #endif } diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index aaf53f883710..7858663352b9 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -195,14 +195,6 @@ void show_stack(struct task_struct *task, unsigned long *fp) do_show_stack(task, fp, 0); } -void dump_stack(void) -{ - unsigned long *fp; - asm("%0 = r30" : "=r" (fp)); - show_stack(current, fp); -} -EXPORT_SYMBOL(dump_stack); - int die(const char *str, struct pt_regs *regs, long err) { static struct { diff --git a/arch/hexagon/kernel/vm_events.c b/arch/hexagon/kernel/vm_events.c index 3e4453091889..741aaa917cda 100644 --- a/arch/hexagon/kernel/vm_events.c +++ b/arch/hexagon/kernel/vm_events.c @@ -33,6 +33,8 @@ */ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_EMERG); + printk(KERN_EMERG "restart_r0: \t0x%08lx syscall_nr: %ld\n", regs->restart_r0, regs->syscall_nr); printk(KERN_EMERG "preds: \t\t0x%08lx\n", regs->preds); diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h index 94eaa5bd5d0c..aa910054b8e7 100644 --- a/arch/ia64/include/asm/hugetlb.h +++ b/arch/ia64/include/asm/hugetlb.h @@ -2,6 +2,7 @@ #define _ASM_IA64_HUGETLB_H #include <asm/page.h> +#include <asm-generic/hugetlb.h> void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a26fc640e4ce..55d4ba47a907 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -96,21 +96,13 @@ show_stack (struct task_struct *task, unsigned long *sp) } void -dump_stack (void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - -void show_regs (struct pt_regs *regs) { unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; print_modules(); - printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current), - smp_processor_id(), current->comm); + printk("\n"); + show_regs_print_info(KERN_DEFAULT); printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), init_utsname()->release); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 2029cc0d2fc6..13bfdd22afc8 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -1063,6 +1063,7 @@ check_bugs (void) static int __init run_dmi_scan(void) { dmi_scan_machine(); + dmi_set_dump_stack_arch_desc(); return 0; } core_initcall(run_dmi_scan); diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 80dab509dfb0..67c59ebec899 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -47,6 +47,8 @@ void show_mem(unsigned int filter) printk(KERN_INFO "Mem-info:\n"); show_free_areas(filter); printk(KERN_INFO "Node memory in pages:\n"); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c2e955ee79a8..ae4db4bd6d97 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -623,6 +623,8 @@ void show_mem(unsigned int filter) printk(KERN_INFO "Mem-info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; @@ -817,13 +819,12 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(struct page *start_page, - unsigned long size, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { - return vmemmap_populate_basepages(start_page, size, node); + return vmemmap_populate_basepages(start, end, node); } -void vmemmap_free(struct page *memmap, unsigned long nr_pages) +void vmemmap_free(unsigned long start, unsigned long end) { } #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 20bc967c7209..d1fe4b402601 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -154,25 +154,14 @@ ia64_init_addr_space (void) void free_initmem (void) { - unsigned long addr, eaddr; - - addr = (unsigned long) ia64_imva(__init_begin); - eaddr = (unsigned long) ia64_imva(__init_end); - while (addr < eaddr) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - ++totalram_pages; - addr += PAGE_SIZE; - } - printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n", - (__init_end - __init_begin) >> 10); + free_reserved_area((unsigned long)ia64_imva(__init_begin), + (unsigned long)ia64_imva(__init_end), + 0, "unused kernel"); } void __init free_initrd_mem (unsigned long start, unsigned long end) { - struct page *page; /* * EFI uses 4KB pages while the kernel can use 4KB or bigger. * Thus EFI and the kernel may have different page sizes. It is @@ -213,11 +202,7 @@ free_initrd_mem (unsigned long start, unsigned long end) for (; start < end; start += PAGE_SIZE) { if (!virt_addr_valid(start)) continue; - page = virt_to_page(start); - ClearPageReserved(page); - init_page_count(page); - free_page(start); - ++totalram_pages; + free_reserved_page(virt_to_page(start)); } } diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index def782e31aac..4248492b9321 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -61,13 +61,26 @@ paddr_to_nid(unsigned long paddr) int __meminit __early_pfn_to_nid(unsigned long pfn) { int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; + /* + * NOTE: The following SMP-unsafe globals are only used early in boot + * when the kernel is running single-threaded. + */ + static int __meminitdata last_ssec, last_esec; + static int __meminitdata last_nid; + + if (section >= last_ssec && section < last_esec) + return last_nid; for (i = 0; i < num_node_memblks; i++) { ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT; esec = (node_memblk[i].start_paddr + node_memblk[i].size + ((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT; - if (section >= ssec && section < esec) + if (section >= ssec && section < esec) { + last_ssec = ssec; + last_esec = esec; + last_nid = node_memblk[i].nid; return node_memblk[i].nid; + } } return -1; diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index e2d049018c3b..e69221d581d5 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -73,6 +73,8 @@ void machine_power_off(void) void show_regs(struct pt_regs * regs) { printk("\n"); + show_regs_print_info(KERN_DEFAULT); + printk("BPC[%08lx]:PSW[%08lx]:LR [%08lx]:FP [%08lx]\n", \ regs->bpc, regs->psw, regs->lr, regs->fp); printk("BBPC[%08lx]:BBPSW[%08lx]:SPU[%08lx]:SPI[%08lx]\n", \ diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 3bcb207e5b6d..5623ea34b4fd 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -169,15 +169,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_trace(task, sp); } -void dump_stack(void) -{ - unsigned long stack; - - show_trace(current, &stack); -} - -EXPORT_SYMBOL(dump_stack); - static void show_registers(struct pt_regs *regs) { int i = 0; diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index 78b660e903da..ab4cbce91a9b 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -28,10 +28,7 @@ #include <asm/mmu_context.h> #include <asm/setup.h> #include <asm/tlb.h> - -/* References to section boundaries */ -extern char _text, _etext, _edata; -extern char __init_begin, __init_end; +#include <asm/sections.h> pgd_t swapper_pg_dir[1024]; @@ -184,17 +181,7 @@ void __init mem_init(void) *======================================================================*/ void free_initmem(void) { - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", \ - (int)(&__init_end - &__init_begin) >> 10); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD @@ -204,13 +191,6 @@ void free_initmem(void) *======================================================================*/ void free_initrd_mem(unsigned long start, unsigned long end) { - unsigned long p; - for (p = start; p < end; p += PAGE_SIZE) { - ClearPageReserved(virt_to_page(p)); - init_page_count(virt_to_page(p)); - free_page(p); - totalram_pages++; - } - printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index f32ab22e7ed3..88fcd8c70e7b 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -992,18 +992,6 @@ void show_stack(struct task_struct *task, unsigned long *stack) } /* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_trace(&stack); -} - -EXPORT_SYMBOL(dump_stack); - -/* * The vector number returned in the frame pointer may also contain * the "fs" (Fault Status) bits on ColdFire. These are in the bottom * 2 bits, and upper 2 bits. So we need to mask out the real vector diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 519aad8fa812..1af2ca3411f6 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -110,18 +110,7 @@ void __init paging_init(void) void free_initmem(void) { #ifndef CONFIG_MMU_SUN3 - unsigned long addr; - - addr = (unsigned long) __init_begin; - for (; addr < ((unsigned long) __init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - pr_notice("Freeing unused kernel memory: %luk freed (0x%x - 0x%x)\n", - (addr - (unsigned long) __init_begin) >> 10, - (unsigned int) __init_begin, (unsigned int) __init_end); + free_initmem_default(0); #endif /* CONFIG_MMU_SUN3 */ } @@ -213,15 +202,6 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - int pages = 0; - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - pages++; - } - pr_notice("Freeing initrd memory: %dk freed\n", - pages << (PAGE_SHIFT - 10)); + free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index dc5923544560..483dff986a23 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -129,6 +129,8 @@ void show_regs(struct pt_regs *regs) "D1.7 " }; + show_regs_print_info(KERN_INFO); + pr_info(" pt_regs @ %p\n", regs); pr_info(" SaveMask = 0x%04hx\n", regs->ctx.SaveMask); pr_info(" Flags = 0x%04hx (%c%c%c%c)\n", regs->ctx.Flags, diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 8961f247b500..2ceeaae5b199 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -987,9 +987,3 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) show_trace(tsk, sp, NULL); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index 504a398d5f8b..d05b8455c44c 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -380,14 +380,8 @@ void __init mem_init(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; - } - totalram_pages += totalhigh_pages; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); num_physpages += totalhigh_pages; #endif /* CONFIG_HIGHMEM */ @@ -412,32 +406,15 @@ void __init mem_init(void) return; } -static void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr; - - for (addr = begin; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - pr_info("Freeing %s: %luk freed\n", what, (end - begin) >> 10); -} - void free_initmem(void) { - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - end = end & PAGE_MASK; - free_init_pages("initrd memory", start, end); + free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); } #endif diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index 0e0b0a5ec756..f05df5630c84 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -46,7 +46,6 @@ void machine_shutdown(void); void machine_halt(void); void machine_power_off(void); -void free_init_pages(char *what, unsigned long begin, unsigned long end); extern void *alloc_maybe_bootmem(size_t size, gfp_t mask); extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c index 60dcacc68038..365f2d53f1b2 100644 --- a/arch/microblaze/kernel/early_printk.c +++ b/arch/microblaze/kernel/early_printk.c @@ -21,7 +21,6 @@ #include <asm/setup.h> #include <asm/prom.h> -static u32 early_console_initialized; static u32 base_addr; #ifdef CONFIG_SERIAL_UARTLITE_CONSOLE @@ -109,27 +108,11 @@ static struct console early_serial_uart16550_console = { }; #endif /* CONFIG_SERIAL_8250_CONSOLE */ -static struct console *early_console; - -void early_printk(const char *fmt, ...) -{ - char buf[512]; - int n; - va_list ap; - - if (early_console_initialized) { - va_start(ap, fmt); - n = vscnprintf(buf, 512, fmt, ap); - early_console->write(early_console, buf, n); - va_end(ap); - } -} - int __init setup_early_printk(char *opt) { int version = 0; - if (early_console_initialized) + if (early_console) return 1; base_addr = of_early_console(&version); @@ -159,7 +142,6 @@ int __init setup_early_printk(char *opt) } register_console(early_console); - early_console_initialized = 1; return 0; } return 1; @@ -169,7 +151,7 @@ int __init setup_early_printk(char *opt) * only for early console because of performance degression */ void __init remap_early_printk(void) { - if (!early_console_initialized || !early_console) + if (!early_console) return; pr_info("early_printk_console remapping from 0x%x to ", base_addr); base_addr = (u32) ioremap(base_addr, PAGE_SIZE); @@ -194,9 +176,9 @@ void __init remap_early_printk(void) void __init disable_early_printk(void) { - if (!early_console_initialized || !early_console) + if (!early_console) return; pr_warn("disabling early console\n"); unregister_console(early_console); - early_console_initialized = 0; + early_console = NULL; } diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 7cce2e9c1719..a55893807274 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -20,6 +20,8 @@ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_INFO); + pr_info(" Registers dump: mode=%X\r\n", regs->pt_mode); pr_info(" r1=%08lX, r2=%08lX, r3=%08lX, r4=%08lX\n", regs->r1, regs->r2, regs->r3, regs->r4); diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index 30e6b5004a6a..cb619533a192 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -75,9 +75,3 @@ void show_stack(struct task_struct *task, unsigned long *sp) debug_show_held_locks(task); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 8f8b367c079e..4ec137d13ad7 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -82,13 +82,9 @@ static unsigned long highmem_setup(void) /* FIXME not sure about */ if (memblock_is_reserved(pfn << PAGE_SHIFT)) continue; - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; + free_highmem_page(page); reservedpages++; } - totalram_pages += totalhigh_pages; pr_info("High memory: %luk\n", totalhigh_pages << (PAGE_SHIFT-10)); @@ -236,40 +232,16 @@ void __init setup_memory(void) paging_init(); } -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr; - - for (addr = begin; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10); -} - #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - int pages = 0; - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - pages++; - } - pr_notice("Freeing initrd memory: %dk freed\n", - (int)(pages * (PAGE_SIZE / 1024))); + free_reserved_area(start, end, 0, "initrd"); } #endif void free_initmem(void) { - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); + free_initmem_default(0); } void __init mem_init(void) diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index ef99db994c2f..fe0d15d32660 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -10,6 +10,7 @@ #define __ASM_HUGETLB_H #include <asm/page.h> +#include <asm-generic/hugetlb.h> static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git a/arch/mips/kernel/early_printk.c b/arch/mips/kernel/early_printk.c index 9e6440eaa455..505cb77d1280 100644 --- a/arch/mips/kernel/early_printk.c +++ b/arch/mips/kernel/early_printk.c @@ -7,7 +7,9 @@ * Copyright (C) 2007 MIPS Technologies, Inc. * written by Ralf Baechle (ralf@linux-mips.org) */ +#include <linux/kernel.h> #include <linux/console.h> +#include <linux/printk.h> #include <linux/init.h> #include <asm/setup.h> @@ -24,20 +26,18 @@ static void early_console_write(struct console *con, const char *s, unsigned n) } } -static struct console early_console = { +static struct console early_console_prom = { .name = "early", .write = early_console_write, .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1 }; -static int early_console_initialized __initdata; - void __init setup_early_printk(void) { - if (early_console_initialized) + if (early_console) return; - early_console_initialized = 1; + early_console = &early_console_prom; - register_console(&early_console); + register_console(&early_console_prom); } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 7a99e60dadbd..e5e97a0eb2c1 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -206,19 +206,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stacktrace(task, ®s); } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - struct pt_regs regs; - - prepare_frametrace(®s); - show_backtrace(current, ®s); -} - -EXPORT_SYMBOL(dump_stack); - static void show_code(unsigned int __user *pc) { long i; @@ -244,7 +231,7 @@ static void __show_regs(const struct pt_regs *regs) unsigned int cause = regs->cp0_cause; int i; - printk("Cpu %d\n", smp_processor_id()); + show_regs_print_info(KERN_DEFAULT); /* * Saved main processor registers diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 67929251286c..3d0346dbccf4 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -77,10 +77,9 @@ EXPORT_SYMBOL_GPL(empty_zero_page); /* * Not static inline because used by IP27 special magic initialization code */ -unsigned long setup_zero_pages(void) +void setup_zero_pages(void) { - unsigned int order; - unsigned long size; + unsigned int order, i; struct page *page; if (cpu_has_vce) @@ -94,15 +93,10 @@ unsigned long setup_zero_pages(void) page = virt_to_page((void *)empty_zero_page); split_page(page, order); - while (page < virt_to_page((void *)(empty_zero_page + (PAGE_SIZE << order)))) { - SetPageReserved(page); - page++; - } - - size = PAGE_SIZE << order; - zero_page_mask = (size - 1) & PAGE_MASK; + for (i = 0; i < (1 << order); i++, page++) + mark_page_reserved(page); - return 1UL << order; + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } #ifdef CONFIG_MIPS_MT_SMTC @@ -380,7 +374,7 @@ void __init mem_init(void) high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); totalram_pages += free_all_bootmem(); - totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ + setup_zero_pages(); /* Setup zeroed pages. */ reservedpages = ram = 0; for (tmp = 0; tmp < max_low_pfn; tmp++) @@ -399,12 +393,8 @@ void __init mem_init(void) SetPageReserved(page); continue; } - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; + free_highmem_page(page); } - totalram_pages += totalhigh_pages; num_physpages += totalhigh_pages; #endif @@ -440,11 +430,8 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) struct page *page = pfn_to_page(pfn); void *addr = phys_to_virt(PFN_PHYS(pfn)); - ClearPageReserved(page); - init_page_count(page); memset(addr, POISON_FREE_INITMEM, PAGE_SIZE); - __free_page(page); - totalram_pages++; + free_reserved_page(page); } printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); } @@ -452,18 +439,14 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", - virt_to_phys((void *)start), - virt_to_phys((void *)end)); + free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); } #endif void __init_refok free_initmem(void) { prom_free_prom_memory(); - free_init_pages("unused kernel memory", - __pa_symbol(&__init_begin), - __pa_symbol(&__init_end)); + free_initmem_default(POISON_FREE_INITMEM); } #ifndef CONFIG_MIPS_PGD_C0_CONTEXT diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 7e5fe2790d8a..f1baadd56e82 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -158,11 +158,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 3505d08ff2fd..5f2bddb1860e 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -457,7 +457,7 @@ void __init prom_free_prom_memory(void) /* We got nothing to free here ... */ } -extern unsigned long setup_zero_pages(void); +extern void setup_zero_pages(void); void __init paging_init(void) { @@ -492,7 +492,7 @@ void __init mem_init(void) totalram_pages += free_all_bootmem_node(NODE_DATA(node)); } - totalram_pages -= setup_zero_pages(); /* This comes from node 0 */ + setup_zero_pages(); /* This comes from node 0 */ codesize = (unsigned long) &_etext - (unsigned long) &_text; datasize = (unsigned long) &_edata - (unsigned long) &_etext; diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 2da39fb8b3b2..3707da583d05 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -97,6 +97,7 @@ void machine_power_off(void) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); } /* diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index b900e5afa0ae..a7a987c7954f 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -294,17 +294,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) } /* - * the architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - -/* * dump the register file in the specified exception frame */ void show_registers_only(struct pt_regs *regs) diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index e57e5bc23562..5a8ace63a6b4 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -139,30 +139,11 @@ void __init mem_init(void) } /* - * - */ -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr; - - for (addr = begin; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *) addr, 0xcc, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); -} - -/* * recycle memory containing stuff only required for initialisation */ void free_initmem(void) { - free_init_pages("unused kernel memory", - (unsigned long) &__init_begin, - (unsigned long) &__init_end); + free_initmem_default(POISON_FREE_INITMEM); } /* @@ -171,6 +152,6 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", start, end); + free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); } #endif diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 00c233bf0d06..386af258591d 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -90,6 +90,7 @@ void show_regs(struct pt_regs *regs) { extern void show_registers(struct pt_regs *regs); + show_regs_print_info(KERN_DEFAULT); /* __PHX__ cleanup this mess */ show_registers(regs); } diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 5cce396016d0..3d3f6062f49c 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -105,17 +105,6 @@ void show_trace_task(struct task_struct *tsk) */ } -/* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - void show_registers(struct pt_regs *regs) { int i; diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index e7fdc50c4bf0..b3cbc6703837 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -43,6 +43,7 @@ #include <asm/kmap_types.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> +#include <asm/sections.h> int mem_init_done; @@ -201,9 +202,6 @@ void __init paging_init(void) /* References to section boundaries */ -extern char _stext, _etext, _edata, __bss_start, _end; -extern char __init_begin, __init_end; - static int __init free_pages_init(void) { int reservedpages, pfn; @@ -263,30 +261,11 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_reserved_area(start, end, 0, "initrd"); } #endif void free_initmem(void) { - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing unused kernel memory: %luk freed\n", - ((unsigned long)&__init_end - - (unsigned long)&__init_begin) >> 10); + free_initmem_default(0); } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 0821e702d03f..b51c082c5cc6 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -1,5 +1,6 @@ config PARISC def_bool y + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select HAVE_IDE select HAVE_OPROFILE select HAVE_FUNCTION_TRACER if 64BIT diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index 7305ac8f7f5b..bc989e522a04 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -12,18 +12,4 @@ config DEBUG_RODATA portion of the kernel code won't be covered by a TLB anymore. If in doubt, say "N". -config DEBUG_STRICT_USER_COPY_CHECKS - bool "Strict copy size checks" - depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING - ---help--- - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time failures. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, or if you run an older (pre 4.4) gcc, say N. - endmenu diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index aeb8f8f2c07a..f702bff0bed9 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -126,6 +126,8 @@ void show_regs(struct pt_regs *regs) user = user_mode(regs); level = user ? KERN_DEBUG : KERN_CRIT; + show_regs_print_info(level); + print_gr(level, regs); for (i = 0; i < 8; i += 4) @@ -158,14 +160,6 @@ void show_regs(struct pt_regs *regs) } } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - static void do_show_stack(struct unwind_frame_info *info) { int i = 1; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 3ac462de53a4..157b931e7b09 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -505,7 +505,6 @@ static void __init map_pages(unsigned long start_vaddr, void free_initmem(void) { - unsigned long addr; unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; @@ -533,19 +532,10 @@ void free_initmem(void) * pages are no-longer executable */ flush_icache_range(init_begin, init_end); - for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - num_physpages++; - totalram_pages++; - } + num_physpages += free_initmem_default(0); /* set up a new led state on systems shipped LED State panel */ pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE); - - printk(KERN_INFO "Freeing unused kernel memory: %luk freed\n", - (init_end - init_begin) >> 10); } @@ -697,6 +687,8 @@ void show_mem(unsigned int filter) printk(KERN_INFO "Mem-info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; #ifndef CONFIG_DISCONTIGMEM i = max_mapnr; while (i-- > 0) { @@ -1107,15 +1099,6 @@ void flush_tlb_all(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start >= end) - return; - printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - num_physpages++; - totalram_pages++; - } + num_physpages += free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 62e11a32c4c2..4fcbd6b14a3a 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -3,6 +3,7 @@ #ifdef CONFIG_HUGETLB_PAGE #include <asm/page.h> +#include <asm-generic/hugetlb.h> extern struct kmem_cache *hugepte_cache; diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index cd915d6b093d..88693cef4f3d 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -99,8 +99,7 @@ extern unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, - int topdown, - int use_cache); + int topdown); extern unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index b3ba5163eae2..9ec3fe174cba 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -150,10 +150,7 @@ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start)) continue; - ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); - init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); - free_page((unsigned long)__va(addr)); - totalram_pages++; + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); } } #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 06c8202a69cf..2230fd0ca3e4 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1045,10 +1045,7 @@ static void fadump_release_memory(unsigned long begin, unsigned long end) if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start)) continue; - ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); - init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); - free_page((unsigned long)__va(addr)); - totalram_pages++; + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); } } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index a61b133c4f99..6782221d49bd 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -756,12 +756,7 @@ static __init void kvm_free_tmp(void) end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; /* Free the tmp space we don't need */ - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_reserved_area(start, end, 0, NULL); } static int __init kvm_guest_init(void) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 16e77a81ab4f..13a8d9d0b5cb 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -831,6 +831,8 @@ void show_regs(struct pt_regs * regs) { int i, trap; + show_regs_print_info(KERN_DEFAULT); + printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); printk("REGS: %p TRAP: %04lx %s (%s)\n", @@ -850,12 +852,6 @@ void show_regs(struct pt_regs * regs) #else printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); #endif - printk("TASK = %p[%d] '%s' THREAD: %p", - current, task_pid_nr(current), current->comm, task_thread_info(current)); - -#ifdef CONFIG_SMP - printk(" CPU: %d", raw_smp_processor_id()); -#endif /* CONFIG_SMP */ for (i = 0; i < 32; i++) { if ((i % REGS_PER_LINE) == 0) @@ -1362,12 +1358,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) } while (count++ < kstack_depth_to_print); } -void dump_stack(void) -{ - show_stack(current, NULL); -} -EXPORT_SYMBOL(dump_stack); - #ifdef CONFIG_PPC64 /* Called with hard IRQs off */ void __ppc64_runlatch_on(void) diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index f9748498fe58..13b867093499 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -156,15 +156,13 @@ static struct console udbg_console = { .index = 0, }; -static int early_console_initialized; - /* * Called by setup_system after ppc_md->probe and ppc_md->early_init. * Call it again after setting udbg_putc in ppc_md->setup_arch. */ void __init register_early_udbg_console(void) { - if (early_console_initialized) + if (early_console) return; if (!udbg_putc) @@ -174,7 +172,7 @@ void __init register_early_udbg_console(void) printk(KERN_INFO "early console immortal !\n"); udbg_console.flags &= ~CON_BOOT; } - early_console_initialized = 1; + early_console = &udbg_console; register_console(&udbg_console); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1a6de0a7d8eb..5dc52d803ed8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -742,7 +742,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); - return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); + return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); } #endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 7e2246fb2f31..5a535b73ea18 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -263,19 +263,14 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } -int __meminit vmemmap_populate(struct page *start_page, - unsigned long nr_pages, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { - unsigned long start = (unsigned long)start_page; - unsigned long end = (unsigned long)(start_page + nr_pages); unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; /* Align to the page size of the linear mapping. */ start = _ALIGN_DOWN(start, page_size); - pr_debug("vmemmap_populate page %p, %ld pages, node %d\n", - start_page, nr_pages, node); - pr_debug(" -> map %lx..%lx\n", start, end); + pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { void *p; @@ -298,7 +293,7 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } -void vmemmap_free(struct page *memmap, unsigned long nr_pages) +void vmemmap_free(unsigned long start, unsigned long end) { } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 056732e8ca38..0988a26e0413 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -351,13 +351,9 @@ void __init mem_init(void) struct page *page = pfn_to_page(pfn); if (memblock_is_reserved(paddr)) continue; - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; + free_highmem_page(page); reservedpages--; } - totalram_pages += totalhigh_pages; printk(KERN_DEBUG "High memory: %luk\n", totalhigh_pages << (PAGE_SHIFT-10)); } @@ -404,39 +400,14 @@ void __init mem_init(void) void free_initmem(void) { - unsigned long addr; - ppc_md.progress = ppc_printk_progress; - - addr = (unsigned long)__init_begin; - for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - pr_info("Freeing unused kernel memory: %luk freed\n", - ((unsigned long)__init_end - - (unsigned long)__init_begin) >> 10); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - if (start >= end) - return; - - start = _ALIGN_DOWN(start, PAGE_SIZE); - end = _ALIGN_UP(end, PAGE_SIZE); - pr_info("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c index 67a42ed0d2fc..cb8bdbe4972f 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap_64.c @@ -92,10 +92,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 7218e9d3a0bc..72dd71d56c67 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -62,14 +62,11 @@ static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; */ static void __init setup_node_to_cpumask_map(void) { - unsigned int node, num = 0; + unsigned int node; /* setup nr_node_ids if not done yet */ - if (nr_node_ids == MAX_NUMNODES) { - for_each_node_mask(node, node_possible_map) - num = node; - nr_node_ids = num + 1; - } + if (nr_node_ids == MAX_NUMNODES) + setup_nr_node_ids(); /* allocate the map */ for (node = 0; node < nr_node_ids; node++) diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index cf9dada734b6..3e99c149271a 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -237,134 +237,112 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz #endif } +/* + * Compute which slice addr is part of; + * set *boundary_addr to the start or end boundary of that slice + * (depending on 'end' parameter); + * return boolean indicating if the slice is marked as available in the + * 'available' slice_mark. + */ +static bool slice_scan_available(unsigned long addr, + struct slice_mask available, + int end, + unsigned long *boundary_addr) +{ + unsigned long slice; + if (addr < SLICE_LOW_TOP) { + slice = GET_LOW_SLICE_INDEX(addr); + *boundary_addr = (slice + end) << SLICE_LOW_SHIFT; + return !!(available.low_slices & (1u << slice)); + } else { + slice = GET_HIGH_SLICE_INDEX(addr); + *boundary_addr = (slice + end) ? + ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; + return !!(available.high_slices & (1u << slice)); + } +} + static unsigned long slice_find_area_bottomup(struct mm_struct *mm, unsigned long len, struct slice_mask available, - int psize, int use_cache) + int psize) { - struct vm_area_struct *vma; - unsigned long start_addr, addr; - struct slice_mask mask; int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); - - if (use_cache) { - if (len <= mm->cached_hole_size) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } else - start_addr = addr = mm->free_area_cache; - } else - start_addr = addr = TASK_UNMAPPED_BASE; - -full_search: - for (;;) { - addr = _ALIGN_UP(addr, 1ul << pshift); - if ((TASK_SIZE - len) < addr) - break; - vma = find_vma(mm, addr); - BUG_ON(vma && (addr >= vma->vm_end)); - - mask = slice_range_to_mask(addr, len); - if (!slice_check_fit(mask, available)) { - if (addr < SLICE_LOW_TOP) - addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT); - else - addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT); + unsigned long addr, found, next_end; + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); + info.align_offset = 0; + + addr = TASK_UNMAPPED_BASE; + while (addr < TASK_SIZE) { + info.low_limit = addr; + if (!slice_scan_available(addr, available, 1, &addr)) continue; + + next_slice: + /* + * At this point [info.low_limit; addr) covers + * available slices only and ends at a slice boundary. + * Check if we need to reduce the range, or if we can + * extend it to cover the next available slice. + */ + if (addr >= TASK_SIZE) + addr = TASK_SIZE; + else if (slice_scan_available(addr, available, 1, &next_end)) { + addr = next_end; + goto next_slice; } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - if (use_cache) - mm->free_area_cache = addr + len; - return addr; - } - if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; - } + info.high_limit = addr; - /* Make sure we didn't miss any holes */ - if (use_cache && start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; + found = vm_unmapped_area(&info); + if (!(found & ~PAGE_MASK)) + return found; } + return -ENOMEM; } static unsigned long slice_find_area_topdown(struct mm_struct *mm, unsigned long len, struct slice_mask available, - int psize, int use_cache) + int psize) { - struct vm_area_struct *vma; - unsigned long addr; - struct slice_mask mask; int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); + unsigned long addr, found, prev; + struct vm_unmapped_area_info info; - /* check if free_area_cache is useful for us */ - if (use_cache) { - if (len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; - mm->free_area_cache = mm->mmap_base; - } - - /* either no address requested or can't fit in requested - * address hole - */ - addr = mm->free_area_cache; - - /* make sure it can fit in the remaining address space */ - if (addr > len) { - addr = _ALIGN_DOWN(addr - len, 1ul << pshift); - mask = slice_range_to_mask(addr, len); - if (slice_check_fit(mask, available) && - slice_area_is_free(mm, addr, len)) - /* remember the address as a hint for - * next time - */ - return (mm->free_area_cache = addr); - } - } + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); + info.align_offset = 0; addr = mm->mmap_base; - while (addr > len) { - /* Go down by chunk size */ - addr = _ALIGN_DOWN(addr - len, 1ul << pshift); - - /* Check for hit with different page size */ - mask = slice_range_to_mask(addr, len); - if (!slice_check_fit(mask, available)) { - if (addr < SLICE_LOW_TOP) - addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT); - else if (addr < (1ul << SLICE_HIGH_SHIFT)) - addr = SLICE_LOW_TOP; - else - addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT); + while (addr > PAGE_SIZE) { + info.high_limit = addr; + if (!slice_scan_available(addr - 1, available, 0, &addr)) continue; - } + prev_slice: /* - * Lookup failure means no vma is above this address, - * else if new region fits below vma->vm_start, - * return with success: + * At this point [addr; info.high_limit) covers + * available slices only and starts at a slice boundary. + * Check if we need to reduce the range, or if we can + * extend it to cover the previous available slice. */ - vma = find_vma(mm, addr); - if (!vma || (addr + len) <= vma->vm_start) { - /* remember the address as a hint for next time */ - if (use_cache) - mm->free_area_cache = addr; - return addr; + if (addr < PAGE_SIZE) + addr = PAGE_SIZE; + else if (slice_scan_available(addr - 1, available, 0, &prev)) { + addr = prev; + goto prev_slice; } + info.low_limit = addr; - /* remember the largest hole we saw so far */ - if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start; + found = vm_unmapped_area(&info); + if (!(found & ~PAGE_MASK)) + return found; } /* @@ -373,28 +351,18 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * can happen with large stack limits and large mmap() * allocations. */ - addr = slice_find_area_bottomup(mm, len, available, psize, 0); - - /* - * Restore the topdown base: - */ - if (use_cache) { - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = ~0UL; - } - - return addr; + return slice_find_area_bottomup(mm, len, available, psize); } static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, struct slice_mask mask, int psize, - int topdown, int use_cache) + int topdown) { if (topdown) - return slice_find_area_topdown(mm, len, mask, psize, use_cache); + return slice_find_area_topdown(mm, len, mask, psize); else - return slice_find_area_bottomup(mm, len, mask, psize, use_cache); + return slice_find_area_bottomup(mm, len, mask, psize); } #define or_mask(dst, src) do { \ @@ -415,7 +383,7 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, - int topdown, int use_cache) + int topdown) { struct slice_mask mask = {0, 0}; struct slice_mask good_mask; @@ -430,8 +398,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, BUG_ON(mm->task_size == 0); slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); - slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n", - addr, len, flags, topdown, use_cache); + slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n", + addr, len, flags, topdown); if (len > mm->task_size) return -ENOMEM; @@ -503,8 +471,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* Now let's see if we can find something in the existing * slices for that size */ - newaddr = slice_find_area(mm, len, good_mask, psize, topdown, - use_cache); + newaddr = slice_find_area(mm, len, good_mask, psize, topdown); if (newaddr != -ENOMEM) { /* Found within the good mask, we don't have to setup, * we thus return directly @@ -536,8 +503,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * anywhere in the good area. */ if (addr) { - addr = slice_find_area(mm, len, good_mask, psize, topdown, - use_cache); + addr = slice_find_area(mm, len, good_mask, psize, topdown); if (addr != -ENOMEM) { slice_dbg(" found area at 0x%lx\n", addr); return addr; @@ -547,15 +513,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* Now let's see if we can find something in the existing slices * for that size plus free slices */ - addr = slice_find_area(mm, len, potential_mask, psize, topdown, - use_cache); + addr = slice_find_area(mm, len, potential_mask, psize, topdown); #ifdef CONFIG_PPC_64K_PAGES if (addr == -ENOMEM && psize == MMU_PAGE_64K) { /* retry the search with 4k-page slices included */ or_mask(potential_mask, compat_mask); addr = slice_find_area(mm, len, potential_mask, psize, - topdown, use_cache); + topdown); } #endif @@ -586,8 +551,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long flags) { return slice_get_unmapped_area(addr, len, flags, - current->mm->context.user_psize, - 0, 1); + current->mm->context.user_psize, 0); } unsigned long arch_get_unmapped_area_topdown(struct file *filp, @@ -597,8 +561,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long flags) { return slice_get_unmapped_area(addr0, len, flags, - current->mm->context.user_psize, - 1, 1); + current->mm->context.user_psize, 1); } unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 7642cd7aad73..6eb94ab99d39 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -172,12 +172,9 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb; static inline void mpc512x_free_bootmem(struct page *page) { - __ClearPageReserved(page); BUG_ON(PageTail(page)); BUG_ON(atomic_read(&page->_count) > 1); - atomic_set(&page->_count, 1); - __free_page(page); - totalram_pages++; + free_reserved_page(page); } void mpc512x_release_bootmem(void) diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index d43d2d0b90e3..90986923a53a 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -351,7 +351,7 @@ static unsigned long spufs_get_unmapped_area(struct file *file, /* Else, try to obtain a 64K pages slice */ return slice_get_unmapped_area(addr, len, flags, - MMU_PAGE_64K, 1, 0); + MMU_PAGE_64K, 1); } #endif /* CONFIG_SPU_FS_64K_LS */ diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 2372c609fa2b..9a432de363b8 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -72,6 +72,7 @@ unsigned long memory_block_size_bytes(void) return get_memblock_size(); } +#ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long start, start_pfn; @@ -153,6 +154,17 @@ static int pseries_remove_memory(struct device_node *np) ret = pseries_remove_memblock(base, lmb_size); return ret; } +#else +static inline int pseries_remove_memblock(unsigned long base, + unsigned int memblock_size) +{ + return -EOPNOTSUPP; +} +static inline int pseries_remove_memory(struct device_node *np) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ static int pseries_add_memory(struct device_node *np) { diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 34c3c3e36023..242a03d6e9f5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -91,6 +91,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index fc32a2df4974..c56878e1245f 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -17,20 +17,6 @@ config STRICT_DEVMEM If you are unsure, say Y. -config DEBUG_STRICT_USER_COPY_CHECKS - def_bool n - prompt "Strict user copy size checks" - ---help--- - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time warnings. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, or if you run an older (pre 4.4) gcc, say N. - config S390_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 5f7d7ba2874c..7a539f4f5e30 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/aio.h> #include <asm/ebcdic.h> #include "hypfs.h" diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 593753ee07f3..bd90359d6d22 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -114,7 +114,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, #define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ ({ \ pte_t __pte = huge_ptep_get(__ptep); \ - if (pte_write(__pte)) { \ + if (huge_pte_write(__pte)) { \ huge_ptep_invalidate(__mm, __addr, __ptep); \ set_huge_pte_at(__mm, __addr, __ptep, \ huge_pte_wrprotect(__pte)); \ @@ -127,4 +127,58 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, huge_ptep_invalidate(vma->vm_mm, address, ptep); } +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +{ + pte_t pte; + pmd_t pmd; + + pmd = mk_pmd_phys(page_to_phys(page), pgprot); + pte_val(pte) = pmd_val(pmd); + return pte; +} + +static inline int huge_pte_write(pte_t pte) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + return pmd_write(pmd); +} + +static inline int huge_pte_dirty(pte_t pte) +{ + /* No dirty bit in the segment table entry. */ + return 0; +} + +static inline pte_t huge_pte_mkwrite(pte_t pte) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + pte_val(pte) = pmd_val(pmd_mkwrite(pmd)); + return pte; +} + +static inline pte_t huge_pte_mkdirty(pte_t pte) +{ + /* No dirty bit in the segment table entry. */ + return pte; +} + +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + pte_val(pte) = pmd_val(pmd_modify(pmd, newprot)); + return pte; +} + +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pmd_clear((pmd_t *) ptep); +} + #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a64c0e5428f..b4622915bd15 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -424,6 +424,13 @@ extern unsigned long MODULES_END; #define __S110 PAGE_RW #define __S111 PAGE_RW +/* + * Segment entry (large page) protection definitions. + */ +#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) +#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) +#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) + static inline int mm_exclusive(struct mm_struct *mm) { return likely(mm == current->active_mm && @@ -914,26 +921,6 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - /* - * PROT_NONE needs to be remapped from the pte type to the ste type. - * The HW invalid bit is also different for pte and ste. The pte - * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE - * bit, so we don't have to clear it. - */ - if (pte_val(pte) & _PAGE_INVALID) { - if (pte_val(pte) & _PAGE_SWT) - pte_val(pte) |= _HPAGE_TYPE_NONE; - pte_val(pte) |= _SEGMENT_ENTRY_INV; - } - /* - * Clear SW pte bits, there are no SW bits in a segment table entry. - */ - pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX | _PAGE_SWC | - _PAGE_SWR | _PAGE_SWW); - /* - * Also set the change-override bit because we don't need dirty bit - * tracking for hugetlbfs pages. - */ pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); return pte; } @@ -1278,31 +1265,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) } } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - -#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) -#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) -#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) - -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); - -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); - -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; -} - -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t entry) -{ - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) - pmd_val(entry) |= _SEGMENT_ENTRY_CO; - *pmdp = entry; -} - +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) { /* @@ -1323,10 +1286,11 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pmd; } -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - return pmd; + pmd_t __pmd; + pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); + return __pmd; } static inline pmd_t pmd_mkwrite(pmd_t pmd) @@ -1336,6 +1300,34 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; return pmd; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); + +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t entry) +{ + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) + pmd_val(entry) |= _SEGMENT_ENTRY_CO; + *pmdp = entry; +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; + return pmd; +} static inline pmd_t pmd_wrprotect(pmd_t pmd) { @@ -1432,13 +1424,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, } } -static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) -{ - pmd_t __pmd; - pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; -} - #define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 03dce39d01ee..298297477257 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -129,23 +129,6 @@ static void show_last_breaking_event(struct pt_regs *regs) #endif } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - static inline int mask_bits(struct pt_regs *regs, unsigned long bits) { return (regs->psw.mask & bits) / ((~bits + 1) & bits); @@ -183,14 +166,7 @@ void show_registers(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); + show_regs_print_info(KERN_DEFAULT); show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 6ab0d0b5cec8..20b0e97a7df2 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,6 @@ # lib-y += delay.o string.o uaccess_std.o uaccess_pt.o -obj-y += usercopy.o obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o obj-$(CONFIG_64BIT) += mem64.o lib-$(CONFIG_64BIT) += uaccess_mvcos.o diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 532525ec88c1..121089d57802 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -39,7 +39,7 @@ int arch_prepare_hugepage(struct page *page) if (!ptep) return -ENOMEM; - pte = mk_pte(page, PAGE_RW); + pte_val(pte) = addr; for (i = 0; i < PTRS_PER_PTE; i++) { set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); pte_val(pte) += PAGE_SIZE; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 9f9c315b4c07..0b09b2342302 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -42,11 +42,10 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); -static unsigned long __init setup_zero_pages(void) +static void __init setup_zero_pages(void) { struct cpuid cpu_id; unsigned int order; - unsigned long size; struct page *page; int i; @@ -83,14 +82,11 @@ static unsigned long __init setup_zero_pages(void) page = virt_to_page((void *) empty_zero_page); split_page(page, order); for (i = 1 << order; i > 0; i--) { - SetPageReserved(page); + mark_page_reserved(page); page++; } - size = PAGE_SIZE << order; - zero_page_mask = (size - 1) & PAGE_MASK; - - return 1UL << order; + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } /* @@ -147,7 +143,7 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); - totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ + setup_zero_pages(); /* Setup zeroed pages. */ reservedpages = 0; @@ -166,34 +162,15 @@ void __init mem_init(void) PFN_ALIGN((unsigned long)&_eshared) - 1); } -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr = begin; - - if (begin >= end) - return; - for (; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)(addr & PAGE_MASK), POISON_FREE_INITMEM, - PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); -} - void free_initmem(void) { - free_init_pages("unused kernel memory", - (unsigned long)&__init_begin, - (unsigned long)&__init_end); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", start, end); + free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); } #endif diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 06bafec00278..40023290ee5b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -91,11 +91,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } @@ -176,11 +174,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = s390_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = s390_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index ffab84db6907..35837054f734 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -191,19 +191,16 @@ static void vmem_remove_range(unsigned long start, unsigned long size) /* * Add a backed mem_map array to the virtual mem_map array. */ -int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { - unsigned long address, start_addr, end_addr; + unsigned long address = start; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; int ret = -ENOMEM; - start_addr = (unsigned long) start; - end_addr = (unsigned long) (start + nr); - - for (address = start_addr; address < end_addr;) { + for (address = start; address < end;) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { pu_dir = vmem_pud_alloc(); @@ -262,14 +259,14 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) } address += PAGE_SIZE; } - memset(start, 0, nr * sizeof(struct page)); + memset((void *)start, 0, end - start); ret = 0; out: - flush_tlb_kernel_range(start_addr, end_addr); + flush_tlb_kernel_range(start, end); return ret; } -void vmemmap_free(struct page *memmap, unsigned long nr_pages) +void vmemmap_free(unsigned long start, unsigned long end) { } diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 0e46fb19a848..1517a7dcd6d9 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -117,6 +117,8 @@ static void show_code(unsigned int *pc) */ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); + printk("r0 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", regs->regs[0], regs->regs[1], regs->regs[2], regs->regs[3], regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); @@ -149,16 +151,6 @@ static void show_registers(struct pt_regs *regs) printk(KERN_NOTICE "\n"); } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - show_stack(current_thread_info()->task, - (long *) get_irq_regs()->regs[0]); -} -EXPORT_SYMBOL(dump_stack); - void __die(const char *str, struct pt_regs *regs, const char *file, const char *func, unsigned long line) { diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index cee6bce1e30c..1592aad7dbc4 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(empty_zero_page); static struct kcore_list kcore_mem, kcore_vmalloc; -static unsigned long setup_zero_page(void) +static void setup_zero_page(void) { struct page *page; @@ -52,9 +52,7 @@ static unsigned long setup_zero_page(void) panic("Oh boy, that early out of memory?"); page = virt_to_page((void *) empty_zero_page); - SetPageReserved(page); - - return 1UL; + mark_page_reserved(page); } #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -84,7 +82,7 @@ void __init mem_init(void) high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); totalram_pages += free_all_bootmem(); - totalram_pages -= setup_zero_page(); /* Setup zeroed pages. */ + setup_zero_page(); /* Setup zeroed pages. */ reservedpages = 0; for (tmp = 0; tmp < max_low_pfn; tmp++) @@ -109,37 +107,16 @@ void __init mem_init(void) } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ -static void free_init_pages(const char *what, unsigned long begin, unsigned long end) -{ - unsigned long pfn; - - for (pfn = PFN_UP(begin); pfn < PFN_DOWN(end); pfn++) { - struct page *page = pfn_to_page(pfn); - void *addr = phys_to_virt(PFN_PHYS(pfn)); - - ClearPageReserved(page); - init_page_count(page); - memset(addr, POISON_FREE_INITMEM, PAGE_SIZE); - __free_page(page); - totalram_pages++; - } - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); -} - #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", - virt_to_phys((void *) start), - virt_to_phys((void *) end)); + free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); } #endif void __init_refok free_initmem(void) { - free_init_pages("unused kernel memory", - __pa(&__init_begin), - __pa(&__init_end)); + free_initmem_default(POISON_FREE_INITMEM); } unsigned long pgd_current; diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h index b3808c7d67b2..699255d6d1c6 100644 --- a/arch/sh/include/asm/hugetlb.h +++ b/arch/sh/include/asm/hugetlb.h @@ -3,6 +3,7 @@ #include <asm/cacheflush.h> #include <asm/page.h> +#include <asm-generic/hugetlb.h> static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index 7617dc4129ac..b959f5592604 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -158,9 +158,3 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) (unsigned long)task_stack_page(tsk)); show_trace(tsk, sp, NULL); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 73eb66fc6253..ebd3933005b4 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -32,11 +32,7 @@ void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid : %d, Comm: \t\t%s\n", task_pid_nr(current), current->comm); - printk("CPU : %d \t\t%s (%s %.*s)\n\n", - smp_processor_id(), print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("PR is at %s\n", regs->pr); diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index e611c85144b1..174d124b419e 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -40,6 +40,7 @@ void show_regs(struct pt_regs *regs) unsigned long long ah, al, bh, bl, ch, cl; printk("\n"); + show_regs_print_info(KERN_DEFAULT); ah = (regs->pc) >> 32; al = (regs->pc) & 0xffffffff; diff --git a/arch/sh/kernel/sh_bios.c b/arch/sh/kernel/sh_bios.c index 47475cca068a..fe584e516964 100644 --- a/arch/sh/kernel/sh_bios.c +++ b/arch/sh/kernel/sh_bios.c @@ -104,6 +104,7 @@ void sh_bios_vbr_reload(void) ); } +#ifdef CONFIG_EARLY_PRINTK /* * Print a string through the BIOS */ @@ -144,8 +145,6 @@ static struct console bios_console = { .index = -1, }; -static struct console *early_console; - static int __init setup_early_printk(char *buf) { int keep_early = 0; @@ -170,3 +169,4 @@ static int __init setup_early_printk(char *buf) return 0; } early_param("earlyprintk", setup_early_printk); +#endif diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 105794037143..20f9ead650d3 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -417,15 +417,13 @@ void __init mem_init(void) for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); - unsigned long node_pages = 0; void *node_high_memory; num_physpages += pgdat->node_present_pages; if (pgdat->node_spanned_pages) - node_pages = free_all_bootmem_node(pgdat); + totalram_pages += free_all_bootmem_node(pgdat); - totalram_pages += node_pages; node_high_memory = (void *)__va((pgdat->node_start_pfn + pgdat->node_spanned_pages) << @@ -501,31 +499,13 @@ void __init mem_init(void) void free_initmem(void) { - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk("Freeing unused kernel memory: %ldk freed\n", - ((unsigned long)&__init_end - - (unsigned long)&__init_begin) >> 10); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - unsigned long p; - for (p = start; p < end; p += PAGE_SIZE) { - ClearPageReserved(virt_to_page(p)); - init_page_count(virt_to_page(p)); - free_page(p); - totalram_pages++; - } - printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 7eb57d245044..e4cab465b81f 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -2,6 +2,7 @@ #define _ASM_SPARC64_HUGETLB_H #include <asm/page.h> +#include <asm-generic/hugetlb.h> void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 9b40c9c12a0c..6cfc1b09ec25 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -253,24 +253,15 @@ void __init leon_smp_done(void) /* Free unneeded trap tables */ if (!cpu_present(1)) { - ClearPageReserved(virt_to_page(&trapbase_cpu1)); - init_page_count(virt_to_page(&trapbase_cpu1)); - free_page((unsigned long)&trapbase_cpu1); - totalram_pages++; + free_reserved_page(virt_to_page(&trapbase_cpu1)); num_physpages++; } if (!cpu_present(2)) { - ClearPageReserved(virt_to_page(&trapbase_cpu2)); - init_page_count(virt_to_page(&trapbase_cpu2)); - free_page((unsigned long)&trapbase_cpu2); - totalram_pages++; + free_reserved_page(virt_to_page(&trapbase_cpu2)); num_physpages++; } if (!cpu_present(3)) { - ClearPageReserved(virt_to_page(&trapbase_cpu3)); - init_page_count(virt_to_page(&trapbase_cpu3)); - free_page((unsigned long)&trapbase_cpu3); - totalram_pages++; + free_reserved_page(virt_to_page(&trapbase_cpu3)); num_physpages++; } /* Ok, they are spinning and ready to go. */ diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index c85241006e32..fdd819dfdacf 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -112,6 +112,8 @@ void show_regs(struct pt_regs *r) { struct reg_window32 *rw = (struct reg_window32 *) r->u_regs[14]; + show_regs_print_info(KERN_DEFAULT); + printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx %s\n", r->psr, r->pc, r->npc, r->y, print_tainted()); printk("PC: <%pS>\n", (void *) r->pc); @@ -142,11 +144,13 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) struct reg_window32 *rw; int count = 0; - if (tsk != NULL) - task_base = (unsigned long) task_stack_page(tsk); - else - task_base = (unsigned long) current_thread_info(); + if (!tsk) + tsk = current; + + if (tsk == current && !_ksp) + __asm__ __volatile__("mov %%fp, %0" : "=r" (_ksp)); + task_base = (unsigned long) task_stack_page(tsk); fp = (unsigned long) _ksp; do { /* Bogus frame pointer? */ @@ -162,17 +166,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) printk("\n"); } -void dump_stack(void) -{ - unsigned long *ksp; - - __asm__ __volatile__("mov %%fp, %0" - : "=r" (ksp)); - show_stack(current, ksp); -} - -EXPORT_SYMBOL(dump_stack); - /* * Note: sparc64 has a pretty intricated thread_saved_pc, check it out. */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 9fbf0d14a361..c6dc1ba069ca 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -163,6 +163,8 @@ static void show_regwindow(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); + printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate, regs->tpc, regs->tnpc, regs->y, print_tainted()); printk("TPC: <%pS>\n", (void *) regs->tpc); diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 2daaaa6eda23..51561b8b15ba 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -290,7 +290,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm) sysctl_legacy_va_layout) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { /* We know it's 32-bit */ unsigned long task_size = STACK_TOP32; @@ -302,7 +301,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 8d38ca97aa23..b3f833ab90eb 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2350,13 +2350,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) } while (++count < 16); } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - static inline struct reg_window *kernel_stack_up(struct reg_window *rw) { unsigned long fp = rw->ins[6]; diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 8410065f2862..dbe119b63b48 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -45,4 +45,3 @@ obj-y += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o obj-y += ksyms.o obj-$(CONFIG_SPARC64) += PeeCeeI.o -obj-y += usercopy.o diff --git a/arch/sparc/lib/usercopy.c b/arch/sparc/lib/usercopy.c deleted file mode 100644 index 5c4284ce1c03..000000000000 --- a/arch/sparc/lib/usercopy.c +++ /dev/null @@ -1,9 +0,0 @@ -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/bug.h> - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 48e0c030e8f5..af472cf7c69a 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -282,14 +282,8 @@ static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn); #endif - for (tmp = start_pfn; tmp < end_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; - } + for (tmp = start_pfn; tmp < end_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); } void __init mem_init(void) @@ -347,8 +341,6 @@ void __init mem_init(void) map_high_region(start_pfn, end_pfn); } - totalram_pages += totalhigh_pages; - codepages = (((unsigned long) &_etext) - ((unsigned long)&_start)); codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; datapages = (((unsigned long) &_edata) - ((unsigned long)&_etext)); @@ -374,45 +366,14 @@ void __init mem_init(void) void free_initmem (void) { - unsigned long addr; - unsigned long freed; - - addr = (unsigned long)(&__init_begin); - freed = (unsigned long)(&__init_end) - addr; - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - struct page *p; - - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - p = virt_to_page(addr); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - totalram_pages++; - num_physpages++; - } - printk(KERN_INFO "Freeing unused kernel memory: %ldk freed\n", - freed >> 10); + num_physpages += free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - struct page *p; - - memset((void *)start, POISON_FREE_INITMEM, PAGE_SIZE); - p = virt_to_page(start); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - totalram_pages++; - num_physpages++; - } + num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ccaa1b9961f..a7171997adfd 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2059,8 +2059,7 @@ void __init mem_init(void) /* We subtract one to account for the mem_map_zero page * allocated below. */ - totalram_pages -= 1; - num_physpages = totalram_pages; + num_physpages = totalram_pages - 1; /* * Set up the zero page, mark it reserved, so that page count @@ -2071,7 +2070,7 @@ void __init mem_init(void) prom_printf("paging_init: Cannot alloc zero page.\n"); prom_halt(); } - SetPageReserved(mem_map_zero); + mark_page_reserved(mem_map_zero); codepages = (((unsigned long) _etext) - ((unsigned long) _start)); codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; @@ -2111,37 +2110,22 @@ void free_initmem(void) initend = (unsigned long)(__init_end) & PAGE_MASK; for (; addr < initend; addr += PAGE_SIZE) { unsigned long page; - struct page *p; page = (addr + ((unsigned long) __va(kern_base)) - ((unsigned long) KERNBASE)); memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - if (do_free) { - p = virt_to_page(page); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - totalram_pages++; - } + if (do_free) + free_reserved_page(virt_to_page(page)); } } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - struct page *p = virt_to_page(start); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - totalram_pages++; - } + num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM, + "initrd"); } #endif @@ -2178,10 +2162,9 @@ unsigned long vmemmap_table[VMEMMAP_SIZE]; static long __meminitdata addr_start, addr_end; static int __meminitdata node_start; -int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) +int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, + int node) { - unsigned long vstart = (unsigned long) start; - unsigned long vend = (unsigned long) (start + nr); unsigned long phys_start = (vstart - VMEMMAP_BASE); unsigned long phys_end = (vend - VMEMMAP_BASE); unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK; @@ -2233,7 +2216,7 @@ void __meminit vmemmap_populate_print_last(void) } } -void vmemmap_free(struct page *memmap, unsigned long nr_pages) +void vmemmap_free(unsigned long start, unsigned long end) { } diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index ed368c3dc451..5a977a0a3cae 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -18,6 +18,7 @@ config TILE select HAVE_DEBUG_BUGVERBOSE select VIRT_TO_BUS select SYS_HYPERVISOR + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA @@ -108,13 +109,6 @@ config STRICT_DEVMEM config SMP def_bool y -# Allow checking for compile-time determined overflow errors in -# copy_from_user(). There are still unprovable places in the -# generic code as of 2.6.34, so this option is not really compatible -# with -Werror, which is more useful in general. -config DEBUG_COPY_FROM_USER - def_bool n - config HVC_TILE depends on TTY select HVC_DRIVER diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h index 0f885af2b621..3257733003f8 100644 --- a/arch/tile/include/asm/hugetlb.h +++ b/arch/tile/include/asm/hugetlb.h @@ -16,6 +16,7 @@ #define _ASM_TILE_HUGETLB_H #include <asm/page.h> +#include <asm-generic/hugetlb.h> static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index 9ab078a4605d..8a082bc6bca5 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -395,7 +395,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -#ifdef CONFIG_DEBUG_COPY_FROM_USER +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS +/* + * There are still unprovable places in the generic code as of 2.6.34, so this + * option is not really compatible with -Werror, which is more useful in + * general. + */ extern void copy_from_user_overflow(void) __compiletime_warning("copy_from_user() size is not provably correct"); diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c index afb9c9a0d887..34d72a151bf3 100644 --- a/arch/tile/kernel/early_printk.c +++ b/arch/tile/kernel/early_printk.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/string.h> #include <linux/irqflags.h> +#include <linux/printk.h> #include <asm/setup.h> #include <hv/hypervisor.h> @@ -33,25 +34,8 @@ static struct console early_hv_console = { }; /* Direct interface for emergencies */ -static struct console *early_console = &early_hv_console; -static int early_console_initialized; static int early_console_complete; -static void early_vprintk(const char *fmt, va_list ap) -{ - char buf[512]; - int n = vscnprintf(buf, sizeof(buf), fmt, ap); - early_console->write(early_console, buf, n); -} - -void early_printk(const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - early_vprintk(fmt, ap); - va_end(ap); -} - void early_panic(const char *fmt, ...) { va_list ap; @@ -69,14 +53,13 @@ static int __initdata keep_early; static int __init setup_early_printk(char *str) { - if (early_console_initialized) + if (early_console) return 1; if (str != NULL && strncmp(str, "keep", 4) == 0) keep_early = 1; early_console = &early_hv_console; - early_console_initialized = 1; register_console(early_console); return 0; @@ -85,12 +68,12 @@ static int __init setup_early_printk(char *str) void __init disable_early_printk(void) { early_console_complete = 1; - if (!early_console_initialized || !early_console) + if (!early_console) return; if (!keep_early) { early_printk("disabling early console\n"); unregister_console(early_console); - early_console_initialized = 0; + early_console = NULL; } else { early_printk("keeping early console\n"); } @@ -98,7 +81,7 @@ void __init disable_early_printk(void) void warn_early_printk(void) { - if (early_console_complete || early_console_initialized) + if (early_console_complete || early_console) return; early_printk("\ Machine shutting down before console output is fully initialized.\n\ diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 80b2a18deb87..8ac304484f98 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -573,8 +573,7 @@ void show_regs(struct pt_regs *regs) int i; pr_err("\n"); - pr_err(" Pid: %d, comm: %20s, CPU: %d\n", - tsk->pid, tsk->comm, smp_processor_id()); + show_regs_print_info(KERN_ERR); #ifdef __tilegx__ for (i = 0; i < 51; i += 3) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c index f8d398c9ee7f..030abe3ee4f1 100644 --- a/arch/tile/lib/uaccess.c +++ b/arch/tile/lib/uaccess.c @@ -22,11 +22,3 @@ int __range_ok(unsigned long addr, unsigned long size) is_arch_mappable_range(addr, size)); } EXPORT_SYMBOL(__range_ok); - -#ifdef CONFIG_DEBUG_COPY_FROM_USER -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); -#endif diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index f96f4cec602a..d67d91ebf63e 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -66,10 +66,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(mm); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index b3b4972c2451..dfd63ce87327 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -592,12 +592,7 @@ void iounmap(volatile void __iomem *addr_in) in parallel. Reuse of the virtual address is prevented by leaving it in the global lists until we're done with it. cpa takes care of the direct mappings. */ - read_lock(&vmlist_lock); - for (p = vmlist; p; p = p->next) { - if (p->addr == addr) - break; - } - read_unlock(&vmlist_lock); + p = find_vm_area((void *)addr); if (!p) { pr_err("iounmap: bad address %p\n", addr); diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c index 49480f092456..4a0800bc37b2 100644 --- a/arch/um/kernel/early_printk.c +++ b/arch/um/kernel/early_printk.c @@ -16,7 +16,7 @@ static void early_console_write(struct console *con, const char *s, unsigned int um_early_printk(s, n); } -static struct console early_console = { +static struct console early_console_dev = { .name = "earlycon", .write = early_console_write, .flags = CON_BOOT, @@ -25,8 +25,10 @@ static struct console early_console = { static int __init setup_early_printk(char *buf) { - register_console(&early_console); - + if (!early_console) { + early_console = &early_console_dev; + register_console(&early_console_dev); + } return 0; } diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 5abcbfbe7e25..9df292b270a8 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -42,17 +42,12 @@ static unsigned long brk_end; static void setup_highmem(unsigned long highmem_start, unsigned long highmem_len) { - struct page *page; unsigned long highmem_pfn; int i; highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; - for (i = 0; i < highmem_len >> PAGE_SHIFT; i++) { - page = &mem_map[highmem_pfn + i]; - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - } + for (i = 0; i < highmem_len >> PAGE_SHIFT; i++) + free_highmem_page(&mem_map[highmem_pfn + i]); } #endif @@ -73,18 +68,13 @@ void __init mem_init(void) totalram_pages = free_all_bootmem(); max_low_pfn = totalram_pages; #ifdef CONFIG_HIGHMEM - totalhigh_pages = highmem >> PAGE_SHIFT; - totalram_pages += totalhigh_pages; + setup_highmem(end_iomem, highmem); #endif num_physpages = totalram_pages; max_pfn = totalram_pages; printk(KERN_INFO "Memory: %luk available\n", nr_free_pages() << (PAGE_SHIFT-10)); kmalloc_ok = 1; - -#ifdef CONFIG_HIGHMEM - setup_highmem(end_iomem, highmem); -#endif } /* @@ -254,15 +244,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index e562ff80409a..7d101a2a1541 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -35,18 +35,6 @@ void show_trace(struct task_struct *task, unsigned long * stack) } #endif -/* - * stack dumps generator - this is used by arch-independent code. - * And this is identical to i386 currently. - */ -void dump_stack(void) -{ - unsigned long stack; - - show_trace(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - /*Stolen from arch/i386/kernel/traps.c */ static const int kstack_depth_to_print = 24; diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c index f889449f9285..1ff1ad7f27da 100644 --- a/arch/um/sys-ppc/sysrq.c +++ b/arch/um/sys-ppc/sysrq.c @@ -11,6 +11,8 @@ void show_regs(struct pt_regs_subarch *regs) { printk("\n"); + show_regs_print_info(KERN_DEFAULT); + printk("show_regs(): insert regs here.\n"); #if 0 printk("\n"); diff --git a/arch/unicore32/kernel/early_printk.c b/arch/unicore32/kernel/early_printk.c index 3922255f1fa8..9be0d5d02a9a 100644 --- a/arch/unicore32/kernel/early_printk.c +++ b/arch/unicore32/kernel/early_printk.c @@ -33,21 +33,17 @@ static struct console early_ocd_console = { .index = -1, }; -/* Direct interface for emergencies */ -static struct console *early_console = &early_ocd_console; - -static int __initdata keep_early; - static int __init setup_early_printk(char *buf) { - if (!buf) + int keep_early; + + if (!buf || early_console) return 0; if (strstr(buf, "keep")) keep_early = 1; - if (!strncmp(buf, "ocd", 3)) - early_console = &early_ocd_console; + early_console = &early_ocd_console; if (keep_early) early_console->flags &= ~CON_BOOT; diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index 7fab86d7c5d4..c9447691bdac 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -144,11 +144,7 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; - printk(KERN_DEFAULT "CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->UCreg_lr); printk(KERN_DEFAULT "pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n" diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 0870b68d2ad9..c54e32410ead 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -170,12 +170,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) c_backtrace(fp, mode); } -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index de186bde8975..63df12d71ce3 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -66,6 +66,9 @@ void show_mem(unsigned int filter) printk(KERN_DEFAULT "Mem-info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; + for_each_bank(i, mi) { struct membank *bank = &mi->bank[i]; unsigned int pfn1, pfn2; @@ -313,24 +316,6 @@ void __init bootmem_init(void) max_pfn = max_high - PHYS_PFN_OFFSET; } -static inline int free_area(unsigned long pfn, unsigned long end, char *s) -{ - unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10); - - for (; pfn < end; pfn++) { - struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - pages++; - } - - if (size && s) - printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); - - return pages; -} - static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { @@ -404,9 +389,9 @@ void __init mem_init(void) max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; - /* this will put all unused low memory onto the freelists */ free_unused_memmap(&meminfo); + /* this will put all unused low memory onto the freelists */ totalram_pages += free_all_bootmem(); reserved_pages = free_pages = 0; @@ -491,9 +476,7 @@ void __init mem_init(void) void free_initmem(void) { - totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)), - __phys_to_pfn(__pa(__init_end)), - "init"); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD @@ -503,9 +486,7 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) - totalram_pages += free_area(__phys_to_pfn(__pa(start)), - __phys_to_pfn(__pa(end)), - "initrd"); + free_reserved_area(start, end, 0, "initrd"); } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/unicore32/mm/ioremap.c b/arch/unicore32/mm/ioremap.c index b7a605597b08..13068ee22f33 100644 --- a/arch/unicore32/mm/ioremap.c +++ b/arch/unicore32/mm/ioremap.c @@ -235,7 +235,7 @@ EXPORT_SYMBOL(__uc32_ioremap_cached); void __uc32_iounmap(volatile void __iomem *io_addr) { void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); - struct vm_struct **p, *tmp; + struct vm_struct *vm; /* * If this is a section based mapping we need to handle it @@ -244,17 +244,10 @@ void __uc32_iounmap(volatile void __iomem *io_addr) * all the mappings before the area can be reclaimed * by someone else. */ - write_lock(&vmlist_lock); - for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) { - if ((tmp->flags & VM_IOREMAP) && (tmp->addr == addr)) { - if (tmp->flags & VM_UNICORE_SECTION_MAPPING) { - unmap_area_sections((unsigned long)tmp->addr, - tmp->size); - } - break; - } - } - write_unlock(&vmlist_lock); + vm = find_vm_area(addr); + if (vm && (vm->flags & VM_IOREMAP) && + (vm->flags & VM_UNICORE_SECTION_MAPPING)) + unmap_area_sections((unsigned long)vm->addr, vm->size); vunmap(addr); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dbd05558d06f..67b7dfc3094b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -20,6 +20,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select HAVE_AOUT if X86_32 select HAVE_UNSTABLE_SCHED_CLOCK select ARCH_SUPPORTS_NUMA_BALANCING @@ -64,6 +65,7 @@ config X86 select HAVE_KERNEL_LZMA select HAVE_KERNEL_XZ select HAVE_KERNEL_LZO + select HAVE_KERNEL_LZ4 select HAVE_HW_BREAKPOINT select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS @@ -102,6 +104,7 @@ config X86 select HAVE_ARCH_SECCOMP_FILTER select BUILDTIME_EXTABLE_SORT select GENERIC_CMOS_UPDATE + select HAVE_ARCH_SOFT_DIRTY select CLOCKSOURCE_WATCHDOG select GENERIC_CLOCKEVENTS select ARCH_CLOCKSOURCE_DATA if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 16f738385dcb..c198b7e13e7b 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -292,20 +292,6 @@ config OPTIMIZE_INLINING If unsure, say N. -config DEBUG_STRICT_USER_COPY_CHECKS - bool "Strict copy size checks" - depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING - ---help--- - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time failures. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, or if you run an older (pre 4.4) gcc, say N. - config DEBUG_NMI_SELFTEST bool "NMI Selftest" depends on DEBUG_KERNEL && X86_LOCAL_APIC diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 5ef205c5f37b..dcd90df10ab4 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,8 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ + vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -63,12 +64,15 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lz4) suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_LZ4) := lz4 quiet_cmd_mkpiggy = MKPIGGY $@ cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 7cb56c6ca351..0319c88290a5 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -145,6 +145,10 @@ static int lines, cols; #include "../../../../lib/decompress_unlzo.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + static void scroll(void) { int i; diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 03abf9b70011..0f9a4728a467 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -162,7 +162,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; - current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(current->comm)); dump.u_ar0 = offsetof(struct user32, regs); dump.signal = signr; @@ -309,8 +308,6 @@ static int load_aout_binary(struct linux_binprm *bprm) (current->mm->start_data = N_DATADDR(ex)); current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); - current->mm->free_area_cache = TASK_UNMAPPED_BASE; - current->mm->cached_hole_size = 0; retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); if (retval < 0) { diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 11e1152222d0..2f03ff018d36 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -37,7 +37,4 @@ do { \ #include <asm-generic/bug.h> - -extern void show_regs_common(void); - #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index cccd07fa5e3a..b8e9224f0b45 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -17,6 +17,8 @@ extern unsigned long pci_mem_start; extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void e820_add_region(u64 start, u64 size, int type); +extern void e820_add_limit_region(u64 start, u64 size, int type); +extern void e820_adjust_region(u64 *start, u64 *size); extern void e820_print_map(char *who); extern int sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map); diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index bdd35dbd0605..a8091216963b 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -2,6 +2,7 @@ #define _ASM_X86_HUGETLB_H #include <asm/page.h> +#include <asm-generic/hugetlb.h> static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1e672234c4ff..ebf937362479 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -207,7 +207,7 @@ static inline pte_t pte_mkexec(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - return pte_set_flags(pte, _PAGE_DIRTY); + return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) @@ -271,7 +271,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_DIRTY); + return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_mkhuge(pmd_t pmd) @@ -294,6 +294,26 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_PRESENT); } +static inline int pte_soft_dirty(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SOFT_DIRTY; +} + +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; +} + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); +} + /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e6423002c10b..c98ac63aae48 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -55,6 +55,18 @@ #define _PAGE_HIDDEN (_AT(pteval_t, 0)) #endif +/* + * The same hidden bit is used by kmemcheck, but since kmemcheck + * works on kernel pages while soft-dirty engine on user space, + * they do not conflict with each other. + */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) +#else +#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #else diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 5f81bcefbe14..895f62e36ebb 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -44,10 +44,10 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o quiet_cmd_mkcapflags = MKCAP $@ - cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ + cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ cpufeature = $(src)/../../include/asm/cpufeature.h targets += capflags.c -$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE +$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl deleted file mode 100644 index 091972ef49de..000000000000 --- a/arch/x86/kernel/cpu/mkcapflags.pl +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/perl -w -# -# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h -# - -($in, $out) = @ARGV; - -open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; -open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; - -print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n"; -print OUT "#include <asm/cpufeature.h>\n"; -print OUT "#endif\n"; -print OUT "\n"; -print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; - -%features = (); -$err = 0; - -while (defined($line = <IN>)) { - if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { - $macro = $1; - $feature = "\L$2"; - $tail = $3; - if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { - $feature = "\L$1"; - } - - next if ($feature eq ''); - - if ($features{$feature}++) { - print STDERR "$in: duplicate feature name: $feature\n"; - $err++; - } - printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature; - } -} -print OUT "};\n"; - -close(IN); -close(OUT); - -if ($err) { - unlink($out); - exit(1); -} - -exit(0); diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh new file mode 100644 index 000000000000..2bf616505499 --- /dev/null +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -0,0 +1,41 @@ +#!/bin/sh +# +# Generate the x86_cap_flags[] array from include/asm/cpufeature.h +# + +IN=$1 +OUT=$2 + +TABS="$(printf '\t\t\t\t\t')" +trap 'rm "$OUT"' EXIT + +( + echo "#ifndef _ASM_X86_CPUFEATURE_H" + echo "#include <asm/cpufeature.h>" + echo "#endif" + echo "" + echo "const char * const x86_cap_flags[NCAPINTS*32] = {" + + # Iterate through any input lines starting with #define X86_FEATURE_ + sed -n -e 's/\t/ /g' -e 's/^ *# *define *X86_FEATURE_//p' $IN | + while read i + do + # Name is everything up to the first whitespace + NAME="$(echo "$i" | sed 's/ .*//')" + + # If the /* comment */ starts with a quote string, grab that. + VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')" + [ -z "$VALUE" ] && VALUE="\"$NAME\"" + [ "$VALUE" == '""' ] && continue + + # Name is uppercase, VALUE is all lowercase + VALUE="$(echo "$VALUE" | tr A-Z a-z)" + + TABCOUNT=$(( ( 5*8 - 14 - $(echo "$NAME" | wc -c) ) / 8 )) + printf "\t[%s]%.*s = %s,\n" \ + "X86_FEATURE_$NAME" "$TABCOUNT" "$TABS" "$VALUE" + done + echo "};" +) > $OUT + +trap - EXIT diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c8797d55b245..deb6421c9e69 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -176,26 +176,20 @@ void show_trace(struct task_struct *task, struct pt_regs *regs, void show_stack(struct task_struct *task, unsigned long *sp) { - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp; + unsigned long bp = 0; unsigned long stack; - bp = stack_frame(current, NULL); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); + /* + * Stack frames below this one aren't interesting. Don't show them + * if we're printing for %current. + */ + if (!sp && (!task || task == current)) { + sp = &stack; + bp = stack_frame(current, NULL); + } + + show_stack_log_lvl(task, NULL, sp, bp, ""); } -EXPORT_SYMBOL(dump_stack); static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1038a417ea53..f2a1770ca176 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -86,11 +86,9 @@ void show_regs(struct pt_regs *regs) { int i; + show_regs_print_info(KERN_EMERG); __show_regs(regs, !user_mode_vm(regs)); - pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index b653675d5288..addb207dab92 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -249,14 +249,10 @@ void show_regs(struct pt_regs *regs) { int i; unsigned long sp; - const int cpu = smp_processor_id(); - struct task_struct *cur = current; sp = regs->sp; - printk("CPU %d ", cpu); + show_regs_print_info(KERN_DEFAULT); __show_regs(regs, 1); - printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); /* * When in-kernel, we also print out the stack and code at the diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d32abeabbda5..0d5bb689649a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -47,6 +47,7 @@ unsigned long pci_mem_start = 0xaeedbabe; #ifdef CONFIG_PCI EXPORT_SYMBOL(pci_mem_start); #endif +static u64 mem_limit = ~0ULL; /* * This function checks if any part of the range <start,end> is mapped @@ -108,7 +109,7 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type) * Add a memory region to the kernel e820 map. */ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, - int type) + int type, bool limited) { int x = e820x->nr_map; @@ -119,6 +120,22 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, return; } + if (limited) { + if (start >= mem_limit) { + printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n", + (unsigned long long)start, + (unsigned long long)(start + size - 1)); + return; + } + + if (mem_limit - start < size) { + printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n", + (unsigned long long)mem_limit, + (unsigned long long)(start + size - 1)); + size = mem_limit - start; + } + } + e820x->map[x].addr = start; e820x->map[x].size = size; e820x->map[x].type = type; @@ -127,7 +144,37 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, void __init e820_add_region(u64 start, u64 size, int type) { - __e820_add_region(&e820, start, size, type); + __e820_add_region(&e820, start, size, type, false); +} + +/* + * do_add_efi_memmap() calls this function(). + * + * Note: BOOT_SERVICES_{CODE,DATA} regions on some efi machines are marked + * as E820_RAM, and they are needed to be mapped. Please use e820_add_region() + * to add BOOT_SERVICES_{CODE,DATA} regions. + */ +void __init e820_add_limit_region(u64 start, u64 size, int type) +{ + /* + * efi_init() is called after finish_e820_parsing(), so we should + * check whether [start, start + size) contains address above + * mem_limit if the type is E820_RAM. + */ + __e820_add_region(&e820, start, size, type, type == E820_RAM); +} + +void __init e820_adjust_region(u64 *start, u64 *size) +{ + if (*start >= mem_limit) { + *size = 0; + return; + } + + if (mem_limit - *start < *size) + *size = mem_limit - *start; + + return; } static void __init e820_print_type(u32 type) @@ -455,8 +502,9 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, /* new range is totally covered? */ if (ei->addr < start && ei_end > end) { - __e820_add_region(e820x, start, size, new_type); - __e820_add_region(e820x, end, ei_end - end, ei->type); + __e820_add_region(e820x, start, size, new_type, false); + __e820_add_region(e820x, end, ei_end - end, ei->type, + false); ei->size = start - ei->addr; real_updated_size += size; continue; @@ -469,7 +517,7 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, continue; __e820_add_region(e820x, final_start, final_end - final_start, - new_type); + new_type, false); real_updated_size += final_end - final_start; @@ -809,7 +857,7 @@ static int userdef __initdata; /* "mem=nopentium" disables the 4MB page tables. */ static int __init parse_memopt(char *p) { - u64 mem_size; + char *oldp; if (!p) return -EINVAL; @@ -825,11 +873,11 @@ static int __init parse_memopt(char *p) } userdef = 1; - mem_size = memparse(p, &p); + oldp = p; + mem_limit = memparse(p, &p); /* don't remove all of memory when handling "mem={invalid}" param */ - if (mem_size == 0) + if (mem_limit == 0 || p == oldp) return -EINVAL; - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); return 0; } @@ -895,6 +943,12 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { + if (mem_limit != ~0ULL) { + userdef = 1; + e820_remove_range(mem_limit, ULLONG_MAX - mem_limit, + E820_RAM, 1); + } + if (userdef) { u32 nr = e820.nr_map; diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 9b9f18b49918..d15f575a861b 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -169,25 +169,9 @@ static struct console early_serial_console = { .index = -1, }; -/* Direct interface for emergencies */ -static struct console *early_console = &early_vga_console; -static int __initdata early_console_initialized; - -asmlinkage void early_printk(const char *fmt, ...) -{ - char buf[512]; - int n; - va_list ap; - - va_start(ap, fmt); - n = vscnprintf(buf, sizeof(buf), fmt, ap); - early_console->write(early_console, buf, n); - va_end(ap); -} - static inline void early_console_register(struct console *con, int keep_early) { - if (early_console->index != -1) { + if (con->index != -1) { printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n", con->name); return; @@ -207,9 +191,8 @@ static int __init setup_early_printk(char *buf) if (!buf) return 0; - if (early_console_initialized) + if (early_console) return 0; - early_console_initialized = 1; keep = (strstr(buf, "keep") != NULL); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6833bffaadb7..f30f424d50ce 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -121,30 +121,6 @@ void exit_thread(void) drop_fpu(me); } -void show_regs_common(void) -{ - const char *vendor, *product, *board; - - vendor = dmi_get_system_info(DMI_SYS_VENDOR); - if (!vendor) - vendor = ""; - product = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!product) - product = ""; - - /* Board Name is optional */ - board = dmi_get_system_info(DMI_BOARD_NAME); - - printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, - vendor, product, - board ? "/" : "", - board ? board : ""); -} - void flush_thread(void) { struct task_struct *tsk = current; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b5a8905785e6..7305f7dfc7ab 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -84,8 +84,6 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - show_regs_common(); - printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, smp_processor_id()); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0f49677da51e..355ae06dbf94 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -62,7 +62,6 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int fsindex, gsindex; unsigned int ds, cs, es; - show_regs_common(); printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 90d8cc930f5e..91b9e7ce4fa7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -970,6 +970,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); dmi_scan_machine(); + dmi_set_dump_stack_arch_desc(); /* * VMware detection requires dmi to be available, so this diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index f0312d746402..3eb18acd0e40 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -689,9 +689,3 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } EXPORT_SYMBOL(_copy_from_user); - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 6f31ee56c008..252b8f5489ba 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -137,5 +137,4 @@ void __init set_highmem_pages_init(void) add_highpages_with_active_regions(nid, zone_start_pfn, zone_end_pfn); } - totalram_pages += totalhigh_pages; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 59b7fc453277..fdc5dca14fb3 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -515,11 +515,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); for (; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; + free_reserved_page(virt_to_page(addr)); } #endif } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2d19001151d5..3ac7e319918d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -427,14 +427,6 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -static void __init add_one_highpage_init(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; -} - void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn) { @@ -448,7 +440,7 @@ void __init add_highpages_with_active_regions(int nid, start_pfn, end_pfn); for ( ; pfn < e_pfn; pfn++) if (pfn_valid(pfn)) - add_one_highpage_init(pfn_to_page(pfn)); + free_highmem_page(pfn_to_page(pfn)); } } #else diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dafdeb2a5938..caad9a0ee19f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1011,11 +1011,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) flush_tlb_all(); } -void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages) +void __ref vmemmap_free(unsigned long start, unsigned long end) { - unsigned long start = (unsigned long)memmap; - unsigned long end = (unsigned long)(memmap + nr_pages); - remove_pagetable(start, end, false); } @@ -1067,10 +1064,9 @@ void __init mem_init(void) /* clear_bss() already clear the empty_zero_page */ - reservedpages = 0; - - /* this will put all low memory onto the freelists */ register_page_bootmem_info(); + + /* this will put all memory onto the freelists */ totalram_pages = free_all_bootmem(); absent_pages = absent_pages_in_range(0, max_pfn); @@ -1285,18 +1281,17 @@ static long __meminitdata addr_start, addr_end; static void __meminitdata *p_start, *p_end; static int __meminitdata node_start; -int __meminit -vmemmap_populate(struct page *start_page, unsigned long size, int node) +static int __meminit vmemmap_populate_hugepages(unsigned long start, + unsigned long end, int node) { - unsigned long addr = (unsigned long)start_page; - unsigned long end = (unsigned long)(start_page + size); + unsigned long addr; unsigned long next; pgd_t *pgd; pud_t *pud; pmd_t *pmd; - for (; addr < end; addr = next) { - void *p = NULL; + for (addr = start; addr < end; addr = next) { + next = pmd_addr_end(addr, end); pgd = vmemmap_pgd_populate(addr, node); if (!pgd) @@ -1306,31 +1301,14 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) if (!pud) return -ENOMEM; - if (!cpu_has_pse) { - next = (addr + PAGE_SIZE) & PAGE_MASK; - pmd = vmemmap_pmd_populate(pud, addr, node); - - if (!pmd) - return -ENOMEM; - - p = vmemmap_pte_populate(pmd, addr, node); + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p; - if (!p) - return -ENOMEM; - - addr_end = addr + PAGE_SIZE; - p_end = p + PAGE_SIZE; - } else { - next = pmd_addr_end(addr, end); - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { + p = vmemmap_alloc_block_buf(PMD_SIZE, node); + if (p) { pte_t entry; - p = vmemmap_alloc_block_buf(PMD_SIZE, node); - if (!p) - return -ENOMEM; - entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE); set_pmd(pmd, __pmd(pte_val(entry))); @@ -1347,15 +1325,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) addr_end = addr + PMD_SIZE; p_end = p + PMD_SIZE; - } else - vmemmap_verify((pte_t *)pmd, node, addr, next); + continue; + } + } else if (pmd_large(*pmd)) { + vmemmap_verify((pte_t *)pmd, node, addr, next); + continue; } - + pr_warn_once("vmemmap: falling back to regular page backing\n"); + if (vmemmap_populate_basepages(addr, next, node)) + return -ENOMEM; } - sync_global_pgds((unsigned long)start_page, end - 1); return 0; } +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +{ + int err; + + if (cpu_has_pse) + err = vmemmap_populate_hugepages(start, end, node); + else + err = vmemmap_populate_basepages(start, end, node); + if (!err) + sync_global_pgds(start, end - 1); + return err; +} + #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) void register_page_bootmem_memmap(unsigned long section_nr, struct page *start_page, unsigned long size) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 78fe3f1ac49f..9a1e6583910c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -282,12 +282,7 @@ void iounmap(volatile void __iomem *addr) in parallel. Reuse of the virtual address is prevented by leaving it in the global lists until we're done with it. cpa takes care of the direct mappings. */ - read_lock(&vmlist_lock); - for (p = vmlist; p; p = p->next) { - if (p->addr == (void __force *)addr) - break; - } - read_unlock(&vmlist_lock); + p = find_vm_area((void __force *)addr); if (!p) { printk(KERN_ERR "iounmap: bad address %p\n", addr); diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 845df6835f9f..62c29a5bfe26 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -115,10 +115,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = mmap_legacy_base(); mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 72fe01e9e414..a71c4e207679 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -114,14 +114,11 @@ void numa_clear_node(int cpu) */ void __init setup_node_to_cpumask_map(void) { - unsigned int node, num = 0; + unsigned int node; /* setup nr_node_ids if not done yet */ - if (nr_node_ids == MAX_NUMNODES) { - for_each_node_mask(node, node_possible_map) - num = node; - nr_node_ids = num + 1; - } + if (nr_node_ids == MAX_NUMNODES) + setup_nr_node_ids(); /* allocate the map */ for (node = 0; node < nr_node_ids; node++) diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 0e38951e65eb..d0b1773d9d2e 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -130,13 +130,12 @@ static int pageattr_test(void) } failed += print_split(&sa); - srandom32(100); for (i = 0; i < NTEST; i++) { - unsigned long pfn = random32() % max_pfn_mapped; + unsigned long pfn = prandom_u32() % max_pfn_mapped; addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT); - len[i] = random32() % 100; + len[i] = prandom_u32() % 100; len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1); if (len[i] == 0) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4b70be21fe0a..1c1880e947b5 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -324,10 +324,17 @@ static void __init do_add_efi_memmap(void) int e820_type; switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: + /* EFI_BOOT_SERVICES_{CODE,DATA} needs to be mapped */ + if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_RAM; + else + e820_type = E820_RESERVED; + e820_add_region(start, size, e820_type); + continue; + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: case EFI_CONVENTIONAL_MEMORY: if (md->attribute & EFI_MEMORY_WB) e820_type = E820_RAM; @@ -352,7 +359,7 @@ static void __init do_add_efi_memmap(void) e820_type = E820_RESERVED; break; } - e820_add_region(start, size, e820_type); + e820_add_limit_region(start, size, e820_type); } sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } @@ -460,6 +467,8 @@ void __init efi_free_boot_services(void) md->type != EFI_BOOT_SERVICES_DATA) continue; + e820_adjust_region(&start, &size); + /* Could not reserve boot area */ if (!size) continue; diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 923db5c15278..458186dab5dc 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -383,6 +383,8 @@ void show_regs(struct pt_regs * regs) { int i, wmask; + show_regs_print_info(KERN_DEFAULT); + wmask = regs->wmask & ~1; for (i = 0; i < 16; i++) { @@ -481,14 +483,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_trace(task, stack); } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - - void show_code(unsigned int *pc) { long i; diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 7a5156ffebb6..bba125b4bb06 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -208,32 +208,17 @@ void __init mem_init(void) highmemsize >> 10); } -void -free_reserved_mem(void *start, void *end) -{ - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page((unsigned long)start); - totalram_pages++; - } -} - #ifdef CONFIG_BLK_DEV_INITRD extern int initrd_is_mapped; void free_initrd_mem(unsigned long start, unsigned long end) { - if (initrd_is_mapped) { - free_reserved_mem((void*)start, (void*)end); - printk ("Freeing initrd memory: %ldk freed\n",(end-start)>>10); - } + if (initrd_is_mapped) + free_reserved_area(start, end, 0, "initrd"); } #endif void free_initmem(void) { - free_reserved_mem(__init_begin, __init_end); - printk("Freeing unused kernel memory: %zuk freed\n", - (__init_end - __init_begin) >> 10); + free_initmem_default(0); } diff --git a/block/blk-core.c b/block/blk-core.c index 33c33bc99ddd..94aa4e75d626 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -153,7 +153,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq) EXPORT_SYMBOL(blk_rq_init); static void req_bio_endio(struct request *rq, struct bio *bio, - unsigned int nbytes, int error) + unsigned int nbytes, int error, + struct batch_complete *batch) { if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); @@ -167,7 +168,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* don't actually finish bio if it's part of flush sequence */ if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) - bio_endio(bio, error); + bio_endio_batch(bio, error, batch); } void blk_dump_rq_flags(struct request *rq, char *msg) @@ -2281,7 +2282,8 @@ EXPORT_SYMBOL(blk_fetch_request); * %false - this request doesn't have any more data * %true - this request has more data **/ -bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) +bool blk_update_request(struct request *req, int error, unsigned int nr_bytes, + struct batch_complete *batch) { int total_bytes; @@ -2337,7 +2339,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) if (bio_bytes == bio->bi_size) req->bio = bio->bi_next; - req_bio_endio(req, bio, bio_bytes, error); + req_bio_endio(req, bio, bio_bytes, error, batch); total_bytes += bio_bytes; nr_bytes -= bio_bytes; @@ -2390,14 +2392,15 @@ EXPORT_SYMBOL_GPL(blk_update_request); static bool blk_update_bidi_request(struct request *rq, int error, unsigned int nr_bytes, - unsigned int bidi_bytes) + unsigned int bidi_bytes, + struct batch_complete *batch) { - if (blk_update_request(rq, error, nr_bytes)) + if (blk_update_request(rq, error, nr_bytes, batch)) return true; /* Bidi request must be completed as a whole */ if (unlikely(blk_bidi_rq(rq)) && - blk_update_request(rq->next_rq, error, bidi_bytes)) + blk_update_request(rq->next_rq, error, bidi_bytes, batch)) return true; if (blk_queue_add_random(rq->q)) @@ -2480,7 +2483,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, struct request_queue *q = rq->q; unsigned long flags; - if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) + if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes, NULL)) return true; spin_lock_irqsave(q->queue_lock, flags); @@ -2506,9 +2509,11 @@ static bool blk_end_bidi_request(struct request *rq, int error, * %true - still buffers pending for this request **/ bool __blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes) + unsigned int nr_bytes, + unsigned int bidi_bytes, + struct batch_complete *batch) { - if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) + if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes, batch)) return true; blk_finish_request(rq, error); @@ -2609,7 +2614,7 @@ EXPORT_SYMBOL_GPL(blk_end_request_err); **/ bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { - return __blk_end_bidi_request(rq, error, nr_bytes, 0); + return __blk_end_bidi_request(rq, error, nr_bytes, 0, NULL); } EXPORT_SYMBOL(__blk_end_request); @@ -2621,7 +2626,8 @@ EXPORT_SYMBOL(__blk_end_request); * Description: * Completely finish @rq. Must be called with queue lock held. */ -void __blk_end_request_all(struct request *rq, int error) +void blk_end_request_all_batch(struct request *rq, int error, + struct batch_complete *batch) { bool pending; unsigned int bidi_bytes = 0; @@ -2629,10 +2635,11 @@ void __blk_end_request_all(struct request *rq, int error) if (unlikely(blk_bidi_rq(rq))) bidi_bytes = blk_rq_bytes(rq->next_rq); - pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); + pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), + bidi_bytes, batch); BUG_ON(pending); } -EXPORT_SYMBOL(__blk_end_request_all); +EXPORT_SYMBOL(blk_end_request_all_batch); /** * __blk_end_request_cur - Helper function to finish the current request chunk. diff --git a/block/blk-flush.c b/block/blk-flush.c index cc2b827a853c..ab0ed2358947 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -316,7 +316,7 @@ void blk_insert_flush(struct request *rq) * complete the request. */ if (!policy) { - __blk_end_bidi_request(rq, 0, 0, 0); + __blk_end_bidi_request(rq, 0, 0, 0, NULL); return; } @@ -384,7 +384,8 @@ void blk_abort_flushes(struct request_queue *q) } } -static void bio_end_flush(struct bio *bio, int err) +static void bio_end_flush(struct bio *bio, int err, + struct batch_complete *batch) { if (err) clear_bit(BIO_UPTODATE, &bio->bi_flags); diff --git a/block/blk-lib.c b/block/blk-lib.c index d6f50d572565..279f9de415be 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -15,7 +15,8 @@ struct bio_batch { struct completion *wait; }; -static void bio_batch_end_io(struct bio *bio, int err) +static void bio_batch_end_io(struct bio *bio, int err, + struct batch_complete *batch) { struct bio_batch *bb = bio->bi_private; diff --git a/block/blk.h b/block/blk.h index e837b8f619b7..dc8fee6d41d6 100644 --- a/block/blk.h +++ b/block/blk.h @@ -31,7 +31,8 @@ void blk_queue_bypass_end(struct request_queue *q); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes); + unsigned int nr_bytes, unsigned int bidi_bytes, + struct batch_complete *batch); void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); diff --git a/block/genhd.c b/block/genhd.c index 20625eed5511..5a9f8931b0e6 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || (!disk_max_parts(sgp) && + if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) && (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 9a87daa6f4fb..a5ffcc988f0b 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -27,6 +27,7 @@ #include <linux/ratelimit.h> #include <linux/slab.h> #include <linux/times.h> +#include <linux/uio.h> #include <asm/uaccess.h> #include <scsi/scsi.h> diff --git a/crypto/Kconfig b/crypto/Kconfig index 0e7a23723b45..7b3418a7c80f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1271,6 +1271,22 @@ config CRYPTO_842 help This is the 842 algorithm. +config CRYPTO_LZ4 + tristate "LZ4 compression algorithm" + select CRYPTO_ALGAPI + select LZ4_COMPRESS + select LZ4_DECOMPRESS + help + This is the LZ4 algorithm. + +config CRYPTO_LZ4HC + tristate "LZ4HC compression algorithm" + select CRYPTO_ALGAPI + select LZ4HC_COMPRESS + select LZ4_DECOMPRESS + help + This is the LZ4 high compression mode algorithm. + comment "Random Number Generation" config CRYPTO_ANSI_CPRNG diff --git a/crypto/Makefile b/crypto/Makefile index a8e9b0fefbe9..2ba0df2f908f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -85,6 +85,8 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_CRC32) += crc32.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o +obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o obj-$(CONFIG_CRYPTO_RNG2) += krng.o diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index aa2b0270ed16..4a92bac744dc 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -46,15 +46,10 @@ static void callback(void *param) static void makedata(int disks) { - int i, j; + int i; for (i = 0; i < disks; i++) { - for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) { - u32 *p = page_address(data[i]) + j; - - *p = random32(); - } - + prandom_bytes(page_address(data[i]), PAGE_SIZE); dataptrs[i] = data[i]; } } diff --git a/crypto/lz4.c b/crypto/lz4.c new file mode 100644 index 000000000000..4586dd15b0d8 --- /dev/null +++ b/crypto/lz4.c @@ -0,0 +1,106 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2013 Chanho Min <chanho.min@lge.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <linux/lz4.h> + +struct lz4_ctx { + void *lz4_comp_mem; +}; + +static int lz4_init(struct crypto_tfm *tfm) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS); + if (!ctx->lz4_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4_exit(struct crypto_tfm *tfm) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + vfree(ctx->lz4_comp_mem); +} + +static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + size_t tmp_len = *dlen; + int err; + + err = lz4_compress(src, slen, dst, &tmp_len, ctx->lz4_comp_mem); + + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; + size_t __slen = slen; + + err = lz4_decompress(src, &__slen, dst, tmp_len); + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return err; +} + +static struct crypto_alg alg_lz4 = { + .cra_name = "lz4", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4.cra_list), + .cra_init = lz4_init, + .cra_exit = lz4_exit, + .cra_u = { .compress = { + .coa_compress = lz4_compress_crypto, + .coa_decompress = lz4_decompress_crypto } } +}; + +static int __init lz4_mod_init(void) +{ + return crypto_register_alg(&alg_lz4); +} + +static void __exit lz4_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4); +} + +module_init(lz4_mod_init); +module_exit(lz4_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Compression Algorithm"); diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c new file mode 100644 index 000000000000..151ba31d34e3 --- /dev/null +++ b/crypto/lz4hc.c @@ -0,0 +1,106 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2013 Chanho Min <chanho.min@lge.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <linux/lz4.h> + +struct lz4hc_ctx { + void *lz4hc_comp_mem; +}; + +static int lz4hc_init(struct crypto_tfm *tfm) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS); + if (!ctx->lz4hc_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4hc_exit(struct crypto_tfm *tfm) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + vfree(ctx->lz4hc_comp_mem); +} + +static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + size_t tmp_len = *dlen; + int err; + + err = lz4hc_compress(src, slen, dst, &tmp_len, ctx->lz4hc_comp_mem); + + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; + size_t __slen = slen; + + err = lz4_decompress(src, &__slen, dst, tmp_len); + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return err; +} + +static struct crypto_alg alg_lz4hc = { + .cra_name = "lz4hc", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4hc_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4hc.cra_list), + .cra_init = lz4hc_init, + .cra_exit = lz4hc_exit, + .cra_u = { .compress = { + .coa_compress = lz4hc_compress_crypto, + .coa_decompress = lz4hc_decompress_crypto } } +}; + +static int __init lz4hc_mod_init(void) +{ + return crypto_register_alg(&alg_lz4hc); +} + +static void __exit lz4hc_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4hc); +} + +module_init(lz4hc_mod_init); +module_exit(lz4hc_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC Compression Algorithm"); diff --git a/drivers/Kconfig b/drivers/Kconfig index 1d855bfce4da..9953a42809ec 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -120,6 +120,8 @@ source "drivers/vfio/Kconfig" source "drivers/vlynq/Kconfig" +source "drivers/virt/Kconfig" + source "drivers/virtio/Kconfig" source "drivers/hv/Kconfig" @@ -144,8 +146,6 @@ source "drivers/remoteproc/Kconfig" source "drivers/rpmsg/Kconfig" -source "drivers/virt/Kconfig" - source "drivers/devfreq/Kconfig" source "drivers/extcon/Kconfig" diff --git a/drivers/base/memory.c b/drivers/base/memory.c index a51007b79032..14f8a6954da0 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -93,16 +93,6 @@ int register_memory(struct memory_block *memory) return error; } -static void -unregister_memory(struct memory_block *memory) -{ - BUG_ON(memory->dev.bus != &memory_subsys); - - /* drop the ref. we got in remove_memory_block() */ - kobject_put(&memory->dev.kobj); - device_unregister(&memory->dev); -} - unsigned long __weak memory_block_size_bytes(void) { return MIN_MEMORY_BLOCK_SIZE; @@ -217,8 +207,7 @@ int memory_isolate_notify(unsigned long val, void *v) * The probe routines leave the pages reserved, just as the bootmem code does. * Make sure they're still that way. */ -static bool pages_correctly_reserved(unsigned long start_pfn, - unsigned long nr_pages) +static bool pages_correctly_reserved(unsigned long start_pfn) { int i, j; struct page *page; @@ -266,7 +255,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t switch (action) { case MEM_ONLINE: - if (!pages_correctly_reserved(start_pfn, nr_pages)) + if (!pages_correctly_reserved(start_pfn)) return -EBUSY; ret = online_pages(start_pfn, nr_pages, online_type); @@ -637,8 +626,28 @@ static int add_memory_section(int nid, struct mem_section *section, return ret; } -int remove_memory_block(unsigned long node_id, struct mem_section *section, - int phys_device) +/* + * need an interface for the VM to add new memory regions, + * but without onlining it. + */ +int register_new_memory(int nid, struct mem_section *section) +{ + return add_memory_section(nid, section, NULL, MEM_OFFLINE, HOTPLUG); +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +static void +unregister_memory(struct memory_block *memory) +{ + BUG_ON(memory->dev.bus != &memory_subsys); + + /* drop the ref. we got in remove_memory_block() */ + kobject_put(&memory->dev.kobj); + device_unregister(&memory->dev); +} + +static int remove_memory_block(unsigned long node_id, + struct mem_section *section, int phys_device) { struct memory_block *mem; @@ -661,15 +670,6 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, return 0; } -/* - * need an interface for the VM to add new memory regions, - * but without onlining it. - */ -int register_new_memory(int nid, struct mem_section *section) -{ - return add_memory_section(nid, section, NULL, MEM_OFFLINE, HOTPLUG); -} - int unregister_memory_section(struct mem_section *section) { if (!present_section(section)) @@ -677,6 +677,7 @@ int unregister_memory_section(struct mem_section *section) return remove_memory_block(0, section, 0); } +#endif /* CONFIG_MEMORY_HOTREMOVE */ /* * offline one memory block. If the memory block has been offlined, do nothing. diff --git a/drivers/base/node.c b/drivers/base/node.c index fac124a7e1c5..7616a77ca322 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/memory.h> #include <linux/vmstat.h> +#include <linux/notifier.h> #include <linux/node.h> #include <linux/hugetlb.h> #include <linux/compaction.h> @@ -683,8 +684,11 @@ static int __init register_node_type(void) ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); if (!ret) { - hotplug_memory_notifier(node_memory_callback, - NODE_CALLBACK_PRI); + static struct notifier_block node_memory_callback_nb = { + .notifier_call = node_memory_callback, + .priority = NODE_CALLBACK_PRI, + }; + register_hotmemory_notifier(&node_memory_callback_nb); } /* diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 42e67ad6bd20..ab41be625a53 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -139,13 +139,12 @@ bail: spin_unlock_irqrestore(&emsgs_lock, flags); return; } - mp = kmalloc(n, GFP_ATOMIC); + mp = kmemdup(msg, n, GFP_ATOMIC); if (mp == NULL) { printk(KERN_ERR "aoe: allocation failure, len=%ld\n", n); goto bail; } - memcpy(mp, msg, n); em->msg = mp; em->flags |= EMFL_VALID; em->len = n; diff --git a/drivers/block/blockconsole.c b/drivers/block/blockconsole.c index 01ddbc6fa6b6..1600a19c183a 100644 --- a/drivers/block/blockconsole.c +++ b/drivers/block/blockconsole.c @@ -164,7 +164,8 @@ static void bcon_advance_console_bytes(struct blockconsole *bc, int bytes) } while (cmpxchg64(&bc->console_bytes, old, new) != old); } -static void request_complete(struct bio *bio, int err) +static void request_complete(struct bio *bio, int err, + struct batch_complete *batch) { complete((struct completion *)bio->bi_private); } @@ -289,7 +290,7 @@ static void bcon_unregister(struct work_struct *work) } #define BCON_MAX_ERRORS 10 -static void bcon_end_io(struct bio *bio, int err) +static void bcon_end_io(struct bio *bio, int err, struct batch_complete *batch) { struct bcon_bio *bcon_bio = container_of(bio, struct bcon_bio, bio); struct blockconsole *bc = bio->bi_private; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 64fbb8385cdc..046aa1793514 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -948,7 +948,8 @@ static void bm_aio_ctx_destroy(struct kref *kref) } /* bv_page may be a copy, or may be the original */ -static void bm_async_io_complete(struct bio *bio, int error) +static void bm_async_io_complete(struct bio *bio, int error, + struct batch_complete *batch) { struct bm_aio_ctx *ctx = bio->bi_private; struct drbd_conf *mdev = ctx->mdev; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0f449bbf0edf..4222affff488 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -757,7 +757,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct acc rcu_read_unlock(); timeo = connect_int * HZ; - timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ + /* 28.5% random jitter */ + timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7; err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); if (err <= 0) @@ -954,7 +955,7 @@ retry: conn_warn(tconn, "Error receiving initial packet\n"); sock_release(s); randomize: - if (random32() & 1) + if (prandom_u32() & 1) goto retry; } } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 891c0ecaa292..04a80af8fddb 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -64,7 +64,8 @@ rwlock_t global_state_lock; /* used for synchronous meta data and bitmap IO * submitted by drbd_md_sync_page_io() */ -void drbd_md_io_complete(struct bio *bio, int error) +void drbd_md_io_complete(struct bio *bio, int error, + struct batch_complete *batch) { struct drbd_md_io *md_io; struct drbd_conf *mdev; @@ -167,7 +168,8 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel /* writes on behalf of the partner, or resync writes, * "submitted" by the receiver. */ -void drbd_peer_request_endio(struct bio *bio, int error) +void drbd_peer_request_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct drbd_peer_request *peer_req = bio->bi_private; struct drbd_conf *mdev = peer_req->w.mdev; @@ -203,7 +205,8 @@ void drbd_peer_request_endio(struct bio *bio, int error) /* read, readA or write requests on R_PRIMARY coming from drbd_make_request */ -void drbd_request_endio(struct bio *bio, int error) +void drbd_request_endio(struct bio *bio, int error, + struct batch_complete *batch) { unsigned long flags; struct drbd_request *req = bio->bi_private; diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 328f18e4b4ee..d443dc06b854 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -20,9 +20,12 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev, #define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) /* bi_end_io handlers */ -extern void drbd_md_io_complete(struct bio *bio, int error); -extern void drbd_peer_request_endio(struct bio *bio, int error); -extern void drbd_request_endio(struct bio *bio, int error); +extern void drbd_md_io_complete(struct bio *bio, int error, + struct batch_complete *batch); +extern void drbd_peer_request_endio(struct bio *bio, int error, + struct batch_complete *batch); +extern void drbd_request_endio(struct bio *bio, int error, + struct batch_complete *batch); /* * used to submit our private bio diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 83232639034e..629b6d506cd0 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3748,7 +3748,8 @@ static unsigned int floppy_check_events(struct gendisk *disk, * a disk in the drive, and whether that disk is writable. */ -static void floppy_rb0_complete(struct bio *bio, int err) +static void floppy_rb0_complete(struct bio *bio, int err, + struct batch_complete *batch) { complete((struct completion *)bio->bi_private); } diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 076ae7f1b781..a56cfcd5d648 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -780,6 +780,7 @@ static const struct block_device_operations mg_disk_ops = { .getgeo = mg_getgeo }; +#ifdef CONFIG_PM_SLEEP static int mg_suspend(struct device *dev) { struct mg_drv_data *prv_data = dev->platform_data; @@ -824,6 +825,7 @@ static int mg_resume(struct device *dev) return 0; } +#endif static SIMPLE_DEV_PM_OPS(mg_pm, mg_suspend, mg_resume); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 53bad6298226..4b9603ec624e 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -151,6 +151,9 @@ static void mtip_command_cleanup(struct driver_data *dd) struct mtip_cmd *command; struct mtip_port *port = dd->port; static int in_progress; + struct batch_complete batch; + + batch_complete_init(&batch); if (in_progress) return; @@ -166,11 +169,9 @@ static void mtip_command_cleanup(struct driver_data *dd) command = &port->commands[commandindex]; if (atomic_read(&command->active) - && (command->async_callback)) { - command->async_callback(command->async_data, - -ENODEV); - command->async_callback = NULL; - command->async_data = NULL; + && (command->bio)) { + bio_endio_batch(command->bio, -ENODEV, &batch); + command->bio = NULL; } dma_unmap_sg(&port->dd->pdev->dev, @@ -178,9 +179,10 @@ static void mtip_command_cleanup(struct driver_data *dd) command->scatter_ents, command->direction); } + up(&port->cmd_slot); } - up(&port->cmd_slot); + batch_complete(&batch); set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag); in_progress = 0; @@ -580,6 +582,9 @@ static void mtip_timeout_function(unsigned long int data) unsigned int bit, group; unsigned int num_command_slots; unsigned long to, tagaccum[SLOTBITS_IN_LONGS]; + struct batch_complete batch; + + batch_complete_init(&batch); if (unlikely(!port)) return; @@ -622,11 +627,9 @@ static void mtip_timeout_function(unsigned long int data) writel(1 << bit, port->completed[group]); /* Call the async completion callback. */ - if (likely(command->async_callback)) - command->async_callback(command->async_data, - -EIO); - command->async_callback = NULL; - command->comp_func = NULL; + if (likely(command->bio)) + bio_endio_batch(command->bio, -EIO, &batch); + command->bio = NULL; /* Unmap the DMA scatter list entries */ dma_unmap_sg(&port->dd->pdev->dev, @@ -645,6 +648,8 @@ static void mtip_timeout_function(unsigned long int data) } } + batch_complete(&batch); + if (cmdto_cnt) { print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { @@ -695,7 +700,8 @@ static void mtip_timeout_function(unsigned long int data) static void mtip_async_complete(struct mtip_port *port, int tag, void *data, - int status) + int status, + struct batch_complete *batch) { struct mtip_cmd *command; struct driver_data *dd = data; @@ -712,11 +718,10 @@ static void mtip_async_complete(struct mtip_port *port, } /* Upper layer callback */ - if (likely(command->async_callback)) - command->async_callback(command->async_data, cb_status); + if (likely(command->bio)) + bio_endio_batch(command->bio, cb_status, batch); - command->async_callback = NULL; - command->comp_func = NULL; + command->bio = NULL; /* Unmap the DMA scatter list entries */ dma_unmap_sg(&dd->pdev->dev, @@ -749,24 +754,22 @@ static void mtip_async_complete(struct mtip_port *port, static void mtip_completion(struct mtip_port *port, int tag, void *data, - int status) + int status, + struct batch_complete *batch) { - struct mtip_cmd *command = &port->commands[tag]; struct completion *waiting = data; if (unlikely(status == PORT_IRQ_TF_ERR)) dev_warn(&port->dd->pdev->dev, "Internal command %d completed with TFE\n", tag); - command->async_callback = NULL; - command->comp_func = NULL; - complete(waiting); } static void mtip_null_completion(struct mtip_port *port, int tag, void *data, - int status) + int status, + struct batch_complete *batch) { return; } @@ -795,6 +798,7 @@ static void mtip_handle_tfe(struct driver_data *dd) unsigned char *buf; char *fail_reason = NULL; int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0; + struct batch_complete batch; dev_warn(&dd->pdev->dev, "Taskfile error\n"); @@ -812,13 +816,14 @@ static void mtip_handle_tfe(struct driver_data *dd) atomic_inc(&cmd->active); /* active > 1 indicates error */ if (cmd->comp_data && cmd->comp_func) { cmd->comp_func(port, MTIP_TAG_INTERNAL, - cmd->comp_data, PORT_IRQ_TF_ERR); + cmd->comp_data, PORT_IRQ_TF_ERR, NULL); } goto handle_tfe_exit; } /* clear the tag accumulator */ memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); + batch_complete_init(&batch); /* Loop through all the groups */ for (group = 0; group < dd->slot_groups; group++) { @@ -845,7 +850,7 @@ static void mtip_handle_tfe(struct driver_data *dd) cmd->comp_func(port, tag, cmd->comp_data, - 0); + 0, &batch); } else { dev_err(&port->dd->pdev->dev, "Missing completion func for tag %d", @@ -858,6 +863,7 @@ static void mtip_handle_tfe(struct driver_data *dd) } } } + batch_complete(&batch); print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt); @@ -899,6 +905,7 @@ static void mtip_handle_tfe(struct driver_data *dd) /* clear the tag accumulator */ memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); + batch_complete_init(&batch); /* Loop through all the groups */ for (group = 0; group < dd->slot_groups; group++) { @@ -932,7 +939,7 @@ static void mtip_handle_tfe(struct driver_data *dd) if (cmd->comp_func) { cmd->comp_func(port, tag, cmd->comp_data, - -ENODATA); + -ENODATA, &batch); } continue; } @@ -962,13 +969,15 @@ static void mtip_handle_tfe(struct driver_data *dd) port, tag, cmd->comp_data, - PORT_IRQ_TF_ERR); + PORT_IRQ_TF_ERR, &batch); else dev_warn(&port->dd->pdev->dev, "Bad completion for tag %d\n", tag); } } + + batch_complete(&batch); print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); handle_tfe_exit: @@ -989,6 +998,9 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, struct driver_data *dd = port->dd; int tag, bit; struct mtip_cmd *command; + struct batch_complete batch; + + batch_complete_init(&batch); if (!completed) { WARN_ON_ONCE(!completed); @@ -1013,7 +1025,8 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, port, tag, command->comp_data, - 0); + 0, + &batch); } else { dev_warn(&dd->pdev->dev, "Null completion " @@ -1023,13 +1036,16 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, if (mtip_check_surprise_removal( dd->pdev)) { mtip_command_cleanup(dd); - return; + goto out; } } } completed >>= 1; } +out: + batch_complete(&batch); + /* If last, re-enable interrupts */ if (atomic_dec_return(&dd->irq_workers_active) == 0) writel(0xffffffff, dd->mmio + HOST_IRQ_STAT); @@ -1050,7 +1066,7 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd->comp_data, - 0); + 0, NULL); return; } } @@ -2558,8 +2574,8 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, * None */ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, - int nsect, int nents, int tag, void *callback, - void *data, int dir) + int nsect, int nents, int tag, + struct bio *bio, int dir) { struct host_to_dev_fis *fis; struct mtip_port *port = dd->port; @@ -2614,12 +2630,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, command->comp_func = mtip_async_complete; command->direction = dma_dir; - /* - * Set the completion function and data for the command passed - * from the upper layer. - */ - command->async_data = data; - command->async_callback = callback; + command->bio = bio; /* * To prevent this command from being issued @@ -3900,7 +3911,6 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) bio_sectors(bio), nents, tag, - bio_endio, bio, bio_data_dir(bio)); } else diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 8e8334c9dd0f..78456ec8fe6a 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -325,11 +325,9 @@ struct mtip_cmd { void (*comp_func)(struct mtip_port *port, int tag, void *data, - int status); - /* Additional callback function that may be called by comp_func() */ - void (*async_callback)(void *data, int status); - - void *async_data; /* Addl. data passed to async_callback() */ + int status, + struct batch_complete *batch); + struct bio *bio; int scatter_ents; /* Number of scatter list entries used */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7fecc784be01..037288e7874d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -856,6 +856,8 @@ static int __init nbd_init(void) disk->queue->limits.discard_granularity = 512; disk->queue->limits.max_discard_sectors = UINT_MAX; disk->queue->limits.discard_zeroes_data = 0; + blk_queue_max_hw_sectors(disk->queue, 65536); + disk->queue->limits.max_sectors = 256; } if (register_blkdev(NBD_MAJOR, "nbd")) { diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index dcb18a3d3314..ce42c14808ac 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -980,7 +980,8 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec) } } -static void pkt_end_io_read(struct bio *bio, int err) +static void pkt_end_io_read(struct bio *bio, int err, + struct batch_complete *batch) { struct packet_data *pkt = bio->bi_private; struct pktcdvd_device *pd = pkt->pd; @@ -998,7 +999,8 @@ static void pkt_end_io_read(struct bio *bio, int err) pkt_bio_finished(pd); } -static void pkt_end_io_packet_write(struct bio *bio, int err) +static void pkt_end_io_packet_write(struct bio *bio, int err, + struct batch_complete *batch) { struct packet_data *pkt = bio->bi_private; struct pktcdvd_device *pd = pkt->pd; @@ -2339,7 +2341,8 @@ static int pkt_close(struct gendisk *disk, fmode_t mode) } -static void pkt_end_io_read_cloned(struct bio *bio, int err) +static void pkt_end_io_read_cloned(struct bio *bio, int err, + struct batch_complete *batch) { struct packet_stacked_data *psd = bio->bi_private; struct pktcdvd_device *pd = psd->pd; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 758f2ac878cf..deb722d63d68 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -775,7 +775,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) if (intr & ERROR_INTR) { n = fs->scount - 1 - resid / 512; if (n > 0) { - blk_update_request(req, 0, n << 9); + blk_update_request(req, 0, n << 9, NULL); fs->req_sector += n; } if (fs->retries < 5) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 64723953e1c9..49d0ec2472c5 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -217,7 +217,8 @@ static void virtblk_bio_send_flush_work(struct work_struct *work) virtblk_bio_send_flush(vbr); } -static inline void virtblk_request_done(struct virtblk_req *vbr) +static inline void virtblk_request_done(struct virtblk_req *vbr, + struct batch_complete *batch) { struct virtio_blk *vblk = vbr->vblk; struct request *req = vbr->req; @@ -231,11 +232,12 @@ static inline void virtblk_request_done(struct virtblk_req *vbr) req->errors = (error != 0); } - __blk_end_request_all(req, error); + blk_end_request_all_batch(req, error, batch); mempool_free(vbr, vblk->pool); } -static inline void virtblk_bio_flush_done(struct virtblk_req *vbr) +static inline void virtblk_bio_flush_done(struct virtblk_req *vbr, + struct batch_complete *batch) { struct virtio_blk *vblk = vbr->vblk; @@ -244,12 +246,13 @@ static inline void virtblk_bio_flush_done(struct virtblk_req *vbr) INIT_WORK(&vbr->work, virtblk_bio_send_data_work); queue_work(virtblk_wq, &vbr->work); } else { - bio_endio(vbr->bio, virtblk_result(vbr)); + bio_endio_batch(vbr->bio, virtblk_result(vbr), batch); mempool_free(vbr, vblk->pool); } } -static inline void virtblk_bio_data_done(struct virtblk_req *vbr) +static inline void virtblk_bio_data_done(struct virtblk_req *vbr, + struct batch_complete *batch) { struct virtio_blk *vblk = vbr->vblk; @@ -259,17 +262,18 @@ static inline void virtblk_bio_data_done(struct virtblk_req *vbr) INIT_WORK(&vbr->work, virtblk_bio_send_flush_work); queue_work(virtblk_wq, &vbr->work); } else { - bio_endio(vbr->bio, virtblk_result(vbr)); + bio_endio_batch(vbr->bio, virtblk_result(vbr), batch); mempool_free(vbr, vblk->pool); } } -static inline void virtblk_bio_done(struct virtblk_req *vbr) +static inline void virtblk_bio_done(struct virtblk_req *vbr, + struct batch_complete *batch) { if (unlikely(vbr->flags & VBLK_IS_FLUSH)) - virtblk_bio_flush_done(vbr); + virtblk_bio_flush_done(vbr, batch); else - virtblk_bio_data_done(vbr); + virtblk_bio_data_done(vbr, batch); } static void virtblk_done(struct virtqueue *vq) @@ -279,16 +283,19 @@ static void virtblk_done(struct virtqueue *vq) struct virtblk_req *vbr; unsigned long flags; unsigned int len; + struct batch_complete batch; + + batch_complete_init(&batch); spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); do { virtqueue_disable_cb(vq); while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { if (vbr->bio) { - virtblk_bio_done(vbr); + virtblk_bio_done(vbr, &batch); bio_done = true; } else { - virtblk_request_done(vbr); + virtblk_request_done(vbr, &batch); req_done = true; } } @@ -298,6 +305,8 @@ static void virtblk_done(struct virtqueue *vq) blk_start_queue(vblk->disk->queue); spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); + batch_complete(&batch); + if (bio_done) wake_up(&vblk->queue_wait); } diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index dd5b2fed97e9..990c1d81849f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -741,7 +741,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) /* * bio callback. */ -static void end_block_io_op(struct bio *bio, int error) +static void end_block_io_op(struct bio *bio, int error, + struct batch_complete *batch) { __end_block_io_op(bio->bi_private, error); bio_put(bio); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 2c644afbcdd4..1ccbe9482faa 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -28,6 +28,7 @@ #include <linux/pfn.h> #include <linux/export.h> #include <linux/io.h> +#include <linux/aio.h> #include <asm/uaccess.h> @@ -627,6 +628,18 @@ static ssize_t write_null(struct file *file, const char __user *buf, return count; } +static ssize_t aio_read_null(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + return 0; +} + +static ssize_t aio_write_null(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + return iov_length(iov, nr_segs); +} + static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf, struct splice_desc *sd) { @@ -670,6 +683,24 @@ static ssize_t read_zero(struct file *file, char __user *buf, return written ? written : -EFAULT; } +static ssize_t aio_read_zero(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + size_t written = 0; + unsigned long i; + ssize_t ret; + + for (i = 0; i < nr_segs; i++) { + ret = read_zero(iocb->ki_filp, iov[i].iov_base, iov[i].iov_len, + &pos); + if (ret < 0) + break; + written += ret; + } + + return written ? written : -EFAULT; +} + static int mmap_zero(struct file *file, struct vm_area_struct *vma) { #ifndef CONFIG_MMU @@ -738,6 +769,7 @@ static int open_port(struct inode *inode, struct file *filp) #define full_lseek null_lseek #define write_zero write_null #define read_full read_zero +#define aio_write_zero aio_write_null #define open_mem open_port #define open_kmem open_mem #define open_oldmem open_mem @@ -766,6 +798,8 @@ static const struct file_operations null_fops = { .llseek = null_lseek, .read = read_null, .write = write_null, + .aio_read = aio_read_null, + .aio_write = aio_write_null, .splice_write = splice_write_null, }; @@ -782,6 +816,8 @@ static const struct file_operations zero_fops = { .llseek = zero_lseek, .read = read_zero, .write = write_zero, + .aio_read = aio_read_zero, + .aio_write = aio_write_zero, .mmap = mmap_zero, }; diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c index c68969708068..04e6d6a27994 100644 --- a/drivers/char/mwave/tp3780i.c +++ b/drivers/char/mwave/tp3780i.c @@ -479,6 +479,7 @@ int tp3780I_QueryAbilities(THINKPAD_BD_DATA * pBDData, MW_ABILITIES * pAbilities PRINTK_2(TRACE_TP3780I, "tp3780i::tp3780I_QueryAbilities entry pBDData %p\n", pBDData); + memset(pAbilities, 0, sizeof(*pAbilities)); /* fill out standard constant fields */ pAbilities->instr_per_sec = pBDData->rDspSettings.uIps; pAbilities->data_size = pBDData->rDspSettings.uDStoreSize; diff --git a/drivers/char/random.c b/drivers/char/random.c index cd9a6211dcad..73e52b7796f9 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -957,10 +957,23 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, { ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + unsigned long flags; /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ - if (fips_enabled && !r->last_data_init) - nbytes += EXTRACT_SIZE; + if (fips_enabled) { + spin_lock_irqsave(&r->lock, flags); + if (!r->last_data_init) { + r->last_data_init = true; + spin_unlock_irqrestore(&r->lock, flags); + trace_extract_entropy(r->name, EXTRACT_SIZE, + r->entropy_count, _RET_IP_); + xfer_secondary_pool(r, EXTRACT_SIZE); + extract_buf(r, tmp); + spin_lock_irqsave(&r->lock, flags); + memcpy(r->last_data, tmp, EXTRACT_SIZE); + } + spin_unlock_irqrestore(&r->lock, flags); + } trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_); xfer_secondary_pool(r, nbytes); @@ -970,19 +983,6 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, extract_buf(r, tmp); if (fips_enabled) { - unsigned long flags; - - - /* prime last_data value if need be, per fips 140-2 */ - if (!r->last_data_init) { - spin_lock_irqsave(&r->lock, flags); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - r->last_data_init = true; - nbytes -= EXTRACT_SIZE; - spin_unlock_irqrestore(&r->lock, flags); - extract_buf(r, tmp); - } - spin_lock_irqsave(&r->lock, flags); if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) panic("Hardware RNG duplicated output!\n"); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 4cd392dbf115..b95159b33c39 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -22,6 +22,9 @@ static u16 __initdata dmi_ver; */ static int dmi_initialized; +/* DMI system identification string used during boot */ +static char dmi_ids_string[128] __initdata; + static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s) { const u8 *bp = ((u8 *) dm) + dm->length; @@ -376,99 +379,103 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy) } } -static void __init print_filtered(const char *info) +static int __init print_filtered(char *buf, size_t len, const char *info) { + int c = 0; const char *p; if (!info) - return; + return c; for (p = info; *p; p++) if (isprint(*p)) - printk(KERN_CONT "%c", *p); + c += scnprintf(buf + c, len - c, "%c", *p); else - printk(KERN_CONT "\\x%02x", *p & 0xff); + c += scnprintf(buf + c, len - c, "\\x%02x", *p & 0xff); + return c; } -static void __init dmi_dump_ids(void) +static void __init dmi_format_ids(char *buf, size_t len) { + int c = 0; const char *board; /* Board Name is optional */ - printk(KERN_DEBUG "DMI: "); - print_filtered(dmi_get_system_info(DMI_SYS_VENDOR)); - printk(KERN_CONT " "); - print_filtered(dmi_get_system_info(DMI_PRODUCT_NAME)); + c += print_filtered(buf + c, len - c, + dmi_get_system_info(DMI_SYS_VENDOR)); + c += scnprintf(buf + c, len - c, " "); + c += print_filtered(buf + c, len - c, + dmi_get_system_info(DMI_PRODUCT_NAME)); + board = dmi_get_system_info(DMI_BOARD_NAME); if (board) { - printk(KERN_CONT "/"); - print_filtered(board); + c += scnprintf(buf + c, len - c, "/"); + c += print_filtered(buf + c, len - c, board); } - printk(KERN_CONT ", BIOS "); - print_filtered(dmi_get_system_info(DMI_BIOS_VERSION)); - printk(KERN_CONT " "); - print_filtered(dmi_get_system_info(DMI_BIOS_DATE)); - printk(KERN_CONT "\n"); + c += scnprintf(buf + c, len - c, ", BIOS "); + c += print_filtered(buf + c, len - c, + dmi_get_system_info(DMI_BIOS_VERSION)); + c += scnprintf(buf + c, len - c, " "); + c += print_filtered(buf + c, len - c, + dmi_get_system_info(DMI_BIOS_DATE)); } -static int __init dmi_present(const char __iomem *p) +static int __init dmi_present(const u8 *buf) { - u8 buf[15]; + int smbios_ver; + + if (memcmp(buf, "_SM_", 4) == 0 && + buf[5] < 32 && dmi_checksum(buf, buf[5])) { + smbios_ver = (buf[6] << 8) + buf[7]; + + /* Some BIOS report weird SMBIOS version, fix that up */ + switch (smbios_ver) { + case 0x021F: + case 0x0221: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", + smbios_ver & 0xFF, 3); + smbios_ver = 0x0203; + break; + case 0x0233: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6); + smbios_ver = 0x0206; + break; + } + } else { + smbios_ver = 0; + } - memcpy_fromio(buf, p, 15); - if (dmi_checksum(buf, 15)) { + buf += 16; + + if (memcmp(buf, "_DMI_", 5) == 0 && dmi_checksum(buf, 15)) { dmi_num = (buf[13] << 8) | buf[12]; dmi_len = (buf[7] << 8) | buf[6]; dmi_base = (buf[11] << 24) | (buf[10] << 16) | (buf[9] << 8) | buf[8]; if (dmi_walk_early(dmi_decode) == 0) { - if (dmi_ver) + if (smbios_ver) { + dmi_ver = smbios_ver; pr_info("SMBIOS %d.%d present.\n", dmi_ver >> 8, dmi_ver & 0xFF); - else { + } else { dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F); pr_info("Legacy DMI %d.%d present.\n", dmi_ver >> 8, dmi_ver & 0xFF); } - dmi_dump_ids(); + dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string)); + printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string); return 0; } } - dmi_ver = 0; - return 1; -} - -static int __init smbios_present(const char __iomem *p) -{ - u8 buf[32]; - memcpy_fromio(buf, p, 32); - if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) { - dmi_ver = (buf[6] << 8) + buf[7]; - - /* Some BIOS report weird SMBIOS version, fix that up */ - switch (dmi_ver) { - case 0x021F: - case 0x0221: - pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", - dmi_ver & 0xFF, 3); - dmi_ver = 0x0203; - break; - case 0x0233: - pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6); - dmi_ver = 0x0206; - break; - } - return memcmp(p + 16, "_DMI_", 5) || dmi_present(p + 16); - } return 1; } void __init dmi_scan_machine(void) { char __iomem *p, *q; - int rc; + char buf[32]; if (efi_enabled(EFI_CONFIG_TABLES)) { if (efi.smbios == EFI_INVALID_TABLE_ADDR) @@ -481,10 +488,10 @@ void __init dmi_scan_machine(void) p = dmi_ioremap(efi.smbios, 32); if (p == NULL) goto error; - - rc = smbios_present(p); + memcpy_fromio(buf, p, 32); dmi_iounmap(p, 32); - if (!rc) { + + if (!dmi_present(buf)) { dmi_available = 1; goto out; } @@ -499,18 +506,15 @@ void __init dmi_scan_machine(void) if (p == NULL) goto error; + memset(buf, 0, 16); for (q = p; q < p + 0x10000; q += 16) { - if (memcmp(q, "_SM_", 4) == 0 && q - p <= 0xFFE0) - rc = smbios_present(q); - else if (memcmp(q, "_DMI_", 5) == 0) - rc = dmi_present(q); - else - continue; - if (!rc) { + memcpy_fromio(buf + 16, q, 16); + if (!dmi_present(buf)) { dmi_available = 1; dmi_iounmap(p, 0x10000); goto out; } + memcpy(buf, buf + 16, 16); } dmi_iounmap(p, 0x10000); } @@ -521,6 +525,19 @@ void __init dmi_scan_machine(void) } /** + * dmi_set_dump_stack_arch_desc - set arch description for dump_stack() + * + * Invoke dump_stack_set_arch_desc() with DMI system information so that + * DMI identifiers are printed out on task dumps. Arch boot code should + * call this function after dmi_scan_machine() if it wants to print out DMI + * identifiers on task dumps. + */ +void __init dmi_set_dump_stack_arch_desc(void) +{ + dump_stack_set_arch_desc("%s", dmi_ids_string); +} + +/** * dmi_matches - check if dmi_system_id structure matches system DMI data * @dmi: pointer to the dmi_system_id structure to check */ diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 0b5b5f619c75..e2e04b007e15 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -114,12 +114,9 @@ static void __meminit release_firmware_map_entry(struct kobject *kobj) * map_entries_bootmem here, and deleted from &map_entries in * firmware_map_remove_entry(). */ - if (firmware_map_find_entry(entry->start, entry->end, - entry->type)) { - spin_lock(&map_entries_bootmem_lock); - list_add(&entry->list, &map_entries_bootmem); - spin_unlock(&map_entries_bootmem_lock); - } + spin_lock(&map_entries_bootmem_lock); + list_add(&entry->list, &map_entries_bootmem); + spin_unlock(&map_entries_bootmem_lock); return; } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index b78cbe74dadf..442a15443fa5 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -399,6 +399,14 @@ static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode) return; /* + * fbdev->blank can be called from irq context in case of a panic. + * Since we already have our own special panic handler which will + * restore the fbdev console mode completely, just bail out early. + */ + if (oops_in_progress) + return; + + /* * For each CRTC in this fb, turn the connectors on/off. */ drm_modeset_lock_all(dev); diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h index ba7a1208ff9e..d619d735838b 100644 --- a/drivers/infiniband/hw/amso1100/c2.h +++ b/drivers/infiniband/hw/amso1100/c2.h @@ -265,7 +265,6 @@ struct c2_pd_table { struct c2_qp_table { struct idr idr; spinlock_t lock; - int last; }; struct c2_element { diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c index 0ab826b280b2..86708dee58b1 100644 --- a/drivers/infiniband/hw/amso1100/c2_qp.c +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -385,8 +385,7 @@ static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp) idr_preload(GFP_KERNEL); spin_lock_irq(&c2dev->qp_table.lock); - ret = idr_alloc(&c2dev->qp_table.idr, qp, c2dev->qp_table.last++, 0, - GFP_NOWAIT); + ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT); if (ret >= 0) qp->qpn = ret; diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index aed8afee56da..6d7f453b4d05 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -40,6 +40,7 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/io.h> +#include <linux/aio.h> #include <linux/jiffies.h> #include <linux/cpu.h> #include <asm/pgtable.h> diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index add98d01476c..d1f5f1dd77b0 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -204,7 +204,6 @@ static struct id_map_entry * id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) { int ret; - static int next_id; struct id_map_entry *ent; struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; @@ -223,9 +222,8 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) idr_preload(GFP_KERNEL); spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); - ret = idr_alloc(&sriov->pv_id_table, ent, next_id, 0, GFP_NOWAIT); + ret = idr_alloc_cyclic(&sriov->pv_id_table, ent, 0, 0, GFP_NOWAIT); if (ret >= 0) { - next_id = max(ret + 1, 0); ent->pv_cm_id = (u32)ret; sl_id_map_add(ibdev, ent); list_add_tail(&ent->list, &sriov->cm_list); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 4f7aa301b3b1..b56c9428f3c5 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -39,7 +39,7 @@ #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/io.h> -#include <linux/uio.h> +#include <linux/aio.h> #include <linux/jiffies.h> #include <asm/pgtable.h> #include <linux/delay.h> diff --git a/drivers/leds/leds-ot200.c b/drivers/leds/leds-ot200.c index ee14662ed5ce..98cae529373f 100644 --- a/drivers/leds/leds-ot200.c +++ b/drivers/leds/leds-ot200.c @@ -47,37 +47,37 @@ static struct ot200_led leds[] = { { .name = "led_1", .port = 0x49, - .mask = BIT(7), + .mask = BIT(6), }, { .name = "led_2", .port = 0x49, - .mask = BIT(6), + .mask = BIT(5), }, { .name = "led_3", .port = 0x49, - .mask = BIT(5), + .mask = BIT(4), }, { .name = "led_4", .port = 0x49, - .mask = BIT(4), + .mask = BIT(3), }, { .name = "led_5", .port = 0x49, - .mask = BIT(3), + .mask = BIT(2), }, { .name = "led_6", .port = 0x49, - .mask = BIT(2), + .mask = BIT(1), }, { .name = "led_7", .port = 0x49, - .mask = BIT(1), + .mask = BIT(0), } }; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 3b62be160a6e..864baabaee25 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -686,7 +686,7 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, * We pick one entry at random to throw out. Choosing the Least * Recently Used might be better, but this is easy. */ - next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); + next = prandom_u32() % ARRAY_SIZE(cpu->lg->pgdirs); /* If it's never been allocated at all before, try now. */ if (!cpu->lg->pgdirs[next].pgdir) { cpu->lg->pgdirs[next].pgdir = diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 2879487d036a..d94d058bdf25 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -156,7 +156,8 @@ static void discard_finish(struct work_struct *w) closure_put(&ca->set->cl); } -static void discard_endio(struct bio *bio, int error) +static void discard_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct discard *d = container_of(bio, struct discard, bio); schedule_work(&d->work); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 852340793777..03e44c1a3bb4 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -134,7 +134,8 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) return crc ^ 0xffffffffffffffffULL; } -static void btree_bio_endio(struct bio *bio, int error) +static void btree_bio_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct closure *cl = bio->bi_private; struct btree *b = container_of(cl, struct btree, io.cl); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index f565512f6fac..8bb275901fb8 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -9,7 +9,8 @@ #include "bset.h" #include "debug.h" -static void bch_bi_idx_hack_endio(struct bio *bio, int error) +static void bch_bi_idx_hack_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct bio *p = bio->bi_private; @@ -199,7 +200,8 @@ static void bch_bio_submit_split_done(struct closure *cl) mempool_free(s, s->p->bio_split_hook); } -static void bch_bio_submit_split_endio(struct bio *bio, int error) +static void bch_bio_submit_split_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct closure *cl = bio->bi_private; struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8c8dfdcd9d4c..bff194bb3e08 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -22,7 +22,8 @@ * bit. */ -static void journal_read_endio(struct bio *bio, int error) +static void journal_read_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct closure *cl = bio->bi_private; closure_put(cl); @@ -390,7 +391,8 @@ found: #define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1) -static void journal_discard_endio(struct bio *bio, int error) +static void journal_discard_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct journal_device *ja = container_of(bio, struct journal_device, discard_bio); @@ -535,7 +537,8 @@ void bch_journal_next(struct journal *j) pr_debug("journal_pin full (%zu)", fifo_used(&j->pin)); } -static void journal_write_endio(struct bio *bio, int error) +static void journal_write_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct journal_write *w = bio->bi_private; diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 8589512c972e..8bf7ae12d4b0 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -61,7 +61,8 @@ static void write_moving_finish(struct closure *cl) closure_return_with_destructor(cl, moving_io_destructor); } -static void read_moving_endio(struct bio *bio, int error) +static void read_moving_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct moving_io *io = container_of(bio->bi_private, struct moving_io, s.cl); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 83731dc36f34..478f487fa87a 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -456,7 +456,8 @@ static void bch_insert_data_error(struct closure *cl) bch_journal(cl); } -static void bch_insert_data_endio(struct bio *bio, int error) +static void bch_insert_data_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct closure *cl = bio->bi_private; struct btree_op *op = container_of(cl, struct btree_op, cl); @@ -621,7 +622,8 @@ void bch_btree_insert_async(struct closure *cl) /* Common code for the make_request functions */ -static void request_endio(struct bio *bio, int error) +static void request_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct closure *cl = bio->bi_private; @@ -636,7 +638,8 @@ static void request_endio(struct bio *bio, int error) closure_put(cl); } -void bch_cache_read_endio(struct bio *bio, int error) +void bch_cache_read_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct bbio *b = container_of(bio, struct bbio, bio); struct closure *cl = bio->bi_private; diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 254d9ab5707c..3b794625c4c1 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -29,11 +29,10 @@ struct search { struct btree_op op; }; -void bch_cache_read_endio(struct bio *, int); +void bch_cache_read_endio(struct bio *, int, struct batch_complete *batch); int bch_get_congested(struct cache_set *); void bch_insert_data(struct closure *cl); void bch_btree_insert_async(struct closure *); -void bch_cache_read_endio(struct bio *, int); void bch_open_buckets_free(struct cache_set *); int bch_open_buckets_alloc(struct cache_set *); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 5fa3cd2d9ff0..f3bf310187c7 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -204,7 +204,8 @@ err: return err; } -static void write_bdev_super_endio(struct bio *bio, int error) +static void write_bdev_super_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct cached_dev *dc = bio->bi_private; /* XXX: error checking */ @@ -265,7 +266,8 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) closure_return(cl); } -static void write_super_endio(struct bio *bio, int error) +static void write_super_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct cache *ca = bio->bi_private; @@ -306,7 +308,7 @@ void bcache_write_super(struct cache_set *c) /* UUID io */ -static void uuid_endio(struct bio *bio, int error) +static void uuid_endio(struct bio *bio, int error, struct batch_complete *batch) { struct closure *cl = bio->bi_private; struct cache_set *c = container_of(cl, struct cache_set, uuid_write.cl); @@ -470,7 +472,8 @@ static struct uuid_entry *uuid_find_empty(struct cache_set *c) * disk. */ -static void prio_endio(struct bio *bio, int error) +static void prio_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct cache *ca = bio->bi_private; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 93e7e31a4bd3..daf9347833be 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -253,7 +253,8 @@ static void write_dirty_finish(struct closure *cl) closure_return_with_destructor(cl, dirty_io_destructor); } -static void dirty_endio(struct bio *bio, int error) +static void dirty_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; @@ -281,7 +282,8 @@ static void write_dirty(struct closure *cl) continue_at(cl, write_dirty_finish, dirty_wq); } -static void read_dirty_endio(struct bio *bio, int error) +static void read_dirty_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; @@ -289,7 +291,7 @@ static void read_dirty_endio(struct bio *bio, int error) bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), error, "reading dirty data from cache"); - dirty_endio(bio, error); + dirty_endio(bio, error, NULL); } static void read_dirty_submit(struct closure *cl) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index c6083132c4b8..6f1b57ab8868 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -472,7 +472,7 @@ static void dmio_complete(unsigned long error, void *context) { struct dm_buffer *b = context; - b->bio.bi_end_io(&b->bio, error ? -EIO : 0); + b->bio.bi_end_io(&b->bio, error ? -EIO : 0, NULL); } static void use_dmio(struct dm_buffer *b, int rw, sector_t block, @@ -503,7 +503,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, r = dm_io(&io_req, 1, ®ion, NULL); if (r) - end_io(&b->bio, r); + end_io(&b->bio, r, NULL); } static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, @@ -570,7 +570,8 @@ static void submit_io(struct dm_buffer *b, int rw, sector_t block, * Set the error, clear B_WRITING bit and wake anyone who was waiting on * it. */ -static void write_endio(struct bio *bio, int error) +static void write_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); @@ -943,7 +944,7 @@ found_buffer: * The endio routine for reading: set the error, clear the bit and wake up * anyone waiting on the buffer. */ -static void read_endio(struct bio *bio, int error) +static void read_endio(struct bio *bio, int error, struct batch_complete *batch) { struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 10744091e6ca..c5ce8a3b5505 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -652,7 +652,8 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio) wake_worker(cache); } -static void writethrough_endio(struct bio *bio, int err) +static void writethrough_endio(struct bio *bio, int err, + struct batch_complete *batch) { struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); bio->bi_end_io = pb->saved_bi_end_io; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6d2d41ae9e32..ec0e3c0883b5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -929,7 +929,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io) * The work is done per CPU global for all dm-crypt instances. * They should not depend on each other and do not block. */ -static void crypt_endio(struct bio *clone, int error) +static void crypt_endio(struct bio *clone, int error, + struct batch_complete *batch) { struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index ea49834377c8..a727b267f86d 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -136,7 +136,7 @@ static void dec_count(struct io *io, unsigned int region, int error) } } -static void endio(struct bio *bio, int error) +static void endio(struct bio *bio, int error, struct batch_complete *batch) { struct io *io; unsigned region; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c0e07026a8d1..eb32e35bca8f 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1485,7 +1485,8 @@ static void start_copy(struct dm_snap_pending_exception *pe) dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } -static void full_bio_end_io(struct bio *bio, int error) +static void full_bio_end_io(struct bio *bio, int error, + struct batch_complete *batch) { void *callback_data = bio->bi_private; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 004ad1652b73..905b75f60cc7 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -553,7 +553,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) spin_unlock_irqrestore(&pool->lock, flags); } -static void overwrite_endio(struct bio *bio, int err) +static void overwrite_endio(struct bio *bio, int err, + struct batch_complete *batch) { unsigned long flags; struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index b948fd864d45..b373bb7d1c2d 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -413,7 +413,8 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, verity_verify_io(io)); } -static void verity_end_io(struct bio *bio, int error) +static void verity_end_io(struct bio *bio, int error, + struct batch_complete *batch) { struct dm_verity_io *io = bio->bi_private; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9a0bdad9ad8f..0baa5e75683e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -617,7 +617,8 @@ static void dec_pending(struct dm_io *io, int error) } } -static void clone_endio(struct bio *bio, int error) +static void clone_endio(struct bio *bio, int error, + struct batch_complete *batch) { int r = 0; struct dm_target_io *tio = bio->bi_private; @@ -652,7 +653,8 @@ static void clone_endio(struct bio *bio, int error) /* * Partial completion handling for request-based dm */ -static void end_clone_bio(struct bio *clone, int error) +static void end_clone_bio(struct bio *clone, int error, + struct batch_complete *batch) { struct dm_rq_clone_bio_info *info = clone->bi_private; struct dm_rq_target_io *tio = info->tio; @@ -696,7 +698,7 @@ static void end_clone_bio(struct bio *clone, int error) * Do not use blk_end_request() here, because it may complete * the original request before the clone, and break the ordering. */ - blk_update_request(tio->orig, 0, nr_bytes); + blk_update_request(tio->orig, 0, nr_bytes, NULL); } /* diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 3193aefe982b..ac8af5253fb6 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -70,7 +70,8 @@ #include <linux/seq_file.h> -static void faulty_fail(struct bio *bio, int error) +static void faulty_fail(struct bio *bio, int error, + struct batch_complete *batch) { struct bio *b = bio->bi_private; diff --git a/drivers/md/md.c b/drivers/md/md.c index b631b757ddf1..98e6a920be85 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -379,7 +379,8 @@ EXPORT_SYMBOL(mddev_congested); * Generic flush handling for md */ -static void md_end_flush(struct bio *bio, int err) +static void md_end_flush(struct bio *bio, int err, + struct batch_complete *batch) { struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; @@ -756,7 +757,8 @@ void md_rdev_clear(struct md_rdev *rdev) } EXPORT_SYMBOL_GPL(md_rdev_clear); -static void super_written(struct bio *bio, int error) +static void super_written(struct bio *bio, int error, + struct batch_complete *batch) { struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; @@ -807,7 +809,8 @@ void md_super_wait(struct mddev *mddev) finish_wait(&mddev->sb_wait, &wq); } -static void bi_complete(struct bio *bio, int error) +static void bi_complete(struct bio *bio, int error, + struct batch_complete *batch) { complete((struct completion*)bio->bi_private); } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 1642eae75a33..fecad70f53f6 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -83,7 +83,8 @@ static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) mempool_free(mp_bh, conf->pool); } -static void multipath_end_request(struct bio *bio, int error) +static void multipath_end_request(struct bio *bio, int error, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct multipath_bh *mp_bh = bio->bi_private; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index aeb4e3f74791..619ba71cc6a5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -294,7 +294,8 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio) return mirror; } -static void raid1_end_read_request(struct bio *bio, int error) +static void raid1_end_read_request(struct bio *bio, int error, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r1bio *r1_bio = bio->bi_private; @@ -379,7 +380,8 @@ static void r1_bio_write_done(struct r1bio *r1_bio) } } -static void raid1_end_write_request(struct bio *bio, int error) +static void raid1_end_write_request(struct bio *bio, int error, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r1bio *r1_bio = bio->bi_private; @@ -1607,7 +1609,8 @@ abort: } -static void end_sync_read(struct bio *bio, int error) +static void end_sync_read(struct bio *bio, int error, + struct batch_complete *batch) { struct r1bio *r1_bio = bio->bi_private; @@ -1625,7 +1628,8 @@ static void end_sync_read(struct bio *bio, int error) reschedule_retry(r1_bio); } -static void end_sync_write(struct bio *bio, int error) +static void end_sync_write(struct bio *bio, int error, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r1bio *r1_bio = bio->bi_private; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index fc5f60ce9f8c..6005d9637ba7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -101,7 +101,8 @@ static int enough(struct r10conf *conf, int ignore); static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped); static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio); -static void end_reshape_write(struct bio *bio, int error); +static void end_reshape_write(struct bio *bio, int error, + struct batch_complete *batch); static void end_reshape(struct r10conf *conf); static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) @@ -358,7 +359,8 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, return r10_bio->devs[slot].devnum; } -static void raid10_end_read_request(struct bio *bio, int error) +static void raid10_end_read_request(struct bio *bio, int error, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; @@ -441,7 +443,8 @@ static void one_write_done(struct r10bio *r10_bio) } } -static void raid10_end_write_request(struct bio *bio, int error) +static void raid10_end_write_request(struct bio *bio, int error, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; @@ -1907,7 +1910,8 @@ abort: } -static void end_sync_read(struct bio *bio, int error) +static void end_sync_read(struct bio *bio, int error, + struct batch_complete *batch) { struct r10bio *r10_bio = bio->bi_private; struct r10conf *conf = r10_bio->mddev->private; @@ -1968,7 +1972,8 @@ static void end_sync_request(struct r10bio *r10_bio) } } -static void end_sync_write(struct bio *bio, int error) +static void end_sync_write(struct bio *bio, int error, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; @@ -4592,7 +4597,8 @@ static int handle_reshape_read_error(struct mddev *mddev, return 0; } -static void end_reshape_write(struct bio *bio, int error) +static void end_reshape_write(struct bio *bio, int error, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0e8c9cedf7f0..f745a1b86e1d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -593,9 +593,11 @@ static int use_new_offset(struct r5conf *conf, struct stripe_head *sh) } static void -raid5_end_read_request(struct bio *bi, int error); +raid5_end_read_request(struct bio *bi, int error, + struct batch_complete *batch); static void -raid5_end_write_request(struct bio *bi, int error); +raid5_end_write_request(struct bio *bi, int error, + struct batch_complete *batch); static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) { @@ -1774,7 +1776,8 @@ static void shrink_stripes(struct r5conf *conf) conf->slab_cache = NULL; } -static void raid5_end_read_request(struct bio * bi, int error) +static void raid5_end_read_request(struct bio *bi, int error, + struct batch_complete *batch) { struct stripe_head *sh = bi->bi_private; struct r5conf *conf = sh->raid_conf; @@ -1894,7 +1897,8 @@ static void raid5_end_read_request(struct bio * bi, int error) release_stripe(sh); } -static void raid5_end_write_request(struct bio *bi, int error) +static void raid5_end_write_request(struct bio *bi, int error, + struct batch_complete *batch) { struct stripe_head *sh = bi->bi_private; struct r5conf *conf = sh->raid_conf; @@ -3972,7 +3976,8 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf) * first). * If the read failed.. */ -static void raid5_align_endio(struct bio *bi, int error) +static void raid5_align_endio(struct bio *bi, int error, + struct batch_complete *batch) { struct bio* raid_bi = bi->bi_private; struct mddev *mddev; diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index ca4da8a375ba..0cbe1ff925f1 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -145,7 +145,6 @@ config VIDEO_CODA depends on VIDEO_DEV && VIDEO_V4L2 && ARCH_MXC select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV - select IRAM_ALLOC if SOC_IMX53 ---help--- Coda is a range of video codec IPs that supports H.264, MPEG-4, and other video formats. diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c index 20827ba168fc..b931c2a5c7fc 100644 --- a/drivers/media/platform/coda.c +++ b/drivers/media/platform/coda.c @@ -14,6 +14,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/firmware.h> +#include <linux/genalloc.h> #include <linux/interrupt.h> #include <linux/io.h> #include <linux/irq.h> @@ -23,7 +24,7 @@ #include <linux/slab.h> #include <linux/videodev2.h> #include <linux/of.h> -#include <linux/platform_data/imx-iram.h> +#include <linux/platform_data/coda.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-device.h> @@ -43,6 +44,7 @@ #define CODA7_WORK_BUF_SIZE (512 * 1024 + CODA_FMO_BUF_SIZE * 8 * 1024) #define CODA_PARA_BUF_SIZE (10 * 1024) #define CODA_ISRAM_SIZE (2048 * 2) +#define CODADX6_IRAM_SIZE 0xb000 #define CODA7_IRAM_SIZE 0x14000 /* 81920 bytes */ #define CODA_MAX_FRAMEBUFFERS 2 @@ -128,7 +130,10 @@ struct coda_dev { struct coda_aux_buf codebuf; struct coda_aux_buf workbuf; + struct gen_pool *iram_pool; + long unsigned int iram_vaddr; long unsigned int iram_paddr; + unsigned long iram_size; spinlock_t irqlock; struct mutex dev_mutex; @@ -1926,6 +1931,9 @@ static int coda_probe(struct platform_device *pdev) const struct of_device_id *of_id = of_match_device(of_match_ptr(coda_dt_ids), &pdev->dev); const struct platform_device_id *pdev_id; + struct coda_platform_data *pdata = pdev->dev.platform_data; + struct device_node *np = pdev->dev.of_node; + struct gen_pool *pool; struct coda_dev *dev; struct resource *res; int ret, irq; @@ -1988,6 +1996,16 @@ static int coda_probe(struct platform_device *pdev) return -ENOENT; } + /* Get IRAM pool from device tree or platform data */ + pool = of_get_named_gen_pool(np, "iram", 0); + if (!pool && pdata) + pool = dev_get_gen_pool(pdata->iram_dev); + if (!pool) { + dev_err(&pdev->dev, "iram pool not available\n"); + return -ENOMEM; + } + dev->iram_pool = pool; + ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); if (ret) return ret; @@ -2022,18 +2040,17 @@ static int coda_probe(struct platform_device *pdev) return -ENOMEM; } - if (dev->devtype->product == CODA_DX6) { - dev->iram_paddr = 0xffff4c00; - } else { - void __iomem *iram_vaddr; - - iram_vaddr = iram_alloc(CODA7_IRAM_SIZE, - &dev->iram_paddr); - if (!iram_vaddr) { - dev_err(&pdev->dev, "unable to alloc iram\n"); - return -ENOMEM; - } + if (dev->devtype->product == CODA_DX6) + dev->iram_size = CODADX6_IRAM_SIZE; + else + dev->iram_size = CODA7_IRAM_SIZE; + dev->iram_vaddr = gen_pool_alloc(dev->iram_pool, dev->iram_size); + if (!dev->iram_vaddr) { + dev_err(&pdev->dev, "unable to alloc iram\n"); + return -ENOMEM; } + dev->iram_paddr = gen_pool_virt_to_phys(dev->iram_pool, + dev->iram_vaddr); platform_set_drvdata(pdev, dev); @@ -2050,8 +2067,8 @@ static int coda_remove(struct platform_device *pdev) if (dev->alloc_ctx) vb2_dma_contig_cleanup_ctx(dev->alloc_ctx); v4l2_device_unregister(&dev->v4l2_dev); - if (dev->iram_paddr) - iram_free(dev->iram_paddr, CODA7_IRAM_SIZE); + if (dev->iram_vaddr) + gen_pool_free(dev->iram_pool, dev->iram_vaddr, dev->iram_size); if (dev->codebuf.vaddr) dma_free_coherent(&pdev->dev, dev->codebuf.size, &dev->codebuf.vaddr, dev->codebuf.paddr); diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index a7c5b31c0d50..9718661c1fb6 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -847,7 +847,7 @@ static void r592_remove(struct pci_dev *pdev) dev->dummy_dma_page_physical_address); } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int r592_suspend(struct device *core_dev) { struct pci_dev *pdev = to_pci_dev(core_dev); @@ -870,10 +870,10 @@ static int r592_resume(struct device *core_dev) r592_update_card_detect(dev); return 0; } - -SIMPLE_DEV_PM_OPS(r592_pm_ops, r592_suspend, r592_resume); #endif +static SIMPLE_DEV_PM_OPS(r592_pm_ops, r592_suspend, r592_resume); + MODULE_DEVICE_TABLE(pci, r592_pci_id_tbl); static struct pci_driver r852_pci_driver = { @@ -881,9 +881,7 @@ static struct pci_driver r852_pci_driver = { .id_table = r592_pci_id_tbl, .probe = r592_probe, .remove = r592_remove, -#ifdef CONFIG_PM .driver.pm = &r592_pm_ops, -#endif }; static __init int r592_module_init(void) diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c index 5451beff183f..a60c188c2bd9 100644 --- a/drivers/message/i2o/i2o_config.c +++ b/drivers/message/i2o/i2o_config.c @@ -687,6 +687,11 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } size = size >> 16; size *= 4; + if (size > sizeof(rmsg)) { + rcode = -EINVAL; + goto sg_list_cleanup; + } + /* Copy in the user's I2O command */ if (copy_from_user(rmsg, user_msg, size)) { rcode = -EFAULT; @@ -922,6 +927,11 @@ static int i2o_cfg_passthru(unsigned long arg) } size = size >> 16; size *= 4; + if (size > sizeof(rmsg)) { + rcode = -EFAULT; + goto sg_list_cleanup; + } + /* Copy in the user's I2O command */ if (copy_from_user(rmsg, user_msg, size)) { rcode = -EFAULT; diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index b9e56546b493..021f0bc13259 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -518,6 +518,15 @@ config LATTICE_ECP3_CONFIG If unsure, say N. +config SRAM + bool "Generic on-chip SRAM driver" + depends on HAS_IOMEM + select GENERIC_ALLOCATOR + help + This driver allows you to declare a memory region to be managed by + the genalloc API. It is supposed to be used for small on-chip SRAM + areas found on many SoCs. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 865cbc6a7ae1..c235d5b68311 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -52,3 +52,4 @@ obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/ obj-$(CONFIG_INTEL_MEI) += mei/ obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/ obj-$(CONFIG_LATTICE_ECP3_CONFIG) += lattice-ecp3-config.o +obj-$(CONFIG_SRAM) += sram.o diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c new file mode 100644 index 000000000000..437192e43006 --- /dev/null +++ b/drivers/misc/sram.c @@ -0,0 +1,121 @@ +/* + * Generic on-chip SRAM allocation driver + * + * Copyright (C) 2012 Philipp Zabel, Pengutronix + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/genalloc.h> + +#define SRAM_GRANULARITY 32 + +struct sram_dev { + struct gen_pool *pool; + struct clk *clk; +}; + +static int sram_probe(struct platform_device *pdev) +{ + void __iomem *virt_base; + struct sram_dev *sram; + struct resource *res; + unsigned long size; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + size = resource_size(res); + + virt_base = devm_request_and_ioremap(&pdev->dev, res); + if (!virt_base) + return -EADDRNOTAVAIL; + + sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL); + if (!sram) + return -ENOMEM; + + sram->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(sram->clk)) + sram->clk = NULL; + else + clk_prepare_enable(sram->clk); + + sram->pool = devm_gen_pool_create(&pdev->dev, ilog2(SRAM_GRANULARITY), -1); + if (!sram->pool) + return -ENOMEM; + + ret = gen_pool_add_virt(sram->pool, (unsigned long)virt_base, + res->start, size, -1); + if (ret < 0) { + gen_pool_destroy(sram->pool); + return ret; + } + + platform_set_drvdata(pdev, sram); + + dev_dbg(&pdev->dev, "SRAM pool: %ld KiB @ 0x%p\n", size / 1024, virt_base); + + return 0; +} + +static int sram_remove(struct platform_device *pdev) +{ + struct sram_dev *sram = platform_get_drvdata(pdev); + + if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool)) + dev_dbg(&pdev->dev, "removed while SRAM allocated\n"); + + gen_pool_destroy(sram->pool); + + if (sram->clk) + clk_disable_unprepare(sram->clk); + + return 0; +} + +#ifdef CONFIG_OF +static struct of_device_id sram_dt_ids[] = { + { .compatible = "mmio-sram" }, + {} +}; +#endif + +static struct platform_driver sram_driver = { + .driver = { + .name = "sram", + .of_match_table = of_match_ptr(sram_dt_ids), + }, + .probe = sram_probe, + .remove = sram_remove, +}; + +static int __init sram_init(void) +{ + return platform_driver_register(&sram_driver); +} + +postcore_initcall(sram_init); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 65f9ca7a56ad..c40396f23202 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -120,8 +120,8 @@ static void mmc_should_fail_request(struct mmc_host *host, !should_fail(&host->fail_mmc_request, data->blksz * data->blocks)) return; - data->error = data_errors[random32() % ARRAY_SIZE(data_errors)]; - data->bytes_xfered = (random32() % (data->bytes_xfered >> 9)) << 9; + data->error = data_errors[prandom_u32() % ARRAY_SIZE(data_errors)]; + data->bytes_xfered = (prandom_u32() % (data->bytes_xfered >> 9)) << 9; } #else /* CONFIG_FAIL_MMC_REQUEST */ diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 149a3a038491..5abdd4894082 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -4085,7 +4085,7 @@ static int cnic_cm_alloc_mem(struct cnic_dev *dev) if (!cp->csk_tbl) return -ENOMEM; - port_id = random32(); + port_id = prandom_u32(); port_id %= CNIC_LOCAL_PORT_RANGE; if (cnic_init_id_tbl(&cp->csk_port_tbl, CNIC_LOCAL_PORT_RANGE, CNIC_LOCAL_PORT_MIN, port_id)) { @@ -4145,7 +4145,7 @@ static int cnic_cm_init_bnx2_hw(struct cnic_dev *dev) { u32 seed; - seed = random32(); + seed = prandom_u32(); cnic_ctx_wr(dev, 45, 0, seed); return 0; } diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 49b8b58fc5c6..484f77ec2ce1 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -449,7 +449,7 @@ static int transmit(struct baycom_state *bc, int cnt, unsigned char stat) if ((--bc->hdlctx.slotcnt) > 0) return 0; bc->hdlctx.slotcnt = bc->ch_params.slottime; - if ((random32() % 256) > bc->ch_params.ppersist) + if ((prandom_u32() % 256) > bc->ch_params.ppersist) return 0; } } diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index a4a3516b6bbf..3169252613fa 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -389,7 +389,7 @@ void hdlcdrv_arbitrate(struct net_device *dev, struct hdlcdrv_state *s) if ((--s->hdlctx.slotcnt) > 0) return; s->hdlctx.slotcnt = s->ch_params.slottime; - if ((random32() % 256) > s->ch_params.ppersist) + if ((prandom_u32() % 256) > s->ch_params.ppersist) return; start_tx(dev, s); } diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index b2d863f2ea42..0721e72f9299 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -638,7 +638,7 @@ static void yam_arbitrate(struct net_device *dev) yp->slotcnt = yp->slot / 10; /* is random > persist ? */ - if ((random32() % 256) > yp->pers) + if ((prandom_u32() % 256) > yp->pers) return; yam_start_tx(dev, yp); diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c index 9eabfaa22f3e..5ca14d463ba7 100644 --- a/drivers/net/team/team_mode_random.c +++ b/drivers/net/team/team_mode_random.c @@ -18,7 +18,7 @@ static u32 random_N(unsigned int N) { - return reciprocal_divide(random32(), N); + return reciprocal_divide(prandom_u32(), N); } static bool rnd_transmit(struct team *team, struct sk_buff *skb) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c index 2b90da0d85f3..c0362103b344 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c @@ -1117,7 +1117,7 @@ static void brcmf_p2p_afx_handler(struct work_struct *work) if (afx_hdl->is_listen && afx_hdl->my_listen_chan) /* 100ms ~ 300ms */ err = brcmf_p2p_discover_listen(p2p, afx_hdl->my_listen_chan, - 100 * (1 + (random32() % 3))); + 100 * (1 + (prandom_u32() % 3))); else err = brcmf_p2p_act_frm_search(p2p, afx_hdl->peer_listen_chan); diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 47012947a447..8fc311f99bd8 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -216,7 +216,7 @@ mwifiex_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, mwifiex_form_mgmt_frame(skb, buf, len); mwifiex_queue_tx_pkt(priv, skb); - *cookie = random32() | 1; + *cookie = prandom_u32() | 1; cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, true, GFP_ATOMIC); wiphy_dbg(wiphy, "info: management frame transmitted\n"); @@ -271,7 +271,7 @@ mwifiex_cfg80211_remain_on_channel(struct wiphy *wiphy, duration); if (!ret) { - *cookie = random32() | 1; + *cookie = prandom_u32() | 1; priv->roc_cfg.cookie = *cookie; priv->roc_cfg.chan = *chan; diff --git a/drivers/pps/Kconfig b/drivers/pps/Kconfig index 982d16b5a846..7512e98e9311 100644 --- a/drivers/pps/Kconfig +++ b/drivers/pps/Kconfig @@ -20,10 +20,10 @@ config PPS To compile this driver as a module, choose M here: the module will be called pps_core.ko. +if PPS config PPS_DEBUG bool "PPS debugging messages" - depends on PPS help Say Y here if you want the PPS support to produce a bunch of debug messages to the system log. Select this if you are having a @@ -31,13 +31,15 @@ config PPS_DEBUG config NTP_PPS bool "PPS kernel consumer support" - depends on PPS && !NO_HZ + depends on !NO_HZ help This option adds support for direct in-kernel time synchronization using an external PPS signal. It doesn't work on tickless systems at the moment. +endif + source drivers/pps/clients/Kconfig source drivers/pps/generators/Kconfig diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 8cc7171b7179..b6135d4d54eb 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -972,8 +972,10 @@ static int rpmsg_probe(struct virtio_device *vdev) bufs_va = dma_alloc_coherent(vdev->dev.parent->parent, RPMSG_TOTAL_BUF_SPACE, &vrp->bufs_dma, GFP_KERNEL); - if (!bufs_va) + if (!bufs_va) { + err = -ENOMEM; goto vqs_del; + } dev_dbg(&vdev->dev, "buffers: va %p, dma 0x%llx\n", bufs_va, (unsigned long long)vrp->bufs_dma); diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 9b742d3ffb94..66385402d20e 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -259,6 +259,76 @@ void rtc_device_unregister(struct rtc_device *rtc) } EXPORT_SYMBOL_GPL(rtc_device_unregister); +static void devm_rtc_device_release(struct device *dev, void *res) +{ + struct rtc_device *rtc = *(struct rtc_device **)res; + + rtc_device_unregister(rtc); +} + +static int devm_rtc_device_match(struct device *dev, void *res, void *data) +{ + struct rtc **r = res; + + return *r == data; +} + +/** + * devm_rtc_device_register - resource managed rtc_device_register() + * @dev: the device to register + * @name: the name of the device + * @ops: the rtc operations structure + * @owner: the module owner + * + * @return a struct rtc on success, or an ERR_PTR on error + * + * Managed rtc_device_register(). The rtc_device returned from this function + * are automatically freed on driver detach. See rtc_device_register() + * for more information. + */ + +struct rtc_device *devm_rtc_device_register(struct device *dev, + const char *name, + const struct rtc_class_ops *ops, + struct module *owner) +{ + struct rtc_device **ptr, *rtc; + + ptr = devres_alloc(devm_rtc_device_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + rtc = rtc_device_register(name, dev, ops, owner); + if (!IS_ERR(rtc)) { + *ptr = rtc; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return rtc; +} +EXPORT_SYMBOL_GPL(devm_rtc_device_register); + +/** + * devm_rtc_device_unregister - resource managed devm_rtc_device_unregister() + * @dev: the device to unregister + * @rtc: the RTC class device to unregister + * + * Deallocated a rtc allocated with devm_rtc_device_register(). Normally this + * function will not need to be called and the resource management code will + * ensure that the resource is freed. + */ +void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc) +{ + int rc; + + rc = devres_release(dev, devm_rtc_device_release, + devm_rtc_device_match, rtc); + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_rtc_device_unregister); + static int __init rtc_init(void) { rtc_class = class_create(THIS_MODULE, "rtc"); diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c index 63b17ebe90e8..f3742f364eb8 100644 --- a/drivers/rtc/rtc-88pm80x.c +++ b/drivers/rtc/rtc-88pm80x.c @@ -234,7 +234,7 @@ static const struct rtc_class_ops pm80x_rtc_ops = { .alarm_irq_enable = pm80x_rtc_alarm_irq_enable, }; -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int pm80x_rtc_suspend(struct device *dev) { return pm80x_dev_suspend(dev); @@ -312,7 +312,7 @@ static int pm80x_rtc_probe(struct platform_device *pdev) } rtc_tm_to_time(&tm, &ticks); - info->rtc_dev = rtc_device_register("88pm80x-rtc", &pdev->dev, + info->rtc_dev = devm_rtc_device_register(&pdev->dev, "88pm80x-rtc", &pm80x_rtc_ops, THIS_MODULE); if (IS_ERR(info->rtc_dev)) { ret = PTR_ERR(info->rtc_dev); @@ -346,7 +346,6 @@ static int pm80x_rtc_remove(struct platform_device *pdev) { struct pm80x_rtc_info *info = platform_get_drvdata(pdev); platform_set_drvdata(pdev, NULL); - rtc_device_unregister(info->rtc_dev); pm80x_free_irq(info->chip, info->irq, info); return 0; } diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index f663746f4603..0f2b91bfee37 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -318,14 +318,14 @@ static int pm860x_rtc_probe(struct platform_device *pdev) pdata = pdev->dev.platform_data; - info = kzalloc(sizeof(struct pm860x_rtc_info), GFP_KERNEL); + info = devm_kzalloc(&pdev->dev, sizeof(struct pm860x_rtc_info), + GFP_KERNEL); if (!info) return -ENOMEM; info->irq = platform_get_irq(pdev, 0); if (info->irq < 0) { dev_err(&pdev->dev, "No IRQ resource!\n"); - ret = -EINVAL; - goto out; + return info->irq; } info->chip = chip; @@ -333,12 +333,13 @@ static int pm860x_rtc_probe(struct platform_device *pdev) info->dev = &pdev->dev; dev_set_drvdata(&pdev->dev, info); - ret = request_threaded_irq(info->irq, NULL, rtc_update_handler, - IRQF_ONESHOT, "rtc", info); + ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, + rtc_update_handler, IRQF_ONESHOT, "rtc", + info); if (ret < 0) { dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n", info->irq, ret); - goto out; + return ret; } /* set addresses of 32-bit base value for RTC time */ @@ -350,7 +351,7 @@ static int pm860x_rtc_probe(struct platform_device *pdev) ret = pm860x_rtc_read_time(&pdev->dev, &tm); if (ret < 0) { dev_err(&pdev->dev, "Failed to read initial time.\n"); - goto out_rtc; + return ret; } if ((tm.tm_year < 70) || (tm.tm_year > 138)) { tm.tm_year = 70; @@ -362,7 +363,7 @@ static int pm860x_rtc_probe(struct platform_device *pdev) ret = pm860x_rtc_set_time(&pdev->dev, &tm); if (ret < 0) { dev_err(&pdev->dev, "Failed to set initial time.\n"); - goto out_rtc; + return ret; } } rtc_tm_to_time(&tm, &ticks); @@ -373,12 +374,12 @@ static int pm860x_rtc_probe(struct platform_device *pdev) } } - info->rtc_dev = rtc_device_register("88pm860x-rtc", &pdev->dev, + info->rtc_dev = devm_rtc_device_register(&pdev->dev, "88pm860x-rtc", &pm860x_rtc_ops, THIS_MODULE); ret = PTR_ERR(info->rtc_dev); if (IS_ERR(info->rtc_dev)) { dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); - goto out_rtc; + return ret; } /* @@ -405,11 +406,6 @@ static int pm860x_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); return 0; -out_rtc: - free_irq(info->irq, info); -out: - kfree(info); - return ret; } static int pm860x_rtc_remove(struct platform_device *pdev) @@ -423,9 +419,6 @@ static int pm860x_rtc_remove(struct platform_device *pdev) #endif /* VRTC_CALIBRATION */ platform_set_drvdata(pdev, NULL); - rtc_device_unregister(info->rtc_dev); - free_irq(info->irq, info); - kfree(info); return 0; } diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c index 261a07e0fb24..47a4f2c4d30e 100644 --- a/drivers/rtc/rtc-ab3100.c +++ b/drivers/rtc/rtc-ab3100.c @@ -229,8 +229,8 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev) /* Ignore any error on this write */ } - rtc = rtc_device_register("ab3100-rtc", &pdev->dev, &ab3100_rtc_ops, - THIS_MODULE); + rtc = devm_rtc_device_register(&pdev->dev, "ab3100-rtc", + &ab3100_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { err = PTR_ERR(rtc); return err; @@ -242,9 +242,6 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev) static int __exit ab3100_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(rtc); platform_set_drvdata(pdev, NULL); return 0; } @@ -257,19 +254,7 @@ static struct platform_driver ab3100_rtc_driver = { .remove = __exit_p(ab3100_rtc_remove), }; -static int __init ab3100_rtc_init(void) -{ - return platform_driver_probe(&ab3100_rtc_driver, - ab3100_rtc_probe); -} - -static void __exit ab3100_rtc_exit(void) -{ - platform_driver_unregister(&ab3100_rtc_driver); -} - -module_init(ab3100_rtc_init); -module_exit(ab3100_rtc_exit); +module_platform_driver_probe(ab3100_rtc_driver, ab3100_rtc_probe); MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>"); MODULE_DESCRIPTION("AB3100 RTC Driver"); diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index 57cde2b061e6..63cfa314a39f 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -422,20 +422,19 @@ static int ab8500_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, true); - rtc = rtc_device_register("ab8500-rtc", &pdev->dev, &ab8500_rtc_ops, - THIS_MODULE); + rtc = devm_rtc_device_register(&pdev->dev, "ab8500-rtc", + &ab8500_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { dev_err(&pdev->dev, "Registration failed\n"); err = PTR_ERR(rtc); return err; } - err = request_threaded_irq(irq, NULL, rtc_alarm_handler, - IRQF_NO_SUSPEND | IRQF_ONESHOT, "ab8500-rtc", rtc); - if (err < 0) { - rtc_device_unregister(rtc); + err = devm_request_threaded_irq(&pdev->dev, irq, NULL, + rtc_alarm_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT, + "ab8500-rtc", rtc); + if (err < 0) return err; - } platform_set_drvdata(pdev, rtc); @@ -450,13 +449,8 @@ static int ab8500_rtc_probe(struct platform_device *pdev) static int ab8500_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); - int irq = platform_get_irq_byname(pdev, "ALARM"); - ab8500_sysfs_rtc_unregister(&pdev->dev); - free_irq(irq, rtc); - rtc_device_unregister(rtc); platform_set_drvdata(pdev, NULL); return 0; diff --git a/drivers/rtc/rtc-at32ap700x.c b/drivers/rtc/rtc-at32ap700x.c index 8dd08305aae1..f47fbb5eee8b 100644 --- a/drivers/rtc/rtc-at32ap700x.c +++ b/drivers/rtc/rtc-at32ap700x.c @@ -202,7 +202,8 @@ static int __init at32_rtc_probe(struct platform_device *pdev) int irq; int ret; - rtc = kzalloc(sizeof(struct rtc_at32ap700x), GFP_KERNEL); + rtc = devm_kzalloc(&pdev->dev, sizeof(struct rtc_at32ap700x), + GFP_KERNEL); if (!rtc) { dev_dbg(&pdev->dev, "out of memory\n"); return -ENOMEM; @@ -223,7 +224,7 @@ static int __init at32_rtc_probe(struct platform_device *pdev) } rtc->irq = irq; - rtc->regs = ioremap(regs->start, resource_size(regs)); + rtc->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs)); if (!rtc->regs) { ret = -ENOMEM; dev_dbg(&pdev->dev, "could not map I/O memory\n"); @@ -244,20 +245,21 @@ static int __init at32_rtc_probe(struct platform_device *pdev) | RTC_BIT(CTRL_EN)); } - ret = request_irq(irq, at32_rtc_interrupt, IRQF_SHARED, "rtc", rtc); + ret = devm_request_irq(&pdev->dev, irq, at32_rtc_interrupt, IRQF_SHARED, + "rtc", rtc); if (ret) { dev_dbg(&pdev->dev, "could not request irq %d\n", irq); - goto out_iounmap; + goto out; } platform_set_drvdata(pdev, rtc); - rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &at32_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { dev_dbg(&pdev->dev, "could not register rtc device\n"); ret = PTR_ERR(rtc->rtc); - goto out_free_irq; + goto out; } device_init_wakeup(&pdev->dev, 1); @@ -267,26 +269,15 @@ static int __init at32_rtc_probe(struct platform_device *pdev) return 0; -out_free_irq: - platform_set_drvdata(pdev, NULL); - free_irq(irq, rtc); -out_iounmap: - iounmap(rtc->regs); out: - kfree(rtc); + platform_set_drvdata(pdev, NULL); return ret; } static int __exit at32_rtc_remove(struct platform_device *pdev) { - struct rtc_at32ap700x *rtc = platform_get_drvdata(pdev); - device_init_wakeup(&pdev->dev, 0); - free_irq(rtc->irq, rtc); - iounmap(rtc->regs); - rtc_device_unregister(rtc->rtc); - kfree(rtc); platform_set_drvdata(pdev, NULL); return 0; @@ -302,17 +293,7 @@ static struct platform_driver at32_rtc_driver = { }, }; -static int __init at32_rtc_init(void) -{ - return platform_driver_probe(&at32_rtc_driver, at32_rtc_probe); -} -module_init(at32_rtc_init); - -static void __exit at32_rtc_exit(void) -{ - platform_driver_unregister(&at32_rtc_driver); -} -module_exit(at32_rtc_exit); +module_platform_driver_probe(at32_rtc_driver, at32_rtc_probe); MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>"); MODULE_DESCRIPTION("Real time clock for AVR32 AT32AP700x"); diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 434ebc3a99dc..a654071f57df 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -28,6 +28,8 @@ #include <linux/ioctl.h> #include <linux/completion.h> #include <linux/io.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <asm/uaccess.h> @@ -337,7 +339,7 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP /* AT91RM9200 RTC Power management control */ @@ -369,39 +371,27 @@ static int at91_rtc_resume(struct device *dev) } return 0; } +#endif -static const struct dev_pm_ops at91_rtc_pm = { - .suspend = at91_rtc_suspend, - .resume = at91_rtc_resume, -}; - -#define at91_rtc_pm_ptr &at91_rtc_pm +static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume); -#else -#define at91_rtc_pm_ptr NULL -#endif +static const struct of_device_id at91_rtc_dt_ids[] = { + { .compatible = "atmel,at91rm9200-rtc" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, at91_rtc_dt_ids); static struct platform_driver at91_rtc_driver = { .remove = __exit_p(at91_rtc_remove), .driver = { .name = "at91_rtc", .owner = THIS_MODULE, - .pm = at91_rtc_pm_ptr, + .pm = &at91_rtc_pm_ops, + .of_match_table = of_match_ptr(at91_rtc_dt_ids), }, }; -static int __init at91_rtc_init(void) -{ - return platform_driver_probe(&at91_rtc_driver, at91_rtc_probe); -} - -static void __exit at91_rtc_exit(void) -{ - platform_driver_unregister(&at91_rtc_driver); -} - -module_init(at91_rtc_init); -module_exit(at91_rtc_exit); +module_platform_driver_probe(at91_rtc_driver, at91_rtc_probe); MODULE_AUTHOR("Rick Bronson"); MODULE_DESCRIPTION("RTC driver for Atmel AT91RM9200"); diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 39cfd2ee0042..b60a34cb145a 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -20,6 +20,7 @@ #include <linux/ioctl.h> #include <linux/slab.h> #include <linux/platform_data/atmel.h> +#include <linux/io.h> #include <mach/at91_rtt.h> #include <mach/cpu.h> @@ -309,7 +310,7 @@ static int at91_rtc_probe(struct platform_device *pdev) return irq; } - rtc = kzalloc(sizeof *rtc, GFP_KERNEL); + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); if (!rtc) return -ENOMEM; @@ -320,18 +321,19 @@ static int at91_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); platform_set_drvdata(pdev, rtc); - rtc->rtt = ioremap(r->start, resource_size(r)); + rtc->rtt = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!rtc->rtt) { dev_err(&pdev->dev, "failed to map registers, aborting.\n"); ret = -ENOMEM; goto fail; } - rtc->gpbr = ioremap(r_gpbr->start, resource_size(r_gpbr)); + rtc->gpbr = devm_ioremap(&pdev->dev, r_gpbr->start, + resource_size(r_gpbr)); if (!rtc->gpbr) { dev_err(&pdev->dev, "failed to map gpbr registers, aborting.\n"); ret = -ENOMEM; - goto fail_gpbr; + goto fail; } mr = rtt_readl(rtc, MR); @@ -346,20 +348,19 @@ static int at91_rtc_probe(struct platform_device *pdev) mr &= ~(AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN); rtt_writel(rtc, MR, mr); - rtc->rtcdev = rtc_device_register(pdev->name, &pdev->dev, - &at91_rtc_ops, THIS_MODULE); + rtc->rtcdev = devm_rtc_device_register(&pdev->dev, pdev->name, + &at91_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtcdev)) { ret = PTR_ERR(rtc->rtcdev); - goto fail_register; + goto fail; } /* register irq handler after we know what name we'll use */ - ret = request_irq(rtc->irq, at91_rtc_interrupt, IRQF_SHARED, - dev_name(&rtc->rtcdev->dev), rtc); + ret = devm_request_irq(&pdev->dev, rtc->irq, at91_rtc_interrupt, + IRQF_SHARED, dev_name(&rtc->rtcdev->dev), rtc); if (ret) { dev_dbg(&pdev->dev, "can't share IRQ %d?\n", rtc->irq); - rtc_device_unregister(rtc->rtcdev); - goto fail_register; + goto fail; } /* NOTE: sam9260 rev A silicon has a ROM bug which resets the @@ -374,13 +375,8 @@ static int at91_rtc_probe(struct platform_device *pdev) return 0; -fail_register: - iounmap(rtc->gpbr); -fail_gpbr: - iounmap(rtc->rtt); fail: platform_set_drvdata(pdev, NULL); - kfree(rtc); return ret; } @@ -394,14 +390,8 @@ static int at91_rtc_remove(struct platform_device *pdev) /* disable all interrupts */ rtt_writel(rtc, MR, mr & ~(AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN)); - free_irq(rtc->irq, rtc); - - rtc_device_unregister(rtc->rtcdev); - iounmap(rtc->gpbr); - iounmap(rtc->rtt); platform_set_drvdata(pdev, NULL); - kfree(rtc); return 0; } @@ -414,14 +404,13 @@ static void at91_rtc_shutdown(struct platform_device *pdev) rtt_writel(rtc, MR, mr & ~rtc->imr); } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP /* AT91SAM9 RTC Power management control */ -static int at91_rtc_suspend(struct platform_device *pdev, - pm_message_t state) +static int at91_rtc_suspend(struct device *dev) { - struct sam9_rtc *rtc = platform_get_drvdata(pdev); + struct sam9_rtc *rtc = dev_get_drvdata(dev); u32 mr = rtt_readl(rtc, MR); /* @@ -430,7 +419,7 @@ static int at91_rtc_suspend(struct platform_device *pdev, */ rtc->imr = mr & (AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN); if (rtc->imr) { - if (device_may_wakeup(&pdev->dev) && (mr & AT91_RTT_ALMIEN)) { + if (device_may_wakeup(dev) && (mr & AT91_RTT_ALMIEN)) { enable_irq_wake(rtc->irq); /* don't let RTTINC cause wakeups */ if (mr & AT91_RTT_RTTINCIEN) @@ -442,13 +431,13 @@ static int at91_rtc_suspend(struct platform_device *pdev, return 0; } -static int at91_rtc_resume(struct platform_device *pdev) +static int at91_rtc_resume(struct device *dev) { - struct sam9_rtc *rtc = platform_get_drvdata(pdev); + struct sam9_rtc *rtc = dev_get_drvdata(dev); u32 mr; if (rtc->imr) { - if (device_may_wakeup(&pdev->dev)) + if (device_may_wakeup(dev)) disable_irq_wake(rtc->irq); mr = rtt_readl(rtc, MR); rtt_writel(rtc, MR, mr | rtc->imr); @@ -456,20 +445,18 @@ static int at91_rtc_resume(struct platform_device *pdev) return 0; } -#else -#define at91_rtc_suspend NULL -#define at91_rtc_resume NULL #endif +static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume); + static struct platform_driver at91_rtc_driver = { .probe = at91_rtc_probe, .remove = at91_rtc_remove, .shutdown = at91_rtc_shutdown, - .suspend = at91_rtc_suspend, - .resume = at91_rtc_resume, .driver = { .name = "rtc-at91sam9", .owner = THIS_MODULE, + .pm = &at91_rtc_pm_ops, }, }; diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c index b309da4ec745..7995abc391fc 100644 --- a/drivers/rtc/rtc-au1xxx.c +++ b/drivers/rtc/rtc-au1xxx.c @@ -101,7 +101,7 @@ static int au1xtoy_rtc_probe(struct platform_device *pdev) while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S) msleep(1); - rtcdev = rtc_device_register("rtc-au1xxx", &pdev->dev, + rtcdev = devm_rtc_device_register(&pdev->dev, "rtc-au1xxx", &au1xtoy_rtc_ops, THIS_MODULE); if (IS_ERR(rtcdev)) { ret = PTR_ERR(rtcdev); @@ -118,9 +118,6 @@ out_err: static int au1xtoy_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtcdev = platform_get_drvdata(pdev); - - rtc_device_unregister(rtcdev); platform_set_drvdata(pdev, NULL); return 0; @@ -134,18 +131,7 @@ static struct platform_driver au1xrtc_driver = { .remove = au1xtoy_rtc_remove, }; -static int __init au1xtoy_rtc_init(void) -{ - return platform_driver_probe(&au1xrtc_driver, au1xtoy_rtc_probe); -} - -static void __exit au1xtoy_rtc_exit(void) -{ - platform_driver_unregister(&au1xrtc_driver); -} - -module_init(au1xtoy_rtc_init); -module_exit(au1xtoy_rtc_exit); +module_platform_driver_probe(au1xrtc_driver, au1xtoy_rtc_probe); MODULE_DESCRIPTION("Au1xxx TOY-counter-based RTC driver"); MODULE_AUTHOR("Manuel Lauss <manuel.lauss@gmail.com>"); diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index 4ec614b0954d..ad44ec5dc29a 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -352,14 +352,14 @@ static int bfin_rtc_probe(struct platform_device *pdev) dev_dbg_stamp(dev); /* Allocate memory for our RTC struct */ - rtc = kzalloc(sizeof(*rtc), GFP_KERNEL); + rtc = devm_kzalloc(dev, sizeof(*rtc), GFP_KERNEL); if (unlikely(!rtc)) return -ENOMEM; platform_set_drvdata(pdev, rtc); device_init_wakeup(dev, 1); /* Register our RTC with the RTC framework */ - rtc->rtc_dev = rtc_device_register(pdev->name, dev, &bfin_rtc_ops, + rtc->rtc_dev = devm_rtc_device_register(dev, pdev->name, &bfin_rtc_ops, THIS_MODULE); if (unlikely(IS_ERR(rtc->rtc_dev))) { ret = PTR_ERR(rtc->rtc_dev); @@ -367,9 +367,10 @@ static int bfin_rtc_probe(struct platform_device *pdev) } /* Grab the IRQ and init the hardware */ - ret = request_irq(IRQ_RTC, bfin_rtc_interrupt, 0, pdev->name, dev); + ret = devm_request_irq(dev, IRQ_RTC, bfin_rtc_interrupt, 0, + pdev->name, dev); if (unlikely(ret)) - goto err_reg; + goto err; /* sometimes the bootloader touched things, but the write complete was not * enabled, so let's just do a quick timeout here since the IRQ will not fire ... */ @@ -381,32 +382,23 @@ static int bfin_rtc_probe(struct platform_device *pdev) return 0; -err_reg: - rtc_device_unregister(rtc->rtc_dev); err: - kfree(rtc); return ret; } static int bfin_rtc_remove(struct platform_device *pdev) { - struct bfin_rtc *rtc = platform_get_drvdata(pdev); struct device *dev = &pdev->dev; bfin_rtc_reset(dev, 0); - free_irq(IRQ_RTC, dev); - rtc_device_unregister(rtc->rtc_dev); platform_set_drvdata(pdev, NULL); - kfree(rtc); return 0; } -#ifdef CONFIG_PM -static int bfin_rtc_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int bfin_rtc_suspend(struct device *dev) { - struct device *dev = &pdev->dev; - dev_dbg_stamp(dev); if (device_may_wakeup(dev)) { @@ -418,10 +410,8 @@ static int bfin_rtc_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int bfin_rtc_resume(struct platform_device *pdev) +static int bfin_rtc_resume(struct device *dev) { - struct device *dev = &pdev->dev; - dev_dbg_stamp(dev); if (device_may_wakeup(dev)) @@ -440,20 +430,18 @@ static int bfin_rtc_resume(struct platform_device *pdev) return 0; } -#else -# define bfin_rtc_suspend NULL -# define bfin_rtc_resume NULL #endif +static SIMPLE_DEV_PM_OPS(bfin_rtc_pm_ops, bfin_rtc_suspend, bfin_rtc_resume); + static struct platform_driver bfin_rtc_driver = { .driver = { .name = "rtc-bfin", .owner = THIS_MODULE, + .pm = &bfin_rtc_pm_ops, }, .probe = bfin_rtc_probe, .remove = bfin_rtc_remove, - .suspend = bfin_rtc_suspend, - .resume = bfin_rtc_resume, }; module_platform_driver(bfin_rtc_driver); diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c index 036cb89f8188..fea78bc713ca 100644 --- a/drivers/rtc/rtc-bq32k.c +++ b/drivers/rtc/rtc-bq32k.c @@ -153,7 +153,7 @@ static int bq32k_probe(struct i2c_client *client, if (error) return error; - rtc = rtc_device_register(bq32k_driver.driver.name, &client->dev, + rtc = devm_rtc_device_register(&client->dev, bq32k_driver.driver.name, &bq32k_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -165,9 +165,6 @@ static int bq32k_probe(struct i2c_client *client, static int bq32k_remove(struct i2c_client *client) { - struct rtc_device *rtc = i2c_get_clientdata(client); - - rtc_device_unregister(rtc); return 0; } diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c index 693be71b5b18..af2886784a7b 100644 --- a/drivers/rtc/rtc-bq4802.c +++ b/drivers/rtc/rtc-bq4802.c @@ -142,7 +142,7 @@ static const struct rtc_class_ops bq4802_ops = { static int bq4802_probe(struct platform_device *pdev) { - struct bq4802 *p = kzalloc(sizeof(*p), GFP_KERNEL); + struct bq4802 *p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); int err = -ENOMEM; if (!p) @@ -155,54 +155,41 @@ static int bq4802_probe(struct platform_device *pdev) p->r = platform_get_resource(pdev, IORESOURCE_IO, 0); err = -EINVAL; if (!p->r) - goto out_free; + goto out; } if (p->r->flags & IORESOURCE_IO) { p->ioport = p->r->start; p->read = bq4802_read_io; p->write = bq4802_write_io; } else if (p->r->flags & IORESOURCE_MEM) { - p->regs = ioremap(p->r->start, resource_size(p->r)); + p->regs = devm_ioremap(&pdev->dev, p->r->start, + resource_size(p->r)); p->read = bq4802_read_mem; p->write = bq4802_write_mem; } else { err = -EINVAL; - goto out_free; + goto out; } platform_set_drvdata(pdev, p); - p->rtc = rtc_device_register("bq4802", &pdev->dev, - &bq4802_ops, THIS_MODULE); + p->rtc = devm_rtc_device_register(&pdev->dev, "bq4802", + &bq4802_ops, THIS_MODULE); if (IS_ERR(p->rtc)) { err = PTR_ERR(p->rtc); - goto out_iounmap; + goto out; } err = 0; out: return err; -out_iounmap: - if (p->r->flags & IORESOURCE_MEM) - iounmap(p->regs); -out_free: - kfree(p); - goto out; } static int bq4802_remove(struct platform_device *pdev) { - struct bq4802 *p = platform_get_drvdata(pdev); - - rtc_device_unregister(p->rtc); - if (p->r->flags & IORESOURCE_MEM) - iounmap(p->regs); - platform_set_drvdata(pdev, NULL); - kfree(p); - return 0; } diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index af97c94e8a3a..cc5bea9c4b1c 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -804,9 +804,8 @@ static int cmos_suspend(struct device *dev) mask = RTC_IRQMASK; tmp &= ~mask; CMOS_WRITE(tmp, RTC_CONTROL); + hpet_mask_rtc_irq_bit(mask); - /* shut down hpet emulation - we don't need it for alarm */ - hpet_mask_rtc_irq_bit(RTC_PIE|RTC_AIE|RTC_UIE); cmos_checkintr(cmos, tmp); } spin_unlock_irq(&rtc_lock); @@ -870,6 +869,7 @@ static int cmos_resume(struct device *dev) rtc_update_irq(cmos->rtc, 1, mask); tmp &= ~RTC_AIE; hpet_mask_rtc_irq_bit(RTC_AIE); + hpet_rtc_timer_init(); } while (mask & RTC_AIE); spin_unlock_irq(&rtc_lock); } diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c index 2d28ec1aa1cd..93c06588ddca 100644 --- a/drivers/rtc/rtc-coh901331.c +++ b/drivers/rtc/rtc-coh901331.c @@ -47,7 +47,7 @@ struct coh901331_port { u32 physize; void __iomem *virtbase; int irq; -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP u32 irqmaskstore; #endif }; @@ -155,7 +155,6 @@ static int __exit coh901331_remove(struct platform_device *pdev) struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev); if (rtap) { - rtc_device_unregister(rtap->rtc); clk_unprepare(rtap->clk); platform_set_drvdata(pdev, NULL); } @@ -211,8 +210,8 @@ static int __init coh901331_probe(struct platform_device *pdev) clk_disable(rtap->clk); platform_set_drvdata(pdev, rtap); - rtap->rtc = rtc_device_register("coh901331", &pdev->dev, &coh901331_ops, - THIS_MODULE); + rtap->rtc = devm_rtc_device_register(&pdev->dev, "coh901331", + &coh901331_ops, THIS_MODULE); if (IS_ERR(rtap->rtc)) { ret = PTR_ERR(rtap->rtc); goto out_no_rtc; @@ -226,17 +225,17 @@ static int __init coh901331_probe(struct platform_device *pdev) return ret; } -#ifdef CONFIG_PM -static int coh901331_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int coh901331_suspend(struct device *dev) { - struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev); + struct coh901331_port *rtap = dev_get_drvdata(dev); /* * If this RTC alarm will be used for waking the system up, * don't disable it of course. Else we just disable the alarm * and await suspension. */ - if (device_may_wakeup(&pdev->dev)) { + if (device_may_wakeup(dev)) { enable_irq_wake(rtap->irq); } else { clk_enable(rtap->clk); @@ -248,12 +247,12 @@ static int coh901331_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int coh901331_resume(struct platform_device *pdev) +static int coh901331_resume(struct device *dev) { - struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev); + struct coh901331_port *rtap = dev_get_drvdata(dev); clk_prepare(rtap->clk); - if (device_may_wakeup(&pdev->dev)) { + if (device_may_wakeup(dev)) { disable_irq_wake(rtap->irq); } else { clk_enable(rtap->clk); @@ -262,11 +261,10 @@ static int coh901331_resume(struct platform_device *pdev) } return 0; } -#else -#define coh901331_suspend NULL -#define coh901331_resume NULL #endif +static SIMPLE_DEV_PM_OPS(coh901331_pm_ops, coh901331_suspend, coh901331_resume); + static void coh901331_shutdown(struct platform_device *pdev) { struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev); @@ -280,25 +278,13 @@ static struct platform_driver coh901331_driver = { .driver = { .name = "rtc-coh901331", .owner = THIS_MODULE, + .pm = &coh901331_pm_ops, }, .remove = __exit_p(coh901331_remove), - .suspend = coh901331_suspend, - .resume = coh901331_resume, .shutdown = coh901331_shutdown, }; -static int __init coh901331_init(void) -{ - return platform_driver_probe(&coh901331_driver, coh901331_probe); -} - -static void __exit coh901331_exit(void) -{ - platform_driver_unregister(&coh901331_driver); -} - -module_init(coh901331_init); -module_exit(coh901331_exit); +module_platform_driver_probe(coh901331_driver, coh901331_probe); MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>"); MODULE_DESCRIPTION("ST-Ericsson AB COH 901 331 RTC Driver"); diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index 969abbad7fe3..7286b279cf2d 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -247,7 +247,7 @@ static int da9052_rtc_probe(struct platform_device *pdev) return ret; } - rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &da9052_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) return PTR_ERR(rtc->rtc); @@ -257,9 +257,6 @@ static int da9052_rtc_probe(struct platform_device *pdev) static int da9052_rtc_remove(struct platform_device *pdev) { - struct da9052_rtc *rtc = pdev->dev.platform_data; - - rtc_device_unregister(rtc->rtc); platform_set_drvdata(pdev, NULL); return 0; diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c index 8f0dcfedb83c..73858ca9709a 100644 --- a/drivers/rtc/rtc-da9055.c +++ b/drivers/rtc/rtc-da9055.c @@ -294,7 +294,7 @@ static int da9055_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &da9055_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { ret = PTR_ERR(rtc->rtc); @@ -317,9 +317,6 @@ err_rtc: static int da9055_rtc_remove(struct platform_device *pdev) { - struct da9055_rtc *rtc = pdev->dev.platform_data; - - rtc_device_unregister(rtc->rtc); platform_set_drvdata(pdev, NULL); return 0; diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c index 56b73089bb29..a55048c3e26f 100644 --- a/drivers/rtc/rtc-davinci.c +++ b/drivers/rtc/rtc-davinci.c @@ -523,7 +523,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, davinci_rtc); - davinci_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, + davinci_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &davinci_rtc_ops, THIS_MODULE); if (IS_ERR(davinci_rtc->rtc)) { ret = PTR_ERR(davinci_rtc->rtc); @@ -543,7 +543,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) 0, "davinci_rtc", davinci_rtc); if (ret < 0) { dev_err(dev, "unable to register davinci RTC interrupt\n"); - goto fail2; + goto fail1; } /* Enable interrupts */ @@ -557,14 +557,12 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) return 0; -fail2: - rtc_device_unregister(davinci_rtc->rtc); fail1: platform_set_drvdata(pdev, NULL); return ret; } -static int davinci_rtc_remove(struct platform_device *pdev) +static int __exit davinci_rtc_remove(struct platform_device *pdev) { struct davinci_rtc *davinci_rtc = platform_get_drvdata(pdev); @@ -572,8 +570,6 @@ static int davinci_rtc_remove(struct platform_device *pdev) rtcif_write(davinci_rtc, 0, PRTCIF_INTEN); - rtc_device_unregister(davinci_rtc->rtc); - platform_set_drvdata(pdev, NULL); return 0; @@ -581,24 +577,14 @@ static int davinci_rtc_remove(struct platform_device *pdev) static struct platform_driver davinci_rtc_driver = { .probe = davinci_rtc_probe, - .remove = davinci_rtc_remove, + .remove = __exit_p(davinci_rtc_remove), .driver = { .name = "rtc_davinci", .owner = THIS_MODULE, }, }; -static int __init rtc_init(void) -{ - return platform_driver_probe(&davinci_rtc_driver, davinci_rtc_probe); -} -module_init(rtc_init); - -static void __exit rtc_exit(void) -{ - platform_driver_unregister(&davinci_rtc_driver); -} -module_exit(rtc_exit); +module_platform_driver_probe(davinci_rtc_driver, davinci_rtc_probe); MODULE_AUTHOR("Miguel Aguilar <miguel.aguilar@ridgerun.com>"); MODULE_DESCRIPTION("Texas Instruments DaVinci PRTC Driver"); diff --git a/drivers/rtc/rtc-dm355evm.c b/drivers/rtc/rtc-dm355evm.c index b2ed2c94b081..1e1ca63d58a9 100644 --- a/drivers/rtc/rtc-dm355evm.c +++ b/drivers/rtc/rtc-dm355evm.c @@ -127,8 +127,8 @@ static int dm355evm_rtc_probe(struct platform_device *pdev) { struct rtc_device *rtc; - rtc = rtc_device_register(pdev->name, - &pdev->dev, &dm355evm_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, + &dm355evm_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { dev_err(&pdev->dev, "can't register RTC device, err %ld\n", PTR_ERR(rtc)); @@ -141,9 +141,6 @@ static int dm355evm_rtc_probe(struct platform_device *pdev) static int dm355evm_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(rtc); platform_set_drvdata(pdev, NULL); return 0; } diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c index 45cd8c9f5a39..c7702b7269f7 100644 --- a/drivers/rtc/rtc-ds1216.c +++ b/drivers/rtc/rtc-ds1216.c @@ -30,8 +30,6 @@ struct ds1216_regs { struct ds1216_priv { struct rtc_device *rtc; void __iomem *ioaddr; - size_t size; - unsigned long baseaddr; }; static const u8 magic[] = { @@ -144,57 +142,33 @@ static int __init ds1216_rtc_probe(struct platform_device *pdev) { struct resource *res; struct ds1216_priv *priv; - int ret = 0; u8 dummy[8]; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -ENODEV; - priv = kzalloc(sizeof *priv, GFP_KERNEL); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; platform_set_drvdata(pdev, priv); - priv->size = resource_size(res); - if (!request_mem_region(res->start, priv->size, pdev->name)) { - ret = -EBUSY; - goto out; - } - priv->baseaddr = res->start; - priv->ioaddr = ioremap(priv->baseaddr, priv->size); - if (!priv->ioaddr) { - ret = -ENOMEM; - goto out; - } - priv->rtc = rtc_device_register("ds1216", &pdev->dev, - &ds1216_rtc_ops, THIS_MODULE); - if (IS_ERR(priv->rtc)) { - ret = PTR_ERR(priv->rtc); - goto out; - } + priv->ioaddr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->ioaddr)) + return PTR_ERR(priv->ioaddr); + + priv->rtc = devm_rtc_device_register(&pdev->dev, "ds1216", + &ds1216_rtc_ops, THIS_MODULE); + if (IS_ERR(priv->rtc)) + return PTR_ERR(priv->rtc); /* dummy read to get clock into a known state */ ds1216_read(priv->ioaddr, dummy); return 0; - -out: - if (priv->ioaddr) - iounmap(priv->ioaddr); - if (priv->baseaddr) - release_mem_region(priv->baseaddr, priv->size); - kfree(priv); - return ret; } static int __exit ds1216_rtc_remove(struct platform_device *pdev) { - struct ds1216_priv *priv = platform_get_drvdata(pdev); - - rtc_device_unregister(priv->rtc); - iounmap(priv->ioaddr); - release_mem_region(priv->baseaddr, priv->size); - kfree(priv); return 0; } diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c index d989412a348a..398c96a98fc4 100644 --- a/drivers/rtc/rtc-ds1286.c +++ b/drivers/rtc/rtc-ds1286.c @@ -25,8 +25,6 @@ struct ds1286_priv { struct rtc_device *rtc; u32 __iomem *rtcregs; - size_t size; - unsigned long baseaddr; spinlock_t lock; }; @@ -270,7 +268,6 @@ static int ds1286_set_time(struct device *dev, struct rtc_time *tm) static int ds1286_read_alarm(struct device *dev, struct rtc_wkalrm *alm) { struct ds1286_priv *priv = dev_get_drvdata(dev); - unsigned char cmd; unsigned long flags; /* @@ -281,7 +278,7 @@ static int ds1286_read_alarm(struct device *dev, struct rtc_wkalrm *alm) alm->time.tm_min = ds1286_rtc_read(priv, RTC_MINUTES_ALARM) & 0x7f; alm->time.tm_hour = ds1286_rtc_read(priv, RTC_HOURS_ALARM) & 0x1f; alm->time.tm_wday = ds1286_rtc_read(priv, RTC_DAY_ALARM) & 0x07; - cmd = ds1286_rtc_read(priv, RTC_CMD); + ds1286_rtc_read(priv, RTC_CMD); spin_unlock_irqrestore(&priv->lock, flags); alm->time.tm_min = bcd2bin(alm->time.tm_min); @@ -334,56 +331,30 @@ static int ds1286_probe(struct platform_device *pdev) struct rtc_device *rtc; struct resource *res; struct ds1286_priv *priv; - int ret = 0; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -ENODEV; - priv = kzalloc(sizeof(struct ds1286_priv), GFP_KERNEL); + priv = devm_kzalloc(&pdev->dev, sizeof(struct ds1286_priv), GFP_KERNEL); if (!priv) return -ENOMEM; - priv->size = resource_size(res); - if (!request_mem_region(res->start, priv->size, pdev->name)) { - ret = -EBUSY; - goto out; - } - priv->baseaddr = res->start; - priv->rtcregs = ioremap(priv->baseaddr, priv->size); - if (!priv->rtcregs) { - ret = -ENOMEM; - goto out; - } + priv->rtcregs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->rtcregs)) + return PTR_ERR(priv->rtcregs); + spin_lock_init(&priv->lock); platform_set_drvdata(pdev, priv); - rtc = rtc_device_register("ds1286", &pdev->dev, - &ds1286_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - ret = PTR_ERR(rtc); - goto out; - } + rtc = devm_rtc_device_register(&pdev->dev, "ds1286", &ds1286_ops, + THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); priv->rtc = rtc; return 0; - -out: - if (priv->rtc) - rtc_device_unregister(priv->rtc); - if (priv->rtcregs) - iounmap(priv->rtcregs); - if (priv->baseaddr) - release_mem_region(priv->baseaddr, priv->size); - kfree(priv); - return ret; } static int ds1286_remove(struct platform_device *pdev) { - struct ds1286_priv *priv = platform_get_drvdata(pdev); - - rtc_device_unregister(priv->rtc); - iounmap(priv->rtcregs); - release_mem_region(priv->baseaddr, priv->size); - kfree(priv); return 0; } diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index fdbcdb289d60..d13954346286 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -224,7 +224,7 @@ static int __init ds1302_rtc_probe(struct platform_device *pdev) return -ENODEV; } - rtc = rtc_device_register("ds1302", &pdev->dev, + rtc = devm_rtc_device_register(&pdev->dev, "ds1302", &ds1302_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -234,11 +234,8 @@ static int __init ds1302_rtc_probe(struct platform_device *pdev) return 0; } -static int ds1302_rtc_remove(struct platform_device *pdev) +static int __exit ds1302_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(rtc); platform_set_drvdata(pdev, NULL); return 0; @@ -249,21 +246,10 @@ static struct platform_driver ds1302_platform_driver = { .name = DRV_NAME, .owner = THIS_MODULE, }, - .remove = ds1302_rtc_remove, + .remove = __exit_p(ds1302_rtc_remove), }; -static int __init ds1302_rtc_init(void) -{ - return platform_driver_probe(&ds1302_platform_driver, ds1302_rtc_probe); -} - -static void __exit ds1302_rtc_exit(void) -{ - platform_driver_unregister(&ds1302_platform_driver); -} - -module_init(ds1302_rtc_init); -module_exit(ds1302_rtc_exit); +module_platform_driver_probe(ds1302_platform_driver, ds1302_rtc_probe); MODULE_DESCRIPTION("Dallas DS1302 RTC driver"); MODULE_VERSION(DRV_VERSION); diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index b05a6dc96405..bb5f13f63630 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -619,7 +619,7 @@ static int ds1305_probe(struct spi_device *spi) return -EINVAL; /* set up driver data */ - ds1305 = kzalloc(sizeof *ds1305, GFP_KERNEL); + ds1305 = devm_kzalloc(&spi->dev, sizeof(*ds1305), GFP_KERNEL); if (!ds1305) return -ENOMEM; ds1305->spi = spi; @@ -632,7 +632,7 @@ static int ds1305_probe(struct spi_device *spi) if (status < 0) { dev_dbg(&spi->dev, "can't %s, %d\n", "read", status); - goto fail0; + return status; } dev_dbg(&spi->dev, "ctrl %s: %3ph\n", "read", ds1305->ctrl); @@ -644,8 +644,7 @@ static int ds1305_probe(struct spi_device *spi) */ if ((ds1305->ctrl[0] & 0x38) != 0 || (ds1305->ctrl[1] & 0xfc) != 0) { dev_dbg(&spi->dev, "RTC chip is not present\n"); - status = -ENODEV; - goto fail0; + return -ENODEV; } if (ds1305->ctrl[2] == 0) dev_dbg(&spi->dev, "chip may not be present\n"); @@ -664,7 +663,7 @@ static int ds1305_probe(struct spi_device *spi) dev_dbg(&spi->dev, "clear WP --> %d\n", status); if (status < 0) - goto fail0; + return status; } /* on DS1305, maybe start oscillator; like most low power @@ -718,7 +717,7 @@ static int ds1305_probe(struct spi_device *spi) if (status < 0) { dev_dbg(&spi->dev, "can't %s, %d\n", "write", status); - goto fail0; + return status; } dev_dbg(&spi->dev, "ctrl %s: %3ph\n", "write", ds1305->ctrl); @@ -730,7 +729,7 @@ static int ds1305_probe(struct spi_device *spi) &value, sizeof value); if (status < 0) { dev_dbg(&spi->dev, "read HOUR --> %d\n", status); - goto fail0; + return status; } ds1305->hr12 = (DS1305_HR_12 & value) != 0; @@ -738,12 +737,12 @@ static int ds1305_probe(struct spi_device *spi) dev_dbg(&spi->dev, "AM/PM\n"); /* register RTC ... from here on, ds1305->ctrl needs locking */ - ds1305->rtc = rtc_device_register("ds1305", &spi->dev, + ds1305->rtc = devm_rtc_device_register(&spi->dev, "ds1305", &ds1305_ops, THIS_MODULE); if (IS_ERR(ds1305->rtc)) { status = PTR_ERR(ds1305->rtc); dev_dbg(&spi->dev, "register rtc --> %d\n", status); - goto fail0; + return status; } /* Maybe set up alarm IRQ; be ready to handle it triggering right @@ -754,12 +753,12 @@ static int ds1305_probe(struct spi_device *spi) */ if (spi->irq) { INIT_WORK(&ds1305->work, ds1305_work); - status = request_irq(spi->irq, ds1305_irq, + status = devm_request_irq(&spi->dev, spi->irq, ds1305_irq, 0, dev_name(&ds1305->rtc->dev), ds1305); if (status < 0) { dev_dbg(&spi->dev, "request_irq %d --> %d\n", spi->irq, status); - goto fail1; + return status; } device_set_wakeup_capable(&spi->dev, 1); @@ -769,18 +768,10 @@ static int ds1305_probe(struct spi_device *spi) status = sysfs_create_bin_file(&spi->dev.kobj, &nvram); if (status < 0) { dev_dbg(&spi->dev, "register nvram --> %d\n", status); - goto fail2; + return status; } return 0; - -fail2: - free_irq(spi->irq, ds1305); -fail1: - rtc_device_unregister(ds1305->rtc); -fail0: - kfree(ds1305); - return status; } static int ds1305_remove(struct spi_device *spi) @@ -792,13 +783,11 @@ static int ds1305_remove(struct spi_device *spi) /* carefully shut down irq and workqueue, if present */ if (spi->irq) { set_bit(FLAG_EXITING, &ds1305->flags); - free_irq(spi->irq, ds1305); + devm_free_irq(&spi->dev, spi->irq, ds1305); cancel_work_sync(&ds1305->work); } - rtc_device_unregister(ds1305->rtc); spi_set_drvdata(spi, NULL); - kfree(ds1305); return 0; } diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 970a236b147a..b53992ab3090 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -4,6 +4,7 @@ * Copyright (C) 2005 James Chapman (ds1337 core) * Copyright (C) 2006 David Brownell * Copyright (C) 2009 Matthias Fuchs (rx8025 support) + * Copyright (C) 2012 Bertrand Achard (nvram access fixes) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -196,7 +197,7 @@ static s32 ds1307_read_block_data_once(const struct i2c_client *client, static s32 ds1307_read_block_data(const struct i2c_client *client, u8 command, u8 length, u8 *values) { - u8 oldvalues[I2C_SMBUS_BLOCK_MAX]; + u8 oldvalues[255]; s32 ret; int tries = 0; @@ -222,7 +223,7 @@ static s32 ds1307_read_block_data(const struct i2c_client *client, u8 command, static s32 ds1307_write_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values) { - u8 currvalues[I2C_SMBUS_BLOCK_MAX]; + u8 currvalues[255]; int tries = 0; dev_dbg(&client->dev, "ds1307_write_block_data (length=%d)\n", length); @@ -250,6 +251,57 @@ static s32 ds1307_write_block_data(const struct i2c_client *client, u8 command, /*----------------------------------------------------------------------*/ +/* These RTC devices are not designed to be connected to a SMbus adapter. + SMbus limits block operations length to 32 bytes, whereas it's not + limited on I2C buses. As a result, accesses may exceed 32 bytes; + in that case, split them into smaller blocks */ + +static s32 ds1307_native_smbus_write_block_data(const struct i2c_client *client, + u8 command, u8 length, const u8 *values) +{ + u8 suboffset = 0; + + if (length <= I2C_SMBUS_BLOCK_MAX) + return i2c_smbus_write_i2c_block_data(client, + command, length, values); + + while (suboffset < length) { + s32 retval = i2c_smbus_write_i2c_block_data(client, + command + suboffset, + min(I2C_SMBUS_BLOCK_MAX, length - suboffset), + values + suboffset); + if (retval < 0) + return retval; + + suboffset += I2C_SMBUS_BLOCK_MAX; + } + return length; +} + +static s32 ds1307_native_smbus_read_block_data(const struct i2c_client *client, + u8 command, u8 length, u8 *values) +{ + u8 suboffset = 0; + + if (length <= I2C_SMBUS_BLOCK_MAX) + return i2c_smbus_read_i2c_block_data(client, + command, length, values); + + while (suboffset < length) { + s32 retval = i2c_smbus_read_i2c_block_data(client, + command + suboffset, + min(I2C_SMBUS_BLOCK_MAX, length - suboffset), + values + suboffset); + if (retval < 0) + return retval; + + suboffset += I2C_SMBUS_BLOCK_MAX; + } + return length; +} + +/*----------------------------------------------------------------------*/ + /* * The IRQ logic includes a "real" handler running in IRQ context just * long enough to schedule this workqueue entry. We need a task context @@ -646,8 +698,8 @@ static int ds1307_probe(struct i2c_client *client, buf = ds1307->regs; if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) { - ds1307->read_block_data = i2c_smbus_read_i2c_block_data; - ds1307->write_block_data = i2c_smbus_write_i2c_block_data; + ds1307->read_block_data = ds1307_native_smbus_read_block_data; + ds1307->write_block_data = ds1307_native_smbus_write_block_data; } else { ds1307->read_block_data = ds1307_read_block_data; ds1307->write_block_data = ds1307_write_block_data; @@ -661,7 +713,7 @@ static int ds1307_probe(struct i2c_client *client, tmp = ds1307->read_block_data(ds1307->client, DS1337_REG_CONTROL, 2, buf); if (tmp != 2) { - pr_debug("read error %d\n", tmp); + dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; goto exit_free; } @@ -700,7 +752,7 @@ static int ds1307_probe(struct i2c_client *client, tmp = i2c_smbus_read_i2c_block_data(ds1307->client, RX8025_REG_CTRL1 << 4 | 0x08, 2, buf); if (tmp != 2) { - pr_debug("read error %d\n", tmp); + dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; goto exit_free; } @@ -744,7 +796,7 @@ static int ds1307_probe(struct i2c_client *client, tmp = i2c_smbus_read_i2c_block_data(ds1307->client, RX8025_REG_CTRL1 << 4 | 0x08, 2, buf); if (tmp != 2) { - pr_debug("read error %d\n", tmp); + dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; goto exit_free; } @@ -772,7 +824,7 @@ read_rtc: /* read RTC registers */ tmp = ds1307->read_block_data(ds1307->client, ds1307->offset, 8, buf); if (tmp != 8) { - pr_debug("read error %d\n", tmp); + dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; goto exit_free; } @@ -814,7 +866,7 @@ read_rtc: tmp = i2c_smbus_read_byte_data(client, DS1340_REG_FLAG); if (tmp < 0) { - pr_debug("read error %d\n", tmp); + dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; goto exit_free; } @@ -908,8 +960,8 @@ read_rtc: ds1307->nvram->attr.name = "nvram"; ds1307->nvram->attr.mode = S_IRUGO | S_IWUSR; sysfs_bin_attr_init(ds1307->nvram); - ds1307->nvram->read = ds1307_nvram_read, - ds1307->nvram->write = ds1307_nvram_write, + ds1307->nvram->read = ds1307_nvram_read; + ds1307->nvram->write = ds1307_nvram_write; ds1307->nvram->size = chip->nvram_size; ds1307->nvram_offset = chip->nvram_offset; err = sysfs_create_bin_file(&client->dev.kobj, ds1307->nvram); diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index fef76868aae0..94366e12f40f 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -347,7 +347,7 @@ static int ds1374_probe(struct i2c_client *client, struct ds1374 *ds1374; int ret; - ds1374 = kzalloc(sizeof(struct ds1374), GFP_KERNEL); + ds1374 = devm_kzalloc(&client->dev, sizeof(struct ds1374), GFP_KERNEL); if (!ds1374) return -ENOMEM; @@ -359,36 +359,27 @@ static int ds1374_probe(struct i2c_client *client, ret = ds1374_check_rtc_status(client); if (ret) - goto out_free; + return ret; if (client->irq > 0) { - ret = request_irq(client->irq, ds1374_irq, 0, + ret = devm_request_irq(&client->dev, client->irq, ds1374_irq, 0, "ds1374", client); if (ret) { dev_err(&client->dev, "unable to request IRQ\n"); - goto out_free; + return ret; } device_set_wakeup_capable(&client->dev, 1); } - ds1374->rtc = rtc_device_register(client->name, &client->dev, + ds1374->rtc = devm_rtc_device_register(&client->dev, client->name, &ds1374_rtc_ops, THIS_MODULE); if (IS_ERR(ds1374->rtc)) { - ret = PTR_ERR(ds1374->rtc); dev_err(&client->dev, "unable to register the class device\n"); - goto out_irq; + return PTR_ERR(ds1374->rtc); } return 0; - -out_irq: - if (client->irq > 0) - free_irq(client->irq, client); - -out_free: - kfree(ds1374); - return ret; } static int ds1374_remove(struct i2c_client *client) @@ -400,16 +391,14 @@ static int ds1374_remove(struct i2c_client *client) ds1374->exiting = 1; mutex_unlock(&ds1374->mutex); - free_irq(client->irq, client); + devm_free_irq(&client->dev, client->irq, client); cancel_work_sync(&ds1374->work); } - rtc_device_unregister(ds1374->rtc); - kfree(ds1374); return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int ds1374_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -427,19 +416,15 @@ static int ds1374_resume(struct device *dev) disable_irq_wake(client->irq); return 0; } +#endif static SIMPLE_DEV_PM_OPS(ds1374_pm, ds1374_suspend, ds1374_resume); -#define DS1374_PM (&ds1374_pm) -#else -#define DS1374_PM NULL -#endif - static struct i2c_driver ds1374_driver = { .driver = { .name = "rtc-ds1374", .owner = THIS_MODULE, - .pm = DS1374_PM, + .pm = &ds1374_pm, }, .probe = ds1374_probe, .remove = ds1374_remove, diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c index f994257981a0..289af419dff4 100644 --- a/drivers/rtc/rtc-ds1390.c +++ b/drivers/rtc/rtc-ds1390.c @@ -131,26 +131,24 @@ static int ds1390_probe(struct spi_device *spi) spi->bits_per_word = 8; spi_setup(spi); - chip = kzalloc(sizeof *chip, GFP_KERNEL); + chip = devm_kzalloc(&spi->dev, sizeof(*chip), GFP_KERNEL); if (!chip) { dev_err(&spi->dev, "unable to allocate device memory\n"); return -ENOMEM; } - dev_set_drvdata(&spi->dev, chip); + spi_set_drvdata(spi, chip); res = ds1390_get_reg(&spi->dev, DS1390_REG_SECONDS, &tmp); if (res != 0) { dev_err(&spi->dev, "unable to read device\n"); - kfree(chip); return res; } - chip->rtc = rtc_device_register("ds1390", - &spi->dev, &ds1390_rtc_ops, THIS_MODULE); + chip->rtc = devm_rtc_device_register(&spi->dev, "ds1390", + &ds1390_rtc_ops, THIS_MODULE); if (IS_ERR(chip->rtc)) { dev_err(&spi->dev, "unable to register device\n"); res = PTR_ERR(chip->rtc); - kfree(chip); } return res; @@ -158,11 +156,6 @@ static int ds1390_probe(struct spi_device *spi) static int ds1390_remove(struct spi_device *spi) { - struct ds1390 *chip = spi_get_drvdata(spi); - - rtc_device_unregister(chip->rtc); - kfree(chip); - return 0; } diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c index 6a3fcfe3b0e7..6ce8a997cf51 100644 --- a/drivers/rtc/rtc-ds1511.c +++ b/drivers/rtc/rtc-ds1511.c @@ -538,15 +538,14 @@ static int ds1511_rtc_probe(struct platform_device *pdev) } } - rtc = rtc_device_register(pdev->name, &pdev->dev, &ds1511_rtc_ops, - THIS_MODULE); + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &ds1511_rtc_ops, + THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); pdata->rtc = rtc; ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1511_nvram_attr); - if (ret) - rtc_device_unregister(pdata->rtc); + return ret; } @@ -555,7 +554,6 @@ static int ds1511_rtc_remove(struct platform_device *pdev) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); sysfs_remove_bin_file(&pdev->dev.kobj, &ds1511_nvram_attr); - rtc_device_unregister(pdata->rtc); if (pdata->irq > 0) { /* * disable the alarm interrupt diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index 25ce0621ade9..8c6c952e90b1 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -326,15 +326,14 @@ static int ds1553_rtc_probe(struct platform_device *pdev) } } - rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &ds1553_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); pdata->rtc = rtc; ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr); - if (ret) - rtc_device_unregister(rtc); + return ret; } @@ -343,7 +342,6 @@ static int ds1553_rtc_remove(struct platform_device *pdev) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr); - rtc_device_unregister(pdata->rtc); if (pdata->irq > 0) writeb(0, pdata->ioaddr + RTC_INTERRUPTS); return 0; diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index 45d65c0b3a85..3fc2a4738027 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -155,11 +155,6 @@ static const struct rtc_class_ops ds1672_rtc_ops = { static int ds1672_remove(struct i2c_client *client) { - struct rtc_device *rtc = i2c_get_clientdata(client); - - if (rtc) - rtc_device_unregister(rtc); - return 0; } @@ -177,7 +172,7 @@ static int ds1672_probe(struct i2c_client *client, dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); - rtc = rtc_device_register(ds1672_driver.driver.name, &client->dev, + rtc = devm_rtc_device_register(&client->dev, ds1672_driver.driver.name, &ds1672_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) @@ -202,7 +197,6 @@ static int ds1672_probe(struct i2c_client *client, return 0; exit_devreg: - rtc_device_unregister(rtc); return err; } diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 609c870e2cc5..eccdc62ae1c0 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -208,17 +208,14 @@ static int ds1742_rtc_probe(struct platform_device *pdev) pdata->last_jiffies = jiffies; platform_set_drvdata(pdev, pdata); - rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &ds1742_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); pdata->rtc = rtc; ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr); - if (ret) { - dev_err(&pdev->dev, "creating nvram file in sysfs failed\n"); - rtc_device_unregister(rtc); - } + return ret; } @@ -227,7 +224,6 @@ static int ds1742_rtc_remove(struct platform_device *pdev) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); sysfs_remove_bin_file(&pdev->dev.kobj, &pdata->nvram_attr); - rtc_device_unregister(pdata->rtc); return 0; } diff --git a/drivers/rtc/rtc-ds2404.c b/drivers/rtc/rtc-ds2404.c index b04fc4272fb3..2ca5a23aba8a 100644 --- a/drivers/rtc/rtc-ds2404.c +++ b/drivers/rtc/rtc-ds2404.c @@ -228,7 +228,7 @@ static int rtc_probe(struct platform_device *pdev) struct ds2404 *chip; int retval = -EBUSY; - chip = kzalloc(sizeof(struct ds2404), GFP_KERNEL); + chip = devm_kzalloc(&pdev->dev, sizeof(struct ds2404), GFP_KERNEL); if (!chip) return -ENOMEM; @@ -244,8 +244,8 @@ static int rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, chip); - chip->rtc = rtc_device_register("ds2404", - &pdev->dev, &ds2404_rtc_ops, THIS_MODULE); + chip->rtc = devm_rtc_device_register(&pdev->dev, "ds2404", + &ds2404_rtc_ops, THIS_MODULE); if (IS_ERR(chip->rtc)) { retval = PTR_ERR(chip->rtc); goto err_io; @@ -257,20 +257,14 @@ static int rtc_probe(struct platform_device *pdev) err_io: chip->ops->unmap_io(chip); err_chip: - kfree(chip); return retval; } static int rtc_remove(struct platform_device *dev) { struct ds2404 *chip = platform_get_drvdata(dev); - struct rtc_device *rtc = chip->rtc; - - if (rtc) - rtc_device_unregister(rtc); chip->ops->unmap_io(chip); - kfree(chip); return 0; } diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index db0ca08db315..b83bb5a527f8 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -397,7 +397,7 @@ static int ds3232_probe(struct i2c_client *client, struct ds3232 *ds3232; int ret; - ds3232 = kzalloc(sizeof(struct ds3232), GFP_KERNEL); + ds3232 = devm_kzalloc(&client->dev, sizeof(struct ds3232), GFP_KERNEL); if (!ds3232) return -ENOMEM; @@ -409,34 +409,25 @@ static int ds3232_probe(struct i2c_client *client, ret = ds3232_check_rtc_status(client); if (ret) - goto out_free; + return ret; - ds3232->rtc = rtc_device_register(client->name, &client->dev, + ds3232->rtc = devm_rtc_device_register(&client->dev, client->name, &ds3232_rtc_ops, THIS_MODULE); if (IS_ERR(ds3232->rtc)) { - ret = PTR_ERR(ds3232->rtc); dev_err(&client->dev, "unable to register the class device\n"); - goto out_irq; + return PTR_ERR(ds3232->rtc); } if (client->irq >= 0) { - ret = request_irq(client->irq, ds3232_irq, 0, + ret = devm_request_irq(&client->dev, client->irq, ds3232_irq, 0, "ds3232", client); if (ret) { dev_err(&client->dev, "unable to request IRQ\n"); - goto out_free; + return ret; } } return 0; - -out_irq: - if (client->irq >= 0) - free_irq(client->irq, client); - -out_free: - kfree(ds3232); - return ret; } static int ds3232_remove(struct i2c_client *client) @@ -448,12 +439,10 @@ static int ds3232_remove(struct i2c_client *client) ds3232->exiting = 1; mutex_unlock(&ds3232->mutex); - free_irq(client->irq, client); + devm_free_irq(&client->dev, client->irq, client); cancel_work_sync(&ds3232->work); } - rtc_device_unregister(ds3232->rtc); - kfree(ds3232); return 0; } diff --git a/drivers/rtc/rtc-ds3234.c b/drivers/rtc/rtc-ds3234.c index 7a4495ef1c39..ba98c0e9580d 100644 --- a/drivers/rtc/rtc-ds3234.c +++ b/drivers/rtc/rtc-ds3234.c @@ -146,21 +146,18 @@ static int ds3234_probe(struct spi_device *spi) ds3234_get_reg(&spi->dev, DS3234_REG_CONT_STAT, &tmp); dev_info(&spi->dev, "Ctrl/Stat Reg: 0x%02x\n", tmp); - rtc = rtc_device_register("ds3234", - &spi->dev, &ds3234_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&spi->dev, "ds3234", + &ds3234_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); - dev_set_drvdata(&spi->dev, rtc); + spi_set_drvdata(spi, rtc); return 0; } static int ds3234_remove(struct spi_device *spi) { - struct rtc_device *rtc = spi_get_drvdata(spi); - - rtc_device_unregister(rtc); return 0; } diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index 1a0c37c9152b..b3c8c0b1709d 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -191,7 +191,7 @@ static int __init efi_rtc_probe(struct platform_device *dev) { struct rtc_device *rtc; - rtc = rtc_device_register("rtc-efi", &dev->dev, &efi_rtc_ops, + rtc = devm_rtc_device_register(&dev->dev, "rtc-efi", &efi_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -203,10 +203,6 @@ static int __init efi_rtc_probe(struct platform_device *dev) static int __exit efi_rtc_remove(struct platform_device *dev) { - struct rtc_device *rtc = platform_get_drvdata(dev); - - rtc_device_unregister(rtc); - return 0; } @@ -218,18 +214,7 @@ static struct platform_driver efi_rtc_driver = { .remove = __exit_p(efi_rtc_remove), }; -static int __init efi_rtc_init(void) -{ - return platform_driver_probe(&efi_rtc_driver, efi_rtc_probe); -} - -static void __exit efi_rtc_exit(void) -{ - platform_driver_unregister(&efi_rtc_driver); -} - -module_init(efi_rtc_init); -module_exit(efi_rtc_exit); +module_platform_driver_probe(efi_rtc_driver, efi_rtc_probe); MODULE_AUTHOR("dann frazier <dannf@hp.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-em3027.c b/drivers/rtc/rtc-em3027.c index f6c24ce35d36..3f9eb57d0486 100644 --- a/drivers/rtc/rtc-em3027.c +++ b/drivers/rtc/rtc-em3027.c @@ -121,7 +121,7 @@ static int em3027_probe(struct i2c_client *client, if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -ENODEV; - rtc = rtc_device_register(em3027_driver.driver.name, &client->dev, + rtc = devm_rtc_device_register(&client->dev, em3027_driver.driver.name, &em3027_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -133,11 +133,6 @@ static int em3027_probe(struct i2c_client *client, static int em3027_remove(struct i2c_client *client) { - struct rtc_device *rtc = i2c_get_clientdata(client); - - if (rtc) - rtc_device_unregister(rtc); - return 0; } diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 1a4e5e4a70cd..5807b77c444a 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -153,8 +153,8 @@ static int ep93xx_rtc_probe(struct platform_device *pdev) pdev->dev.platform_data = ep93xx_rtc; platform_set_drvdata(pdev, ep93xx_rtc); - ep93xx_rtc->rtc = rtc_device_register(pdev->name, - &pdev->dev, &ep93xx_rtc_ops, THIS_MODULE); + ep93xx_rtc->rtc = devm_rtc_device_register(&pdev->dev, + pdev->name, &ep93xx_rtc_ops, THIS_MODULE); if (IS_ERR(ep93xx_rtc->rtc)) { err = PTR_ERR(ep93xx_rtc->rtc); goto exit; @@ -162,12 +162,10 @@ static int ep93xx_rtc_probe(struct platform_device *pdev) err = sysfs_create_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files); if (err) - goto fail; + goto exit; return 0; -fail: - rtc_device_unregister(ep93xx_rtc->rtc); exit: platform_set_drvdata(pdev, NULL); pdev->dev.platform_data = NULL; @@ -176,11 +174,8 @@ exit: static int ep93xx_rtc_remove(struct platform_device *pdev) { - struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev); - sysfs_remove_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files); platform_set_drvdata(pdev, NULL); - rtc_device_unregister(ep93xx_rtc->rtc); pdev->dev.platform_data = NULL; return 0; diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c index bff3cdc5140e..2835fb6c1965 100644 --- a/drivers/rtc/rtc-fm3130.c +++ b/drivers/rtc/rtc-fm3130.c @@ -358,7 +358,7 @@ static int fm3130_probe(struct i2c_client *client, I2C_FUNC_I2C | I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) return -EIO; - fm3130 = kzalloc(sizeof(struct fm3130), GFP_KERNEL); + fm3130 = devm_kzalloc(&client->dev, sizeof(struct fm3130), GFP_KERNEL); if (!fm3130) return -ENOMEM; @@ -395,7 +395,7 @@ static int fm3130_probe(struct i2c_client *client, tmp = i2c_transfer(adapter, fm3130->msg, 4); if (tmp != 4) { - pr_debug("read error %d\n", tmp); + dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; goto exit_free; } @@ -507,7 +507,7 @@ bad_clock: /* We won't bail out here because we just got invalid data. Time setting from u-boot doesn't work anyway */ - fm3130->rtc = rtc_device_register(client->name, &client->dev, + fm3130->rtc = devm_rtc_device_register(&client->dev, client->name, &fm3130_rtc_ops, THIS_MODULE); if (IS_ERR(fm3130->rtc)) { err = PTR_ERR(fm3130->rtc); @@ -517,16 +517,11 @@ bad_clock: } return 0; exit_free: - kfree(fm3130); return err; } static int fm3130_remove(struct i2c_client *client) { - struct fm3130 *fm3130 = i2c_get_clientdata(client); - - rtc_device_unregister(fm3130->rtc); - kfree(fm3130); return 0; } diff --git a/drivers/rtc/rtc-generic.c b/drivers/rtc/rtc-generic.c index 98322004ad2e..06279ce6bff2 100644 --- a/drivers/rtc/rtc-generic.c +++ b/drivers/rtc/rtc-generic.c @@ -38,8 +38,8 @@ static int __init generic_rtc_probe(struct platform_device *dev) { struct rtc_device *rtc; - rtc = rtc_device_register("rtc-generic", &dev->dev, &generic_rtc_ops, - THIS_MODULE); + rtc = devm_rtc_device_register(&dev->dev, "rtc-generic", + &generic_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -50,10 +50,6 @@ static int __init generic_rtc_probe(struct platform_device *dev) static int __exit generic_rtc_remove(struct platform_device *dev) { - struct rtc_device *rtc = platform_get_drvdata(dev); - - rtc_device_unregister(rtc); - return 0; } @@ -65,18 +61,7 @@ static struct platform_driver generic_rtc_driver = { .remove = __exit_p(generic_rtc_remove), }; -static int __init generic_rtc_init(void) -{ - return platform_driver_probe(&generic_rtc_driver, generic_rtc_probe); -} - -static void __exit generic_rtc_fini(void) -{ - platform_driver_unregister(&generic_rtc_driver); -} - -module_init(generic_rtc_init); -module_exit(generic_rtc_fini); +module_platform_driver_probe(generic_rtc_driver, generic_rtc_probe); MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>"); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 31c5728ef629..63024505dddc 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -255,8 +255,9 @@ static int hid_time_probe(struct platform_device *pdev) return ret; } - time_state->rtc = rtc_device_register("hid-sensor-time", - &pdev->dev, &hid_time_rtc_ops, THIS_MODULE); + time_state->rtc = devm_rtc_device_register(&pdev->dev, + "hid-sensor-time", &hid_time_rtc_ops, + THIS_MODULE); if (IS_ERR(time_state->rtc)) { dev_err(&pdev->dev, "rtc device register failed!\n"); @@ -269,9 +270,7 @@ static int hid_time_probe(struct platform_device *pdev) static int hid_time_remove(struct platform_device *pdev) { struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; - struct hid_time_state *time_state = platform_get_drvdata(pdev); - rtc_device_unregister(time_state->rtc); sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_TIME); return 0; diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 82aad695979e..d3a8c8e255de 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -369,7 +369,7 @@ static void dryice_work(struct work_struct *work) /* * probe for dryice rtc device */ -static int dryice_rtc_probe(struct platform_device *pdev) +static int __init dryice_rtc_probe(struct platform_device *pdev) { struct resource *res; struct imxdi_dev *imxdi; @@ -464,7 +464,7 @@ static int dryice_rtc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, imxdi); - imxdi->rtc = rtc_device_register(pdev->name, &pdev->dev, + imxdi->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &dryice_rtc_ops, THIS_MODULE); if (IS_ERR(imxdi->rtc)) { rc = PTR_ERR(imxdi->rtc); @@ -479,7 +479,7 @@ err: return rc; } -static int dryice_rtc_remove(struct platform_device *pdev) +static int __exit dryice_rtc_remove(struct platform_device *pdev) { struct imxdi_dev *imxdi = platform_get_drvdata(pdev); @@ -488,8 +488,6 @@ static int dryice_rtc_remove(struct platform_device *pdev) /* mask all interrupts */ __raw_writel(0, imxdi->ioaddr + DIER); - rtc_device_unregister(imxdi->rtc); - clk_disable_unprepare(imxdi->clk); return 0; @@ -510,21 +508,10 @@ static struct platform_driver dryice_rtc_driver = { .owner = THIS_MODULE, .of_match_table = of_match_ptr(dryice_dt_ids), }, - .remove = dryice_rtc_remove, + .remove = __exit_p(dryice_rtc_remove), }; -static int __init dryice_rtc_init(void) -{ - return platform_driver_probe(&dryice_rtc_driver, dryice_rtc_probe); -} - -static void __exit dryice_rtc_exit(void) -{ - platform_driver_unregister(&dryice_rtc_driver); -} - -module_init(dryice_rtc_init); -module_exit(dryice_rtc_exit); +module_platform_driver_probe(dryice_rtc_driver, dryice_rtc_probe); MODULE_AUTHOR("Freescale Semiconductor, Inc."); MODULE_AUTHOR("Baruch Siach <baruch@tkos.co.il>"); diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c index 6b4298ea683d..1af0aa5cbd27 100644 --- a/drivers/rtc/rtc-isl12022.c +++ b/drivers/rtc/rtc-isl12022.c @@ -252,12 +252,11 @@ static int isl12022_probe(struct i2c_client *client, { struct isl12022 *isl12022; - int ret = 0; - if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -ENODEV; - isl12022 = kzalloc(sizeof(struct isl12022), GFP_KERNEL); + isl12022 = devm_kzalloc(&client->dev, sizeof(struct isl12022), + GFP_KERNEL); if (!isl12022) return -ENOMEM; @@ -265,31 +264,17 @@ static int isl12022_probe(struct i2c_client *client, i2c_set_clientdata(client, isl12022); - isl12022->rtc = rtc_device_register(isl12022_driver.driver.name, - &client->dev, - &isl12022_rtc_ops, - THIS_MODULE); - - if (IS_ERR(isl12022->rtc)) { - ret = PTR_ERR(isl12022->rtc); - goto exit_kfree; - } + isl12022->rtc = devm_rtc_device_register(&client->dev, + isl12022_driver.driver.name, + &isl12022_rtc_ops, THIS_MODULE); + if (IS_ERR(isl12022->rtc)) + return PTR_ERR(isl12022->rtc); return 0; - -exit_kfree: - kfree(isl12022); - - return ret; } static int isl12022_remove(struct i2c_client *client) { - struct isl12022 *isl12022 = i2c_get_clientdata(client); - - rtc_device_unregister(isl12022->rtc); - kfree(isl12022); - return 0; } diff --git a/drivers/rtc/rtc-lp8788.c b/drivers/rtc/rtc-lp8788.c index 9a4631218f41..9853ac15b296 100644 --- a/drivers/rtc/rtc-lp8788.c +++ b/drivers/rtc/rtc-lp8788.c @@ -299,7 +299,7 @@ static int lp8788_rtc_probe(struct platform_device *pdev) device_init_wakeup(dev, 1); - rtc->rdev = rtc_device_register("lp8788_rtc", dev, + rtc->rdev = devm_rtc_device_register(dev, "lp8788_rtc", &lp8788_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rdev)) { dev_err(dev, "can not register rtc device\n"); @@ -314,9 +314,6 @@ static int lp8788_rtc_probe(struct platform_device *pdev) static int lp8788_rtc_remove(struct platform_device *pdev) { - struct lp8788_rtc *rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(rtc->rdev); platform_set_drvdata(pdev, NULL); return 0; diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c index 40a598332bac..787550d756e9 100644 --- a/drivers/rtc/rtc-lpc32xx.c +++ b/drivers/rtc/rtc-lpc32xx.c @@ -273,8 +273,8 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); - rtc->rtc = rtc_device_register(RTC_NAME, &pdev->dev, &lpc32xx_rtc_ops, - THIS_MODULE); + rtc->rtc = devm_rtc_device_register(&pdev->dev, RTC_NAME, + &lpc32xx_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { dev_err(&pdev->dev, "Can't get RTC\n"); platform_set_drvdata(pdev, NULL); @@ -307,7 +307,6 @@ static int lpc32xx_rtc_remove(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 0); platform_set_drvdata(pdev, NULL); - rtc_device_unregister(rtc->rtc); return 0; } diff --git a/drivers/rtc/rtc-ls1x.c b/drivers/rtc/rtc-ls1x.c index f59b6349551a..db82f91f4562 100644 --- a/drivers/rtc/rtc-ls1x.c +++ b/drivers/rtc/rtc-ls1x.c @@ -172,7 +172,7 @@ static int ls1x_rtc_probe(struct platform_device *pdev) while (readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_TTS) usleep_range(1000, 3000); - rtcdev = rtc_device_register("ls1x-rtc", &pdev->dev, + rtcdev = devm_rtc_device_register(&pdev->dev, "ls1x-rtc", &ls1x_rtc_ops , THIS_MODULE); if (IS_ERR(rtcdev)) { ret = PTR_ERR(rtcdev); @@ -187,9 +187,6 @@ err: static int ls1x_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtcdev = platform_get_drvdata(pdev); - - rtc_device_unregister(rtcdev); platform_set_drvdata(pdev, NULL); return 0; diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index b885bcd08908..89674b5e6efd 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -637,7 +637,8 @@ static int m41t80_probe(struct i2c_client *client, dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); - clientdata = kzalloc(sizeof(*clientdata), GFP_KERNEL); + clientdata = devm_kzalloc(&client->dev, sizeof(*clientdata), + GFP_KERNEL); if (!clientdata) { rc = -ENOMEM; goto exit; @@ -646,8 +647,8 @@ static int m41t80_probe(struct i2c_client *client, clientdata->features = id->driver_data; i2c_set_clientdata(client, clientdata); - rtc = rtc_device_register(client->name, &client->dev, - &m41t80_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&client->dev, client->name, + &m41t80_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { rc = PTR_ERR(rtc); rtc = NULL; @@ -718,26 +719,19 @@ ht_err: goto exit; exit: - if (rtc) - rtc_device_unregister(rtc); - kfree(clientdata); return rc; } static int m41t80_remove(struct i2c_client *client) { +#ifdef CONFIG_RTC_DRV_M41T80_WDT struct m41t80_data *clientdata = i2c_get_clientdata(client); - struct rtc_device *rtc = clientdata->rtc; -#ifdef CONFIG_RTC_DRV_M41T80_WDT if (clientdata->features & M41T80_FEATURE_HT) { misc_deregister(&wdt_dev); unregister_reboot_notifier(&wdt_notifier); } #endif - if (rtc) - rtc_device_unregister(rtc); - kfree(clientdata); return 0; } diff --git a/drivers/rtc/rtc-m41t93.c b/drivers/rtc/rtc-m41t93.c index 49169680786e..9707d36e8b15 100644 --- a/drivers/rtc/rtc-m41t93.c +++ b/drivers/rtc/rtc-m41t93.c @@ -184,12 +184,12 @@ static int m41t93_probe(struct spi_device *spi) return -ENODEV; } - rtc = rtc_device_register(m41t93_driver.driver.name, - &spi->dev, &m41t93_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&spi->dev, m41t93_driver.driver.name, + &m41t93_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); - dev_set_drvdata(&spi->dev, rtc); + spi_set_drvdata(spi, rtc); return 0; } @@ -197,11 +197,6 @@ static int m41t93_probe(struct spi_device *spi) static int m41t93_remove(struct spi_device *spi) { - struct rtc_device *rtc = spi_get_drvdata(spi); - - if (rtc) - rtc_device_unregister(rtc); - return 0; } diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c index 89266c6764bc..7454ef0a4cfa 100644 --- a/drivers/rtc/rtc-m41t94.c +++ b/drivers/rtc/rtc-m41t94.c @@ -124,23 +124,18 @@ static int m41t94_probe(struct spi_device *spi) return res; } - rtc = rtc_device_register(m41t94_driver.driver.name, - &spi->dev, &m41t94_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&spi->dev, m41t94_driver.driver.name, + &m41t94_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); - dev_set_drvdata(&spi->dev, rtc); + spi_set_drvdata(spi, rtc); return 0; } static int m41t94_remove(struct spi_device *spi) { - struct rtc_device *rtc = spi_get_drvdata(spi); - - if (rtc) - rtc_device_unregister(rtc); - return 0; } diff --git a/drivers/rtc/rtc-m48t35.c b/drivers/rtc/rtc-m48t35.c index 31c9190a1fcb..37444246e5e4 100644 --- a/drivers/rtc/rtc-m48t35.c +++ b/drivers/rtc/rtc-m48t35.c @@ -145,12 +145,11 @@ static int m48t35_probe(struct platform_device *pdev) { struct resource *res; struct m48t35_priv *priv; - int ret = 0; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -ENODEV; - priv = kzalloc(sizeof(struct m48t35_priv), GFP_KERNEL); + priv = devm_kzalloc(&pdev->dev, sizeof(struct m48t35_priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -160,50 +159,29 @@ static int m48t35_probe(struct platform_device *pdev) * conflicts are resolved */ #ifndef CONFIG_SGI_IP27 - if (!request_mem_region(res->start, priv->size, pdev->name)) { - ret = -EBUSY; - goto out; - } + if (!devm_request_mem_region(&pdev->dev, res->start, priv->size, + pdev->name)) + return -EBUSY; #endif priv->baseaddr = res->start; - priv->reg = ioremap(priv->baseaddr, priv->size); - if (!priv->reg) { - ret = -ENOMEM; - goto out; - } + priv->reg = devm_ioremap(&pdev->dev, priv->baseaddr, priv->size); + if (!priv->reg) + return -ENOMEM; spin_lock_init(&priv->lock); platform_set_drvdata(pdev, priv); - priv->rtc = rtc_device_register("m48t35", &pdev->dev, + priv->rtc = devm_rtc_device_register(&pdev->dev, "m48t35", &m48t35_ops, THIS_MODULE); - if (IS_ERR(priv->rtc)) { - ret = PTR_ERR(priv->rtc); - goto out; - } + if (IS_ERR(priv->rtc)) + return PTR_ERR(priv->rtc); return 0; - -out: - if (priv->reg) - iounmap(priv->reg); - if (priv->baseaddr) - release_mem_region(priv->baseaddr, priv->size); - kfree(priv); - return ret; } static int m48t35_remove(struct platform_device *pdev) { - struct m48t35_priv *priv = platform_get_drvdata(pdev); - - rtc_device_unregister(priv->rtc); - iounmap(priv->reg); -#ifndef CONFIG_SGI_IP27 - release_mem_region(priv->baseaddr, priv->size); -#endif - kfree(priv); return 0; } diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index 2ffbcacd2439..33a91c484533 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -148,8 +148,10 @@ static int m48t86_rtc_probe(struct platform_device *dev) { unsigned char reg; struct m48t86_ops *ops = dev->dev.platform_data; - struct rtc_device *rtc = rtc_device_register("m48t86", - &dev->dev, &m48t86_rtc_ops, THIS_MODULE); + struct rtc_device *rtc; + + rtc = devm_rtc_device_register(&dev->dev, "m48t86", + &m48t86_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -166,11 +168,6 @@ static int m48t86_rtc_probe(struct platform_device *dev) static int m48t86_rtc_remove(struct platform_device *dev) { - struct rtc_device *rtc = platform_get_drvdata(dev); - - if (rtc) - rtc_device_unregister(rtc); - platform_set_drvdata(dev, NULL); return 0; diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c index a00e33204b91..8669d6d09a00 100644 --- a/drivers/rtc/rtc-max6900.c +++ b/drivers/rtc/rtc-max6900.c @@ -214,11 +214,6 @@ static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm) static int max6900_remove(struct i2c_client *client) { - struct rtc_device *rtc = i2c_get_clientdata(client); - - if (rtc) - rtc_device_unregister(rtc); - return 0; } @@ -237,8 +232,8 @@ max6900_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); - rtc = rtc_device_register(max6900_driver.driver.name, - &client->dev, &max6900_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&client->dev, max6900_driver.driver.name, + &max6900_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c index 7d0bf698b79e..e3aea00c3145 100644 --- a/drivers/rtc/rtc-max6902.c +++ b/drivers/rtc/rtc-max6902.c @@ -93,24 +93,24 @@ static int max6902_set_time(struct device *dev, struct rtc_time *dt) dt->tm_year = dt->tm_year + 1900; /* Remove write protection */ - max6902_set_reg(dev, 0xF, 0); + max6902_set_reg(dev, MAX6902_REG_CONTROL, 0); - max6902_set_reg(dev, 0x01, bin2bcd(dt->tm_sec)); - max6902_set_reg(dev, 0x03, bin2bcd(dt->tm_min)); - max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour)); + max6902_set_reg(dev, MAX6902_REG_SECONDS, bin2bcd(dt->tm_sec)); + max6902_set_reg(dev, MAX6902_REG_MINUTES, bin2bcd(dt->tm_min)); + max6902_set_reg(dev, MAX6902_REG_HOURS, bin2bcd(dt->tm_hour)); - max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday)); - max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon + 1)); - max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday)); - max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year % 100)); - max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year / 100)); + max6902_set_reg(dev, MAX6902_REG_DATE, bin2bcd(dt->tm_mday)); + max6902_set_reg(dev, MAX6902_REG_MONTH, bin2bcd(dt->tm_mon + 1)); + max6902_set_reg(dev, MAX6902_REG_DAY, bin2bcd(dt->tm_wday)); + max6902_set_reg(dev, MAX6902_REG_YEAR, bin2bcd(dt->tm_year % 100)); + max6902_set_reg(dev, MAX6902_REG_CENTURY, bin2bcd(dt->tm_year / 100)); /* Compulab used a delay here. However, the datasheet * does not mention a delay being required anywhere... */ /* delay(2000); */ /* Write protect */ - max6902_set_reg(dev, 0xF, 0x80); + max6902_set_reg(dev, MAX6902_REG_CONTROL, 0x80); return 0; } @@ -134,20 +134,17 @@ static int max6902_probe(struct spi_device *spi) if (res != 0) return res; - rtc = rtc_device_register("max6902", - &spi->dev, &max6902_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&spi->dev, "max6902", + &max6902_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); - dev_set_drvdata(&spi->dev, rtc); + spi_set_drvdata(spi, rtc); return 0; } static int max6902_remove(struct spi_device *spi) { - struct rtc_device *rtc = dev_get_drvdata(&spi->dev); - - rtc_device_unregister(rtc); return 0; } diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 6b1337f9baf4..771812d62e6b 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -24,7 +24,7 @@ /* RTC Control Register */ #define BCD_EN_SHIFT 0 -#define BCD_EN_MASK (1 << BCD_EN_SHIFT) +#define BCD_EN_MASK (1 << BCD_EN_SHIFT) #define MODEL24_SHIFT 1 #define MODEL24_MASK (1 << MODEL24_SHIFT) /* RTC Update Register1 */ @@ -33,12 +33,12 @@ #define RTC_RBUDR_SHIFT 4 #define RTC_RBUDR_MASK (1 << RTC_RBUDR_SHIFT) /* WTSR and SMPL Register */ -#define WTSRT_SHIFT 0 -#define SMPLT_SHIFT 2 +#define WTSRT_SHIFT 0 +#define SMPLT_SHIFT 2 #define WTSR_EN_SHIFT 6 #define SMPL_EN_SHIFT 7 -#define WTSRT_MASK (3 << WTSRT_SHIFT) -#define SMPLT_MASK (3 << SMPLT_SHIFT) +#define WTSRT_MASK (3 << WTSRT_SHIFT) +#define SMPLT_MASK (3 << SMPLT_SHIFT) #define WTSR_EN_MASK (1 << WTSR_EN_SHIFT) #define SMPL_EN_MASK (1 << SMPL_EN_SHIFT) /* RTC Hour register */ @@ -466,7 +466,7 @@ static void max77686_rtc_enable_smpl(struct max77686_rtc_info *info, bool enable val = 0; regmap_read(info->max77686->rtc_regmap, MAX77686_WTSR_SMPL_CNTL, &val); - pr_info("%s: WTSR_SMPL(0x%02x)\n", __func__, val); + dev_info(info->dev, "%s: WTSR_SMPL(0x%02x)\n", __func__, val); } #endif /* MAX77686_RTC_WTSR_SMPL */ @@ -505,7 +505,8 @@ static int max77686_rtc_probe(struct platform_device *pdev) dev_info(&pdev->dev, "%s\n", __func__); - info = kzalloc(sizeof(struct max77686_rtc_info), GFP_KERNEL); + info = devm_kzalloc(&pdev->dev, sizeof(struct max77686_rtc_info), + GFP_KERNEL); if (!info) return -ENOMEM; @@ -513,13 +514,12 @@ static int max77686_rtc_probe(struct platform_device *pdev) info->dev = &pdev->dev; info->max77686 = max77686; info->rtc = max77686->rtc; - info->max77686->rtc_regmap = regmap_init_i2c(info->max77686->rtc, + info->max77686->rtc_regmap = devm_regmap_init_i2c(info->max77686->rtc, &max77686_rtc_regmap_config); if (IS_ERR(info->max77686->rtc_regmap)) { ret = PTR_ERR(info->max77686->rtc_regmap); dev_err(info->max77686->dev, "Failed to allocate register map: %d\n", ret); - kfree(info); return ret; } platform_set_drvdata(pdev, info); @@ -538,8 +538,8 @@ static int max77686_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - info->rtc_dev = rtc_device_register("max77686-rtc", &pdev->dev, - &max77686_rtc_ops, THIS_MODULE); + info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max77686-rtc", + &max77686_rtc_ops, THIS_MODULE); if (IS_ERR(info->rtc_dev)) { dev_info(&pdev->dev, "%s: fail\n", __func__); @@ -551,36 +551,24 @@ static int max77686_rtc_probe(struct platform_device *pdev) goto err_rtc; } virq = irq_create_mapping(max77686->irq_domain, MAX77686_RTCIRQ_RTCA1); - if (!virq) + if (!virq) { + ret = -ENXIO; goto err_rtc; + } info->virq = virq; - ret = request_threaded_irq(virq, NULL, max77686_rtc_alarm_irq, 0, - "rtc-alarm0", info); - if (ret < 0) { + ret = devm_request_threaded_irq(&pdev->dev, virq, NULL, + max77686_rtc_alarm_irq, 0, "rtc-alarm0", info); + if (ret < 0) dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", info->virq, ret); - goto err_rtc; - } - goto out; err_rtc: - kfree(info); - return ret; -out: return ret; } static int max77686_rtc_remove(struct platform_device *pdev) { - struct max77686_rtc_info *info = platform_get_drvdata(pdev); - - if (info) { - free_irq(info->virq, info); - rtc_device_unregister(info->rtc_dev); - kfree(info); - } - return 0; } @@ -594,11 +582,14 @@ static void max77686_rtc_shutdown(struct platform_device *pdev) for (i = 0; i < 3; i++) { max77686_rtc_enable_wtsr(info, false); regmap_read(info->max77686->rtc_regmap, MAX77686_WTSR_SMPL_CNTL, &val); - pr_info("%s: WTSR_SMPL reg(0x%02x)\n", __func__, val); - if (val & WTSR_EN_MASK) - pr_emerg("%s: fail to disable WTSR\n", __func__); - else { - pr_info("%s: success to disable WTSR\n", __func__); + dev_info(info->dev, "%s: WTSR_SMPL reg(0x%02x)\n", __func__, + val); + if (val & WTSR_EN_MASK) { + dev_emerg(info->dev, "%s: fail to disable WTSR\n", + __func__); + } else { + dev_info(info->dev, "%s: success to disable WTSR\n", + __func__); break; } } @@ -624,18 +615,8 @@ static struct platform_driver max77686_rtc_driver = { .id_table = rtc_id, }; -static int __init max77686_rtc_init(void) -{ - return platform_driver_register(&max77686_rtc_driver); -} -module_init(max77686_rtc_init); - -static void __exit max77686_rtc_exit(void) -{ - platform_driver_unregister(&max77686_rtc_driver); -} -module_exit(max77686_rtc_exit); +module_platform_driver(max77686_rtc_driver); MODULE_DESCRIPTION("Maxim MAX77686 RTC driver"); -MODULE_AUTHOR("<woong.byun@samsung.com>"); +MODULE_AUTHOR("Chiwoong Byun <woong.byun@samsung.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-max8907.c b/drivers/rtc/rtc-max8907.c index 31ca8faf9f05..86afb797125d 100644 --- a/drivers/rtc/rtc-max8907.c +++ b/drivers/rtc/rtc-max8907.c @@ -190,7 +190,7 @@ static int max8907_rtc_probe(struct platform_device *pdev) rtc->max8907 = max8907; rtc->regmap = max8907->regmap_rtc; - rtc->rtc_dev = rtc_device_register("max8907-rtc", &pdev->dev, + rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8907-rtc", &max8907_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc_dev)) { ret = PTR_ERR(rtc->rtc_dev); @@ -200,33 +200,21 @@ static int max8907_rtc_probe(struct platform_device *pdev) rtc->irq = regmap_irq_get_virq(max8907->irqc_rtc, MAX8907_IRQ_RTC_ALARM0); - if (rtc->irq < 0) { - ret = rtc->irq; - goto err_unregister; - } + if (rtc->irq < 0) + return rtc->irq; ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, max8907_irq_handler, IRQF_ONESHOT, "max8907-alarm0", rtc); - if (ret < 0) { + if (ret < 0) dev_err(&pdev->dev, "Failed to request IRQ%d: %d\n", rtc->irq, ret); - goto err_unregister; - } - return 0; - -err_unregister: - rtc_device_unregister(rtc->rtc_dev); return ret; } static int max8907_rtc_remove(struct platform_device *pdev) { - struct max8907_rtc *rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(rtc->rtc_dev); - return 0; } diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c index a0c8265646d2..7c90f4e45e27 100644 --- a/drivers/rtc/rtc-max8925.c +++ b/drivers/rtc/rtc-max8925.c @@ -253,7 +253,8 @@ static int max8925_rtc_probe(struct platform_device *pdev) struct max8925_rtc_info *info; int ret; - info = kzalloc(sizeof(struct max8925_rtc_info), GFP_KERNEL); + info = devm_kzalloc(&pdev->dev, sizeof(struct max8925_rtc_info), + GFP_KERNEL); if (!info) return -ENOMEM; info->chip = chip; @@ -261,12 +262,13 @@ static int max8925_rtc_probe(struct platform_device *pdev) info->dev = &pdev->dev; info->irq = platform_get_irq(pdev, 0); - ret = request_threaded_irq(info->irq, NULL, rtc_update_handler, - IRQF_ONESHOT, "rtc-alarm0", info); + ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, + rtc_update_handler, IRQF_ONESHOT, + "rtc-alarm0", info); if (ret < 0) { dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n", info->irq, ret); - goto out_irq; + goto err; } dev_set_drvdata(&pdev->dev, info); @@ -275,32 +277,22 @@ static int max8925_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - info->rtc_dev = rtc_device_register("max8925-rtc", &pdev->dev, + info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8925-rtc", &max8925_rtc_ops, THIS_MODULE); ret = PTR_ERR(info->rtc_dev); if (IS_ERR(info->rtc_dev)) { dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); - goto out_rtc; + goto err; } return 0; -out_rtc: +err: platform_set_drvdata(pdev, NULL); - free_irq(info->irq, info); -out_irq: - kfree(info); return ret; } static int max8925_rtc_remove(struct platform_device *pdev) { - struct max8925_rtc_info *info = platform_get_drvdata(pdev); - - if (info) { - free_irq(info->irq, info); - rtc_device_unregister(info->rtc_dev); - kfree(info); - } return 0; } diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c index f15202c771da..dacf48db7925 100644 --- a/drivers/rtc/rtc-max8997.c +++ b/drivers/rtc/rtc-max8997.c @@ -479,8 +479,8 @@ static int max8997_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - info->rtc_dev = rtc_device_register("max8997-rtc", &pdev->dev, - &max8997_rtc_ops, THIS_MODULE); + info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8997-rtc", + &max8997_rtc_ops, THIS_MODULE); if (IS_ERR(info->rtc_dev)) { ret = PTR_ERR(info->rtc_dev); @@ -491,6 +491,7 @@ static int max8997_rtc_probe(struct platform_device *pdev) virq = irq_create_mapping(max8997->irq_domain, MAX8997_PMICIRQ_RTCA1); if (!virq) { dev_err(&pdev->dev, "Failed to create mapping alarm IRQ\n"); + ret = -ENXIO; goto err_out; } info->virq = virq; @@ -498,26 +499,16 @@ static int max8997_rtc_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(&pdev->dev, virq, NULL, max8997_rtc_alarm_irq, 0, "rtc-alarm0", info); - if (ret < 0) { + if (ret < 0) dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", info->virq, ret); - goto err_out; - } - - return ret; err_out: - rtc_device_unregister(info->rtc_dev); return ret; } static int max8997_rtc_remove(struct platform_device *pdev) { - struct max8997_rtc_info *info = platform_get_drvdata(pdev); - - if (info) - rtc_device_unregister(info->rtc_dev); - return 0; } diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c index 8f234a075e8f..48b6612fae7f 100644 --- a/drivers/rtc/rtc-max8998.c +++ b/drivers/rtc/rtc-max8998.c @@ -256,7 +256,8 @@ static int max8998_rtc_probe(struct platform_device *pdev) struct max8998_rtc_info *info; int ret; - info = kzalloc(sizeof(struct max8998_rtc_info), GFP_KERNEL); + info = devm_kzalloc(&pdev->dev, sizeof(struct max8998_rtc_info), + GFP_KERNEL); if (!info) return -ENOMEM; @@ -267,7 +268,7 @@ static int max8998_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, info); - info->rtc_dev = rtc_device_register("max8998-rtc", &pdev->dev, + info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8998-rtc", &max8998_rtc_ops, THIS_MODULE); if (IS_ERR(info->rtc_dev)) { @@ -276,8 +277,8 @@ static int max8998_rtc_probe(struct platform_device *pdev) goto out_rtc; } - ret = request_threaded_irq(info->irq, NULL, max8998_rtc_alarm_irq, 0, - "rtc-alarm0", info); + ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, + max8998_rtc_alarm_irq, 0, "rtc-alarm0", info); if (ret < 0) dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", @@ -294,20 +295,11 @@ static int max8998_rtc_probe(struct platform_device *pdev) out_rtc: platform_set_drvdata(pdev, NULL); - kfree(info); return ret; } static int max8998_rtc_remove(struct platform_device *pdev) { - struct max8998_rtc_info *info = platform_get_drvdata(pdev); - - if (info) { - free_irq(info->irq, info); - rtc_device_unregister(info->rtc_dev); - kfree(info); - } - return 0; } diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c index 2643d8874925..7a8ed27a5f2e 100644 --- a/drivers/rtc/rtc-mc13xxx.c +++ b/drivers/rtc/rtc-mc13xxx.c @@ -316,7 +316,7 @@ static int __init mc13xxx_rtc_probe(struct platform_device *pdev) struct mc13xxx *mc13xxx; int rtcrst_pending; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -351,8 +351,8 @@ static int __init mc13xxx_rtc_probe(struct platform_device *pdev) mc13xxx_unlock(mc13xxx); - priv->rtc = rtc_device_register(pdev->name, - &pdev->dev, &mc13xxx_rtc_ops, THIS_MODULE); + priv->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, + &mc13xxx_rtc_ops, THIS_MODULE); if (IS_ERR(priv->rtc)) { ret = PTR_ERR(priv->rtc); @@ -372,7 +372,6 @@ err_reset_irq_request: mc13xxx_unlock(mc13xxx); platform_set_drvdata(pdev, NULL); - kfree(priv); } return ret; @@ -384,8 +383,6 @@ static int __exit mc13xxx_rtc_remove(struct platform_device *pdev) mc13xxx_lock(priv->mc13xxx); - rtc_device_unregister(priv->rtc); - mc13xxx_irq_free(priv->mc13xxx, MC13XXX_IRQ_TODA, priv); mc13xxx_irq_free(priv->mc13xxx, MC13XXX_IRQ_1HZ, priv); mc13xxx_irq_free(priv->mc13xxx, MC13XXX_IRQ_RTCRST, priv); @@ -394,8 +391,6 @@ static int __exit mc13xxx_rtc_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); - kfree(priv); - return 0; } @@ -420,17 +415,7 @@ static struct platform_driver mc13xxx_rtc_driver = { }, }; -static int __init mc13xxx_rtc_init(void) -{ - return platform_driver_probe(&mc13xxx_rtc_driver, &mc13xxx_rtc_probe); -} -module_init(mc13xxx_rtc_init); - -static void __exit mc13xxx_rtc_exit(void) -{ - platform_driver_unregister(&mc13xxx_rtc_driver); -} -module_exit(mc13xxx_rtc_exit); +module_platform_driver_probe(mc13xxx_rtc_driver, &mc13xxx_rtc_probe); MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>"); MODULE_DESCRIPTION("RTC driver for Freescale MC13XXX PMIC"); diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c index fcb113c11122..771f86a05d14 100644 --- a/drivers/rtc/rtc-msm6242.c +++ b/drivers/rtc/rtc-msm6242.c @@ -194,30 +194,28 @@ static const struct rtc_class_ops msm6242_rtc_ops = { .set_time = msm6242_set_time, }; -static int __init msm6242_rtc_probe(struct platform_device *dev) +static int __init msm6242_rtc_probe(struct platform_device *pdev) { struct resource *res; struct msm6242_priv *priv; struct rtc_device *rtc; int error; - res = platform_get_resource(dev, IORESOURCE_MEM, 0); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -ENODEV; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - priv->regs = ioremap(res->start, resource_size(res)); - if (!priv->regs) { - error = -ENOMEM; - goto out_free_priv; - } - platform_set_drvdata(dev, priv); + priv->regs = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!priv->regs) + return -ENOMEM; + platform_set_drvdata(pdev, priv); - rtc = rtc_device_register("rtc-msm6242", &dev->dev, &msm6242_rtc_ops, - THIS_MODULE); + rtc = devm_rtc_device_register(&pdev->dev, "rtc-msm6242", + &msm6242_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { error = PTR_ERR(rtc); goto out_unmap; @@ -227,20 +225,12 @@ static int __init msm6242_rtc_probe(struct platform_device *dev) return 0; out_unmap: - platform_set_drvdata(dev, NULL); - iounmap(priv->regs); -out_free_priv: - kfree(priv); + platform_set_drvdata(pdev, NULL); return error; } -static int __exit msm6242_rtc_remove(struct platform_device *dev) +static int __exit msm6242_rtc_remove(struct platform_device *pdev) { - struct msm6242_priv *priv = platform_get_drvdata(dev); - - rtc_device_unregister(priv->rtc); - iounmap(priv->regs); - kfree(priv); return 0; } @@ -252,18 +242,7 @@ static struct platform_driver msm6242_rtc_driver = { .remove = __exit_p(msm6242_rtc_remove), }; -static int __init msm6242_rtc_init(void) -{ - return platform_driver_probe(&msm6242_rtc_driver, msm6242_rtc_probe); -} - -static void __exit msm6242_rtc_fini(void) -{ - platform_driver_unregister(&msm6242_rtc_driver); -} - -module_init(msm6242_rtc_init); -module_exit(msm6242_rtc_fini); +module_platform_driver_probe(msm6242_rtc_driver, msm6242_rtc_probe); MODULE_AUTHOR("Geert Uytterhoeven <geert@linux-m68k.org>"); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c index 8f87fec27ce7..baab802f2153 100644 --- a/drivers/rtc/rtc-mv.c +++ b/drivers/rtc/rtc-mv.c @@ -217,7 +217,7 @@ static const struct rtc_class_ops mv_rtc_alarm_ops = { .alarm_irq_enable = mv_rtc_alarm_irq_enable, }; -static int mv_rtc_probe(struct platform_device *pdev) +static int __init mv_rtc_probe(struct platform_device *pdev) { struct resource *res; struct rtc_plat_data *pdata; @@ -272,12 +272,13 @@ static int mv_rtc_probe(struct platform_device *pdev) if (pdata->irq >= 0) { device_init_wakeup(&pdev->dev, 1); - pdata->rtc = rtc_device_register(pdev->name, &pdev->dev, + pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &mv_rtc_alarm_ops, THIS_MODULE); - } else - pdata->rtc = rtc_device_register(pdev->name, &pdev->dev, + } else { + pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &mv_rtc_ops, THIS_MODULE); + } if (IS_ERR(pdata->rtc)) { ret = PTR_ERR(pdata->rtc); goto out; @@ -308,7 +309,6 @@ static int __exit mv_rtc_remove(struct platform_device *pdev) if (pdata->irq >= 0) device_init_wakeup(&pdev->dev, 0); - rtc_device_unregister(pdata->rtc); if (!IS_ERR(pdata->clk)) clk_disable_unprepare(pdata->clk); @@ -331,18 +331,7 @@ static struct platform_driver mv_rtc_driver = { }, }; -static __init int mv_init(void) -{ - return platform_driver_probe(&mv_rtc_driver, mv_rtc_probe); -} - -static __exit void mv_exit(void) -{ - platform_driver_unregister(&mv_rtc_driver); -} - -module_init(mv_init); -module_exit(mv_exit); +module_platform_driver_probe(mv_rtc_driver, mv_rtc_probe); MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>"); MODULE_DESCRIPTION("Marvell RTC driver"); diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 1c3ef7289565..9a3895bc4f4d 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -439,7 +439,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) if (pdata->irq >=0) device_init_wakeup(&pdev->dev, 1); - rtc = rtc_device_register(pdev->name, &pdev->dev, &mxc_rtc_ops, + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &mxc_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { ret = PTR_ERR(rtc); @@ -464,15 +464,13 @@ static int mxc_rtc_remove(struct platform_device *pdev) { struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - rtc_device_unregister(pdata->rtc); - clk_disable_unprepare(pdata->clk); platform_set_drvdata(pdev, NULL); return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int mxc_rtc_suspend(struct device *dev) { struct rtc_plat_data *pdata = dev_get_drvdata(dev); @@ -492,19 +490,14 @@ static int mxc_rtc_resume(struct device *dev) return 0; } - -static struct dev_pm_ops mxc_rtc_pm_ops = { - .suspend = mxc_rtc_suspend, - .resume = mxc_rtc_resume, -}; #endif +static SIMPLE_DEV_PM_OPS(mxc_rtc_pm_ops, mxc_rtc_suspend, mxc_rtc_resume); + static struct platform_driver mxc_rtc_driver = { .driver = { .name = "mxc_rtc", -#ifdef CONFIG_PM .pm = &mxc_rtc_pm_ops, -#endif .owner = THIS_MODULE, }, .id_table = imx_rtc_devtype, diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c index a63680850fef..f5dfb6e5e7d9 100644 --- a/drivers/rtc/rtc-nuc900.c +++ b/drivers/rtc/rtc-nuc900.c @@ -222,13 +222,13 @@ static struct rtc_class_ops nuc900_rtc_ops = { .alarm_irq_enable = nuc900_alarm_irq_enable, }; -static int nuc900_rtc_probe(struct platform_device *pdev) +static int __init nuc900_rtc_probe(struct platform_device *pdev) { struct resource *res; struct nuc900_rtc *nuc900_rtc; - int err = 0; - nuc900_rtc = kzalloc(sizeof(struct nuc900_rtc), GFP_KERNEL); + nuc900_rtc = devm_kzalloc(&pdev->dev, sizeof(struct nuc900_rtc), + GFP_KERNEL); if (!nuc900_rtc) { dev_err(&pdev->dev, "kzalloc nuc900_rtc failed\n"); return -ENOMEM; @@ -236,93 +236,51 @@ static int nuc900_rtc_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(&pdev->dev, "platform_get_resource failed\n"); - err = -ENXIO; - goto fail1; + return -ENXIO; } - if (!request_mem_region(res->start, resource_size(res), - pdev->name)) { - dev_err(&pdev->dev, "request_mem_region failed\n"); - err = -EBUSY; - goto fail1; - } - - nuc900_rtc->rtc_reg = ioremap(res->start, resource_size(res)); - if (!nuc900_rtc->rtc_reg) { - dev_err(&pdev->dev, "ioremap rtc_reg failed\n"); - err = -ENOMEM; - goto fail2; - } + nuc900_rtc->rtc_reg = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(nuc900_rtc->rtc_reg)) + return PTR_ERR(nuc900_rtc->rtc_reg); platform_set_drvdata(pdev, nuc900_rtc); - nuc900_rtc->rtcdev = rtc_device_register(pdev->name, &pdev->dev, + nuc900_rtc->rtcdev = devm_rtc_device_register(&pdev->dev, pdev->name, &nuc900_rtc_ops, THIS_MODULE); if (IS_ERR(nuc900_rtc->rtcdev)) { dev_err(&pdev->dev, "rtc device register failed\n"); - err = PTR_ERR(nuc900_rtc->rtcdev); - goto fail3; + return PTR_ERR(nuc900_rtc->rtcdev); } __raw_writel(__raw_readl(nuc900_rtc->rtc_reg + REG_RTC_TSSR) | MODE24, nuc900_rtc->rtc_reg + REG_RTC_TSSR); nuc900_rtc->irq_num = platform_get_irq(pdev, 0); - if (request_irq(nuc900_rtc->irq_num, nuc900_rtc_interrupt, - 0, "nuc900rtc", nuc900_rtc)) { + if (devm_request_irq(&pdev->dev, nuc900_rtc->irq_num, + nuc900_rtc_interrupt, 0, "nuc900rtc", nuc900_rtc)) { dev_err(&pdev->dev, "NUC900 RTC request irq failed\n"); - err = -EBUSY; - goto fail4; + return -EBUSY; } return 0; - -fail4: rtc_device_unregister(nuc900_rtc->rtcdev); -fail3: iounmap(nuc900_rtc->rtc_reg); -fail2: release_mem_region(res->start, resource_size(res)); -fail1: kfree(nuc900_rtc); - return err; } -static int nuc900_rtc_remove(struct platform_device *pdev) +static int __exit nuc900_rtc_remove(struct platform_device *pdev) { - struct nuc900_rtc *nuc900_rtc = platform_get_drvdata(pdev); - struct resource *res; - - free_irq(nuc900_rtc->irq_num, nuc900_rtc); - rtc_device_unregister(nuc900_rtc->rtcdev); - iounmap(nuc900_rtc->rtc_reg); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - - kfree(nuc900_rtc); - platform_set_drvdata(pdev, NULL); return 0; } static struct platform_driver nuc900_rtc_driver = { - .remove = nuc900_rtc_remove, + .remove = __exit_p(nuc900_rtc_remove), .driver = { .name = "nuc900-rtc", .owner = THIS_MODULE, }, }; -static int __init nuc900_rtc_init(void) -{ - return platform_driver_probe(&nuc900_rtc_driver, nuc900_rtc_probe); -} - -static void __exit nuc900_rtc_exit(void) -{ - platform_driver_unregister(&nuc900_rtc_driver); -} - -module_init(nuc900_rtc_init); -module_exit(nuc900_rtc_exit); +module_platform_driver_probe(nuc900_rtc_driver, nuc900_rtc_probe); MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>"); MODULE_DESCRIPTION("nuc910/nuc920 RTC driver"); diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index 600971407aac..4e1bdb832e37 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -324,7 +324,7 @@ MODULE_DEVICE_TABLE(of, omap_rtc_of_match); static int __init omap_rtc_probe(struct platform_device *pdev) { - struct resource *res, *mem; + struct resource *res; struct rtc_device *rtc; u8 reg, new_ctrl; const struct platform_device_id *id_entry; @@ -352,18 +352,9 @@ static int __init omap_rtc_probe(struct platform_device *pdev) return -ENOENT; } - mem = request_mem_region(res->start, resource_size(res), pdev->name); - if (!mem) { - pr_debug("%s: RTC registers at %08x are not free\n", - pdev->name, res->start); - return -EBUSY; - } - - rtc_base = ioremap(res->start, resource_size(res)); - if (!rtc_base) { - pr_debug("%s: RTC registers can't be mapped\n", pdev->name); - goto fail; - } + rtc_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(rtc_base)) + return PTR_ERR(rtc_base); /* Enable the clock/module so that we can access the registers */ pm_runtime_enable(&pdev->dev); @@ -375,7 +366,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev) rtc_writel(KICK1_VALUE, OMAP_RTC_KICK1_REG); } - rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &omap_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { pr_debug("%s: can't register RTC device, err %ld\n", @@ -383,7 +374,6 @@ static int __init omap_rtc_probe(struct platform_device *pdev) goto fail0; } platform_set_drvdata(pdev, rtc); - dev_set_drvdata(&rtc->dev, mem); /* clear pending irqs, and set 1/second periodic, * which we'll use instead of update irqs @@ -401,18 +391,18 @@ static int __init omap_rtc_probe(struct platform_device *pdev) rtc_write(OMAP_RTC_STATUS_ALARM, OMAP_RTC_STATUS_REG); /* handle periodic and alarm irqs */ - if (request_irq(omap_rtc_timer, rtc_irq, 0, + if (devm_request_irq(&pdev->dev, omap_rtc_timer, rtc_irq, 0, dev_name(&rtc->dev), rtc)) { pr_debug("%s: RTC timer interrupt IRQ%d already claimed\n", pdev->name, omap_rtc_timer); - goto fail1; + goto fail0; } if ((omap_rtc_timer != omap_rtc_alarm) && - (request_irq(omap_rtc_alarm, rtc_irq, 0, + (devm_request_irq(&pdev->dev, omap_rtc_alarm, rtc_irq, 0, dev_name(&rtc->dev), rtc))) { pr_debug("%s: RTC alarm interrupt IRQ%d already claimed\n", pdev->name, omap_rtc_alarm); - goto fail2; + goto fail0; } /* On boards with split power, RTC_ON_NOFF won't reset the RTC */ @@ -446,25 +436,16 @@ static int __init omap_rtc_probe(struct platform_device *pdev) return 0; -fail2: - free_irq(omap_rtc_timer, rtc); -fail1: - rtc_device_unregister(rtc); fail0: if (id_entry && (id_entry->driver_data & OMAP_RTC_HAS_KICKER)) rtc_writel(0, OMAP_RTC_KICK0_REG); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - iounmap(rtc_base); -fail: - release_mem_region(mem->start, resource_size(mem)); return -EIO; } static int __exit omap_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); - struct resource *mem = dev_get_drvdata(&rtc->dev); const struct platform_device_id *id_entry = platform_get_device_id(pdev); @@ -473,12 +454,6 @@ static int __exit omap_rtc_remove(struct platform_device *pdev) /* leave rtc running, but disable irqs */ rtc_write(0, OMAP_RTC_INTERRUPTS_REG); - free_irq(omap_rtc_timer, rtc); - - if (omap_rtc_timer != omap_rtc_alarm) - free_irq(omap_rtc_alarm, rtc); - - rtc_device_unregister(rtc); if (id_entry && (id_entry->driver_data & OMAP_RTC_HAS_KICKER)) rtc_writel(0, OMAP_RTC_KICK0_REG); @@ -486,16 +461,13 @@ static int __exit omap_rtc_remove(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - iounmap(rtc_base); - release_mem_region(mem->start, resource_size(mem)); return 0; } -#ifdef CONFIG_PM - +#ifdef CONFIG_PM_SLEEP static u8 irqstat; -static int omap_rtc_suspend(struct platform_device *pdev, pm_message_t state) +static int omap_rtc_suspend(struct device *dev) { irqstat = rtc_read(OMAP_RTC_INTERRUPTS_REG); @@ -503,34 +475,32 @@ static int omap_rtc_suspend(struct platform_device *pdev, pm_message_t state) * source, and in fact this enable() call is just saving a flag * that's never used... */ - if (device_may_wakeup(&pdev->dev)) + if (device_may_wakeup(dev)) enable_irq_wake(omap_rtc_alarm); else rtc_write(0, OMAP_RTC_INTERRUPTS_REG); /* Disable the clock/module */ - pm_runtime_put_sync(&pdev->dev); + pm_runtime_put_sync(dev); return 0; } -static int omap_rtc_resume(struct platform_device *pdev) +static int omap_rtc_resume(struct device *dev) { /* Enable the clock/module so that we can access the registers */ - pm_runtime_get_sync(&pdev->dev); + pm_runtime_get_sync(dev); - if (device_may_wakeup(&pdev->dev)) + if (device_may_wakeup(dev)) disable_irq_wake(omap_rtc_alarm); else rtc_write(irqstat, OMAP_RTC_INTERRUPTS_REG); return 0; } - -#else -#define omap_rtc_suspend NULL -#define omap_rtc_resume NULL #endif +static SIMPLE_DEV_PM_OPS(omap_rtc_pm_ops, omap_rtc_suspend, omap_rtc_resume); + static void omap_rtc_shutdown(struct platform_device *pdev) { rtc_write(0, OMAP_RTC_INTERRUPTS_REG); @@ -539,28 +509,17 @@ static void omap_rtc_shutdown(struct platform_device *pdev) MODULE_ALIAS("platform:omap_rtc"); static struct platform_driver omap_rtc_driver = { .remove = __exit_p(omap_rtc_remove), - .suspend = omap_rtc_suspend, - .resume = omap_rtc_resume, .shutdown = omap_rtc_shutdown, .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, + .pm = &omap_rtc_pm_ops, .of_match_table = of_match_ptr(omap_rtc_of_match), }, .id_table = omap_rtc_devtype, }; -static int __init rtc_init(void) -{ - return platform_driver_probe(&omap_rtc_driver, omap_rtc_probe); -} -module_init(rtc_init); - -static void __exit rtc_exit(void) -{ - platform_driver_unregister(&omap_rtc_driver); -} -module_exit(rtc_exit); +module_platform_driver_probe(omap_rtc_driver, omap_rtc_probe); MODULE_AUTHOR("George G. Davis (and others)"); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index 59c42986254e..50204d474eb7 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c @@ -30,6 +30,7 @@ #include <linux/kernel.h> #include <linux/mfd/palmas.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/rtc.h> #include <linux/types.h> #include <linux/platform_device.h> @@ -264,7 +265,7 @@ static int palmas_rtc_probe(struct platform_device *pdev) palmas_rtc->irq = platform_get_irq(pdev, 0); - palmas_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, + palmas_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &palmas_rtc_ops, THIS_MODULE); if (IS_ERR(palmas_rtc->rtc)) { ret = PTR_ERR(palmas_rtc->rtc); @@ -272,14 +273,13 @@ static int palmas_rtc_probe(struct platform_device *pdev) return ret; } - ret = request_threaded_irq(palmas_rtc->irq, NULL, + ret = devm_request_threaded_irq(&pdev->dev, palmas_rtc->irq, NULL, palmas_rtc_interrupt, IRQF_TRIGGER_LOW | IRQF_ONESHOT | IRQF_EARLY_RESUME, dev_name(&pdev->dev), palmas_rtc); if (ret < 0) { dev_err(&pdev->dev, "IRQ request failed, err = %d\n", ret); - rtc_device_unregister(palmas_rtc->rtc); return ret; } @@ -289,11 +289,7 @@ static int palmas_rtc_probe(struct platform_device *pdev) static int palmas_rtc_remove(struct platform_device *pdev) { - struct palmas_rtc *palmas_rtc = platform_get_drvdata(pdev); - palmas_rtc_alarm_irq_enable(&pdev->dev, 0); - free_irq(palmas_rtc->irq, palmas_rtc); - rtc_device_unregister(palmas_rtc->rtc); return 0; } @@ -321,6 +317,14 @@ static const struct dev_pm_ops palmas_rtc_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(palmas_rtc_suspend, palmas_rtc_resume) }; +#ifdef CONFIG_OF +static struct of_device_id of_palmas_rtc_match[] = { + { .compatible = "ti,palmas-rtc"}, + { }, +}; +MODULE_DEVICE_TABLE(of, of_palmas_rtc_match); +#endif + static struct platform_driver palmas_rtc_driver = { .probe = palmas_rtc_probe, .remove = palmas_rtc_remove, @@ -328,6 +332,7 @@ static struct platform_driver palmas_rtc_driver = { .owner = THIS_MODULE, .name = "palmas-rtc", .pm = &palmas_rtc_pm_ops, + .of_match_table = of_match_ptr(of_palmas_rtc_match), }, }; diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c index e0019cd0bf71..539a90b98bc5 100644 --- a/drivers/rtc/rtc-pcap.c +++ b/drivers/rtc/rtc-pcap.c @@ -139,13 +139,14 @@ static const struct rtc_class_ops pcap_rtc_ops = { .alarm_irq_enable = pcap_rtc_alarm_irq_enable, }; -static int pcap_rtc_probe(struct platform_device *pdev) +static int __init pcap_rtc_probe(struct platform_device *pdev) { struct pcap_rtc *pcap_rtc; int timer_irq, alarm_irq; int err = -ENOMEM; - pcap_rtc = kmalloc(sizeof(struct pcap_rtc), GFP_KERNEL); + pcap_rtc = devm_kzalloc(&pdev->dev, sizeof(struct pcap_rtc), + GFP_KERNEL); if (!pcap_rtc) return err; @@ -153,68 +154,46 @@ static int pcap_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pcap_rtc); - pcap_rtc->rtc = rtc_device_register("pcap", &pdev->dev, - &pcap_rtc_ops, THIS_MODULE); + pcap_rtc->rtc = devm_rtc_device_register(&pdev->dev, "pcap", + &pcap_rtc_ops, THIS_MODULE); if (IS_ERR(pcap_rtc->rtc)) { err = PTR_ERR(pcap_rtc->rtc); - goto fail_rtc; + goto fail; } - timer_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_1HZ); alarm_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_TODA); - err = request_irq(timer_irq, pcap_rtc_irq, 0, "RTC Timer", pcap_rtc); + err = devm_request_irq(&pdev->dev, timer_irq, pcap_rtc_irq, 0, + "RTC Timer", pcap_rtc); if (err) - goto fail_timer; + goto fail; - err = request_irq(alarm_irq, pcap_rtc_irq, 0, "RTC Alarm", pcap_rtc); + err = devm_request_irq(&pdev->dev, alarm_irq, pcap_rtc_irq, 0, + "RTC Alarm", pcap_rtc); if (err) - goto fail_alarm; + goto fail; return 0; -fail_alarm: - free_irq(timer_irq, pcap_rtc); -fail_timer: - rtc_device_unregister(pcap_rtc->rtc); -fail_rtc: +fail: platform_set_drvdata(pdev, NULL); - kfree(pcap_rtc); return err; } -static int pcap_rtc_remove(struct platform_device *pdev) +static int __exit pcap_rtc_remove(struct platform_device *pdev) { - struct pcap_rtc *pcap_rtc = platform_get_drvdata(pdev); - - free_irq(pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_1HZ), pcap_rtc); - free_irq(pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_TODA), pcap_rtc); - rtc_device_unregister(pcap_rtc->rtc); - kfree(pcap_rtc); - return 0; } static struct platform_driver pcap_rtc_driver = { - .remove = pcap_rtc_remove, + .remove = __exit_p(pcap_rtc_remove), .driver = { .name = "pcap-rtc", .owner = THIS_MODULE, }, }; -static int __init rtc_pcap_init(void) -{ - return platform_driver_probe(&pcap_rtc_driver, pcap_rtc_probe); -} - -static void __exit rtc_pcap_exit(void) -{ - platform_driver_unregister(&pcap_rtc_driver); -} - -module_init(rtc_pcap_init); -module_exit(rtc_pcap_exit); +module_platform_driver_probe(pcap_rtc_driver, pcap_rtc_probe); MODULE_DESCRIPTION("Motorola pcap rtc driver"); MODULE_AUTHOR("guiming zhuo <gmzhuo@gmail.com>"); diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index 02b742afa761..6a3f329c3df4 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -226,7 +226,8 @@ static int pcf2123_probe(struct spi_device *spi) u8 txbuf[2], rxbuf[2]; int ret, i; - pdata = kzalloc(sizeof(struct pcf2123_plat_data), GFP_KERNEL); + pdata = devm_kzalloc(&spi->dev, sizeof(struct pcf2123_plat_data), + GFP_KERNEL); if (!pdata) return -ENOMEM; spi->dev.platform_data = pdata; @@ -281,7 +282,7 @@ static int pcf2123_probe(struct spi_device *spi) pcf2123_delay_trec(); /* Finalize the initialization */ - rtc = rtc_device_register(pcf2123_driver.driver.name, &spi->dev, + rtc = devm_rtc_device_register(&spi->dev, pcf2123_driver.driver.name, &pcf2123_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { @@ -314,7 +315,6 @@ sysfs_exit: device_remove_file(&spi->dev, &pdata->regs[i].attr); kfree_exit: - kfree(pdata); spi->dev.platform_data = NULL; return ret; } @@ -325,15 +325,10 @@ static int pcf2123_remove(struct spi_device *spi) int i; if (pdata) { - struct rtc_device *rtc = pdata->rtc; - - if (rtc) - rtc_device_unregister(rtc); for (i = 0; i < 16; i++) if (pdata->regs[i].name[0]) device_remove_file(&spi->dev, &pdata->regs[i].attr); - kfree(pdata); } return 0; diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c index e9f3135d305f..e6b6911c8e05 100644 --- a/drivers/rtc/rtc-pcf50633.c +++ b/drivers/rtc/rtc-pcf50633.c @@ -252,20 +252,17 @@ static int pcf50633_rtc_probe(struct platform_device *pdev) { struct pcf50633_rtc *rtc; - rtc = kzalloc(sizeof(*rtc), GFP_KERNEL); + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); if (!rtc) return -ENOMEM; rtc->pcf = dev_to_pcf50633(pdev->dev.parent); platform_set_drvdata(pdev, rtc); - rtc->rtc_dev = rtc_device_register("pcf50633-rtc", &pdev->dev, + rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, "pcf50633-rtc", &pcf50633_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc->rtc_dev)) { - int ret = PTR_ERR(rtc->rtc_dev); - kfree(rtc); - return ret; - } + if (IS_ERR(rtc->rtc_dev)) + return PTR_ERR(rtc->rtc_dev); pcf50633_register_irq(rtc->pcf, PCF50633_IRQ_ALARM, pcf50633_rtc_irq, rtc); @@ -277,12 +274,8 @@ static int pcf50633_rtc_remove(struct platform_device *pdev) struct pcf50633_rtc *rtc; rtc = platform_get_drvdata(pdev); - pcf50633_free_irq(rtc->pcf, PCF50633_IRQ_ALARM); - rtc_device_unregister(rtc->rtc_dev); - kfree(rtc); - return 0; } diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 889e3160e701..305c9515e5bb 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -307,7 +307,7 @@ static int pcf8523_probe(struct i2c_client *client, if (err < 0) return err; - pcf->rtc = rtc_device_register(DRIVER_NAME, &client->dev, + pcf->rtc = devm_rtc_device_register(&client->dev, DRIVER_NAME, &pcf8523_rtc_ops, THIS_MODULE); if (IS_ERR(pcf->rtc)) return PTR_ERR(pcf->rtc); @@ -319,10 +319,6 @@ static int pcf8523_probe(struct i2c_client *client, static int pcf8523_remove(struct i2c_client *client) { - struct pcf8523 *pcf = i2c_get_clientdata(client); - - rtc_device_unregister(pcf->rtc); - return 0; } diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index f7daf18a112e..97b354a26a44 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -245,14 +245,13 @@ static int pcf8563_probe(struct i2c_client *client, { struct pcf8563 *pcf8563; - int err = 0; - dev_dbg(&client->dev, "%s\n", __func__); if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -ENODEV; - pcf8563 = kzalloc(sizeof(struct pcf8563), GFP_KERNEL); + pcf8563 = devm_kzalloc(&client->dev, sizeof(struct pcf8563), + GFP_KERNEL); if (!pcf8563) return -ENOMEM; @@ -260,31 +259,18 @@ static int pcf8563_probe(struct i2c_client *client, i2c_set_clientdata(client, pcf8563); - pcf8563->rtc = rtc_device_register(pcf8563_driver.driver.name, - &client->dev, &pcf8563_rtc_ops, THIS_MODULE); + pcf8563->rtc = devm_rtc_device_register(&client->dev, + pcf8563_driver.driver.name, + &pcf8563_rtc_ops, THIS_MODULE); - if (IS_ERR(pcf8563->rtc)) { - err = PTR_ERR(pcf8563->rtc); - goto exit_kfree; - } + if (IS_ERR(pcf8563->rtc)) + return PTR_ERR(pcf8563->rtc); return 0; - -exit_kfree: - kfree(pcf8563); - - return err; } static int pcf8563_remove(struct i2c_client *client) { - struct pcf8563 *pcf8563 = i2c_get_clientdata(client); - - if (pcf8563->rtc) - rtc_device_unregister(pcf8563->rtc); - - kfree(pcf8563); - return 0; } diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c index 5f97c61247d5..95886dcf4a39 100644 --- a/drivers/rtc/rtc-pcf8583.c +++ b/drivers/rtc/rtc-pcf8583.c @@ -268,39 +268,29 @@ static int pcf8583_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct pcf8583 *pcf8583; - int err; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -ENODEV; - pcf8583 = kzalloc(sizeof(struct pcf8583), GFP_KERNEL); + pcf8583 = devm_kzalloc(&client->dev, sizeof(struct pcf8583), + GFP_KERNEL); if (!pcf8583) return -ENOMEM; i2c_set_clientdata(client, pcf8583); - pcf8583->rtc = rtc_device_register(pcf8583_driver.driver.name, - &client->dev, &pcf8583_rtc_ops, THIS_MODULE); + pcf8583->rtc = devm_rtc_device_register(&client->dev, + pcf8583_driver.driver.name, + &pcf8583_rtc_ops, THIS_MODULE); - if (IS_ERR(pcf8583->rtc)) { - err = PTR_ERR(pcf8583->rtc); - goto exit_kfree; - } + if (IS_ERR(pcf8583->rtc)) + return PTR_ERR(pcf8583->rtc); return 0; - -exit_kfree: - kfree(pcf8583); - return err; } static int pcf8583_remove(struct i2c_client *client) { - struct pcf8583 *pcf8583 = i2c_get_clientdata(client); - - if (pcf8583->rtc) - rtc_device_unregister(pcf8583->rtc); - kfree(pcf8583); return 0; } diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c index 968133ce1ee8..4bb825bb5804 100644 --- a/drivers/rtc/rtc-ps3.c +++ b/drivers/rtc/rtc-ps3.c @@ -62,7 +62,7 @@ static int __init ps3_rtc_probe(struct platform_device *dev) { struct rtc_device *rtc; - rtc = rtc_device_register("rtc-ps3", &dev->dev, &ps3_rtc_ops, + rtc = devm_rtc_device_register(&dev->dev, "rtc-ps3", &ps3_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -73,7 +73,6 @@ static int __init ps3_rtc_probe(struct platform_device *dev) static int __exit ps3_rtc_remove(struct platform_device *dev) { - rtc_device_unregister(platform_get_drvdata(dev)); return 0; } @@ -85,18 +84,7 @@ static struct platform_driver ps3_rtc_driver = { .remove = __exit_p(ps3_rtc_remove), }; -static int __init ps3_rtc_init(void) -{ - return platform_driver_probe(&ps3_rtc_driver, ps3_rtc_probe); -} - -static void __exit ps3_rtc_fini(void) -{ - platform_driver_unregister(&ps3_rtc_driver); -} - -module_init(ps3_rtc_init); -module_exit(ps3_rtc_fini); +module_platform_driver_probe(ps3_rtc_driver, ps3_rtc_probe); MODULE_AUTHOR("Sony Corporation"); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-puv3.c b/drivers/rtc/rtc-puv3.c index 0407e13d4de4..72f437170d2e 100644 --- a/drivers/rtc/rtc-puv3.c +++ b/drivers/rtc/rtc-puv3.c @@ -207,14 +207,14 @@ static const struct rtc_class_ops puv3_rtcops = { .proc = puv3_rtc_proc, }; -static void puv3_rtc_enable(struct platform_device *pdev, int en) +static void puv3_rtc_enable(struct device *dev, int en) { if (!en) { writel(readl(RTC_RTSR) & ~RTC_RTSR_HZE, RTC_RTSR); } else { /* re-enable the device, and check it is ok */ if ((readl(RTC_RTSR) & RTC_RTSR_HZE) == 0) { - dev_info(&pdev->dev, "rtc disabled, re-enabling\n"); + dev_info(dev, "rtc disabled, re-enabling\n"); writel(readl(RTC_RTSR) | RTC_RTSR_HZE, RTC_RTSR); } } @@ -276,7 +276,7 @@ static int puv3_rtc_probe(struct platform_device *pdev) goto err_nores; } - puv3_rtc_enable(pdev, 1); + puv3_rtc_enable(&pdev->dev, 1); /* register RTC and exit */ rtc = rtc_device_register("pkunity", &pdev->dev, &puv3_rtcops, @@ -296,44 +296,41 @@ static int puv3_rtc_probe(struct platform_device *pdev) return 0; err_nortc: - puv3_rtc_enable(pdev, 0); + puv3_rtc_enable(&pdev->dev, 0); release_resource(puv3_rtc_mem); err_nores: return ret; } -#ifdef CONFIG_PM - +#ifdef CONFIG_PM_SLEEP static int ticnt_save; -static int puv3_rtc_suspend(struct platform_device *pdev, pm_message_t state) +static int puv3_rtc_suspend(struct device *dev) { /* save RTAR for anyone using periodic interrupts */ ticnt_save = readl(RTC_RTAR); - puv3_rtc_enable(pdev, 0); + puv3_rtc_enable(dev, 0); return 0; } -static int puv3_rtc_resume(struct platform_device *pdev) +static int puv3_rtc_resume(struct device *dev) { - puv3_rtc_enable(pdev, 1); + puv3_rtc_enable(dev, 1); writel(ticnt_save, RTC_RTAR); return 0; } -#else -#define puv3_rtc_suspend NULL -#define puv3_rtc_resume NULL #endif +static SIMPLE_DEV_PM_OPS(puv3_rtc_pm_ops, puv3_rtc_suspend, puv3_rtc_resume); + static struct platform_driver puv3_rtc_driver = { .probe = puv3_rtc_probe, .remove = puv3_rtc_remove, - .suspend = puv3_rtc_suspend, - .resume = puv3_rtc_resume, .driver = { .name = "PKUnity-v3-RTC", .owner = THIS_MODULE, + .pm = &puv3_rtc_pm_ops, } }; diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index 03c85ee719a7..a2073eb968a2 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -19,6 +19,7 @@ * */ +#include <linux/delay.h> #include <linux/init.h> #include <linux/platform_device.h> #include <linux/module.h> @@ -80,22 +81,29 @@ #define RYAR1 0x1c #define RTCPICR 0x34 #define PIAR 0x38 +#define PSBR_RTC 0x00 #define rtc_readl(pxa_rtc, reg) \ __raw_readl((pxa_rtc)->base + (reg)) #define rtc_writel(pxa_rtc, reg, value) \ __raw_writel((value), (pxa_rtc)->base + (reg)) +#define rtc_readl_psbr(pxa_rtc, reg) \ + __raw_readl((pxa_rtc)->base_psbr + (reg)) +#define rtc_writel_psbr(pxa_rtc, reg, value) \ + __raw_writel((value), (pxa_rtc)->base_psbr + (reg)) struct pxa_rtc { struct resource *ress; + struct resource *ress_psbr; void __iomem *base; + void __iomem *base_psbr; int irq_1Hz; int irq_Alrm; struct rtc_device *rtc; spinlock_t lock; /* Protects this structure */ }; - +static struct pxa_rtc *rtc_info; static u32 ryxr_calc(struct rtc_time *tm) { return ((tm->tm_year + 1900) << RYxR_YEAR_S) @@ -117,7 +125,7 @@ static void tm_calc(u32 rycr, u32 rdcr, struct rtc_time *tm) tm->tm_year = ((rycr & RYxR_YEAR_MASK) >> RYxR_YEAR_S) - 1900; tm->tm_mon = (((rycr & RYxR_MONTH_MASK) >> RYxR_MONTH_S)) - 1; tm->tm_mday = (rycr & RYxR_DAY_MASK); - tm->tm_wday = ((rycr & RDxR_DOW_MASK) >> RDxR_DOW_S) - 1; + tm->tm_wday = ((rdcr & RDxR_DOW_MASK) >> RDxR_DOW_S) - 1; tm->tm_hour = (rdcr & RDxR_HOUR_MASK) >> RDxR_HOUR_S; tm->tm_min = (rdcr & RDxR_MIN_MASK) >> RDxR_MIN_S; tm->tm_sec = rdcr & RDxR_SEC_MASK; @@ -175,7 +183,6 @@ static irqreturn_t pxa_rtc_irq(int irq, void *dev_id) /* enable back rtc interrupts */ rtc_writel(pxa_rtc, RTSR, rtsr & ~RTSR_TRIG_MASK); - spin_unlock(&pxa_rtc->lock); return IRQ_HANDLED; } @@ -250,12 +257,45 @@ static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm) static int pxa_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); - + /* sequence to wirte pxa rtc register RCNR RDCR RYCR is + *1. set PSBR[RWE] bit, take 2x32-khz to complete + *2. write to RTC register,take 2x32-khz to complete + *3. clear PSBR[RWE] bit,take 2x32-khz to complete + */ + if ((tm->tm_year < 70) || (tm->tm_year > 138)) + return -EINVAL; + rtc_writel_psbr(rtc_info, PSBR_RTC, 0x01); + udelay(100); rtc_writel(pxa_rtc, RYCR, ryxr_calc(tm)); rtc_writel(pxa_rtc, RDCR, rdxr_calc(tm)); + udelay(100); + rtc_writel_psbr(rtc_info, PSBR_RTC, 0x00); + udelay(100); + pxa_rtc_read_time(dev, tm); + dev_info(dev, "tm.year = %d, tm.month = %d, tm.day = %d\n", + tm->tm_year + 1900, tm->tm_mon, tm->tm_mday); + return 0; +} +int pxa_rtc_sync_time(unsigned int ticks) +{ + /* sequence to wirte pxa rtc register RCNR RDCR RYCR is + *1. set PSBR[RWE] bit, take 2x32-khz to complete + *2. write to RTC register,take 2x32-khz to complete + *3. clear PSBR[RWE] bit,take 2x32-khz to complete + */ + struct rtc_time tm; + rtc_time_to_tm(ticks, &tm); + rtc_writel_psbr(rtc_info, PSBR_RTC, 0x01); + udelay(100); + rtc_writel(rtc_info, RYCR, ryxr_calc(&tm)); + rtc_writel(rtc_info, RDCR, rdxr_calc(&tm)); + udelay(100); + rtc_writel_psbr(rtc_info, PSBR_RTC, 0x00); + udelay(100); return 0; } +EXPORT_SYMBOL(pxa_rtc_sync_time); static int pxa_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) { @@ -324,10 +364,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) int ret; u32 rttr; - pxa_rtc = kzalloc(sizeof(struct pxa_rtc), GFP_KERNEL); + pxa_rtc = devm_kzalloc(dev, sizeof(struct pxa_rtc), GFP_KERNEL); if (!pxa_rtc) return -ENOMEM; - + rtc_info = pxa_rtc; spin_lock_init(&pxa_rtc->lock); platform_set_drvdata(pdev, pxa_rtc); @@ -335,28 +375,38 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) pxa_rtc->ress = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!pxa_rtc->ress) { dev_err(dev, "No I/O memory resource defined\n"); - goto err_ress; + return ret; + } + pxa_rtc->ress_psbr = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!pxa_rtc->ress_psbr) { + dev_err(dev, "No I/O memory resource defined\n"); + return ret; } pxa_rtc->irq_1Hz = platform_get_irq(pdev, 0); if (pxa_rtc->irq_1Hz < 0) { dev_err(dev, "No 1Hz IRQ resource defined\n"); - goto err_ress; + return ret; } pxa_rtc->irq_Alrm = platform_get_irq(pdev, 1); if (pxa_rtc->irq_Alrm < 0) { dev_err(dev, "No alarm IRQ resource defined\n"); - goto err_ress; + return ret; } - pxa_rtc_open(dev); ret = -ENOMEM; - pxa_rtc->base = ioremap(pxa_rtc->ress->start, + pxa_rtc->base = devm_ioremap(&pdev->dev, pxa_rtc->ress->start, resource_size(pxa_rtc->ress)); if (!pxa_rtc->base) { dev_err(&pdev->dev, "Unable to map pxa RTC I/O memory\n"); - goto err_map; + return ret; } + pxa_rtc->base_psbr = devm_ioremap(&pdev->dev, pxa_rtc->ress_psbr->start, + resource_size(pxa_rtc->ress_psbr)); + if (!pxa_rtc->base_psbr) { + dev_err(&pdev->dev, "Unable to map pxa RTC PSBR I/O memory\n"); + return ret; + } /* * If the clock divider is uninitialized then reset it to the * default value to get the 1Hz clock. @@ -370,41 +420,24 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_RDALE1 | RTSR_HZE); - pxa_rtc->rtc = rtc_device_register("pxa-rtc", &pdev->dev, &pxa_rtc_ops, - THIS_MODULE); + pxa_rtc->rtc = devm_rtc_device_register(&pdev->dev, "pxa-rtc", + &pxa_rtc_ops, THIS_MODULE); ret = PTR_ERR(pxa_rtc->rtc); if (IS_ERR(pxa_rtc->rtc)) { dev_err(dev, "Failed to register RTC device -> %d\n", ret); - goto err_rtc_reg; + return ret; } device_init_wakeup(dev, 1); - + pxa_rtc_open(dev); return 0; - -err_rtc_reg: - iounmap(pxa_rtc->base); -err_ress: -err_map: - kfree(pxa_rtc); - return ret; } static int __exit pxa_rtc_remove(struct platform_device *pdev) { - struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); - struct device *dev = &pdev->dev; pxa_rtc_release(dev); - rtc_device_unregister(pxa_rtc->rtc); - - spin_lock_irq(&pxa_rtc->lock); - iounmap(pxa_rtc->base); - spin_unlock_irq(&pxa_rtc->lock); - - kfree(pxa_rtc); - return 0; } @@ -416,7 +449,7 @@ static struct of_device_id pxa_rtc_dt_ids[] = { MODULE_DEVICE_TABLE(of, pxa_rtc_dt_ids); #endif -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int pxa_rtc_suspend(struct device *dev) { struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); @@ -434,36 +467,20 @@ static int pxa_rtc_resume(struct device *dev) disable_irq_wake(pxa_rtc->irq_Alrm); return 0; } - -static const struct dev_pm_ops pxa_rtc_pm_ops = { - .suspend = pxa_rtc_suspend, - .resume = pxa_rtc_resume, -}; #endif +static SIMPLE_DEV_PM_OPS(pxa_rtc_pm_ops, pxa_rtc_suspend, pxa_rtc_resume); + static struct platform_driver pxa_rtc_driver = { .remove = __exit_p(pxa_rtc_remove), .driver = { .name = "pxa-rtc", .of_match_table = of_match_ptr(pxa_rtc_dt_ids), -#ifdef CONFIG_PM .pm = &pxa_rtc_pm_ops, -#endif }, }; -static int __init pxa_rtc_init(void) -{ - return platform_driver_probe(&pxa_rtc_driver, pxa_rtc_probe); -} - -static void __exit pxa_rtc_exit(void) -{ - platform_driver_unregister(&pxa_rtc_driver); -} - -module_init(pxa_rtc_init); -module_exit(pxa_rtc_exit); +module_platform_driver_probe(pxa_rtc_driver, pxa_rtc_probe); MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); MODULE_DESCRIPTION("PXA27x/PXA3xx Realtime Clock Driver (RTC)"); diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index 7726f4a4f2d0..feeedbd82000 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -154,21 +154,18 @@ static int r9701_probe(struct spi_device *spi) } } - rtc = rtc_device_register("r9701", - &spi->dev, &r9701_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&spi->dev, "r9701", + &r9701_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); - dev_set_drvdata(&spi->dev, rtc); + spi_set_drvdata(spi, rtc); return 0; } static int r9701_remove(struct spi_device *spi) { - struct rtc_device *rtc = dev_get_drvdata(&spi->dev); - - rtc_device_unregister(rtc); return 0; } diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c index eb3194d664a8..8eabcf51b35a 100644 --- a/drivers/rtc/rtc-rc5t583.c +++ b/drivers/rtc/rtc-rc5t583.c @@ -256,7 +256,7 @@ static int rc5t583_rtc_probe(struct platform_device *pdev) } device_init_wakeup(&pdev->dev, 1); - ricoh_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, + ricoh_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &rc5t583_rtc_ops, THIS_MODULE); if (IS_ERR(ricoh_rtc->rtc)) { ret = PTR_ERR(ricoh_rtc->rtc); @@ -276,13 +276,10 @@ static int rc5t583_rtc_remove(struct platform_device *pdev) struct rc5t583_rtc *rc5t583_rtc = dev_get_drvdata(&pdev->dev); rc5t583_rtc_alarm_irq_enable(&rc5t583_rtc->rtc->dev, 0); - - rtc_device_unregister(rc5t583_rtc->rtc); return 0; } #ifdef CONFIG_PM_SLEEP - static int rc5t583_rtc_suspend(struct device *dev) { struct rc5t583 *rc5t583 = dev_get_drvdata(dev->parent); @@ -304,24 +301,18 @@ static int rc5t583_rtc_resume(struct device *dev) return regmap_write(rc5t583->regmap, RC5T583_RTC_CTL1, rc5t583_rtc->irqen); } - -static const struct dev_pm_ops rc5t583_rtc_pm_ops = { - .suspend = rc5t583_rtc_suspend, - .resume = rc5t583_rtc_resume, -}; - -#define DEV_PM_OPS (&rc5t583_rtc_pm_ops) -#else -#define DEV_PM_OPS NULL #endif +static SIMPLE_DEV_PM_OPS(rc5t583_rtc_pm_ops, rc5t583_rtc_suspend, + rc5t583_rtc_resume); + static struct platform_driver rc5t583_rtc_driver = { .probe = rc5t583_rtc_probe, .remove = rc5t583_rtc_remove, .driver = { .owner = THIS_MODULE, .name = "rtc-rc5t583", - .pm = DEV_PM_OPS, + .pm = &rc5t583_rtc_pm_ops, }, }; diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c index 359da6d020b9..873c689f01c3 100644 --- a/drivers/rtc/rtc-rp5c01.c +++ b/drivers/rtc/rtc-rp5c01.c @@ -230,15 +230,13 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev) if (!res) return -ENODEV; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&dev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - priv->regs = ioremap(res->start, resource_size(res)); - if (!priv->regs) { - error = -ENOMEM; - goto out_free_priv; - } + priv->regs = devm_ioremap(&dev->dev, res->start, resource_size(res)); + if (!priv->regs) + return -ENOMEM; sysfs_bin_attr_init(&priv->nvram_attr); priv->nvram_attr.attr.name = "nvram"; @@ -251,27 +249,22 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev) platform_set_drvdata(dev, priv); - rtc = rtc_device_register("rtc-rp5c01", &dev->dev, &rp5c01_rtc_ops, + rtc = devm_rtc_device_register(&dev->dev, "rtc-rp5c01", &rp5c01_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { error = PTR_ERR(rtc); - goto out_unmap; + goto out; } priv->rtc = rtc; error = sysfs_create_bin_file(&dev->dev.kobj, &priv->nvram_attr); if (error) - goto out_unregister; + goto out; return 0; -out_unregister: - rtc_device_unregister(rtc); -out_unmap: +out: platform_set_drvdata(dev, NULL); - iounmap(priv->regs); -out_free_priv: - kfree(priv); return error; } @@ -280,9 +273,6 @@ static int __exit rp5c01_rtc_remove(struct platform_device *dev) struct rp5c01_priv *priv = platform_get_drvdata(dev); sysfs_remove_bin_file(&dev->dev.kobj, &priv->nvram_attr); - rtc_device_unregister(priv->rtc); - iounmap(priv->regs); - kfree(priv); return 0; } @@ -294,18 +284,7 @@ static struct platform_driver rp5c01_rtc_driver = { .remove = __exit_p(rp5c01_rtc_remove), }; -static int __init rp5c01_rtc_init(void) -{ - return platform_driver_probe(&rp5c01_rtc_driver, rp5c01_rtc_probe); -} - -static void __exit rp5c01_rtc_fini(void) -{ - platform_driver_unregister(&rp5c01_rtc_driver); -} - -module_init(rp5c01_rtc_init); -module_exit(rp5c01_rtc_fini); +module_platform_driver_probe(rp5c01_rtc_driver, rp5c01_rtc_probe); MODULE_AUTHOR("Geert Uytterhoeven <geert@linux-m68k.org>"); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index d98ea5b759c8..8089fc63e403 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -367,7 +367,7 @@ static const struct rtc_class_ops rs5c313_rtc_ops = { static int rs5c313_rtc_probe(struct platform_device *pdev) { - struct rtc_device *rtc = rtc_device_register("rs5c313", &pdev->dev, + struct rtc_device *rtc = devm_rtc_device_register(&pdev->dev, "rs5c313", &rs5c313_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) @@ -380,10 +380,6 @@ static int rs5c313_rtc_probe(struct platform_device *pdev) static int rs5c313_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata( pdev ); - - rtc_device_unregister(rtc); - return 0; } diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c index 72ef10be8662..2c37df3586c7 100644 --- a/drivers/rtc/rtc-rs5c348.c +++ b/drivers/rtc/rtc-rs5c348.c @@ -158,7 +158,8 @@ static int rs5c348_probe(struct spi_device *spi) struct rtc_device *rtc; struct rs5c348_plat_data *pdata; - pdata = kzalloc(sizeof(struct rs5c348_plat_data), GFP_KERNEL); + pdata = devm_kzalloc(&spi->dev, sizeof(struct rs5c348_plat_data), + GFP_KERNEL); if (!pdata) return -ENOMEM; spi->dev.platform_data = pdata; @@ -202,7 +203,7 @@ static int rs5c348_probe(struct spi_device *spi) if (ret & RS5C348_BIT_24H) pdata->rtc_24h = 1; - rtc = rtc_device_register(rs5c348_driver.driver.name, &spi->dev, + rtc = devm_rtc_device_register(&spi->dev, rs5c348_driver.driver.name, &rs5c348_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { @@ -214,18 +215,11 @@ static int rs5c348_probe(struct spi_device *spi) return 0; kfree_exit: - kfree(pdata); return ret; } static int rs5c348_remove(struct spi_device *spi) { - struct rs5c348_plat_data *pdata = spi->dev.platform_data; - struct rtc_device *rtc = pdata->rtc; - - if (rtc) - rtc_device_unregister(rtc); - kfree(pdata); return 0; } diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index 581739f40097..224d634322b4 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -579,7 +579,9 @@ static int rs5c372_probe(struct i2c_client *client, } } - if (!(rs5c372 = kzalloc(sizeof(struct rs5c372), GFP_KERNEL))) { + rs5c372 = devm_kzalloc(&client->dev, sizeof(struct rs5c372), + GFP_KERNEL); + if (!rs5c372) { err = -ENOMEM; goto exit; } @@ -594,7 +596,7 @@ static int rs5c372_probe(struct i2c_client *client, err = rs5c_get_regs(rs5c372); if (err < 0) - goto exit_kfree; + goto exit; /* clock may be set for am/pm or 24 hr time */ switch (rs5c372->type) { @@ -617,7 +619,7 @@ static int rs5c372_probe(struct i2c_client *client, break; default: dev_err(&client->dev, "unknown RTC type\n"); - goto exit_kfree; + goto exit; } /* if the oscillator lost power and no other software (like @@ -629,7 +631,7 @@ static int rs5c372_probe(struct i2c_client *client, err = rs5c_oscillator_setup(rs5c372); if (unlikely(err < 0)) { dev_err(&client->dev, "setup error\n"); - goto exit_kfree; + goto exit; } if (rs5c372_get_datetime(client, &tm) < 0) @@ -648,38 +650,28 @@ static int rs5c372_probe(struct i2c_client *client, ); /* REVISIT use client->irq to register alarm irq ... */ - - rs5c372->rtc = rtc_device_register(rs5c372_driver.driver.name, - &client->dev, &rs5c372_rtc_ops, THIS_MODULE); + rs5c372->rtc = devm_rtc_device_register(&client->dev, + rs5c372_driver.driver.name, + &rs5c372_rtc_ops, THIS_MODULE); if (IS_ERR(rs5c372->rtc)) { err = PTR_ERR(rs5c372->rtc); - goto exit_kfree; + goto exit; } err = rs5c_sysfs_register(&client->dev); if (err) - goto exit_devreg; + goto exit; return 0; -exit_devreg: - rtc_device_unregister(rs5c372->rtc); - -exit_kfree: - kfree(rs5c372); - exit: return err; } static int rs5c372_remove(struct i2c_client *client) { - struct rs5c372 *rs5c372 = i2c_get_clientdata(client); - - rtc_device_unregister(rs5c372->rtc); rs5c_sysfs_unregister(&client->dev); - kfree(rs5c372); return 0; } diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c index f8ee8ad7825e..5032c24ec159 100644 --- a/drivers/rtc/rtc-rv3029c2.c +++ b/drivers/rtc/rtc-rv3029c2.c @@ -395,9 +395,8 @@ static int rv3029c2_probe(struct i2c_client *client, if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_EMUL)) return -ENODEV; - rtc = rtc_device_register(client->name, - &client->dev, &rv3029c2_rtc_ops, - THIS_MODULE); + rtc = devm_rtc_device_register(&client->dev, client->name, + &rv3029c2_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -407,23 +406,14 @@ static int rv3029c2_probe(struct i2c_client *client, rc = rv3029c2_i2c_get_sr(client, buf); if (rc < 0) { dev_err(&client->dev, "reading status failed\n"); - goto exit_unregister; + return rc; } return 0; - -exit_unregister: - rtc_device_unregister(rtc); - - return rc; } static int rv3029c2_remove(struct i2c_client *client) { - struct rtc_device *rtc = i2c_get_clientdata(client); - - rtc_device_unregister(rtc); - return 0; } diff --git a/drivers/rtc/rtc-rx4581.c b/drivers/rtc/rtc-rx4581.c index 599ec73ec886..84eb08d65d30 100644 --- a/drivers/rtc/rtc-rx4581.c +++ b/drivers/rtc/rtc-rx4581.c @@ -273,20 +273,17 @@ static int rx4581_probe(struct spi_device *spi) if (res != 0) return res; - rtc = rtc_device_register("rx4581", - &spi->dev, &rx4581_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&spi->dev, "rx4581", + &rx4581_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); - dev_set_drvdata(&spi->dev, rtc); + spi_set_drvdata(spi, rtc); return 0; } static int rx4581_remove(struct spi_device *spi) { - struct rtc_device *rtc = dev_get_drvdata(&spi->dev); - - rtc_device_unregister(rtc); return 0; } diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c index b0c272658fa2..07f3037b18f4 100644 --- a/drivers/rtc/rtc-rx8581.c +++ b/drivers/rtc/rtc-rx8581.c @@ -240,8 +240,8 @@ static int rx8581_probe(struct i2c_client *client, dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); - rtc = rtc_device_register(rx8581_driver.driver.name, - &client->dev, &rx8581_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&client->dev, rx8581_driver.driver.name, + &rx8581_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -253,10 +253,6 @@ static int rx8581_probe(struct i2c_client *client, static int rx8581_remove(struct i2c_client *client) { - struct rtc_device *rtc = i2c_get_clientdata(client); - - rtc_device_unregister(rtc); - return 0; } diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index 8a092325188d..f40afdd0e5f5 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -338,7 +338,8 @@ static int s35390a_probe(struct i2c_client *client, goto exit; } - s35390a = kzalloc(sizeof(struct s35390a), GFP_KERNEL); + s35390a = devm_kzalloc(&client->dev, sizeof(struct s35390a), + GFP_KERNEL); if (!s35390a) { err = -ENOMEM; goto exit; @@ -386,8 +387,9 @@ static int s35390a_probe(struct i2c_client *client, device_set_wakeup_capable(&client->dev, 1); - s35390a->rtc = rtc_device_register(s35390a_driver.driver.name, - &client->dev, &s35390a_rtc_ops, THIS_MODULE); + s35390a->rtc = devm_rtc_device_register(&client->dev, + s35390a_driver.driver.name, + &s35390a_rtc_ops, THIS_MODULE); if (IS_ERR(s35390a->rtc)) { err = PTR_ERR(s35390a->rtc); @@ -399,7 +401,6 @@ exit_dummy: for (i = 1; i < 8; ++i) if (s35390a->client[i]) i2c_unregister_device(s35390a->client[i]); - kfree(s35390a); exit: return err; @@ -408,15 +409,12 @@ exit: static int s35390a_remove(struct i2c_client *client) { unsigned int i; - struct s35390a *s35390a = i2c_get_clientdata(client); + for (i = 1; i < 8; ++i) if (s35390a->client[i]) i2c_unregister_device(s35390a->client[i]); - rtc_device_unregister(s35390a->rtc); - kfree(s35390a); - return 0; } diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index fb994e9ddc15..8e96c00936be 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -51,7 +51,6 @@ static struct clk *rtc_clk; static void __iomem *s3c_rtc_base; static int s3c_rtc_alarmno = NO_IRQ; static int s3c_rtc_tickno = NO_IRQ; -static bool wake_en; static enum s3c_cpu_type s3c_rtc_cpu_type; static DEFINE_SPINLOCK(s3c_rtc_pie_lock); @@ -423,13 +422,11 @@ static void s3c_rtc_enable(struct platform_device *pdev, int en) static int s3c_rtc_remove(struct platform_device *dev) { - struct rtc_device *rtc = platform_get_drvdata(dev); - platform_set_drvdata(dev, NULL); - rtc_device_unregister(rtc); s3c_rtc_setaie(&dev->dev, 0); + clk_unprepare(rtc_clk); rtc_clk = NULL; return 0; @@ -498,7 +495,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) return ret; } - clk_enable(rtc_clk); + clk_prepare_enable(rtc_clk); /* check to see if everything is setup correctly */ @@ -511,7 +508,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) /* register RTC and exit */ - rtc = rtc_device_register("s3c", &pdev->dev, &s3c_rtcops, + rtc = devm_rtc_device_register(&pdev->dev, "s3c", &s3c_rtcops, THIS_MODULE); if (IS_ERR(rtc)) { @@ -574,23 +571,24 @@ static int s3c_rtc_probe(struct platform_device *pdev) err_alarm_irq: platform_set_drvdata(pdev, NULL); - rtc_device_unregister(rtc); err_nortc: s3c_rtc_enable(pdev, 0); - clk_disable(rtc_clk); + clk_disable_unprepare(rtc_clk); return ret; } -#ifdef CONFIG_PM - +#ifdef CONFIG_PM_SLEEP /* RTC Power management control */ static int ticnt_save, ticnt_en_save; +static bool wake_en; -static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state) +static int s3c_rtc_suspend(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); + clk_enable(rtc_clk); /* save TICNT for anyone using periodic interrupts */ ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT); @@ -600,19 +598,20 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state) } s3c_rtc_enable(pdev, 0); - if (device_may_wakeup(&pdev->dev) && !wake_en) { + if (device_may_wakeup(dev) && !wake_en) { if (enable_irq_wake(s3c_rtc_alarmno) == 0) wake_en = true; else - dev_err(&pdev->dev, "enable_irq_wake failed\n"); + dev_err(dev, "enable_irq_wake failed\n"); } clk_disable(rtc_clk); return 0; } -static int s3c_rtc_resume(struct platform_device *pdev) +static int s3c_rtc_resume(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); unsigned int tmp; clk_enable(rtc_clk); @@ -623,7 +622,7 @@ static int s3c_rtc_resume(struct platform_device *pdev) writew(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON); } - if (device_may_wakeup(&pdev->dev) && wake_en) { + if (device_may_wakeup(dev) && wake_en) { disable_irq_wake(s3c_rtc_alarmno); wake_en = false; } @@ -631,11 +630,10 @@ static int s3c_rtc_resume(struct platform_device *pdev) return 0; } -#else -#define s3c_rtc_suspend NULL -#define s3c_rtc_resume NULL #endif +static SIMPLE_DEV_PM_OPS(s3c_rtc_pm_ops, s3c_rtc_suspend, s3c_rtc_resume); + #ifdef CONFIG_OF static struct s3c_rtc_drv_data s3c_rtc_drv_data_array[] = { [TYPE_S3C2410] = { TYPE_S3C2410 }, @@ -685,12 +683,11 @@ MODULE_DEVICE_TABLE(platform, s3c_rtc_driver_ids); static struct platform_driver s3c_rtc_driver = { .probe = s3c_rtc_probe, .remove = s3c_rtc_remove, - .suspend = s3c_rtc_suspend, - .resume = s3c_rtc_resume, .id_table = s3c_rtc_driver_ids, .driver = { .name = "s3c-rtc", .owner = THIS_MODULE, + .pm = &s3c_rtc_pm_ops, .of_match_table = of_match_ptr(s3c_rtc_dt_match), }, }; diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 5ec5036df0bc..00605601dbf7 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -234,14 +234,13 @@ static int sa1100_rtc_probe(struct platform_device *pdev) if (irq_1hz < 0 || irq_alarm < 0) return -ENODEV; - info = kzalloc(sizeof(struct sa1100_rtc), GFP_KERNEL); + info = devm_kzalloc(&pdev->dev, sizeof(struct sa1100_rtc), GFP_KERNEL); if (!info) return -ENOMEM; - info->clk = clk_get(&pdev->dev, NULL); + info->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(info->clk)) { dev_err(&pdev->dev, "failed to find rtc clock source\n"); - ret = PTR_ERR(info->clk); - goto err_clk; + return PTR_ERR(info->clk); } info->irq_1hz = irq_1hz; info->irq_alarm = irq_alarm; @@ -268,8 +267,8 @@ static int sa1100_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - rtc = rtc_device_register(pdev->name, &pdev->dev, &sa1100_rtc_ops, - THIS_MODULE); + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &sa1100_rtc_ops, + THIS_MODULE); if (IS_ERR(rtc)) { ret = PTR_ERR(rtc); @@ -306,9 +305,6 @@ err_dev: clk_disable_unprepare(info->clk); err_enable_clk: platform_set_drvdata(pdev, NULL); - clk_put(info->clk); -err_clk: - kfree(info); return ret; } @@ -317,17 +313,14 @@ static int sa1100_rtc_remove(struct platform_device *pdev) struct sa1100_rtc *info = platform_get_drvdata(pdev); if (info) { - rtc_device_unregister(info->rtc); clk_disable_unprepare(info->clk); - clk_put(info->clk); platform_set_drvdata(pdev, NULL); - kfree(info); } return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int sa1100_rtc_suspend(struct device *dev) { struct sa1100_rtc *info = dev_get_drvdata(dev); @@ -343,13 +336,11 @@ static int sa1100_rtc_resume(struct device *dev) disable_irq_wake(info->irq_alarm); return 0; } - -static const struct dev_pm_ops sa1100_rtc_pm_ops = { - .suspend = sa1100_rtc_suspend, - .resume = sa1100_rtc_resume, -}; #endif +static SIMPLE_DEV_PM_OPS(sa1100_rtc_pm_ops, sa1100_rtc_suspend, + sa1100_rtc_resume); + #ifdef CONFIG_OF static struct of_device_id sa1100_rtc_dt_ids[] = { { .compatible = "mrvl,sa1100-rtc", }, @@ -364,9 +355,7 @@ static struct platform_driver sa1100_rtc_driver = { .remove = sa1100_rtc_remove, .driver = { .name = "sa1100-rtc", -#ifdef CONFIG_PM .pm = &sa1100_rtc_pm_ops, -#endif .of_match_table = of_match_ptr(sa1100_rtc_dt_ids), }, }; diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index e55a7635ae5f..8d5bd2e36776 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -790,6 +790,7 @@ static void sh_rtc_set_irq_wake(struct device *dev, int enabled) } } +#ifdef CONFIG_PM_SLEEP static int sh_rtc_suspend(struct device *dev) { if (device_may_wakeup(dev)) @@ -805,33 +806,20 @@ static int sh_rtc_resume(struct device *dev) return 0; } +#endif -static const struct dev_pm_ops sh_rtc_dev_pm_ops = { - .suspend = sh_rtc_suspend, - .resume = sh_rtc_resume, -}; +static SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume); static struct platform_driver sh_rtc_platform_driver = { .driver = { .name = DRV_NAME, .owner = THIS_MODULE, - .pm = &sh_rtc_dev_pm_ops, + .pm = &sh_rtc_pm_ops, }, .remove = __exit_p(sh_rtc_remove), }; -static int __init sh_rtc_init(void) -{ - return platform_driver_probe(&sh_rtc_platform_driver, sh_rtc_probe); -} - -static void __exit sh_rtc_exit(void) -{ - platform_driver_unregister(&sh_rtc_platform_driver); -} - -module_init(sh_rtc_init); -module_exit(sh_rtc_exit); +module_platform_driver_probe(sh_rtc_platform_driver, sh_rtc_probe); MODULE_DESCRIPTION("SuperH on-chip RTC driver"); MODULE_VERSION(DRV_VERSION); diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index f7d90703db5e..b04f09a1df2a 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -283,7 +283,7 @@ static int snvs_rtc_probe(struct platform_device *pdev) return ret; } - data->rtc = rtc_device_register(pdev->name, &pdev->dev, + data->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &snvs_rtc_ops, THIS_MODULE); if (IS_ERR(data->rtc)) { ret = PTR_ERR(data->rtc); @@ -296,10 +296,6 @@ static int snvs_rtc_probe(struct platform_device *pdev) static int snvs_rtc_remove(struct platform_device *pdev) { - struct snvs_rtc_data *data = platform_get_drvdata(pdev); - - rtc_device_unregister(data->rtc); - return 0; } diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c index a18c3192ed40..574359c48f65 100644 --- a/drivers/rtc/rtc-spear.c +++ b/drivers/rtc/rtc-spear.c @@ -400,8 +400,8 @@ static int spear_rtc_probe(struct platform_device *pdev) spin_lock_init(&config->lock); platform_set_drvdata(pdev, config); - config->rtc = rtc_device_register(pdev->name, &pdev->dev, - &spear_rtc_ops, THIS_MODULE); + config->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, + &spear_rtc_ops, THIS_MODULE); if (IS_ERR(config->rtc)) { dev_err(&pdev->dev, "can't register RTC device, err %ld\n", PTR_ERR(config->rtc)); @@ -427,7 +427,6 @@ static int spear_rtc_remove(struct platform_device *pdev) { struct spear_rtc_config *config = platform_get_drvdata(pdev); - rtc_device_unregister(config->rtc); spear_rtc_disable_interrupt(config); clk_disable_unprepare(config->clk); device_init_wakeup(&pdev->dev, 0); @@ -435,10 +434,10 @@ static int spear_rtc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM - -static int spear_rtc_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int spear_rtc_suspend(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct spear_rtc_config *config = platform_get_drvdata(pdev); int irq; @@ -454,8 +453,9 @@ static int spear_rtc_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int spear_rtc_resume(struct platform_device *pdev) +static int spear_rtc_resume(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct spear_rtc_config *config = platform_get_drvdata(pdev); int irq; @@ -473,12 +473,10 @@ static int spear_rtc_resume(struct platform_device *pdev) return 0; } - -#else -#define spear_rtc_suspend NULL -#define spear_rtc_resume NULL #endif +static SIMPLE_DEV_PM_OPS(spear_rtc_pm_ops, spear_rtc_suspend, spear_rtc_resume); + static void spear_rtc_shutdown(struct platform_device *pdev) { struct spear_rtc_config *config = platform_get_drvdata(pdev); @@ -498,11 +496,10 @@ MODULE_DEVICE_TABLE(of, spear_rtc_id_table); static struct platform_driver spear_rtc_driver = { .probe = spear_rtc_probe, .remove = spear_rtc_remove, - .suspend = spear_rtc_suspend, - .resume = spear_rtc_resume, .shutdown = spear_rtc_shutdown, .driver = { .name = "rtc-spear", + .pm = &spear_rtc_pm_ops, .of_match_table = of_match_ptr(spear_rtc_id_table), }, }; diff --git a/drivers/rtc/rtc-starfire.c b/drivers/rtc/rtc-starfire.c index 5be98bfd7ed3..987b5ec0ae56 100644 --- a/drivers/rtc/rtc-starfire.c +++ b/drivers/rtc/rtc-starfire.c @@ -39,8 +39,10 @@ static const struct rtc_class_ops starfire_rtc_ops = { static int __init starfire_rtc_probe(struct platform_device *pdev) { - struct rtc_device *rtc = rtc_device_register("starfire", &pdev->dev, - &starfire_rtc_ops, THIS_MODULE); + struct rtc_device *rtc; + + rtc = devm_rtc_device_register(&pdev->dev, "starfire", + &starfire_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -51,10 +53,6 @@ static int __init starfire_rtc_probe(struct platform_device *pdev) static int __exit starfire_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(rtc); - return 0; } @@ -66,15 +64,4 @@ static struct platform_driver starfire_rtc_driver = { .remove = __exit_p(starfire_rtc_remove), }; -static int __init starfire_rtc_init(void) -{ - return platform_driver_probe(&starfire_rtc_driver, starfire_rtc_probe); -} - -static void __exit starfire_rtc_exit(void) -{ - platform_driver_unregister(&starfire_rtc_driver); -} - -module_init(starfire_rtc_init); -module_exit(starfire_rtc_exit); +module_platform_driver_probe(starfire_rtc_driver, starfire_rtc_probe); diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c index 7e4a6f65cb91..af5e97e3f272 100644 --- a/drivers/rtc/rtc-stk17ta8.c +++ b/drivers/rtc/rtc-stk17ta8.c @@ -336,14 +336,13 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev) } } - pdata->rtc = rtc_device_register(pdev->name, &pdev->dev, + pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &stk17ta8_rtc_ops, THIS_MODULE); if (IS_ERR(pdata->rtc)) return PTR_ERR(pdata->rtc); ret = sysfs_create_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr); - if (ret) - rtc_device_unregister(pdata->rtc); + return ret; } @@ -352,7 +351,6 @@ static int stk17ta8_rtc_remove(struct platform_device *pdev) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr); - rtc_device_unregister(pdata->rtc); if (pdata->irq > 0) writeb(0, pdata->ioaddr + RTC_INTERRUPTS); return 0; diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c index 67d26128bc85..483ce086990b 100644 --- a/drivers/rtc/rtc-stmp3xxx.c +++ b/drivers/rtc/rtc-stmp3xxx.c @@ -225,11 +225,7 @@ static int stmp3xxx_rtc_remove(struct platform_device *pdev) writel(STMP3XXX_RTC_CTRL_ALARM_IRQ_EN, rtc_data->io + STMP3XXX_RTC_CTRL_CLR); - free_irq(rtc_data->irq_alarm, &pdev->dev); - rtc_device_unregister(rtc_data->rtc); platform_set_drvdata(pdev, NULL); - iounmap(rtc_data->io); - kfree(rtc_data); return 0; } @@ -240,22 +236,20 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) struct resource *r; int err; - rtc_data = kzalloc(sizeof *rtc_data, GFP_KERNEL); + rtc_data = devm_kzalloc(&pdev->dev, sizeof(*rtc_data), GFP_KERNEL); if (!rtc_data) return -ENOMEM; r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) { dev_err(&pdev->dev, "failed to get resource\n"); - err = -ENXIO; - goto out_free; + return -ENXIO; } - rtc_data->io = ioremap(r->start, resource_size(r)); + rtc_data->io = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!rtc_data->io) { dev_err(&pdev->dev, "ioremap failed\n"); - err = -EIO; - goto out_free; + return -EIO; } rtc_data->irq_alarm = platform_get_irq(pdev, 0); @@ -263,8 +257,7 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) if (!(readl(STMP3XXX_RTC_STAT + rtc_data->io) & STMP3XXX_RTC_STAT_RTC_PRESENT)) { dev_err(&pdev->dev, "no device onboard\n"); - err = -ENODEV; - goto out_remap; + return -ENODEV; } platform_set_drvdata(pdev, rtc_data); @@ -279,43 +272,38 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) STMP3XXX_RTC_CTRL_ALARM_IRQ_EN, rtc_data->io + STMP3XXX_RTC_CTRL_CLR); - rtc_data->rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc_data->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &stmp3xxx_rtc_ops, THIS_MODULE); if (IS_ERR(rtc_data->rtc)) { err = PTR_ERR(rtc_data->rtc); - goto out_remap; + goto out; } - err = request_irq(rtc_data->irq_alarm, stmp3xxx_rtc_interrupt, 0, - "RTC alarm", &pdev->dev); + err = devm_request_irq(&pdev->dev, rtc_data->irq_alarm, + stmp3xxx_rtc_interrupt, 0, "RTC alarm", &pdev->dev); if (err) { dev_err(&pdev->dev, "Cannot claim IRQ%d\n", rtc_data->irq_alarm); - goto out_irq_alarm; + goto out; } stmp3xxx_wdt_register(pdev); return 0; -out_irq_alarm: - rtc_device_unregister(rtc_data->rtc); -out_remap: +out: platform_set_drvdata(pdev, NULL); - iounmap(rtc_data->io); -out_free: - kfree(rtc_data); return err; } -#ifdef CONFIG_PM -static int stmp3xxx_rtc_suspend(struct platform_device *dev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int stmp3xxx_rtc_suspend(struct device *dev) { return 0; } -static int stmp3xxx_rtc_resume(struct platform_device *dev) +static int stmp3xxx_rtc_resume(struct device *dev) { - struct stmp3xxx_rtc_data *rtc_data = platform_get_drvdata(dev); + struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); stmp_reset_block(rtc_data->io); writel(STMP3XXX_RTC_PERSISTENT0_ALARM_EN | @@ -324,11 +312,11 @@ static int stmp3xxx_rtc_resume(struct platform_device *dev) rtc_data->io + STMP3XXX_RTC_PERSISTENT0_CLR); return 0; } -#else -#define stmp3xxx_rtc_suspend NULL -#define stmp3xxx_rtc_resume NULL #endif +static SIMPLE_DEV_PM_OPS(stmp3xxx_rtc_pm_ops, stmp3xxx_rtc_suspend, + stmp3xxx_rtc_resume); + static const struct of_device_id rtc_dt_ids[] = { { .compatible = "fsl,stmp3xxx-rtc", }, { /* sentinel */ } @@ -338,11 +326,10 @@ MODULE_DEVICE_TABLE(of, rtc_dt_ids); static struct platform_driver stmp3xxx_rtcdrv = { .probe = stmp3xxx_rtc_probe, .remove = stmp3xxx_rtc_remove, - .suspend = stmp3xxx_rtc_suspend, - .resume = stmp3xxx_rtc_resume, .driver = { .name = "stmp3xxx-rtc", .owner = THIS_MODULE, + .pm = &stmp3xxx_rtc_pm_ops, .of_match_table = of_match_ptr(rtc_dt_ids), }, }; diff --git a/drivers/rtc/rtc-sun4v.c b/drivers/rtc/rtc-sun4v.c index 59b5c2dcb58c..ce42e5fa9e09 100644 --- a/drivers/rtc/rtc-sun4v.c +++ b/drivers/rtc/rtc-sun4v.c @@ -81,8 +81,10 @@ static const struct rtc_class_ops sun4v_rtc_ops = { static int __init sun4v_rtc_probe(struct platform_device *pdev) { - struct rtc_device *rtc = rtc_device_register("sun4v", &pdev->dev, - &sun4v_rtc_ops, THIS_MODULE); + struct rtc_device *rtc; + + rtc = devm_rtc_device_register(&pdev->dev, "sun4v", + &sun4v_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -92,9 +94,6 @@ static int __init sun4v_rtc_probe(struct platform_device *pdev) static int __exit sun4v_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(rtc); return 0; } @@ -106,18 +105,7 @@ static struct platform_driver sun4v_rtc_driver = { .remove = __exit_p(sun4v_rtc_remove), }; -static int __init sun4v_rtc_init(void) -{ - return platform_driver_probe(&sun4v_rtc_driver, sun4v_rtc_probe); -} - -static void __exit sun4v_rtc_exit(void) -{ - platform_driver_unregister(&sun4v_rtc_driver); -} - -module_init(sun4v_rtc_init); -module_exit(sun4v_rtc_exit); +module_platform_driver_probe(sun4v_rtc_driver, sun4v_rtc_probe); MODULE_AUTHOR("David S. Miller <davem@davemloft.net>"); MODULE_DESCRIPTION("SUN4V RTC driver"); diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 7c033756d6b5..a34315d25478 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -26,6 +26,7 @@ #include <linux/delay.h> #include <linux/rtc.h> #include <linux/platform_device.h> +#include <linux/pm.h> /* set to 1 = busy every eight 32kHz clocks during copy of sec+msec to AHB */ #define TEGRA_RTC_REG_BUSY 0x004 @@ -309,7 +310,7 @@ static const struct of_device_id tegra_rtc_dt_match[] = { }; MODULE_DEVICE_TABLE(of, tegra_rtc_dt_match); -static int tegra_rtc_probe(struct platform_device *pdev) +static int __init tegra_rtc_probe(struct platform_device *pdev) { struct tegra_rtc_info *info; struct resource *res; @@ -348,53 +349,35 @@ static int tegra_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - info->rtc_dev = rtc_device_register( - pdev->name, &pdev->dev, &tegra_rtc_ops, THIS_MODULE); + info->rtc_dev = devm_rtc_device_register(&pdev->dev, + dev_name(&pdev->dev), &tegra_rtc_ops, + THIS_MODULE); if (IS_ERR(info->rtc_dev)) { ret = PTR_ERR(info->rtc_dev); - info->rtc_dev = NULL; - dev_err(&pdev->dev, - "Unable to register device (err=%d).\n", + dev_err(&pdev->dev, "Unable to register device (err=%d).\n", ret); return ret; } ret = devm_request_irq(&pdev->dev, info->tegra_rtc_irq, tegra_rtc_irq_handler, IRQF_TRIGGER_HIGH, - "rtc alarm", &pdev->dev); + dev_name(&pdev->dev), &pdev->dev); if (ret) { dev_err(&pdev->dev, "Unable to request interrupt for device (err=%d).\n", ret); - goto err_dev_unreg; + return ret; } dev_notice(&pdev->dev, "Tegra internal Real Time Clock\n"); return 0; - -err_dev_unreg: - rtc_device_unregister(info->rtc_dev); - - return ret; } -static int tegra_rtc_remove(struct platform_device *pdev) +#ifdef CONFIG_PM_SLEEP +static int tegra_rtc_suspend(struct device *dev) { - struct tegra_rtc_info *info = platform_get_drvdata(pdev); - - rtc_device_unregister(info->rtc_dev); - - platform_set_drvdata(pdev, NULL); - - return 0; -} - -#ifdef CONFIG_PM -static int tegra_rtc_suspend(struct platform_device *pdev, pm_message_t state) -{ - struct device *dev = &pdev->dev; - struct tegra_rtc_info *info = platform_get_drvdata(pdev); + struct tegra_rtc_info *info = dev_get_drvdata(dev); tegra_rtc_wait_while_busy(dev); @@ -416,10 +399,9 @@ static int tegra_rtc_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int tegra_rtc_resume(struct platform_device *pdev) +static int tegra_rtc_resume(struct device *dev) { - struct device *dev = &pdev->dev; - struct tegra_rtc_info *info = platform_get_drvdata(pdev); + struct tegra_rtc_info *info = dev_get_drvdata(dev); dev_vdbg(dev, "Resume (device_may_wakeup=%d)\n", device_may_wakeup(dev)); @@ -431,6 +413,8 @@ static int tegra_rtc_resume(struct platform_device *pdev) } #endif +static SIMPLE_DEV_PM_OPS(tegra_rtc_pm_ops, tegra_rtc_suspend, tegra_rtc_resume); + static void tegra_rtc_shutdown(struct platform_device *pdev) { dev_vdbg(&pdev->dev, "disabling interrupts.\n"); @@ -439,30 +423,16 @@ static void tegra_rtc_shutdown(struct platform_device *pdev) MODULE_ALIAS("platform:tegra_rtc"); static struct platform_driver tegra_rtc_driver = { - .remove = tegra_rtc_remove, .shutdown = tegra_rtc_shutdown, .driver = { .name = "tegra_rtc", .owner = THIS_MODULE, .of_match_table = tegra_rtc_dt_match, + .pm = &tegra_rtc_pm_ops, }, -#ifdef CONFIG_PM - .suspend = tegra_rtc_suspend, - .resume = tegra_rtc_resume, -#endif }; -static int __init tegra_rtc_init(void) -{ - return platform_driver_probe(&tegra_rtc_driver, tegra_rtc_probe); -} -module_init(tegra_rtc_init); - -static void __exit tegra_rtc_exit(void) -{ - platform_driver_unregister(&tegra_rtc_driver); -} -module_exit(tegra_rtc_exit); +module_platform_driver_probe(tegra_rtc_driver, tegra_rtc_probe); MODULE_AUTHOR("Jon Mayo <jmayo@nvidia.com>"); MODULE_DESCRIPTION("driver for Tegra internal RTC"); diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index b92e0f6383e6..7746e65b93f2 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -99,8 +99,10 @@ static DEVICE_ATTR(irq, S_IRUGO | S_IWUSR, test_irq_show, test_irq_store); static int test_probe(struct platform_device *plat_dev) { int err; - struct rtc_device *rtc = rtc_device_register("test", &plat_dev->dev, - &test_rtc_ops, THIS_MODULE); + struct rtc_device *rtc; + + rtc = devm_rtc_device_register(&plat_dev->dev, "test", + &test_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { err = PTR_ERR(rtc); return err; @@ -115,15 +117,11 @@ static int test_probe(struct platform_device *plat_dev) return 0; err: - rtc_device_unregister(rtc); return err; } static int test_remove(struct platform_device *plat_dev) { - struct rtc_device *rtc = platform_get_drvdata(plat_dev); - - rtc_device_unregister(rtc); device_remove_file(&plat_dev->dev, &dev_attr_irq); return 0; diff --git a/drivers/rtc/rtc-tile.c b/drivers/rtc/rtc-tile.c index 62db4841078b..249b6531f119 100644 --- a/drivers/rtc/rtc-tile.c +++ b/drivers/rtc/rtc-tile.c @@ -80,8 +80,8 @@ static int tile_rtc_probe(struct platform_device *dev) { struct rtc_device *rtc; - rtc = rtc_device_register("tile", - &dev->dev, &tile_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&dev->dev, "tile", + &tile_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -96,11 +96,6 @@ static int tile_rtc_probe(struct platform_device *dev) */ static int tile_rtc_remove(struct platform_device *dev) { - struct rtc_device *rtc = platform_get_drvdata(dev); - - if (rtc) - rtc_device_unregister(rtc); - platform_set_drvdata(dev, NULL); return 0; diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index aab4e8c93622..459c2ffc95a6 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -274,7 +274,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, rtc); - rtc->rtc = rtc_device_register(dev_name(&pdev->dev), &pdev->dev, + rtc->rtc = devm_rtc_device_register(&pdev->dev, dev_name(&pdev->dev), &tps6586x_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { ret = PTR_ERR(rtc->rtc); @@ -289,15 +289,12 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "request IRQ(%d) failed with ret %d\n", rtc->irq, ret); - goto fail_req_irq; + goto fail_rtc_register; } disable_irq(rtc->irq); device_set_wakeup_capable(&pdev->dev, 1); return 0; -fail_req_irq: - rtc_device_unregister(rtc->rtc); - fail_rtc_register: tps6586x_update(tps_dev, RTC_CTRL, 0, RTC_ENABLE | OSC_SRC_SEL | PRE_BYPASS | CL_SEL_MASK); @@ -306,12 +303,10 @@ fail_rtc_register: static int tps6586x_rtc_remove(struct platform_device *pdev) { - struct tps6586x_rtc *rtc = platform_get_drvdata(pdev); struct device *tps_dev = to_tps6586x_dev(&pdev->dev); tps6586x_update(tps_dev, RTC_CTRL, 0, RTC_ENABLE | OSC_SRC_SEL | PRE_BYPASS | CL_SEL_MASK); - rtc_device_unregister(rtc->rtc); return 0; } @@ -335,9 +330,8 @@ static int tps6586x_rtc_resume(struct device *dev) } #endif -static const struct dev_pm_ops tps6586x_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(tps6586x_rtc_suspend, tps6586x_rtc_resume) -}; +static SIMPLE_DEV_PM_OPS(tps6586x_pm_ops, tps6586x_rtc_suspend, + tps6586x_rtc_resume); static struct platform_driver tps6586x_rtc_driver = { .driver = { diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index 8bd8115329b5..a9caf043b0ce 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -263,7 +263,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev) if (irq <= 0) { dev_warn(&pdev->dev, "Wake up is not possible as irq = %d\n", irq); - return ret; + return -ENXIO; } ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, @@ -276,7 +276,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev) tps_rtc->irq = irq; device_set_wakeup_capable(&pdev->dev, 1); - tps_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, + tps_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &tps65910_rtc_ops, THIS_MODULE); if (IS_ERR(tps_rtc->rtc)) { ret = PTR_ERR(tps_rtc->rtc); @@ -295,12 +295,8 @@ static int tps65910_rtc_probe(struct platform_device *pdev) */ static int tps65910_rtc_remove(struct platform_device *pdev) { - /* leave rtc running, but disable irqs */ - struct tps65910_rtc *tps_rtc = platform_get_drvdata(pdev); - tps65910_rtc_alarm_irq_enable(&pdev->dev, 0); - rtc_device_unregister(tps_rtc->rtc); return 0; } @@ -324,9 +320,8 @@ static int tps65910_rtc_resume(struct device *dev) } #endif -static const struct dev_pm_ops tps65910_rtc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(tps65910_rtc_suspend, tps65910_rtc_resume) -}; +static SIMPLE_DEV_PM_OPS(tps65910_rtc_pm_ops, tps65910_rtc_suspend, + tps65910_rtc_resume); static struct platform_driver tps65910_rtc_driver = { .probe = tps65910_rtc_probe, diff --git a/drivers/rtc/rtc-tps80031.c b/drivers/rtc/rtc-tps80031.c index 9aaf8aaebae9..72662eafb938 100644 --- a/drivers/rtc/rtc-tps80031.c +++ b/drivers/rtc/rtc-tps80031.c @@ -277,7 +277,7 @@ static int tps80031_rtc_probe(struct platform_device *pdev) return ret; } - rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &tps80031_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { ret = PTR_ERR(rtc->rtc); @@ -292,7 +292,6 @@ static int tps80031_rtc_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "request IRQ:%d failed, err = %d\n", rtc->irq, ret); - rtc_device_unregister(rtc->rtc); return ret; } device_set_wakeup_capable(&pdev->dev, 1); @@ -301,9 +300,6 @@ static int tps80031_rtc_probe(struct platform_device *pdev) static int tps80031_rtc_remove(struct platform_device *pdev) { - struct tps80031_rtc *rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(rtc->rtc); return 0; } @@ -327,9 +323,8 @@ static int tps80031_rtc_resume(struct device *dev) }; #endif -static const struct dev_pm_ops tps80031_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(tps80031_rtc_suspend, tps80031_rtc_resume) -}; +static SIMPLE_DEV_PM_OPS(tps80031_pm_ops, tps80031_rtc_suspend, + tps80031_rtc_resume); static struct platform_driver tps80031_rtc_driver = { .driver = { diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 8bc6c80b184c..8751a5240c99 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -566,11 +566,10 @@ static void twl_rtc_shutdown(struct platform_device *pdev) mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M); } -#ifdef CONFIG_PM - +#ifdef CONFIG_PM_SLEEP static unsigned char irqstat; -static int twl_rtc_suspend(struct platform_device *pdev, pm_message_t state) +static int twl_rtc_suspend(struct device *dev) { irqstat = rtc_irq_bits; @@ -578,17 +577,15 @@ static int twl_rtc_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int twl_rtc_resume(struct platform_device *pdev) +static int twl_rtc_resume(struct device *dev) { set_rtc_irq_bit(irqstat); return 0; } - -#else -#define twl_rtc_suspend NULL -#define twl_rtc_resume NULL #endif +static SIMPLE_DEV_PM_OPS(twl_rtc_pm_ops, twl_rtc_suspend, twl_rtc_resume); + #ifdef CONFIG_OF static const struct of_device_id twl_rtc_of_match[] = { {.compatible = "ti,twl4030-rtc", }, @@ -603,11 +600,10 @@ static struct platform_driver twl4030rtc_driver = { .probe = twl_rtc_probe, .remove = twl_rtc_remove, .shutdown = twl_rtc_shutdown, - .suspend = twl_rtc_suspend, - .resume = twl_rtc_resume, .driver = { .owner = THIS_MODULE, .name = "twl_rtc", + .pm = &twl_rtc_pm_ops, .of_match_table = of_match_ptr(twl_rtc_of_match), }, }; diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c index a12bfac49d36..f9a0677e4e3b 100644 --- a/drivers/rtc/rtc-tx4939.c +++ b/drivers/rtc/rtc-tx4939.c @@ -268,14 +268,13 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev) if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt, 0, pdev->name, &pdev->dev) < 0) return -EBUSY; - rtc = rtc_device_register(pdev->name, &pdev->dev, + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &tx4939_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); pdata->rtc = rtc; ret = sysfs_create_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr); - if (ret) - rtc_device_unregister(rtc); + return ret; } @@ -284,7 +283,6 @@ static int __exit tx4939_rtc_remove(struct platform_device *pdev) struct tx4939rtc_plat_data *pdata = platform_get_drvdata(pdev); sysfs_remove_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr); - rtc_device_unregister(pdata->rtc); spin_lock_irq(&pdata->lock); tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP); spin_unlock_irq(&pdata->lock); @@ -299,18 +297,7 @@ static struct platform_driver tx4939_rtc_driver = { }, }; -static int __init tx4939rtc_init(void) -{ - return platform_driver_probe(&tx4939_rtc_driver, tx4939_rtc_probe); -} - -static void __exit tx4939rtc_exit(void) -{ - platform_driver_unregister(&tx4939_rtc_driver); -} - -module_init(tx4939rtc_init); -module_exit(tx4939rtc_exit); +module_platform_driver_probe(tx4939_rtc_driver, tx4939_rtc_probe); MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>"); MODULE_DESCRIPTION("TX4939 internal RTC driver"); diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index bca5d677bc85..d87878eee8c1 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -49,18 +49,13 @@ struct v3020_chip_ops { #define V3020_RD 2 #define V3020_IO 3 -struct v3020_gpio { - const char *name; - unsigned int gpio; -}; - struct v3020 { /* MMIO access */ void __iomem *ioaddress; int leftshift; /* GPIO access */ - struct v3020_gpio *gpio; + struct gpio *gpio; struct v3020_chip_ops *ops; @@ -107,48 +102,40 @@ static struct v3020_chip_ops v3020_mmio_ops = { .write_bit = v3020_mmio_write_bit, }; -static struct v3020_gpio v3020_gpio[] = { - { "RTC CS", 0 }, - { "RTC WR", 0 }, - { "RTC RD", 0 }, - { "RTC IO", 0 }, +static struct gpio v3020_gpio[] = { + { 0, GPIOF_OUT_INIT_HIGH, "RTC CS"}, + { 0, GPIOF_OUT_INIT_HIGH, "RTC WR"}, + { 0, GPIOF_OUT_INIT_HIGH, "RTC RD"}, + { 0, GPIOF_OUT_INIT_HIGH, "RTC IO"}, }; static int v3020_gpio_map(struct v3020 *chip, struct platform_device *pdev, struct v3020_platform_data *pdata) { - int i, err; + int err; v3020_gpio[V3020_CS].gpio = pdata->gpio_cs; v3020_gpio[V3020_WR].gpio = pdata->gpio_wr; v3020_gpio[V3020_RD].gpio = pdata->gpio_rd; v3020_gpio[V3020_IO].gpio = pdata->gpio_io; - for (i = 0; i < ARRAY_SIZE(v3020_gpio); i++) { - err = gpio_request(v3020_gpio[i].gpio, v3020_gpio[i].name); - if (err) - goto err_request; - - gpio_direction_output(v3020_gpio[i].gpio, 1); - } + err = gpio_request_array(v3020_gpio, ARRAY_SIZE(v3020_gpio)); + if (err) + goto err_request; chip->gpio = v3020_gpio; return 0; err_request: - while (--i >= 0) - gpio_free(v3020_gpio[i].gpio); + gpio_free_array(v3020_gpio, ARRAY_SIZE(v3020_gpio)); return err; } static void v3020_gpio_unmap(struct v3020 *chip) { - int i; - - for (i = 0; i < ARRAY_SIZE(v3020_gpio); i++) - gpio_free(v3020_gpio[i].gpio); + gpio_free_array(v3020_gpio, ARRAY_SIZE(v3020_gpio)); } static void v3020_gpio_write_bit(struct v3020 *chip, unsigned char bit) @@ -309,7 +296,7 @@ static int rtc_probe(struct platform_device *pdev) int i; int temp; - chip = kzalloc(sizeof *chip, GFP_KERNEL); + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); if (!chip) return -ENOMEM; @@ -353,8 +340,8 @@ static int rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, chip); - chip->rtc = rtc_device_register("v3020", - &pdev->dev, &v3020_rtc_ops, THIS_MODULE); + chip->rtc = devm_rtc_device_register(&pdev->dev, "v3020", + &v3020_rtc_ops, THIS_MODULE); if (IS_ERR(chip->rtc)) { retval = PTR_ERR(chip->rtc); goto err_io; @@ -365,21 +352,14 @@ static int rtc_probe(struct platform_device *pdev) err_io: chip->ops->unmap_io(chip); err_chip: - kfree(chip); - return retval; } static int rtc_remove(struct platform_device *dev) { struct v3020 *chip = platform_get_drvdata(dev); - struct rtc_device *rtc = chip->rtc; - - if (rtc) - rtc_device_unregister(rtc); chip->ops->unmap_io(chip); - kfree(chip); return 0; } diff --git a/drivers/rtc/rtc-vt8500.c b/drivers/rtc/rtc-vt8500.c index a000bc0a8bff..d89efee6d29e 100644 --- a/drivers/rtc/rtc-vt8500.c +++ b/drivers/rtc/rtc-vt8500.c @@ -252,7 +252,7 @@ static int vt8500_rtc_probe(struct platform_device *pdev) writel(VT8500_RTC_CR_ENABLE, vt8500_rtc->regbase + VT8500_RTC_CR); - vt8500_rtc->rtc = rtc_device_register("vt8500-rtc", &pdev->dev, + vt8500_rtc->rtc = devm_rtc_device_register(&pdev->dev, "vt8500-rtc", &vt8500_rtc_ops, THIS_MODULE); if (IS_ERR(vt8500_rtc->rtc)) { ret = PTR_ERR(vt8500_rtc->rtc); @@ -266,13 +266,11 @@ static int vt8500_rtc_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "can't get irq %i, err %d\n", vt8500_rtc->irq_alarm, ret); - goto err_unreg; + goto err_return; } return 0; -err_unreg: - rtc_device_unregister(vt8500_rtc->rtc); err_return: return ret; } @@ -281,8 +279,6 @@ static int vt8500_rtc_remove(struct platform_device *pdev) { struct vt8500_rtc *vt8500_rtc = platform_get_drvdata(pdev); - rtc_device_unregister(vt8500_rtc->rtc); - /* Disable alarm matching */ writel(0, vt8500_rtc->regbase + VT8500_RTC_IS); diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c index 2f0ac7b30a0c..8d65b94e5a7e 100644 --- a/drivers/rtc/rtc-wm831x.c +++ b/drivers/rtc/rtc-wm831x.c @@ -436,7 +436,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - wm831x_rtc->rtc = rtc_device_register("wm831x", &pdev->dev, + wm831x_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm831x", &wm831x_rtc_ops, THIS_MODULE); if (IS_ERR(wm831x_rtc->rtc)) { ret = PTR_ERR(wm831x_rtc->rtc); @@ -462,10 +462,6 @@ err: static int wm831x_rtc_remove(struct platform_device *pdev) { - struct wm831x_rtc *wm831x_rtc = platform_get_drvdata(pdev); - - rtc_device_unregister(wm831x_rtc->rtc); - return 0; } diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index 8ad86ae0d30f..fa247deb9cf4 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -339,7 +339,7 @@ static const struct rtc_class_ops wm8350_rtc_ops = { .alarm_irq_enable = wm8350_rtc_alarm_irq_enable, }; -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int wm8350_rtc_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -375,10 +375,6 @@ static int wm8350_rtc_resume(struct device *dev) return 0; } - -#else -#define wm8350_rtc_suspend NULL -#define wm8350_rtc_resume NULL #endif static int wm8350_rtc_probe(struct platform_device *pdev) @@ -439,8 +435,8 @@ static int wm8350_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - wm_rtc->rtc = rtc_device_register("wm8350", &pdev->dev, - &wm8350_rtc_ops, THIS_MODULE); + wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350", + &wm8350_rtc_ops, THIS_MODULE); if (IS_ERR(wm_rtc->rtc)) { ret = PTR_ERR(wm_rtc->rtc); dev_err(&pdev->dev, "failed to register RTC: %d\n", ret); @@ -462,20 +458,15 @@ static int wm8350_rtc_probe(struct platform_device *pdev) static int wm8350_rtc_remove(struct platform_device *pdev) { struct wm8350 *wm8350 = platform_get_drvdata(pdev); - struct wm8350_rtc *wm_rtc = &wm8350->rtc; wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350); - rtc_device_unregister(wm_rtc->rtc); - return 0; } -static struct dev_pm_ops wm8350_rtc_pm_ops = { - .suspend = wm8350_rtc_suspend, - .resume = wm8350_rtc_resume, -}; +static SIMPLE_DEV_PM_OPS(wm8350_rtc_pm_ops, wm8350_rtc_suspend, + wm8350_rtc_resume); static struct platform_driver wm8350_rtc_driver = { .probe = wm8350_rtc_probe, diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index f36e59c6bc01..fa9b0679fb60 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -630,8 +630,8 @@ static int x1205_probe(struct i2c_client *client, dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); - rtc = rtc_device_register(x1205_driver.driver.name, &client->dev, - &x1205_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&client->dev, x1205_driver.driver.name, + &x1205_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); @@ -653,21 +653,13 @@ static int x1205_probe(struct i2c_client *client, err = x1205_sysfs_register(&client->dev); if (err) - goto exit_devreg; + return err; return 0; - -exit_devreg: - rtc_device_unregister(rtc); - - return err; } static int x1205_remove(struct i2c_client *client) { - struct rtc_device *rtc = i2c_get_clientdata(client); - - rtc_device_unregister(rtc); x1205_sysfs_unregister(&client->dev); return 0; } diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index a76247201be5..cd743c545ce9 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -2161,7 +2161,7 @@ static void fcoe_ctlr_vn_restart(struct fcoe_ctlr *fip) if (fip->probe_tries < FIP_VN_RLIM_COUNT) { fip->probe_tries++; - wait = random32() % FIP_VN_PROBE_WAIT; + wait = prandom_u32() % FIP_VN_PROBE_WAIT; } else wait = FIP_VN_RLIM_INT; mod_timer(&fip->timer, jiffies + msecs_to_jiffies(wait)); @@ -2794,7 +2794,7 @@ static void fcoe_ctlr_vn_timeout(struct fcoe_ctlr *fip) fcoe_all_vn2vn, 0); fip->port_ka_time = jiffies + msecs_to_jiffies(FIP_VN_BEACON_INT + - (random32() % FIP_VN_BEACON_FUZZ)); + (prandom_u32() % FIP_VN_BEACON_FUZZ)); } if (time_before(fip->port_ka_time, next_time)) next_time = fip->port_ka_time; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 240492208aba..326e05a65a73 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1735,7 +1735,7 @@ lpfc_check_pending_fcoe_event(struct lpfc_hba *phba, uint8_t unreg_fcf) * use through a sequence of @fcf_cnt eligible FCF records with equal * probability. To perform integer manunipulation of random numbers with * size unit32_t, the lower 16 bits of the 32-bit random number returned - * from random32() are taken as the random random number generated. + * from prandom_u32() are taken as the random random number generated. * * Returns true when outcome is for the newly read FCF record should be * chosen; otherwise, return false when outcome is for keeping the previously @@ -1747,7 +1747,7 @@ lpfc_sli4_new_fcf_random_select(struct lpfc_hba *phba, uint32_t fcf_cnt) uint32_t rand_num; /* Get 16-bit uniform random number */ - rand_num = (0xFFFF & random32()); + rand_num = 0xFFFF & prandom_u32(); /* Decision with probability 1/fcf_cnt */ if ((fcf_cnt * rand_num) < 0xFFFF) @@ -2385,7 +2385,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) phba->fcf.eligible_fcf_cnt = 1; /* Seeding the random number generator for random selection */ seed = (uint32_t)(0xFFFFFFFF & jiffies); - srandom32(seed); + prandom_seed(seed); } spin_unlock_irq(&phba->hbalock); goto read_next_fcf; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9f0c46547459..df5e961484e1 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -35,6 +35,7 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #include <linux/sched.h> #include <linux/string.h> #include <linux/mm.h> +#include <linux/aio.h> #include <linux/errno.h> #include <linux/mtio.h> #include <linux/ioctl.h> diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index b14a55742559..b040200a5a55 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/vmalloc.h> +#include <linux/aio.h> #include "logger.h" #include <asm/ioctls.h> diff --git a/drivers/staging/speakup/kobjects.c b/drivers/staging/speakup/kobjects.c index d6d9264e4ca7..943b6c134a22 100644 --- a/drivers/staging/speakup/kobjects.c +++ b/drivers/staging/speakup/kobjects.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/kobject.h> #include <linux/string.h> +#include <linux/string_helpers.h> #include <linux/sysfs.h> #include <linux/ctype.h> @@ -417,7 +418,7 @@ static ssize_t synth_direct_store(struct kobject *kobj, bytes = min_t(size_t, len, 250); strncpy(tmp, ptr, bytes); tmp[bytes] = '\0'; - spk_xlate(tmp); + string_unescape_any_inplace(tmp); synth_printf("%s", tmp); ptr += bytes; len -= bytes; @@ -605,7 +606,8 @@ ssize_t spk_var_store(struct kobject *kobj, struct kobj_attribute *attr, if (param->data == NULL) return 0; ret = 0; - cp = spk_xlate((char *) buf); + cp = (char *)buf; + string_unescape_any_inplace(cp); spk_lock(flags); switch (param->var_type) { diff --git a/drivers/staging/speakup/speakup.h b/drivers/staging/speakup/speakup.h index c387a02fc1c2..0126f714821a 100644 --- a/drivers/staging/speakup/speakup.h +++ b/drivers/staging/speakup/speakup.h @@ -54,7 +54,6 @@ void spk_get_index_count(int *linecount, int *sentcount); extern int spk_set_key_info(const u_char *key_info, u_char *k_buffer); extern char *spk_strlwr(char *s); extern char *spk_s2uchar(char *start, char *dest); -extern char *spk_xlate(char *s); extern int speakup_kobj_init(void); extern void speakup_kobj_exit(void); extern int spk_chartab_get_value(char *keyword); diff --git a/drivers/staging/speakup/varhandlers.c b/drivers/staging/speakup/varhandlers.c index 0099cb12e560..7f6288fc2299 100644 --- a/drivers/staging/speakup/varhandlers.c +++ b/drivers/staging/speakup/varhandlers.c @@ -328,49 +328,3 @@ char *spk_s2uchar(char *start, char *dest) *dest = (u_char)val; return start; } - -char *spk_xlate(char *s) -{ - static const char finds[] = "nrtvafe"; - static const char subs[] = "\n\r\t\013\001\014\033"; - static const char hx[] = "0123456789abcdefABCDEF"; - char *p = s, *p1, *p2, c; - int num; - while ((p = strchr(p, '\\'))) { - p1 = p+1; - p2 = strchr(finds, *p1); - if (p2) { - *p++ = subs[p2-finds]; - p1++; - } else if (*p1 >= '0' && *p1 <= '7') { - num = (*p1++)&7; - while (num < 32 && *p1 >= '0' && *p1 <= '7') { - num <<= 3; - num += (*p1++)&7; - } - *p++ = num; - } else if (*p1 == 'x' && - strchr(hx, p1[1]) && strchr(hx, p1[2])) { - p1++; - c = *p1++; - if (c > '9') - c = (c - '7') & 0x0f; - else - c -= '0'; - num = c << 4; - c = *p1++; - if (c > '9') - c = (c-'7')&0x0f; - else - c -= '0'; - num += c; - *p++ = num; - } else - *p++ = *p1++; - p2 = p; - while (*p1) - *p2++ = *p1++; - *p2 = '\0'; - } - return s; -} diff --git a/drivers/staging/zcache/Kconfig b/drivers/staging/zcache/Kconfig index 05e87a1e5d93..2d7b2da3b9e0 100644 --- a/drivers/staging/zcache/Kconfig +++ b/drivers/staging/zcache/Kconfig @@ -1,5 +1,5 @@ config ZCACHE - bool "Dynamic compression of swap pages and clean pagecache pages" + tristate "Dynamic compression of swap pages and clean pagecache pages" depends on CRYPTO=y && SWAP=y && CLEANCACHE && FRONTSWAP select CRYPTO_LZO default n @@ -19,8 +19,8 @@ config ZCACHE_DEBUG how zcache is doing. You probably want to set this to 'N'. config RAMSTER - bool "Cross-machine RAM capacity sharing, aka peer-to-peer tmem" - depends on CONFIGFS_FS=y && SYSFS=y && !HIGHMEM && ZCACHE=y + tristate "Cross-machine RAM capacity sharing, aka peer-to-peer tmem" + depends on CONFIGFS_FS=y && SYSFS=y && !HIGHMEM && ZCACHE depends on NET # must ensure struct page is 8-byte aligned select HAVE_ALIGNED_STRUCT_PAGE if !64BIT diff --git a/drivers/staging/zcache/ramster.h b/drivers/staging/zcache/ramster.h index 1b71aea2ff62..e1f91d5a0f6a 100644 --- a/drivers/staging/zcache/ramster.h +++ b/drivers/staging/zcache/ramster.h @@ -11,10 +11,14 @@ #ifndef _ZCACHE_RAMSTER_H_ #define _ZCACHE_RAMSTER_H_ +#ifdef CONFIG_RAMSTER_MODULE +#define CONFIG_RAMSTER +#endif + #ifdef CONFIG_RAMSTER #include "ramster/ramster.h" #else -static inline void ramster_init(bool x, bool y, bool z) +static inline void ramster_init(bool x, bool y, bool z, bool w) { } diff --git a/drivers/staging/zcache/ramster/debug.c b/drivers/staging/zcache/ramster/debug.c index bf34133cc631..327e4f0d98e1 100644 --- a/drivers/staging/zcache/ramster/debug.c +++ b/drivers/staging/zcache/ramster/debug.c @@ -43,7 +43,7 @@ static struct debug_entry { }; #undef ATTR -int __init ramster_debugfs_init(void) +int ramster_debugfs_init(void) { int i; struct dentry *root = debugfs_create_dir("ramster", NULL); diff --git a/drivers/staging/zcache/ramster/nodemanager.c b/drivers/staging/zcache/ramster/nodemanager.c index c0f48158735d..2cfe93342c0d 100644 --- a/drivers/staging/zcache/ramster/nodemanager.c +++ b/drivers/staging/zcache/ramster/nodemanager.c @@ -949,7 +949,7 @@ static void __exit exit_r2nm(void) r2hb_exit(); } -static int __init init_r2nm(void) +int r2nm_init(void) { int ret = -1; @@ -986,10 +986,11 @@ out_r2hb: out: return ret; } +EXPORT_SYMBOL_GPL(r2nm_init); MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); -/* module_init(init_r2nm) */ -late_initcall(init_r2nm); -/* module_exit(exit_r2nm) */ +#ifndef CONFIG_RAMSTER_MODULE +late_initcall(r2nm_init); +#endif diff --git a/drivers/staging/zcache/ramster/ramster.c b/drivers/staging/zcache/ramster/ramster.c index 87816279ce3c..b18b887db79f 100644 --- a/drivers/staging/zcache/ramster/ramster.c +++ b/drivers/staging/zcache/ramster/ramster.c @@ -121,6 +121,7 @@ int ramster_do_preload_flnode(struct tmem_pool *pool) kmem_cache_free(ramster_flnode_cache, flnode); return ret; } +EXPORT_SYMBOL_GPL(ramster_do_preload_flnode); /* * Called by the message handler after a (still compressed) page has been @@ -388,6 +389,7 @@ void *ramster_pampd_free(void *pampd, struct tmem_pool *pool, } return local_pampd; } +EXPORT_SYMBOL_GPL(ramster_pampd_free); void ramster_count_foreign_pages(bool eph, int count) { @@ -408,6 +410,7 @@ void ramster_count_foreign_pages(bool eph, int count) } } } +EXPORT_SYMBOL_GPL(ramster_count_foreign_pages); /* * For now, just push over a few pages every few seconds to @@ -593,7 +596,7 @@ requeue: ramster_remotify_queue_delayed_work(HZ); } -void __init ramster_remotify_init(void) +void ramster_remotify_init(void) { unsigned long n = 60UL; ramster_remotify_workqueue = @@ -768,8 +771,10 @@ static bool frontswap_selfshrinking __read_mostly; static void selfshrink_process(struct work_struct *work); static DECLARE_DELAYED_WORK(selfshrink_worker, selfshrink_process); +#ifndef CONFIG_RAMSTER_MODULE /* Enable/disable with kernel boot option. */ -static bool use_frontswap_selfshrink __initdata = true; +static bool use_frontswap_selfshrink = true; +#endif /* * The default values for the following parameters were deemed reasonable @@ -824,6 +829,7 @@ static void frontswap_selfshrink(void) frontswap_shrink(tgt_frontswap_pages); } +#ifndef CONFIG_RAMSTER_MODULE static int __init ramster_nofrontswap_selfshrink_setup(char *s) { use_frontswap_selfshrink = false; @@ -831,6 +837,7 @@ static int __init ramster_nofrontswap_selfshrink_setup(char *s) } __setup("noselfshrink", ramster_nofrontswap_selfshrink_setup); +#endif static void selfshrink_process(struct work_struct *work) { @@ -849,6 +856,7 @@ void ramster_cpu_up(int cpu) per_cpu(ramster_remoteputmem1, cpu) = p1; per_cpu(ramster_remoteputmem2, cpu) = p2; } +EXPORT_SYMBOL_GPL(ramster_cpu_up); void ramster_cpu_down(int cpu) { @@ -864,6 +872,7 @@ void ramster_cpu_down(int cpu) kp->flnode = NULL; } } +EXPORT_SYMBOL_GPL(ramster_cpu_down); void ramster_register_pamops(struct tmem_pamops *pamops) { @@ -874,9 +883,11 @@ void ramster_register_pamops(struct tmem_pamops *pamops) pamops->repatriate = ramster_pampd_repatriate; pamops->repatriate_preload = ramster_pampd_repatriate_preload; } +EXPORT_SYMBOL_GPL(ramster_register_pamops); -void __init ramster_init(bool cleancache, bool frontswap, - bool frontswap_exclusive_gets) +void ramster_init(bool cleancache, bool frontswap, + bool frontswap_exclusive_gets, + bool frontswap_selfshrink) { int ret = 0; @@ -891,10 +902,17 @@ void __init ramster_init(bool cleancache, bool frontswap, if (ret) pr_err("ramster: can't create sysfs for ramster\n"); (void)r2net_register_handlers(); +#ifdef CONFIG_RAMSTER_MODULE + ret = r2nm_init(); + if (ret) + pr_err("ramster: can't init r2net\n"); + frontswap_selfshrinking = frontswap_selfshrink; +#else + frontswap_selfshrinking = use_frontswap_selfshrink; +#endif INIT_LIST_HEAD(&ramster_rem_op_list); ramster_flnode_cache = kmem_cache_create("ramster_flnode", sizeof(struct flushlist_node), 0, 0, NULL); - frontswap_selfshrinking = use_frontswap_selfshrink; if (frontswap_selfshrinking) { pr_info("ramster: Initializing frontswap selfshrink driver.\n"); schedule_delayed_work(&selfshrink_worker, @@ -902,3 +920,4 @@ void __init ramster_init(bool cleancache, bool frontswap, } ramster_remotify_init(); } +EXPORT_SYMBOL_GPL(ramster_init); diff --git a/drivers/staging/zcache/ramster/ramster.h b/drivers/staging/zcache/ramster/ramster.h index 12ae56f09ca4..6d41a7a772e3 100644 --- a/drivers/staging/zcache/ramster/ramster.h +++ b/drivers/staging/zcache/ramster/ramster.h @@ -147,7 +147,7 @@ extern int r2net_register_handlers(void); extern int r2net_remote_target_node_set(int); extern int ramster_remotify_pageframe(bool); -extern void ramster_init(bool, bool, bool); +extern void ramster_init(bool, bool, bool, bool); extern void ramster_register_pamops(struct tmem_pamops *); extern int ramster_localify(int, struct tmem_oid *oidp, uint32_t, char *, unsigned int, void *); diff --git a/drivers/staging/zcache/ramster/ramster_nodemanager.h b/drivers/staging/zcache/ramster/ramster_nodemanager.h index 49f879d943ab..dbaae34ea613 100644 --- a/drivers/staging/zcache/ramster/ramster_nodemanager.h +++ b/drivers/staging/zcache/ramster/ramster_nodemanager.h @@ -36,4 +36,6 @@ /* host name, group name, cluster name all 64 bytes */ #define R2NM_MAX_NAME_LEN 64 /* __NEW_UTS_LEN */ +extern int r2nm_init(void); + #endif /* _RAMSTER_NODEMANAGER_H */ diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c index a2b7e03b6062..d7e51e4152eb 100644 --- a/drivers/staging/zcache/tmem.c +++ b/drivers/staging/zcache/tmem.c @@ -35,7 +35,8 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/atomic.h> -#ifdef CONFIG_RAMSTER +#include <linux/export.h> +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) #include <linux/delay.h> #endif @@ -641,6 +642,7 @@ void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp, /* note, hashbucket remains locked */ return pampd; } +EXPORT_SYMBOL_GPL(tmem_localify_get_pampd); void tmem_localify_finish(struct tmem_obj *obj, uint32_t index, void *pampd, void *saved_hb, bool delete) @@ -658,6 +660,7 @@ void tmem_localify_finish(struct tmem_obj *obj, uint32_t index, } spin_unlock(&hb->lock); } +EXPORT_SYMBOL_GPL(tmem_localify_finish); /* * For ramster only. Helper function to support asynchronous tmem_get. @@ -719,6 +722,7 @@ out: spin_unlock(&hb->lock); return ret; } +EXPORT_SYMBOL_GPL(tmem_replace); #endif /* diff --git a/drivers/staging/zcache/tmem.h b/drivers/staging/zcache/tmem.h index adbe5a8f28aa..d128ce290f1f 100644 --- a/drivers/staging/zcache/tmem.h +++ b/drivers/staging/zcache/tmem.h @@ -126,7 +126,7 @@ static inline unsigned tmem_oid_hash(struct tmem_oid *oidp) TMEM_HASH_BUCKET_BITS); } -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) struct tmem_xhandle { uint8_t client_id; uint8_t xh_data_cksum; @@ -171,7 +171,7 @@ struct tmem_obj { unsigned int objnode_tree_height; unsigned long objnode_count; long pampd_count; -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) /* * for current design of ramster, all pages belonging to * an object reside on the same remotenode and extra is @@ -215,7 +215,7 @@ struct tmem_pamops { uint32_t); void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t, bool); -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) void (*new_obj)(struct tmem_obj *); void (*free_obj)(struct tmem_pool *, struct tmem_obj *, bool); void *(*repatriate_preload)(void *, struct tmem_pool *, @@ -247,7 +247,7 @@ extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *, extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *); extern int tmem_destroy_pool(struct tmem_pool *); extern void tmem_new_pool(struct tmem_pool *, uint32_t); -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index, void *); extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *, diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index e23d814b5392..522cb8e55142 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -37,8 +37,10 @@ #include "debug.h" #ifdef CONFIG_RAMSTER static bool ramster_enabled __read_mostly; +static int disable_frontswap_selfshrink; #else #define ramster_enabled false +#define disable_frontswap_selfshrink 0 #endif #ifndef __PG_WAS_ACTIVE @@ -81,8 +83,12 @@ static char *namestr __read_mostly = "zcache"; (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) /* crypto API for zcache */ +#ifdef CONFIG_ZCACHE_MODULE +static char *zcache_comp_name = "lzo"; +#else #define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly; +#endif static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly; enum comp_op { @@ -1576,9 +1582,9 @@ static struct cleancache_ops zcache_cleancache_ops = { .init_fs = zcache_cleancache_init_fs }; -struct cleancache_ops zcache_cleancache_register_ops(void) +struct cleancache_ops *zcache_cleancache_register_ops(void) { - struct cleancache_ops old_ops = + struct cleancache_ops *old_ops = cleancache_register_ops(&zcache_cleancache_ops); return old_ops; @@ -1707,9 +1713,9 @@ static struct frontswap_ops zcache_frontswap_ops = { .init = zcache_frontswap_init }; -struct frontswap_ops zcache_frontswap_register_ops(void) +struct frontswap_ops *zcache_frontswap_register_ops(void) { - struct frontswap_ops old_ops = + struct frontswap_ops *old_ops = frontswap_register_ops(&zcache_frontswap_ops); return old_ops; @@ -1721,6 +1727,7 @@ struct frontswap_ops zcache_frontswap_register_ops(void) * OR NOTHING HAPPENS! */ +#ifndef CONFIG_ZCACHE_MODULE static int __init enable_zcache(char *s) { zcache_enabled = true; @@ -1787,18 +1794,27 @@ static int __init enable_zcache_compressor(char *s) return 1; } __setup("zcache=", enable_zcache_compressor); +#endif -static int __init zcache_comp_init(void) +static int zcache_comp_init(void) { int ret = 0; /* check crypto algorithm */ +#ifdef CONFIG_ZCACHE_MODULE + ret = crypto_has_comp(zcache_comp_name, 0, 0); + if (!ret) { + ret = -1; + goto out; + } +#else if (*zcache_comp_name != '\0') { ret = crypto_has_comp(zcache_comp_name, 0, 0); if (!ret) pr_info("zcache: %s not supported\n", zcache_comp_name); + goto out; } if (!ret) strcpy(zcache_comp_name, "lzo"); @@ -1807,6 +1823,7 @@ static int __init zcache_comp_init(void) ret = 1; goto out; } +#endif pr_info("zcache: using %s compressor\n", zcache_comp_name); /* alloc percpu transforms */ @@ -1818,10 +1835,13 @@ out: return ret; } -static int __init zcache_init(void) +static int zcache_init(void) { int ret = 0; +#ifdef CONFIG_ZCACHE_MODULE + zcache_enabled = 1; +#endif if (ramster_enabled) { namestr = "ramster"; ramster_register_pamops(&zcache_pamops); @@ -1860,7 +1880,7 @@ static int __init zcache_init(void) } zbud_init(); if (zcache_enabled && !disable_cleancache) { - struct cleancache_ops old_ops; + struct cleancache_ops *old_ops; register_shrinker(&zcache_shrinker); old_ops = zcache_cleancache_register_ops(); @@ -1870,11 +1890,11 @@ static int __init zcache_init(void) pr_info("%s: cleancache: ignorenonactive = %d\n", namestr, !disable_cleancache_ignore_nonactive); #endif - if (old_ops.init_fs != NULL) + if (old_ops != NULL) pr_warn("%s: cleancache_ops overridden\n", namestr); } if (zcache_enabled && !disable_frontswap) { - struct frontswap_ops old_ops; + struct frontswap_ops *old_ops; old_ops = zcache_frontswap_register_ops(); if (frontswap_has_exclusive_gets) @@ -1886,14 +1906,36 @@ static int __init zcache_init(void) namestr, frontswap_has_exclusive_gets, !disable_frontswap_ignore_nonactive); #endif - if (old_ops.init != NULL) + if (IS_ERR(old_ops) || old_ops) { + if (IS_ERR(old_ops)) + return PTR_RET(old_ops); pr_warn("%s: frontswap_ops overridden\n", namestr); + } } if (ramster_enabled) ramster_init(!disable_cleancache, !disable_frontswap, - frontswap_has_exclusive_gets); + frontswap_has_exclusive_gets, + !disable_frontswap_selfshrink); out: return ret; } +#ifdef CONFIG_ZCACHE_MODULE +#ifdef CONFIG_RAMSTER +module_param(ramster_enabled, int, S_IRUGO); +module_param(disable_frontswap_selfshrink, int, S_IRUGO); +#endif +module_param(disable_cleancache, int, S_IRUGO); +module_param(disable_frontswap, int, S_IRUGO); +#ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS +module_param(frontswap_has_exclusive_gets, bool, S_IRUGO); +#endif +module_param(disable_frontswap_ignore_nonactive, int, S_IRUGO); +module_param(zcache_comp_name, charp, S_IRUGO); +module_init(zcache_init); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Magenheimer <dan.magenheimer@oracle.com>"); +MODULE_DESCRIPTION("In-kernel compression of cleancache/frontswap pages"); +#else late_initcall(zcache_init); +#endif diff --git a/drivers/staging/zcache/zcache.h b/drivers/staging/zcache/zcache.h index 81722b33b087..849120095e79 100644 --- a/drivers/staging/zcache/zcache.h +++ b/drivers/staging/zcache/zcache.h @@ -39,7 +39,7 @@ extern int zcache_flush_page(int, int, struct tmem_oid *, uint32_t); extern int zcache_flush_object(int, int, struct tmem_oid *); extern void zcache_decompress_to_page(char *, unsigned int, struct page *); -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) extern void *zcache_pampd_create(char *, unsigned int, bool, int, struct tmem_handle *); int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 8bcc514ec8b6..c2e5ca9b8303 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -271,7 +271,8 @@ static void iblock_complete_cmd(struct se_cmd *cmd) kfree(ibr); } -static void iblock_bio_done(struct bio *bio, int err) +static void iblock_bio_done(struct bio *bio, int err, + struct batch_complete *batch) { struct se_cmd *cmd = bio->bi_private; struct iblock_req *ibr = cmd->priv; @@ -335,7 +336,8 @@ static void iblock_submit_bios(struct bio_list *list, int rw) blk_finish_plug(&plug); } -static void iblock_end_io_flush(struct bio *bio, int err) +static void iblock_end_io_flush(struct bio *bio, int err, + struct batch_complete *batch) { struct se_cmd *cmd = bio->bi_private; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index e992b27aa090..1e9873179860 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -835,7 +835,8 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b) return bl; } -static void pscsi_bi_endio(struct bio *bio, int error) +static void pscsi_bi_endio(struct bio *bio, int error, + struct batch_complete *batch) { bio_put(bio); } diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index f52dcfe8f545..24bd363ca351 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -3099,7 +3099,7 @@ static int init_dma_pools(struct udc *dev) } /* DMA setup */ - dev->data_requests = dma_pool_create("data_requests", NULL, + dev->data_requests = dma_pool_create("data_requests", &dev->pdev->dev, sizeof(struct udc_data_dma), 0, 0); if (!dev->data_requests) { DBG(dev, "can't get request data pool\n"); @@ -3111,7 +3111,7 @@ static int init_dma_pools(struct udc *dev) dev->ep[UDC_EP0IN_IX].dma = &dev->regs->ctl; /* dma desc for setup data */ - dev->stp_requests = dma_pool_create("setup requests", NULL, + dev->stp_requests = dma_pool_create("setup requests", &dev->pdev->dev, sizeof(struct udc_stp_dma), 0, 0); if (!dev->stp_requests) { DBG(dev, "can't get stp request pool\n"); diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index dda0dc4a5567..570c005062ab 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -24,6 +24,8 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/poll.h> +#include <linux/mmu_context.h> +#include <linux/aio.h> #include <linux/device.h> #include <linux/moduleparam.h> @@ -513,6 +515,9 @@ static long ep_ioctl(struct file *fd, unsigned code, unsigned long value) struct kiocb_priv { struct usb_request *req; struct ep_data *epdata; + struct kiocb *iocb; + struct mm_struct *mm; + struct work_struct work; void *buf; const struct iovec *iv; unsigned long nr_segs; @@ -528,7 +533,6 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e) local_irq_disable(); epdata = priv->epdata; // spin_lock(&epdata->dev->lock); - kiocbSetCancelled(iocb); if (likely(epdata && epdata->ep && priv->req)) value = usb_ep_dequeue (epdata->ep, priv->req); else @@ -540,15 +544,12 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e) return value; } -static ssize_t ep_aio_read_retry(struct kiocb *iocb) +static ssize_t ep_copy_to_user(struct kiocb_priv *priv) { - struct kiocb_priv *priv = iocb->private; ssize_t len, total; void *to_copy; int i; - /* we "retry" to get the right mm context for this: */ - /* copy stuff into user buffers */ total = priv->actual; len = 0; @@ -568,9 +569,26 @@ static ssize_t ep_aio_read_retry(struct kiocb *iocb) if (total == 0) break; } + + return len; +} + +static void ep_user_copy_worker(struct work_struct *work) +{ + struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work); + struct mm_struct *mm = priv->mm; + struct kiocb *iocb = priv->iocb; + size_t ret; + + use_mm(mm); + ret = ep_copy_to_user(priv); + unuse_mm(mm); + + /* completing the iocb can drop the ctx and mm, don't touch mm after */ + aio_complete(iocb, ret, ret); + kfree(priv->buf); kfree(priv); - return len; } static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) @@ -596,14 +614,14 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) aio_complete(iocb, req->actual ? req->actual : req->status, req->status); } else { - /* retry() won't report both; so we hide some faults */ + /* ep_copy_to_user() won't report both; we hide some faults */ if (unlikely(0 != req->status)) DBG(epdata->dev, "%s fault %d len %d\n", ep->name, req->status, req->actual); priv->buf = req->buf; priv->actual = req->actual; - kick_iocb(iocb); + schedule_work(&priv->work); } spin_unlock(&epdata->dev->lock); @@ -633,8 +651,10 @@ fail: return value; } iocb->private = priv; + priv->iocb = iocb; priv->iv = iv; priv->nr_segs = nr_segs; + INIT_WORK(&priv->work, ep_user_copy_worker); value = get_ready_ep(iocb->ki_filp->f_flags, epdata); if (unlikely(value < 0)) { @@ -642,10 +662,11 @@ fail: goto fail; } - iocb->ki_cancel = ep_aio_cancel; + kiocb_set_cancel_fn(iocb, ep_aio_cancel); get_ep(epdata); priv->epdata = epdata; priv->actual = 0; + priv->mm = current->mm; /* mm teardown waits for iocbs in exit_aio() */ /* each kiocb is coupled to one usb_request, but we can't * allocate or submit those if the host disconnected. @@ -674,7 +695,7 @@ fail: kfree(priv); put_ep(epdata); } else - value = (iv ? -EIOCBRETRY : -EIOCBQUEUED); + value = -EIOCBQUEUED; return value; } @@ -692,7 +713,6 @@ ep_aio_read(struct kiocb *iocb, const struct iovec *iov, if (unlikely(!buf)) return -ENOMEM; - iocb->ki_retry = ep_aio_read_retry; return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs); } diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c index 0b0d8bce842e..f4ae05f78c42 100644 --- a/drivers/uwb/rsv.c +++ b/drivers/uwb/rsv.c @@ -231,7 +231,7 @@ void uwb_rsv_backoff_win_increment(struct uwb_rc *rc) return; bow->window <<= 1; - bow->n = random32() & (bow->window - 1); + bow->n = prandom_u32() & (bow->window - 1); dev_dbg(dev, "new_window=%d, n=%d\n: ", bow->window, bow->n); /* reset the timer associated variables */ @@ -557,7 +557,7 @@ int uwb_rsv_establish(struct uwb_rsv *rsv) if (ret) goto out; - rsv->tiebreaker = random32() & 1; + rsv->tiebreaker = prandom_u32() & 1; /* get available mas bitmap */ uwb_drp_available(rc, &available); diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index fc0d3154f18b..f37d8ce8cc18 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -2442,6 +2442,32 @@ config FB_PUV3_UNIGFX Choose this option if you want to use the Unigfx device as a framebuffer device. Without the support of PCI & AGP. +config FB_HYPERV + tristate "Microsoft Hyper-V Synthetic Video support" + depends on FB && HYPERV + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This framebuffer driver supports Microsoft Hyper-V Synthetic Video. + +config FB_SIMPLE + bool "Simple framebuffer support" + depends on (FB = y) && OF + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Say Y if you want support for a simple frame-buffer. + + This driver assumes that the display hardware has been initialized + before the kernel boots, and the kernel will simply render to the + pre-allocated frame buffer surface. + + Configuration re: surface address, size, and format must be provided + through device tree, or potentially plain old platform data in the + future. + source "drivers/video/omap/Kconfig" source "drivers/video/omap2/Kconfig" source "drivers/video/exynos/Kconfig" diff --git a/drivers/video/Makefile b/drivers/video/Makefile index e414378d6a51..e8bae8dd4804 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -149,6 +149,7 @@ obj-$(CONFIG_FB_MSM) += msm/ obj-$(CONFIG_FB_NUC900) += nuc900fb.o obj-$(CONFIG_FB_JZ4740) += jz4740_fb.o obj-$(CONFIG_FB_PUV3_UNIGFX) += fb-puv3.o +obj-$(CONFIG_FB_HYPERV) += hyperv_fb.o # Platform or fallback drivers go here obj-$(CONFIG_FB_UVESA) += uvesafb.o @@ -165,6 +166,7 @@ obj-$(CONFIG_FB_MX3) += mx3fb.o obj-$(CONFIG_FB_DA8XX) += da8xx-fb.o obj-$(CONFIG_FB_MXS) += mxsfb.o obj-$(CONFIG_FB_SSD1307) += ssd1307fb.o +obj-$(CONFIG_FB_SIMPLE) += simplefb.o # the test framebuffer is last obj-$(CONFIG_FB_VIRTUAL) += vfb.o diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index b83d1557d421..d5ab6583f440 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -59,6 +59,13 @@ config LCD_LTV350QV The LTV350QV panel is present on all ATSTK1000 boards. +config LCD_ILI922X + tristate "ILI Technology ILI9221/ILI9222 support" + depends on SPI + help + If you have a panel based on the ILI9221/9222 controller + chip then say y to include a driver for it. + config LCD_ILI9320 tristate "ILI Technology ILI9320 controller support" depends on SPI @@ -161,7 +168,7 @@ if BACKLIGHT_CLASS_DEVICE config BACKLIGHT_ATMEL_LCDC bool "Atmel LCDC Contrast-as-Backlight control" depends on FB_ATMEL - default y if MACH_SAM9261EK || MACH_SAM9G10EK || MACH_SAM9263EK + default y if MACH_AT91SAM9261EK || MACH_AT91SAM9G10EK || MACH_AT91SAM9263EK help This provides a backlight control internal to the Atmel LCDC driver. If the LCD "contrast control" on your board is wired diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 96c4d620c5ce..92711fe60464 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_LCD_CLASS_DEVICE) += lcd.o obj-$(CONFIG_LCD_CORGI) += corgi_lcd.o obj-$(CONFIG_LCD_HP700) += jornada720_lcd.o obj-$(CONFIG_LCD_HX8357) += hx8357.o +obj-$(CONFIG_LCD_ILI922X) += ili922x.o obj-$(CONFIG_LCD_ILI9320) += ili9320.o obj-$(CONFIG_LCD_L4F00242T03) += l4f00242t03.o obj-$(CONFIG_LCD_LD9040) += ld9040.o diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c index a1e41d4faa71..c84701b7ca6e 100644 --- a/drivers/video/backlight/adp5520_bl.c +++ b/drivers/video/backlight/adp5520_bl.c @@ -143,13 +143,16 @@ static int adp5520_bl_setup(struct backlight_device *bl) static ssize_t adp5520_show(struct device *dev, char *buf, int reg) { struct adp5520_bl *data = dev_get_drvdata(dev); - int error; + int ret; uint8_t reg_val; mutex_lock(&data->lock); - error = adp5520_read(data->master, reg, ®_val); + ret = adp5520_read(data->master, reg, ®_val); mutex_unlock(&data->lock); + if (ret < 0) + return ret; + return sprintf(buf, "%u\n", reg_val); } @@ -349,35 +352,34 @@ static int adp5520_bl_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int adp5520_bl_suspend(struct platform_device *pdev, - pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int adp5520_bl_suspend(struct device *dev) { - struct backlight_device *bl = platform_get_drvdata(pdev); + struct backlight_device *bl = dev_get_drvdata(dev); + return adp5520_bl_set(bl, 0); } -static int adp5520_bl_resume(struct platform_device *pdev) +static int adp5520_bl_resume(struct device *dev) { - struct backlight_device *bl = platform_get_drvdata(pdev); + struct backlight_device *bl = dev_get_drvdata(dev); backlight_update_status(bl); return 0; } -#else -#define adp5520_bl_suspend NULL -#define adp5520_bl_resume NULL #endif +static SIMPLE_DEV_PM_OPS(adp5520_bl_pm_ops, adp5520_bl_suspend, + adp5520_bl_resume); + static struct platform_driver adp5520_bl_driver = { .driver = { .name = "adp5520-backlight", .owner = THIS_MODULE, + .pm = &adp5520_bl_pm_ops, }, .probe = adp5520_bl_probe, .remove = adp5520_bl_remove, - .suspend = adp5520_bl_suspend, - .resume = adp5520_bl_resume, }; module_platform_driver(adp5520_bl_driver); diff --git a/drivers/video/backlight/adp8860_bl.c b/drivers/video/backlight/adp8860_bl.c index a77c9cad3320..75b10f876127 100644 --- a/drivers/video/backlight/adp8860_bl.c +++ b/drivers/video/backlight/adp8860_bl.c @@ -249,12 +249,14 @@ static int adp8860_led_probe(struct i2c_client *client) if (led_dat->id > 7 || led_dat->id < 1) { dev_err(&client->dev, "Invalid LED ID %d\n", led_dat->id); + ret = -EINVAL; goto err; } if (pdata->bl_led_assign & (1 << (led_dat->id - 1))) { dev_err(&client->dev, "LED %d used by Backlight\n", led_dat->id); + ret = -EBUSY; goto err; } @@ -773,25 +775,29 @@ static int adp8860_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int adp8860_i2c_suspend(struct i2c_client *client, pm_message_t message) +#ifdef CONFIG_PM_SLEEP +static int adp8860_i2c_suspend(struct device *dev) { + struct i2c_client *client = to_i2c_client(dev); + adp8860_clr_bits(client, ADP8860_MDCR, NSTBY); return 0; } -static int adp8860_i2c_resume(struct i2c_client *client) +static int adp8860_i2c_resume(struct device *dev) { + struct i2c_client *client = to_i2c_client(dev); + adp8860_set_bits(client, ADP8860_MDCR, NSTBY | BLEN); return 0; } -#else -#define adp8860_i2c_suspend NULL -#define adp8860_i2c_resume NULL #endif +static SIMPLE_DEV_PM_OPS(adp8860_i2c_pm_ops, adp8860_i2c_suspend, + adp8860_i2c_resume); + static const struct i2c_device_id adp8860_id[] = { { "adp8860", adp8860 }, { "adp8861", adp8861 }, @@ -802,12 +808,11 @@ MODULE_DEVICE_TABLE(i2c, adp8860_id); static struct i2c_driver adp8860_driver = { .driver = { - .name = KBUILD_MODNAME, + .name = KBUILD_MODNAME, + .pm = &adp8860_i2c_pm_ops, }, .probe = adp8860_probe, .remove = adp8860_remove, - .suspend = adp8860_i2c_suspend, - .resume = adp8860_i2c_resume, .id_table = adp8860_id, }; diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c index 712c25a0d8fe..90049d7b5c60 100644 --- a/drivers/video/backlight/adp8870_bl.c +++ b/drivers/video/backlight/adp8870_bl.c @@ -274,12 +274,14 @@ static int adp8870_led_probe(struct i2c_client *client) if (led_dat->id > 7 || led_dat->id < 1) { dev_err(&client->dev, "Invalid LED ID %d\n", led_dat->id); + ret = -EINVAL; goto err; } if (pdata->bl_led_assign & (1 << (led_dat->id - 1))) { dev_err(&client->dev, "LED %d used by Backlight\n", led_dat->id); + ret = -EBUSY; goto err; } @@ -895,13 +897,13 @@ static int adp8870_probe(struct i2c_client *client, data->bl = bl; - if (pdata->en_ambl_sens) + if (pdata->en_ambl_sens) { ret = sysfs_create_group(&bl->dev.kobj, &adp8870_bl_attr_group); - - if (ret) { - dev_err(&client->dev, "failed to register sysfs\n"); - goto out1; + if (ret) { + dev_err(&client->dev, "failed to register sysfs\n"); + goto out1; + } } ret = adp8870_bl_setup(bl); @@ -947,25 +949,29 @@ static int adp8870_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int adp8870_i2c_suspend(struct i2c_client *client, pm_message_t message) +#ifdef CONFIG_PM_SLEEP +static int adp8870_i2c_suspend(struct device *dev) { + struct i2c_client *client = to_i2c_client(dev); + adp8870_clr_bits(client, ADP8870_MDCR, NSTBY); return 0; } -static int adp8870_i2c_resume(struct i2c_client *client) +static int adp8870_i2c_resume(struct device *dev) { + struct i2c_client *client = to_i2c_client(dev); + adp8870_set_bits(client, ADP8870_MDCR, NSTBY | BLEN); return 0; } -#else -#define adp8870_i2c_suspend NULL -#define adp8870_i2c_resume NULL #endif +static SIMPLE_DEV_PM_OPS(adp8870_i2c_pm_ops, adp8870_i2c_suspend, + adp8870_i2c_resume); + static const struct i2c_device_id adp8870_id[] = { { "adp8870", 0 }, { } @@ -974,12 +980,11 @@ MODULE_DEVICE_TABLE(i2c, adp8870_id); static struct i2c_driver adp8870_driver = { .driver = { - .name = KBUILD_MODNAME, + .name = KBUILD_MODNAME, + .pm = &adp8870_i2c_pm_ops, }, .probe = adp8870_probe, .remove = adp8870_remove, - .suspend = adp8870_i2c_suspend, - .resume = adp8870_i2c_resume, .id_table = adp8870_id, }; diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c index c02aa2c2575a..319fef6cb422 100644 --- a/drivers/video/backlight/ams369fg06.c +++ b/drivers/video/backlight/ams369fg06.c @@ -533,12 +533,12 @@ static int ams369fg06_remove(struct spi_device *spi) return 0; } -#if defined(CONFIG_PM) -static int ams369fg06_suspend(struct spi_device *spi, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP +static int ams369fg06_suspend(struct device *dev) { - struct ams369fg06 *lcd = spi_get_drvdata(spi); + struct ams369fg06 *lcd = dev_get_drvdata(dev); - dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power); + dev_dbg(dev, "lcd->power = %d\n", lcd->power); /* * when lcd panel is suspend, lcd panel becomes off @@ -547,19 +547,19 @@ static int ams369fg06_suspend(struct spi_device *spi, pm_message_t mesg) return ams369fg06_power(lcd, FB_BLANK_POWERDOWN); } -static int ams369fg06_resume(struct spi_device *spi) +static int ams369fg06_resume(struct device *dev) { - struct ams369fg06 *lcd = spi_get_drvdata(spi); + struct ams369fg06 *lcd = dev_get_drvdata(dev); lcd->power = FB_BLANK_POWERDOWN; return ams369fg06_power(lcd, FB_BLANK_UNBLANK); } -#else -#define ams369fg06_suspend NULL -#define ams369fg06_resume NULL #endif +static SIMPLE_DEV_PM_OPS(ams369fg06_pm_ops, ams369fg06_suspend, + ams369fg06_resume); + static void ams369fg06_shutdown(struct spi_device *spi) { struct ams369fg06 *lcd = spi_get_drvdata(spi); @@ -571,12 +571,11 @@ static struct spi_driver ams369fg06_driver = { .driver = { .name = "ams369fg06", .owner = THIS_MODULE, + .pm = &ams369fg06_pm_ops, }, .probe = ams369fg06_probe, .remove = ams369fg06_remove, .shutdown = ams369fg06_shutdown, - .suspend = ams369fg06_suspend, - .resume = ams369fg06_resume, }; module_spi_driver(ams369fg06_driver); diff --git a/drivers/video/backlight/as3711_bl.c b/drivers/video/backlight/as3711_bl.c index 41d52fe52543..123887cd76bd 100644 --- a/drivers/video/backlight/as3711_bl.c +++ b/drivers/video/backlight/as3711_bl.c @@ -258,6 +258,109 @@ static int as3711_bl_register(struct platform_device *pdev, return 0; } +static int as3711_backlight_parse_dt(struct device *dev) +{ + struct as3711_bl_pdata *pdata = dev_get_platdata(dev); + struct device_node *bl = + of_find_node_by_name(dev->parent->of_node, "backlight"), *fb; + int ret; + + if (!bl) { + dev_dbg(dev, "backlight node not found\n"); + return -ENODEV; + } + + fb = of_parse_phandle(bl, "su1-dev", 0); + if (fb) { + pdata->su1_fb = fb->full_name; + + ret = of_property_read_u32(bl, "su1-max-uA", &pdata->su1_max_uA); + if (pdata->su1_max_uA <= 0) + ret = -EINVAL; + if (ret < 0) + return ret; + } + + fb = of_parse_phandle(bl, "su2-dev", 0); + if (fb) { + int count = 0; + + pdata->su2_fb = fb->full_name; + + ret = of_property_read_u32(bl, "su2-max-uA", &pdata->su2_max_uA); + if (pdata->su2_max_uA <= 0) + ret = -EINVAL; + if (ret < 0) + return ret; + + if (of_find_property(bl, "su2-feedback-voltage", NULL)) { + pdata->su2_feedback = AS3711_SU2_VOLTAGE; + count++; + } + if (of_find_property(bl, "su2-feedback-curr1", NULL)) { + pdata->su2_feedback = AS3711_SU2_CURR1; + count++; + } + if (of_find_property(bl, "su2-feedback-curr2", NULL)) { + pdata->su2_feedback = AS3711_SU2_CURR2; + count++; + } + if (of_find_property(bl, "su2-feedback-curr3", NULL)) { + pdata->su2_feedback = AS3711_SU2_CURR3; + count++; + } + if (of_find_property(bl, "su2-feedback-curr-auto", NULL)) { + pdata->su2_feedback = AS3711_SU2_CURR_AUTO; + count++; + } + if (count != 1) + return -EINVAL; + + count = 0; + if (of_find_property(bl, "su2-fbprot-lx-sd4", NULL)) { + pdata->su2_fbprot = AS3711_SU2_LX_SD4; + count++; + } + if (of_find_property(bl, "su2-fbprot-gpio2", NULL)) { + pdata->su2_fbprot = AS3711_SU2_GPIO2; + count++; + } + if (of_find_property(bl, "su2-fbprot-gpio3", NULL)) { + pdata->su2_fbprot = AS3711_SU2_GPIO3; + count++; + } + if (of_find_property(bl, "su2-fbprot-gpio4", NULL)) { + pdata->su2_fbprot = AS3711_SU2_GPIO4; + count++; + } + if (count != 1) + return -EINVAL; + + count = 0; + if (of_find_property(bl, "su2-auto-curr1", NULL)) { + pdata->su2_auto_curr1 = true; + count++; + } + if (of_find_property(bl, "su2-auto-curr2", NULL)) { + pdata->su2_auto_curr2 = true; + count++; + } + if (of_find_property(bl, "su2-auto-curr3", NULL)) { + pdata->su2_auto_curr3 = true; + count++; + } + + /* + * At least one su2-auto-curr* must be specified iff + * AS3711_SU2_CURR_AUTO is used + */ + if (!count ^ (pdata->su2_feedback != AS3711_SU2_CURR_AUTO)) + return -EINVAL; + } + + return 0; +} + static int as3711_backlight_probe(struct platform_device *pdev) { struct as3711_bl_pdata *pdata = dev_get_platdata(&pdev->dev); @@ -267,11 +370,24 @@ static int as3711_backlight_probe(struct platform_device *pdev) unsigned int max_brightness; int ret; - if (!pdata || (!pdata->su1_fb && !pdata->su2_fb)) { + if (!pdata) { dev_err(&pdev->dev, "No platform data, exiting...\n"); return -ENODEV; } + if (pdev->dev.parent->of_node) { + ret = as3711_backlight_parse_dt(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "DT parsing failed: %d\n", ret); + return ret; + } + } + + if (!pdata->su1_fb && !pdata->su2_fb) { + dev_err(&pdev->dev, "No framebuffer specified\n"); + return -EINVAL; + } + /* * Due to possible hardware damage I chose to block all modes, * unsupported on my hardware. Anyone, wishing to use any of those modes diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c index de5e5e74e2a7..a60d6afca97c 100644 --- a/drivers/video/backlight/atmel-pwm-bl.c +++ b/drivers/video/backlight/atmel-pwm-bl.c @@ -118,7 +118,7 @@ static const struct backlight_ops atmel_pwm_bl_ops = { .update_status = atmel_pwm_bl_set_intensity, }; -static int atmel_pwm_bl_probe(struct platform_device *pdev) +static int __init atmel_pwm_bl_probe(struct platform_device *pdev) { struct backlight_properties props; const struct atmel_pwm_bl_platform_data *pdata; @@ -225,17 +225,7 @@ static struct platform_driver atmel_pwm_bl_driver = { .remove = __exit_p(atmel_pwm_bl_remove), }; -static int __init atmel_pwm_bl_init(void) -{ - return platform_driver_probe(&atmel_pwm_bl_driver, atmel_pwm_bl_probe); -} -module_init(atmel_pwm_bl_init); - -static void __exit atmel_pwm_bl_exit(void) -{ - platform_driver_unregister(&atmel_pwm_bl_driver); -} -module_exit(atmel_pwm_bl_exit); +module_platform_driver_probe(atmel_pwm_bl_driver, atmel_pwm_bl_probe); MODULE_AUTHOR("Hans-Christian egtvedt <hans-christian.egtvedt@atmel.com>"); MODULE_DESCRIPTION("Atmel PWM backlight driver"); diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c index aa782f302983..c97867a717a7 100644 --- a/drivers/video/backlight/corgi_lcd.c +++ b/drivers/video/backlight/corgi_lcd.c @@ -457,10 +457,10 @@ static const struct backlight_ops corgi_bl_ops = { .update_status = corgi_bl_update_status, }; -#ifdef CONFIG_PM -static int corgi_lcd_suspend(struct spi_device *spi, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int corgi_lcd_suspend(struct device *dev) { - struct corgi_lcd *lcd = spi_get_drvdata(spi); + struct corgi_lcd *lcd = dev_get_drvdata(dev); corgibl_flags |= CORGIBL_SUSPENDED; corgi_bl_set_intensity(lcd, 0); @@ -468,20 +468,19 @@ static int corgi_lcd_suspend(struct spi_device *spi, pm_message_t state) return 0; } -static int corgi_lcd_resume(struct spi_device *spi) +static int corgi_lcd_resume(struct device *dev) { - struct corgi_lcd *lcd = spi_get_drvdata(spi); + struct corgi_lcd *lcd = dev_get_drvdata(dev); corgibl_flags &= ~CORGIBL_SUSPENDED; corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_UNBLANK); backlight_update_status(lcd->bl_dev); return 0; } -#else -#define corgi_lcd_suspend NULL -#define corgi_lcd_resume NULL #endif +static SIMPLE_DEV_PM_OPS(corgi_lcd_pm_ops, corgi_lcd_suspend, corgi_lcd_resume); + static int setup_gpio_backlight(struct corgi_lcd *lcd, struct corgi_lcd_platform_data *pdata) { @@ -611,11 +610,10 @@ static struct spi_driver corgi_lcd_driver = { .driver = { .name = "corgi-lcd", .owner = THIS_MODULE, + .pm = &corgi_lcd_pm_ops, }, .probe = corgi_lcd_probe, .remove = corgi_lcd_remove, - .suspend = corgi_lcd_suspend, - .resume = corgi_lcd_resume, }; module_spi_driver(corgi_lcd_driver); diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c index 8179cef0730f..67cadd30e273 100644 --- a/drivers/video/backlight/da903x_bl.c +++ b/drivers/video/backlight/da903x_bl.c @@ -88,16 +88,21 @@ static int da903x_backlight_update_status(struct backlight_device *bl) if (bl->props.fb_blank != FB_BLANK_UNBLANK) brightness = 0; + if (bl->props.state & BL_CORE_SUSPENDED) + brightness = 0; + return da903x_backlight_set(bl, brightness); } static int da903x_backlight_get_brightness(struct backlight_device *bl) { struct da903x_backlight_data *data = bl_get_data(bl); + return data->current_brightness; } static const struct backlight_ops da903x_backlight_ops = { + .options = BL_CORE_SUSPENDRESUME, .update_status = da903x_backlight_update_status, .get_brightness = da903x_backlight_get_brightness, }; @@ -161,35 +166,10 @@ static int da903x_backlight_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int da903x_backlight_suspend(struct device *dev) -{ - struct backlight_device *bl = dev_get_drvdata(dev); - - return da903x_backlight_set(bl, 0); -} - -static int da903x_backlight_resume(struct device *dev) -{ - struct backlight_device *bl = dev_get_drvdata(dev); - - backlight_update_status(bl); - return 0; -} - -static const struct dev_pm_ops da903x_backlight_pm_ops = { - .suspend = da903x_backlight_suspend, - .resume = da903x_backlight_resume, -}; -#endif - static struct platform_driver da903x_backlight_driver = { .driver = { .name = "da903x-backlight", .owner = THIS_MODULE, -#ifdef CONFIG_PM - .pm = &da903x_backlight_pm_ops, -#endif }, .probe = da903x_backlight_probe, .remove = da903x_backlight_remove, diff --git a/drivers/video/backlight/ep93xx_bl.c b/drivers/video/backlight/ep93xx_bl.c index ef3e21e8f825..33455821dd31 100644 --- a/drivers/video/backlight/ep93xx_bl.c +++ b/drivers/video/backlight/ep93xx_bl.c @@ -60,7 +60,7 @@ static const struct backlight_ops ep93xxbl_ops = { .get_brightness = ep93xxbl_get_brightness, }; -static int __init ep93xxbl_probe(struct platform_device *dev) +static int ep93xxbl_probe(struct platform_device *dev) { struct ep93xxbl *ep93xxbl; struct backlight_device *bl; @@ -115,35 +115,33 @@ static int ep93xxbl_remove(struct platform_device *dev) return 0; } -#ifdef CONFIG_PM -static int ep93xxbl_suspend(struct platform_device *dev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int ep93xxbl_suspend(struct device *dev) { - struct backlight_device *bl = platform_get_drvdata(dev); + struct backlight_device *bl = dev_get_drvdata(dev); return ep93xxbl_set(bl, 0); } -static int ep93xxbl_resume(struct platform_device *dev) +static int ep93xxbl_resume(struct device *dev) { - struct backlight_device *bl = platform_get_drvdata(dev); + struct backlight_device *bl = dev_get_drvdata(dev); backlight_update_status(bl); return 0; } -#else -#define ep93xxbl_suspend NULL -#define ep93xxbl_resume NULL #endif +static SIMPLE_DEV_PM_OPS(ep93xxbl_pm_ops, ep93xxbl_suspend, ep93xxbl_resume); + static struct platform_driver ep93xxbl_driver = { .driver = { .name = "ep93xx-bl", .owner = THIS_MODULE, + .pm = &ep93xxbl_pm_ops, }, .probe = ep93xxbl_probe, .remove = ep93xxbl_remove, - .suspend = ep93xxbl_suspend, - .resume = ep93xxbl_resume, }; module_platform_driver(ep93xxbl_driver); diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c index 0ae155be9c89..19e393b41438 100644 --- a/drivers/video/backlight/generic_bl.c +++ b/drivers/video/backlight/generic_bl.c @@ -9,8 +9,6 @@ * */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -108,7 +106,7 @@ static int genericbl_probe(struct platform_device *pdev) generic_backlight_device = bd; - pr_info("Generic Backlight Driver Initialized.\n"); + dev_info(&pdev->dev, "Generic Backlight Driver Initialized.\n"); return 0; } @@ -122,7 +120,7 @@ static int genericbl_remove(struct platform_device *pdev) backlight_device_unregister(bd); - pr_info("Generic Backlight Driver Unloaded\n"); + dev_info(&pdev->dev, "Generic Backlight Driver Unloaded\n"); return 0; } diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c index 5cefd73526f8..00076ecfe9b8 100644 --- a/drivers/video/backlight/hp680_bl.c +++ b/drivers/video/backlight/hp680_bl.c @@ -64,29 +64,28 @@ static void hp680bl_send_intensity(struct backlight_device *bd) } -#ifdef CONFIG_PM -static int hp680bl_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int hp680bl_suspend(struct device *dev) { - struct backlight_device *bd = platform_get_drvdata(pdev); + struct backlight_device *bd = dev_get_drvdata(dev); hp680bl_suspended = 1; hp680bl_send_intensity(bd); return 0; } -static int hp680bl_resume(struct platform_device *pdev) +static int hp680bl_resume(struct device *dev) { - struct backlight_device *bd = platform_get_drvdata(pdev); + struct backlight_device *bd = dev_get_drvdata(dev); hp680bl_suspended = 0; hp680bl_send_intensity(bd); return 0; } -#else -#define hp680bl_suspend NULL -#define hp680bl_resume NULL #endif +static SIMPLE_DEV_PM_OPS(hp680bl_pm_ops, hp680bl_suspend, hp680bl_resume); + static int hp680bl_set_intensity(struct backlight_device *bd) { hp680bl_send_intensity(bd); @@ -140,10 +139,9 @@ static int hp680bl_remove(struct platform_device *pdev) static struct platform_driver hp680bl_driver = { .probe = hp680bl_probe, .remove = hp680bl_remove, - .suspend = hp680bl_suspend, - .resume = hp680bl_resume, .driver = { .name = "hp680-bl", + .pm = &hp680bl_pm_ops, }, }; diff --git a/drivers/video/backlight/ili922x.c b/drivers/video/backlight/ili922x.c new file mode 100644 index 000000000000..d9f65c2d9b01 --- /dev/null +++ b/drivers/video/backlight/ili922x.c @@ -0,0 +1,555 @@ +/* + * (C) Copyright 2008 + * Stefano Babic, DENX Software Engineering, sbabic@denx.de. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This driver implements a lcd device for the ILITEK 922x display + * controller. The interface to the display is SPI and the display's + * memory is cyclically updated over the RGB interface. + */ + +#include <linux/fb.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/lcd.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/spi/spi.h> +#include <linux/string.h> + +/* Register offset, see manual section 8.2 */ +#define REG_START_OSCILLATION 0x00 +#define REG_DRIVER_CODE_READ 0x00 +#define REG_DRIVER_OUTPUT_CONTROL 0x01 +#define REG_LCD_AC_DRIVEING_CONTROL 0x02 +#define REG_ENTRY_MODE 0x03 +#define REG_COMPARE_1 0x04 +#define REG_COMPARE_2 0x05 +#define REG_DISPLAY_CONTROL_1 0x07 +#define REG_DISPLAY_CONTROL_2 0x08 +#define REG_DISPLAY_CONTROL_3 0x09 +#define REG_FRAME_CYCLE_CONTROL 0x0B +#define REG_EXT_INTF_CONTROL 0x0C +#define REG_POWER_CONTROL_1 0x10 +#define REG_POWER_CONTROL_2 0x11 +#define REG_POWER_CONTROL_3 0x12 +#define REG_POWER_CONTROL_4 0x13 +#define REG_RAM_ADDRESS_SET 0x21 +#define REG_WRITE_DATA_TO_GRAM 0x22 +#define REG_RAM_WRITE_MASK1 0x23 +#define REG_RAM_WRITE_MASK2 0x24 +#define REG_GAMMA_CONTROL_1 0x30 +#define REG_GAMMA_CONTROL_2 0x31 +#define REG_GAMMA_CONTROL_3 0x32 +#define REG_GAMMA_CONTROL_4 0x33 +#define REG_GAMMA_CONTROL_5 0x34 +#define REG_GAMMA_CONTROL_6 0x35 +#define REG_GAMMA_CONTROL_7 0x36 +#define REG_GAMMA_CONTROL_8 0x37 +#define REG_GAMMA_CONTROL_9 0x38 +#define REG_GAMMA_CONTROL_10 0x39 +#define REG_GATE_SCAN_CONTROL 0x40 +#define REG_VERT_SCROLL_CONTROL 0x41 +#define REG_FIRST_SCREEN_DRIVE_POS 0x42 +#define REG_SECOND_SCREEN_DRIVE_POS 0x43 +#define REG_RAM_ADDR_POS_H 0x44 +#define REG_RAM_ADDR_POS_V 0x45 +#define REG_OSCILLATOR_CONTROL 0x4F +#define REG_GPIO 0x60 +#define REG_OTP_VCM_PROGRAMMING 0x61 +#define REG_OTP_VCM_STATUS_ENABLE 0x62 +#define REG_OTP_PROGRAMMING_ID_KEY 0x65 + +/* + * maximum frequency for register access + * (not for the GRAM access) + */ +#define ILITEK_MAX_FREQ_REG 4000000 + +/* + * Device ID as found in the datasheet (supports 9221 and 9222) + */ +#define ILITEK_DEVICE_ID 0x9220 +#define ILITEK_DEVICE_ID_MASK 0xFFF0 + +/* Last two bits in the START BYTE */ +#define START_RS_INDEX 0 +#define START_RS_REG 1 +#define START_RW_WRITE 0 +#define START_RW_READ 1 + +/** + * START_BYTE(id, rs, rw) + * + * Set the start byte according to the required operation. + * The start byte is defined as: + * ---------------------------------- + * | 0 | 1 | 1 | 1 | 0 | ID | RS | RW | + * ---------------------------------- + * @id: display's id as set by the manufacturer + * @rs: operation type bit, one of: + * - START_RS_INDEX set the index register + * - START_RS_REG write/read registers/GRAM + * @rw: read/write operation + * - START_RW_WRITE write + * - START_RW_READ read + */ +#define START_BYTE(id, rs, rw) \ + (0x70 | (((id) & 0x01) << 2) | (((rs) & 0x01) << 1) | ((rw) & 0x01)) + +/** + * CHECK_FREQ_REG(spi_device s, spi_transfer x) - Check the frequency + * for the SPI transfer. According to the datasheet, the controller + * accept higher frequency for the GRAM transfer, but it requires + * lower frequency when the registers are read/written. + * The macro sets the frequency in the spi_transfer structure if + * the frequency exceeds the maximum value. + */ +#define CHECK_FREQ_REG(s, x) \ + do { \ + if (s->max_speed_hz > ILITEK_MAX_FREQ_REG) \ + ((struct spi_transfer *)x)->speed_hz = \ + ILITEK_MAX_FREQ_REG; \ + } while (0) + +#define CMD_BUFSIZE 16 + +#define POWER_IS_ON(pwr) ((pwr) <= FB_BLANK_NORMAL) + +#define set_tx_byte(b) (tx_invert ? ~(b) : b) + +/** + * ili922x_id - id as set by manufacturer + */ +static int ili922x_id = 1; +module_param(ili922x_id, int, 0); + +static int tx_invert; +module_param(tx_invert, int, 0); + +/** + * driver's private structure + */ +struct ili922x { + struct spi_device *spi; + struct lcd_device *ld; + int power; +}; + +/** + * ili922x_read_status - read status register from display + * @spi: spi device + * @rs: output value + */ +static int ili922x_read_status(struct spi_device *spi, u16 *rs) +{ + struct spi_message msg; + struct spi_transfer xfer; + unsigned char tbuf[CMD_BUFSIZE]; + unsigned char rbuf[CMD_BUFSIZE]; + int ret, i; + + memset(&xfer, 0, sizeof(struct spi_transfer)); + spi_message_init(&msg); + xfer.tx_buf = tbuf; + xfer.rx_buf = rbuf; + xfer.cs_change = 1; + CHECK_FREQ_REG(spi, &xfer); + + tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_INDEX, + START_RW_READ)); + /* + * we need 4-byte xfer here due to invalid dummy byte + * received after start byte + */ + for (i = 1; i < 4; i++) + tbuf[i] = set_tx_byte(0); /* dummy */ + + xfer.bits_per_word = 8; + xfer.len = 4; + spi_message_add_tail(&xfer, &msg); + ret = spi_sync(spi, &msg); + if (ret < 0) { + dev_dbg(&spi->dev, "Error sending SPI message 0x%x", ret); + return ret; + } + + *rs = (rbuf[2] << 8) + rbuf[3]; + return 0; +} + +/** + * ili922x_read - read register from display + * @spi: spi device + * @reg: offset of the register to be read + * @rx: output value + */ +static int ili922x_read(struct spi_device *spi, u8 reg, u16 *rx) +{ + struct spi_message msg; + struct spi_transfer xfer_regindex, xfer_regvalue; + unsigned char tbuf[CMD_BUFSIZE]; + unsigned char rbuf[CMD_BUFSIZE]; + int ret, len = 0, send_bytes; + + memset(&xfer_regindex, 0, sizeof(struct spi_transfer)); + memset(&xfer_regvalue, 0, sizeof(struct spi_transfer)); + spi_message_init(&msg); + xfer_regindex.tx_buf = tbuf; + xfer_regindex.rx_buf = rbuf; + xfer_regindex.cs_change = 1; + CHECK_FREQ_REG(spi, &xfer_regindex); + + tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_INDEX, + START_RW_WRITE)); + tbuf[1] = set_tx_byte(0); + tbuf[2] = set_tx_byte(reg); + xfer_regindex.bits_per_word = 8; + len = xfer_regindex.len = 3; + spi_message_add_tail(&xfer_regindex, &msg); + + send_bytes = len; + + tbuf[len++] = set_tx_byte(START_BYTE(ili922x_id, START_RS_REG, + START_RW_READ)); + tbuf[len++] = set_tx_byte(0); + tbuf[len] = set_tx_byte(0); + + xfer_regvalue.cs_change = 1; + xfer_regvalue.len = 3; + xfer_regvalue.tx_buf = &tbuf[send_bytes]; + xfer_regvalue.rx_buf = &rbuf[send_bytes]; + CHECK_FREQ_REG(spi, &xfer_regvalue); + + spi_message_add_tail(&xfer_regvalue, &msg); + ret = spi_sync(spi, &msg); + if (ret < 0) { + dev_dbg(&spi->dev, "Error sending SPI message 0x%x", ret); + return ret; + } + + *rx = (rbuf[1 + send_bytes] << 8) + rbuf[2 + send_bytes]; + return 0; +} + +/** + * ili922x_write - write a controller register + * @spi: struct spi_device * + * @reg: offset of the register to be written + * @value: value to be written + */ +static int ili922x_write(struct spi_device *spi, u8 reg, u16 value) +{ + struct spi_message msg; + struct spi_transfer xfer_regindex, xfer_regvalue; + unsigned char tbuf[CMD_BUFSIZE]; + unsigned char rbuf[CMD_BUFSIZE]; + int ret, len = 0; + + memset(&xfer_regindex, 0, sizeof(struct spi_transfer)); + memset(&xfer_regvalue, 0, sizeof(struct spi_transfer)); + + spi_message_init(&msg); + xfer_regindex.tx_buf = tbuf; + xfer_regindex.rx_buf = rbuf; + xfer_regindex.cs_change = 1; + CHECK_FREQ_REG(spi, &xfer_regindex); + + tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_INDEX, + START_RW_WRITE)); + tbuf[1] = set_tx_byte(0); + tbuf[2] = set_tx_byte(reg); + xfer_regindex.bits_per_word = 8; + xfer_regindex.len = 3; + spi_message_add_tail(&xfer_regindex, &msg); + + ret = spi_sync(spi, &msg); + + spi_message_init(&msg); + len = 0; + tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_REG, + START_RW_WRITE)); + tbuf[1] = set_tx_byte((value & 0xFF00) >> 8); + tbuf[2] = set_tx_byte(value & 0x00FF); + + xfer_regvalue.cs_change = 1; + xfer_regvalue.len = 3; + xfer_regvalue.tx_buf = tbuf; + xfer_regvalue.rx_buf = rbuf; + CHECK_FREQ_REG(spi, &xfer_regvalue); + + spi_message_add_tail(&xfer_regvalue, &msg); + + ret = spi_sync(spi, &msg); + if (ret < 0) { + dev_err(&spi->dev, "Error sending SPI message 0x%x", ret); + return ret; + } + return 0; +} + +#ifdef DEBUG +/** + * ili922x_reg_dump - dump all registers + */ +static void ili922x_reg_dump(struct spi_device *spi) +{ + u8 reg; + u16 rx; + + dev_dbg(&spi->dev, "ILI922x configuration registers:\n"); + for (reg = REG_START_OSCILLATION; + reg <= REG_OTP_PROGRAMMING_ID_KEY; reg++) { + ili922x_read(spi, reg, &rx); + dev_dbg(&spi->dev, "reg @ 0x%02X: 0x%04X\n", reg, rx); + } +} +#else +static inline void ili922x_reg_dump(struct spi_device *spi) {} +#endif + +/** + * set_write_to_gram_reg - initialize the display to write the GRAM + * @spi: spi device + */ +static void set_write_to_gram_reg(struct spi_device *spi) +{ + struct spi_message msg; + struct spi_transfer xfer; + unsigned char tbuf[CMD_BUFSIZE]; + + memset(&xfer, 0, sizeof(struct spi_transfer)); + + spi_message_init(&msg); + xfer.tx_buf = tbuf; + xfer.rx_buf = NULL; + xfer.cs_change = 1; + + tbuf[0] = START_BYTE(ili922x_id, START_RS_INDEX, START_RW_WRITE); + tbuf[1] = 0; + tbuf[2] = REG_WRITE_DATA_TO_GRAM; + + xfer.bits_per_word = 8; + xfer.len = 3; + spi_message_add_tail(&xfer, &msg); + spi_sync(spi, &msg); +} + +/** + * ili922x_poweron - turn the display on + * @spi: spi device + * + * The sequence to turn on the display is taken from + * the datasheet and/or the example code provided by the + * manufacturer. + */ +static int ili922x_poweron(struct spi_device *spi) +{ + int ret; + + /* Power on */ + ret = ili922x_write(spi, REG_POWER_CONTROL_1, 0x0000); + usleep_range(10000, 10500); + ret += ili922x_write(spi, REG_POWER_CONTROL_2, 0x0000); + ret += ili922x_write(spi, REG_POWER_CONTROL_3, 0x0000); + msleep(40); + ret += ili922x_write(spi, REG_POWER_CONTROL_4, 0x0000); + msleep(40); + /* register 0x56 is not documented in the datasheet */ + ret += ili922x_write(spi, 0x56, 0x080F); + ret += ili922x_write(spi, REG_POWER_CONTROL_1, 0x4240); + usleep_range(10000, 10500); + ret += ili922x_write(spi, REG_POWER_CONTROL_2, 0x0000); + ret += ili922x_write(spi, REG_POWER_CONTROL_3, 0x0014); + msleep(40); + ret += ili922x_write(spi, REG_POWER_CONTROL_4, 0x1319); + msleep(40); + + return ret; +} + +/** + * ili922x_poweroff - turn the display off + * @spi: spi device + */ +static int ili922x_poweroff(struct spi_device *spi) +{ + int ret; + + /* Power off */ + ret = ili922x_write(spi, REG_POWER_CONTROL_1, 0x0000); + usleep_range(10000, 10500); + ret += ili922x_write(spi, REG_POWER_CONTROL_2, 0x0000); + ret += ili922x_write(spi, REG_POWER_CONTROL_3, 0x0000); + msleep(40); + ret += ili922x_write(spi, REG_POWER_CONTROL_4, 0x0000); + msleep(40); + + return ret; +} + +/** + * ili922x_display_init - initialize the display by setting + * the configuration registers + * @spi: spi device + */ +static void ili922x_display_init(struct spi_device *spi) +{ + ili922x_write(spi, REG_START_OSCILLATION, 1); + usleep_range(10000, 10500); + ili922x_write(spi, REG_DRIVER_OUTPUT_CONTROL, 0x691B); + ili922x_write(spi, REG_LCD_AC_DRIVEING_CONTROL, 0x0700); + ili922x_write(spi, REG_ENTRY_MODE, 0x1030); + ili922x_write(spi, REG_COMPARE_1, 0x0000); + ili922x_write(spi, REG_COMPARE_2, 0x0000); + ili922x_write(spi, REG_DISPLAY_CONTROL_1, 0x0037); + ili922x_write(spi, REG_DISPLAY_CONTROL_2, 0x0202); + ili922x_write(spi, REG_DISPLAY_CONTROL_3, 0x0000); + ili922x_write(spi, REG_FRAME_CYCLE_CONTROL, 0x0000); + + /* Set RGB interface */ + ili922x_write(spi, REG_EXT_INTF_CONTROL, 0x0110); + + ili922x_poweron(spi); + + ili922x_write(spi, REG_GAMMA_CONTROL_1, 0x0302); + ili922x_write(spi, REG_GAMMA_CONTROL_2, 0x0407); + ili922x_write(spi, REG_GAMMA_CONTROL_3, 0x0304); + ili922x_write(spi, REG_GAMMA_CONTROL_4, 0x0203); + ili922x_write(spi, REG_GAMMA_CONTROL_5, 0x0706); + ili922x_write(spi, REG_GAMMA_CONTROL_6, 0x0407); + ili922x_write(spi, REG_GAMMA_CONTROL_7, 0x0706); + ili922x_write(spi, REG_GAMMA_CONTROL_8, 0x0000); + ili922x_write(spi, REG_GAMMA_CONTROL_9, 0x0C06); + ili922x_write(spi, REG_GAMMA_CONTROL_10, 0x0F00); + ili922x_write(spi, REG_RAM_ADDRESS_SET, 0x0000); + ili922x_write(spi, REG_GATE_SCAN_CONTROL, 0x0000); + ili922x_write(spi, REG_VERT_SCROLL_CONTROL, 0x0000); + ili922x_write(spi, REG_FIRST_SCREEN_DRIVE_POS, 0xDB00); + ili922x_write(spi, REG_SECOND_SCREEN_DRIVE_POS, 0xDB00); + ili922x_write(spi, REG_RAM_ADDR_POS_H, 0xAF00); + ili922x_write(spi, REG_RAM_ADDR_POS_V, 0xDB00); + ili922x_reg_dump(spi); + set_write_to_gram_reg(spi); +} + +static int ili922x_lcd_power(struct ili922x *lcd, int power) +{ + int ret = 0; + + if (POWER_IS_ON(power) && !POWER_IS_ON(lcd->power)) + ret = ili922x_poweron(lcd->spi); + else if (!POWER_IS_ON(power) && POWER_IS_ON(lcd->power)) + ret = ili922x_poweroff(lcd->spi); + + if (!ret) + lcd->power = power; + + return ret; +} + +static int ili922x_set_power(struct lcd_device *ld, int power) +{ + struct ili922x *ili = lcd_get_data(ld); + + return ili922x_lcd_power(ili, power); +} + +static int ili922x_get_power(struct lcd_device *ld) +{ + struct ili922x *ili = lcd_get_data(ld); + + return ili->power; +} + +static struct lcd_ops ili922x_ops = { + .get_power = ili922x_get_power, + .set_power = ili922x_set_power, +}; + +static int ili922x_probe(struct spi_device *spi) +{ + struct ili922x *ili; + struct lcd_device *lcd; + int ret; + u16 reg = 0; + + ili = devm_kzalloc(&spi->dev, sizeof(*ili), GFP_KERNEL); + if (!ili) { + dev_err(&spi->dev, "cannot alloc priv data\n"); + return -ENOMEM; + } + + ili->spi = spi; + spi_set_drvdata(spi, ili); + + /* check if the device is connected */ + ret = ili922x_read(spi, REG_DRIVER_CODE_READ, ®); + if (ret || ((reg & ILITEK_DEVICE_ID_MASK) != ILITEK_DEVICE_ID)) { + dev_err(&spi->dev, + "no LCD found: Chip ID 0x%x, ret %d\n", + reg, ret); + return -ENODEV; + } else { + dev_info(&spi->dev, "ILI%x found, SPI freq %d, mode %d\n", + reg, spi->max_speed_hz, spi->mode); + } + + ret = ili922x_read_status(spi, ®); + if (ret) { + dev_err(&spi->dev, "reading RS failed...\n"); + return ret; + } else + dev_dbg(&spi->dev, "status: 0x%x\n", reg); + + ili922x_display_init(spi); + + ili->power = FB_BLANK_POWERDOWN; + + lcd = lcd_device_register("ili922xlcd", &spi->dev, ili, + &ili922x_ops); + if (IS_ERR(lcd)) { + dev_err(&spi->dev, "cannot register LCD\n"); + return PTR_ERR(lcd); + } + + ili->ld = lcd; + spi_set_drvdata(spi, ili); + + ili922x_lcd_power(ili, FB_BLANK_UNBLANK); + + return 0; +} + +static int ili922x_remove(struct spi_device *spi) +{ + struct ili922x *ili = spi_get_drvdata(spi); + + ili922x_poweroff(spi); + lcd_device_unregister(ili->ld); + return 0; +} + +static struct spi_driver ili922x_driver = { + .driver = { + .name = "ili922x", + .owner = THIS_MODULE, + }, + .probe = ili922x_probe, + .remove = ili922x_remove, +}; + +module_spi_driver(ili922x_driver); + +MODULE_AUTHOR("Stefano Babic <sbabic@denx.de>"); +MODULE_DESCRIPTION("ILI9221/9222 LCD driver"); +MODULE_LICENSE("GPL"); +MODULE_PARM_DESC(ili922x_id, "set controller identifier (default=1)"); +MODULE_PARM_DESC(tx_invert, "invert bytes before sending"); diff --git a/drivers/video/backlight/ili9320.c b/drivers/video/backlight/ili9320.c index 1235bf9defc4..f8be90c5dedc 100644 --- a/drivers/video/backlight/ili9320.c +++ b/drivers/video/backlight/ili9320.c @@ -231,7 +231,7 @@ int ili9320_probe_spi(struct spi_device *spi, ili->power = FB_BLANK_POWERDOWN; ili->platdata = cfg; - dev_set_drvdata(&spi->dev, ili); + spi_set_drvdata(spi, ili); ili9320_setup_spi(ili, spi); @@ -270,27 +270,21 @@ int ili9320_remove(struct ili9320 *ili) } EXPORT_SYMBOL_GPL(ili9320_remove); -#ifdef CONFIG_PM -int ili9320_suspend(struct ili9320 *lcd, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +int ili9320_suspend(struct ili9320 *lcd) { int ret; - dev_dbg(lcd->dev, "%s: event %d\n", __func__, state.event); + ret = ili9320_power(lcd, FB_BLANK_POWERDOWN); - if (state.event == PM_EVENT_SUSPEND) { - ret = ili9320_power(lcd, FB_BLANK_POWERDOWN); - - if (lcd->platdata->suspend == ILI9320_SUSPEND_DEEP) { - ili9320_write(lcd, ILI9320_POWER1, lcd->power1 | - ILI9320_POWER1_SLP | - ILI9320_POWER1_DSTB); - lcd->initialised = 0; - } - - return ret; + if (lcd->platdata->suspend == ILI9320_SUSPEND_DEEP) { + ili9320_write(lcd, ILI9320_POWER1, lcd->power1 | + ILI9320_POWER1_SLP | + ILI9320_POWER1_DSTB); + lcd->initialised = 0; } - return 0; + return ret; } EXPORT_SYMBOL_GPL(ili9320_suspend); diff --git a/drivers/video/backlight/ili9320.h b/drivers/video/backlight/ili9320.h index e0db738f7bb9..42329e7aa9a8 100644 --- a/drivers/video/backlight/ili9320.h +++ b/drivers/video/backlight/ili9320.h @@ -76,5 +76,5 @@ extern void ili9320_shutdown(struct ili9320 *lcd); /* PM */ -extern int ili9320_suspend(struct ili9320 *lcd, pm_message_t state); +extern int ili9320_suspend(struct ili9320 *lcd); extern int ili9320_resume(struct ili9320 *lcd); diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c index fef6ce4fad71..3ccb89340f22 100644 --- a/drivers/video/backlight/jornada720_bl.c +++ b/drivers/video/backlight/jornada720_bl.c @@ -9,8 +9,6 @@ * */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/backlight.h> #include <linux/device.h> #include <linux/fb.h> @@ -40,11 +38,13 @@ static int jornada_bl_get_brightness(struct backlight_device *bd) ret = jornada_ssp_byte(GETBRIGHTNESS); if (jornada_ssp_byte(GETBRIGHTNESS) != TXDUMMY) { - pr_err("get brightness timeout\n"); + dev_err(&bd->dev, "get brightness timeout\n"); jornada_ssp_end(); return -ETIMEDOUT; - } else /* exchange txdummy for value */ + } else { + /* exchange txdummy for value */ ret = jornada_ssp_byte(TXDUMMY); + } jornada_ssp_end(); @@ -61,7 +61,7 @@ static int jornada_bl_update_status(struct backlight_device *bd) if ((bd->props.power != FB_BLANK_UNBLANK) || (bd->props.fb_blank != FB_BLANK_UNBLANK)) { ret = jornada_ssp_byte(BRIGHTNESSOFF); if (ret != TXDUMMY) { - pr_info("brightness off timeout\n"); + dev_info(&bd->dev, "brightness off timeout\n"); /* turn off backlight */ PPSR &= ~PPC_LDD1; PPDR |= PPC_LDD1; @@ -72,7 +72,7 @@ static int jornada_bl_update_status(struct backlight_device *bd) /* send command to our mcu */ if (jornada_ssp_byte(SETBRIGHTNESS) != TXDUMMY) { - pr_info("failed to set brightness\n"); + dev_info(&bd->dev, "failed to set brightness\n"); ret = -ETIMEDOUT; goto out; } @@ -86,7 +86,7 @@ static int jornada_bl_update_status(struct backlight_device *bd) */ if (jornada_ssp_byte(BL_MAX_BRIGHT - bd->props.brightness) != TXDUMMY) { - pr_err("set brightness failed\n"); + dev_err(&bd->dev, "set brightness failed\n"); ret = -ETIMEDOUT; } @@ -120,7 +120,7 @@ static int jornada_bl_probe(struct platform_device *pdev) if (IS_ERR(bd)) { ret = PTR_ERR(bd); - pr_err("failed to register device, err=%x\n", ret); + dev_err(&pdev->dev, "failed to register device, err=%x\n", ret); return ret; } @@ -134,7 +134,7 @@ static int jornada_bl_probe(struct platform_device *pdev) jornada_bl_update_status(bd); platform_set_drvdata(pdev, bd); - pr_info("HP Jornada 700 series backlight driver\n"); + dev_info(&pdev->dev, "HP Jornada 700 series backlight driver\n"); return 0; } diff --git a/drivers/video/backlight/jornada720_lcd.c b/drivers/video/backlight/jornada720_lcd.c index 635b30523fd5..b061413f1a65 100644 --- a/drivers/video/backlight/jornada720_lcd.c +++ b/drivers/video/backlight/jornada720_lcd.c @@ -9,8 +9,6 @@ * */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/device.h> #include <linux/fb.h> #include <linux/kernel.h> @@ -27,7 +25,7 @@ #define LCD_MAX_CONTRAST 0xff #define LCD_DEF_CONTRAST 0x80 -static int jornada_lcd_get_power(struct lcd_device *dev) +static int jornada_lcd_get_power(struct lcd_device *ld) { /* LDD2 in PPC = LCD POWER */ if (PPSR & PPC_LDD2) @@ -36,17 +34,17 @@ static int jornada_lcd_get_power(struct lcd_device *dev) return FB_BLANK_POWERDOWN; /* PW OFF */ } -static int jornada_lcd_get_contrast(struct lcd_device *dev) +static int jornada_lcd_get_contrast(struct lcd_device *ld) { int ret; - if (jornada_lcd_get_power(dev) != FB_BLANK_UNBLANK) + if (jornada_lcd_get_power(ld) != FB_BLANK_UNBLANK) return 0; jornada_ssp_start(); if (jornada_ssp_byte(GETCONTRAST) != TXDUMMY) { - pr_err("get contrast failed\n"); + dev_err(&ld->dev, "get contrast failed\n"); jornada_ssp_end(); return -ETIMEDOUT; } else { @@ -56,7 +54,7 @@ static int jornada_lcd_get_contrast(struct lcd_device *dev) } } -static int jornada_lcd_set_contrast(struct lcd_device *dev, int value) +static int jornada_lcd_set_contrast(struct lcd_device *ld, int value) { int ret; @@ -67,7 +65,7 @@ static int jornada_lcd_set_contrast(struct lcd_device *dev, int value) /* push the new value */ if (jornada_ssp_byte(value) != TXDUMMY) { - pr_err("set contrast failed\n"); + dev_err(&ld->dev, "set contrast failed\n"); jornada_ssp_end(); return -ETIMEDOUT; } @@ -78,13 +76,14 @@ static int jornada_lcd_set_contrast(struct lcd_device *dev, int value) return 0; } -static int jornada_lcd_set_power(struct lcd_device *dev, int power) +static int jornada_lcd_set_power(struct lcd_device *ld, int power) { if (power != FB_BLANK_UNBLANK) { PPSR &= ~PPC_LDD2; PPDR |= PPC_LDD2; - } else + } else { PPSR |= PPC_LDD2; + } return 0; } @@ -105,7 +104,7 @@ static int jornada_lcd_probe(struct platform_device *pdev) if (IS_ERR(lcd_device)) { ret = PTR_ERR(lcd_device); - pr_err("failed to register device\n"); + dev_err(&pdev->dev, "failed to register device\n"); return ret; } diff --git a/drivers/video/backlight/kb3886_bl.c b/drivers/video/backlight/kb3886_bl.c index 6c5ed6b242cc..bca6ccc74dfb 100644 --- a/drivers/video/backlight/kb3886_bl.c +++ b/drivers/video/backlight/kb3886_bl.c @@ -106,29 +106,28 @@ static int kb3886bl_send_intensity(struct backlight_device *bd) return 0; } -#ifdef CONFIG_PM -static int kb3886bl_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int kb3886bl_suspend(struct device *dev) { - struct backlight_device *bd = platform_get_drvdata(pdev); + struct backlight_device *bd = dev_get_drvdata(dev); kb3886bl_flags |= KB3886BL_SUSPENDED; backlight_update_status(bd); return 0; } -static int kb3886bl_resume(struct platform_device *pdev) +static int kb3886bl_resume(struct device *dev) { - struct backlight_device *bd = platform_get_drvdata(pdev); + struct backlight_device *bd = dev_get_drvdata(dev); kb3886bl_flags &= ~KB3886BL_SUSPENDED; backlight_update_status(bd); return 0; } -#else -#define kb3886bl_suspend NULL -#define kb3886bl_resume NULL #endif +static SIMPLE_DEV_PM_OPS(kb3886bl_pm_ops, kb3886bl_suspend, kb3886bl_resume); + static int kb3886bl_get_intensity(struct backlight_device *bd) { return kb3886bl_intensity; @@ -179,10 +178,9 @@ static int kb3886bl_remove(struct platform_device *pdev) static struct platform_driver kb3886bl_driver = { .probe = kb3886bl_probe, .remove = kb3886bl_remove, - .suspend = kb3886bl_suspend, - .resume = kb3886bl_resume, .driver = { .name = "kb3886-bl", + .pm = &kb3886bl_pm_ops, }, }; diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c index fb6155771326..a35a38c709cf 100644 --- a/drivers/video/backlight/l4f00242t03.c +++ b/drivers/video/backlight/l4f00242t03.c @@ -51,14 +51,33 @@ static void l4f00242t03_lcd_init(struct spi_device *spi) struct l4f00242t03_pdata *pdata = spi->dev.platform_data; struct l4f00242t03_priv *priv = spi_get_drvdata(spi); const u16 cmd[] = { 0x36, param(0), 0x3A, param(0x60) }; + int ret; dev_dbg(&spi->dev, "initializing LCD\n"); - regulator_set_voltage(priv->io_reg, 1800000, 1800000); - regulator_enable(priv->io_reg); + ret = regulator_set_voltage(priv->io_reg, 1800000, 1800000); + if (ret) { + dev_err(&spi->dev, "failed to set the IO regulator voltage.\n"); + return; + } + ret = regulator_enable(priv->io_reg); + if (ret) { + dev_err(&spi->dev, "failed to enable the IO regulator.\n"); + return; + } - regulator_set_voltage(priv->core_reg, 2800000, 2800000); - regulator_enable(priv->core_reg); + ret = regulator_set_voltage(priv->core_reg, 2800000, 2800000); + if (ret) { + dev_err(&spi->dev, "failed to set the core regulator voltage.\n"); + regulator_disable(priv->io_reg); + return; + } + ret = regulator_enable(priv->core_reg); + if (ret) { + dev_err(&spi->dev, "failed to enable the core regulator.\n"); + regulator_disable(priv->io_reg); + return; + } l4f00242t03_reset(pdata->reset_gpio); diff --git a/drivers/video/backlight/ld9040.c b/drivers/video/backlight/ld9040.c index 1b642f5f381a..1e0a3093ce50 100644 --- a/drivers/video/backlight/ld9040.c +++ b/drivers/video/backlight/ld9040.c @@ -775,12 +775,12 @@ static int ld9040_remove(struct spi_device *spi) return 0; } -#if defined(CONFIG_PM) -static int ld9040_suspend(struct spi_device *spi, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP +static int ld9040_suspend(struct device *dev) { - struct ld9040 *lcd = spi_get_drvdata(spi); + struct ld9040 *lcd = dev_get_drvdata(dev); - dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power); + dev_dbg(dev, "lcd->power = %d\n", lcd->power); /* * when lcd panel is suspend, lcd panel becomes off @@ -789,19 +789,18 @@ static int ld9040_suspend(struct spi_device *spi, pm_message_t mesg) return ld9040_power(lcd, FB_BLANK_POWERDOWN); } -static int ld9040_resume(struct spi_device *spi) +static int ld9040_resume(struct device *dev) { - struct ld9040 *lcd = spi_get_drvdata(spi); + struct ld9040 *lcd = dev_get_drvdata(dev); lcd->power = FB_BLANK_POWERDOWN; return ld9040_power(lcd, FB_BLANK_UNBLANK); } -#else -#define ld9040_suspend NULL -#define ld9040_resume NULL #endif +static SIMPLE_DEV_PM_OPS(ld9040_pm_ops, ld9040_suspend, ld9040_resume); + /* Power down all displays on reboot, poweroff or halt. */ static void ld9040_shutdown(struct spi_device *spi) { @@ -814,12 +813,11 @@ static struct spi_driver ld9040_driver = { .driver = { .name = "ld9040", .owner = THIS_MODULE, + .pm = &ld9040_pm_ops, }, .probe = ld9040_probe, .remove = ld9040_remove, .shutdown = ld9040_shutdown, - .suspend = ld9040_suspend, - .resume = ld9040_resume, }; module_spi_driver(ld9040_driver); diff --git a/drivers/video/backlight/lm3533_bl.c b/drivers/video/backlight/lm3533_bl.c index 5d18d4d7f470..1d1dbfb789e3 100644 --- a/drivers/video/backlight/lm3533_bl.c +++ b/drivers/video/backlight/lm3533_bl.c @@ -368,29 +368,28 @@ static int lm3533_bl_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int lm3533_bl_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int lm3533_bl_suspend(struct device *dev) { - struct lm3533_bl *bl = platform_get_drvdata(pdev); + struct lm3533_bl *bl = dev_get_drvdata(dev); - dev_dbg(&pdev->dev, "%s\n", __func__); + dev_dbg(dev, "%s\n", __func__); return lm3533_ctrlbank_disable(&bl->cb); } -static int lm3533_bl_resume(struct platform_device *pdev) +static int lm3533_bl_resume(struct device *dev) { - struct lm3533_bl *bl = platform_get_drvdata(pdev); + struct lm3533_bl *bl = dev_get_drvdata(dev); - dev_dbg(&pdev->dev, "%s\n", __func__); + dev_dbg(dev, "%s\n", __func__); return lm3533_ctrlbank_enable(&bl->cb); } -#else -#define lm3533_bl_suspend NULL -#define lm3533_bl_resume NULL #endif +static SIMPLE_DEV_PM_OPS(lm3533_bl_pm_ops, lm3533_bl_suspend, lm3533_bl_resume); + static void lm3533_bl_shutdown(struct platform_device *pdev) { struct lm3533_bl *bl = platform_get_drvdata(pdev); @@ -404,12 +403,11 @@ static struct platform_driver lm3533_bl_driver = { .driver = { .name = "lm3533-backlight", .owner = THIS_MODULE, + .pm = &lm3533_bl_pm_ops, }, .probe = lm3533_bl_probe, .remove = lm3533_bl_remove, .shutdown = lm3533_bl_shutdown, - .suspend = lm3533_bl_suspend, - .resume = lm3533_bl_resume, }; module_platform_driver(lm3533_bl_driver); diff --git a/drivers/video/backlight/lms501kf03.c b/drivers/video/backlight/lms501kf03.c index b43882abefaf..cf01b9ac8131 100644 --- a/drivers/video/backlight/lms501kf03.c +++ b/drivers/video/backlight/lms501kf03.c @@ -387,13 +387,12 @@ static int lms501kf03_remove(struct spi_device *spi) return 0; } -#if defined(CONFIG_PM) - -static int lms501kf03_suspend(struct spi_device *spi, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP +static int lms501kf03_suspend(struct device *dev) { - struct lms501kf03 *lcd = spi_get_drvdata(spi); + struct lms501kf03 *lcd = dev_get_drvdata(dev); - dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power); + dev_dbg(dev, "lcd->power = %d\n", lcd->power); /* * when lcd panel is suspend, lcd panel becomes off @@ -402,19 +401,19 @@ static int lms501kf03_suspend(struct spi_device *spi, pm_message_t mesg) return lms501kf03_power(lcd, FB_BLANK_POWERDOWN); } -static int lms501kf03_resume(struct spi_device *spi) +static int lms501kf03_resume(struct device *dev) { - struct lms501kf03 *lcd = spi_get_drvdata(spi); + struct lms501kf03 *lcd = dev_get_drvdata(dev); lcd->power = FB_BLANK_POWERDOWN; return lms501kf03_power(lcd, FB_BLANK_UNBLANK); } -#else -#define lms501kf03_suspend NULL -#define lms501kf03_resume NULL #endif +static SIMPLE_DEV_PM_OPS(lms501kf03_pm_ops, lms501kf03_suspend, + lms501kf03_resume); + static void lms501kf03_shutdown(struct spi_device *spi) { struct lms501kf03 *lcd = spi_get_drvdata(spi); @@ -426,12 +425,11 @@ static struct spi_driver lms501kf03_driver = { .driver = { .name = "lms501kf03", .owner = THIS_MODULE, + .pm = &lms501kf03_pm_ops, }, .probe = lms501kf03_probe, .remove = lms501kf03_remove, .shutdown = lms501kf03_shutdown, - .suspend = lms501kf03_suspend, - .resume = lms501kf03_resume, }; module_spi_driver(lms501kf03_driver); diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c index 146fea8aa431..6c3ec4259a60 100644 --- a/drivers/video/backlight/locomolcd.c +++ b/drivers/video/backlight/locomolcd.c @@ -157,25 +157,24 @@ static const struct backlight_ops locomobl_data = { .update_status = locomolcd_set_intensity, }; -#ifdef CONFIG_PM -static int locomolcd_suspend(struct locomo_dev *dev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int locomolcd_suspend(struct device *dev) { locomolcd_flags |= LOCOMOLCD_SUSPENDED; locomolcd_set_intensity(locomolcd_bl_device); return 0; } -static int locomolcd_resume(struct locomo_dev *dev) +static int locomolcd_resume(struct device *dev) { locomolcd_flags &= ~LOCOMOLCD_SUSPENDED; locomolcd_set_intensity(locomolcd_bl_device); return 0; } -#else -#define locomolcd_suspend NULL -#define locomolcd_resume NULL #endif +static SIMPLE_DEV_PM_OPS(locomolcd_pm_ops, locomolcd_suspend, locomolcd_resume); + static int locomolcd_probe(struct locomo_dev *ldev) { struct backlight_properties props; @@ -230,13 +229,12 @@ static int locomolcd_remove(struct locomo_dev *dev) static struct locomo_driver poodle_lcd_driver = { .drv = { - .name = "locomo-backlight", + .name = "locomo-backlight", + .pm = &locomolcd_pm_ops, }, .devid = LOCOMO_DEVID_BACKLIGHT, .probe = locomolcd_probe, .remove = locomolcd_remove, - .suspend = locomolcd_suspend, - .resume = locomolcd_resume, }; static int __init locomolcd_init(void) diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c index 7ae9ae6f4655..c98bdbfdc697 100644 --- a/drivers/video/backlight/lp855x_bl.c +++ b/drivers/video/backlight/lp855x_bl.c @@ -35,7 +35,6 @@ #define LP8557_EPROM_START 0x10 #define LP8557_EPROM_END 0x1E -#define BUF_SIZE 20 #define DEFAULT_BL_NAME "lcd-backlight" #define MAX_BRIGHTNESS 255 @@ -304,7 +303,7 @@ static ssize_t lp855x_get_chip_id(struct device *dev, struct device_attribute *attr, char *buf) { struct lp855x *lp = dev_get_drvdata(dev); - return scnprintf(buf, BUF_SIZE, "%s\n", lp->chipname); + return scnprintf(buf, PAGE_SIZE, "%s\n", lp->chipname); } static ssize_t lp855x_get_bl_ctl_mode(struct device *dev, @@ -319,7 +318,7 @@ static ssize_t lp855x_get_bl_ctl_mode(struct device *dev, else if (mode == REGISTER_BASED) strmode = "register based"; - return scnprintf(buf, BUF_SIZE, "%s\n", strmode); + return scnprintf(buf, PAGE_SIZE, "%s\n", strmode); } static DEVICE_ATTR(chip_id, S_IRUGO, lp855x_get_chip_id, NULL); @@ -339,7 +338,6 @@ static int lp855x_probe(struct i2c_client *cl, const struct i2c_device_id *id) { struct lp855x *lp; struct lp855x_platform_data *pdata = cl->dev.platform_data; - enum lp855x_brightness_ctrl_mode mode; int ret; if (!pdata) { @@ -354,7 +352,6 @@ static int lp855x_probe(struct i2c_client *cl, const struct i2c_device_id *id) if (!lp) return -ENOMEM; - mode = pdata->mode; lp->client = cl; lp->dev = &cl->dev; lp->pdata = pdata; diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c index c0b4b8f2de98..ed1b39268131 100644 --- a/drivers/video/backlight/ltv350qv.c +++ b/drivers/video/backlight/ltv350qv.c @@ -271,25 +271,24 @@ static int ltv350qv_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int ltv350qv_suspend(struct spi_device *spi, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int ltv350qv_suspend(struct device *dev) { - struct ltv350qv *lcd = spi_get_drvdata(spi); + struct ltv350qv *lcd = dev_get_drvdata(dev); return ltv350qv_power(lcd, FB_BLANK_POWERDOWN); } -static int ltv350qv_resume(struct spi_device *spi) +static int ltv350qv_resume(struct device *dev) { - struct ltv350qv *lcd = spi_get_drvdata(spi); + struct ltv350qv *lcd = dev_get_drvdata(dev); return ltv350qv_power(lcd, FB_BLANK_UNBLANK); } -#else -#define ltv350qv_suspend NULL -#define ltv350qv_resume NULL #endif +static SIMPLE_DEV_PM_OPS(ltv350qv_pm_ops, ltv350qv_suspend, ltv350qv_resume); + /* Power down all displays on reboot, poweroff or halt */ static void ltv350qv_shutdown(struct spi_device *spi) { @@ -302,13 +301,12 @@ static struct spi_driver ltv350qv_driver = { .driver = { .name = "ltv350qv", .owner = THIS_MODULE, + .pm = <v350qv_pm_ops, }, .probe = ltv350qv_probe, .remove = ltv350qv_remove, .shutdown = ltv350qv_shutdown, - .suspend = ltv350qv_suspend, - .resume = ltv350qv_resume, }; module_spi_driver(ltv350qv_driver); diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c index 627110163067..812e22e35cab 100644 --- a/drivers/video/backlight/omap1_bl.c +++ b/drivers/video/backlight/omap1_bl.c @@ -18,8 +18,6 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -73,27 +71,24 @@ static void omapbl_blank(struct omap_backlight *bl, int mode) } } -#ifdef CONFIG_PM -static int omapbl_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int omapbl_suspend(struct device *dev) { - struct backlight_device *dev = platform_get_drvdata(pdev); - struct omap_backlight *bl = bl_get_data(dev); + struct backlight_device *bl_dev = dev_get_drvdata(dev); + struct omap_backlight *bl = bl_get_data(bl_dev); omapbl_blank(bl, FB_BLANK_POWERDOWN); return 0; } -static int omapbl_resume(struct platform_device *pdev) +static int omapbl_resume(struct device *dev) { - struct backlight_device *dev = platform_get_drvdata(pdev); - struct omap_backlight *bl = bl_get_data(dev); + struct backlight_device *bl_dev = dev_get_drvdata(dev); + struct omap_backlight *bl = bl_get_data(bl_dev); omapbl_blank(bl, bl->powermode); return 0; } -#else -#define omapbl_suspend NULL -#define omapbl_resume NULL #endif static int omapbl_set_power(struct backlight_device *dev, int state) @@ -170,7 +165,7 @@ static int omapbl_probe(struct platform_device *pdev) dev->props.brightness = pdata->default_intensity; omapbl_update_status(dev); - pr_info("OMAP LCD backlight initialised\n"); + dev_info(&pdev->dev, "OMAP LCD backlight initialised\n"); return 0; } @@ -184,13 +179,14 @@ static int omapbl_remove(struct platform_device *pdev) return 0; } +static SIMPLE_DEV_PM_OPS(omapbl_pm_ops, omapbl_suspend, omapbl_resume); + static struct platform_driver omapbl_driver = { .probe = omapbl_probe, .remove = omapbl_remove, - .suspend = omapbl_suspend, - .resume = omapbl_resume, .driver = { .name = "omap-bl", + .pm = &omapbl_pm_ops, }, }; diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c index 17a6b83f97af..056836706708 100644 --- a/drivers/video/backlight/platform_lcd.c +++ b/drivers/video/backlight/platform_lcd.c @@ -86,6 +86,12 @@ static int platform_lcd_probe(struct platform_device *pdev) return -EINVAL; } + if (pdata->probe) { + err = pdata->probe(pdata); + if (err) + return err; + } + plcd = devm_kzalloc(&pdev->dev, sizeof(struct platform_lcd), GFP_KERNEL); if (!plcd) { @@ -121,7 +127,7 @@ static int platform_lcd_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int platform_lcd_suspend(struct device *dev) { struct platform_lcd *plcd = dev_get_drvdata(dev); @@ -141,10 +147,10 @@ static int platform_lcd_resume(struct device *dev) return 0; } +#endif static SIMPLE_DEV_PM_OPS(platform_lcd_pm_ops, platform_lcd_suspend, platform_lcd_resume); -#endif #ifdef CONFIG_OF static const struct of_device_id platform_lcd_of_match[] = { @@ -158,9 +164,7 @@ static struct platform_driver platform_lcd_driver = { .driver = { .name = "platform-lcd", .owner = THIS_MODULE, -#ifdef CONFIG_PM .pm = &platform_lcd_pm_ops, -#endif .of_match_table = of_match_ptr(platform_lcd_of_match), }, .probe = platform_lcd_probe, diff --git a/drivers/video/backlight/s6e63m0.c b/drivers/video/backlight/s6e63m0.c index 9c2677f0ef7d..b37bb1854bf4 100644 --- a/drivers/video/backlight/s6e63m0.c +++ b/drivers/video/backlight/s6e63m0.c @@ -817,12 +817,12 @@ static int s6e63m0_remove(struct spi_device *spi) return 0; } -#if defined(CONFIG_PM) -static int s6e63m0_suspend(struct spi_device *spi, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP +static int s6e63m0_suspend(struct device *dev) { - struct s6e63m0 *lcd = spi_get_drvdata(spi); + struct s6e63m0 *lcd = dev_get_drvdata(dev); - dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power); + dev_dbg(dev, "lcd->power = %d\n", lcd->power); /* * when lcd panel is suspend, lcd panel becomes off @@ -831,19 +831,18 @@ static int s6e63m0_suspend(struct spi_device *spi, pm_message_t mesg) return s6e63m0_power(lcd, FB_BLANK_POWERDOWN); } -static int s6e63m0_resume(struct spi_device *spi) +static int s6e63m0_resume(struct device *dev) { - struct s6e63m0 *lcd = spi_get_drvdata(spi); + struct s6e63m0 *lcd = dev_get_drvdata(dev); lcd->power = FB_BLANK_POWERDOWN; return s6e63m0_power(lcd, FB_BLANK_UNBLANK); } -#else -#define s6e63m0_suspend NULL -#define s6e63m0_resume NULL #endif +static SIMPLE_DEV_PM_OPS(s6e63m0_pm_ops, s6e63m0_suspend, s6e63m0_resume); + /* Power down all displays on reboot, poweroff or halt. */ static void s6e63m0_shutdown(struct spi_device *spi) { @@ -856,12 +855,11 @@ static struct spi_driver s6e63m0_driver = { .driver = { .name = "s6e63m0", .owner = THIS_MODULE, + .pm = &s6e63m0_pm_ops, }, .probe = s6e63m0_probe, .remove = s6e63m0_remove, .shutdown = s6e63m0_shutdown, - .suspend = s6e63m0_suspend, - .resume = s6e63m0_resume, }; module_spi_driver(s6e63m0_driver); diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c index 00162085eec0..18cdf466d50a 100644 --- a/drivers/video/backlight/tdo24m.c +++ b/drivers/video/backlight/tdo24m.c @@ -412,25 +412,24 @@ static int tdo24m_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int tdo24m_suspend(struct spi_device *spi, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int tdo24m_suspend(struct device *dev) { - struct tdo24m *lcd = spi_get_drvdata(spi); + struct tdo24m *lcd = dev_get_drvdata(dev); return tdo24m_power(lcd, FB_BLANK_POWERDOWN); } -static int tdo24m_resume(struct spi_device *spi) +static int tdo24m_resume(struct device *dev) { - struct tdo24m *lcd = spi_get_drvdata(spi); + struct tdo24m *lcd = dev_get_drvdata(dev); return tdo24m_power(lcd, FB_BLANK_UNBLANK); } -#else -#define tdo24m_suspend NULL -#define tdo24m_resume NULL #endif +static SIMPLE_DEV_PM_OPS(tdo24m_pm_ops, tdo24m_suspend, tdo24m_resume); + /* Power down all displays on reboot, poweroff or halt */ static void tdo24m_shutdown(struct spi_device *spi) { @@ -443,12 +442,11 @@ static struct spi_driver tdo24m_driver = { .driver = { .name = "tdo24m", .owner = THIS_MODULE, + .pm = &tdo24m_pm_ops, }, .probe = tdo24m_probe, .remove = tdo24m_remove, .shutdown = tdo24m_shutdown, - .suspend = tdo24m_suspend, - .resume = tdo24m_resume, }; module_spi_driver(tdo24m_driver); diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c index 2326fa810c59..9df66ac68b34 100644 --- a/drivers/video/backlight/tosa_bl.c +++ b/drivers/video/backlight/tosa_bl.c @@ -134,28 +134,27 @@ static int tosa_bl_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int tosa_bl_suspend(struct i2c_client *client, pm_message_t pm) +#ifdef CONFIG_PM_SLEEP +static int tosa_bl_suspend(struct device *dev) { - struct tosa_bl_data *data = i2c_get_clientdata(client); + struct tosa_bl_data *data = dev_get_drvdata(dev); tosa_bl_set_backlight(data, 0); return 0; } -static int tosa_bl_resume(struct i2c_client *client) +static int tosa_bl_resume(struct device *dev) { - struct tosa_bl_data *data = i2c_get_clientdata(client); + struct tosa_bl_data *data = dev_get_drvdata(dev); backlight_update_status(data->bl); return 0; } -#else -#define tosa_bl_suspend NULL -#define tosa_bl_resume NULL #endif +static SIMPLE_DEV_PM_OPS(tosa_bl_pm_ops, tosa_bl_suspend, tosa_bl_resume); + static const struct i2c_device_id tosa_bl_id[] = { { "tosa-bl", 0 }, { }, @@ -165,11 +164,10 @@ static struct i2c_driver tosa_bl_driver = { .driver = { .name = "tosa-bl", .owner = THIS_MODULE, + .pm = &tosa_bl_pm_ops, }, .probe = tosa_bl_probe, .remove = tosa_bl_remove, - .suspend = tosa_bl_suspend, - .resume = tosa_bl_resume, .id_table = tosa_bl_id, }; diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index 666fe2593ea4..bf081573e5b5 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -240,19 +240,19 @@ static int tosa_lcd_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int tosa_lcd_suspend(struct spi_device *spi, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int tosa_lcd_suspend(struct device *dev) { - struct tosa_lcd_data *data = spi_get_drvdata(spi); + struct tosa_lcd_data *data = dev_get_drvdata(dev); tosa_lcd_tg_off(data); return 0; } -static int tosa_lcd_resume(struct spi_device *spi) +static int tosa_lcd_resume(struct device *dev) { - struct tosa_lcd_data *data = spi_get_drvdata(spi); + struct tosa_lcd_data *data = dev_get_drvdata(dev); tosa_lcd_tg_init(data); if (POWER_IS_ON(data->lcd_power)) @@ -262,20 +262,18 @@ static int tosa_lcd_resume(struct spi_device *spi) return 0; } -#else -#define tosa_lcd_suspend NULL -#define tosa_lcd_resume NULL #endif +static SIMPLE_DEV_PM_OPS(tosa_lcd_pm_ops, tosa_lcd_suspend, tosa_lcd_resume); + static struct spi_driver tosa_lcd_driver = { .driver = { .name = "tosa-lcd", .owner = THIS_MODULE, + .pm = &tosa_lcd_pm_ops, }, .probe = tosa_lcd_probe, .remove = tosa_lcd_remove, - .suspend = tosa_lcd_suspend, - .resume = tosa_lcd_resume, }; module_spi_driver(tosa_lcd_driver); diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c index 84d582f591dc..d538947a67d3 100644 --- a/drivers/video/backlight/vgg2432a4.c +++ b/drivers/video/backlight/vgg2432a4.c @@ -205,18 +205,15 @@ static int vgg2432a4_lcd_init(struct ili9320 *lcd, return ret; } -#ifdef CONFIG_PM -static int vgg2432a4_suspend(struct spi_device *spi, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int vgg2432a4_suspend(struct device *dev) { - return ili9320_suspend(spi_get_drvdata(spi), state); + return ili9320_suspend(dev_get_drvdata(dev)); } -static int vgg2432a4_resume(struct spi_device *spi) +static int vgg2432a4_resume(struct device *dev) { - return ili9320_resume(spi_get_drvdata(spi)); + return ili9320_resume(dev_get_drvdata(dev)); } -#else -#define vgg2432a4_suspend NULL -#define vgg2432a4_resume NULL #endif static struct ili9320_client vgg2432a4_client = { @@ -249,16 +246,17 @@ static void vgg2432a4_shutdown(struct spi_device *spi) ili9320_shutdown(spi_get_drvdata(spi)); } +static SIMPLE_DEV_PM_OPS(vgg2432a4_pm_ops, vgg2432a4_suspend, vgg2432a4_resume); + static struct spi_driver vgg2432a4_driver = { .driver = { .name = "VGG2432A4", .owner = THIS_MODULE, + .pm = &vgg2432a4_pm_ops, }, .probe = vgg2432a4_probe, .remove = vgg2432a4_remove, .shutdown = vgg2432a4_shutdown, - .suspend = vgg2432a4_suspend, - .resume = vgg2432a4_resume, }; module_spi_driver(vgg2432a4_driver); diff --git a/drivers/video/console/fbcon_cw.c b/drivers/video/console/fbcon_cw.c index 6a737827beb1..a93670ef7f89 100644 --- a/drivers/video/console/fbcon_cw.c +++ b/drivers/video/console/fbcon_cw.c @@ -27,7 +27,7 @@ static void cw_update_attr(u8 *dst, u8 *src, int attribute, { int i, j, offset = (vc->vc_font.height < 10) ? 1 : 2; int width = (vc->vc_font.height + 7) >> 3; - u8 c, t = 0, msk = ~(0xff >> offset); + u8 c, msk = ~(0xff >> offset); for (i = 0; i < vc->vc_font.width; i++) { for (j = 0; j < width; j++) { @@ -40,7 +40,6 @@ static void cw_update_attr(u8 *dst, u8 *src, int attribute, c = ~c; src++; *dst++ = c; - t = c; } } } diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c index 57886787ead0..e78d9f2233b8 100644 --- a/drivers/video/cyber2000fb.c +++ b/drivers/video/cyber2000fb.c @@ -518,6 +518,9 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw) cyber2000_grphw(0xb9, 0x00, cfb); spin_unlock(&cfb->reg_b0_lock); + /* wait (for the PLL?) to avoid palette corruption at higher clocks */ + msleep(1000); + cfb->ramdac_ctrl = hw->ramdac; cyber2000fb_write_ramdac_ctrl(cfb); diff --git a/drivers/video/ep93xx-fb.c b/drivers/video/ep93xx-fb.c index e06cd5d90c97..ee1ee5401544 100644 --- a/drivers/video/ep93xx-fb.c +++ b/drivers/video/ep93xx-fb.c @@ -419,7 +419,7 @@ static struct fb_ops ep93xxfb_ops = { .fb_mmap = ep93xxfb_mmap, }; -static int __init ep93xxfb_calc_fbsize(struct ep93xxfb_mach_info *mach_info) +static int ep93xxfb_calc_fbsize(struct ep93xxfb_mach_info *mach_info) { int i, fb_size = 0; @@ -441,7 +441,7 @@ static int __init ep93xxfb_calc_fbsize(struct ep93xxfb_mach_info *mach_info) return fb_size; } -static int __init ep93xxfb_alloc_videomem(struct fb_info *info) +static int ep93xxfb_alloc_videomem(struct fb_info *info) { struct ep93xx_fbi *fbi = info->par; char __iomem *virt_addr; @@ -627,19 +627,7 @@ static struct platform_driver ep93xxfb_driver = { .owner = THIS_MODULE, }, }; - -static int ep93xxfb_init(void) -{ - return platform_driver_register(&ep93xxfb_driver); -} - -static void __exit ep93xxfb_exit(void) -{ - platform_driver_unregister(&ep93xxfb_driver); -} - -module_init(ep93xxfb_init); -module_exit(ep93xxfb_exit); +module_platform_driver(ep93xxfb_driver); MODULE_DESCRIPTION("EP93XX Framebuffer Driver"); MODULE_ALIAS("platform:ep93xx-fb"); diff --git a/drivers/video/exynos/exynos_mipi_dsi.c b/drivers/video/exynos/exynos_mipi_dsi.c index 3dd43ca2b95f..32e540600f99 100644 --- a/drivers/video/exynos/exynos_mipi_dsi.c +++ b/drivers/video/exynos/exynos_mipi_dsi.c @@ -32,6 +32,7 @@ #include <linux/notifier.h> #include <linux/regulator/consumer.h> #include <linux/pm_runtime.h> +#include <linux/err.h> #include <video/exynos_mipi_dsim.h> @@ -382,10 +383,9 @@ static int exynos_mipi_dsi_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dsim->reg_base = devm_request_and_ioremap(&pdev->dev, res); - if (!dsim->reg_base) { - dev_err(&pdev->dev, "failed to remap io region\n"); - ret = -ENOMEM; + dsim->reg_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dsim->reg_base)) { + ret = PTR_ERR(dsim->reg_base); goto error; } diff --git a/drivers/video/hyperv_fb.c b/drivers/video/hyperv_fb.c new file mode 100644 index 000000000000..d4d2c5fe2488 --- /dev/null +++ b/drivers/video/hyperv_fb.c @@ -0,0 +1,829 @@ +/* + * Copyright (c) 2012, Microsoft Corporation. + * + * Author: + * Haiyang Zhang <haiyangz@microsoft.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + */ + +/* + * Hyper-V Synthetic Video Frame Buffer Driver + * + * This is the driver for the Hyper-V Synthetic Video, which supports + * screen resolution up to Full HD 1920x1080 with 32 bit color on Windows + * Server 2012, and 1600x1200 with 16 bit color on Windows Server 2008 R2 + * or earlier. + * + * It also solves the double mouse cursor issue of the emulated video mode. + * + * The default screen resolution is 1152x864, which may be changed by a + * kernel parameter: + * video=hyperv_fb:<width>x<height> + * For example: video=hyperv_fb:1280x1024 + * + * Portrait orientation is also supported: + * For example: video=hyperv_fb:864x1152 + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/completion.h> +#include <linux/fb.h> +#include <linux/pci.h> + +#include <linux/hyperv.h> + + +/* Hyper-V Synthetic Video Protocol definitions and structures */ +#define MAX_VMBUS_PKT_SIZE 0x4000 + +#define SYNTHVID_VERSION(major, minor) ((minor) << 16 | (major)) +#define SYNTHVID_VERSION_WIN7 SYNTHVID_VERSION(3, 0) +#define SYNTHVID_VERSION_WIN8 SYNTHVID_VERSION(3, 2) + +#define SYNTHVID_DEPTH_WIN7 16 +#define SYNTHVID_DEPTH_WIN8 32 + +#define SYNTHVID_FB_SIZE_WIN7 (4 * 1024 * 1024) +#define SYNTHVID_WIDTH_MAX_WIN7 1600 +#define SYNTHVID_HEIGHT_MAX_WIN7 1200 + +#define SYNTHVID_FB_SIZE_WIN8 (8 * 1024 * 1024) + +#define PCI_VENDOR_ID_MICROSOFT 0x1414 +#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353 + + +enum pipe_msg_type { + PIPE_MSG_INVALID, + PIPE_MSG_DATA, + PIPE_MSG_MAX +}; + +struct pipe_msg_hdr { + u32 type; + u32 size; /* size of message after this field */ +} __packed; + + +enum synthvid_msg_type { + SYNTHVID_ERROR = 0, + SYNTHVID_VERSION_REQUEST = 1, + SYNTHVID_VERSION_RESPONSE = 2, + SYNTHVID_VRAM_LOCATION = 3, + SYNTHVID_VRAM_LOCATION_ACK = 4, + SYNTHVID_SITUATION_UPDATE = 5, + SYNTHVID_SITUATION_UPDATE_ACK = 6, + SYNTHVID_POINTER_POSITION = 7, + SYNTHVID_POINTER_SHAPE = 8, + SYNTHVID_FEATURE_CHANGE = 9, + SYNTHVID_DIRT = 10, + + SYNTHVID_MAX = 11 +}; + +struct synthvid_msg_hdr { + u32 type; + u32 size; /* size of this header + payload after this field*/ +} __packed; + + +struct synthvid_version_req { + u32 version; +} __packed; + +struct synthvid_version_resp { + u32 version; + u8 is_accepted; + u8 max_video_outputs; +} __packed; + +struct synthvid_vram_location { + u64 user_ctx; + u8 is_vram_gpa_specified; + u64 vram_gpa; +} __packed; + +struct synthvid_vram_location_ack { + u64 user_ctx; +} __packed; + +struct video_output_situation { + u8 active; + u32 vram_offset; + u8 depth_bits; + u32 width_pixels; + u32 height_pixels; + u32 pitch_bytes; +} __packed; + +struct synthvid_situation_update { + u64 user_ctx; + u8 video_output_count; + struct video_output_situation video_output[1]; +} __packed; + +struct synthvid_situation_update_ack { + u64 user_ctx; +} __packed; + +struct synthvid_pointer_position { + u8 is_visible; + u8 video_output; + s32 image_x; + s32 image_y; +} __packed; + + +#define CURSOR_MAX_X 96 +#define CURSOR_MAX_Y 96 +#define CURSOR_ARGB_PIXEL_SIZE 4 +#define CURSOR_MAX_SIZE (CURSOR_MAX_X * CURSOR_MAX_Y * CURSOR_ARGB_PIXEL_SIZE) +#define CURSOR_COMPLETE (-1) + +struct synthvid_pointer_shape { + u8 part_idx; + u8 is_argb; + u32 width; /* CURSOR_MAX_X at most */ + u32 height; /* CURSOR_MAX_Y at most */ + u32 hot_x; /* hotspot relative to upper-left of pointer image */ + u32 hot_y; + u8 data[4]; +} __packed; + +struct synthvid_feature_change { + u8 is_dirt_needed; + u8 is_ptr_pos_needed; + u8 is_ptr_shape_needed; + u8 is_situ_needed; +} __packed; + +struct rect { + s32 x1, y1; /* top left corner */ + s32 x2, y2; /* bottom right corner, exclusive */ +} __packed; + +struct synthvid_dirt { + u8 video_output; + u8 dirt_count; + struct rect rect[1]; +} __packed; + +struct synthvid_msg { + struct pipe_msg_hdr pipe_hdr; + struct synthvid_msg_hdr vid_hdr; + union { + struct synthvid_version_req ver_req; + struct synthvid_version_resp ver_resp; + struct synthvid_vram_location vram; + struct synthvid_vram_location_ack vram_ack; + struct synthvid_situation_update situ; + struct synthvid_situation_update_ack situ_ack; + struct synthvid_pointer_position ptr_pos; + struct synthvid_pointer_shape ptr_shape; + struct synthvid_feature_change feature_chg; + struct synthvid_dirt dirt; + }; +} __packed; + + + +/* FB driver definitions and structures */ +#define HVFB_WIDTH 1152 /* default screen width */ +#define HVFB_HEIGHT 864 /* default screen height */ +#define HVFB_WIDTH_MIN 640 +#define HVFB_HEIGHT_MIN 480 + +#define RING_BUFSIZE (256 * 1024) +#define VSP_TIMEOUT (10 * HZ) +#define HVFB_UPDATE_DELAY (HZ / 20) + +struct hvfb_par { + struct fb_info *info; + bool fb_ready; /* fb device is ready */ + struct completion wait; + u32 synthvid_version; + + struct delayed_work dwork; + bool update; + + u32 pseudo_palette[16]; + u8 init_buf[MAX_VMBUS_PKT_SIZE]; + u8 recv_buf[MAX_VMBUS_PKT_SIZE]; +}; + +static uint screen_width = HVFB_WIDTH; +static uint screen_height = HVFB_HEIGHT; +static uint screen_depth; +static uint screen_fb_size; + +/* Send message to Hyper-V host */ +static inline int synthvid_send(struct hv_device *hdev, + struct synthvid_msg *msg) +{ + static atomic64_t request_id = ATOMIC64_INIT(0); + int ret; + + msg->pipe_hdr.type = PIPE_MSG_DATA; + msg->pipe_hdr.size = msg->vid_hdr.size; + + ret = vmbus_sendpacket(hdev->channel, msg, + msg->vid_hdr.size + sizeof(struct pipe_msg_hdr), + atomic64_inc_return(&request_id), + VM_PKT_DATA_INBAND, 0); + + if (ret) + pr_err("Unable to send packet via vmbus\n"); + + return ret; +} + + +/* Send screen resolution info to host */ +static int synthvid_send_situ(struct hv_device *hdev) +{ + struct fb_info *info = hv_get_drvdata(hdev); + struct synthvid_msg msg; + + if (!info) + return -ENODEV; + + memset(&msg, 0, sizeof(struct synthvid_msg)); + + msg.vid_hdr.type = SYNTHVID_SITUATION_UPDATE; + msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) + + sizeof(struct synthvid_situation_update); + msg.situ.user_ctx = 0; + msg.situ.video_output_count = 1; + msg.situ.video_output[0].active = 1; + msg.situ.video_output[0].vram_offset = 0; + msg.situ.video_output[0].depth_bits = info->var.bits_per_pixel; + msg.situ.video_output[0].width_pixels = info->var.xres; + msg.situ.video_output[0].height_pixels = info->var.yres; + msg.situ.video_output[0].pitch_bytes = info->fix.line_length; + + synthvid_send(hdev, &msg); + + return 0; +} + +/* Send mouse pointer info to host */ +static int synthvid_send_ptr(struct hv_device *hdev) +{ + struct synthvid_msg msg; + + memset(&msg, 0, sizeof(struct synthvid_msg)); + msg.vid_hdr.type = SYNTHVID_POINTER_POSITION; + msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) + + sizeof(struct synthvid_pointer_position); + msg.ptr_pos.is_visible = 1; + msg.ptr_pos.video_output = 0; + msg.ptr_pos.image_x = 0; + msg.ptr_pos.image_y = 0; + synthvid_send(hdev, &msg); + + memset(&msg, 0, sizeof(struct synthvid_msg)); + msg.vid_hdr.type = SYNTHVID_POINTER_SHAPE; + msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) + + sizeof(struct synthvid_pointer_shape); + msg.ptr_shape.part_idx = CURSOR_COMPLETE; + msg.ptr_shape.is_argb = 1; + msg.ptr_shape.width = 1; + msg.ptr_shape.height = 1; + msg.ptr_shape.hot_x = 0; + msg.ptr_shape.hot_y = 0; + msg.ptr_shape.data[0] = 0; + msg.ptr_shape.data[1] = 1; + msg.ptr_shape.data[2] = 1; + msg.ptr_shape.data[3] = 1; + synthvid_send(hdev, &msg); + + return 0; +} + +/* Send updated screen area (dirty rectangle) location to host */ +static int synthvid_update(struct fb_info *info) +{ + struct hv_device *hdev = device_to_hv_device(info->device); + struct synthvid_msg msg; + + memset(&msg, 0, sizeof(struct synthvid_msg)); + + msg.vid_hdr.type = SYNTHVID_DIRT; + msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) + + sizeof(struct synthvid_dirt); + msg.dirt.video_output = 0; + msg.dirt.dirt_count = 1; + msg.dirt.rect[0].x1 = 0; + msg.dirt.rect[0].y1 = 0; + msg.dirt.rect[0].x2 = info->var.xres; + msg.dirt.rect[0].y2 = info->var.yres; + + synthvid_send(hdev, &msg); + + return 0; +} + + +/* + * Actions on received messages from host: + * Complete the wait event. + * Or, reply with screen and cursor info. + */ +static void synthvid_recv_sub(struct hv_device *hdev) +{ + struct fb_info *info = hv_get_drvdata(hdev); + struct hvfb_par *par; + struct synthvid_msg *msg; + + if (!info) + return; + + par = info->par; + msg = (struct synthvid_msg *)par->recv_buf; + + /* Complete the wait event */ + if (msg->vid_hdr.type == SYNTHVID_VERSION_RESPONSE || + msg->vid_hdr.type == SYNTHVID_VRAM_LOCATION_ACK) { + memcpy(par->init_buf, msg, MAX_VMBUS_PKT_SIZE); + complete(&par->wait); + return; + } + + /* Reply with screen and cursor info */ + if (msg->vid_hdr.type == SYNTHVID_FEATURE_CHANGE) { + if (par->fb_ready) { + synthvid_send_ptr(hdev); + synthvid_send_situ(hdev); + } + + par->update = msg->feature_chg.is_dirt_needed; + if (par->update) + schedule_delayed_work(&par->dwork, HVFB_UPDATE_DELAY); + } +} + +/* Receive callback for messages from the host */ +static void synthvid_receive(void *ctx) +{ + struct hv_device *hdev = ctx; + struct fb_info *info = hv_get_drvdata(hdev); + struct hvfb_par *par; + struct synthvid_msg *recv_buf; + u32 bytes_recvd; + u64 req_id; + int ret; + + if (!info) + return; + + par = info->par; + recv_buf = (struct synthvid_msg *)par->recv_buf; + + do { + ret = vmbus_recvpacket(hdev->channel, recv_buf, + MAX_VMBUS_PKT_SIZE, + &bytes_recvd, &req_id); + if (bytes_recvd > 0 && + recv_buf->pipe_hdr.type == PIPE_MSG_DATA) + synthvid_recv_sub(hdev); + } while (bytes_recvd > 0 && ret == 0); +} + +/* Check synthetic video protocol version with the host */ +static int synthvid_negotiate_ver(struct hv_device *hdev, u32 ver) +{ + struct fb_info *info = hv_get_drvdata(hdev); + struct hvfb_par *par = info->par; + struct synthvid_msg *msg = (struct synthvid_msg *)par->init_buf; + int t, ret = 0; + + memset(msg, 0, sizeof(struct synthvid_msg)); + msg->vid_hdr.type = SYNTHVID_VERSION_REQUEST; + msg->vid_hdr.size = sizeof(struct synthvid_msg_hdr) + + sizeof(struct synthvid_version_req); + msg->ver_req.version = ver; + synthvid_send(hdev, msg); + + t = wait_for_completion_timeout(&par->wait, VSP_TIMEOUT); + if (!t) { + pr_err("Time out on waiting version response\n"); + ret = -ETIMEDOUT; + goto out; + } + if (!msg->ver_resp.is_accepted) { + ret = -ENODEV; + goto out; + } + + par->synthvid_version = ver; + +out: + return ret; +} + +/* Connect to VSP (Virtual Service Provider) on host */ +static int synthvid_connect_vsp(struct hv_device *hdev) +{ + struct fb_info *info = hv_get_drvdata(hdev); + struct hvfb_par *par = info->par; + int ret; + + ret = vmbus_open(hdev->channel, RING_BUFSIZE, RING_BUFSIZE, + NULL, 0, synthvid_receive, hdev); + if (ret) { + pr_err("Unable to open vmbus channel\n"); + return ret; + } + + /* Negotiate the protocol version with host */ + if (vmbus_proto_version == VERSION_WS2008 || + vmbus_proto_version == VERSION_WIN7) + ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN7); + else + ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN8); + + if (ret) { + pr_err("Synthetic video device version not accepted\n"); + goto error; + } + + if (par->synthvid_version == SYNTHVID_VERSION_WIN7) { + screen_depth = SYNTHVID_DEPTH_WIN7; + screen_fb_size = SYNTHVID_FB_SIZE_WIN7; + } else { + screen_depth = SYNTHVID_DEPTH_WIN8; + screen_fb_size = SYNTHVID_FB_SIZE_WIN8; + } + + return 0; + +error: + vmbus_close(hdev->channel); + return ret; +} + +/* Send VRAM and Situation messages to the host */ +static int synthvid_send_config(struct hv_device *hdev) +{ + struct fb_info *info = hv_get_drvdata(hdev); + struct hvfb_par *par = info->par; + struct synthvid_msg *msg = (struct synthvid_msg *)par->init_buf; + int t, ret = 0; + + /* Send VRAM location */ + memset(msg, 0, sizeof(struct synthvid_msg)); + msg->vid_hdr.type = SYNTHVID_VRAM_LOCATION; + msg->vid_hdr.size = sizeof(struct synthvid_msg_hdr) + + sizeof(struct synthvid_vram_location); + msg->vram.user_ctx = msg->vram.vram_gpa = info->fix.smem_start; + msg->vram.is_vram_gpa_specified = 1; + synthvid_send(hdev, msg); + + t = wait_for_completion_timeout(&par->wait, VSP_TIMEOUT); + if (!t) { + pr_err("Time out on waiting vram location ack\n"); + ret = -ETIMEDOUT; + goto out; + } + if (msg->vram_ack.user_ctx != info->fix.smem_start) { + pr_err("Unable to set VRAM location\n"); + ret = -ENODEV; + goto out; + } + + /* Send pointer and situation update */ + synthvid_send_ptr(hdev); + synthvid_send_situ(hdev); + +out: + return ret; +} + + +/* + * Delayed work callback: + * It is called at HVFB_UPDATE_DELAY or longer time interval to process + * screen updates. It is re-scheduled if further update is necessary. + */ +static void hvfb_update_work(struct work_struct *w) +{ + struct hvfb_par *par = container_of(w, struct hvfb_par, dwork.work); + struct fb_info *info = par->info; + + if (par->fb_ready) + synthvid_update(info); + + if (par->update) + schedule_delayed_work(&par->dwork, HVFB_UPDATE_DELAY); +} + + +/* Framebuffer operation handlers */ + +static int hvfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + if (var->xres < HVFB_WIDTH_MIN || var->yres < HVFB_HEIGHT_MIN || + var->xres > screen_width || var->yres > screen_height || + var->bits_per_pixel != screen_depth) + return -EINVAL; + + var->xres_virtual = var->xres; + var->yres_virtual = var->yres; + + return 0; +} + +static int hvfb_set_par(struct fb_info *info) +{ + struct hv_device *hdev = device_to_hv_device(info->device); + + return synthvid_send_situ(hdev); +} + + +static inline u32 chan_to_field(u32 chan, struct fb_bitfield *bf) +{ + return ((chan & 0xffff) >> (16 - bf->length)) << bf->offset; +} + +static int hvfb_setcolreg(unsigned regno, unsigned red, unsigned green, + unsigned blue, unsigned transp, struct fb_info *info) +{ + u32 *pal = info->pseudo_palette; + + if (regno > 15) + return -EINVAL; + + pal[regno] = chan_to_field(red, &info->var.red) + | chan_to_field(green, &info->var.green) + | chan_to_field(blue, &info->var.blue) + | chan_to_field(transp, &info->var.transp); + + return 0; +} + + +static struct fb_ops hvfb_ops = { + .owner = THIS_MODULE, + .fb_check_var = hvfb_check_var, + .fb_set_par = hvfb_set_par, + .fb_setcolreg = hvfb_setcolreg, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + + +/* Get options from kernel paramenter "video=" */ +static void hvfb_get_option(struct fb_info *info) +{ + struct hvfb_par *par = info->par; + char *opt = NULL, *p; + uint x = 0, y = 0; + + if (fb_get_options(KBUILD_MODNAME, &opt) || !opt || !*opt) + return; + + p = strsep(&opt, "x"); + if (!*p || kstrtouint(p, 0, &x) || + !opt || !*opt || kstrtouint(opt, 0, &y)) { + pr_err("Screen option is invalid: skipped\n"); + return; + } + + if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN || + (par->synthvid_version == SYNTHVID_VERSION_WIN8 && + x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) || + (par->synthvid_version == SYNTHVID_VERSION_WIN7 && + (x > SYNTHVID_WIDTH_MAX_WIN7 || y > SYNTHVID_HEIGHT_MAX_WIN7))) { + pr_err("Screen resolution option is out of range: skipped\n"); + return; + } + + screen_width = x; + screen_height = y; + return; +} + + +/* Get framebuffer memory from Hyper-V video pci space */ +static int hvfb_getmem(struct fb_info *info) +{ + struct pci_dev *pdev; + ulong fb_phys; + void __iomem *fb_virt; + + pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, + PCI_DEVICE_ID_HYPERV_VIDEO, NULL); + if (!pdev) { + pr_err("Unable to find PCI Hyper-V video\n"); + return -ENODEV; + } + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || + pci_resource_len(pdev, 0) < screen_fb_size) + goto err1; + + fb_phys = pci_resource_end(pdev, 0) - screen_fb_size + 1; + if (!request_mem_region(fb_phys, screen_fb_size, KBUILD_MODNAME)) + goto err1; + + fb_virt = ioremap(fb_phys, screen_fb_size); + if (!fb_virt) + goto err2; + + info->apertures = alloc_apertures(1); + if (!info->apertures) + goto err3; + + info->apertures->ranges[0].base = pci_resource_start(pdev, 0); + info->apertures->ranges[0].size = pci_resource_len(pdev, 0); + info->fix.smem_start = fb_phys; + info->fix.smem_len = screen_fb_size; + info->screen_base = fb_virt; + info->screen_size = screen_fb_size; + + pci_dev_put(pdev); + return 0; + +err3: + iounmap(fb_virt); +err2: + release_mem_region(fb_phys, screen_fb_size); +err1: + pci_dev_put(pdev); + return -ENOMEM; +} + +/* Release the framebuffer */ +static void hvfb_putmem(struct fb_info *info) +{ + iounmap(info->screen_base); + release_mem_region(info->fix.smem_start, screen_fb_size); +} + + +static int hvfb_probe(struct hv_device *hdev, + const struct hv_vmbus_device_id *dev_id) +{ + struct fb_info *info; + struct hvfb_par *par; + int ret; + + info = framebuffer_alloc(sizeof(struct hvfb_par), &hdev->device); + if (!info) { + pr_err("No memory for framebuffer info\n"); + return -ENOMEM; + } + + par = info->par; + par->info = info; + par->fb_ready = false; + init_completion(&par->wait); + INIT_DELAYED_WORK(&par->dwork, hvfb_update_work); + + /* Connect to VSP */ + hv_set_drvdata(hdev, info); + ret = synthvid_connect_vsp(hdev); + if (ret) { + pr_err("Unable to connect to VSP\n"); + goto error1; + } + + ret = hvfb_getmem(info); + if (ret) { + pr_err("No memory for framebuffer\n"); + goto error2; + } + + hvfb_get_option(info); + pr_info("Screen resolution: %dx%d, Color depth: %d\n", + screen_width, screen_height, screen_depth); + + + /* Set up fb_info */ + info->flags = FBINFO_DEFAULT; + + info->var.xres_virtual = info->var.xres = screen_width; + info->var.yres_virtual = info->var.yres = screen_height; + info->var.bits_per_pixel = screen_depth; + + if (info->var.bits_per_pixel == 16) { + info->var.red = (struct fb_bitfield){11, 5, 0}; + info->var.green = (struct fb_bitfield){5, 6, 0}; + info->var.blue = (struct fb_bitfield){0, 5, 0}; + info->var.transp = (struct fb_bitfield){0, 0, 0}; + } else { + info->var.red = (struct fb_bitfield){16, 8, 0}; + info->var.green = (struct fb_bitfield){8, 8, 0}; + info->var.blue = (struct fb_bitfield){0, 8, 0}; + info->var.transp = (struct fb_bitfield){24, 8, 0}; + } + + info->var.activate = FB_ACTIVATE_NOW; + info->var.height = -1; + info->var.width = -1; + info->var.vmode = FB_VMODE_NONINTERLACED; + + strcpy(info->fix.id, KBUILD_MODNAME); + info->fix.type = FB_TYPE_PACKED_PIXELS; + info->fix.visual = FB_VISUAL_TRUECOLOR; + info->fix.line_length = screen_width * screen_depth / 8; + info->fix.accel = FB_ACCEL_NONE; + + info->fbops = &hvfb_ops; + info->pseudo_palette = par->pseudo_palette; + + /* Send config to host */ + ret = synthvid_send_config(hdev); + if (ret) + goto error; + + ret = register_framebuffer(info); + if (ret) { + pr_err("Unable to register framebuffer\n"); + goto error; + } + + par->fb_ready = true; + + return 0; + +error: + hvfb_putmem(info); +error2: + vmbus_close(hdev->channel); +error1: + cancel_delayed_work_sync(&par->dwork); + hv_set_drvdata(hdev, NULL); + framebuffer_release(info); + return ret; +} + + +static int hvfb_remove(struct hv_device *hdev) +{ + struct fb_info *info = hv_get_drvdata(hdev); + struct hvfb_par *par = info->par; + + par->update = false; + par->fb_ready = false; + + unregister_framebuffer(info); + cancel_delayed_work_sync(&par->dwork); + + vmbus_close(hdev->channel); + hv_set_drvdata(hdev, NULL); + + hvfb_putmem(info); + framebuffer_release(info); + + return 0; +} + + +static const struct hv_vmbus_device_id id_table[] = { + /* Synthetic Video Device GUID */ + {HV_SYNTHVID_GUID}, + {} +}; + +MODULE_DEVICE_TABLE(vmbus, id_table); + +static struct hv_driver hvfb_drv = { + .name = KBUILD_MODNAME, + .id_table = id_table, + .probe = hvfb_probe, + .remove = hvfb_remove, +}; + + +static int __init hvfb_drv_init(void) +{ + return vmbus_driver_register(&hvfb_drv); +} + +static void __exit hvfb_drv_exit(void) +{ + vmbus_driver_unregister(&hvfb_drv); +} + +module_init(hvfb_drv_init); +module_exit(hvfb_drv_exit); + +MODULE_LICENSE("GPL"); +MODULE_VERSION(HV_DRV_VERSION); +MODULE_DESCRIPTION("Microsoft Hyper-V Synthetic Video Frame Buffer Driver"); diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c index 217678e0b983..fd2897455696 100644 --- a/drivers/video/matrox/matroxfb_maven.c +++ b/drivers/video/matrox/matroxfb_maven.c @@ -137,8 +137,20 @@ static int* get_ctrl_ptr(struct maven_data* md, int idx) { static int maven_get_reg(struct i2c_client* c, char reg) { char dst; - struct i2c_msg msgs[] = {{ c->addr, I2C_M_REV_DIR_ADDR, sizeof(reg), ® }, - { c->addr, I2C_M_RD | I2C_M_NOSTART, sizeof(dst), &dst }}; + struct i2c_msg msgs[] = { + { + .addr = c->addr, + .flags = I2C_M_REV_DIR_ADDR, + .len = sizeof(reg), + .buf = ® + }, + { + .addr = c->addr, + .flags = I2C_M_RD | I2C_M_NOSTART, + .len = sizeof(dst), + .buf = &dst + } + }; s32 err; err = i2c_transfer(c->adapter, msgs, 2); diff --git a/drivers/video/mmp/hw/mmp_ctrl.h b/drivers/video/mmp/hw/mmp_ctrl.h index 6408d8ef3abb..edd2002b0e99 100644 --- a/drivers/video/mmp/hw/mmp_ctrl.h +++ b/drivers/video/mmp/hw/mmp_ctrl.h @@ -961,56 +961,7 @@ struct lcd_regs { LCD_TVG_CUTVLN : PN2_LCD_GRA_CUTVLN) : LCD_GRA_CUTVLN) /* - * defined Video Memory Color format for DMA control 0 register - * DMA0 bit[23:20] - */ -#define VMODE_RGB565 0x0 -#define VMODE_RGB1555 0x1 -#define VMODE_RGB888PACKED 0x2 -#define VMODE_RGB888UNPACKED 0x3 -#define VMODE_RGBA888 0x4 -#define VMODE_YUV422PACKED 0x5 -#define VMODE_YUV422PLANAR 0x6 -#define VMODE_YUV420PLANAR 0x7 -#define VMODE_SMPNCMD 0x8 -#define VMODE_PALETTE4BIT 0x9 -#define VMODE_PALETTE8BIT 0xa -#define VMODE_RESERVED 0xb - -/* - * defined Graphic Memory Color format for DMA control 0 register - * DMA0 bit[19:16] - */ -#define GMODE_RGB565 0x0 -#define GMODE_RGB1555 0x1 -#define GMODE_RGB888PACKED 0x2 -#define GMODE_RGB888UNPACKED 0x3 -#define GMODE_RGBA888 0x4 -#define GMODE_YUV422PACKED 0x5 -#define GMODE_YUV422PLANAR 0x6 -#define GMODE_YUV420PLANAR 0x7 -#define GMODE_SMPNCMD 0x8 -#define GMODE_PALETTE4BIT 0x9 -#define GMODE_PALETTE8BIT 0xa -#define GMODE_RESERVED 0xb - -/* - * define for DMA control 1 register - */ -#define DMA1_FRAME_TRIG 31 /* bit location */ -#define DMA1_VSYNC_MODE 28 -#define DMA1_VSYNC_INV 27 -#define DMA1_CKEY 24 -#define DMA1_CARRY 23 -#define DMA1_LNBUF_ENA 22 -#define DMA1_GATED_ENA 21 -#define DMA1_PWRDN_ENA 20 -#define DMA1_DSCALE 18 -#define DMA1_ALPHA_MODE 16 -#define DMA1_ALPHA 08 -#define DMA1_PXLCMD 00 - -/* + * defined for Configure Dumb Mode * defined for Configure Dumb Mode * DUMB LCD Panel bit[31:28] */ @@ -1050,18 +1001,6 @@ struct lcd_regs { #define CFG_CYC_BURST_LEN16 (1<<4) #define CFG_CYC_BURST_LEN8 (0<<4) -/* - * defined Dumb Panel Clock Divider register - * SCLK_Source bit[31] - */ - /* 0: PLL clock select*/ -#define AXI_BUS_SEL 0x80000000 -#define CCD_CLK_SEL 0x40000000 -#define DCON_CLK_SEL 0x20000000 -#define ENA_CLK_INT_DIV CONFIG_FB_DOVE_CLCD_SCLK_DIV -#define IDLE_CLK_INT_DIV 0x1 /* idle Integer Divider */ -#define DIS_CLK_INT_DIV 0x0 /* Disable Integer Divider */ - /* SRAM ID */ #define SRAMID_GAMMA_YR 0x0 #define SRAMID_GAMMA_UG 0x1 @@ -1471,422 +1410,6 @@ struct dsi_regs { #define LVDS_FREQ_OFFSET_MODE_CK_DIV4_OUT (0x1 << 1) #define LVDS_FREQ_OFFSET_MODE_EN (0x1 << 0) -/* VDMA */ -struct vdma_ch_regs { -#define VDMA_DC_SADDR_1 0x320 -#define VDMA_DC_SADDR_2 0x3A0 -#define VDMA_DC_SZ_1 0x324 -#define VDMA_DC_SZ_2 0x3A4 -#define VDMA_CTRL_1 0x328 -#define VDMA_CTRL_2 0x3A8 -#define VDMA_SRC_SZ_1 0x32C -#define VDMA_SRC_SZ_2 0x3AC -#define VDMA_SA_1 0x330 -#define VDMA_SA_2 0x3B0 -#define VDMA_DA_1 0x334 -#define VDMA_DA_2 0x3B4 -#define VDMA_SZ_1 0x338 -#define VDMA_SZ_2 0x3B8 - u32 dc_saddr; - u32 dc_size; - u32 ctrl; - u32 src_size; - u32 src_addr; - u32 dst_addr; - u32 dst_size; -#define VDMA_PITCH_1 0x33C -#define VDMA_PITCH_2 0x3BC -#define VDMA_ROT_CTRL_1 0x340 -#define VDMA_ROT_CTRL_2 0x3C0 -#define VDMA_RAM_CTRL0_1 0x344 -#define VDMA_RAM_CTRL0_2 0x3C4 -#define VDMA_RAM_CTRL1_1 0x348 -#define VDMA_RAM_CTRL1_2 0x3C8 - u32 pitch; - u32 rot_ctrl; - u32 ram_ctrl0; - u32 ram_ctrl1; - -}; -struct vdma_regs { -#define VDMA_ARBR_CTRL 0x300 -#define VDMA_IRQR 0x304 -#define VDMA_IRQM 0x308 -#define VDMA_IRQS 0x30C -#define VDMA_MDMA_ARBR_CTRL 0x310 - u32 arbr_ctr; - u32 irq_raw; - u32 irq_mask; - u32 irq_status; - u32 mdma_arbr_ctrl; - u32 reserved[3]; - - struct vdma_ch_regs ch1; - u32 reserved2[21]; - struct vdma_ch_regs ch2; -}; - -/* CMU */ -#define CMU_PIP_DE_H_CFG 0x0008 -#define CMU_PRI1_H_CFG 0x000C -#define CMU_PRI2_H_CFG 0x0010 -#define CMU_ACE_MAIN_DE1_H_CFG 0x0014 -#define CMU_ACE_MAIN_DE2_H_CFG 0x0018 -#define CMU_ACE_PIP_DE1_H_CFG 0x001C -#define CMU_ACE_PIP_DE2_H_CFG 0x0020 -#define CMU_PIP_DE_V_CFG 0x0024 -#define CMU_PRI_V_CFG 0x0028 -#define CMU_ACE_MAIN_DE_V_CFG 0x002C -#define CMU_ACE_PIP_DE_V_CFG 0x0030 -#define CMU_BAR_0_CFG 0x0034 -#define CMU_BAR_1_CFG 0x0038 -#define CMU_BAR_2_CFG 0x003C -#define CMU_BAR_3_CFG 0x0040 -#define CMU_BAR_4_CFG 0x0044 -#define CMU_BAR_5_CFG 0x0048 -#define CMU_BAR_6_CFG 0x004C -#define CMU_BAR_7_CFG 0x0050 -#define CMU_BAR_8_CFG 0x0054 -#define CMU_BAR_9_CFG 0x0058 -#define CMU_BAR_10_CFG 0x005C -#define CMU_BAR_11_CFG 0x0060 -#define CMU_BAR_12_CFG 0x0064 -#define CMU_BAR_13_CFG 0x0068 -#define CMU_BAR_14_CFG 0x006C -#define CMU_BAR_15_CFG 0x0070 -#define CMU_BAR_CTRL 0x0074 -#define PATTERN_TOTAL 0x0078 -#define PATTERN_ACTIVE 0x007C -#define PATTERN_FRONT_PORCH 0x0080 -#define PATTERN_BACK_PORCH 0x0084 -#define CMU_CLK_CTRL 0x0088 - -#define CMU_ICSC_M_C0_L 0x0900 -#define CMU_ICSC_M_C0_H 0x0901 -#define CMU_ICSC_M_C1_L 0x0902 -#define CMU_ICSC_M_C1_H 0x0903 -#define CMU_ICSC_M_C2_L 0x0904 -#define CMU_ICSC_M_C2_H 0x0905 -#define CMU_ICSC_M_C3_L 0x0906 -#define CMU_ICSC_M_C3_H 0x0907 -#define CMU_ICSC_M_C4_L 0x0908 -#define CMU_ICSC_M_C4_H 0x0909 -#define CMU_ICSC_M_C5_L 0x090A -#define CMU_ICSC_M_C5_H 0x090B -#define CMU_ICSC_M_C6_L 0x090C -#define CMU_ICSC_M_C6_H 0x090D -#define CMU_ICSC_M_C7_L 0x090E -#define CMU_ICSC_M_C7_H 0x090F -#define CMU_ICSC_M_C8_L 0x0910 -#define CMU_ICSC_M_C8_H 0x0911 -#define CMU_ICSC_M_O1_0 0x0914 -#define CMU_ICSC_M_O1_1 0x0915 -#define CMU_ICSC_M_O1_2 0x0916 -#define CMU_ICSC_M_O2_0 0x0918 -#define CMU_ICSC_M_O2_1 0x0919 -#define CMU_ICSC_M_O2_2 0x091A -#define CMU_ICSC_M_O3_0 0x091C -#define CMU_ICSC_M_O3_1 0x091D -#define CMU_ICSC_M_O3_2 0x091E -#define CMU_ICSC_P_C0_L 0x0920 -#define CMU_ICSC_P_C0_H 0x0921 -#define CMU_ICSC_P_C1_L 0x0922 -#define CMU_ICSC_P_C1_H 0x0923 -#define CMU_ICSC_P_C2_L 0x0924 -#define CMU_ICSC_P_C2_H 0x0925 -#define CMU_ICSC_P_C3_L 0x0926 -#define CMU_ICSC_P_C3_H 0x0927 -#define CMU_ICSC_P_C4_L 0x0928 -#define CMU_ICSC_P_C4_H 0x0929 -#define CMU_ICSC_P_C5_L 0x092A -#define CMU_ICSC_P_C5_H 0x092B -#define CMU_ICSC_P_C6_L 0x092C -#define CMU_ICSC_P_C6_H 0x092D -#define CMU_ICSC_P_C7_L 0x092E -#define CMU_ICSC_P_C7_H 0x092F -#define CMU_ICSC_P_C8_L 0x0930 -#define CMU_ICSC_P_C8_H 0x0931 -#define CMU_ICSC_P_O1_0 0x0934 -#define CMU_ICSC_P_O1_1 0x0935 -#define CMU_ICSC_P_O1_2 0x0936 -#define CMU_ICSC_P_O2_0 0x0938 -#define CMU_ICSC_P_O2_1 0x0939 -#define CMU_ICSC_P_O2_2 0x093A -#define CMU_ICSC_P_O3_0 0x093C -#define CMU_ICSC_P_O3_1 0x093D -#define CMU_ICSC_P_O3_2 0x093E -#define CMU_BR_M_EN 0x0940 -#define CMU_BR_M_TH1_L 0x0942 -#define CMU_BR_M_TH1_H 0x0943 -#define CMU_BR_M_TH2_L 0x0944 -#define CMU_BR_M_TH2_H 0x0945 -#define CMU_ACE_M_EN 0x0950 -#define CMU_ACE_M_WFG1 0x0951 -#define CMU_ACE_M_WFG2 0x0952 -#define CMU_ACE_M_WFG3 0x0953 -#define CMU_ACE_M_TH0 0x0954 -#define CMU_ACE_M_TH1 0x0955 -#define CMU_ACE_M_TH2 0x0956 -#define CMU_ACE_M_TH3 0x0957 -#define CMU_ACE_M_TH4 0x0958 -#define CMU_ACE_M_TH5 0x0959 -#define CMU_ACE_M_OP0_L 0x095A -#define CMU_ACE_M_OP0_H 0x095B -#define CMU_ACE_M_OP5_L 0x095C -#define CMU_ACE_M_OP5_H 0x095D -#define CMU_ACE_M_GB2 0x095E -#define CMU_ACE_M_GB3 0x095F -#define CMU_ACE_M_MS1 0x0960 -#define CMU_ACE_M_MS2 0x0961 -#define CMU_ACE_M_MS3 0x0962 -#define CMU_BR_P_EN 0x0970 -#define CMU_BR_P_TH1_L 0x0972 -#define CMU_BR_P_TH1_H 0x0973 -#define CMU_BR_P_TH2_L 0x0974 -#define CMU_BR_P_TH2_H 0x0975 -#define CMU_ACE_P_EN 0x0980 -#define CMU_ACE_P_WFG1 0x0981 -#define CMU_ACE_P_WFG2 0x0982 -#define CMU_ACE_P_WFG3 0x0983 -#define CMU_ACE_P_TH0 0x0984 -#define CMU_ACE_P_TH1 0x0985 -#define CMU_ACE_P_TH2 0x0986 -#define CMU_ACE_P_TH3 0x0987 -#define CMU_ACE_P_TH4 0x0988 -#define CMU_ACE_P_TH5 0x0989 -#define CMU_ACE_P_OP0_L 0x098A -#define CMU_ACE_P_OP0_H 0x098B -#define CMU_ACE_P_OP5_L 0x098C -#define CMU_ACE_P_OP5_H 0x098D -#define CMU_ACE_P_GB2 0x098E -#define CMU_ACE_P_GB3 0x098F -#define CMU_ACE_P_MS1 0x0990 -#define CMU_ACE_P_MS2 0x0991 -#define CMU_ACE_P_MS3 0x0992 -#define CMU_FTDC_M_EN 0x09A0 -#define CMU_FTDC_P_EN 0x09A1 -#define CMU_FTDC_INLOW_L 0x09A2 -#define CMU_FTDC_INLOW_H 0x09A3 -#define CMU_FTDC_INHIGH_L 0x09A4 -#define CMU_FTDC_INHIGH_H 0x09A5 -#define CMU_FTDC_OUTLOW_L 0x09A6 -#define CMU_FTDC_OUTLOW_H 0x09A7 -#define CMU_FTDC_OUTHIGH_L 0x09A8 -#define CMU_FTDC_OUTHIGH_H 0x09A9 -#define CMU_FTDC_YLOW 0x09AA -#define CMU_FTDC_YHIGH 0x09AB -#define CMU_FTDC_CH1 0x09AC -#define CMU_FTDC_CH2_L 0x09AE -#define CMU_FTDC_CH2_H 0x09AF -#define CMU_FTDC_CH3_L 0x09B0 -#define CMU_FTDC_CH3_H 0x09B1 -#define CMU_FTDC_1_C00_6 0x09B2 -#define CMU_FTDC_1_C01_6 0x09B8 -#define CMU_FTDC_1_C11_6 0x09BE -#define CMU_FTDC_1_C10_6 0x09C4 -#define CMU_FTDC_1_OFF00_6 0x09CA -#define CMU_FTDC_1_OFF10_6 0x09D0 -#define CMU_HS_M_EN 0x0A00 -#define CMU_HS_M_AX1_L 0x0A02 -#define CMU_HS_M_AX1_H 0x0A03 -#define CMU_HS_M_AX2_L 0x0A04 -#define CMU_HS_M_AX2_H 0x0A05 -#define CMU_HS_M_AX3_L 0x0A06 -#define CMU_HS_M_AX3_H 0x0A07 -#define CMU_HS_M_AX4_L 0x0A08 -#define CMU_HS_M_AX4_H 0x0A09 -#define CMU_HS_M_AX5_L 0x0A0A -#define CMU_HS_M_AX5_H 0x0A0B -#define CMU_HS_M_AX6_L 0x0A0C -#define CMU_HS_M_AX6_H 0x0A0D -#define CMU_HS_M_AX7_L 0x0A0E -#define CMU_HS_M_AX7_H 0x0A0F -#define CMU_HS_M_AX8_L 0x0A10 -#define CMU_HS_M_AX8_H 0x0A11 -#define CMU_HS_M_AX9_L 0x0A12 -#define CMU_HS_M_AX9_H 0x0A13 -#define CMU_HS_M_AX10_L 0x0A14 -#define CMU_HS_M_AX10_H 0x0A15 -#define CMU_HS_M_AX11_L 0x0A16 -#define CMU_HS_M_AX11_H 0x0A17 -#define CMU_HS_M_AX12_L 0x0A18 -#define CMU_HS_M_AX12_H 0x0A19 -#define CMU_HS_M_AX13_L 0x0A1A -#define CMU_HS_M_AX13_H 0x0A1B -#define CMU_HS_M_AX14_L 0x0A1C -#define CMU_HS_M_AX14_H 0x0A1D -#define CMU_HS_M_H1_H14 0x0A1E -#define CMU_HS_M_S1_S14 0x0A2C -#define CMU_HS_M_GL 0x0A3A -#define CMU_HS_M_MAXSAT_RGB_Y_L 0x0A3C -#define CMU_HS_M_MAXSAT_RGB_Y_H 0x0A3D -#define CMU_HS_M_MAXSAT_RCR_L 0x0A3E -#define CMU_HS_M_MAXSAT_RCR_H 0x0A3F -#define CMU_HS_M_MAXSAT_RCB_L 0x0A40 -#define CMU_HS_M_MAXSAT_RCB_H 0x0A41 -#define CMU_HS_M_MAXSAT_GCR_L 0x0A42 -#define CMU_HS_M_MAXSAT_GCR_H 0x0A43 -#define CMU_HS_M_MAXSAT_GCB_L 0x0A44 -#define CMU_HS_M_MAXSAT_GCB_H 0x0A45 -#define CMU_HS_M_MAXSAT_BCR_L 0x0A46 -#define CMU_HS_M_MAXSAT_BCR_H 0x0A47 -#define CMU_HS_M_MAXSAT_BCB_L 0x0A48 -#define CMU_HS_M_MAXSAT_BCB_H 0x0A49 -#define CMU_HS_M_ROFF_L 0x0A4A -#define CMU_HS_M_ROFF_H 0x0A4B -#define CMU_HS_M_GOFF_L 0x0A4C -#define CMU_HS_M_GOFF_H 0x0A4D -#define CMU_HS_M_BOFF_L 0x0A4E -#define CMU_HS_M_BOFF_H 0x0A4F -#define CMU_HS_P_EN 0x0A50 -#define CMU_HS_P_AX1_L 0x0A52 -#define CMU_HS_P_AX1_H 0x0A53 -#define CMU_HS_P_AX2_L 0x0A54 -#define CMU_HS_P_AX2_H 0x0A55 -#define CMU_HS_P_AX3_L 0x0A56 -#define CMU_HS_P_AX3_H 0x0A57 -#define CMU_HS_P_AX4_L 0x0A58 -#define CMU_HS_P_AX4_H 0x0A59 -#define CMU_HS_P_AX5_L 0x0A5A -#define CMU_HS_P_AX5_H 0x0A5B -#define CMU_HS_P_AX6_L 0x0A5C -#define CMU_HS_P_AX6_H 0x0A5D -#define CMU_HS_P_AX7_L 0x0A5E -#define CMU_HS_P_AX7_H 0x0A5F -#define CMU_HS_P_AX8_L 0x0A60 -#define CMU_HS_P_AX8_H 0x0A61 -#define CMU_HS_P_AX9_L 0x0A62 -#define CMU_HS_P_AX9_H 0x0A63 -#define CMU_HS_P_AX10_L 0x0A64 -#define CMU_HS_P_AX10_H 0x0A65 -#define CMU_HS_P_AX11_L 0x0A66 -#define CMU_HS_P_AX11_H 0x0A67 -#define CMU_HS_P_AX12_L 0x0A68 -#define CMU_HS_P_AX12_H 0x0A69 -#define CMU_HS_P_AX13_L 0x0A6A -#define CMU_HS_P_AX13_H 0x0A6B -#define CMU_HS_P_AX14_L 0x0A6C -#define CMU_HS_P_AX14_H 0x0A6D -#define CMU_HS_P_H1_H14 0x0A6E -#define CMU_HS_P_S1_S14 0x0A7C -#define CMU_HS_P_GL 0x0A8A -#define CMU_HS_P_MAXSAT_RGB_Y_L 0x0A8C -#define CMU_HS_P_MAXSAT_RGB_Y_H 0x0A8D -#define CMU_HS_P_MAXSAT_RCR_L 0x0A8E -#define CMU_HS_P_MAXSAT_RCR_H 0x0A8F -#define CMU_HS_P_MAXSAT_RCB_L 0x0A90 -#define CMU_HS_P_MAXSAT_RCB_H 0x0A91 -#define CMU_HS_P_MAXSAT_GCR_L 0x0A92 -#define CMU_HS_P_MAXSAT_GCR_H 0x0A93 -#define CMU_HS_P_MAXSAT_GCB_L 0x0A94 -#define CMU_HS_P_MAXSAT_GCB_H 0x0A95 -#define CMU_HS_P_MAXSAT_BCR_L 0x0A96 -#define CMU_HS_P_MAXSAT_BCR_H 0x0A97 -#define CMU_HS_P_MAXSAT_BCB_L 0x0A98 -#define CMU_HS_P_MAXSAT_BCB_H 0x0A99 -#define CMU_HS_P_ROFF_L 0x0A9A -#define CMU_HS_P_ROFF_H 0x0A9B -#define CMU_HS_P_GOFF_L 0x0A9C -#define CMU_HS_P_GOFF_H 0x0A9D -#define CMU_HS_P_BOFF_L 0x0A9E -#define CMU_HS_P_BOFF_H 0x0A9F -#define CMU_GLCSC_M_C0_L 0x0AA0 -#define CMU_GLCSC_M_C0_H 0x0AA1 -#define CMU_GLCSC_M_C1_L 0x0AA2 -#define CMU_GLCSC_M_C1_H 0x0AA3 -#define CMU_GLCSC_M_C2_L 0x0AA4 -#define CMU_GLCSC_M_C2_H 0x0AA5 -#define CMU_GLCSC_M_C3_L 0x0AA6 -#define CMU_GLCSC_M_C3_H 0x0AA7 -#define CMU_GLCSC_M_C4_L 0x0AA8 -#define CMU_GLCSC_M_C4_H 0x0AA9 -#define CMU_GLCSC_M_C5_L 0x0AAA -#define CMU_GLCSC_M_C5_H 0x0AAB -#define CMU_GLCSC_M_C6_L 0x0AAC -#define CMU_GLCSC_M_C6_H 0x0AAD -#define CMU_GLCSC_M_C7_L 0x0AAE -#define CMU_GLCSC_M_C7_H 0x0AAF -#define CMU_GLCSC_M_C8_L 0x0AB0 -#define CMU_GLCSC_M_C8_H 0x0AB1 -#define CMU_GLCSC_M_O1_1 0x0AB4 -#define CMU_GLCSC_M_O1_2 0x0AB5 -#define CMU_GLCSC_M_O1_3 0x0AB6 -#define CMU_GLCSC_M_O2_1 0x0AB8 -#define CMU_GLCSC_M_O2_2 0x0AB9 -#define CMU_GLCSC_M_O2_3 0x0ABA -#define CMU_GLCSC_M_O3_1 0x0ABC -#define CMU_GLCSC_M_O3_2 0x0ABD -#define CMU_GLCSC_M_O3_3 0x0ABE -#define CMU_GLCSC_P_C0_L 0x0AC0 -#define CMU_GLCSC_P_C0_H 0x0AC1 -#define CMU_GLCSC_P_C1_L 0x0AC2 -#define CMU_GLCSC_P_C1_H 0x0AC3 -#define CMU_GLCSC_P_C2_L 0x0AC4 -#define CMU_GLCSC_P_C2_H 0x0AC5 -#define CMU_GLCSC_P_C3_L 0x0AC6 -#define CMU_GLCSC_P_C3_H 0x0AC7 -#define CMU_GLCSC_P_C4_L 0x0AC8 -#define CMU_GLCSC_P_C4_H 0x0AC9 -#define CMU_GLCSC_P_C5_L 0x0ACA -#define CMU_GLCSC_P_C5_H 0x0ACB -#define CMU_GLCSC_P_C6_L 0x0ACC -#define CMU_GLCSC_P_C6_H 0x0ACD -#define CMU_GLCSC_P_C7_L 0x0ACE -#define CMU_GLCSC_P_C7_H 0x0ACF -#define CMU_GLCSC_P_C8_L 0x0AD0 -#define CMU_GLCSC_P_C8_H 0x0AD1 -#define CMU_GLCSC_P_O1_1 0x0AD4 -#define CMU_GLCSC_P_O1_2 0x0AD5 -#define CMU_GLCSC_P_O1_3 0x0AD6 -#define CMU_GLCSC_P_O2_1 0x0AD8 -#define CMU_GLCSC_P_O2_2 0x0AD9 -#define CMU_GLCSC_P_O2_3 0x0ADA -#define CMU_GLCSC_P_O3_1 0x0ADC -#define CMU_GLCSC_P_O3_2 0x0ADD -#define CMU_GLCSC_P_O3_3 0x0ADE -#define CMU_PIXVAL_M_EN 0x0AE0 -#define CMU_PIXVAL_P_EN 0x0AE1 - -#define CMU_CLK_CTRL_TCLK 0x0 -#define CMU_CLK_CTRL_SCLK 0x2 -#define CMU_CLK_CTRL_MSK 0x2 -#define CMU_CLK_CTRL_ENABLE 0x1 - -#define LCD_TOP_CTRL_TV 0x2 -#define LCD_TOP_CTRL_PN 0x0 -#define LCD_TOP_CTRL_SEL_MSK 0x2 -#define LCD_IO_CMU_IN_SEL_MSK (0x3 << 20) -#define LCD_IO_CMU_IN_SEL_TV 0 -#define LCD_IO_CMU_IN_SEL_PN 1 -#define LCD_IO_CMU_IN_SEL_PN2 2 -#define LCD_IO_TV_OUT_SEL_MSK (0x3 << 26) -#define LCD_IO_PN_OUT_SEL_MSK (0x3 << 24) -#define LCD_IO_PN2_OUT_SEL_MSK (0x3 << 28) -#define LCD_IO_TV_OUT_SEL_NON 3 -#define LCD_IO_PN_OUT_SEL_NON 3 -#define LCD_IO_PN2_OUT_SEL_NON 3 -#define LCD_TOP_CTRL_CMU_ENABLE 0x1 -#define LCD_IO_OVERL_MSK 0xC00000 -#define LCD_IO_OVERL_TV 0x0 -#define LCD_IO_OVERL_LCD1 0x400000 -#define LCD_IO_OVERL_LCD2 0xC00000 -#define HINVERT_MSK 0x4 -#define VINVERT_MSK 0x8 -#define HINVERT_LEN 0x2 -#define VINVERT_LEN 0x3 - -#define CMU_CTRL 0x88 -#define CMU_CTRL_A0_MSK 0x6 -#define CMU_CTRL_A0_TV 0x0 -#define CMU_CTRL_A0_LCD1 0x1 -#define CMU_CTRL_A0_LCD2 0x2 -#define CMU_CTRL_A0_HDMI 0x3 - -#define ICR_DRV_ROUTE_OFF 0x0 -#define ICR_DRV_ROUTE_TV 0x1 -#define ICR_DRV_ROUTE_LCD1 0x2 -#define ICR_DRV_ROUTE_LCD2 0x3 - enum { PATH_PN = 0, PATH_TV, diff --git a/drivers/video/simplefb.c b/drivers/video/simplefb.c new file mode 100644 index 000000000000..e2e9e3e61b72 --- /dev/null +++ b/drivers/video/simplefb.c @@ -0,0 +1,234 @@ +/* + * Simplest possible simple frame-buffer driver, as a platform device + * + * Copyright (c) 2013, Stephen Warren + * + * Based on q40fb.c, which was: + * Copyright (C) 2001 Richard Zidlicky <rz@linux-m68k.org> + * + * Also based on offb.c, which was: + * Copyright (C) 1997 Geert Uytterhoeven + * Copyright (C) 1996 Paul Mackerras + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/errno.h> +#include <linux/fb.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +static struct fb_fix_screeninfo simplefb_fix = { + .id = "simple", + .type = FB_TYPE_PACKED_PIXELS, + .visual = FB_VISUAL_TRUECOLOR, + .accel = FB_ACCEL_NONE, +}; + +static struct fb_var_screeninfo simplefb_var = { + .height = -1, + .width = -1, + .activate = FB_ACTIVATE_NOW, + .vmode = FB_VMODE_NONINTERLACED, +}; + +static int simplefb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, + u_int transp, struct fb_info *info) +{ + u32 *pal = info->pseudo_palette; + u32 cr = red >> (16 - info->var.red.length); + u32 cg = green >> (16 - info->var.green.length); + u32 cb = blue >> (16 - info->var.blue.length); + u32 value; + + if (regno >= 16) + return -EINVAL; + + value = (cr << info->var.red.offset) | + (cg << info->var.green.offset) | + (cb << info->var.blue.offset); + if (info->var.transp.length > 0) { + u32 mask = (1 << info->var.transp.length) - 1; + mask <<= info->var.transp.offset; + value |= mask; + } + pal[regno] = value; + + return 0; +} + +static struct fb_ops simplefb_ops = { + .owner = THIS_MODULE, + .fb_setcolreg = simplefb_setcolreg, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + +struct simplefb_format { + const char *name; + u32 bits_per_pixel; + struct fb_bitfield red; + struct fb_bitfield green; + struct fb_bitfield blue; + struct fb_bitfield transp; +}; + +static struct simplefb_format simplefb_formats[] = { + { "r5g6b5", 16, {11, 5}, {5, 6}, {0, 5}, {0, 0} }, +}; + +struct simplefb_params { + u32 width; + u32 height; + u32 stride; + struct simplefb_format *format; +}; + +static int simplefb_parse_dt(struct platform_device *pdev, + struct simplefb_params *params) +{ + struct device_node *np = pdev->dev.of_node; + int ret; + const char *format; + int i; + + ret = of_property_read_u32(np, "width", ¶ms->width); + if (ret) { + dev_err(&pdev->dev, "Can't parse width property\n"); + return ret; + } + + ret = of_property_read_u32(np, "height", ¶ms->height); + if (ret) { + dev_err(&pdev->dev, "Can't parse height property\n"); + return ret; + } + + ret = of_property_read_u32(np, "stride", ¶ms->stride); + if (ret) { + dev_err(&pdev->dev, "Can't parse stride property\n"); + return ret; + } + + ret = of_property_read_string(np, "format", &format); + if (ret) { + dev_err(&pdev->dev, "Can't parse format property\n"); + return ret; + } + params->format = NULL; + for (i = 0; i < ARRAY_SIZE(simplefb_formats); i++) { + if (strcmp(format, simplefb_formats[i].name)) + continue; + params->format = &simplefb_formats[i]; + break; + } + if (!params->format) { + dev_err(&pdev->dev, "Invalid format value\n"); + return -EINVAL; + } + + return 0; +} + +static int simplefb_probe(struct platform_device *pdev) +{ + int ret; + struct simplefb_params params; + struct fb_info *info; + struct resource *mem; + + if (fb_get_options("simplefb", NULL)) + return -ENODEV; + + ret = simplefb_parse_dt(pdev, ¶ms); + if (ret) + return ret; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(&pdev->dev, "No memory resource\n"); + return -EINVAL; + } + + info = framebuffer_alloc(sizeof(u32) * 16, &pdev->dev); + if (!info) + return -ENOMEM; + platform_set_drvdata(pdev, info); + + info->fix = simplefb_fix; + info->fix.smem_start = mem->start; + info->fix.smem_len = resource_size(mem); + info->fix.line_length = params.stride; + + info->var = simplefb_var; + info->var.xres = params.width; + info->var.yres = params.height; + info->var.xres_virtual = params.width; + info->var.yres_virtual = params.height; + info->var.bits_per_pixel = params.format->bits_per_pixel; + info->var.red = params.format->red; + info->var.green = params.format->green; + info->var.blue = params.format->blue; + info->var.transp = params.format->transp; + + info->fbops = &simplefb_ops; + info->flags = FBINFO_DEFAULT; + info->screen_base = devm_ioremap(&pdev->dev, info->fix.smem_start, + info->fix.smem_len); + if (!info->screen_base) { + framebuffer_release(info); + return -ENODEV; + } + info->pseudo_palette = (void *)(info + 1); + + ret = register_framebuffer(info); + if (ret < 0) { + dev_err(&pdev->dev, "Unable to register simplefb: %d\n", ret); + framebuffer_release(info); + return ret; + } + + dev_info(&pdev->dev, "fb%d: simplefb registered!\n", info->node); + + return 0; +} + +static int simplefb_remove(struct platform_device *pdev) +{ + struct fb_info *info = platform_get_drvdata(pdev); + + unregister_framebuffer(info); + framebuffer_release(info); + + return 0; +} + +static const struct of_device_id simplefb_of_match[] = { + { .compatible = "simple-framebuffer", }, + { }, +}; +MODULE_DEVICE_TABLE(of, simplefb_of_match); + +static struct platform_driver simplefb_driver = { + .driver = { + .name = "simple-framebuffer", + .owner = THIS_MODULE, + .of_match_table = simplefb_of_match, + }, + .probe = simplefb_probe, + .remove = simplefb_remove, +}; +module_platform_driver(simplefb_driver); + +MODULE_AUTHOR("Stephen Warren <swarren@wwwdotorg.org>"); +MODULE_DESCRIPTION("Simple framebuffer driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index d4284458377e..e328a61b64ba 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -166,7 +166,7 @@ static int uvesafb_exec(struct uvesafb_ktask *task) memcpy(&m->id, &uvesafb_cn_id, sizeof(m->id)); m->seq = seq; m->len = len; - m->ack = random32(); + m->ack = prandom_u32(); /* uvesafb_task structure */ memcpy(m + 1, &task->t, sizeof(task->t)); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 67af155cf602..dd4d9cb86243 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -145,9 +145,9 @@ config SWIOTLB_XEN select SWIOTLB config XEN_TMEM - bool + tristate depends on !ARM - default y if (CLEANCACHE || FRONTSWAP) + default m if (CLEANCACHE || FRONTSWAP) help Shim to interface in-kernel Transcendent Memory hooks (e.g. cleancache and frontswap) to Xen tmem hypercalls. diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 3ee836d42581..e3600be4e7fa 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -5,6 +5,7 @@ * Author: Dan Magenheimer */ +#include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/init.h> @@ -128,6 +129,7 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } +#ifndef CONFIG_XEN_TMEM_MODULE bool __read_mostly tmem_enabled = false; static int __init enable_tmem(char *s) @@ -136,6 +138,7 @@ static int __init enable_tmem(char *s) return 1; } __setup("tmem", enable_tmem); +#endif #ifdef CONFIG_CLEANCACHE static int xen_tmem_destroy_pool(u32 pool_id) @@ -227,16 +230,21 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); } -static bool __initdata use_cleancache = true; - +static bool disable_cleancache __read_mostly; +static bool disable_selfballooning __read_mostly; +#ifdef CONFIG_XEN_TMEM_MODULE +module_param(disable_cleancache, bool, S_IRUGO); +module_param(disable_selfballooning, bool, S_IRUGO); +#else static int __init no_cleancache(char *s) { - use_cleancache = false; + disable_cleancache = true; return 1; } __setup("nocleancache", no_cleancache); +#endif -static struct cleancache_ops __initdata tmem_cleancache_ops = { +static struct cleancache_ops tmem_cleancache_ops = { .put_page = tmem_cleancache_put_page, .get_page = tmem_cleancache_get_page, .invalidate_page = tmem_cleancache_flush_page, @@ -353,54 +361,71 @@ static void tmem_frontswap_init(unsigned ignored) xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); } -static bool __initdata use_frontswap = true; - +static bool disable_frontswap __read_mostly; +static bool disable_frontswap_selfshrinking __read_mostly; +#ifdef CONFIG_XEN_TMEM_MODULE +module_param(disable_frontswap, bool, S_IRUGO); +module_param(disable_frontswap_selfshrinking, bool, S_IRUGO); +#else static int __init no_frontswap(char *s) { - use_frontswap = false; + disable_frontswap = true; return 1; } __setup("nofrontswap", no_frontswap); +#endif -static struct frontswap_ops __initdata tmem_frontswap_ops = { +static struct frontswap_ops tmem_frontswap_ops = { .store = tmem_frontswap_store, .load = tmem_frontswap_load, .invalidate_page = tmem_frontswap_flush_page, .invalidate_area = tmem_frontswap_flush_area, .init = tmem_frontswap_init }; +#else /* CONFIG_FRONTSWAP */ +#define disable_frontswap_selfshrinking 1 #endif -static int __init xen_tmem_init(void) +static int xen_tmem_init(void) { if (!xen_domain()) return 0; #ifdef CONFIG_FRONTSWAP - if (tmem_enabled && use_frontswap) { + if (tmem_enabled && !disable_frontswap) { char *s = ""; - struct frontswap_ops old_ops = + struct frontswap_ops *old_ops = frontswap_register_ops(&tmem_frontswap_ops); tmem_frontswap_poolid = -1; - if (old_ops.init != NULL) + if (IS_ERR(old_ops) || old_ops) { + if (IS_ERR(old_ops)) + return PTR_ERR(old_ops); s = " (WARNING: frontswap_ops overridden)"; + } printk(KERN_INFO "frontswap enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); } #endif #ifdef CONFIG_CLEANCACHE BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); - if (tmem_enabled && use_cleancache) { + if (tmem_enabled && !disable_cleancache) { char *s = ""; - struct cleancache_ops old_ops = + struct cleancache_ops *old_ops = cleancache_register_ops(&tmem_cleancache_ops); - if (old_ops.init_fs != NULL) + if (old_ops) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); } #endif +#ifdef CONFIG_XEN_SELFBALLOONING + xen_selfballoon_init(!disable_selfballooning, + !disable_frontswap_selfshrinking); +#endif return 0; } module_init(xen_tmem_init) +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Magenheimer <dan.magenheimer@oracle.com>"); +MODULE_DESCRIPTION("Shim to Xen transcendent memory"); diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 2552d3e0a70f..f2ef569c7cc1 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -121,7 +121,7 @@ static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); static bool frontswap_selfshrinking __read_mostly; /* Enable/disable with kernel boot option. */ -static bool use_frontswap_selfshrink __initdata = true; +static bool use_frontswap_selfshrink = true; /* * The default values for the following parameters were deemed reasonable @@ -185,7 +185,7 @@ static int __init xen_nofrontswap_selfshrink_setup(char *s) __setup("noselfshrink", xen_nofrontswap_selfshrink_setup); /* Disable with kernel boot option. */ -static bool use_selfballooning __initdata = true; +static bool use_selfballooning = true; static int __init xen_noselfballooning_setup(char *s) { @@ -196,7 +196,7 @@ static int __init xen_noselfballooning_setup(char *s) __setup("noselfballooning", xen_noselfballooning_setup); #else /* !CONFIG_FRONTSWAP */ /* Enable with kernel boot option. */ -static bool use_selfballooning __initdata = false; +static bool use_selfballooning; static int __init xen_selfballooning_setup(char *s) { @@ -537,7 +537,7 @@ int register_xen_selfballooning(struct device *dev) } EXPORT_SYMBOL(register_xen_selfballooning); -static int __init xen_selfballoon_init(void) +int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink) { bool enable = false; @@ -571,7 +571,4 @@ static int __init xen_selfballoon_init(void) return 0; } - -subsys_initcall(xen_selfballoon_init); - -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL(xen_selfballoon_init); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 0ad61c6a65a5..055562c580b4 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -33,6 +33,7 @@ #include <linux/pagemap.h> #include <linux/idr.h> #include <linux/sched.h> +#include <linux/aio.h> #include <net/9p/9p.h> #include <net/9p/client.h> diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 0efd1524b977..370b24cee4d8 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -65,6 +65,20 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS This config option changes the default setting of coredump_filter seen at boot time. If unsure, say Y. +config BINFMT_SCRIPT + tristate "Kernel support for scripts starting with #!" + default y + help + Say Y here if you want to execute interpreted scripts starting with + #! followed by the path to an interpreter. + + You can build this support as a module; however, until that module + gets loaded, you cannot run scripts. Thus, if you want to load this + module from an initramfs, the portion of the initramfs before loading + this module must consist of compiled binaries only. + + Most systems will not boot if you say M or N here. If unsure, say Y. + config BINFMT_FLAT bool "Kernel support for flat binaries" depends on !MMU && (!FRV || BROKEN) diff --git a/fs/Makefile b/fs/Makefile index b691a965dc1a..9759acae25c6 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o super.o \ ioctl.o readdir.o select.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ - pnode.o drop_caches.o splice.o sync.o utimes.o \ + pnode.o splice.o sync.o utimes.o \ stack.o fs_struct.o statfs.o ifeq ($(CONFIG_BLOCK),y) @@ -34,10 +34,7 @@ obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o - -# binfmt_script is always there -obj-y += binfmt_script.o - +obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o @@ -49,6 +46,7 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o obj-$(CONFIG_COREDUMP) += coredump.o +obj-$(CONFIG_SYSCTL) += drop_caches.o obj-$(CONFIG_FHANDLE) += fhandle.o diff --git a/fs/afs/write.c b/fs/afs/write.c index 7e03eadb40c0..a890db4b9898 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -14,6 +14,7 @@ #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/pagevec.h> +#include <linux/aio.h> #include "internal.h" static int afs_write_back_from_locked_page(struct afs_writeback *wb, @@ -8,6 +8,8 @@ * * See ../COPYING for licensing terms. */ +#define pr_fmt(fmt) "%s: " fmt, __func__ + #include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> @@ -18,14 +20,14 @@ #include <linux/backing-dev.h> #include <linux/uio.h> -#define DEBUG 0 - #include <linux/sched.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> +#include <linux/bio.h> #include <linux/mmu_context.h> +#include <linux/percpu.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/aio.h> @@ -35,15 +37,111 @@ #include <linux/eventfd.h> #include <linux/blkdev.h> #include <linux/compat.h> +#include <linux/percpu-refcount.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> -#if DEBUG > 1 -#define dprintk printk -#else -#define dprintk(x...) do { ; } while (0) -#endif +#define AIO_RING_MAGIC 0xa10a10a1 +#define AIO_RING_COMPAT_FEATURES 1 +#define AIO_RING_INCOMPAT_FEATURES 0 +struct aio_ring { + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ + unsigned head; + unsigned tail; + + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /* size of aio_ring */ + + + struct io_event io_events[0]; +}; /* 128 bytes + ring size */ + +#define AIO_RING_PAGES 8 + +struct kioctx_cpu { + unsigned reqs_available; +}; + +struct kioctx { + struct percpu_ref users; + + /* This needs improving */ + unsigned long user_id; + struct hlist_node list; + + struct __percpu kioctx_cpu *cpu; + + /* + * For percpu reqs_available, number of slots we move to/from global + * counter at a time: + */ + unsigned req_batch; + /* + * This is what userspace passed to io_setup(), it's not used for + * anything but counting against the global max_reqs quota. + * + * The real limit is nr_events - 1, which will be larger (see + * aio_setup_ring()) + */ + unsigned max_reqs; + + /* Size of ringbuffer, in units of struct io_event */ + unsigned nr_events; + + unsigned long mmap_base; + unsigned long mmap_size; + + struct page **ring_pages; + long nr_pages; + + struct rcu_head rcu_head; + struct work_struct rcu_work; + + struct { + /* + * This counts the number of available slots in the ringbuffer, + * so we avoid overflowing it: it's decremented (if positive) + * when allocating a kiocb and incremented when the resulting + * io_event is pulled off the ringbuffer. + * + * We batch accesses to it with a percpu version. + */ + atomic_t reqs_available; + } ____cacheline_aligned_in_smp; + + struct { + spinlock_t ctx_lock; + struct list_head active_reqs; /* used for cancellation */ + } ____cacheline_aligned_in_smp; + + struct { + struct mutex ring_lock; + wait_queue_head_t wait; + + /* + * Copy of the real tail - to reduce cacheline bouncing. Updated + * by aio_complete() whenever it updates the real tail. + */ + unsigned shadow_tail; + } ____cacheline_aligned_in_smp; + + struct { + /* + * This is the canonical copy of the tail pointer, updated by + * aio_complete(). But aio_complete() also uses it as a lock, so + * other code can't use it; aio_complete() keeps shadow_tail in + * sync with the real value of the tail pointer for other code + * to use. + */ + unsigned tail; + } ____cacheline_aligned_in_smp; + + struct page *internal_pages[AIO_RING_PAGES]; +}; /*------ sysctl variables----*/ static DEFINE_SPINLOCK(aio_nr_lock); @@ -54,11 +152,6 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request static struct kmem_cache *kiocb_cachep; static struct kmem_cache *kioctx_cachep; -static struct workqueue_struct *aio_wq; - -static void aio_kick_handler(struct work_struct *); -static void aio_queue_work(struct kioctx *); - /* aio_setup * Creates the slab caches used by the aio routines, panic on * failure as this is done early during the boot sequence. @@ -68,10 +161,7 @@ static int __init aio_setup(void) kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); - aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */ - BUG_ON(!aio_wq); - - pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); + pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); return 0; } @@ -79,28 +169,23 @@ __initcall(aio_setup); static void aio_free_ring(struct kioctx *ctx) { - struct aio_ring_info *info = &ctx->ring_info; long i; - for (i=0; i<info->nr_pages; i++) - put_page(info->ring_pages[i]); + for (i = 0; i < ctx->nr_pages; i++) + put_page(ctx->ring_pages[i]); - if (info->mmap_size) { - BUG_ON(ctx->mm != current->mm); - vm_munmap(info->mmap_base, info->mmap_size); - } + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); - if (info->ring_pages && info->ring_pages != info->internal_pages) - kfree(info->ring_pages); - info->ring_pages = NULL; - info->nr = 0; + if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) + kfree(ctx->ring_pages); } static int aio_setup_ring(struct kioctx *ctx) { struct aio_ring *ring; - struct aio_ring_info *info = &ctx->ring_info; unsigned nr_events = ctx->max_reqs; + struct mm_struct *mm = current->mm; unsigned long size, populate; int nr_pages; @@ -116,46 +201,44 @@ static int aio_setup_ring(struct kioctx *ctx) nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); - info->nr = 0; - info->ring_pages = info->internal_pages; + ctx->nr_events = 0; + ctx->ring_pages = ctx->internal_pages; if (nr_pages > AIO_RING_PAGES) { - info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); - if (!info->ring_pages) + ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!ctx->ring_pages) return -ENOMEM; } - info->mmap_size = nr_pages * PAGE_SIZE; - dprintk("attempting mmap of %lu bytes\n", info->mmap_size); - down_write(&ctx->mm->mmap_sem); - info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, - PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, 0, - &populate); - if (IS_ERR((void *)info->mmap_base)) { - up_write(&ctx->mm->mmap_sem); - info->mmap_size = 0; + ctx->mmap_size = nr_pages * PAGE_SIZE; + pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size); + down_write(&mm->mmap_sem); + ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate); + if (IS_ERR((void *)ctx->mmap_base)) { + up_write(&mm->mmap_sem); + ctx->mmap_size = 0; aio_free_ring(ctx); return -EAGAIN; } - dprintk("mmap address: 0x%08lx\n", info->mmap_base); - info->nr_pages = get_user_pages(current, ctx->mm, - info->mmap_base, nr_pages, - 1, 0, info->ring_pages, NULL); - up_write(&ctx->mm->mmap_sem); + pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); + ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages, + 1, 0, ctx->ring_pages, NULL); + up_write(&mm->mmap_sem); - if (unlikely(info->nr_pages != nr_pages)) { + if (unlikely(ctx->nr_pages != nr_pages)) { aio_free_ring(ctx); return -EAGAIN; } if (populate) - mm_populate(info->mmap_base, populate); + mm_populate(ctx->mmap_base, populate); - ctx->user_id = info->mmap_base; + ctx->user_id = ctx->mmap_base; + ctx->nr_events = nr_events; /* trusted copy */ - info->nr = nr_events; /* trusted copy */ - - ring = kmap_atomic(info->ring_pages[0]); + ring = kmap_atomic(ctx->ring_pages[0]); ring->nr = nr_events; /* user copy */ ring->id = ctx->user_id; ring->head = ring->tail = 0; @@ -164,72 +247,145 @@ static int aio_setup_ring(struct kioctx *ctx) ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->header_length = sizeof(struct aio_ring); kunmap_atomic(ring); + flush_dcache_page(ctx->ring_pages[0]); return 0; } - -/* aio_ring_event: returns a pointer to the event at the given index from - * kmap_atomic(). Release the pointer with put_aio_ring_event(); - */ #define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event)) #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) -#define aio_ring_event(info, nr) ({ \ - unsigned pos = (nr) + AIO_EVENTS_OFFSET; \ - struct io_event *__event; \ - __event = kmap_atomic( \ - (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \ - __event += pos % AIO_EVENTS_PER_PAGE; \ - __event; \ -}) - -#define put_aio_ring_event(event) do { \ - struct io_event *__event = (event); \ - (void)__event; \ - kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \ -} while(0) - -static void ctx_rcu_free(struct rcu_head *head) +void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) +{ + struct kioctx *ctx = req->ki_ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->ctx_lock, flags); + + if (!req->ki_list.next) + list_add(&req->ki_list, &ctx->active_reqs); + + req->ki_cancel = cancel; + + spin_unlock_irqrestore(&ctx->ctx_lock, flags); +} +EXPORT_SYMBOL(kiocb_set_cancel_fn); + +static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb, + struct io_event *res) +{ + kiocb_cancel_fn *old, *cancel; + int ret = -EINVAL; + + /* + * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it + * actually has a cancel function, hence the cmpxchg() + */ + + cancel = ACCESS_ONCE(kiocb->ki_cancel); + do { + if (!cancel || cancel == KIOCB_CANCELLED) + return ret; + + old = cancel; + cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); + } while (cancel != old); + + atomic_inc(&kiocb->ki_users); + spin_unlock_irq(&ctx->ctx_lock); + + memset(res, 0, sizeof(*res)); + res->obj = (u64)(unsigned long)kiocb->ki_obj.user; + res->data = kiocb->ki_user_data; + ret = cancel(kiocb, res); + + spin_lock_irq(&ctx->ctx_lock); + + return ret; +} + +static void free_ioctx_rcu(struct rcu_head *head) { struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); + + free_percpu(ctx->cpu); kmem_cache_free(kioctx_cachep, ctx); } -/* __put_ioctx - * Called when the last user of an aio context has gone away, - * and the struct needs to be freed. +/* + * When this function runs, the kioctx has been removed from the "hash table" + * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - + * now it's safe to cancel any that need to be. */ -static void __put_ioctx(struct kioctx *ctx) +static void free_ioctx(struct kioctx *ctx) { - unsigned nr_events = ctx->max_reqs; - BUG_ON(ctx->reqs_active); + struct aio_ring *ring; + struct io_event res; + struct kiocb *req; + unsigned cpu, head, avail; - cancel_delayed_work_sync(&ctx->wq); - aio_free_ring(ctx); - mmdrop(ctx->mm); - ctx->mm = NULL; - if (nr_events) { - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - nr_events > aio_nr); - aio_nr -= nr_events; - spin_unlock(&aio_nr_lock); + spin_lock_irq(&ctx->ctx_lock); + + while (!list_empty(&ctx->active_reqs)) { + req = list_first_entry(&ctx->active_reqs, + struct kiocb, ki_list); + + list_del_init(&req->ki_list); + kiocb_cancel(ctx, req, &res); } - pr_debug("__put_ioctx: freeing %p\n", ctx); - call_rcu(&ctx->rcu_head, ctx_rcu_free); -} -static inline int try_get_ioctx(struct kioctx *kioctx) -{ - return atomic_inc_not_zero(&kioctx->users); + spin_unlock_irq(&ctx->ctx_lock); + + for_each_possible_cpu(cpu) { + struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); + + atomic_add(kcpu->reqs_available, &ctx->reqs_available); + kcpu->reqs_available = 0; + } + + ring = kmap_atomic(ctx->ring_pages[0]); + head = ring->head; + kunmap_atomic(ring); + + while (atomic_read(&ctx->reqs_available) < ctx->nr_events - 1) { + wait_event(ctx->wait, + (head != ctx->shadow_tail) || + (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)); + + avail = (head <= ctx->shadow_tail + ? ctx->shadow_tail : ctx->nr_events) - head; + + atomic_add(avail, &ctx->reqs_available); + head += avail; + head %= ctx->nr_events; + } + + WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1); + + aio_free_ring(ctx); + + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - ctx->max_reqs > aio_nr); + aio_nr -= ctx->max_reqs; + spin_unlock(&aio_nr_lock); + + pr_debug("freeing %p\n", ctx); + + /* + * Here the call_rcu() is between the wait_event() for reqs_active to + * hit 0, and freeing the ioctx. + * + * aio_complete() decrements reqs_active, but it has to touch the ioctx + * after to issue a wakeup so we use rcu. + */ + call_rcu(&ctx->rcu_head, free_ioctx_rcu); } -static inline void put_ioctx(struct kioctx *kioctx) +static void put_ioctx(struct kioctx *ctx) { - BUG_ON(atomic_read(&kioctx->users) <= 0); - if (unlikely(atomic_dec_and_test(&kioctx->users))) - __put_ioctx(kioctx); + if (percpu_ref_put(&ctx->users)) + free_ioctx(ctx); } /* ioctx_alloc @@ -237,10 +393,22 @@ static inline void put_ioctx(struct kioctx *kioctx) */ static struct kioctx *ioctx_alloc(unsigned nr_events) { - struct mm_struct *mm; + struct mm_struct *mm = current->mm; struct kioctx *ctx; int err = -ENOMEM; + /* + * We keep track of the number of available ringbuffer slots, to prevent + * overflow (reqs_available), and we also use percpu counters for this. + * + * So since up to half the slots might be on other cpu's percpu counters + * and unavailable, double nr_events so userspace sees what they + * expected: additionally, we move req_batch slots to/from percpu + * counters at a time, so make sure that isn't 0: + */ + nr_events = max(nr_events, num_possible_cpus() * 4); + nr_events *= 2; + /* Prevent overflows */ if ((nr_events > (0x10000000U / sizeof(struct io_event))) || (nr_events > (0x10000000U / sizeof(struct kiocb)))) { @@ -256,21 +424,29 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) return ERR_PTR(-ENOMEM); ctx->max_reqs = nr_events; - mm = ctx->mm = current->mm; - atomic_inc(&mm->mm_count); - atomic_set(&ctx->users, 2); + percpu_ref_init(&ctx->users); + rcu_read_lock(); + percpu_ref_get(&ctx->users); + rcu_read_unlock(); + spin_lock_init(&ctx->ctx_lock); - spin_lock_init(&ctx->ring_info.ring_lock); + mutex_init(&ctx->ring_lock); init_waitqueue_head(&ctx->wait); INIT_LIST_HEAD(&ctx->active_reqs); - INIT_LIST_HEAD(&ctx->run_list); - INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler); - if (aio_setup_ring(ctx) < 0) + ctx->cpu = alloc_percpu(struct kioctx_cpu); + if (!ctx->cpu) goto out_freectx; + if (aio_setup_ring(ctx) < 0) + goto out_freepcpu; + + atomic_set(&ctx->reqs_available, ctx->nr_events - 1); + ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); + BUG_ON(!ctx->req_batch); + /* limit the number of system wide aios */ spin_lock(&aio_nr_lock); if (aio_nr + nr_events > aio_max_nr || @@ -286,64 +462,58 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) hlist_add_head_rcu(&ctx->list, &mm->ioctx_list); spin_unlock(&mm->ioctx_lock); - dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", - ctx, ctx->user_id, current->mm, ctx->ring_info.nr); + pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", + ctx, ctx->user_id, mm, ctx->nr_events); return ctx; out_cleanup: err = -EAGAIN; aio_free_ring(ctx); +out_freepcpu: + free_percpu(ctx->cpu); out_freectx: - mmdrop(mm); kmem_cache_free(kioctx_cachep, ctx); - dprintk("aio: error allocating ioctx %d\n", err); + pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); } -/* kill_ctx - * Cancels all outstanding aio requests on an aio context. Used - * when the processes owning a context have all exited to encourage - * the rapid destruction of the kioctx. - */ -static void kill_ctx(struct kioctx *ctx) +static void kill_ioctx_work(struct work_struct *work) { - int (*cancel)(struct kiocb *, struct io_event *); - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - struct io_event res; + struct kioctx *ctx = container_of(work, struct kioctx, rcu_work); - spin_lock_irq(&ctx->ctx_lock); - ctx->dead = 1; - while (!list_empty(&ctx->active_reqs)) { - struct list_head *pos = ctx->active_reqs.next; - struct kiocb *iocb = list_kiocb(pos); - list_del_init(&iocb->ki_list); - cancel = iocb->ki_cancel; - kiocbSetCancelled(iocb); - if (cancel) { - iocb->ki_users++; - spin_unlock_irq(&ctx->ctx_lock); - cancel(iocb, &res); - spin_lock_irq(&ctx->ctx_lock); - } - } + wake_up_all(&ctx->wait); + put_ioctx(ctx); +} - if (!ctx->reqs_active) - goto out; +static void kill_ioctx_rcu(struct rcu_head *head) +{ + struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); - add_wait_queue(&ctx->wait, &wait); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - while (ctx->reqs_active) { - spin_unlock_irq(&ctx->ctx_lock); - io_schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - spin_lock_irq(&ctx->ctx_lock); - } - __set_task_state(tsk, TASK_RUNNING); - remove_wait_queue(&ctx->wait, &wait); + INIT_WORK(&ctx->rcu_work, kill_ioctx_work); + schedule_work(&ctx->rcu_work); +} -out: - spin_unlock_irq(&ctx->ctx_lock); +/* kill_ioctx + * Cancels all outstanding aio requests on an aio context. Used + * when the processes owning a context have all exited to encourage + * the rapid destruction of the kioctx. + */ +static void kill_ioctx(struct kioctx *ctx) +{ + if (percpu_ref_kill(&ctx->users)) { + hlist_del_rcu(&ctx->list); + /* Between hlist_del_rcu() and dropping the initial ref */ + synchronize_rcu(); + + /* + * We can't punt to workqueue here because put_ioctx() -> + * free_ioctx() will unmap the ringbuffer, and that has to be + * done in the original process's context. kill_ioctx_rcu/work() + * exist for exit_aio(), as in that path free_ioctx() won't do + * the unmap. + */ + kill_ioctx_work(&ctx->rcu_work); + } } /* wait_on_sync_kiocb: @@ -351,9 +521,9 @@ out: */ ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { - while (iocb->ki_users) { + while (atomic_read(&iocb->ki_users)) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!iocb->ki_users) + if (!atomic_read(&iocb->ki_users)) break; io_schedule(); } @@ -362,28 +532,20 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb) } EXPORT_SYMBOL(wait_on_sync_kiocb); -/* exit_aio: called when the last user of mm goes away. At this point, - * there is no way for any new requests to be submited or any of the - * io_* syscalls to be called on the context. However, there may be - * outstanding requests which hold references to the context; as they - * go away, they will call put_ioctx and release any pinned memory - * associated with the request (held via struct page * references). +/* + * exit_aio: called when the last user of mm goes away. At this point, there is + * no way for any new requests to be submited or any of the io_* syscalls to be + * called on the context. + * + * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on + * them. */ void exit_aio(struct mm_struct *mm) { struct kioctx *ctx; + struct hlist_node *n; - while (!hlist_empty(&mm->ioctx_list)) { - ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list); - hlist_del_rcu(&ctx->list); - - kill_ctx(ctx); - - if (1 != atomic_read(&ctx->users)) - printk(KERN_DEBUG - "exit_aio:ioctx still alive: %d %d %d\n", - atomic_read(&ctx->users), ctx->dead, - ctx->reqs_active); + hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) { /* * We don't need to bother with munmap() here - * exit_mmap(mm) is coming and it'll unmap everything. @@ -391,150 +553,95 @@ void exit_aio(struct mm_struct *mm) * as indicator that it needs to unmap the area, * just set it to 0; aio_free_ring() is the only * place that uses ->mmap_size, so it's safe. - * That way we get all munmap done to current->mm - - * all other callers have ctx->mm == current->mm. */ - ctx->ring_info.mmap_size = 0; - put_ioctx(ctx); + ctx->mmap_size = 0; + + if (percpu_ref_kill(&ctx->users)) { + hlist_del_rcu(&ctx->list); + call_rcu(&ctx->rcu_head, kill_ioctx_rcu); + } } } -/* aio_get_req - * Allocate a slot for an aio request. Increments the users count - * of the kioctx so that the kioctx stays around until all requests are - * complete. Returns NULL if no requests are free. - * - * Returns with kiocb->users set to 2. The io submit code path holds - * an extra reference while submitting the i/o. - * This prevents races between the aio code path referencing the - * req (after submitting it) and aio_complete() freeing the req. - */ -static struct kiocb *__aio_get_req(struct kioctx *ctx) +static void put_reqs_available(struct kioctx *ctx, unsigned nr) { - struct kiocb *req = NULL; + struct kioctx_cpu *kcpu; - req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL); - if (unlikely(!req)) - return NULL; + preempt_disable(); + kcpu = this_cpu_ptr(ctx->cpu); - req->ki_flags = 0; - req->ki_users = 2; - req->ki_key = 0; - req->ki_ctx = ctx; - req->ki_cancel = NULL; - req->ki_retry = NULL; - req->ki_dtor = NULL; - req->private = NULL; - req->ki_iovec = NULL; - INIT_LIST_HEAD(&req->ki_run_list); - req->ki_eventfd = NULL; + kcpu->reqs_available += nr; + while (kcpu->reqs_available >= ctx->req_batch * 2) { + kcpu->reqs_available -= ctx->req_batch; + atomic_add(ctx->req_batch, &ctx->reqs_available); + } - return req; + preempt_enable(); } -/* - * struct kiocb's are allocated in batches to reduce the number of - * times the ctx lock is acquired and released. - */ -#define KIOCB_BATCH_SIZE 32L -struct kiocb_batch { - struct list_head head; - long count; /* number of requests left to allocate */ -}; - -static void kiocb_batch_init(struct kiocb_batch *batch, long total) +static bool get_reqs_available(struct kioctx *ctx) { - INIT_LIST_HEAD(&batch->head); - batch->count = total; -} + struct kioctx_cpu *kcpu; + bool ret = false; -static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) -{ - struct kiocb *req, *n; + preempt_disable(); + kcpu = this_cpu_ptr(ctx->cpu); - if (list_empty(&batch->head)) - return; + if (!kcpu->reqs_available) { + int old, avail = atomic_read(&ctx->reqs_available); - spin_lock_irq(&ctx->ctx_lock); - list_for_each_entry_safe(req, n, &batch->head, ki_batch) { - list_del(&req->ki_batch); - list_del(&req->ki_list); - kmem_cache_free(kiocb_cachep, req); - ctx->reqs_active--; - } - if (unlikely(!ctx->reqs_active && ctx->dead)) - wake_up_all(&ctx->wait); - spin_unlock_irq(&ctx->ctx_lock); -} - -/* - * Allocate a batch of kiocbs. This avoids taking and dropping the - * context lock a lot during setup. - */ -static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch) -{ - unsigned short allocated, to_alloc; - long avail; - struct kiocb *req, *n; - struct aio_ring *ring; - - to_alloc = min(batch->count, KIOCB_BATCH_SIZE); - for (allocated = 0; allocated < to_alloc; allocated++) { - req = __aio_get_req(ctx); - if (!req) - /* allocation failed, go with what we've got */ - break; - list_add(&req->ki_batch, &batch->head); - } - - if (allocated == 0) - goto out; + do { + if (avail < ctx->req_batch) + goto out; - spin_lock_irq(&ctx->ctx_lock); - ring = kmap_atomic(ctx->ring_info.ring_pages[0]); - - avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active; - BUG_ON(avail < 0); - if (avail < allocated) { - /* Trim back the number of requests. */ - list_for_each_entry_safe(req, n, &batch->head, ki_batch) { - list_del(&req->ki_batch); - kmem_cache_free(kiocb_cachep, req); - if (--allocated <= avail) - break; - } - } + old = avail; + avail = atomic_cmpxchg(&ctx->reqs_available, + avail, avail - ctx->req_batch); + } while (avail != old); - batch->count -= allocated; - list_for_each_entry(req, &batch->head, ki_batch) { - list_add(&req->ki_list, &ctx->active_reqs); - ctx->reqs_active++; + kcpu->reqs_available += ctx->req_batch; } - kunmap_atomic(ring); - spin_unlock_irq(&ctx->ctx_lock); - + ret = true; + kcpu->reqs_available--; out: - return allocated; + preempt_enable(); + return ret; } -static inline struct kiocb *aio_get_req(struct kioctx *ctx, - struct kiocb_batch *batch) +/* aio_get_req + * Allocate a slot for an aio request. Increments the ki_users count + * of the kioctx so that the kioctx stays around until all requests are + * complete. Returns NULL if no requests are free. + * + * Returns with kiocb->ki_users set to 2. The io submit code path holds + * an extra reference while submitting the i/o. + * This prevents races between the aio code path referencing the + * req (after submitting it) and aio_complete() freeing the req. + */ +static inline struct kiocb *aio_get_req(struct kioctx *ctx) { struct kiocb *req; - if (list_empty(&batch->head)) - if (kiocb_batch_refill(ctx, batch) == 0) - return NULL; - req = list_first_entry(&batch->head, struct kiocb, ki_batch); - list_del(&req->ki_batch); + if (!get_reqs_available(ctx)) + return NULL; + + req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); + if (unlikely(!req)) + goto out_put; + + atomic_set(&req->ki_users, 2); + req->ki_ctx = ctx; return req; +out_put: + put_reqs_available(ctx, 1); + return NULL; } -static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) +static void kiocb_free(struct kiocb *req) { - assert_spin_locked(&ctx->ctx_lock); - + if (req->ki_filp) + fput(req->ki_filp); if (req->ki_eventfd != NULL) eventfd_ctx_put(req->ki_eventfd); if (req->ki_dtor) @@ -542,48 +649,12 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) if (req->ki_iovec != &req->ki_inline_vec) kfree(req->ki_iovec); kmem_cache_free(kiocb_cachep, req); - ctx->reqs_active--; - - if (unlikely(!ctx->reqs_active && ctx->dead)) - wake_up_all(&ctx->wait); } -/* __aio_put_req - * Returns true if this put was the last user of the request. - */ -static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) +void aio_put_req(struct kiocb *req) { - dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", - req, atomic_long_read(&req->ki_filp->f_count)); - - assert_spin_locked(&ctx->ctx_lock); - - req->ki_users--; - BUG_ON(req->ki_users < 0); - if (likely(req->ki_users)) - return 0; - list_del(&req->ki_list); /* remove from active_reqs */ - req->ki_cancel = NULL; - req->ki_retry = NULL; - - fput(req->ki_filp); - req->ki_filp = NULL; - really_put_req(ctx, req); - return 1; -} - -/* aio_put_req - * Returns true if this put was the last user of the kiocb, - * false if the request is still in use. - */ -int aio_put_req(struct kiocb *req) -{ - struct kioctx *ctx = req->ki_ctx; - int ret; - spin_lock_irq(&ctx->ctx_lock); - ret = __aio_put_req(ctx, req); - spin_unlock_irq(&ctx->ctx_lock); - return ret; + if (atomic_dec_and_test(&req->ki_users)) + kiocb_free(req); } EXPORT_SYMBOL(aio_put_req); @@ -595,13 +666,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) rcu_read_lock(); hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) { - /* - * RCU protects us against accessing freed memory but - * we have to be careful not to get a reference when the - * reference count already dropped to 0 (ctx->dead test - * is unreliable because of races). - */ - if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){ + if (ctx->user_id == ctx_id) { + percpu_ref_get(&ctx->users); ret = ctx; break; } @@ -611,610 +677,332 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) return ret; } -/* - * Queue up a kiocb to be retried. Assumes that the kiocb - * has already been marked as kicked, and places it on - * the retry run list for the corresponding ioctx, if it - * isn't already queued. Returns 1 if it actually queued - * the kiocb (to tell the caller to activate the work - * queue to process it), or 0, if it found that it was - * already queued. - */ -static inline int __queue_kicked_iocb(struct kiocb *iocb) +static inline unsigned kioctx_ring_put(struct kioctx *ctx, struct kiocb *req, + unsigned tail) { - struct kioctx *ctx = iocb->ki_ctx; - - assert_spin_locked(&ctx->ctx_lock); + struct io_event *ev_page, *event; + unsigned pos = tail + AIO_EVENTS_OFFSET; - if (list_empty(&iocb->ki_run_list)) { - list_add_tail(&iocb->ki_run_list, - &ctx->run_list); - return 1; - } - return 0; -} + if (++tail >= ctx->nr_events) + tail = 0; -/* aio_run_iocb - * This is the core aio execution routine. It is - * invoked both for initial i/o submission and - * subsequent retries via the aio_kick_handler. - * Expects to be invoked with iocb->ki_ctx->lock - * already held. The lock is released and reacquired - * as needed during processing. - * - * Calls the iocb retry method (already setup for the - * iocb on initial submission) for operation specific - * handling, but takes care of most of common retry - * execution details for a given iocb. The retry method - * needs to be non-blocking as far as possible, to avoid - * holding up other iocbs waiting to be serviced by the - * retry kernel thread. - * - * The trickier parts in this code have to do with - * ensuring that only one retry instance is in progress - * for a given iocb at any time. Providing that guarantee - * simplifies the coding of individual aio operations as - * it avoids various potential races. - */ -static ssize_t aio_run_iocb(struct kiocb *iocb) -{ - struct kioctx *ctx = iocb->ki_ctx; - ssize_t (*retry)(struct kiocb *); - ssize_t ret; + ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); + event = ev_page + pos % AIO_EVENTS_PER_PAGE; - if (!(retry = iocb->ki_retry)) { - printk("aio_run_iocb: iocb->ki_retry = NULL\n"); - return 0; - } + event->obj = (u64)(unsigned long)req->ki_obj.user; + event->data = req->ki_user_data; + event->res = req->ki_res; + event->res2 = req->ki_res2; - /* - * We don't want the next retry iteration for this - * operation to start until this one has returned and - * updated the iocb state. However, wait_queue functions - * can trigger a kick_iocb from interrupt context in the - * meantime, indicating that data is available for the next - * iteration. We want to remember that and enable the - * next retry iteration _after_ we are through with - * this one. - * - * So, in order to be able to register a "kick", but - * prevent it from being queued now, we clear the kick - * flag, but make the kick code *think* that the iocb is - * still on the run list until we are actually done. - * When we are done with this iteration, we check if - * the iocb was kicked in the meantime and if so, queue - * it up afresh. - */ + kunmap_atomic(ev_page); + flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); - kiocbClearKicked(iocb); + pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", + ctx, tail, req, req->ki_obj.user, req->ki_user_data, + req->ki_res, req->ki_res2); - /* - * This is so that aio_complete knows it doesn't need to - * pull the iocb off the run list (We can't just call - * INIT_LIST_HEAD because we don't want a kick_iocb to - * queue this on the run list yet) - */ - iocb->ki_run_list.next = iocb->ki_run_list.prev = NULL; - spin_unlock_irq(&ctx->ctx_lock); + return tail; +} - /* Quit retrying if the i/o has been cancelled */ - if (kiocbIsCancelled(iocb)) { - ret = -EINTR; - aio_complete(iocb, ret, 0); - /* must not access the iocb after this */ - goto out; - } +static inline unsigned kioctx_ring_lock(struct kioctx *ctx) +{ + unsigned tail; /* - * Now we are all set to call the retry method in async - * context. + * ctx->tail is both our lock and the canonical version of the tail + * pointer. */ - ret = retry(iocb); - - if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { - /* - * There's no easy way to restart the syscall since other AIO's - * may be already running. Just fail this IO with EINTR. - */ - if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || - ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)) - ret = -EINTR; - aio_complete(iocb, ret, 0); - } -out: - spin_lock_irq(&ctx->ctx_lock); - - if (-EIOCBRETRY == ret) { - /* - * OK, now that we are done with this iteration - * and know that there is more left to go, - * this is where we let go so that a subsequent - * "kick" can start the next iteration - */ + while ((tail = xchg(&ctx->tail, UINT_MAX)) == UINT_MAX) + cpu_relax(); - /* will make __queue_kicked_iocb succeed from here on */ - INIT_LIST_HEAD(&iocb->ki_run_list); - /* we must queue the next iteration ourselves, if it - * has already been kicked */ - if (kiocbIsKicked(iocb)) { - __queue_kicked_iocb(iocb); - - /* - * __queue_kicked_iocb will always return 1 here, because - * iocb->ki_run_list is empty at this point so it should - * be safe to unconditionally queue the context into the - * work queue. - */ - aio_queue_work(ctx); - } - } - return ret; + return tail; } -/* - * __aio_run_iocbs: - * Process all pending retries queued on the ioctx - * run list. - * Assumes it is operating within the aio issuer's mm - * context. - */ -static int __aio_run_iocbs(struct kioctx *ctx) +static inline void kioctx_ring_unlock(struct kioctx *ctx, unsigned tail) { - struct kiocb *iocb; - struct list_head run_list; + struct aio_ring *ring; - assert_spin_locked(&ctx->ctx_lock); + if (!ctx) + return; - list_replace_init(&ctx->run_list, &run_list); - while (!list_empty(&run_list)) { - iocb = list_entry(run_list.next, struct kiocb, - ki_run_list); - list_del(&iocb->ki_run_list); - /* - * Hold an extra reference while retrying i/o. - */ - iocb->ki_users++; /* grab extra reference */ - aio_run_iocb(iocb); - __aio_put_req(ctx, iocb); - } - if (!list_empty(&ctx->run_list)) - return 1; - return 0; -} + smp_wmb(); + /* make event visible before updating tail */ -static void aio_queue_work(struct kioctx * ctx) -{ - unsigned long timeout; - /* - * if someone is waiting, get the work started right - * away, otherwise, use a longer delay - */ - smp_mb(); - if (waitqueue_active(&ctx->wait)) - timeout = 1; - else - timeout = HZ/10; - queue_delayed_work(aio_wq, &ctx->wq, timeout); -} + ctx->shadow_tail = tail; -/* - * aio_run_all_iocbs: - * Process all pending retries queued on the ioctx - * run list, and keep running them until the list - * stays empty. - * Assumes it is operating within the aio issuer's mm context. - */ -static inline void aio_run_all_iocbs(struct kioctx *ctx) -{ - spin_lock_irq(&ctx->ctx_lock); - while (__aio_run_iocbs(ctx)) - ; - spin_unlock_irq(&ctx->ctx_lock); -} + ring = kmap_atomic(ctx->ring_pages[0]); + ring->tail = tail; + kunmap_atomic(ring); + flush_dcache_page(ctx->ring_pages[0]); -/* - * aio_kick_handler: - * Work queue handler triggered to process pending - * retries on an ioctx. Takes on the aio issuer's - * mm context before running the iocbs, so that - * copy_xxx_user operates on the issuer's address - * space. - * Run on aiod's context. - */ -static void aio_kick_handler(struct work_struct *work) -{ - struct kioctx *ctx = container_of(work, struct kioctx, wq.work); - mm_segment_t oldfs = get_fs(); - struct mm_struct *mm; - int requeue; + /* unlock, make new tail visible before checking waitlist */ + smp_mb(); - set_fs(USER_DS); - use_mm(ctx->mm); - spin_lock_irq(&ctx->ctx_lock); - requeue =__aio_run_iocbs(ctx); - mm = ctx->mm; - spin_unlock_irq(&ctx->ctx_lock); - unuse_mm(mm); - set_fs(oldfs); - /* - * we're in a worker thread already; no point using non-zero delay - */ - if (requeue) - queue_delayed_work(aio_wq, &ctx->wq, 0); -} + ctx->tail = tail; + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); +} -/* - * Called by kick_iocb to queue the kiocb for retry - * and if required activate the aio work queue to process - * it - */ -static void try_queue_kicked_iocb(struct kiocb *iocb) +void batch_complete_aio(struct batch_complete *batch) { - struct kioctx *ctx = iocb->ki_ctx; + struct kioctx *ctx = NULL; + struct eventfd_ctx *eventfd = NULL; + struct rb_node *n; unsigned long flags; - int run = 0; + unsigned tail = 0; - spin_lock_irqsave(&ctx->ctx_lock, flags); - /* set this inside the lock so that we can't race with aio_run_iocb() - * testing it and putting the iocb on the run list under the lock */ - if (!kiocbTryKick(iocb)) - run = __queue_kicked_iocb(iocb); - spin_unlock_irqrestore(&ctx->ctx_lock, flags); - if (run) - aio_queue_work(ctx); -} - -/* - * kick_iocb: - * Called typically from a wait queue callback context - * to trigger a retry of the iocb. - * The retry is usually executed by aio workqueue - * threads (See aio_kick_handler). - */ -void kick_iocb(struct kiocb *iocb) -{ - /* sync iocbs are easy: they can only ever be executing from a - * single context. */ - if (is_sync_kiocb(iocb)) { - kiocbSetKicked(iocb); - wake_up_process(iocb->ki_obj.tsk); + if (RB_EMPTY_ROOT(&batch->kiocb)) return; - } - - try_queue_kicked_iocb(iocb); -} -EXPORT_SYMBOL(kick_iocb); - -/* aio_complete - * Called when the io request on the given iocb is complete. - * Returns true if this is the last user of the request. The - * only other user of the request can be the cancellation code. - */ -int aio_complete(struct kiocb *iocb, long res, long res2) -{ - struct kioctx *ctx = iocb->ki_ctx; - struct aio_ring_info *info; - struct aio_ring *ring; - struct io_event *event; - unsigned long flags; - unsigned long tail; - int ret; /* - * Special case handling for sync iocbs: - * - events go directly into the iocb for fast handling - * - the sync task with the iocb in its stack holds the single iocb - * ref, no other paths have a way to get another ref - * - the sync task helpfully left a reference to itself in the iocb + * Take rcu_read_lock() in case the kioctx is being destroyed, as we + * need to issue a wakeup after incrementing reqs_available. */ - if (is_sync_kiocb(iocb)) { - BUG_ON(iocb->ki_users != 1); - iocb->ki_user_data = res; - iocb->ki_users = 0; - wake_up_process(iocb->ki_obj.tsk); - return 1; - } - - info = &ctx->ring_info; + rcu_read_lock(); + local_irq_save(flags); - /* add a completion event to the ring buffer. - * must be done holding ctx->ctx_lock to prevent - * other code from messing with the tail - * pointer since we might be called from irq - * context. - */ - spin_lock_irqsave(&ctx->ctx_lock, flags); + n = rb_first(&batch->kiocb); + while (n) { + struct kiocb *req = container_of(n, struct kiocb, ki_node); - if (iocb->ki_run_list.prev && !list_empty(&iocb->ki_run_list)) - list_del_init(&iocb->ki_run_list); + if (n->rb_right) { + n->rb_right->__rb_parent_color = n->__rb_parent_color; + n = n->rb_right; - /* - * cancelled requests don't get events, userland was given one - * when the event got cancelled. - */ - if (kiocbIsCancelled(iocb)) - goto put_rq; + while (n->rb_left) + n = n->rb_left; + } else { + n = rb_parent(n); + } - ring = kmap_atomic(info->ring_pages[0]); + if (unlikely(req->ki_eventfd != eventfd)) { + if (eventfd) { + /* Make event visible */ + kioctx_ring_unlock(ctx, tail); + ctx = NULL; - tail = info->tail; - event = aio_ring_event(info, tail); - if (++tail >= info->nr) - tail = 0; - - event->obj = (u64)(unsigned long)iocb->ki_obj.user; - event->data = iocb->ki_user_data; - event->res = res; - event->res2 = res2; + eventfd_signal(eventfd, 1); + eventfd_ctx_put(eventfd); + } - dprintk("aio_complete: %p[%lu]: %p: %p %Lx %lx %lx\n", - ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, - res, res2); + eventfd = req->ki_eventfd; + req->ki_eventfd = NULL; + } - /* after flagging the request as done, we - * must never even look at it again - */ - smp_wmb(); /* make event visible before updating tail */ + if (unlikely(req->ki_ctx != ctx)) { + kioctx_ring_unlock(ctx, tail); - info->tail = tail; - ring->tail = tail; + ctx = req->ki_ctx; + tail = kioctx_ring_lock(ctx); + } - put_aio_ring_event(event); - kunmap_atomic(ring); + tail = kioctx_ring_put(ctx, req, tail); + aio_put_req(req); + } - pr_debug("added to ring %p at [%lu]\n", iocb, tail); + kioctx_ring_unlock(ctx, tail); + local_irq_restore(flags); + rcu_read_unlock(); /* * Check if the user asked us to deliver the result through an * eventfd. The eventfd_signal() function is safe to be called * from IRQ context. */ - if (iocb->ki_eventfd != NULL) - eventfd_signal(iocb->ki_eventfd, 1); + if (eventfd) { + eventfd_signal(eventfd, 1); + eventfd_ctx_put(eventfd); + } +} +EXPORT_SYMBOL(batch_complete_aio); -put_rq: - /* everything turned out well, dispose of the aiocb. */ - ret = __aio_put_req(ctx, iocb); +/* aio_complete_batch + * Called when the io request on the given iocb is complete; @batch may be + * NULL. + */ +void aio_complete_batch(struct kiocb *req, long res, long res2, + struct batch_complete *batch) +{ + req->ki_res = res; + req->ki_res2 = res2; + + if (req->ki_list.next) { + struct kioctx *ctx = req->ki_ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->ctx_lock, flags); + list_del(&req->ki_list); + spin_unlock_irqrestore(&ctx->ctx_lock, flags); + } /* - * We have to order our ring_info tail store above and test - * of the wait list below outside the wait lock. This is - * like in wake_up_bit() where clearing a bit has to be - * ordered with the unlocked test. + * Special case handling for sync iocbs: + * - events go directly into the iocb for fast handling + * - the sync task with the iocb in its stack holds the single iocb + * ref, no other paths have a way to get another ref + * - the sync task helpfully left a reference to itself in the iocb */ - smp_mb(); + if (is_sync_kiocb(req)) { + BUG_ON(atomic_read(&req->ki_users) != 1); + req->ki_user_data = req->ki_res; + atomic_set(&req->ki_users, 0); + wake_up_process(req->ki_obj.tsk); + } else if (batch) { + int res; + struct kiocb *t; + struct rb_node **n = &batch->kiocb.rb_node, *parent = NULL; + + while (*n) { + parent = *n; + t = container_of(*n, struct kiocb, ki_node); + + res = req->ki_ctx != t->ki_ctx + ? req->ki_ctx < t->ki_ctx + : req->ki_eventfd != t->ki_eventfd + ? req->ki_eventfd < t->ki_eventfd + : req < t; + + n = res ? &(*n)->rb_left : &(*n)->rb_right; + } - if (waitqueue_active(&ctx->wait)) - wake_up(&ctx->wait); + rb_link_node(&req->ki_node, parent, n); + rb_insert_color(&req->ki_node, &batch->kiocb); + } else { + struct batch_complete batch_stack; - spin_unlock_irqrestore(&ctx->ctx_lock, flags); - return ret; + memset(&req->ki_node, 0, sizeof(req->ki_node)); + batch_stack.kiocb.rb_node = &req->ki_node; + + batch_complete_aio(&batch_stack); + } } -EXPORT_SYMBOL(aio_complete); +EXPORT_SYMBOL(aio_complete_batch); -/* aio_read_evt - * Pull an event off of the ioctx's event ring. Returns the number of - * events fetched (0 or 1 ;-) - * FIXME: make this use cmpxchg. - * TODO: make the ringbuffer user mmap()able (requires FIXME). +/* aio_read_events + * Pull an event off of the ioctx's event ring. Returns the number of + * events fetched */ -static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) +static long aio_read_events_ring(struct kioctx *ctx, + struct io_event __user *event, long nr) { - struct aio_ring_info *info = &ioctx->ring_info; struct aio_ring *ring; - unsigned long head; - int ret = 0; - - ring = kmap_atomic(info->ring_pages[0]); - dprintk("in aio_read_evt h%lu t%lu m%lu\n", - (unsigned long)ring->head, (unsigned long)ring->tail, - (unsigned long)ring->nr); - - if (ring->head == ring->tail) - goto out; + unsigned head, pos; + long ret = 0; + int copy_ret; - spin_lock(&info->ring_lock); - - head = ring->head % info->nr; - if (head != ring->tail) { - struct io_event *evp = aio_ring_event(info, head); - *ent = *evp; - head = (head + 1) % info->nr; - smp_mb(); /* finish reading the event before updatng the head */ - ring->head = head; - ret = 1; - put_aio_ring_event(evp); - } - spin_unlock(&info->ring_lock); + mutex_lock(&ctx->ring_lock); -out: + ring = kmap_atomic(ctx->ring_pages[0]); + head = ring->head; kunmap_atomic(ring); - dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret, - (unsigned long)ring->head, (unsigned long)ring->tail); - return ret; -} - -struct aio_timeout { - struct timer_list timer; - int timed_out; - struct task_struct *p; -}; -static void timeout_func(unsigned long data) -{ - struct aio_timeout *to = (struct aio_timeout *)data; + pr_debug("h%u t%u m%u\n", head, ctx->shadow_tail, ctx->nr_events); - to->timed_out = 1; - wake_up_process(to->p); -} + if (head == ctx->shadow_tail) + goto out; -static inline void init_timeout(struct aio_timeout *to) -{ - setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to); - to->timed_out = 0; - to->p = current; -} + while (ret < nr) { + long avail; + struct io_event *ev; + struct page *page; -static inline void set_timeout(long start_jiffies, struct aio_timeout *to, - const struct timespec *ts) -{ - to->timer.expires = start_jiffies + timespec_to_jiffies(ts); - if (time_after(to->timer.expires, jiffies)) - add_timer(&to->timer); - else - to->timed_out = 1; -} + avail = (head <= ctx->shadow_tail ? + ctx->shadow_tail : ctx->nr_events) - head; + if (head == ctx->shadow_tail) + break; -static inline void clear_timeout(struct aio_timeout *to) -{ - del_singleshot_timer_sync(&to->timer); -} + avail = min(avail, nr - ret); + avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - + ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE)); -static int read_events(struct kioctx *ctx, - long min_nr, long nr, - struct io_event __user *event, - struct timespec __user *timeout) -{ - long start_jiffies = jiffies; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - int ret; - int i = 0; - struct io_event ent; - struct aio_timeout to; - int retry = 0; - - /* needed to zero any padding within an entry (there shouldn't be - * any, but C is fun! - */ - memset(&ent, 0, sizeof(ent)); -retry: - ret = 0; - while (likely(i < nr)) { - ret = aio_read_evt(ctx, &ent); - if (unlikely(ret <= 0)) - break; + pos = head + AIO_EVENTS_OFFSET; + page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; + pos %= AIO_EVENTS_PER_PAGE; - dprintk("read event: %Lx %Lx %Lx %Lx\n", - ent.data, ent.obj, ent.res, ent.res2); + ev = kmap(page); + copy_ret = copy_to_user(event + ret, ev + pos, + sizeof(*ev) * avail); + kunmap(page); - /* Could we split the check in two? */ - ret = -EFAULT; - if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) { - dprintk("aio: lost an event due to EFAULT.\n"); - break; + if (unlikely(copy_ret)) { + ret = -EFAULT; + goto out; } - ret = 0; - /* Good, event copied to userland, update counts. */ - event ++; - i ++; + ret += avail; + head += avail; + head %= ctx->nr_events; } - if (min_nr <= i) - return i; - if (ret) - return ret; - - /* End fast path */ + ring = kmap_atomic(ctx->ring_pages[0]); + ring->head = head; + kunmap_atomic(ring); + flush_dcache_page(ctx->ring_pages[0]); - /* racey check, but it gets redone */ - if (!retry && unlikely(!list_empty(&ctx->run_list))) { - retry = 1; - aio_run_all_iocbs(ctx); - goto retry; - } + pr_debug("%li h%u t%u\n", ret, head, ctx->shadow_tail); - init_timeout(&to); - if (timeout) { - struct timespec ts; - ret = -EFAULT; - if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) - goto out; - - set_timeout(start_jiffies, &to, &ts); - } + put_reqs_available(ctx, ret); +out: + mutex_unlock(&ctx->ring_lock); - while (likely(i < nr)) { - add_wait_queue_exclusive(&ctx->wait, &wait); - do { - set_task_state(tsk, TASK_INTERRUPTIBLE); - ret = aio_read_evt(ctx, &ent); - if (ret) - break; - if (min_nr <= i) - break; - if (unlikely(ctx->dead)) { - ret = -EINVAL; - break; - } - if (to.timed_out) /* Only check after read evt */ - break; - /* Try to only show up in io wait if there are ops - * in flight */ - if (ctx->reqs_active) - io_schedule(); - else - schedule(); - if (signal_pending(tsk)) { - ret = -EINTR; - break; - } - /*ret = aio_read_evt(ctx, &ent);*/ - } while (1) ; + return ret; +} - set_task_state(tsk, TASK_RUNNING); - remove_wait_queue(&ctx->wait, &wait); +static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr, + struct io_event __user *event, long *i) +{ + long ret = aio_read_events_ring(ctx, event + *i, nr - *i); - if (unlikely(ret <= 0)) - break; + if (ret > 0) + *i += ret; - ret = -EFAULT; - if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) { - dprintk("aio: lost an event due to EFAULT.\n"); - break; - } + if (unlikely(percpu_ref_dead(&ctx->users))) + ret = -EINVAL; - /* Good, event copied to userland, update counts. */ - event ++; - i ++; - } + if (!*i) + *i = ret; - if (timeout) - clear_timeout(&to); -out: - destroy_timer_on_stack(&to.timer); - return i ? i : ret; + return ret < 0 || *i >= min_nr; } -/* Take an ioctx and remove it from the list of ioctx's. Protects - * against races with itself via ->dead. - */ -static void io_destroy(struct kioctx *ioctx) +static long read_events(struct kioctx *ctx, long min_nr, long nr, + struct io_event __user *event, + struct timespec __user *timeout) { - struct mm_struct *mm = current->mm; - int was_dead; + ktime_t until = { .tv64 = KTIME_MAX }; + long ret = 0; - /* delete the entry from the list is someone else hasn't already */ - spin_lock(&mm->ioctx_lock); - was_dead = ioctx->dead; - ioctx->dead = 1; - hlist_del_rcu(&ioctx->list); - spin_unlock(&mm->ioctx_lock); + if (timeout) { + struct timespec ts; - dprintk("aio_release(%p)\n", ioctx); - if (likely(!was_dead)) - put_ioctx(ioctx); /* twice for the list */ + if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) + return -EFAULT; - kill_ctx(ioctx); + until = timespec_to_ktime(ts); + } /* - * Wake up any waiters. The setting of ctx->dead must be seen - * by other CPUs at this point. Right now, we rely on the - * locking done by the above calls to ensure this consistency. + * Note that aio_read_events() is being called as the conditional - i.e. + * we're calling it after prepare_to_wait() has set task state to + * TASK_INTERRUPTIBLE. + * + * But aio_read_events() can block, and if it blocks it's going to flip + * the task state back to TASK_RUNNING. + * + * This should be ok, provided it doesn't flip the state back to + * TASK_RUNNING and return 0 too much - that causes us to spin. That + * will only happen if the mutex_lock() call blocks, and we then find + * the ringbuffer empty. So in practice we should be ok, but it's + * something to be aware of when touching this code. */ - wake_up_all(&ioctx->wait); + wait_event_interruptible_hrtimeout(ctx->wait, + aio_read_events(ctx, min_nr, nr, event, &ret), until); + + if (!ret && signal_pending(current)) + ret = -EINTR; + + return ret; } /* sys_io_setup: @@ -1252,7 +1040,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); if (ret) - io_destroy(ioctx); + kill_ioctx(ioctx); put_ioctx(ioctx); } @@ -1270,7 +1058,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - io_destroy(ioctx); + kill_ioctx(ioctx); put_ioctx(ioctx); return 0; } @@ -1301,30 +1089,21 @@ static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret) BUG_ON(ret > 0 && iocb->ki_left == 0); } -static ssize_t aio_rw_vect_retry(struct kiocb *iocb) +typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, + unsigned long, loff_t); + +static ssize_t aio_rw_vect_retry(struct kiocb *iocb, int rw, aio_rw_op *rw_op) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - ssize_t (*rw_op)(struct kiocb *, const struct iovec *, - unsigned long, loff_t); ssize_t ret = 0; - unsigned short opcode; - - if ((iocb->ki_opcode == IOCB_CMD_PREADV) || - (iocb->ki_opcode == IOCB_CMD_PREAD)) { - rw_op = file->f_op->aio_read; - opcode = IOCB_CMD_PREADV; - } else { - rw_op = file->f_op->aio_write; - opcode = IOCB_CMD_PWRITEV; - } /* This matches the pread()/pwrite() logic */ if (iocb->ki_pos < 0) return -EINVAL; - if (opcode == IOCB_CMD_PWRITEV) + if (rw == WRITE) file_start_write(file); do { ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], @@ -1336,9 +1115,9 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) /* retry all partial writes. retry partial reads as long as its a * regular file. */ } while (ret > 0 && iocb->ki_left > 0 && - (opcode == IOCB_CMD_PWRITEV || + (rw == WRITE || (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); - if (opcode == IOCB_CMD_PWRITEV) + if (rw == WRITE) file_end_write(file); /* This means we must have transferred all that we could */ @@ -1348,81 +1127,49 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) /* If we managed to write some out we return that, rather than * the eventual error. */ - if (opcode == IOCB_CMD_PWRITEV - && ret < 0 && ret != -EIOCBQUEUED && ret != -EIOCBRETRY + if (rw == WRITE + && ret < 0 && ret != -EIOCBQUEUED && iocb->ki_nbytes - iocb->ki_left) ret = iocb->ki_nbytes - iocb->ki_left; return ret; } -static ssize_t aio_fdsync(struct kiocb *iocb) -{ - struct file *file = iocb->ki_filp; - ssize_t ret = -EINVAL; - - if (file->f_op->aio_fsync) - ret = file->f_op->aio_fsync(iocb, 1); - return ret; -} - -static ssize_t aio_fsync(struct kiocb *iocb) -{ - struct file *file = iocb->ki_filp; - ssize_t ret = -EINVAL; - - if (file->f_op->aio_fsync) - ret = file->f_op->aio_fsync(iocb, 0); - return ret; -} - -static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat) +static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat) { ssize_t ret; + kiocb->ki_nr_segs = kiocb->ki_nbytes; + #ifdef CONFIG_COMPAT if (compat) - ret = compat_rw_copy_check_uvector(type, + ret = compat_rw_copy_check_uvector(rw, (struct compat_iovec __user *)kiocb->ki_buf, - kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, + kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec, &kiocb->ki_iovec); else #endif - ret = rw_copy_check_uvector(type, + ret = rw_copy_check_uvector(rw, (struct iovec __user *)kiocb->ki_buf, - kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, + kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec, &kiocb->ki_iovec); if (ret < 0) - goto out; - - ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret); - if (ret < 0) - goto out; + return ret; - kiocb->ki_nr_segs = kiocb->ki_nbytes; - kiocb->ki_cur_seg = 0; - /* ki_nbytes/left now reflect bytes instead of segs */ + /* ki_nbytes now reflect bytes instead of segs */ kiocb->ki_nbytes = ret; - kiocb->ki_left = ret; - - ret = 0; -out: - return ret; + return 0; } -static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb) +static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb) { - int bytes; - - bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left); - if (bytes < 0) - return bytes; + if (unlikely(!access_ok(!rw, kiocb->ki_buf, kiocb->ki_nbytes))) + return -EFAULT; kiocb->ki_iovec = &kiocb->ki_inline_vec; kiocb->ki_iovec->iov_base = kiocb->ki_buf; - kiocb->ki_iovec->iov_len = bytes; + kiocb->ki_iovec->iov_len = kiocb->ki_nbytes; kiocb->ki_nr_segs = 1; - kiocb->ki_cur_seg = 0; return 0; } @@ -1431,96 +1178,95 @@ static ssize_t aio_setup_single_vector(int type, struct file * file, struct kioc * Performs the initial checks and aio retry method * setup for the kiocb at the time of io submission. */ -static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) +static ssize_t aio_run_iocb(struct kiocb *req, bool compat) { - struct file *file = kiocb->ki_filp; - ssize_t ret = 0; + struct file *file = req->ki_filp; + ssize_t ret; + int rw; + fmode_t mode; + aio_rw_op *rw_op; - switch (kiocb->ki_opcode) { + switch (req->ki_opcode) { case IOCB_CMD_PREAD: - ret = -EBADF; - if (unlikely(!(file->f_mode & FMODE_READ))) - break; - ret = -EFAULT; - if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf, - kiocb->ki_left))) - break; - ret = aio_setup_single_vector(READ, file, kiocb); - if (ret) - break; - ret = -EINVAL; - if (file->f_op->aio_read) - kiocb->ki_retry = aio_rw_vect_retry; - break; - case IOCB_CMD_PWRITE: - ret = -EBADF; - if (unlikely(!(file->f_mode & FMODE_WRITE))) - break; - ret = -EFAULT; - if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf, - kiocb->ki_left))) - break; - ret = aio_setup_single_vector(WRITE, file, kiocb); - if (ret) - break; - ret = -EINVAL; - if (file->f_op->aio_write) - kiocb->ki_retry = aio_rw_vect_retry; - break; case IOCB_CMD_PREADV: - ret = -EBADF; - if (unlikely(!(file->f_mode & FMODE_READ))) - break; - ret = aio_setup_vectored_rw(READ, kiocb, compat); - if (ret) - break; - ret = -EINVAL; - if (file->f_op->aio_read) - kiocb->ki_retry = aio_rw_vect_retry; - break; + mode = FMODE_READ; + rw = READ; + rw_op = file->f_op->aio_read; + goto rw_common; + + case IOCB_CMD_PWRITE: case IOCB_CMD_PWRITEV: - ret = -EBADF; - if (unlikely(!(file->f_mode & FMODE_WRITE))) - break; - ret = aio_setup_vectored_rw(WRITE, kiocb, compat); + mode = FMODE_WRITE; + rw = WRITE; + rw_op = file->f_op->aio_write; + goto rw_common; +rw_common: + if (unlikely(!(file->f_mode & mode))) + return -EBADF; + + if (!rw_op) + return -EINVAL; + + ret = (req->ki_opcode == IOCB_CMD_PREADV || + req->ki_opcode == IOCB_CMD_PWRITEV) + ? aio_setup_vectored_rw(rw, req, compat) + : aio_setup_single_vector(rw, req); if (ret) - break; - ret = -EINVAL; - if (file->f_op->aio_write) - kiocb->ki_retry = aio_rw_vect_retry; + return ret; + + ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); + if (ret < 0) + return ret; + + req->ki_nbytes = ret; + req->ki_left = ret; + + ret = aio_rw_vect_retry(req, rw, rw_op); break; + case IOCB_CMD_FDSYNC: - ret = -EINVAL; - if (file->f_op->aio_fsync) - kiocb->ki_retry = aio_fdsync; + if (!file->f_op->aio_fsync) + return -EINVAL; + + ret = file->f_op->aio_fsync(req, 1); break; + case IOCB_CMD_FSYNC: - ret = -EINVAL; - if (file->f_op->aio_fsync) - kiocb->ki_retry = aio_fsync; + if (!file->f_op->aio_fsync) + return -EINVAL; + + ret = file->f_op->aio_fsync(req, 0); break; + default: - dprintk("EINVAL: io_submit: no operation provided\n"); - ret = -EINVAL; + pr_debug("EINVAL: no operation provided\n"); + return -EINVAL; } - if (!kiocb->ki_retry) - return ret; + if (ret != -EIOCBQUEUED) { + /* + * There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || + ret == -ERESTARTNOHAND || + ret == -ERESTART_RESTARTBLOCK)) + ret = -EINTR; + aio_complete(req, ret, 0); + } return 0; } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb, struct kiocb_batch *batch, - bool compat) + struct iocb *iocb, bool compat) { struct kiocb *req; - struct file *file; ssize_t ret; /* enforce forwards compatibility on users */ if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) { - pr_debug("EINVAL: io_submit: reserve field set\n"); + pr_debug("EINVAL: reserve field set\n"); return -EINVAL; } @@ -1534,16 +1280,16 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, return -EINVAL; } - file = fget(iocb->aio_fildes); - if (unlikely(!file)) - return -EBADF; - - req = aio_get_req(ctx, batch); /* returns with 2 references to req */ - if (unlikely(!req)) { - fput(file); + req = aio_get_req(ctx); + if (unlikely(!req)) return -EAGAIN; + + req->ki_filp = fget(iocb->aio_fildes); + if (unlikely(!req->ki_filp)) { + ret = -EBADF; + goto out_put_req; } - req->ki_filp = file; + if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an @@ -1559,9 +1305,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } } - ret = put_user(req->ki_key, &user_iocb->aio_key); + ret = put_user(KIOCB_KEY, &user_iocb->aio_key); if (unlikely(ret)) { - dprintk("EFAULT: aio_key\n"); + pr_debug("EFAULT: aio_key\n"); goto out_put_req; } @@ -1573,41 +1319,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_left = req->ki_nbytes = iocb->aio_nbytes; req->ki_opcode = iocb->aio_lio_opcode; - ret = aio_setup_iocb(req, compat); - + ret = aio_run_iocb(req, compat); if (ret) goto out_put_req; - spin_lock_irq(&ctx->ctx_lock); - /* - * We could have raced with io_destroy() and are currently holding a - * reference to ctx which should be destroyed. We cannot submit IO - * since ctx gets freed as soon as io_submit() puts its reference. The - * check here is reliable: io_destroy() sets ctx->dead before waiting - * for outstanding IO and the barrier between these two is realized by - * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we - * increment ctx->reqs_active before checking for ctx->dead and the - * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we - * don't see ctx->dead set here, io_destroy() waits for our IO to - * finish. - */ - if (ctx->dead) { - spin_unlock_irq(&ctx->ctx_lock); - ret = -EINVAL; - goto out_put_req; - } - aio_run_iocb(req); - if (!list_empty(&ctx->run_list)) { - /* drain the run list */ - while (__aio_run_iocbs(ctx)) - ; - } - spin_unlock_irq(&ctx->ctx_lock); - aio_put_req(req); /* drop extra ref to req */ return 0; - out_put_req: + put_reqs_available(ctx, 1); aio_put_req(req); /* drop extra ref to req */ aio_put_req(req); /* drop i/o ref to req */ return ret; @@ -1620,7 +1339,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, long ret = 0; int i = 0; struct blk_plug plug; - struct kiocb_batch batch; if (unlikely(nr < 0)) return -EINVAL; @@ -1633,12 +1351,10 @@ long do_io_submit(aio_context_t ctx_id, long nr, ctx = lookup_ioctx(ctx_id); if (unlikely(!ctx)) { - pr_debug("EINVAL: io_submit: invalid context id\n"); + pr_debug("EINVAL: invalid context id\n"); return -EINVAL; } - kiocb_batch_init(&batch, nr); - blk_start_plug(&plug); /* @@ -1659,13 +1375,12 @@ long do_io_submit(aio_context_t ctx_id, long nr, break; } - ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat); + ret = io_submit_one(ctx, user_iocb, &tmp, compat); if (ret) break; } blk_finish_plug(&plug); - kiocb_batch_free(ctx, &batch); put_ioctx(ctx); return i ? i : ret; } @@ -1698,10 +1413,13 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, assert_spin_locked(&ctx->ctx_lock); + if (key != KIOCB_KEY) + return NULL; + /* TODO: use a hash or array, this sucks. */ list_for_each(pos, &ctx->active_reqs) { struct kiocb *kiocb = list_kiocb(pos); - if (kiocb->ki_obj.user == iocb && kiocb->ki_key == key) + if (kiocb->ki_obj.user == iocb) return kiocb; } return NULL; @@ -1720,7 +1438,7 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result) { - int (*cancel)(struct kiocb *iocb, struct io_event *res); + struct io_event res; struct kioctx *ctx; struct kiocb *kiocb; u32 key; @@ -1735,32 +1453,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, return -EINVAL; spin_lock_irq(&ctx->ctx_lock); - ret = -EAGAIN; + kiocb = lookup_kiocb(ctx, iocb, key); - if (kiocb && kiocb->ki_cancel) { - cancel = kiocb->ki_cancel; - kiocb->ki_users ++; - kiocbSetCancelled(kiocb); - } else - cancel = NULL; + if (kiocb) + ret = kiocb_cancel(ctx, kiocb, &res); + else + ret = -EINVAL; + spin_unlock_irq(&ctx->ctx_lock); - if (NULL != cancel) { - struct io_event tmp; - pr_debug("calling cancel\n"); - memset(&tmp, 0, sizeof(tmp)); - tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; - tmp.data = kiocb->ki_user_data; - ret = cancel(kiocb, &tmp); - if (!ret) { - /* Cancellation succeeded -- copy the result - * into the user's buffer. - */ - if (copy_to_user(result, &tmp, sizeof(tmp))) - ret = -EFAULT; - } - } else - ret = -EINVAL; + if (!ret) { + /* Cancellation succeeded -- copy the result + * into the user's buffer. + */ + if (copy_to_user(result, &res, sizeof(res))) + ret = -EFAULT; + } put_ioctx(ctx); diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index bbc8f8827eac..14b7ea3c8f5e 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -62,7 +62,6 @@ static int aout_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; - current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); dump.u_ar0 = offsetof(struct user, regs); dump.signal = cprm->siginfo->si_signo; @@ -256,8 +255,6 @@ static int load_aout_binary(struct linux_binprm * bprm) (current->mm->start_data = N_DATADDR(ex)); current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); - current->mm->free_area_cache = current->mm->mmap_base; - current->mm->cached_hole_size = 0; retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); if (retval < 0) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 86af964c2425..95698cd60a1d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -140,6 +140,25 @@ static int padzero(unsigned long elf_bss) #define ELF_BASE_PLATFORM NULL #endif +/* + * Use get_random_int() to implement AT_RANDOM while avoiding depletion + * of the entropy pool. + */ +static void get_atrandom_bytes(unsigned char *buf, size_t nbytes) +{ + unsigned char *p = buf; + + while (nbytes) { + unsigned int random_variable; + size_t chunk = min(nbytes, sizeof(random_variable)); + + random_variable = get_random_int(); + memcpy(p, &random_variable, chunk); + p += chunk; + nbytes -= chunk; + } +} + static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned long load_addr, unsigned long interp_load_addr) @@ -201,7 +220,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, /* * Generate 16 random bytes for userspace PRNG seeding. */ - get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes)); + get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes)); u_rand_bytes = (elf_addr_t __user *) STACK_ALLOC(p, sizeof(k_rand_bytes)); if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes))) @@ -735,8 +754,6 @@ static int load_elf_binary(struct linux_binprm *bprm) /* Do this so that we can load the interpreter, if need be. We will change some of these later */ - current->mm->free_area_cache = current->mm->mmap_base; - current->mm->cached_hole_size = 0; retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack); if (retval < 0) { @@ -803,7 +820,8 @@ static int load_elf_binary(struct linux_binprm *bprm) * follow the loader, and is not movable. */ #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE /* Memory randomization might have been switched off - * in runtime via sysctl. + * in runtime via sysctl or explicit setting of + * personality flags. * If that is the case, retain the original non-zero * load_bias value in order to establish proper * non-randomized mappings. @@ -2091,8 +2109,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto cleanup; has_dumped = 1; - current->flags |= PF_DUMPCORE; - + fs = get_fs(); set_fs(KERNEL_DS); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9c13e023e2b7..c1cc06aed601 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1687,8 +1687,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) fill_elf_fdpic_header(elf, e_phnum); has_dumped = 1; - current->flags |= PF_DUMPCORE; - /* * Set up the notes in similar form to SVR4 core dumps made * with info from their /proc. diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 751df5e4f61a..1c740e152f38 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -23,6 +23,7 @@ #include <linux/binfmts.h> #include <linux/slab.h> #include <linux/ctype.h> +#include <linux/string_helpers.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/namei.h> @@ -234,24 +235,6 @@ static char *scanarg(char *s, char del) return s; } -static int unquote(char *from) -{ - char c = 0, *s = from, *p = from; - - while ((c = *s++) != '\0') { - if (c == '\\' && *s == 'x') { - s++; - c = toupper(*s++); - *p = (c - (isdigit(c) ? '0' : 'A' - 10)) << 4; - c = toupper(*s++); - *p++ |= c - (isdigit(c) ? '0' : 'A' - 10); - continue; - } - *p++ = c; - } - return p - from; -} - static char * check_special_flags (char * sfs, Node * e) { char * p = sfs; @@ -354,8 +337,9 @@ static Node *create_entry(const char __user *buffer, size_t count) p[-1] = '\0'; if (!e->mask[0]) e->mask = NULL; - e->size = unquote(e->magic); - if (e->mask && unquote(e->mask) != e->size) + e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX); + if (e->mask && + string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size) goto Einval; if (e->size + e->offset > BINPRM_BUF_SIZE) goto Einval; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8fb42916d8a2..69f6f802b09e 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -510,7 +510,8 @@ static void bio_integrity_verify_fn(struct work_struct *work) * in process context. This function postpones completion * accordingly. */ -void bio_integrity_endio(struct bio *bio, int error) +void bio_integrity_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct bio_integrity_payload *bip = bio->bi_integrity; @@ -19,6 +19,7 @@ #include <linux/swap.h> #include <linux/bio.h> #include <linux/blkdev.h> +#include <linux/uio.h> #include <linux/iocontext.h> #include <linux/slab.h> #include <linux/init.h> @@ -27,6 +28,7 @@ #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/cgroup.h> +#include <linux/aio.h> #include <scsi/sg.h> /* for struct sg_iovec */ #include <trace/events/block.h> @@ -759,7 +761,8 @@ struct submit_bio_ret { int error; }; -static void submit_bio_wait_endio(struct bio *bio, int error) +static void submit_bio_wait_endio(struct bio *bio, int error, + struct batch_complete *batch) { struct submit_bio_ret *ret = bio->bi_private; @@ -1413,7 +1416,8 @@ void bio_unmap_user(struct bio *bio) } EXPORT_SYMBOL(bio_unmap_user); -static void bio_map_kern_endio(struct bio *bio, int err) +static void bio_map_kern_endio(struct bio *bio, int err, + struct batch_complete *batch) { bio_put(bio); } @@ -1485,7 +1489,8 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, } EXPORT_SYMBOL(bio_map_kern); -static void bio_copy_kern_endio(struct bio *bio, int err) +static void bio_copy_kern_endio(struct bio *bio, int err, + struct batch_complete *batch) { struct bio_vec *bvec; const int read = bio_data_dir(bio) == READ; @@ -1684,31 +1689,40 @@ void bio_flush_dcache_pages(struct bio *bi) EXPORT_SYMBOL(bio_flush_dcache_pages); #endif -/** - * bio_endio - end I/O on a bio - * @bio: bio - * @error: error, if any - * - * Description: - * bio_endio() will end I/O on the whole bio. bio_endio() is the - * preferred way to end I/O on a bio, it takes care of clearing - * BIO_UPTODATE on error. @error is 0 on success, and and one of the - * established -Exxxx (-EIO, for instance) error values in case - * something went wrong. No one should call bi_end_io() directly on a - * bio unless they own it and thus know that it has an end_io - * function. - **/ -void bio_endio(struct bio *bio, int error) +static inline void __bio_endio(struct bio *bio, struct batch_complete *batch) { - if (error) + if (bio->bi_error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; + bio->bi_error = -EIO; if (bio->bi_end_io) - bio->bi_end_io(bio, error); + bio->bi_end_io(bio, bio->bi_error, batch); +} + +void bio_endio_batch(struct bio *bio, int error, struct batch_complete *batch) +{ + if (error) + bio->bi_error = error; + + if (batch) + bio_list_add(&batch->bio, bio); + else + __bio_endio(bio, batch); + +} +EXPORT_SYMBOL(bio_endio_batch); + +void batch_complete(struct batch_complete *batch) +{ + struct bio *bio; + + while ((bio = bio_list_pop(&batch->bio))) + __bio_endio(bio, batch); + + batch_complete_aio(batch); } -EXPORT_SYMBOL(bio_endio); +EXPORT_SYMBOL(batch_complete); void bio_pair_release(struct bio_pair *bp) { @@ -1721,7 +1735,8 @@ void bio_pair_release(struct bio_pair *bp) } EXPORT_SYMBOL(bio_pair_release); -static void bio_pair_end_1(struct bio *bi, int err) +static void bio_pair_end_1(struct bio *bi, int err, + struct batch_complete *batch) { struct bio_pair *bp = container_of(bi, struct bio_pair, bio1); @@ -1731,7 +1746,8 @@ static void bio_pair_end_1(struct bio *bi, int err) bio_pair_release(bp); } -static void bio_pair_end_2(struct bio *bi, int err) +static void bio_pair_end_2(struct bio *bi, int err, + struct batch_complete *batch) { struct bio_pair *bp = container_of(bi, struct bio_pair, bio2); diff --git a/fs/block_dev.c b/fs/block_dev.c index aae187a7f94a..95ff88b54e98 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -27,6 +27,7 @@ #include <linux/namei.h> #include <linux/log2.h> #include <linux/cleancache.h> +#include <linux/aio.h> #include <asm/uaccess.h> #include "internal.h" @@ -617,11 +618,9 @@ void bd_forget(struct inode *inode) struct block_device *bdev = NULL; spin_lock(&bdev_lock); - if (inode->i_bdev) { - if (!sb_is_blkdev_sb(inode->i_sb)) - bdev = inode->i_bdev; - __bd_forget(inode); - } + if (!sb_is_blkdev_sb(inode->i_sb)) + bdev = inode->i_bdev; + __bd_forget(inode); spin_unlock(&bdev_lock); if (bdev) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 18af6f48781a..3c617b3244c0 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -323,7 +323,8 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); static int btrfsic_read_block(struct btrfsic_state *state, struct btrfsic_block_data_ctx *block_ctx); static void btrfsic_dump_database(struct btrfsic_state *state); -static void btrfsic_complete_bio_end_io(struct bio *bio, int err); +static void btrfsic_complete_bio_end_io(struct bio *bio, int err, + struct batch_complete *batch); static int btrfsic_test_for_metadata(struct btrfsic_state *state, char **datav, unsigned int num_pages); static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, @@ -336,7 +337,8 @@ static int btrfsic_process_written_superblock( struct btrfsic_state *state, struct btrfsic_block *const block, struct btrfs_super_block *const super_hdr); -static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status); +static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status, + struct batch_complete *batch); static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate); static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, const struct btrfsic_block *block, @@ -1751,7 +1753,8 @@ static int btrfsic_read_block(struct btrfsic_state *state, return block_ctx->len; } -static void btrfsic_complete_bio_end_io(struct bio *bio, int err) +static void btrfsic_complete_bio_end_io(struct bio *bio, int err, + struct batch_complete *batch) { complete((struct completion *)bio->bi_private); } @@ -2294,7 +2297,8 @@ continue_loop: goto again; } -static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) +static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status, + struct batch_complete *batch) { struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; int iodone_w_error; @@ -2342,7 +2346,7 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) block = next_block; } while (NULL != block); - bp->bi_end_io(bp, bio_error_status); + bp->bi_end_io(bp, bio_error_status, batch); } static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 15b94089abc4..74ae115edba0 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -153,7 +153,8 @@ fail: * The compressed pages are freed here, and it must be run * in process context */ -static void end_compressed_bio_read(struct bio *bio, int err) +static void end_compressed_bio_read(struct bio *bio, int err, + struct batch_complete *batch) { struct compressed_bio *cb = bio->bi_private; struct inode *inode; @@ -263,7 +264,8 @@ static noinline void end_compressed_writeback(struct inode *inode, u64 start, * This also calls the writeback end hooks for the file pages so that * metadata and checksums can be updated in the file. */ -static void end_compressed_bio_write(struct bio *bio, int err) +static void end_compressed_bio_write(struct bio *bio, int err, + struct batch_complete *batch) { struct extent_io_tree *tree; struct compressed_bio *cb = bio->bi_private; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6d19a0a554aa..8e7250029f64 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -669,7 +669,8 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) return -EIO; /* we fixed nothing */ } -static void end_workqueue_bio(struct bio *bio, int err) +static void end_workqueue_bio(struct bio *bio, int err, + struct batch_complete *batch) { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; @@ -2957,7 +2958,8 @@ static int write_dev_supers(struct btrfs_device *device, * endio for the write_dev_flush, this will wake anyone waiting * for the barrier when it is done */ -static void btrfs_end_empty_barrier(struct bio *bio, int err) +static void btrfs_end_empty_barrier(struct bio *bio, int err, + struct batch_complete *batch) { if (err) { if (err == -EOPNOTSUPP) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 73f2bfe3ac93..fbf0d44851e4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1937,7 +1937,8 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, return err; } -static void repair_io_failure_callback(struct bio *bio, int err) +static void repair_io_failure_callback(struct bio *bio, int err, + struct batch_complete *batch) { complete(bio->bi_private); } @@ -2317,7 +2318,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ -static void end_bio_extent_writepage(struct bio *bio, int err) +static void end_bio_extent_writepage(struct bio *bio, int err, + struct batch_complete *batch) { struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_io_tree *tree; @@ -2363,7 +2365,8 @@ static void end_bio_extent_writepage(struct bio *bio, int err) * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ -static void end_bio_extent_readpage(struct bio *bio, int err) +static void end_bio_extent_readpage(struct bio *bio, int err, + struct batch_complete *batch) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -3186,7 +3189,8 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb) wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); } -static void end_bio_extent_buffer_writepage(struct bio *bio, int err) +static void end_bio_extent_buffer_writepage(struct bio *bio, int err, + struct batch_complete *batch) { int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bb8b7a0e28a6..bc4d54c465a0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -24,6 +24,7 @@ #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mpage.h> +#include <linux/aio.h> #include <linux/falloc.h> #include <linux/swap.h> #include <linux/writeback.h> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 09c58a35b429..00313057e444 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -32,6 +32,7 @@ #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> +#include <linux/aio.h> #include <linux/bit_spinlock.h> #include <linux/xattr.h> #include <linux/posix_acl.h> @@ -6913,7 +6914,8 @@ struct btrfs_dio_private { struct bio *orig_bio; }; -static void btrfs_endio_direct_read(struct bio *bio, int err) +static void btrfs_endio_direct_read(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_dio_private *dip = bio->bi_private; struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -6967,10 +6969,11 @@ failed: /* If we had a csum failure make sure to clear the uptodate flag */ if (err) clear_bit(BIO_UPTODATE, &bio->bi_flags); - dio_end_io(bio, err); + dio_end_io(bio, err, batch); } -static void btrfs_endio_direct_write(struct bio *bio, int err) +static void btrfs_endio_direct_write(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; @@ -7012,7 +7015,7 @@ out_done: /* If we had an error make sure to clear the uptodate flag */ if (err) clear_bit(BIO_UPTODATE, &bio->bi_flags); - dio_end_io(bio, err); + dio_end_io(bio, err, batch); } static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, @@ -7026,7 +7029,8 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, return 0; } -static void btrfs_end_dio_bio(struct bio *bio, int err) +static void btrfs_end_dio_bio(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_dio_private *dip = bio->bi_private; diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 9a79fb790adb..6df1ac8d0adf 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -850,7 +850,8 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate) * end io function used by finish_rmw. When we finally * get here, we've written a full stripe */ -static void raid_write_end_io(struct bio *bio, int err) +static void raid_write_end_io(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_raid_bio *rbio = bio->bi_private; @@ -1384,7 +1385,8 @@ static void set_bio_pages_uptodate(struct bio *bio) * This will usually kick off finish_rmw once all the bios are read in, but it * may trigger parity reconstruction if we had any errors along the way */ -static void raid_rmw_end_io(struct bio *bio, int err) +static void raid_rmw_end_io(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_raid_bio *rbio = bio->bi_private; @@ -1905,7 +1907,8 @@ cleanup_io: * This is called only for stripes we've read from disk to * reconstruct the parity. */ -static void raid_recover_end_io(struct bio *bio, int err) +static void raid_recover_end_io(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_raid_bio *rbio = bio->bi_private; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 85e072b956d5..fc29a119436a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -200,7 +200,8 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, int is_metadata, int have_csum, const u8 *csum, u64 generation, u16 csum_size); -static void scrub_complete_bio_end_io(struct bio *bio, int err); +static void scrub_complete_bio_end_io(struct bio *bio, int err, + struct batch_complete *batch); static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good, int force_write); @@ -223,7 +224,8 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, u64 physical, struct btrfs_device *dev, u64 flags, u64 gen, int mirror_num, u8 *csum, int force, u64 physical_for_dev_replace); -static void scrub_bio_end_io(struct bio *bio, int err); +static void scrub_bio_end_io(struct bio *bio, int err, + struct batch_complete *batch); static void scrub_bio_end_io_worker(struct btrfs_work *work); static void scrub_block_complete(struct scrub_block *sblock); static void scrub_remap_extent(struct btrfs_fs_info *fs_info, @@ -240,7 +242,8 @@ static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx); static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, struct scrub_page *spage); static void scrub_wr_submit(struct scrub_ctx *sctx); -static void scrub_wr_bio_end_io(struct bio *bio, int err); +static void scrub_wr_bio_end_io(struct bio *bio, int err, + struct batch_complete *batch); static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); static int write_page_nocow(struct scrub_ctx *sctx, u64 physical_for_dev_replace, struct page *page); @@ -1386,7 +1389,8 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, sblock->checksum_error = 1; } -static void scrub_complete_bio_end_io(struct bio *bio, int err) +static void scrub_complete_bio_end_io(struct bio *bio, int err, + struct batch_complete *batch) { complete((struct completion *)bio->bi_private); } @@ -1586,7 +1590,8 @@ static void scrub_wr_submit(struct scrub_ctx *sctx) btrfsic_submit_bio(WRITE, sbio->bio); } -static void scrub_wr_bio_end_io(struct bio *bio, int err) +static void scrub_wr_bio_end_io(struct bio *bio, int err, + struct batch_complete *batch) { struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; @@ -2056,7 +2061,8 @@ leave_nomem: return 0; } -static void scrub_bio_end_io(struct bio *bio, int err) +static void scrub_bio_end_io(struct bio *bio, int err, + struct batch_complete *batch) { struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 678977226570..0182898a6c88 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5029,7 +5029,8 @@ static unsigned int extract_stripe_index_from_bio_private(void *bi_private) return (unsigned int)((uintptr_t)bi_private) & 3; } -static void btrfs_end_bio(struct bio *bio, int err) +static void btrfs_end_bio(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); int is_orig_bio = 0; @@ -5086,7 +5087,7 @@ static void btrfs_end_bio(struct bio *bio, int err) } kfree(bbio); - bio_endio(bio, err); + bio_endio_batch(bio, err, batch); } else if (!is_orig_bio) { bio_put(bio); } diff --git a/fs/buffer.c b/fs/buffer.c index ecd3792ae0e9..52ca25d814dd 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2884,7 +2884,8 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, } EXPORT_SYMBOL(generic_block_bmap); -static void end_bio_bh_io_sync(struct bio *bio, int err) +static void end_bio_bh_io_sync(struct bio *bio, int err, + struct batch_complete *batch) { struct buffer_head *bh = bio->bi_private; @@ -2949,7 +2950,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) } } -int submit_bh(int rw, struct buffer_head * bh) +int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) { struct bio *bio; int ret = 0; @@ -2983,6 +2984,7 @@ int submit_bh(int rw, struct buffer_head * bh) bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; + bio->bi_flags |= bio_flags; /* Take care of bh's that straddle the end of the device */ guard_bh_eod(rw, bio, bh); @@ -2996,6 +2998,12 @@ int submit_bh(int rw, struct buffer_head * bh) bio_put(bio); return ret; } +EXPORT_SYMBOL_GPL(_submit_bh); + +int submit_bh(int rw, struct buffer_head *bh) +{ + return _submit_bh(rw, bh, 0); +} EXPORT_SYMBOL(submit_bh); /** diff --git a/fs/ceph/file.c b/fs/ceph/file.c index c639d9279fdd..013b0772dd24 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -7,6 +7,7 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/writeback.h> +#include <linux/aio.h> #include "super.h" #include "mds_client.h" diff --git a/fs/compat.c b/fs/compat.c index d0560c93973d..d172b71b83ef 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -47,6 +47,7 @@ #include <linux/fs_struct.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/aio.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> diff --git a/fs/coredump.c b/fs/coredump.c index a987f3d39d93..a9abe313e8d5 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -263,7 +263,6 @@ static int zap_process(struct task_struct *start, int exit_code) struct task_struct *t; int nr = 0; - start->signal->flags = SIGNAL_GROUP_EXIT; start->signal->group_exit_code = exit_code; start->signal->group_stop_count = 0; @@ -280,8 +279,8 @@ static int zap_process(struct task_struct *start, int exit_code) return nr; } -static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, - struct core_state *core_state, int exit_code) +static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, + struct core_state *core_state, int exit_code) { struct task_struct *g, *p; unsigned long flags; @@ -291,11 +290,16 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (!signal_group_exit(tsk->signal)) { mm->core_state = core_state; nr = zap_process(tsk, exit_code); + tsk->signal->group_exit_task = tsk; + /* ignore all signals except SIGKILL, see prepare_signal() */ + tsk->signal->flags = SIGNAL_GROUP_COREDUMP; + clear_tsk_thread_flag(tsk, TIF_SIGPENDING); } spin_unlock_irq(&tsk->sighand->siglock); if (unlikely(nr < 0)) return nr; + tsk->flags = PF_DUMPCORE; if (atomic_read(&mm->mm_users) == nr + 1) goto done; /* @@ -340,6 +344,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (unlikely(p->mm == mm)) { lock_task_sighand(p, &flags); nr += zap_process(p, exit_code); + p->signal->flags = SIGNAL_GROUP_EXIT; unlock_task_sighand(p, &flags); } break; @@ -386,11 +391,18 @@ static int coredump_wait(int exit_code, struct core_state *core_state) return core_waiters; } -static void coredump_finish(struct mm_struct *mm) +static void coredump_finish(struct mm_struct *mm, bool core_dumped) { struct core_thread *curr, *next; struct task_struct *task; + spin_lock_irq(¤t->sighand->siglock); + if (core_dumped && !__fatal_signal_pending(current)) + current->signal->group_exit_code |= 0x80; + current->signal->group_exit_task = NULL; + current->signal->flags = SIGNAL_GROUP_EXIT; + spin_unlock_irq(¤t->sighand->siglock); + next = mm->core_state->dumper.next; while ((curr = next) != NULL) { next = curr->next; @@ -407,6 +419,17 @@ static void coredump_finish(struct mm_struct *mm) mm->core_state = NULL; } +static bool dump_interrupted(void) +{ + /* + * SIGKILL or freezing() interrupt the coredumping. Perhaps we + * can do try_to_freeze() and check __fatal_signal_pending(), + * but then we need to teach dump_write() to restart and clear + * TIF_SIGPENDING. + */ + return signal_pending(current); +} + static void wait_for_dump_helpers(struct file *file) { struct pipe_inode_info *pipe = file->private_data; @@ -414,17 +437,20 @@ static void wait_for_dump_helpers(struct file *file) pipe_lock(pipe); pipe->readers++; pipe->writers--; + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + pipe_unlock(pipe); - while ((pipe->readers > 1) && (!signal_pending(current))) { - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - pipe_wait(pipe); - } + /* + * We actually want wait_event_freezable() but then we need + * to clear TIF_SIGPENDING and improve dump_interrupted(). + */ + wait_event_interruptible(pipe->wait, pipe->readers == 1); + pipe_lock(pipe); pipe->readers--; pipe->writers++; pipe_unlock(pipe); - } /* @@ -469,6 +495,7 @@ void do_coredump(siginfo_t *siginfo) int ispipe; struct files_struct *displaced; bool need_nonrelative = false; + bool core_dumped = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .siginfo = siginfo, @@ -512,17 +539,12 @@ void do_coredump(siginfo_t *siginfo) old_cred = override_creds(cred); - /* - * Clear any false indication of pending signals that might - * be seen by the filesystem code called to write the core file. - */ - clear_thread_flag(TIF_SIGPENDING); - ispipe = format_corename(&cn, &cprm); - if (ispipe) { + if (ispipe) { int dump_count; char **helper_argv; + struct subprocess_info *sub_info; if (ispipe < 0) { printk(KERN_WARNING "format_corename failed\n"); @@ -569,15 +591,20 @@ void do_coredump(siginfo_t *siginfo) goto fail_dropcount; } - retval = call_usermodehelper_fns(helper_argv[0], helper_argv, - NULL, UMH_WAIT_EXEC, umh_pipe_setup, - NULL, &cprm); + retval = -ENOMEM; + sub_info = call_usermodehelper_setup(helper_argv[0], + helper_argv, NULL, GFP_KERNEL, + umh_pipe_setup, NULL, &cprm); + if (sub_info) + retval = call_usermodehelper_exec(sub_info, + UMH_WAIT_EXEC); + argv_free(helper_argv); if (retval) { - printk(KERN_INFO "Core dump to %s pipe failed\n", + printk(KERN_INFO "Core dump to %s pipe failed\n", cn.corename); goto close_fail; - } + } } else { struct inode *inode; @@ -628,9 +655,7 @@ void do_coredump(siginfo_t *siginfo) if (displaced) put_files_struct(displaced); file_start_write(cprm.file); - retval = binfmt->core_dump(&cprm); - if (retval) - current->signal->group_exit_code |= 0x80; + core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm); file_end_write(cprm.file); if (ispipe && core_pipe_limit) @@ -644,7 +669,7 @@ fail_dropcount: fail_unlock: kfree(cn.corename); fail_corename: - coredump_finish(mm); + coredump_finish(mm, core_dumped); revert_creds(old_cred); fail_creds: put_cred(cred); @@ -659,7 +684,9 @@ fail: */ int dump_write(struct file *file, const void *addr, int nr) { - return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; + return !dump_interrupted() && + access_ok(VERIFY_READ, addr, nr) && + file->f_op->write(file, addr, nr, &file->f_pos) == nr; } EXPORT_SYMBOL(dump_write); @@ -668,7 +695,8 @@ int dump_seek(struct file *file, loff_t off) int ret = 1; if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) + if (dump_interrupted() || + file->f_op->llseek(file, off, SEEK_CUR) < 0) return 0; } else { char *buf = (char *)get_zeroed_page(GFP_KERNEL); diff --git a/fs/dcache.c b/fs/dcache.c index 3f957c7e7143..a161ebcab9d2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1217,8 +1217,10 @@ void shrink_dcache_parent(struct dentry * parent) LIST_HEAD(dispose); int found; - while ((found = select_parent(parent, &dispose)) != 0) + while ((found = select_parent(parent, &dispose)) != 0) { shrink_dentry_list(&dispose); + cond_resched(); + } } EXPORT_SYMBOL(shrink_dcache_parent); diff --git a/fs/direct-io.c b/fs/direct-io.c index 38484b08a39a..b4dd97c8cea3 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -37,6 +37,7 @@ #include <linux/uio.h> #include <linux/atomic.h> #include <linux/prefetch.h> +#include <linux/aio.h> /* * How many user pages to map in one call to get_user_pages(). This determines @@ -229,7 +230,8 @@ static inline struct page *dio_get_page(struct dio *dio, * filesystems can use it to hold additional state between get_block calls and * dio_complete. */ -static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async) +static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, + bool is_async, struct batch_complete *batch) { ssize_t transferred = 0; @@ -263,7 +265,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is } else { inode_dio_done(dio->inode); if (is_async) - aio_complete(dio->iocb, ret, 0); + aio_complete_batch(dio->iocb, ret, 0, batch); } return ret; @@ -273,7 +275,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio); /* * Asynchronous IO callback. */ -static void dio_bio_end_aio(struct bio *bio, int error) +static void dio_bio_end_aio(struct bio *bio, int error, + struct batch_complete *batch) { struct dio *dio = bio->bi_private; unsigned long remaining; @@ -289,7 +292,7 @@ static void dio_bio_end_aio(struct bio *bio, int error) spin_unlock_irqrestore(&dio->bio_lock, flags); if (remaining == 0) { - dio_complete(dio, dio->iocb->ki_pos, 0, true); + dio_complete(dio, dio->iocb->ki_pos, 0, true, batch); kmem_cache_free(dio_cache, dio); } } @@ -323,12 +326,12 @@ static void dio_bio_end_io(struct bio *bio, int error) * so that the DIO specific endio actions are dealt with after the filesystem * has done it's completion work. */ -void dio_end_io(struct bio *bio, int error) +void dio_end_io(struct bio *bio, int error, struct batch_complete *batch) { struct dio *dio = bio->bi_private; if (dio->is_async) - dio_bio_end_aio(bio, error); + dio_bio_end_aio(bio, error, batch); else dio_bio_end_io(bio, error); } @@ -349,10 +352,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, bio->bi_bdev = bdev; bio->bi_sector = first_sector; - if (dio->is_async) - bio->bi_end_io = dio_bio_end_aio; - else - bio->bi_end_io = dio_bio_end_io; + bio->bi_end_io = dio_end_io; sdio->bio = bio; sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; @@ -672,12 +672,6 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, if (sdio->final_block_in_bio != sdio->cur_page_block || cur_offset != bio_next_offset) dio_bio_submit(dio, sdio); - /* - * Submit now if the underlying fs is about to perform a - * metadata read - */ - else if (sdio->boundary) - dio_bio_submit(dio, sdio); } if (sdio->bio == NULL) { @@ -737,16 +731,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, sdio->cur_page_block + (sdio->cur_page_len >> sdio->blkbits) == blocknr) { sdio->cur_page_len += len; - - /* - * If sdio->boundary then we want to schedule the IO now to - * avoid metadata seeks. - */ - if (sdio->boundary) { - ret = dio_send_cur_page(dio, sdio, map_bh); - page_cache_release(sdio->cur_page); - sdio->cur_page = NULL; - } goto out; } @@ -758,7 +742,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, page_cache_release(sdio->cur_page); sdio->cur_page = NULL; if (ret) - goto out; + return ret; } page_cache_get(page); /* It is in dio */ @@ -768,6 +752,16 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, sdio->cur_page_block = blocknr; sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits; out: + /* + * If sdio->boundary then we want to schedule the IO now to + * avoid metadata seeks. + */ + if (sdio->boundary) { + ret = dio_send_cur_page(dio, sdio, map_bh); + dio_bio_submit(dio, sdio); + page_cache_release(sdio->cur_page); + sdio->cur_page = NULL; + } return ret; } @@ -969,7 +963,8 @@ do_holes: this_chunk_bytes = this_chunk_blocks << blkbits; BUG_ON(this_chunk_bytes == 0); - sdio->boundary = buffer_boundary(map_bh); + if (this_chunk_blocks == sdio->blocks_available) + sdio->boundary = buffer_boundary(map_bh); ret = submit_page_section(dio, sdio, page, offset_in_page, this_chunk_bytes, @@ -1272,7 +1267,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, dio_await_completion(dio); if (drop_refcount(dio) == 0) { - retval = dio_complete(dio, offset, retval, false); + retval = dio_complete(dio, offset, retval, false, NULL); kmem_cache_free(dio_cache, dio); } else BUG_ON(retval != -EIOCBQUEUED); diff --git a/fs/drop_caches.c b/fs/drop_caches.c index c00e055b6282..f23d2a7ed438 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -58,6 +58,8 @@ int drop_caches_sysctl_handler(ctl_table *table, int write, if (ret) return ret; if (write) { + printk(KERN_NOTICE "%s (%d): dropped kernel caches: %d\n", + current->comm, task_pid_nr(current), sysctl_drop_caches); if (sysctl_drop_caches & 1) iterate_supers(drop_pagecache_sb, NULL); if (sysctl_drop_caches & 2) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 63b1f54b6a1f..201f0a0d6b0a 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/compat.h> #include <linux/fs_stack.h> +#include <linux/aio.h> #include "ecryptfs_kernel.h" /** diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 495d15558f42..4ffbbcca5ffb 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -105,7 +105,7 @@ struct epoll_filefd { struct file *file; int fd; -}; +} __packed; /* * Structure used to track possible nested calls, for too deep recursions @@ -129,6 +129,8 @@ struct nested_calls { /* * Each file descriptor added to the eventpoll interface will * have an entry of this type linked to the "rbr" RB tree. + * Avoid increasing the size of this struct, there can be many thousands + * of these on a server and we do not want this to take another cache line. */ struct epitem { /* RB tree node used to link this structure to the eventpoll RB tree */ @@ -159,7 +161,7 @@ struct epitem { struct list_head fllink; /* wakeup_source used when EPOLLWAKEUP is set */ - struct wakeup_source *ws; + struct wakeup_source __rcu *ws; /* The structure that describe the interested events and the source fd */ struct epoll_event event; @@ -537,6 +539,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) } } +/* call only when ep->mtx is held */ +static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi) +{ + return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx)); +} + +/* call only when ep->mtx is held */ +static inline void ep_pm_stay_awake(struct epitem *epi) +{ + struct wakeup_source *ws = ep_wakeup_source(epi); + + if (ws) + __pm_stay_awake(ws); +} + +static inline bool ep_has_wakeup_source(struct epitem *epi) +{ + return rcu_access_pointer(epi->ws) ? true : false; +} + +/* call when ep->mtx cannot be held (ep_poll_callback) */ +static inline void ep_pm_stay_awake_rcu(struct epitem *epi) +{ + struct wakeup_source *ws; + + rcu_read_lock(); + ws = rcu_dereference(epi->ws); + if (ws) + __pm_stay_awake(ws); + rcu_read_unlock(); +} + /** * ep_scan_ready_list - Scans the ready list in a way that makes possible for * the scan code, to call f_op->poll(). Also allows for @@ -600,7 +634,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, */ if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); } } /* @@ -669,7 +703,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); - wakeup_source_unregister(epi->ws); + wakeup_source_unregister(ep_wakeup_source(epi)); /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); @@ -712,11 +746,15 @@ static void ep_free(struct eventpoll *ep) * point we are sure no poll callbacks will be lingering around, and also by * holding "epmutex" we can be sure that no file cleanup code will hit * us during this operation. So we can avoid the lock on "ep->lock". + * We do not need to lock ep->mtx, either, we only do it to prevent + * a lockdep warning. */ + mutex_lock(&ep->mtx); while ((rbp = rb_first(&ep->rbr)) != NULL) { epi = rb_entry(rbp, struct epitem, rbn); ep_remove(ep, epi); } + mutex_unlock(&ep->mtx); mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); @@ -735,6 +773,13 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) return 0; } +static inline unsigned int ep_item_poll(struct epitem *epi, poll_table *pt) +{ + pt->_key = epi->event.events; + + return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & epi->event.events; +} + static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, void *priv) { @@ -742,10 +787,9 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, poll_table pt; init_poll_funcptr(&pt, NULL); + list_for_each_entry_safe(epi, tmp, head, rdllink) { - pt._key = epi->event.events; - if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & - epi->event.events) + if (ep_item_poll(epi, &pt)) return POLLIN | POLLRDNORM; else { /* @@ -753,7 +797,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ - __pm_relax(epi->ws); + __pm_relax(ep_wakeup_source(epi)); list_del_init(&epi->rdllink); } } @@ -985,7 +1029,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k /* If this file is already in the ready list we exit soon */ if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake_rcu(epi); } /* @@ -1147,6 +1191,7 @@ static int reverse_path_check(void) static int ep_create_wakeup_source(struct epitem *epi) { const char *name; + struct wakeup_source *ws; if (!epi->ep->ws) { epi->ep->ws = wakeup_source_register("eventpoll"); @@ -1155,17 +1200,29 @@ static int ep_create_wakeup_source(struct epitem *epi) } name = epi->ffd.file->f_path.dentry->d_name.name; - epi->ws = wakeup_source_register(name); - if (!epi->ws) + ws = wakeup_source_register(name); + + if (!ws) return -ENOMEM; + rcu_assign_pointer(epi->ws, ws); return 0; } -static void ep_destroy_wakeup_source(struct epitem *epi) +/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */ +static noinline void ep_destroy_wakeup_source(struct epitem *epi) { - wakeup_source_unregister(epi->ws); - epi->ws = NULL; + struct wakeup_source *ws = ep_wakeup_source(epi); + + rcu_assign_pointer(epi->ws, NULL); + + /* + * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is + * used internally by wakeup_source_remove, too (called by + * wakeup_source_unregister), so we cannot use call_rcu + */ + synchronize_rcu(); + wakeup_source_unregister(ws); } /* @@ -1200,13 +1257,12 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, if (error) goto error_create_wakeup_source; } else { - epi->ws = NULL; + RCU_INIT_POINTER(epi->ws, NULL); } /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); - epq.pt._key = event->events; /* * Attach the item to the poll hooks and get current event bits. @@ -1215,7 +1271,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, * this operation completes, the poll callback can start hitting * the new item. */ - revents = tfile->f_op->poll(tfile, &epq.pt); + revents = ep_item_poll(epi, &epq.pt); /* * We have to check if something went wrong during the poll wait queue @@ -1248,7 +1304,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1289,7 +1345,7 @@ error_unregister: list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); - wakeup_source_unregister(epi->ws); + wakeup_source_unregister(ep_wakeup_source(epi)); error_create_wakeup_source: kmem_cache_free(epi_cache, epi); @@ -1315,12 +1371,11 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * f_op->poll() call and the new event set registering. */ epi->event.events = event->events; /* need barrier below */ - pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ if (epi->event.events & EPOLLWAKEUP) { - if (!epi->ws) + if (!ep_has_wakeup_source(epi)) ep_create_wakeup_source(epi); - } else if (epi->ws) { + } else if (ep_has_wakeup_source(epi)) { ep_destroy_wakeup_source(epi); } @@ -1348,7 +1403,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ - revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt); + revents = ep_item_poll(epi, &pt); /* * If the item is "hot" and it is not registered inside the ready @@ -1358,7 +1413,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1384,6 +1439,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, unsigned int revents; struct epitem *epi; struct epoll_event __user *uevent; + struct wakeup_source *ws; poll_table pt; init_poll_funcptr(&pt, NULL); @@ -1406,14 +1462,16 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * instead, but then epi->ws would temporarily be out of sync * with ep_is_linked(). */ - if (epi->ws && epi->ws->active) - __pm_stay_awake(ep->ws); - __pm_relax(epi->ws); + ws = ep_wakeup_source(epi); + if (ws) { + if (ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(ws); + } + list_del_init(&epi->rdllink); - pt._key = epi->event.events; - revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & - epi->event.events; + revents = ep_item_poll(epi, &pt); /* * If the event mask intersect the caller-requested one, @@ -1425,7 +1483,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) { list_add(&epi->rdllink, head); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); return eventcnt ? eventcnt : -EFAULT; } eventcnt++; @@ -1445,7 +1503,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); } } } @@ -2011,6 +2069,12 @@ static int __init eventpoll_init(void) /* Initialize the structure used to perform file's f_op->poll() calls */ ep_nested_calls_init(&poll_readywalk_ncalls); + /* + * We can have many thousands of epitems, so prevent this from + * using an extra cache line on 64-bit (and smaller) CPUs + */ + BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128); + /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); diff --git a/fs/exec.c b/fs/exec.c index a96a4885bbbf..963f510a25ab 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -613,7 +613,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * when the old and new regions overlap clear from new_end. */ free_pgd_range(&tlb, new_end, old_end, new_end, - vma->vm_next ? vma->vm_next->vm_start : 0); + vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } else { /* * otherwise, clean from old_start; this is done to not touch @@ -622,7 +622,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * for the others its just a little faster. */ free_pgd_range(&tlb, old_start, old_end, new_end, - vma->vm_next ? vma->vm_next->vm_start : 0); + vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } tlb_finish_mmu(&tlb, new_end, old_end); @@ -898,11 +898,13 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = -1; /* for exit_notify() */ for (;;) { + threadgroup_change_begin(tsk); write_lock_irq(&tasklist_lock); if (likely(leader->exit_state)) break; __set_current_state(TASK_KILLABLE); write_unlock_irq(&tasklist_lock); + threadgroup_change_end(tsk); schedule(); if (unlikely(__fatal_signal_pending(tsk))) goto killed; @@ -960,6 +962,7 @@ static int de_thread(struct task_struct *tsk) if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); + threadgroup_change_end(tsk); release_task(leader); } @@ -1027,17 +1030,7 @@ EXPORT_SYMBOL_GPL(get_task_comm); void set_task_comm(struct task_struct *tsk, char *buf) { task_lock(tsk); - trace_task_rename(tsk, buf); - - /* - * Threads may access current->comm without holding - * the task lock, so write the string carefully. - * Readers without a lock may see incomplete new - * names but are safe from non-terminating string reads. - */ - memset(tsk->comm, 0, TASK_COMM_LEN); - wmb(); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); perf_event_comm(tsk); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index fe60cc1117d8..0a87bb10998d 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -31,6 +31,7 @@ #include <linux/mpage.h> #include <linux/fiemap.h> #include <linux/namei.h> +#include <linux/aio.h> #include "ext2.h" #include "acl.h" #include "xip.h" diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index d706dbfa6220..23c712825640 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -27,6 +27,7 @@ #include <linux/writeback.h> #include <linux/mpage.h> #include <linux/namei.h> +#include <linux/aio.h> #include "ext3.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext3/super.c b/fs/ext3/super.c index fb5120a5505c..3dc48cc8b6eb 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2067,7 +2067,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": "writeback"); - sb->s_flags |= MS_SNAP_STABLE; return 0; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 64848b595b24..4959e29573b6 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -23,6 +23,7 @@ #include <linux/jbd2.h> #include <linux/mount.h> #include <linux/path.h> +#include <linux/aio.h> #include <linux/quotaops.h> #include <linux/pagevec.h> #include "ext4.h" diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 98be6f697463..b8d5d351e24f 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -20,6 +20,7 @@ * (sct@redhat.com), 1993, 1998 */ +#include <linux/aio.h> #include "ext4_jbd2.h" #include "truncate.h" #include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 62492e954483..d01e9267ec08 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -37,6 +37,7 @@ #include <linux/printk.h> #include <linux/slab.h> #include <linux/ratelimit.h> +#include <linux/aio.h> #include "ext4_jbd2.h" #include "xattr.h" diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5929cd0baa20..0f5670970915 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -18,6 +18,7 @@ #include <linux/pagevec.h> #include <linux/mpage.h> #include <linux/namei.h> +#include <linux/aio.h> #include <linux/uio.h> #include <linux/bio.h> #include <linux/workqueue.h> @@ -257,7 +258,8 @@ static void buffer_io_error(struct buffer_head *bh) (unsigned long long)bh->b_blocknr); } -static void ext4_end_bio(struct bio *bio, int error) +static void ext4_end_bio(struct bio *bio, int error, + struct batch_complete *batch) { ext4_io_end_t *io_end = bio->bi_private; struct inode *inode; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3c31ec7d633d..ea200d9e414c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -12,6 +12,7 @@ #include <linux/f2fs_fs.h> #include <linux/buffer_head.h> #include <linux/mpage.h> +#include <linux/aio.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/blkdev.h> @@ -315,7 +316,7 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, return page; } -static void read_end_io(struct bio *bio, int err) +static void read_end_io(struct bio *bio, int err, struct batch_complete *batch) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7c67ec2b63c0..c83a59a125a7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -632,7 +632,8 @@ static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; -static void f2fs_end_io_write(struct bio *bio, int err) +static void f2fs_end_io_write(struct bio *bio, int err, + struct batch_complete *batch) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 165012ef363a..7a6f02caf286 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -964,6 +964,29 @@ int fat_scan(struct inode *dir, const unsigned char *name, } EXPORT_SYMBOL_GPL(fat_scan); +/* + * Scans a directory for a given logstart. + * Returns an error code or zero. + */ +int fat_scan_logstart(struct inode *dir, int i_logstart, + struct fat_slot_info *sinfo) +{ + struct super_block *sb = dir->i_sb; + + sinfo->slot_off = 0; + sinfo->bh = NULL; + while (fat_get_short_entry(dir, &sinfo->slot_off, &sinfo->bh, + &sinfo->de) >= 0) { + if (fat_get_start(MSDOS_SB(sb), sinfo->de) == i_logstart) { + sinfo->slot_off -= sizeof(*sinfo->de); + sinfo->nr_slots = 1; + sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); + return 0; + } + } + return -ENOENT; +} + static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) { struct super_block *sb = dir->i_sb; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e9cc3f0d58e2..21664fcf3616 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -23,6 +23,9 @@ #define FAT_ERRORS_PANIC 2 /* panic on error */ #define FAT_ERRORS_RO 3 /* remount r/o on error */ +#define FAT_NFS_STALE_RW 1 /* NFS RW support, can cause ESTALE */ +#define FAT_NFS_NOSTALE_RO 2 /* NFS RO support, no ESTALE issue */ + struct fat_mount_options { kuid_t fs_uid; kgid_t fs_gid; @@ -34,6 +37,7 @@ struct fat_mount_options { unsigned short shortname; /* flags for shortname display/create rule */ unsigned char name_check; /* r = relaxed, n = normal, s = strict */ unsigned char errors; /* On error: continue, panic, remount-ro */ + unsigned char nfs; /* NFS support: nostale_ro, stale_rw */ unsigned short allow_utime;/* permission for setting the [am]time */ unsigned quiet:1, /* set = fake successful chmods and chowns */ showexec:1, /* set = only set x bit for com/exe/bat */ @@ -48,8 +52,7 @@ struct fat_mount_options { usefree:1, /* Use free_clusters for FAT32 */ tz_set:1, /* Filesystem timestamps' offset set */ rodir:1, /* allow ATTR_RO for directory */ - discard:1, /* Issue discard requests on deletions */ - nfs:1; /* Do extra work needed for NFS export */ + discard:1; /* Issue discard requests on deletions */ }; #define FAT_HASH_BITS 8 @@ -72,6 +75,7 @@ struct msdos_sb_info { unsigned long root_cluster; /* first cluster of the root directory */ unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ struct mutex fat_lock; + struct mutex nfs_build_inode_lock; struct mutex s_lock; unsigned int prev_free; /* previously allocated cluster number */ unsigned int free_clusters; /* -1 if undefined */ @@ -215,6 +219,27 @@ static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) + sbi->data_start; } +static inline void fat_get_blknr_offset(struct msdos_sb_info *sbi, + loff_t i_pos, sector_t *blknr, int *offset) +{ + *blknr = i_pos >> sbi->dir_per_block_bits; + *offset = i_pos & (sbi->dir_per_block - 1); +} + +static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, + struct inode *inode) +{ + loff_t i_pos; +#if BITS_PER_LONG == 32 + spin_lock(&sbi->inode_hash_lock); +#endif + i_pos = MSDOS_I(inode)->i_pos; +#if BITS_PER_LONG == 32 + spin_unlock(&sbi->inode_hash_lock); +#endif + return i_pos; +} + static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) { #ifdef __BIG_ENDIAN @@ -271,6 +296,8 @@ extern int fat_dir_empty(struct inode *dir); extern int fat_subdirs(struct inode *dir); extern int fat_scan(struct inode *dir, const unsigned char *name, struct fat_slot_info *sinfo); +extern int fat_scan_logstart(struct inode *dir, int i_logstart, + struct fat_slot_info *sinfo); extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, struct msdos_dir_entry **de); extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); @@ -348,6 +375,7 @@ extern struct inode *fat_build_inode(struct super_block *sb, extern int fat_sync_inode(struct inode *inode); extern int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, void (*setup)(struct super_block *)); +extern int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de); extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); @@ -382,12 +410,8 @@ int fat_cache_init(void); void fat_cache_destroy(void); /* fat/nfs.c */ -struct fid; -extern struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type); -extern struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type); -extern struct dentry *fat_get_parent(struct dentry *child_dir); +extern const struct export_operations fat_export_ops; +extern const struct export_operations fat_export_ops_nostale; /* helper for printk */ typedef unsigned long long llu; diff --git a/fs/fat/file.c b/fs/fat/file.c index 3978f8ca1823..73264395f705 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -17,8 +17,11 @@ #include <linux/blkdev.h> #include <linux/fsnotify.h> #include <linux/security.h> +#include <linux/falloc.h> #include "fat.h" +static long fat_fallocate(struct file *file, int mode, + loff_t offset, loff_t len); static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) { u32 attr; @@ -140,6 +143,22 @@ static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd, static int fat_file_release(struct inode *inode, struct file *filp) { + + struct super_block *sb = inode->i_sb; + loff_t mmu_private_ideal; + + /* + * Release unwritten fallocated blocks on file release. + * Do this only when the last open file descriptor is closed. + */ + mutex_lock(&inode->i_mutex); + mmu_private_ideal = round_up(inode->i_size, sb->s_blocksize); + + if (mmu_private_ideal < MSDOS_I(inode)->mmu_private && + filp->f_dentry->d_count == 1) + fat_truncate_blocks(inode, inode->i_size); + mutex_unlock(&inode->i_mutex); + if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); @@ -174,6 +193,7 @@ const struct file_operations fat_file_operations = { #endif .fsync = fat_file_fsync, .splice_read = generic_file_splice_read, + .fallocate = fat_fallocate, }; static int fat_cont_expand(struct inode *inode, loff_t size) @@ -212,6 +232,88 @@ out: return err; } +/* + * Preallocate space for a file. This implements fat's fallocate file + * operation, which gets called from sys_fallocate system call. User + * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set + * we just allocate clusters without zeroing them out. Otherwise we + * allocate and zero out clusters via an expanding truncate. The + * allocated clusters are freed in fat_file_release(). + */ +static long fat_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + int cluster, fclus, dclus; + int nr_cluster; /* Number of clusters to be allocated */ + loff_t nr_bytes; /* Number of bytes to be allocated*/ + loff_t free_bytes; /* Unused bytes in the last cluster of file*/ + struct inode *inode = file->f_mapping->host; + struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + int err = 0; + + /* No support for hole punch or other fallocate flags. */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + mutex_lock(&inode->i_mutex); + if ((offset + len) <= MSDOS_I(inode)->mmu_private) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): Blocks already allocated"); + err = -EINVAL; + goto error; + } + + if (mode & FALLOC_FL_KEEP_SIZE) { + /* First compute the number of clusters to be allocated */ + if (inode->i_size > 0) { + err = fat_get_cluster(inode, FAT_ENT_EOF, + &fclus, &dclus); + if (err < 0) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): fat_get_cluster() error"); + goto error; + } + free_bytes = ((fclus + 1) << sbi->cluster_bits) - + inode->i_size; + nr_bytes = offset + len - inode->i_size - free_bytes; + MSDOS_I(inode)->mmu_private = (fclus + 1) << + sbi->cluster_bits; + } else + nr_bytes = offset + len - inode->i_size; + + nr_cluster = (nr_bytes + (sbi->cluster_size - 1)) >> + sbi->cluster_bits; + + /* Start the allocation.We are not zeroing out the clusters */ + while (nr_cluster-- > 0) { + err = fat_alloc_clusters(inode, &cluster, 1); + if (err) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): fat_alloc_clusters() error"); + goto error; + } + err = fat_chain_add(inode, cluster, 1); + if (err) { + fat_free_clusters(inode, cluster); + goto error; + } + MSDOS_I(inode)->mmu_private += sbi->cluster_size; + } + } else { + /* This is just an expanding truncate */ + err = fat_cont_expand(inode, (offset + len)); + if (err) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): fat_cont_expand() error"); + } + } + +error: + mutex_unlock(&inode->i_mutex); + return err; +} + /* Free all clusters after the skip'th cluster. */ static int fat_free(struct inode *inode, int skip) { @@ -306,6 +408,11 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) struct inode *inode = dentry->d_inode; generic_fillattr(inode, stat); stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size; + + if (MSDOS_SB(inode->i_sb)->options.nfs == FAT_NFS_NOSTALE_RO) { + /* Use i_pos for ino. This is used as fileid of nfs. */ + stat->ino = fat_i_pos_read(MSDOS_SB(inode->i_sb), inode); + } return 0; } EXPORT_SYMBOL_GPL(fat_getattr); @@ -373,6 +480,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) struct inode *inode = dentry->d_inode; unsigned int ia_valid; int error; + loff_t mmu_private_ideal; + + mmu_private_ideal = round_up(inode->i_size, dentry->d_sb->s_blocksize); /* Check for setting the inode time. */ ia_valid = attr->ia_valid; @@ -398,7 +508,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { inode_dio_wait(inode); - if (attr->ia_size > inode->i_size) { + if (attr->ia_size > inode->i_size && + MSDOS_I(inode)->mmu_private <= mmu_private_ideal) { error = fat_cont_expand(inode, attr->ia_size); if (error || attr->ia_valid == ATTR_SIZE) goto out; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index acf6e479b443..275221d44eea 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -18,8 +18,8 @@ #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/buffer_head.h> -#include <linux/exportfs.h> #include <linux/mount.h> +#include <linux/aio.h> #include <linux/vfs.h> #include <linux/parser.h> #include <linux/uio.h> @@ -152,11 +152,64 @@ static void fat_write_failed(struct address_space *mapping, loff_t to) } } +static int fat_zero_falloc_area(struct file *file, + struct address_space *mapping, loff_t pos) +{ + struct page *page; + struct inode *inode = mapping->host; + loff_t curpos = i_size_read(inode); + size_t count = pos - curpos; + int err; + + do { + unsigned offset, bytes; + void *fsdata; + + offset = (curpos & (PAGE_CACHE_SIZE - 1)); + bytes = PAGE_CACHE_SIZE - offset; + bytes = min(bytes, count); + + err = pagecache_write_begin(NULL, mapping, curpos, bytes, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (err) + break; + + zero_user(page, offset, bytes); + + err = pagecache_write_end(NULL, mapping, curpos, bytes, bytes, + page, fsdata); + if (err < 0) + break; + curpos += bytes; + count -= bytes; + err = 0; + } while (count); + + return err; +} + static int fat_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { int err; + loff_t mmu_private_ideal, mmu_private_actual; + loff_t size; + struct inode *inode = mapping->host; + struct super_block *sb = inode->i_sb; + + size = i_size_read(inode); + mmu_private_actual = MSDOS_I(inode)->mmu_private; + mmu_private_ideal = round_up(size, sb->s_blocksize); + if ((mmu_private_actual > mmu_private_ideal) && (pos > size)) { + err = fat_zero_falloc_area(file, mapping, pos); + if (err) { + fat_msg(sb, KERN_ERR, + "Error (%d) zeroing fallocated area", err); + return err; + } + } *pagep = NULL; err = cont_write_begin(file, mapping, pos, len, flags, @@ -385,7 +438,7 @@ static int fat_calc_dir_size(struct inode *inode) } /* doesn't deal with root inode */ -static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) +int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); int error; @@ -444,12 +497,25 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) return 0; } +static inline void fat_lock_build_inode(struct msdos_sb_info *sbi) +{ + if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) + mutex_lock(&sbi->nfs_build_inode_lock); +} + +static inline void fat_unlock_build_inode(struct msdos_sb_info *sbi) +{ + if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) + mutex_unlock(&sbi->nfs_build_inode_lock); +} + struct inode *fat_build_inode(struct super_block *sb, struct msdos_dir_entry *de, loff_t i_pos) { struct inode *inode; int err; + fat_lock_build_inode(MSDOS_SB(sb)); inode = fat_iget(sb, i_pos); if (inode) goto out; @@ -469,6 +535,7 @@ struct inode *fat_build_inode(struct super_block *sb, fat_attach(inode, i_pos); insert_inode_hash(inode); out: + fat_unlock_build_inode(MSDOS_SB(sb)); return inode; } @@ -655,20 +722,6 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, - struct inode *inode) -{ - loff_t i_pos; -#if BITS_PER_LONG == 32 - spin_lock(&sbi->inode_hash_lock); -#endif - i_pos = MSDOS_I(inode)->i_pos; -#if BITS_PER_LONG == 32 - spin_unlock(&sbi->inode_hash_lock); -#endif - return i_pos; -} - static int __fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; @@ -676,7 +729,8 @@ static int __fat_write_inode(struct inode *inode, int wait) struct buffer_head *bh; struct msdos_dir_entry *raw_entry; loff_t i_pos; - int err; + sector_t blocknr; + int err, offset; if (inode->i_ino == MSDOS_ROOT_INO) return 0; @@ -686,7 +740,8 @@ retry: if (!i_pos) return 0; - bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); + fat_get_blknr_offset(sbi, i_pos, &blocknr, &offset); + bh = sb_bread(sb, blocknr); if (!bh) { fat_msg(sb, KERN_ERR, "unable to read inode block " "for updating (i_pos %lld)", i_pos); @@ -699,8 +754,7 @@ retry: goto retry; } - raw_entry = &((struct msdos_dir_entry *) (bh->b_data)) - [i_pos & (sbi->dir_per_block - 1)]; + raw_entry = &((struct msdos_dir_entry *) (bh->b_data))[offset]; if (S_ISDIR(inode->i_mode)) raw_entry->size = 0; else @@ -761,12 +815,6 @@ static const struct super_operations fat_sops = { .show_options = fat_show_options, }; -static const struct export_operations fat_export_ops = { - .fh_to_dentry = fat_fh_to_dentry, - .fh_to_parent = fat_fh_to_parent, - .get_parent = fat_get_parent, -}; - static int fat_show_options(struct seq_file *m, struct dentry *root) { struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb); @@ -814,8 +862,6 @@ static int fat_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",usefree"); if (opts->quiet) seq_puts(m, ",quiet"); - if (opts->nfs) - seq_puts(m, ",nfs"); if (opts->showexec) seq_puts(m, ",showexec"); if (opts->sys_immutable) @@ -849,6 +895,10 @@ static int fat_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",errors=panic"); else seq_puts(m, ",errors=remount-ro"); + if (opts->nfs == FAT_NFS_NOSTALE_RO) + seq_puts(m, ",nfs=nostale_ro"); + else if (opts->nfs) + seq_puts(m, ",nfs=stale_rw"); if (opts->discard) seq_puts(m, ",discard"); @@ -865,7 +915,7 @@ enum { Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset, - Opt_err, + Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err, }; static const match_table_t fat_tokens = { @@ -895,7 +945,9 @@ static const match_table_t fat_tokens = { {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, {Opt_discard, "discard"}, - {Opt_nfs, "nfs"}, + {Opt_nfs_stale_rw, "nfs"}, + {Opt_nfs_stale_rw, "nfs=stale_rw"}, + {Opt_nfs_nostale_ro, "nfs=nostale_ro"}, {Opt_obsolete, "conv=binary"}, {Opt_obsolete, "conv=text"}, {Opt_obsolete, "conv=auto"}, @@ -1092,6 +1144,12 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, case Opt_err_ro: opts->errors = FAT_ERRORS_RO; break; + case Opt_nfs_stale_rw: + opts->nfs = FAT_NFS_STALE_RW; + break; + case Opt_nfs_nostale_ro: + opts->nfs = FAT_NFS_NOSTALE_RO; + break; /* msdos specific */ case Opt_dots: @@ -1150,9 +1208,6 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, case Opt_discard: opts->discard = 1; break; - case Opt_nfs: - opts->nfs = 1; - break; /* obsolete mount options */ case Opt_obsolete: @@ -1183,6 +1238,10 @@ out: opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH); if (opts->unicode_xlate) opts->utf8 = 0; + if (opts->nfs == FAT_NFS_NOSTALE_RO) { + sb->s_flags |= MS_RDONLY; + sb->s_export_op = &fat_export_ops_nostale; + } return 0; } @@ -1193,7 +1252,7 @@ static int fat_read_root(struct inode *inode) struct msdos_sb_info *sbi = MSDOS_SB(sb); int error; - MSDOS_I(inode)->i_pos = 0; + MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO; inode->i_uid = sbi->options.fs_uid; inode->i_gid = sbi->options.fs_gid; inode->i_version++; @@ -1256,6 +1315,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sb->s_magic = MSDOS_SUPER_MAGIC; sb->s_op = &fat_sops; sb->s_export_op = &fat_export_ops; + mutex_init(&sbi->nfs_build_inode_lock); ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index 499c10438ca2..93e14933dcb6 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -14,6 +14,18 @@ #include <linux/exportfs.h> #include "fat.h" +struct fat_fid { + u32 i_gen; + u32 i_pos_low; + u16 i_pos_hi; + u16 parent_i_pos_hi; + u32 parent_i_pos_low; + u32 parent_i_gen; +}; + +#define FAT_FID_SIZE_WITHOUT_PARENT 3 +#define FAT_FID_SIZE_WITH_PARENT (sizeof(struct fat_fid)/sizeof(u32)) + /** * Look up a directory inode given its starting cluster. */ @@ -38,63 +50,252 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart) return inode; } -static struct inode *fat_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) +static struct inode *fat_ilookup(struct super_block *sb, u64 ino, loff_t i_pos) { - struct inode *inode; + if (MSDOS_SB(sb)->options.nfs == FAT_NFS_NOSTALE_RO) + return fat_iget(sb, i_pos); - if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO)) - return NULL; + else { + if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO)) + return NULL; + return ilookup(sb, ino); + } +} + +static struct inode *__fat_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation, loff_t i_pos) +{ + struct inode *inode = fat_ilookup(sb, ino, i_pos); - inode = ilookup(sb, ino); if (inode && generation && (inode->i_generation != generation)) { iput(inode); inode = NULL; } + if (inode == NULL && MSDOS_SB(sb)->options.nfs == FAT_NFS_NOSTALE_RO) { + struct buffer_head *bh = NULL; + struct msdos_dir_entry *de ; + sector_t blocknr; + int offset; + fat_get_blknr_offset(MSDOS_SB(sb), i_pos, &blocknr, &offset); + bh = sb_bread(sb, blocknr); + if (!bh) { + fat_msg(sb, KERN_ERR, + "unable to read block(%llu) for building NFS inode", + (llu)blocknr); + return inode; + } + de = (struct msdos_dir_entry *)bh->b_data; + /* If a file is deleted on server and client is not updated + * yet, we must not build the inode upon a lookup call. + */ + if (IS_FREE(de[offset].name)) + inode = NULL; + else + inode = fat_build_inode(sb, &de[offset], i_pos); + brelse(bh); + } return inode; } +static struct inode *fat_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) +{ + + return __fat_nfs_get_inode(sb, ino, generation, 0); +} + +static int +fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp, + struct inode *parent) +{ + int len = *lenp; + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + struct fat_fid *fid = (struct fat_fid *) fh; + loff_t i_pos; + int type = FILEID_FAT_WITHOUT_PARENT; + + if (parent) { + if (len < FAT_FID_SIZE_WITH_PARENT) { + *lenp = FAT_FID_SIZE_WITH_PARENT; + return FILEID_INVALID; + } + } else { + if (len < FAT_FID_SIZE_WITHOUT_PARENT) { + *lenp = FAT_FID_SIZE_WITHOUT_PARENT; + return FILEID_INVALID; + } + } + + i_pos = fat_i_pos_read(sbi, inode); + *lenp = FAT_FID_SIZE_WITHOUT_PARENT; + fid->i_gen = inode->i_generation; + fid->i_pos_low = i_pos & 0xFFFFFFFF; + fid->i_pos_hi = (i_pos >> 32) & 0xFFFF; + if (parent) { + i_pos = fat_i_pos_read(sbi, parent); + fid->parent_i_pos_hi = (i_pos >> 32) & 0xFFFF; + fid->parent_i_pos_low = i_pos & 0xFFFFFFFF; + fid->parent_i_gen = parent->i_generation; + type = FILEID_FAT_WITH_PARENT; + *lenp = FAT_FID_SIZE_WITH_PARENT; + } + + return type; +} + /** * Map a NFS file handle to a corresponding dentry. * The dentry may or may not be connected to the filesystem root. */ -struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, +static struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return generic_fh_to_dentry(sb, fid, fh_len, fh_type, fat_nfs_get_inode); } +static struct dentry *fat_fh_to_dentry_nostale(struct super_block *sb, + struct fid *fh, int fh_len, + int fh_type) +{ + struct inode *inode = NULL; + struct fat_fid *fid = (struct fat_fid *)fh; + loff_t i_pos; + + switch (fh_type) { + case FILEID_FAT_WITHOUT_PARENT: + if (fh_len < FAT_FID_SIZE_WITHOUT_PARENT) + return NULL; + break; + case FILEID_FAT_WITH_PARENT: + if (fh_len < FAT_FID_SIZE_WITH_PARENT) + return NULL; + break; + default: + return NULL; + } + i_pos = fid->i_pos_hi; + i_pos = (i_pos << 32) | (fid->i_pos_low); + inode = __fat_nfs_get_inode(sb, 0, fid->i_gen, i_pos); + + return d_obtain_alias(inode); +} + /* * Find the parent for a file specified by NFS handle. * This requires that the handle contain the i_ino of the parent. */ -struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid, +static struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return generic_fh_to_parent(sb, fid, fh_len, fh_type, fat_nfs_get_inode); } +static struct dentry *fat_fh_to_parent_nostale(struct super_block *sb, + struct fid *fh, int fh_len, + int fh_type) +{ + struct inode *inode = NULL; + struct fat_fid *fid = (struct fat_fid *)fh; + loff_t i_pos; + + if (fh_len < FAT_FID_SIZE_WITH_PARENT) + return NULL; + + switch (fh_type) { + case FILEID_FAT_WITH_PARENT: + i_pos = fid->parent_i_pos_hi; + i_pos = (i_pos << 32) | (fid->parent_i_pos_low); + inode = __fat_nfs_get_inode(sb, 0, fid->parent_i_gen, i_pos); + break; + } + + return d_obtain_alias(inode); +} + +/* + * Rebuild the parent for a directory that is not connected + * to the filesystem root + */ +static +struct inode *fat_rebuild_parent(struct super_block *sb, int parent_logstart) +{ + int search_clus, clus_to_match; + struct msdos_dir_entry *de; + struct inode *parent = NULL; + struct inode *dummy_grand_parent = NULL; + struct fat_slot_info sinfo; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + sector_t blknr = fat_clus_to_blknr(sbi, parent_logstart); + struct buffer_head *parent_bh = sb_bread(sb, blknr); + if (!parent_bh) { + fat_msg(sb, KERN_ERR, + "unable to read cluster of parent directory"); + return NULL; + } + + de = (struct msdos_dir_entry *) parent_bh->b_data; + clus_to_match = fat_get_start(sbi, &de[0]); + search_clus = fat_get_start(sbi, &de[1]); + + dummy_grand_parent = fat_dget(sb, search_clus); + if (!dummy_grand_parent) { + dummy_grand_parent = new_inode(sb); + if (!dummy_grand_parent) { + brelse(parent_bh); + return parent; + } + + dummy_grand_parent->i_ino = iunique(sb, MSDOS_ROOT_INO); + fat_fill_inode(dummy_grand_parent, &de[1]); + MSDOS_I(dummy_grand_parent)->i_pos = -1; + } + + if (!fat_scan_logstart(dummy_grand_parent, clus_to_match, &sinfo)) + parent = fat_build_inode(sb, sinfo.de, sinfo.i_pos); + + brelse(parent_bh); + iput(dummy_grand_parent); + + return parent; +} + /* * Find the parent for a directory that is not currently connected to * the filesystem root. * * On entry, the caller holds child_dir->d_inode->i_mutex. */ -struct dentry *fat_get_parent(struct dentry *child_dir) +static struct dentry *fat_get_parent(struct dentry *child_dir) { struct super_block *sb = child_dir->d_sb; struct buffer_head *bh = NULL; struct msdos_dir_entry *de; struct inode *parent_inode = NULL; + struct msdos_sb_info *sbi = MSDOS_SB(sb); if (!fat_get_dotdot_entry(child_dir->d_inode, &bh, &de)) { - int parent_logstart = fat_get_start(MSDOS_SB(sb), de); + int parent_logstart = fat_get_start(sbi, de); parent_inode = fat_dget(sb, parent_logstart); + if (!parent_inode && sbi->options.nfs == FAT_NFS_NOSTALE_RO) + parent_inode = fat_rebuild_parent(sb, parent_logstart); } brelse(bh); return d_obtain_alias(parent_inode); } + +const struct export_operations fat_export_ops = { + .fh_to_dentry = fat_fh_to_dentry, + .fh_to_parent = fat_fh_to_parent, + .get_parent = fat_get_parent, +}; + +const struct export_operations fat_export_ops_nostale = { + .encode_fh = fat_encode_fh_nostale, + .fh_to_dentry = fat_fh_to_dentry_nostale, + .fh_to_parent = fat_fh_to_parent_nostale, + .get_parent = fat_get_parent, +}; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8067d3719e94..3be57189efd5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1009,6 +1009,7 @@ void bdi_writeback_workfn(struct work_struct *work) struct backing_dev_info *bdi = wb->bdi; long pages_written; + set_worker_desc("flush-%s", dev_name(bdi->dev)); current->flags |= PF_SWAPWRITE; if (likely(!current_is_workqueue_rescuer() || diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b3aaf7b3578b..aef34b1e635e 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -38,6 +38,7 @@ #include <linux/device.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/aio.h> #include <linux/kdev_t.h> #include <linux/kthread.h> #include <linux/list.h> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a6c1664e330b..1d55f9465400 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -19,6 +19,7 @@ #include <linux/pipe_fs_i.h> #include <linux/swap.h> #include <linux/splice.h> +#include <linux/aio.h> MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 71b5710c61b0..128c18bf28ac 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/compat.h> #include <linux/swap.h> +#include <linux/aio.h> static const struct file_operations fuse_direct_io_file_operations; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 9883694f1e7c..0bad69ed6336 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -20,6 +20,7 @@ #include <linux/swap.h> #include <linux/gfs2_ondisk.h> #include <linux/backing-dev.h> +#include <linux/aio.h> #include "gfs2.h" #include "incore.h" diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index d79c2dadc536..acd16764b133 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -25,6 +25,7 @@ #include <asm/uaccess.h> #include <linux/dlm.h> #include <linux/dlm_plock.h> +#include <linux/aio.h> #include "gfs2.h" #include "incore.h" diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index c5fa758fd844..91a5ebb614ca 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -200,7 +200,8 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, * */ -static void gfs2_end_log_write(struct bio *bio, int error) +static void gfs2_end_log_write(struct bio *bio, int error, + struct batch_complete *batch) { struct gfs2_sbd *sdp = bio->bi_private; struct bio_vec *bvec; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 60ede2a0f43f..86eb657aeaca 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -155,7 +155,8 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return -EINVAL; } -static void end_bio_io_page(struct bio *bio, int error) +static void end_bio_io_page(struct bio *bio, int error, + struct batch_complete *batch) { struct page *page = bio->bi_private; diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index 571abe97b42a..de69d8a24f6d 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -22,7 +22,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) return -ENOMEM; fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; - dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); + hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n", + tree->cnid, __builtin_return_address(0)); mutex_lock(&tree->tree_lock); return 0; } @@ -31,7 +32,8 @@ void hfs_find_exit(struct hfs_find_data *fd) { hfs_bnode_put(fd->bnode); kfree(fd->search_key); - dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); + hfs_dbg(BNODE_REFS, "find_exit: %d (%p)\n", + fd->tree->cnid, __builtin_return_address(0)); mutex_unlock(&fd->tree->tree_lock); fd->tree = NULL; } @@ -135,8 +137,8 @@ int hfs_brec_find(struct hfs_find_data *fd) return res; invalid: - printk(KERN_ERR "hfs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", - height, bnode->height, bnode->type, nidx, parent); + pr_err("inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", + height, bnode->height, bnode->type, nidx, parent); res = -EIO; release: hfs_bnode_put(bnode); diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c index c6e97366e8ac..28307bc9ec1e 100644 --- a/fs/hfs/bitmap.c +++ b/fs/hfs/bitmap.c @@ -158,7 +158,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits) } } - dprint(DBG_BITMAP, "alloc_bits: %u,%u\n", pos, *num_bits); + hfs_dbg(BITMAP, "alloc_bits: %u,%u\n", pos, *num_bits); HFS_SB(sb)->free_ablocks -= *num_bits; hfs_bitmap_dirty(sb); out: @@ -200,7 +200,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count) if (!count) return 0; - dprint(DBG_BITMAP, "clear_bits: %u,%u\n", start, count); + hfs_dbg(BITMAP, "clear_bits: %u,%u\n", start, count); /* are all of the bits in range? */ if ((start + count) > HFS_SB(sb)->fs_ablocks) return -2; diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index cdb41a1f6a64..f3b1a15ccd59 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -100,7 +100,7 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, struct hfs_btree *tree; struct page *src_page, *dst_page; - dprint(DBG_BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); + hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); if (!len) return; tree = src_node->tree; @@ -120,7 +120,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) struct page *page; void *ptr; - dprint(DBG_BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); + hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); if (!len) return; src += node->page_offset; @@ -138,16 +138,16 @@ void hfs_bnode_dump(struct hfs_bnode *node) __be32 cnid; int i, off, key_off; - dprint(DBG_BNODE_MOD, "bnode: %d\n", node->this); + hfs_dbg(BNODE_MOD, "bnode: %d\n", node->this); hfs_bnode_read(node, &desc, 0, sizeof(desc)); - dprint(DBG_BNODE_MOD, "%d, %d, %d, %d, %d\n", + hfs_dbg(BNODE_MOD, "%d, %d, %d, %d, %d\n", be32_to_cpu(desc.next), be32_to_cpu(desc.prev), desc.type, desc.height, be16_to_cpu(desc.num_recs)); off = node->tree->node_size - 2; for (i = be16_to_cpu(desc.num_recs); i >= 0; off -= 2, i--) { key_off = hfs_bnode_read_u16(node, off); - dprint(DBG_BNODE_MOD, " %d", key_off); + hfs_dbg_cont(BNODE_MOD, " %d", key_off); if (i && node->type == HFS_NODE_INDEX) { int tmp; @@ -155,17 +155,18 @@ void hfs_bnode_dump(struct hfs_bnode *node) tmp = (hfs_bnode_read_u8(node, key_off) | 1) + 1; else tmp = node->tree->max_key_len + 1; - dprint(DBG_BNODE_MOD, " (%d,%d", tmp, hfs_bnode_read_u8(node, key_off)); + hfs_dbg_cont(BNODE_MOD, " (%d,%d", + tmp, hfs_bnode_read_u8(node, key_off)); hfs_bnode_read(node, &cnid, key_off + tmp, 4); - dprint(DBG_BNODE_MOD, ",%d)", be32_to_cpu(cnid)); + hfs_dbg_cont(BNODE_MOD, ",%d)", be32_to_cpu(cnid)); } else if (i && node->type == HFS_NODE_LEAF) { int tmp; tmp = hfs_bnode_read_u8(node, key_off); - dprint(DBG_BNODE_MOD, " (%d)", tmp); + hfs_dbg_cont(BNODE_MOD, " (%d)", tmp); } } - dprint(DBG_BNODE_MOD, "\n"); + hfs_dbg_cont(BNODE_MOD, "\n"); } void hfs_bnode_unlink(struct hfs_bnode *node) @@ -220,7 +221,7 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid) struct hfs_bnode *node; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid); + pr_err("request for non-existent node %d in B*Tree\n", cnid); return NULL; } @@ -243,7 +244,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) loff_t off; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid); + pr_err("request for non-existent node %d in B*Tree\n", cnid); return NULL; } @@ -257,8 +258,8 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) node->this = cnid; set_bit(HFS_BNODE_NEW, &node->flags); atomic_set(&node->refcnt, 1); - dprint(DBG_BNODE_REFS, "new_node(%d:%d): 1\n", - node->tree->cnid, node->this); + hfs_dbg(BNODE_REFS, "new_node(%d:%d): 1\n", + node->tree->cnid, node->this); init_waitqueue_head(&node->lock_wq); spin_lock(&tree->hash_lock); node2 = hfs_bnode_findhash(tree, cnid); @@ -301,7 +302,7 @@ void hfs_bnode_unhash(struct hfs_bnode *node) { struct hfs_bnode **p; - dprint(DBG_BNODE_REFS, "remove_node(%d:%d): %d\n", + hfs_dbg(BNODE_REFS, "remove_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)]; *p && *p != node; p = &(*p)->next_hash) @@ -443,8 +444,9 @@ void hfs_bnode_get(struct hfs_bnode *node) { if (node) { atomic_inc(&node->refcnt); - dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + hfs_dbg(BNODE_REFS, "get_node(%d:%d): %d\n", + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); } } @@ -455,8 +457,9 @@ void hfs_bnode_put(struct hfs_bnode *node) struct hfs_btree *tree = node->tree; int i; - dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + hfs_dbg(BNODE_REFS, "put_node(%d:%d): %d\n", + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); BUG_ON(!atomic_read(&node->refcnt)); if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) return; diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 92fb358ce824..9f4ee7f52026 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -47,15 +47,13 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) if (node->tree->attributes & HFS_TREE_BIGKEYS) { retval = hfs_bnode_read_u16(node, recoff) + 2; if (retval > node->tree->max_key_len + 2) { - printk(KERN_ERR "hfs: keylen %d too large\n", - retval); + pr_err("keylen %d too large\n", retval); retval = 0; } } else { retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; if (retval > node->tree->max_key_len + 1) { - printk(KERN_ERR "hfs: keylen %d too large\n", - retval); + pr_err("keylen %d too large\n", retval); retval = 0; } } @@ -94,7 +92,8 @@ again: end_rec_off = tree->node_size - (node->num_recs + 1) * 2; end_off = hfs_bnode_read_u16(node, end_rec_off); end_rec_off -= 2; - dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", rec, size, end_off, end_rec_off); + hfs_dbg(BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", + rec, size, end_off, end_rec_off); if (size > end_rec_off - end_off) { if (new_node) panic("not enough room!\n"); @@ -190,7 +189,8 @@ again: mark_inode_dirty(tree->inode); } hfs_bnode_dump(node); - dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n", fd->record, fd->keylength + fd->entrylength); + hfs_dbg(BNODE_MOD, "remove_rec: %d, %d\n", + fd->record, fd->keylength + fd->entrylength); if (!--node->num_recs) { hfs_bnode_unlink(node); if (!node->parent) @@ -240,7 +240,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) if (IS_ERR(new_node)) return new_node; hfs_bnode_get(node); - dprint(DBG_BNODE_MOD, "split_nodes: %d - %d - %d\n", + hfs_dbg(BNODE_MOD, "split_nodes: %d - %d - %d\n", node->this, new_node->this, node->next); new_node->next = node->next; new_node->prev = node->this; @@ -374,7 +374,8 @@ again: newkeylen = (hfs_bnode_read_u8(node, 14) | 1) + 1; else fd->keylength = newkeylen = tree->max_key_len + 1; - dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n", rec, fd->keylength, newkeylen); + hfs_dbg(BNODE_MOD, "update_rec: %d, %d, %d\n", + rec, fd->keylength, newkeylen); rec_off = tree->node_size - (rec + 2) * 2; end_rec_off = tree->node_size - (parent->num_recs + 1) * 2; @@ -385,7 +386,7 @@ again: end_off = hfs_bnode_read_u16(parent, end_rec_off); if (end_rec_off - end_off < diff) { - printk(KERN_DEBUG "hfs: splitting index node...\n"); + printk(KERN_DEBUG "splitting index node...\n"); fd->bnode = parent; new_node = hfs_bnode_split(fd); if (IS_ERR(new_node)) diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 1cbdeea1db44..1ab19e660e69 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -48,7 +48,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz)); if (HFS_I(tree->inode)->alloc_blocks > HFS_I(tree->inode)->first_blocks) { - printk(KERN_ERR "hfs: invalid btree extent records\n"); + pr_err("invalid btree extent records\n"); unlock_new_inode(tree->inode); goto free_inode; } @@ -60,8 +60,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz)); if (!HFS_I(tree->inode)->first_blocks) { - printk(KERN_ERR "hfs: invalid btree extent records " - "(0 size).\n"); + pr_err("invalid btree extent records (0 size)\n"); unlock_new_inode(tree->inode); goto free_inode; } @@ -100,15 +99,15 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke switch (id) { case HFS_EXT_CNID: if (tree->max_key_len != HFS_MAX_EXT_KEYLEN) { - printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", - tree->max_key_len); + pr_err("invalid extent max_key_len %d\n", + tree->max_key_len); goto fail_page; } break; case HFS_CAT_CNID: if (tree->max_key_len != HFS_MAX_CAT_KEYLEN) { - printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", - tree->max_key_len); + pr_err("invalid catalog max_key_len %d\n", + tree->max_key_len); goto fail_page; } break; @@ -146,8 +145,9 @@ void hfs_btree_close(struct hfs_btree *tree) while ((node = tree->node_hash[i])) { tree->node_hash[i] = node->next_hash; if (atomic_read(&node->refcnt)) - printk(KERN_ERR "hfs: node %d:%d still has %d user(s)!\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + pr_err("node %d:%d still has %d user(s)!\n", + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); hfs_bnode_free(node); tree->node_hash_cnt--; } @@ -290,7 +290,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) kunmap(*pagep); nidx = node->next; if (!nidx) { - printk(KERN_DEBUG "hfs: create new bmap node...\n"); + printk(KERN_DEBUG "create new bmap node...\n"); next_node = hfs_bmap_new_bmap(node, idx); } else next_node = hfs_bnode_find(tree, nidx); @@ -316,7 +316,7 @@ void hfs_bmap_free(struct hfs_bnode *node) u32 nidx; u8 *data, byte, m; - dprint(DBG_BNODE_MOD, "btree_free_node: %u\n", node->this); + hfs_dbg(BNODE_MOD, "btree_free_node: %u\n", node->this); tree = node->tree; nidx = node->this; node = hfs_bnode_find(tree, 0); @@ -331,7 +331,8 @@ void hfs_bmap_free(struct hfs_bnode *node) hfs_bnode_put(node); if (!i) { /* panic */; - printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this); + pr_crit("unable to free bnode %u. bmap not found!\n", + node->this); return; } node = hfs_bnode_find(tree, i); @@ -339,7 +340,8 @@ void hfs_bmap_free(struct hfs_bnode *node) return; if (node->type != HFS_NODE_MAP) { /* panic */; - printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type); + pr_crit("invalid bmap found! (%u,%d)\n", + node->this, node->type); hfs_bnode_put(node); return; } @@ -352,7 +354,8 @@ void hfs_bmap_free(struct hfs_bnode *node) m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { - printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type); + pr_crit("trying to free free bnode %u(%d)\n", + node->this, node->type); kunmap(page); hfs_bnode_put(node); return; diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 424b0337f524..ff0316b925a5 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -87,12 +87,15 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * int entry_size; int err; - dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); + hfs_dbg(CAT_MOD, "create_cat: %s,%u(%d)\n", + str->name, cnid, inode->i_nlink); if (dir->i_size >= HFS_MAX_VALENCE) return -ENOSPC; sb = dir->i_sb; - hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + if (err) + return err; hfs_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ? @@ -184,14 +187,14 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid, type = rec.type; if (type != HFS_CDR_THD && type != HFS_CDR_FTH) { - printk(KERN_ERR "hfs: found bad thread record in catalog\n"); + pr_err("found bad thread record in catalog\n"); return -EIO; } fd->search_key->cat.ParID = rec.thread.ParID; len = fd->search_key->cat.CName.len = rec.thread.CName.len; if (len > HFS_NAMELEN) { - printk(KERN_ERR "hfs: bad catalog namelength\n"); + pr_err("bad catalog namelength\n"); return -EIO; } memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); @@ -212,9 +215,11 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str) struct list_head *pos; int res, type; - dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); + hfs_dbg(CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); sb = dir->i_sb; - hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + res = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + if (res) + return res; hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str); res = hfs_brec_find(&fd); @@ -278,10 +283,13 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, int entry_size, type; int err; - dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, + hfs_dbg(CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", + cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); sb = src_dir->i_sb; - hfs_find_init(HFS_SB(sb)->cat_tree, &src_fd); + err = hfs_find_init(HFS_SB(sb)->cat_tree, &src_fd); + if (err) + return err; dst_fd = src_fd; /* find the old dir entry and read the data */ diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 5f7f1abd5f6d..17c22a8fd40a 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -25,7 +25,9 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; int res; - hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); + res = hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); + if (res) + return ERR_PTR(res); hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); res = hfs_brec_read(&fd, &rec, sizeof(rec)); if (res) { @@ -63,7 +65,9 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filp->f_pos >= inode->i_size) return 0; - hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + if (err) + return err; hfs_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd); if (err) @@ -84,12 +88,12 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); if (entry.type != HFS_CDR_THD) { - printk(KERN_ERR "hfs: bad catalog folder thread\n"); + pr_err("bad catalog folder thread\n"); err = -EIO; goto out; } //if (fd.entrylength < HFS_MIN_THREAD_SZ) { - // printk(KERN_ERR "hfs: truncated catalog thread\n"); + // pr_err("truncated catalog thread\n"); // err = -EIO; // goto out; //} @@ -108,7 +112,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) for (;;) { if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) { - printk(KERN_ERR "hfs: walked past end of dir\n"); + pr_err("walked past end of dir\n"); err = -EIO; goto out; } @@ -123,7 +127,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName); if (type == HFS_CDR_DIR) { if (fd.entrylength < sizeof(struct hfs_cat_dir)) { - printk(KERN_ERR "hfs: small dir entry\n"); + pr_err("small dir entry\n"); err = -EIO; goto out; } @@ -132,7 +136,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } else if (type == HFS_CDR_FIL) { if (fd.entrylength < sizeof(struct hfs_cat_file)) { - printk(KERN_ERR "hfs: small file entry\n"); + pr_err("small file entry\n"); err = -EIO; goto out; } @@ -140,7 +144,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) be32_to_cpu(entry.file.FlNum), DT_REG)) break; } else { - printk(KERN_ERR "hfs: bad catalog entry type %d\n", type); + pr_err("bad catalog entry type %d\n", type); err = -EIO; goto out; } diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index a67955a0c36f..e33a0d36a93e 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -107,7 +107,7 @@ static u16 hfs_ext_lastblock(struct hfs_extent *ext) return be16_to_cpu(ext->block) + be16_to_cpu(ext->count); } -static void __hfs_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) +static int __hfs_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) { int res; @@ -116,26 +116,31 @@ static void __hfs_ext_write_extent(struct inode *inode, struct hfs_find_data *fd res = hfs_brec_find(fd); if (HFS_I(inode)->flags & HFS_FLG_EXT_NEW) { if (res != -ENOENT) - return; + return res; hfs_brec_insert(fd, HFS_I(inode)->cached_extents, sizeof(hfs_extent_rec)); HFS_I(inode)->flags &= ~(HFS_FLG_EXT_DIRTY|HFS_FLG_EXT_NEW); } else { if (res) - return; + return res; hfs_bnode_write(fd->bnode, HFS_I(inode)->cached_extents, fd->entryoffset, fd->entrylength); HFS_I(inode)->flags &= ~HFS_FLG_EXT_DIRTY; } + return 0; } -void hfs_ext_write_extent(struct inode *inode) +int hfs_ext_write_extent(struct inode *inode) { struct hfs_find_data fd; + int res = 0; if (HFS_I(inode)->flags & HFS_FLG_EXT_DIRTY) { - hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd); - __hfs_ext_write_extent(inode, &fd); + res = hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd); + if (res) + return res; + res = __hfs_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } + return res; } static inline int __hfs_ext_read_extent(struct hfs_find_data *fd, struct hfs_extent *extent, @@ -161,8 +166,11 @@ static inline int __hfs_ext_cache_extent(struct hfs_find_data *fd, struct inode { int res; - if (HFS_I(inode)->flags & HFS_FLG_EXT_DIRTY) - __hfs_ext_write_extent(inode, fd); + if (HFS_I(inode)->flags & HFS_FLG_EXT_DIRTY) { + res = __hfs_ext_write_extent(inode, fd); + if (res) + return res; + } res = __hfs_ext_read_extent(fd, HFS_I(inode)->cached_extents, inode->i_ino, block, HFS_IS_RSRC(inode) ? HFS_FK_RSRC : HFS_FK_DATA); @@ -185,9 +193,11 @@ static int hfs_ext_read_extent(struct inode *inode, u16 block) block < HFS_I(inode)->cached_start + HFS_I(inode)->cached_blocks) return 0; - hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd); - res = __hfs_ext_cache_extent(&fd, inode, block); - hfs_find_exit(&fd); + res = hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd); + if (!res) { + res = __hfs_ext_cache_extent(&fd, inode, block); + hfs_find_exit(&fd); + } return res; } @@ -195,11 +205,12 @@ static void hfs_dump_extent(struct hfs_extent *extent) { int i; - dprint(DBG_EXTENT, " "); + hfs_dbg(EXTENT, " "); for (i = 0; i < 3; i++) - dprint(DBG_EXTENT, " %u:%u", be16_to_cpu(extent[i].block), - be16_to_cpu(extent[i].count)); - dprint(DBG_EXTENT, "\n"); + hfs_dbg_cont(EXTENT, " %u:%u", + be16_to_cpu(extent[i].block), + be16_to_cpu(extent[i].count)); + hfs_dbg_cont(EXTENT, "\n"); } static int hfs_add_extent(struct hfs_extent *extent, u16 offset, @@ -298,7 +309,9 @@ int hfs_free_fork(struct super_block *sb, struct hfs_cat_file *file, int type) if (total_blocks == blocks) return 0; - hfs_find_init(HFS_SB(sb)->ext_tree, &fd); + res = hfs_find_init(HFS_SB(sb)->ext_tree, &fd); + if (res) + return res; do { res = __hfs_ext_read_extent(&fd, extent, cnid, total_blocks, type); if (res) @@ -392,10 +405,10 @@ int hfs_extend_file(struct inode *inode) goto out; } - dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); + hfs_dbg(EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) { if (!HFS_I(inode)->first_blocks) { - dprint(DBG_EXTENT, "first extents\n"); + hfs_dbg(EXTENT, "first extents\n"); /* no extents yet */ HFS_I(inode)->first_extents[0].block = cpu_to_be16(start); HFS_I(inode)->first_extents[0].count = cpu_to_be16(len); @@ -437,8 +450,10 @@ out: return res; insert_extent: - dprint(DBG_EXTENT, "insert new extent\n"); - hfs_ext_write_extent(inode); + hfs_dbg(EXTENT, "insert new extent\n"); + res = hfs_ext_write_extent(inode); + if (res) + goto out; memset(HFS_I(inode)->cached_extents, 0, sizeof(hfs_extent_rec)); HFS_I(inode)->cached_extents[0].block = cpu_to_be16(start); @@ -460,13 +475,13 @@ void hfs_file_truncate(struct inode *inode) u32 size; int res; - dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", inode->i_ino, - (long long)HFS_I(inode)->phys_size, inode->i_size); + hfs_dbg(INODE, "truncate: %lu, %Lu -> %Lu\n", + inode->i_ino, (long long)HFS_I(inode)->phys_size, + inode->i_size); if (inode->i_size > HFS_I(inode)->phys_size) { struct address_space *mapping = inode->i_mapping; void *fsdata; struct page *page; - int res; /* XXX: Can use generic_cont_expand? */ size = inode->i_size - 1; @@ -488,7 +503,12 @@ void hfs_file_truncate(struct inode *inode) goto out; mutex_lock(&HFS_I(inode)->extents_lock); - hfs_find_init(HFS_SB(sb)->ext_tree, &fd); + res = hfs_find_init(HFS_SB(sb)->ext_tree, &fd); + if (res) { + mutex_unlock(&HFS_I(inode)->extents_lock); + /* XXX: We lack error handling of hfs_file_truncate() */ + return; + } while (1) { if (alloc_cnt == HFS_I(inode)->first_blocks) { hfs_free_extents(sb, HFS_I(inode)->first_extents, diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 693df9fe52b2..a73b11839a41 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -9,6 +9,12 @@ #ifndef _LINUX_HFS_FS_H #define _LINUX_HFS_FS_H +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/types.h> #include <linux/mutex.h> @@ -34,8 +40,18 @@ //#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT) #define DBG_MASK (0) -#define dprint(flg, fmt, args...) \ - if (flg & DBG_MASK) printk(fmt , ## args) +#define hfs_dbg(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ +} while (0) + +#define hfs_dbg_cont(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + pr_cont(fmt, ##__VA_ARGS__); \ +} while (0) + /* * struct hfs_inode_info @@ -174,7 +190,7 @@ extern const struct inode_operations hfs_dir_inode_operations; /* extent.c */ extern int hfs_ext_keycmp(const btree_key *, const btree_key *); extern int hfs_free_fork(struct super_block *, struct hfs_cat_file *, int); -extern void hfs_ext_write_extent(struct inode *); +extern int hfs_ext_write_extent(struct inode *); extern int hfs_extend_file(struct inode *); extern void hfs_file_truncate(struct inode *); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 3031dfdd2358..f9299d8a64e3 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -14,6 +14,7 @@ #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/sched.h> +#include <linux/aio.h> #include "hfs_fs.h" #include "btree.h" @@ -237,7 +238,7 @@ void hfs_delete_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; - dprint(DBG_INODE, "delete_inode: %lu\n", inode->i_ino); + hfs_dbg(INODE, "delete_inode: %lu\n", inode->i_ino); if (S_ISDIR(inode->i_mode)) { HFS_SB(sb)->folder_count--; if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) @@ -416,9 +417,12 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) struct inode *main_inode = inode; struct hfs_find_data fd; hfs_cat_rec rec; + int res; - dprint(DBG_INODE, "hfs_write_inode: %lu\n", inode->i_ino); - hfs_ext_write_extent(inode); + hfs_dbg(INODE, "hfs_write_inode: %lu\n", inode->i_ino); + res = hfs_ext_write_extent(inode); + if (res) + return res; if (inode->i_ino < HFS_FIRSTUSER_CNID) { switch (inode->i_ino) { @@ -515,7 +519,11 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, if (!inode) return ERR_PTR(-ENOMEM); - hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); + res = hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); + if (res) { + iput(inode); + return ERR_PTR(res); + } fd.search_key->cat = HFS_I(dir)->cat_key; res = hfs_brec_read(&fd, &rec, sizeof(rec)); if (!res) { diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index b7ec224910c5..aa3f0d6d043c 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -48,7 +48,7 @@ static int hfs_get_last_session(struct super_block *sb, *start = (sector_t)te.cdte_addr.lba << 2; return 0; } - printk(KERN_ERR "hfs: invalid session number or type of track\n"); + pr_err("invalid session number or type of track\n"); return -EINVAL; } ms_info.addr_format = CDROM_LBA; @@ -101,7 +101,7 @@ int hfs_mdb_get(struct super_block *sb) HFS_SB(sb)->alloc_blksz = size = be32_to_cpu(mdb->drAlBlkSiz); if (!size || (size & (HFS_SECTOR_SIZE - 1))) { - printk(KERN_ERR "hfs: bad allocation block size %d\n", size); + pr_err("bad allocation block size %d\n", size); goto out_bh; } @@ -118,7 +118,7 @@ int hfs_mdb_get(struct super_block *sb) size >>= 1; brelse(bh); if (!sb_set_blocksize(sb, size)) { - printk(KERN_ERR "hfs: unable to set blocksize to %u\n", size); + pr_err("unable to set blocksize to %u\n", size); goto out; } @@ -162,8 +162,8 @@ int hfs_mdb_get(struct super_block *sb) } if (!HFS_SB(sb)->alt_mdb) { - printk(KERN_WARNING "hfs: unable to locate alternate MDB\n"); - printk(KERN_WARNING "hfs: continuing without an alternate MDB\n"); + pr_warn("unable to locate alternate MDB\n"); + pr_warn("continuing without an alternate MDB\n"); } HFS_SB(sb)->bitmap = (__be32 *)__get_free_pages(GFP_KERNEL, PAGE_SIZE < 8192 ? 1 : 0); @@ -178,7 +178,7 @@ int hfs_mdb_get(struct super_block *sb) while (size) { bh = sb_bread(sb, off >> sb->s_blocksize_bits); if (!bh) { - printk(KERN_ERR "hfs: unable to read volume bitmap\n"); + pr_err("unable to read volume bitmap\n"); goto out; } off2 = off & (sb->s_blocksize - 1); @@ -192,23 +192,22 @@ int hfs_mdb_get(struct super_block *sb) HFS_SB(sb)->ext_tree = hfs_btree_open(sb, HFS_EXT_CNID, hfs_ext_keycmp); if (!HFS_SB(sb)->ext_tree) { - printk(KERN_ERR "hfs: unable to open extent tree\n"); + pr_err("unable to open extent tree\n"); goto out; } HFS_SB(sb)->cat_tree = hfs_btree_open(sb, HFS_CAT_CNID, hfs_cat_keycmp); if (!HFS_SB(sb)->cat_tree) { - printk(KERN_ERR "hfs: unable to open catalog tree\n"); + pr_err("unable to open catalog tree\n"); goto out; } attrib = mdb->drAtrb; if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { - printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, " - "running fsck.hfs is recommended. mounting read-only.\n"); + pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) { - printk(KERN_WARNING "hfs: filesystem is marked locked, mounting read-only.\n"); + pr_warn("filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } if (!(sb->s_flags & MS_RDONLY)) { @@ -312,7 +311,7 @@ void hfs_mdb_commit(struct super_block *sb) while (size) { bh = sb_bread(sb, block); if (!bh) { - printk(KERN_ERR "hfs: unable to read volume bitmap\n"); + pr_err("unable to read volume bitmap\n"); break; } len = min((int)sb->s_blocksize - off, size); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index bbaaa8a4ee64..2d2039e754cd 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -117,12 +117,11 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data) return 0; if (!(*flags & MS_RDONLY)) { if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { - printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, " - "running fsck.hfs is recommended. leaving read-only.\n"); + pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) { - printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); + pr_warn("filesystem is marked locked, leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } @@ -253,29 +252,29 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) switch (token) { case opt_uid: if (match_int(&args[0], &tmp)) { - printk(KERN_ERR "hfs: uid requires an argument\n"); + pr_err("uid requires an argument\n"); return 0; } hsb->s_uid = make_kuid(current_user_ns(), (uid_t)tmp); if (!uid_valid(hsb->s_uid)) { - printk(KERN_ERR "hfs: invalid uid %d\n", tmp); + pr_err("invalid uid %d\n", tmp); return 0; } break; case opt_gid: if (match_int(&args[0], &tmp)) { - printk(KERN_ERR "hfs: gid requires an argument\n"); + pr_err("gid requires an argument\n"); return 0; } hsb->s_gid = make_kgid(current_user_ns(), (gid_t)tmp); if (!gid_valid(hsb->s_gid)) { - printk(KERN_ERR "hfs: invalid gid %d\n", tmp); + pr_err("invalid gid %d\n", tmp); return 0; } break; case opt_umask: if (match_octal(&args[0], &tmp)) { - printk(KERN_ERR "hfs: umask requires a value\n"); + pr_err("umask requires a value\n"); return 0; } hsb->s_file_umask = (umode_t)tmp; @@ -283,39 +282,39 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) break; case opt_file_umask: if (match_octal(&args[0], &tmp)) { - printk(KERN_ERR "hfs: file_umask requires a value\n"); + pr_err("file_umask requires a value\n"); return 0; } hsb->s_file_umask = (umode_t)tmp; break; case opt_dir_umask: if (match_octal(&args[0], &tmp)) { - printk(KERN_ERR "hfs: dir_umask requires a value\n"); + pr_err("dir_umask requires a value\n"); return 0; } hsb->s_dir_umask = (umode_t)tmp; break; case opt_part: if (match_int(&args[0], &hsb->part)) { - printk(KERN_ERR "hfs: part requires an argument\n"); + pr_err("part requires an argument\n"); return 0; } break; case opt_session: if (match_int(&args[0], &hsb->session)) { - printk(KERN_ERR "hfs: session requires an argument\n"); + pr_err("session requires an argument\n"); return 0; } break; case opt_type: if (match_fourchar(&args[0], &hsb->s_type)) { - printk(KERN_ERR "hfs: type requires a 4 character value\n"); + pr_err("type requires a 4 character value\n"); return 0; } break; case opt_creator: if (match_fourchar(&args[0], &hsb->s_creator)) { - printk(KERN_ERR "hfs: creator requires a 4 character value\n"); + pr_err("creator requires a 4 character value\n"); return 0; } break; @@ -324,14 +323,14 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) break; case opt_codepage: if (hsb->nls_disk) { - printk(KERN_ERR "hfs: unable to change codepage\n"); + pr_err("unable to change codepage\n"); return 0; } p = match_strdup(&args[0]); if (p) hsb->nls_disk = load_nls(p); if (!hsb->nls_disk) { - printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); + pr_err("unable to load codepage \"%s\"\n", p); kfree(p); return 0; } @@ -339,14 +338,14 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) break; case opt_iocharset: if (hsb->nls_io) { - printk(KERN_ERR "hfs: unable to change iocharset\n"); + pr_err("unable to change iocharset\n"); return 0; } p = match_strdup(&args[0]); if (p) hsb->nls_io = load_nls(p); if (!hsb->nls_io) { - printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); + pr_err("unable to load iocharset \"%s\"\n", p); kfree(p); return 0; } @@ -360,7 +359,7 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) if (hsb->nls_disk && !hsb->nls_io) { hsb->nls_io = load_nls_default(); if (!hsb->nls_io) { - printk(KERN_ERR "hfs: unable to load default iocharset\n"); + pr_err("unable to load default iocharset\n"); return 0; } } @@ -400,7 +399,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) res = -EINVAL; if (!parse_options((char *)data, sbi)) { - printk(KERN_ERR "hfs: unable to parse mount options.\n"); + pr_err("unable to parse mount options\n"); goto bail; } @@ -411,14 +410,16 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) res = hfs_mdb_get(sb); if (res) { if (!silent) - printk(KERN_WARNING "hfs: can't find a HFS filesystem on dev %s.\n", + pr_warn("can't find a HFS filesystem on dev %s\n", hfs_mdb_name(sb)); res = -EINVAL; goto bail; } /* try to get the root inode */ - hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + res = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + if (res) + goto bail_no_root; res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd); if (!res) { if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) { @@ -447,7 +448,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) return 0; bail_no_root: - printk(KERN_ERR "hfs: get root inode failed.\n"); + pr_err("get root inode failed\n"); bail: hfs_mdb_put(sb); return res; diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index 8d691f124714..0f47890299c4 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -56,7 +56,7 @@ int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key, if (name) { len = strlen(name); if (len > HFSPLUS_ATTR_MAX_STRLEN) { - printk(KERN_ERR "hfs: invalid xattr name's length\n"); + pr_err("invalid xattr name's length\n"); return -EINVAL; } hfsplus_asc2uni(sb, @@ -166,10 +166,10 @@ int hfsplus_find_attr(struct super_block *sb, u32 cnid, { int err = 0; - dprint(DBG_ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid); + hfs_dbg(ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid); if (!HFSPLUS_SB(sb)->attr_tree) { - printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + pr_err("attributes file doesn't exist\n"); return -EINVAL; } @@ -228,11 +228,11 @@ int hfsplus_create_attr(struct inode *inode, int entry_size; int err; - dprint(DBG_ATTR_MOD, "create_attr: %s,%ld\n", + hfs_dbg(ATTR_MOD, "create_attr: %s,%ld\n", name ? name : NULL, inode->i_ino); if (!HFSPLUS_SB(sb)->attr_tree) { - printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + pr_err("attributes file doesn't exist\n"); return -EINVAL; } @@ -307,10 +307,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, break; case HFSPLUS_ATTR_FORK_DATA: case HFSPLUS_ATTR_EXTENTS: - printk(KERN_ERR "hfs: only inline data xattr are supported\n"); + pr_err("only inline data xattr are supported\n"); return -EOPNOTSUPP; default: - printk(KERN_ERR "hfs: invalid extended attribute record\n"); + pr_err("invalid extended attribute record\n"); return -ENOENT; } @@ -328,11 +328,11 @@ int hfsplus_delete_attr(struct inode *inode, const char *name) struct super_block *sb = inode->i_sb; struct hfs_find_data fd; - dprint(DBG_ATTR_MOD, "delete_attr: %s,%ld\n", + hfs_dbg(ATTR_MOD, "delete_attr: %s,%ld\n", name ? name : NULL, inode->i_ino); if (!HFSPLUS_SB(sb)->attr_tree) { - printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + pr_err("attributes file doesn't exist\n"); return -EINVAL; } @@ -346,7 +346,7 @@ int hfsplus_delete_attr(struct inode *inode, const char *name) if (err) goto out; } else { - printk(KERN_ERR "hfs: invalid extended attribute name\n"); + pr_err("invalid extended attribute name\n"); err = -EINVAL; goto out; } @@ -369,10 +369,10 @@ int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid) int err = 0; struct hfs_find_data fd; - dprint(DBG_ATTR_MOD, "delete_all_attrs: %d\n", cnid); + hfs_dbg(ATTR_MOD, "delete_all_attrs: %d\n", cnid); if (!HFSPLUS_SB(dir->i_sb)->attr_tree) { - printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + pr_err("attributes file doesn't exist\n"); return -EINVAL; } @@ -384,7 +384,7 @@ int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid) err = hfsplus_find_attr(dir->i_sb, cnid, NULL, &fd); if (err) { if (err != -ENOENT) - printk(KERN_ERR "hfs: xattr search failed.\n"); + pr_err("xattr search failed\n"); goto end_delete_all; } diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index d73c98d1ee99..c1422d91cd36 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -22,7 +22,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) return -ENOMEM; fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; - dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", + hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); switch (tree->cnid) { case HFSPLUS_CAT_CNID: @@ -44,7 +44,7 @@ void hfs_find_exit(struct hfs_find_data *fd) { hfs_bnode_put(fd->bnode); kfree(fd->search_key); - dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", + hfs_dbg(BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); mutex_unlock(&fd->tree->tree_lock); fd->tree = NULL; @@ -56,7 +56,8 @@ int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode, int *end, int *cur_rec) { - __be32 cur_cnid, search_cnid; + __be32 cur_cnid; + __be32 search_cnid; if (bnode->tree->cnid == HFSPLUS_EXT_CNID) { cur_cnid = fd->key->ext.cnid; @@ -67,8 +68,11 @@ int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode, } else if (bnode->tree->cnid == HFSPLUS_ATTR_CNID) { cur_cnid = fd->key->attr.cnid; search_cnid = fd->search_key->attr.cnid; - } else + } else { + cur_cnid = 0; /* used-uninitialized warning */ + search_cnid = 0; BUG(); + } if (cur_cnid == search_cnid) { (*end) = (*cur_rec); @@ -204,7 +208,7 @@ int hfs_brec_find(struct hfs_find_data *fd, search_strategy_t do_key_compare) return res; invalid: - printk(KERN_ERR "hfs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", + pr_err("inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", height, bnode->height, bnode->type, nidx, parent); res = -EIO; release: diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 6feefc0cb48a..826e864acb54 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -30,7 +30,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, if (!len) return size; - dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); + hfs_dbg(BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); mutex_lock(&sbi->alloc_mutex); mapping = sbi->alloc_file->i_mapping; page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); @@ -89,14 +89,14 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, else end = pptr + ((size + 31) & (PAGE_CACHE_BITS - 1)) / 32; } - dprint(DBG_BITMAP, "bitmap full\n"); + hfs_dbg(BITMAP, "bitmap full\n"); start = size; goto out; found: start = offset + (curr - pptr) * 32 + i; if (start >= size) { - dprint(DBG_BITMAP, "bitmap full\n"); + hfs_dbg(BITMAP, "bitmap full\n"); goto out; } /* do any partial u32 at the start */ @@ -154,7 +154,7 @@ done: *max = offset + (curr - pptr) * 32 + i - start; sbi->free_blocks -= *max; hfsplus_mark_mdb_dirty(sb); - dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); + hfs_dbg(BITMAP, "-> %u,%u\n", start, *max); out: mutex_unlock(&sbi->alloc_mutex); return start; @@ -173,7 +173,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) if (!count) return 0; - dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); + hfs_dbg(BITMAP, "block_free: %u,%u\n", offset, count); /* are all of the bits in range? */ if ((offset + count) > sbi->total_blocks) return -ENOENT; @@ -238,7 +238,7 @@ out: return 0; kaboom: - printk(KERN_CRIT "hfsplus: unable to mark blocks free: error %ld\n", + pr_crit("hfsplus: unable to mark blocks free: error %ld\n", PTR_ERR(page)); mutex_unlock(&sbi->alloc_mutex); diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index f31ac6f404f1..11c860204520 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -130,7 +130,7 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, struct page **src_page, **dst_page; int l; - dprint(DBG_BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); + hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); if (!len) return; tree = src_node->tree; @@ -188,7 +188,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) struct page **src_page, **dst_page; int l; - dprint(DBG_BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); + hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); if (!len) return; src += node->page_offset; @@ -302,16 +302,16 @@ void hfs_bnode_dump(struct hfs_bnode *node) __be32 cnid; int i, off, key_off; - dprint(DBG_BNODE_MOD, "bnode: %d\n", node->this); + hfs_dbg(BNODE_MOD, "bnode: %d\n", node->this); hfs_bnode_read(node, &desc, 0, sizeof(desc)); - dprint(DBG_BNODE_MOD, "%d, %d, %d, %d, %d\n", + hfs_dbg(BNODE_MOD, "%d, %d, %d, %d, %d\n", be32_to_cpu(desc.next), be32_to_cpu(desc.prev), desc.type, desc.height, be16_to_cpu(desc.num_recs)); off = node->tree->node_size - 2; for (i = be16_to_cpu(desc.num_recs); i >= 0; off -= 2, i--) { key_off = hfs_bnode_read_u16(node, off); - dprint(DBG_BNODE_MOD, " %d", key_off); + hfs_dbg(BNODE_MOD, " %d", key_off); if (i && node->type == HFS_NODE_INDEX) { int tmp; @@ -320,17 +320,17 @@ void hfs_bnode_dump(struct hfs_bnode *node) tmp = hfs_bnode_read_u16(node, key_off) + 2; else tmp = node->tree->max_key_len + 2; - dprint(DBG_BNODE_MOD, " (%d", tmp); + hfs_dbg_cont(BNODE_MOD, " (%d", tmp); hfs_bnode_read(node, &cnid, key_off + tmp, 4); - dprint(DBG_BNODE_MOD, ",%d)", be32_to_cpu(cnid)); + hfs_dbg_cont(BNODE_MOD, ",%d)", be32_to_cpu(cnid)); } else if (i && node->type == HFS_NODE_LEAF) { int tmp; tmp = hfs_bnode_read_u16(node, key_off); - dprint(DBG_BNODE_MOD, " (%d)", tmp); + hfs_dbg_cont(BNODE_MOD, " (%d)", tmp); } } - dprint(DBG_BNODE_MOD, "\n"); + hfs_dbg_cont(BNODE_MOD, "\n"); } void hfs_bnode_unlink(struct hfs_bnode *node) @@ -366,7 +366,7 @@ void hfs_bnode_unlink(struct hfs_bnode *node) /* move down? */ if (!node->prev && !node->next) - dprint(DBG_BNODE_MOD, "hfs_btree_del_level\n"); + hfs_dbg(BNODE_MOD, "hfs_btree_del_level\n"); if (!node->parent) { tree->root = 0; tree->depth = 0; @@ -386,7 +386,7 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid) struct hfs_bnode *node; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node " + pr_err("request for non-existent node " "%d in B*Tree\n", cnid); return NULL; @@ -409,7 +409,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) loff_t off; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node " + pr_err("request for non-existent node " "%d in B*Tree\n", cnid); return NULL; @@ -425,8 +425,8 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) node->this = cnid; set_bit(HFS_BNODE_NEW, &node->flags); atomic_set(&node->refcnt, 1); - dprint(DBG_BNODE_REFS, "new_node(%d:%d): 1\n", - node->tree->cnid, node->this); + hfs_dbg(BNODE_REFS, "new_node(%d:%d): 1\n", + node->tree->cnid, node->this); init_waitqueue_head(&node->lock_wq); spin_lock(&tree->hash_lock); node2 = hfs_bnode_findhash(tree, cnid); @@ -470,7 +470,7 @@ void hfs_bnode_unhash(struct hfs_bnode *node) { struct hfs_bnode **p; - dprint(DBG_BNODE_REFS, "remove_node(%d:%d): %d\n", + hfs_dbg(BNODE_REFS, "remove_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)]; *p && *p != node; p = &(*p)->next_hash) @@ -588,7 +588,7 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num) node = hfs_bnode_findhash(tree, num); spin_unlock(&tree->hash_lock); if (node) { - printk(KERN_CRIT "new node %u already hashed?\n", num); + pr_crit("new node %u already hashed?\n", num); WARN_ON(1); return node; } @@ -620,7 +620,7 @@ void hfs_bnode_get(struct hfs_bnode *node) { if (node) { atomic_inc(&node->refcnt); - dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", + hfs_dbg(BNODE_REFS, "get_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); } @@ -633,7 +633,7 @@ void hfs_bnode_put(struct hfs_bnode *node) struct hfs_btree *tree = node->tree; int i; - dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", + hfs_dbg(BNODE_REFS, "put_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); BUG_ON(!atomic_read(&node->refcnt)); diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 298d4e45604b..6e560d56094b 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -45,13 +45,13 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) if (!recoff) return 0; if (recoff > node->tree->node_size - 2) { - printk(KERN_ERR "hfs: recoff %d too large\n", recoff); + pr_err("recoff %d too large\n", recoff); return 0; } retval = hfs_bnode_read_u16(node, recoff) + 2; if (retval > node->tree->max_key_len + 2) { - printk(KERN_ERR "hfs: keylen %d too large\n", + pr_err("keylen %d too large\n", retval); retval = 0; } @@ -90,7 +90,7 @@ again: end_rec_off = tree->node_size - (node->num_recs + 1) * 2; end_off = hfs_bnode_read_u16(node, end_rec_off); end_rec_off -= 2; - dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", + hfs_dbg(BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", rec, size, end_off, end_rec_off); if (size > end_rec_off - end_off) { if (new_node) @@ -191,7 +191,7 @@ again: mark_inode_dirty(tree->inode); } hfs_bnode_dump(node); - dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n", + hfs_dbg(BNODE_MOD, "remove_rec: %d, %d\n", fd->record, fd->keylength + fd->entrylength); if (!--node->num_recs) { hfs_bnode_unlink(node); @@ -244,7 +244,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) if (IS_ERR(new_node)) return new_node; hfs_bnode_get(node); - dprint(DBG_BNODE_MOD, "split_nodes: %d - %d - %d\n", + hfs_dbg(BNODE_MOD, "split_nodes: %d - %d - %d\n", node->this, new_node->this, node->next); new_node->next = node->next; new_node->prev = node->this; @@ -379,7 +379,7 @@ again: newkeylen = hfs_bnode_read_u16(node, 14) + 2; else fd->keylength = newkeylen = tree->max_key_len + 2; - dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n", + hfs_dbg(BNODE_MOD, "update_rec: %d, %d, %d\n", rec, fd->keylength, newkeylen); rec_off = tree->node_size - (rec + 2) * 2; @@ -391,7 +391,7 @@ again: end_off = hfs_bnode_read_u16(parent, end_rec_off); if (end_rec_off - end_off < diff) { - dprint(DBG_BNODE_MOD, "hfs: splitting index node.\n"); + hfs_dbg(BNODE_MOD, "splitting index node\n"); fd->bnode = parent; new_node = hfs_bnode_split(fd); if (IS_ERR(new_node)) diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index efb689c21a95..0c6540c91167 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -40,8 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) tree->inode = inode; if (!HFSPLUS_I(tree->inode)->first_blocks) { - printk(KERN_ERR - "hfs: invalid btree extent records (0 size).\n"); + pr_err("invalid btree extent records (0 size)\n"); goto free_inode; } @@ -68,12 +67,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) switch (id) { case HFSPLUS_EXT_CNID: if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) { - printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", + pr_err("invalid extent max_key_len %d\n", tree->max_key_len); goto fail_page; } if (tree->attributes & HFS_TREE_VARIDXKEYS) { - printk(KERN_ERR "hfs: invalid extent btree flag\n"); + pr_err("invalid extent btree flag\n"); goto fail_page; } @@ -81,12 +80,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) break; case HFSPLUS_CAT_CNID: if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) { - printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", + pr_err("invalid catalog max_key_len %d\n", tree->max_key_len); goto fail_page; } if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { - printk(KERN_ERR "hfs: invalid catalog btree flag\n"); + pr_err("invalid catalog btree flag\n"); goto fail_page; } @@ -100,19 +99,19 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) break; case HFSPLUS_ATTR_CNID: if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) { - printk(KERN_ERR "hfs: invalid attributes max_key_len %d\n", + pr_err("invalid attributes max_key_len %d\n", tree->max_key_len); goto fail_page; } tree->keycmp = hfsplus_attr_bin_cmp_key; break; default: - printk(KERN_ERR "hfs: unknown B*Tree requested\n"); + pr_err("unknown B*Tree requested\n"); goto fail_page; } if (!(tree->attributes & HFS_TREE_BIGKEYS)) { - printk(KERN_ERR "hfs: invalid btree flag\n"); + pr_err("invalid btree flag\n"); goto fail_page; } @@ -155,7 +154,7 @@ void hfs_btree_close(struct hfs_btree *tree) while ((node = tree->node_hash[i])) { tree->node_hash[i] = node->next_hash; if (atomic_read(&node->refcnt)) - printk(KERN_CRIT "hfs: node %d:%d " + pr_crit("node %d:%d " "still has %d user(s)!\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); @@ -303,7 +302,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) kunmap(*pagep); nidx = node->next; if (!nidx) { - dprint(DBG_BNODE_MOD, "hfs: create new bmap node.\n"); + hfs_dbg(BNODE_MOD, "create new bmap node\n"); next_node = hfs_bmap_new_bmap(node, idx); } else next_node = hfs_bnode_find(tree, nidx); @@ -329,7 +328,7 @@ void hfs_bmap_free(struct hfs_bnode *node) u32 nidx; u8 *data, byte, m; - dprint(DBG_BNODE_MOD, "btree_free_node: %u\n", node->this); + hfs_dbg(BNODE_MOD, "btree_free_node: %u\n", node->this); BUG_ON(!node->this); tree = node->tree; nidx = node->this; @@ -345,7 +344,7 @@ void hfs_bmap_free(struct hfs_bnode *node) hfs_bnode_put(node); if (!i) { /* panic */; - printk(KERN_CRIT "hfs: unable to free bnode %u. " + pr_crit("unable to free bnode %u. " "bmap not found!\n", node->this); return; @@ -355,7 +354,7 @@ void hfs_bmap_free(struct hfs_bnode *node) return; if (node->type != HFS_NODE_MAP) { /* panic */; - printk(KERN_CRIT "hfs: invalid bmap found! " + pr_crit("invalid bmap found! " "(%u,%d)\n", node->this, node->type); hfs_bnode_put(node); @@ -370,7 +369,7 @@ void hfs_bmap_free(struct hfs_bnode *node) m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { - printk(KERN_CRIT "hfs: trying to free free bnode " + pr_crit("trying to free free bnode " "%u(%d)\n", node->this, node->type); kunmap(page); diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 840d71edd193..968ce411db53 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -188,12 +188,12 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, type = be16_to_cpu(tmp.type); if (type != HFSPLUS_FOLDER_THREAD && type != HFSPLUS_FILE_THREAD) { - printk(KERN_ERR "hfs: found bad thread record in catalog\n"); + pr_err("found bad thread record in catalog\n"); return -EIO; } if (be16_to_cpu(tmp.thread.nodeName.length) > 255) { - printk(KERN_ERR "hfs: catalog name length corrupted\n"); + pr_err("catalog name length corrupted\n"); return -EIO; } @@ -212,7 +212,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, int entry_size; int err; - dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", + hfs_dbg(CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (err) @@ -271,8 +271,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) int err, off; u16 type; - dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", - str ? str->name : NULL, cnid); + hfs_dbg(CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (err) return err; @@ -361,7 +360,7 @@ int hfsplus_rename_cat(u32 cnid, int entry_size, type; int err; - dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", + hfs_dbg(CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 031c24e50521..a37ac934732f 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -103,7 +103,7 @@ again: } else if (!dentry->d_fsdata) dentry->d_fsdata = (void *)(unsigned long)cnid; } else { - printk(KERN_ERR "hfs: invalid catalog entry type in lookup\n"); + pr_err("invalid catalog entry type in lookup\n"); err = -EIO; goto fail; } @@ -159,12 +159,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) { - printk(KERN_ERR "hfs: bad catalog folder thread\n"); + pr_err("bad catalog folder thread\n"); err = -EIO; goto out; } if (fd.entrylength < HFSPLUS_MIN_THREAD_SZ) { - printk(KERN_ERR "hfs: truncated catalog thread\n"); + pr_err("truncated catalog thread\n"); err = -EIO; goto out; } @@ -183,7 +183,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) for (;;) { if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) { - printk(KERN_ERR "hfs: walked past end of dir\n"); + pr_err("walked past end of dir\n"); err = -EIO; goto out; } @@ -203,7 +203,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (type == HFSPLUS_FOLDER) { if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) { - printk(KERN_ERR "hfs: small dir entry\n"); + pr_err("small dir entry\n"); err = -EIO; goto out; } @@ -216,7 +216,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } else if (type == HFSPLUS_FILE) { if (fd.entrylength < sizeof(struct hfsplus_cat_file)) { - printk(KERN_ERR "hfs: small file entry\n"); + pr_err("small file entry\n"); err = -EIO; goto out; } @@ -224,7 +224,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) be32_to_cpu(entry.file.id), DT_REG)) break; } else { - printk(KERN_ERR "hfs: bad catalog entry type\n"); + pr_err("bad catalog entry type\n"); err = -EIO; goto out; } diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index fe0a76213d9e..fbb212fbb1ef 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -83,7 +83,7 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) return be32_to_cpu(ext->start_block) + be32_to_cpu(ext->block_count); } -static void __hfsplus_ext_write_extent(struct inode *inode, +static int __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) { struct hfsplus_inode_info *hip = HFSPLUS_I(inode); @@ -98,13 +98,13 @@ static void __hfsplus_ext_write_extent(struct inode *inode, res = hfs_brec_find(fd, hfs_find_rec_by_key); if (hip->extent_state & HFSPLUS_EXT_NEW) { if (res != -ENOENT) - return; + return res; hfs_brec_insert(fd, hip->cached_extents, sizeof(hfsplus_extent_rec)); hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); } else { if (res) - return; + return res; hfs_bnode_write(fd->bnode, hip->cached_extents, fd->entryoffset, fd->entrylength); hip->extent_state &= ~HFSPLUS_EXT_DIRTY; @@ -117,11 +117,13 @@ static void __hfsplus_ext_write_extent(struct inode *inode, * to explicily mark the inode dirty, too. */ set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); + + return 0; } static int hfsplus_ext_write_extent_locked(struct inode *inode) { - int res; + int res = 0; if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { struct hfs_find_data fd; @@ -129,10 +131,10 @@ static int hfsplus_ext_write_extent_locked(struct inode *inode) res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); if (res) return res; - __hfsplus_ext_write_extent(inode, &fd); + res = __hfsplus_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } - return 0; + return res; } int hfsplus_ext_write_extent(struct inode *inode) @@ -175,8 +177,11 @@ static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, WARN_ON(!mutex_is_locked(&hip->extents_lock)); - if (hip->extent_state & HFSPLUS_EXT_DIRTY) - __hfsplus_ext_write_extent(inode, fd); + if (hip->extent_state & HFSPLUS_EXT_DIRTY) { + res = __hfsplus_ext_write_extent(inode, fd); + if (res) + return res; + } res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, block, HFSPLUS_IS_RSRC(inode) ? @@ -265,7 +270,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, mutex_unlock(&hip->extents_lock); done: - dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", + hfs_dbg(EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); mask = (1 << sbi->fs_shift) - 1; @@ -288,11 +293,12 @@ static void hfsplus_dump_extent(struct hfsplus_extent *extent) { int i; - dprint(DBG_EXTENT, " "); + hfs_dbg(EXTENT, " "); for (i = 0; i < 8; i++) - dprint(DBG_EXTENT, " %u:%u", be32_to_cpu(extent[i].start_block), - be32_to_cpu(extent[i].block_count)); - dprint(DBG_EXTENT, "\n"); + hfs_dbg_cont(EXTENT, " %u:%u", + be32_to_cpu(extent[i].start_block), + be32_to_cpu(extent[i].block_count)); + hfs_dbg_cont(EXTENT, "\n"); } static int hfsplus_add_extent(struct hfsplus_extent *extent, u32 offset, @@ -348,8 +354,8 @@ found: if (count <= block_nr) { err = hfsplus_block_free(sb, start, count); if (err) { - printk(KERN_ERR "hfs: can't free extent\n"); - dprint(DBG_EXTENT, " start: %u count: %u\n", + pr_err("can't free extent\n"); + hfs_dbg(EXTENT, " start: %u count: %u\n", start, count); } extent->block_count = 0; @@ -359,8 +365,8 @@ found: count -= block_nr; err = hfsplus_block_free(sb, start + count, block_nr); if (err) { - printk(KERN_ERR "hfs: can't free extent\n"); - dprint(DBG_EXTENT, " start: %u count: %u\n", + pr_err("can't free extent\n"); + hfs_dbg(EXTENT, " start: %u count: %u\n", start, count); } extent->block_count = cpu_to_be32(count); @@ -432,7 +438,7 @@ int hfsplus_file_extend(struct inode *inode) if (sbi->alloc_file->i_size * 8 < sbi->total_blocks - sbi->free_blocks + 8) { /* extend alloc file */ - printk(KERN_ERR "hfs: extend alloc file! " + pr_err("extend alloc file! " "(%llu,%u,%u)\n", sbi->alloc_file->i_size * 8, sbi->total_blocks, sbi->free_blocks); @@ -459,11 +465,11 @@ int hfsplus_file_extend(struct inode *inode) } } - dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); + hfs_dbg(EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); if (hip->alloc_blocks <= hip->first_blocks) { if (!hip->first_blocks) { - dprint(DBG_EXTENT, "first extents\n"); + hfs_dbg(EXTENT, "first extents\n"); /* no extents yet */ hip->first_extents[0].start_block = cpu_to_be32(start); hip->first_extents[0].block_count = cpu_to_be32(len); @@ -500,7 +506,7 @@ out: return res; insert_extent: - dprint(DBG_EXTENT, "insert new extent\n"); + hfs_dbg(EXTENT, "insert new extent\n"); res = hfsplus_ext_write_extent_locked(inode); if (res) goto out; @@ -525,9 +531,8 @@ void hfsplus_file_truncate(struct inode *inode) u32 alloc_cnt, blk_cnt, start; int res; - dprint(DBG_INODE, "truncate: %lu, %llu -> %llu\n", - inode->i_ino, (long long)hip->phys_size, - inode->i_size); + hfs_dbg(INODE, "truncate: %lu, %llu -> %llu\n", + inode->i_ino, (long long)hip->phys_size, inode->i_size); if (inode->i_size > hip->phys_size) { struct address_space *mapping = inode->i_mapping; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 05b11f36024c..60b0a3388b26 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -10,6 +10,12 @@ #ifndef _LINUX_HFSPLUS_FS_H #define _LINUX_HFSPLUS_FS_H +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/fs.h> #include <linux/mutex.h> #include <linux/buffer_head.h> @@ -32,9 +38,17 @@ #endif #define DBG_MASK (0) -#define dprint(flg, fmt, args...) \ - if (flg & DBG_MASK) \ - printk(fmt , ## args) +#define hfs_dbg(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ +} while (0) + +#define hfs_dbg_cont(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + pr_cont(fmt, ##__VA_ARGS__); \ +} while (0) /* Runtime config options */ #define HFSPLUS_DEF_CR_TYPE 0x3F3F3F3F /* '????' */ diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 160ccc9cdb4b..f833d35630ab 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -14,6 +14,7 @@ #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/sched.h> +#include <linux/aio.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" @@ -357,7 +358,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, if (!error) error = error2; } else { - printk(KERN_ERR "hfs: sync non-existent attributes tree\n"); + pr_err("sync non-existent attributes tree\n"); } } @@ -573,7 +574,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); HFSPLUS_I(inode)->create_date = file->create_date; } else { - printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); + pr_err("bad catalog entry used to create inode\n"); res = -EIO; } return res; diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index ed257c671615..968eab5bc1f5 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -113,67 +113,67 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) switch (token) { case opt_creator: if (match_fourchar(&args[0], &sbi->creator)) { - printk(KERN_ERR "hfs: creator requires a 4 character value\n"); + pr_err("creator requires a 4 character value\n"); return 0; } break; case opt_type: if (match_fourchar(&args[0], &sbi->type)) { - printk(KERN_ERR "hfs: type requires a 4 character value\n"); + pr_err("type requires a 4 character value\n"); return 0; } break; case opt_umask: if (match_octal(&args[0], &tmp)) { - printk(KERN_ERR "hfs: umask requires a value\n"); + pr_err("umask requires a value\n"); return 0; } sbi->umask = (umode_t)tmp; break; case opt_uid: if (match_int(&args[0], &tmp)) { - printk(KERN_ERR "hfs: uid requires an argument\n"); + pr_err("uid requires an argument\n"); return 0; } sbi->uid = make_kuid(current_user_ns(), (uid_t)tmp); if (!uid_valid(sbi->uid)) { - printk(KERN_ERR "hfs: invalid uid specified\n"); + pr_err("invalid uid specified\n"); return 0; } break; case opt_gid: if (match_int(&args[0], &tmp)) { - printk(KERN_ERR "hfs: gid requires an argument\n"); + pr_err("gid requires an argument\n"); return 0; } sbi->gid = make_kgid(current_user_ns(), (gid_t)tmp); if (!gid_valid(sbi->gid)) { - printk(KERN_ERR "hfs: invalid gid specified\n"); + pr_err("invalid gid specified\n"); return 0; } break; case opt_part: if (match_int(&args[0], &sbi->part)) { - printk(KERN_ERR "hfs: part requires an argument\n"); + pr_err("part requires an argument\n"); return 0; } break; case opt_session: if (match_int(&args[0], &sbi->session)) { - printk(KERN_ERR "hfs: session requires an argument\n"); + pr_err("session requires an argument\n"); return 0; } break; case opt_nls: if (sbi->nls) { - printk(KERN_ERR "hfs: unable to change nls mapping\n"); + pr_err("unable to change nls mapping\n"); return 0; } p = match_strdup(&args[0]); if (p) sbi->nls = load_nls(p); if (!sbi->nls) { - printk(KERN_ERR "hfs: unable to load " + pr_err("unable to load " "nls mapping \"%s\"\n", p); kfree(p); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 7b87284e46dc..4c4d142cf890 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -132,7 +132,7 @@ static int hfsplus_system_write_inode(struct inode *inode) if (tree) { int err = hfs_btree_write(tree); if (err) { - printk(KERN_ERR "hfs: b-tree write err: %d, ino %lu\n", + pr_err("b-tree write err: %d, ino %lu\n", err, inode->i_ino); return err; } @@ -145,7 +145,7 @@ static int hfsplus_write_inode(struct inode *inode, { int err; - dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); + hfs_dbg(INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); err = hfsplus_ext_write_extent(inode); if (err) @@ -160,7 +160,7 @@ static int hfsplus_write_inode(struct inode *inode, static void hfsplus_evict_inode(struct inode *inode) { - dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); + hfs_dbg(INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); if (HFSPLUS_IS_RSRC(inode)) { @@ -179,7 +179,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) if (!wait) return 0; - dprint(DBG_SUPER, "hfsplus_sync_fs\n"); + hfs_dbg(SUPER, "hfsplus_sync_fs\n"); /* * Explicitly write out the special metadata inodes. @@ -251,7 +251,7 @@ static void delayed_sync_fs(struct work_struct *work) err = hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); if (err) - printk(KERN_ERR "hfs: delayed sync fs err %d\n", err); + pr_err("delayed sync fs err %d\n", err); } void hfsplus_mark_mdb_dirty(struct super_block *sb) @@ -275,7 +275,7 @@ static void hfsplus_put_super(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - dprint(DBG_SUPER, "hfsplus_put_super\n"); + hfs_dbg(SUPER, "hfsplus_put_super\n"); cancel_delayed_work_sync(&sbi->sync_work); @@ -333,25 +333,19 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) return -EINVAL; if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { - printk(KERN_WARNING "hfs: filesystem was " - "not cleanly unmounted, " - "running fsck.hfsplus is recommended. " - "leaving read-only.\n"); + pr_warn("filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } else if (force) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { - printk(KERN_WARNING "hfs: filesystem is marked locked, " - "leaving read-only.\n"); + pr_warn("filesystem is marked locked, leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { - printk(KERN_WARNING "hfs: filesystem is " - "marked journaled, " - "leaving read-only.\n"); + pr_warn("filesystem is marked journaled, leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } @@ -397,7 +391,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; if (!hfsplus_parse_options(data, sbi)) { - printk(KERN_ERR "hfs: unable to parse mount options\n"); + pr_err("unable to parse mount options\n"); goto out_unload_nls; } @@ -405,14 +399,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) nls = sbi->nls; sbi->nls = load_nls("utf8"); if (!sbi->nls) { - printk(KERN_ERR "hfs: unable to load nls for utf8\n"); + pr_err("unable to load nls for utf8\n"); goto out_unload_nls; } /* Grab the volume header */ if (hfsplus_read_wrapper(sb)) { if (!silent) - printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); + pr_warn("unable to find HFS+ superblock\n"); goto out_unload_nls; } vhdr = sbi->s_vhdr; @@ -421,7 +415,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = HFSPLUS_VOLHEAD_SIG; if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { - printk(KERN_ERR "hfs: wrong filesystem version\n"); + pr_err("wrong filesystem version\n"); goto out_free_vhdr; } sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); @@ -445,7 +439,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) || (last_fs_page > (pgoff_t)(~0ULL))) { - printk(KERN_ERR "hfs: filesystem size too large.\n"); + pr_err("filesystem size too large\n"); goto out_free_vhdr; } @@ -454,22 +448,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { - printk(KERN_WARNING "hfs: Filesystem was " - "not cleanly unmounted, " - "running fsck.hfsplus is recommended. " - "mounting read-only.\n"); + pr_warn("Filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { - printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); + pr_warn("Filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) { - printk(KERN_WARNING "hfs: write access to " - "a journaled filesystem is not supported, " - "use the force option at your own risk, " - "mounting read-only.\n"); + pr_warn("write access to a journaled filesystem is not supported, use the force option at your own risk, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } @@ -478,18 +466,18 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) /* Load metadata objects (B*Trees) */ sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { - printk(KERN_ERR "hfs: failed to load extents file\n"); + pr_err("failed to load extents file\n"); goto out_free_vhdr; } sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); if (!sbi->cat_tree) { - printk(KERN_ERR "hfs: failed to load catalog file\n"); + pr_err("failed to load catalog file\n"); goto out_close_ext_tree; } if (vhdr->attr_file.total_blocks != 0) { sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); if (!sbi->attr_tree) { - printk(KERN_ERR "hfs: failed to load attributes file\n"); + pr_err("failed to load attributes file\n"); goto out_close_cat_tree; } } @@ -497,7 +485,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); if (IS_ERR(inode)) { - printk(KERN_ERR "hfs: failed to load allocation file\n"); + pr_err("failed to load allocation file\n"); err = PTR_ERR(inode); goto out_close_attr_tree; } @@ -506,7 +494,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) /* Load the root directory */ root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); if (IS_ERR(root)) { - printk(KERN_ERR "hfs: failed to load root directory\n"); + pr_err("failed to load root directory\n"); err = PTR_ERR(root); goto out_put_alloc_file; } diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 90effcccca9a..96375a5124b2 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -24,7 +24,8 @@ struct hfsplus_wd { u16 embed_count; }; -static void hfsplus_end_io_sync(struct bio *bio, int err) +static void hfsplus_end_io_sync(struct bio *bio, int err, + struct batch_complete *batch) { if (err) clear_bit(BIO_UPTODATE, &bio->bi_flags); @@ -156,7 +157,7 @@ static int hfsplus_get_last_session(struct super_block *sb, *start = (sector_t)te.cdte_addr.lba << 2; return 0; } - printk(KERN_ERR "hfs: invalid session number or type of track\n"); + pr_err("invalid session number or type of track\n"); return -EINVAL; } ms_info.addr_format = CDROM_LBA; @@ -234,8 +235,7 @@ reread: error = -EINVAL; if (sbi->s_backup_vhdr->signature != sbi->s_vhdr->signature) { - printk(KERN_WARNING - "hfs: invalid secondary volume header\n"); + pr_warn("invalid secondary volume header\n"); goto out_free_backup_vhdr; } @@ -259,8 +259,7 @@ reread: blocksize >>= 1; if (sb_set_blocksize(sb, blocksize) != blocksize) { - printk(KERN_ERR "hfs: unable to set blocksize to %u!\n", - blocksize); + pr_err("unable to set blocksize to %u!\n", blocksize); goto out_free_backup_vhdr; } diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index e8a4b0815c61..f66346155df5 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -107,19 +107,19 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd); if (err) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return err; } err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd); if (err) { - printk(KERN_ERR "hfs: catalog searching failed\n"); + pr_err("catalog searching failed\n"); goto end_setxattr; } if (!strcmp_xattr_finder_info(name)) { if (flags & XATTR_CREATE) { - printk(KERN_ERR "hfs: xattr exists yet\n"); + pr_err("xattr exists yet\n"); err = -EOPNOTSUPP; goto end_setxattr; } @@ -165,7 +165,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, if (hfsplus_attr_exists(inode, name)) { if (flags & XATTR_CREATE) { - printk(KERN_ERR "hfs: xattr exists yet\n"); + pr_err("xattr exists yet\n"); err = -EOPNOTSUPP; goto end_setxattr; } @@ -177,7 +177,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, goto end_setxattr; } else { if (flags & XATTR_REPLACE) { - printk(KERN_ERR "hfs: cannot replace xattr\n"); + pr_err("cannot replace xattr\n"); err = -EOPNOTSUPP; goto end_setxattr; } @@ -210,7 +210,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, cat_entry_flags); hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); } else { - printk(KERN_ERR "hfs: invalid catalog entry type\n"); + pr_err("invalid catalog entry type\n"); err = -EIO; goto end_setxattr; } @@ -269,7 +269,7 @@ static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry, if (size >= record_len) { res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); if (res) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return res; } res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); @@ -340,13 +340,13 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, entry = hfsplus_alloc_attr_entry(); if (!entry) { - printk(KERN_ERR "hfs: can't allocate xattr entry\n"); + pr_err("can't allocate xattr entry\n"); return -ENOMEM; } res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd); if (res) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); goto failed_getxattr_init; } @@ -355,7 +355,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, if (res == -ENOENT) res = -ENODATA; else - printk(KERN_ERR "hfs: xattr searching failed\n"); + pr_err("xattr searching failed\n"); goto out; } @@ -368,17 +368,17 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, offsetof(struct hfsplus_attr_inline_data, length)); if (record_length > HFSPLUS_MAX_INLINE_DATA_SIZE) { - printk(KERN_ERR "hfs: invalid xattr record size\n"); + pr_err("invalid xattr record size\n"); res = -EIO; goto out; } } else if (record_type == HFSPLUS_ATTR_FORK_DATA || record_type == HFSPLUS_ATTR_EXTENTS) { - printk(KERN_ERR "hfs: only inline data xattr are supported\n"); + pr_err("only inline data xattr are supported\n"); res = -EOPNOTSUPP; goto out; } else { - printk(KERN_ERR "hfs: invalid xattr record\n"); + pr_err("invalid xattr record\n"); res = -EIO; goto out; } @@ -427,7 +427,7 @@ static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry, res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); if (res) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return res; } @@ -506,7 +506,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd); if (err) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return err; } @@ -525,8 +525,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) for (;;) { key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset); if (key_len == 0 || key_len > fd.tree->max_key_len) { - printk(KERN_ERR "hfs: invalid xattr key length: %d\n", - key_len); + pr_err("invalid xattr key length: %d\n", key_len); res = -EIO; goto end_listxattr; } @@ -541,7 +540,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) if (hfsplus_uni2asc(inode->i_sb, (const struct hfsplus_unistr *)&fd.key->attr.key_name, strbuf, &xattr_name_len)) { - printk(KERN_ERR "hfs: unicode conversion failed\n"); + pr_err("unicode conversion failed\n"); res = -EIO; goto end_listxattr; } @@ -598,13 +597,13 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name) err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd); if (err) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return err; } err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd); if (err) { - printk(KERN_ERR "hfs: catalog searching failed\n"); + pr_err("catalog searching failed\n"); goto end_removexattr; } @@ -643,7 +642,7 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name) flags); hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); } else { - printk(KERN_ERR "hfs: invalid catalog entry type\n"); + pr_err("invalid catalog entry type\n"); err = -EIO; goto end_removexattr; } diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 86b39b167c23..11bb11f48b3a 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -162,8 +162,17 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs, for (i = 0; i < bufs; i++) { wbuf[i]->b_end_io = end_buffer_write_sync; - /* We use-up our safety reference in submit_bh() */ - submit_bh(write_op, wbuf[i]); + /* + * Here we write back pagecache data that may be mmaped. Since + * we cannot afford to clean the page and set PageWriteback + * here due to lock ordering (page lock ranks above transaction + * start), the data can change while IO is in flight. Tell the + * block layer it should bounce the bio pages if stable data + * during write is required. + * + * We use up our safety reference in submit_bh(). + */ + _submit_bh(write_op, wbuf[i], 1 << BIO_SNAP_STABLE); } } @@ -667,7 +676,17 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(write_op, bh); + /* + * In data=journal mode, here we can end up + * writing pagecache data that might be + * mmapped. Since we can't afford to clean the + * page and set PageWriteback (see the comment + * near the other use of _submit_bh()), the + * data can change while the write is in + * flight. Tell the block layer to bounce the + * bio pages if stable pages are required. + */ + _submit_bh(write_op, bh, 1 << BIO_SNAP_STABLE); } cond_resched(); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index b7dc47ba675e..1781f06aa1c1 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -23,6 +23,7 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/writeback.h> +#include <linux/aio.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 8ae5e350da43..e641f6e74ffc 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2152,7 +2152,7 @@ static void lbmStartIO(struct lbuf * bp) /* check if journaling to disk has been disabled */ if (log->no_integrity) { bio->bi_size = 0; - lbmIODone(bio, 0); + lbmIODone(bio, 0, NULL); } else { submit_bio(WRITE_SYNC, bio); INCREMENT(lmStat.submitted); @@ -2190,7 +2190,7 @@ static int lbmIOWait(struct lbuf * bp, int flag) * * executed at INTIODONE level */ -static void lbmIODone(struct bio *bio, int error) +static void lbmIODone(struct bio *bio, int error, struct batch_complete *batch) { struct lbuf *bp = bio->bi_private; struct lbuf *nextbp, *tail; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 6740d34cd82b..6ba675782e9f 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -283,7 +283,8 @@ static void last_read_complete(struct page *page) unlock_page(page); } -static void metapage_read_end_io(struct bio *bio, int err) +static void metapage_read_end_io(struct bio *bio, int err, + struct batch_complete *batch) { struct page *page = bio->bi_private; @@ -338,7 +339,8 @@ static void last_write_complete(struct page *page) end_page_writeback(page); } -static void metapage_write_end_io(struct bio *bio, int err) +static void metapage_write_end_io(struct bio *bio, int err, + struct batch_complete *batch) { struct page *page = bio->bi_private; diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 550475ca6a0e..0ae2254f74bf 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -14,7 +14,8 @@ #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) -static void request_complete(struct bio *bio, int err) +static void request_complete(struct bio *bio, int err, + struct batch_complete *batch) { complete((struct completion *)bio->bi_private); } @@ -64,7 +65,8 @@ static int bdev_readpage(void *_sb, struct page *page) static DECLARE_WAIT_QUEUE_HEAD(wq); -static void writeseg_end_io(struct bio *bio, int err) +static void writeseg_end_io(struct bio *bio, int err, + struct batch_complete *batch) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -168,7 +170,7 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len) } -static void erase_end_io(struct bio *bio, int err) +static void erase_end_io(struct bio *bio, int err, struct batch_complete *batch) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct super_block *sb = bio->bi_private; diff --git a/fs/mpage.c b/fs/mpage.c index 0face1c4d4c6..a4089bbfee0a 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -41,7 +41,7 @@ * status of that page is hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. */ -static void mpage_end_io(struct bio *bio, int err) +static void mpage_end_io(struct bio *bio, int err, struct batch_complete *batch) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 434b93ec0970..76cf69557051 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -143,7 +143,7 @@ bl_submit_bio(int rw, struct bio *bio) static struct bio *bl_alloc_init_bio(int npg, sector_t isect, struct pnfs_block_extent *be, - void (*end_io)(struct bio *, int err), + bio_end_io_t *end_io, struct parallel_io *par) { struct bio *bio; @@ -167,7 +167,7 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, struct page *page, struct pnfs_block_extent *be, - void (*end_io)(struct bio *, int err), + bio_end_io_t *end_io, struct parallel_io *par, unsigned int offset, int len) { @@ -190,7 +190,7 @@ retry: static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, struct page *page, struct pnfs_block_extent *be, - void (*end_io)(struct bio *, int err), + bio_end_io_t *end_io, struct parallel_io *par) { return do_add_page_to_bio(bio, npg, rw, isect, page, be, @@ -198,7 +198,8 @@ static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, } /* This is basically copied from mpage_end_io_read */ -static void bl_end_io_read(struct bio *bio, int err) +static void bl_end_io_read(struct bio *bio, int err, + struct batch_complete *batch) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -380,7 +381,8 @@ static void mark_extents_written(struct pnfs_block_layout *bl, } } -static void bl_end_io_write_zero(struct bio *bio, int err) +static void bl_end_io_write_zero(struct bio *bio, int err, + struct batch_complete *batch) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -408,7 +410,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err) put_parallel(par); } -static void bl_end_io_write(struct bio *bio, int err) +static void bl_end_io_write(struct bio *bio, int err, + struct batch_complete *batch) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -487,7 +490,7 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) } static void -bl_read_single_end_io(struct bio *bio, int error) +bl_read_single_end_io(struct bio *bio, int error, struct batch_complete *batch) { struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct page *page = bvec->bv_page; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e39bf5381bca..3dd49dc96cd9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -326,7 +326,6 @@ static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) { struct idr *stateids = &cl->cl_stateids; - static int min_stateid = 0; struct nfs4_stid *stid; int new_id; @@ -334,7 +333,7 @@ kmem_cache *slab) if (!stid) return NULL; - new_id = idr_alloc(stateids, stid, min_stateid, 0, GFP_KERNEL); + new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); if (new_id < 0) goto out_free; stid->sc_client = cl; @@ -353,10 +352,6 @@ kmem_cache *slab) * amount of time until an id is reused, by ensuring they always * "increase" (mod INT_MAX): */ - - min_stateid = new_id+1; - if (min_stateid == INT_MAX) - min_stateid = 0; return stid; out_free: kmem_cache_free(slab, stid); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 6b49f14eac8c..e1aeb0761928 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -25,7 +25,7 @@ #include <linux/gfp.h> #include <linux/mpage.h> #include <linux/writeback.h> -#include <linux/uio.h> +#include <linux/aio.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -175,6 +175,11 @@ static int nilfs_writepages(struct address_space *mapping, struct inode *inode = mapping->host; int err = 0; + if (inode->i_sb->s_flags & MS_RDONLY) { + nilfs_clear_dirty_pages(mapping, false); + return -EROFS; + } + if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_dsync_segment(inode->i_sb, inode, wbc->range_start, @@ -187,6 +192,18 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = page->mapping->host; int err; + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index f9897d09c693..c4dcd1db57ee 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -375,14 +375,25 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode; + struct inode *inode = page->mapping->host; struct super_block *sb; int err = 0; + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); - inode = page->mapping->host; if (!inode) return 0; @@ -561,10 +572,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping); + nilfs_clear_dirty_pages(inode->i_mapping, true); nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); - nilfs_clear_dirty_pages(&ii->i_btnode_cache); + nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 07f76db04ec7..2a2187f83005 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -370,7 +370,12 @@ repeat: goto repeat; } -void nilfs_clear_dirty_pages(struct address_space *mapping) +/** + * nilfs_clear_dirty_pages - discard dirty pages in address space + * @mapping: address space with dirty pages for discarding + * @silent: suppress [true] or print [false] warning messages + */ +void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) { struct pagevec pvec; unsigned int i; @@ -382,25 +387,9 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) PAGEVEC_SIZE)) { for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - struct buffer_head *bh, *head; lock_page(page); - ClearPageUptodate(page); - ClearPageMappedToDisk(page); - bh = head = page_buffers(page); - do { - lock_buffer(bh); - clear_buffer_dirty(bh); - clear_buffer_nilfs_volatile(bh); - clear_buffer_nilfs_checked(bh); - clear_buffer_nilfs_redirected(bh); - clear_buffer_uptodate(bh); - clear_buffer_mapped(bh); - unlock_buffer(bh); - bh = bh->b_this_page; - } while (bh != head); - - __nilfs_clear_page_dirty(page); + nilfs_clear_dirty_page(page, silent); unlock_page(page); } pagevec_release(&pvec); @@ -408,6 +397,51 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) } } +/** + * nilfs_clear_dirty_page - discard dirty page + * @page: dirty page that will be discarded + * @silent: suppress [true] or print [false] warning messages + */ +void nilfs_clear_dirty_page(struct page *page, bool silent) +{ + struct inode *inode = page->mapping->host; + struct super_block *sb = inode->i_sb; + + BUG_ON(!test_bit(PG_locked, &page->flags)); + + if (!silent) { + nilfs_warning(sb, __func__, + "discard page: offset %lld, ino %lu", + page_offset(page), inode->i_ino); + } + + ClearPageUptodate(page); + ClearPageMappedToDisk(page); + + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + lock_buffer(bh); + if (!silent) { + nilfs_warning(sb, __func__, + "discard block %llu, size %lu", + (u64)bh->b_blocknr, bh->b_size); + } + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); + clear_buffer_uptodate(bh); + clear_buffer_mapped(bh); + unlock_buffer(bh); + } while (bh = bh->b_this_page, bh != head); + } + + __nilfs_clear_page_dirty(page); +} + unsigned nilfs_page_count_clean_buffers(struct page *page, unsigned from, unsigned to) { diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index fb7de71605a0..ef30c5c2426f 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -55,7 +55,8 @@ void nilfs_page_bug(struct page *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_dirty_pages(struct address_space *); +void nilfs_clear_dirty_page(struct page *, bool); +void nilfs_clear_dirty_pages(struct address_space *, bool); void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, struct backing_dev_info *bdi); unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index dc9a913784ab..680b65b8a74d 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -338,7 +338,8 @@ void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed) /* * BIO operations */ -static void nilfs_end_bio_write(struct bio *bio, int err) +static void nilfs_end_bio_write(struct bio *bio, int err, + struct batch_complete *batch) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct nilfs_segment_buffer *segbuf = bio->bi_private; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index e0f7c1241a6a..8562bd3af947 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -359,7 +359,6 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns } static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock, - int *last_wd, struct inotify_inode_mark *i_mark) { int ret; @@ -367,11 +366,10 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock, idr_preload(GFP_KERNEL); spin_lock(idr_lock); - ret = idr_alloc(idr, i_mark, *last_wd + 1, 0, GFP_NOWAIT); + ret = idr_alloc_cyclic(idr, i_mark, 1, 0, GFP_NOWAIT); if (ret >= 0) { /* we added the mark to the idr, take a reference */ i_mark->wd = ret; - *last_wd = i_mark->wd; fsnotify_get_mark(&i_mark->fsn_mark); } @@ -638,8 +636,7 @@ static int inotify_new_watch(struct fsnotify_group *group, if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) goto out_err; - ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd, - tmp_i_mark); + ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); if (ret) goto out_err; @@ -697,7 +694,6 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); - group->inotify_data.last_wd = 0; group->inotify_data.user = get_current_user(); if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 1da4b81e6f76..c5670b8d198c 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -27,6 +27,7 @@ #include <linux/swap.h> #include <linux/uio.h> #include <linux/writeback.h> +#include <linux/aio.h> #include <asm/page.h> #include <asm/uaccess.h> diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index d3e118cc6ffa..2778b0255dc6 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -28,6 +28,7 @@ #include <linux/quotaops.h> #include <linux/slab.h> #include <linux/log2.h> +#include <linux/aio.h> #include "aops.h" #include "attrib.h" diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index ffb2da370a99..f671e49beb34 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -22,6 +22,8 @@ #ifndef OCFS2_AOPS_H #define OCFS2_AOPS_H +#include <linux/aio.h> + handle_t *ocfs2_start_walk_page_trans(struct inode *inode, struct page *page, unsigned from, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 8c3318bf2252..0cc19d0417bf 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -372,8 +372,8 @@ static void o2hb_wait_on_io(struct o2hb_region *reg, wait_for_completion(&wc->wc_io_complete); } -static void o2hb_bio_end_io(struct bio *bio, - int error) +static void o2hb_bio_end_io(struct bio *bio, int error, + struct batch_complete *batch) { struct o2hb_bio_wait_ctxt *wc = bio->bi_private; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 12ae194ac943..3a44a648dae7 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2322,7 +2322,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags, subclass, _RET_IP_); if (status < 0) { - if (status != -EAGAIN && status != -EIOCBRETRY) + if (status != -EAGAIN) mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 88924a3133fa..c765bdf6d60e 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -28,6 +28,8 @@ #include "extent_map.h" +struct iocb; + /* OCFS2 Inode Private Data */ struct ocfs2_inode_info { diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 752f0b26221d..0c60ef2d8056 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -101,13 +101,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, if (!S_ISDIR(inode->i_mode)) flags &= ~OCFS2_DIRSYNC_FL; - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - mlog_errno(status); - goto bail_unlock; - } - oldflags = ocfs2_inode->ip_attr; flags = flags & mask; flags |= oldflags & ~mask; @@ -120,7 +113,14 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) & (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) - goto bail_commit; + goto bail_unlock; + } + + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail_unlock; } ocfs2_inode->ip_attr = flags; @@ -130,8 +130,8 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, if (status < 0) mlog_errno(status); -bail_commit: ocfs2_commit_trans(osb, handle); + bail_unlock: ocfs2_inode_unlock(inode, 1); bail: @@ -706,8 +706,10 @@ int ocfs2_info_handle_freefrag(struct inode *inode, o2info_set_request_filled(&oiff->iff_req); - if (o2info_to_user(*oiff, req)) + if (o2info_to_user(*oiff, req)) { + status = -EFAULT; goto bail; + } status = 0; bail: diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 9f8dcadd9a50..f1fc172175b6 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -471,7 +471,7 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, int ret, goal_bit = 0; struct buffer_head *gd_bh = NULL; - struct ocfs2_group_desc *bg = NULL; + struct ocfs2_group_desc *bg; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int c_to_b = 1 << (osb->s_clustersize_bits - inode->i_sb->s_blocksize_bits); @@ -482,13 +482,6 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb, range->me_goal); /* - * moving goal is not allowd to start with a group desc blok(#0 blk) - * let's compromise to the latter cluster. - */ - if (range->me_goal == le64_to_cpu(bg->bg_blkno)) - range->me_goal += c_to_b; - - /* * validate goal sits within global_bitmap, and return the victim * group desc */ @@ -502,6 +495,13 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, bg = (struct ocfs2_group_desc *)gd_bh->b_data; /* + * moving goal is not allowd to start with a group desc blok(#0 blk) + * let's compromise to the latter cluster. + */ + if (range->me_goal == le64_to_cpu(bg->bg_blkno)) + range->me_goal += c_to_b; + + /* * movement is not gonna cross two groups. */ if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize < @@ -1057,42 +1057,40 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) struct inode *inode = file_inode(filp); struct ocfs2_move_extents range; - struct ocfs2_move_extents_context *context = NULL; + struct ocfs2_move_extents_context *context; + + if (!argp) + return -EINVAL; status = mnt_want_write_file(filp); if (status) return status; if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) - goto out; + goto out_drop; if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { status = -EPERM; - goto out; + goto out_drop; } context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS); if (!context) { status = -ENOMEM; mlog_errno(status); - goto out; + goto out_drop; } context->inode = inode; context->file = filp; - if (argp) { - if (copy_from_user(&range, argp, sizeof(range))) { - status = -EFAULT; - goto out; - } - } else { - status = -EINVAL; - goto out; + if (copy_from_user(&range, argp, sizeof(range))) { + status = -EFAULT; + goto out_free; } if (range.me_start > i_size_read(inode)) - goto out; + goto out_free; if (range.me_start + range.me_len > i_size_read(inode)) range.me_len = i_size_read(inode) - range.me_start; @@ -1124,25 +1122,24 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) status = ocfs2_validate_and_adjust_move_goal(inode, &range); if (status) - goto out; + goto out_copy; } status = ocfs2_move_extents(context); if (status) mlog_errno(status); -out: +out_copy: /* * movement/defragmentation may end up being partially completed, * that's the reason why we need to return userspace the finished * length and new_offset even if failure happens somewhere. */ - if (argp) { - if (copy_to_user(argp, &range, sizeof(range))) - status = -EFAULT; - } + if (copy_to_user(argp, &range, sizeof(range))) + status = -EFAULT; +out_free: kfree(context); - +out_drop: mnt_drop_write_file(filp); return status; diff --git a/fs/pipe.c b/fs/pipe.c index a029a14bacf1..d2c45e14e6d8 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -21,6 +21,7 @@ #include <linux/audit.h> #include <linux/syscalls.h> #include <linux/fcntl.h> +#include <linux/aio.h> #include <asm/uaccess.h> #include <asm/ioctls.h> diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 712f24db9600..ab30716584f5 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -5,7 +5,7 @@ obj-y += proc.o proc-y := nommu.o task_nommu.o -proc-$(CONFIG_MMU) := mmu.o task_mmu.o +proc-$(CONFIG_MMU) := task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ fd.o diff --git a/fs/proc/base.c b/fs/proc/base.c index 593e7c5ddb49..4c1e2adbeb90 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1347,11 +1347,10 @@ static ssize_t comm_write(struct file *file, const char __user *buf, struct inode *inode = file_inode(file); struct task_struct *p; char buffer[TASK_COMM_LEN]; + const size_t maxlen = sizeof(buffer) - 1; memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) + if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) return -EFAULT; p = get_proc_task(inode); @@ -2537,6 +2536,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("pagemap", S_IRUGO, proc_pagemap_operations), + REG("pagemap2", S_IRUGO, proc_pagemap2_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), @@ -2890,6 +2890,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_tid_smaps_operations), REG("pagemap", S_IRUGO, proc_pagemap_operations), + REG("pagemap2", S_IRUGO, proc_pagemap2_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 46a7e2a7b904..188b886302ac 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -30,24 +30,6 @@ extern int proc_net_init(void); static inline int proc_net_init(void) { return 0; } #endif -struct vmalloc_info { - unsigned long used; - unsigned long largest_chunk; -}; - -#ifdef CONFIG_MMU -#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) -extern void get_vmalloc_info(struct vmalloc_info *vmi); -#else - -#define VMALLOC_TOTAL 0UL -#define get_vmalloc_info(vmi) \ -do { \ - (vmi)->used = 0; \ - (vmi)->largest_chunk = 0; \ -} while(0) -#endif - extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, @@ -67,6 +49,7 @@ extern const struct file_operations proc_pid_smaps_operations; extern const struct file_operations proc_tid_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; +extern const struct file_operations proc_pagemap2_operations; extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index eda6f017f272..f6a13f489e30 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -15,6 +15,7 @@ #include <linux/capability.h> #include <linux/elf.h> #include <linux/elfcore.h> +#include <linux/notifier.h> #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/printk.h> @@ -564,7 +565,6 @@ static const struct file_operations proc_kcore_operations = { .llseek = default_llseek, }; -#ifdef CONFIG_MEMORY_HOTPLUG /* just remember that we have to update kcore */ static int __meminit kcore_callback(struct notifier_block *self, unsigned long action, void *arg) @@ -578,8 +578,11 @@ static int __meminit kcore_callback(struct notifier_block *self, } return NOTIFY_OK; } -#endif +static struct notifier_block kcore_callback_nb __meminitdata = { + .notifier_call = kcore_callback, + .priority = 0, +}; static struct kcore_list kcore_vmalloc; @@ -631,7 +634,7 @@ static int __init proc_kcore_init(void) add_modules_range(); /* Store direct-map area from physical memory map */ kcore_update_ram(); - hotplug_memory_notifier(kcore_callback, 0); + register_hotmemory_notifier(&kcore_callback_nb); return 0; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 1efaaa19c4f3..5aa847a603c0 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -11,6 +11,7 @@ #include <linux/swap.h> #include <linux/vmstat.h> #include <linux/atomic.h> +#include <linux/vmalloc.h> #include <asm/page.h> #include <asm/pgtable.h> #include "internal.h" diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c deleted file mode 100644 index 8ae221dfd010..000000000000 --- a/fs/proc/mmu.c +++ /dev/null @@ -1,60 +0,0 @@ -/* mmu.c: mmu memory info files - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <linux/spinlock.h> -#include <linux/vmalloc.h> -#include <linux/highmem.h> -#include <asm/pgtable.h> -#include "internal.h" - -void get_vmalloc_info(struct vmalloc_info *vmi) -{ - struct vm_struct *vma; - unsigned long free_area_size; - unsigned long prev_end; - - vmi->used = 0; - - if (!vmlist) { - vmi->largest_chunk = VMALLOC_TOTAL; - } - else { - vmi->largest_chunk = 0; - - prev_end = VMALLOC_START; - - read_lock(&vmlist_lock); - - for (vma = vmlist; vma; vma = vma->next) { - unsigned long addr = (unsigned long) vma->addr; - - /* - * Some archs keep another range for modules in vmlist - */ - if (addr < VMALLOC_START) - continue; - if (addr >= VMALLOC_END) - break; - - vmi->used += vma->size; - - free_area_size = addr - prev_end; - if (vmi->largest_chunk < free_area_size) - vmi->largest_chunk = free_area_size; - - prev_end = vma->size + addr; - } - - if (VMALLOC_END - prev_end > vmi->largest_chunk) - vmi->largest_chunk = VMALLOC_END - prev_end; - - read_unlock(&vmlist_lock); - } -} diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d864d56..3240a497926c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -688,10 +688,41 @@ const struct file_operations proc_tid_smaps_operations = { .release = seq_release_private, }; +enum clear_refs_types { + CLEAR_REFS_ALL = 1, + CLEAR_REFS_ANON, + CLEAR_REFS_MAPPED, + CLEAR_REFS_SOFT_DIRTY, + CLEAR_REFS_LAST, +}; + +struct clear_refs_private { + struct vm_area_struct *vma; + enum clear_refs_types type; +}; + +static inline void clear_soft_dirty(struct vm_area_struct *vma, + unsigned long addr, pte_t *pte) +{ +#ifdef CONFIG_MEM_SOFT_DIRTY + /* + * The soft-dirty tracker uses #PF-s to catch writes + * to pages, so write-protect the pte as well. See the + * Documentation/vm/soft-dirty.txt for full description + * of how soft-dirty works. + */ + pte_t ptent = *pte; + ptent = pte_wrprotect(ptent); + ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + set_pte_at(vma->vm_mm, addr, pte, ptent); +#endif +} + static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = walk->private; + struct clear_refs_private *cp = walk->private; + struct vm_area_struct *vma = cp->vma; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -706,6 +737,11 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, if (!pte_present(ptent)) continue; + if (cp->type == CLEAR_REFS_SOFT_DIRTY) { + clear_soft_dirty(vma, addr, pte); + continue; + } + page = vm_normal_page(vma, addr, ptent); if (!page) continue; @@ -719,10 +755,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } -#define CLEAR_REFS_ALL 1 -#define CLEAR_REFS_ANON 2 -#define CLEAR_REFS_MAPPED 3 - static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -730,7 +762,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, char buffer[PROC_NUMBUF]; struct mm_struct *mm; struct vm_area_struct *vma; - int type; + enum clear_refs_types type; + int itype; int rv; memset(buffer, 0, sizeof(buffer)); @@ -738,23 +771,28 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - rv = kstrtoint(strstrip(buffer), 10, &type); + rv = kstrtoint(strstrip(buffer), 10, &itype); if (rv < 0) return rv; - if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) + type = (enum clear_refs_types)itype; + if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mm = get_task_mm(task); if (mm) { + struct clear_refs_private cp = { + .type = type, + }; struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range, .mm = mm, + .private = &cp, }; down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { - clear_refs_walk.private = vma; + cp.vma = vma; if (is_vm_hugetlb_page(vma)) continue; /* @@ -794,6 +832,7 @@ typedef struct { struct pagemapread { int pos, len; pagemap_entry_t *buffer; + bool v2; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) @@ -807,14 +846,17 @@ struct pagemapread { #define PM_PSHIFT_BITS 6 #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) -#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) +#define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) +/* in pagemap2 pshift bits are occupied with more status bits */ +#define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT)) +#define __PM_SOFT_DIRTY (1LL) #define PM_PRESENT PM_STATUS(4LL) #define PM_SWAP PM_STATUS(2LL) #define PM_FILE PM_STATUS(1LL) -#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) +#define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0) #define PM_END_OF_BUFFER 1 static inline pagemap_entry_t make_pme(u64 val) @@ -837,7 +879,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, struct pagemapread *pm = walk->private; unsigned long addr; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); for (addr = start; addr < end; addr += PAGE_SIZE) { err = add_to_pagemap(addr, &pme, pm); @@ -847,11 +889,12 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, return err; } -static void pte_to_pagemap_entry(pagemap_entry_t *pme, +static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { u64 frame, flags; struct page *page = NULL; + int flags2 = 0; if (pte_present(pte)) { frame = pte_pfn(pte); @@ -866,19 +909,21 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, if (is_migration_entry(entry)) page = migration_entry_to_page(entry); } else { - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); return; } if (page && !PageAnon(page)) flags |= PM_FILE; + if (pte_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; - *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); + *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, - pmd_t pmd, int offset) +static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, + pmd_t pmd, int offset, int pmd_flags2) { /* * Currently pmd for thp is always present because thp can not be @@ -887,13 +932,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, */ if (pmd_present(pmd)) *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); } #else -static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, - pmd_t pmd, int offset) +static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, + pmd_t pmd, int offset, int pmd_flags2) { } #endif @@ -905,17 +950,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { + int pmd_flags2; + + pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; offset = (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT; - thp_pmd_to_pagemap_entry(&pme, *pmd, offset); + thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2); err = add_to_pagemap(addr, &pme, pm); if (err) break; @@ -932,7 +980,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, * and need a new, higher one */ if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); - pme = make_pme(PM_NOT_PRESENT); + pme = make_pme(PM_NOT_PRESENT(pm->v2)); } /* check that 'vma' actually covers this address, @@ -940,7 +988,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (vma && (vma->vm_start <= addr) && !is_vm_hugetlb_page(vma)) { pte = pte_offset_map(pmd, addr); - pte_to_pagemap_entry(&pme, vma, addr, *pte); + pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); /* unmap before userspace copy */ pte_unmap(pte); } @@ -955,14 +1003,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, } #ifdef CONFIG_HUGETLB_PAGE -static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, +static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, pte_t pte, int offset) { if (pte_present(pte)) *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + | PM_STATUS2(pm->v2, 0) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); } /* This function walks within one hugetlb entry in the single call */ @@ -976,7 +1024,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; - huge_pte_to_pagemap_entry(&pme, *pte, offset); + huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); err = add_to_pagemap(addr, &pme, pm); if (err) return err; @@ -1012,8 +1060,8 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, * determine which areas of memory are actually mapped and llseek to * skip over unmapped regions. */ -static ssize_t pagemap_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t do_pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos, bool v2) { struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; @@ -1038,6 +1086,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; + pm.v2 = v2; pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); ret = -ENOMEM; @@ -1110,10 +1159,27 @@ out: return ret; } +static ssize_t pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return do_pagemap_read(file, buf, count, ppos, false); +} + const struct file_operations proc_pagemap_operations = { .llseek = mem_lseek, /* borrow this */ .read = pagemap_read, }; + +static ssize_t pagemap2_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return do_pagemap_read(file, buf, count, ppos, true); +} + +const struct file_operations proc_pagemap2_operations = { + .llseek = mem_lseek, /* borrow this */ + .read = pagemap2_read, +}; #endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA diff --git a/fs/read_write.c b/fs/read_write.c index d0f0872d6b5c..7eb7ef34b4eb 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -9,6 +9,7 @@ #include <linux/fcntl.h> #include <linux/file.h> #include <linux/uio.h> +#include <linux/aio.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/export.h> @@ -326,16 +327,6 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; } -static void wait_on_retry_sync_kiocb(struct kiocb *iocb) -{ - set_current_state(TASK_UNINTERRUPTIBLE); - if (!kiocbIsKicked(iocb)) - schedule(); - else - kiocbClearKicked(iocb); - __set_current_state(TASK_RUNNING); -} - ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = buf, .iov_len = len }; @@ -347,13 +338,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp kiocb.ki_left = len; kiocb.ki_nbytes = len; - for (;;) { - ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); - if (ret != -EIOCBRETRY) - break; - wait_on_retry_sync_kiocb(&kiocb); - } - + ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; @@ -403,13 +388,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof kiocb.ki_left = len; kiocb.ki_nbytes = len; - for (;;) { - ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); - if (ret != -EIOCBRETRY) - break; - wait_on_retry_sync_kiocb(&kiocb); - } - + ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; @@ -589,13 +568,7 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, kiocb.ki_left = len; kiocb.ki_nbytes = len; - for (;;) { - ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); - if (ret != -EIOCBRETRY) - break; - wait_on_retry_sync_kiocb(&kiocb); - } - + ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); if (ret == -EIOCBQUEUED) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ea5061fd4f3e..77d6d47abc83 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -18,6 +18,7 @@ #include <linux/writeback.h> #include <linux/quotaops.h> #include <linux/swap.h> +#include <linux/aio.h> int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index f12189d2db1d..14374530784c 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -50,6 +50,7 @@ */ #include "ubifs.h" +#include <linux/aio.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/slab.h> diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 7a12e48ad819..b6d15d349810 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -38,6 +38,7 @@ #include <linux/slab.h> #include <linux/crc-itu-t.h> #include <linux/mpage.h> +#include <linux/aio.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3244c988d379..f64ee7130509 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -31,6 +31,7 @@ #include "xfs_vnodeops.h" #include "xfs_trace.h" #include "xfs_bmap.h" +#include <linux/aio.h> #include <linux/gfp.h> #include <linux/mpage.h> #include <linux/pagevec.h> @@ -379,7 +380,8 @@ xfs_imap_valid( STATIC void xfs_end_bio( struct bio *bio, - int error) + int error, + struct batch_complete *batch) { xfs_ioend_t *ioend = bio->bi_private; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 82b70bda9f47..cee0e42b5389 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1224,7 +1224,8 @@ _xfs_buf_ioend( STATIC void xfs_buf_bio_end_io( struct bio *bio, - int error) + int error, + struct batch_complete *batch) { xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 3800128d2171..baa88dd04190 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -36,6 +36,7 @@ #include "xfs_ioctl.h" #include "xfs_trace.h" +#include <linux/aio.h> #include <linux/dcache.h> #include <linux/falloc.h> #include <linux/pagevec.h> diff --git a/include/Kbuild b/include/Kbuild index 1dfd33e8d43b..bab1145bc7a7 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -1,5 +1,2 @@ # Top-level Makefile calls into asm-$(ARCH) # List only non-arch directories below - -header-y += video/ -header-y += scsi/ diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h new file mode 100644 index 000000000000..d06079c774a0 --- /dev/null +++ b/include/asm-generic/hugetlb.h @@ -0,0 +1,40 @@ +#ifndef _ASM_GENERIC_HUGETLB_H +#define _ASM_GENERIC_HUGETLB_H + +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +{ + return mk_pte(page, pgprot); +} + +static inline int huge_pte_write(pte_t pte) +{ + return pte_write(pte); +} + +static inline int huge_pte_dirty(pte_t pte) +{ + return pte_dirty(pte); +} + +static inline pte_t huge_pte_mkwrite(pte_t pte) +{ + return pte_mkwrite(pte); +} + +static inline pte_t huge_pte_mkdirty(pte_t pte) +{ + return pte_mkdirty(pte); +} + +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +{ + return pte_modify(pte, newprot); +} + +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_clear(mm, addr, ptep); +} + +#endif /* _ASM_GENERIC_HUGETLB_H */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index bfd87685fc1f..347fb7bfcbe2 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -7,6 +7,16 @@ #include <linux/mm_types.h> #include <linux/bug.h> +/* + * On almost all architectures and configurations, 0 can be used as the + * upper ceiling to free_pgtables(): on many architectures it has the same + * effect as using TASK_SIZE. However, there is one configuration which + * must impose a more careful limit, to avoid freeing kernel pgtables. + */ +#ifndef USER_PGTABLES_CEILING +#define USER_PGTABLES_CEILING 0UL +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -386,6 +396,28 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #define arch_start_context_switch(prev) do {} while (0) #endif +#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline int pte_soft_dirty(pte_t pte) +{ + return 0; +} + +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return 0; +} + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return pte; +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + return pmd; +} +#endif + #ifndef __HAVE_PFNMAP_TRACKING /* * Interfaces that can be used by architecture code to keep track of diff --git a/include/linux/aio.h b/include/linux/aio.h index 31ff6dba4872..a7e4c595825e 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -6,94 +6,38 @@ #include <linux/aio_abi.h> #include <linux/uio.h> #include <linux/rcupdate.h> - #include <linux/atomic.h> - -#define AIO_MAXSEGS 4 -#define AIO_KIOGRP_NR_ATOMIC 8 +#include <linux/batch_complete.h> struct kioctx; +struct kiocb; +struct batch_complete; -/* Notes on cancelling a kiocb: - * If a kiocb is cancelled, aio_complete may return 0 to indicate - * that cancel has not yet disposed of the kiocb. All cancel - * operations *must* call aio_put_req to dispose of the kiocb - * to guard against races with the completion code. - */ -#define KIOCB_C_CANCELLED 0x01 -#define KIOCB_C_COMPLETE 0x02 - -#define KIOCB_SYNC_KEY (~0U) +#define KIOCB_KEY 0 -/* ki_flags bits */ /* - * This may be used for cancel/retry serialization in the future, but - * for now it's unused and we probably don't want modules to even - * think they can use it. + * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either + * cancelled or completed (this makes a certain amount of sense because + * successful cancellation - io_cancel() - does deliver the completion to + * userspace). + * + * And since most things don't implement kiocb cancellation and we'd really like + * kiocb completion to be lockless when possible, we use ki_cancel to + * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED + * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). */ -/* #define KIF_LOCKED 0 */ -#define KIF_KICKED 1 -#define KIF_CANCELLED 2 - -#define kiocbTryLock(iocb) test_and_set_bit(KIF_LOCKED, &(iocb)->ki_flags) -#define kiocbTryKick(iocb) test_and_set_bit(KIF_KICKED, &(iocb)->ki_flags) +#define KIOCB_CANCELLED ((void *) (~0ULL)) -#define kiocbSetLocked(iocb) set_bit(KIF_LOCKED, &(iocb)->ki_flags) -#define kiocbSetKicked(iocb) set_bit(KIF_KICKED, &(iocb)->ki_flags) -#define kiocbSetCancelled(iocb) set_bit(KIF_CANCELLED, &(iocb)->ki_flags) +typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *); -#define kiocbClearLocked(iocb) clear_bit(KIF_LOCKED, &(iocb)->ki_flags) -#define kiocbClearKicked(iocb) clear_bit(KIF_KICKED, &(iocb)->ki_flags) -#define kiocbClearCancelled(iocb) clear_bit(KIF_CANCELLED, &(iocb)->ki_flags) - -#define kiocbIsLocked(iocb) test_bit(KIF_LOCKED, &(iocb)->ki_flags) -#define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags) -#define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags) - -/* is there a better place to document function pointer methods? */ -/** - * ki_retry - iocb forward progress callback - * @kiocb: The kiocb struct to advance by performing an operation. - * - * This callback is called when the AIO core wants a given AIO operation - * to make forward progress. The kiocb argument describes the operation - * that is to be performed. As the operation proceeds, perhaps partially, - * ki_retry is expected to update the kiocb with progress made. Typically - * ki_retry is set in the AIO core and it itself calls file_operations - * helpers. - * - * ki_retry's return value determines when the AIO operation is completed - * and an event is generated in the AIO event ring. Except the special - * return values described below, the value that is returned from ki_retry - * is transferred directly into the completion ring as the operation's - * resulting status. Once this has happened ki_retry *MUST NOT* reference - * the kiocb pointer again. - * - * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete() - * will be called on the kiocb pointer in the future. The AIO core will - * not ask the method again -- ki_retry must ensure forward progress. - * aio_complete() must be called once and only once in the future, multiple - * calls may result in undefined behaviour. - * - * If ki_retry returns -EIOCBRETRY it has made a promise that kick_iocb() - * will be called on the kiocb pointer in the future. This may happen - * through generic helpers that associate kiocb->ki_wait with a wait - * queue head that ki_retry uses via current->io_wait. It can also happen - * with custom tracking and manual calls to kick_iocb(), though that is - * discouraged. In either case, kick_iocb() must be called once and only - * once. ki_retry must ensure forward progress, the AIO core will wait - * indefinitely for kick_iocb() to be called. - */ struct kiocb { - struct list_head ki_run_list; - unsigned long ki_flags; - int ki_users; - unsigned ki_key; /* id of this request */ + struct rb_node ki_node; + + atomic_t ki_users; struct file *ki_filp; - struct kioctx *ki_ctx; /* may be NULL for sync ops */ - int (*ki_cancel)(struct kiocb *, struct io_event *); - ssize_t (*ki_retry)(struct kiocb *); + struct kioctx *ki_ctx; /* NULL for sync ops */ + kiocb_cancel_fn *ki_cancel; void (*ki_dtor)(struct kiocb *); union { @@ -102,6 +46,9 @@ struct kiocb { } ki_obj; __u64 ki_user_data; /* user's data for completion */ + long ki_res; + long ki_res2; + loff_t ki_pos; void *private; @@ -117,7 +64,6 @@ struct kiocb { struct list_head ki_list; /* the aio core uses this * for cancellation */ - struct list_head ki_batch; /* batch allocation */ /* * If the aio_resfd field of the userspace iocb is not zero, @@ -128,108 +74,55 @@ struct kiocb { static inline bool is_sync_kiocb(struct kiocb *kiocb) { - return kiocb->ki_key == KIOCB_SYNC_KEY; + return kiocb->ki_ctx == NULL; } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { - .ki_users = 1, - .ki_key = KIOCB_SYNC_KEY, + .ki_users = ATOMIC_INIT(1), + .ki_ctx = NULL, .ki_filp = filp, .ki_obj.tsk = current, }; } -#define AIO_RING_MAGIC 0xa10a10a1 -#define AIO_RING_COMPAT_FEATURES 1 -#define AIO_RING_INCOMPAT_FEATURES 0 -struct aio_ring { - unsigned id; /* kernel internal index number */ - unsigned nr; /* number of io_events */ - unsigned head; - unsigned tail; - - unsigned magic; - unsigned compat_features; - unsigned incompat_features; - unsigned header_length; /* size of aio_ring */ - - - struct io_event io_events[0]; -}; /* 128 bytes + ring size */ - -#define AIO_RING_PAGES 8 -struct aio_ring_info { - unsigned long mmap_base; - unsigned long mmap_size; - - struct page **ring_pages; - spinlock_t ring_lock; - long nr_pages; - - unsigned nr, tail; - - struct page *internal_pages[AIO_RING_PAGES]; -}; - -static inline unsigned aio_ring_avail(struct aio_ring_info *info, - struct aio_ring *ring) -{ - return (ring->head + info->nr - 1 - ring->tail) % info->nr; -} - -struct kioctx { - atomic_t users; - int dead; - struct mm_struct *mm; - - /* This needs improving */ - unsigned long user_id; - struct hlist_node list; - - wait_queue_head_t wait; - - spinlock_t ctx_lock; - - int reqs_active; - struct list_head active_reqs; /* used for cancellation */ - struct list_head run_list; /* used for kicked reqs */ - - /* sys_io_setup currently limits this to an unsigned int */ - unsigned max_reqs; - - struct aio_ring_info ring_info; - - struct delayed_work wq; - - struct rcu_head rcu_head; -}; - /* prototypes */ -extern unsigned aio_max_size; - #ifdef CONFIG_AIO extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); -extern int aio_put_req(struct kiocb *iocb); -extern void kick_iocb(struct kiocb *iocb); -extern int aio_complete(struct kiocb *iocb, long res, long res2); +extern void aio_put_req(struct kiocb *iocb); +extern void batch_complete_aio(struct batch_complete *batch); +extern void aio_complete_batch(struct kiocb *iocb, long res, long res2, + struct batch_complete *batch); struct mm_struct; extern void exit_aio(struct mm_struct *mm); extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); +void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } -static inline int aio_put_req(struct kiocb *iocb) { return 0; } -static inline void kick_iocb(struct kiocb *iocb) { } -static inline int aio_complete(struct kiocb *iocb, long res, long res2) { return 0; } +static inline void aio_put_req(struct kiocb *iocb) { } + +static inline void batch_complete_aio(struct batch_complete *batch) { } +static inline void aio_complete_batch(struct kiocb *iocb, long res, long res2, + struct batch_complete *batch) +{ + return; +} struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } static inline long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user * __user *iocbpp, bool compat) { return 0; } +static inline void kiocb_set_cancel_fn(struct kiocb *req, + kiocb_cancel_fn *cancel) { } #endif /* CONFIG_AIO */ +static inline void aio_complete(struct kiocb *iocb, long res, long res2) +{ + aio_complete_batch(iocb, res, res2, NULL); +} + static inline struct kiocb *list_kiocb(struct list_head *h) { return list_entry(h, struct kiocb, ki_list); diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index f7f1d7169b11..6fd5cc80f62f 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -213,8 +213,15 @@ static inline bool balloon_compaction_check(void) return true; } +static inline void balloon_event_count(enum vm_event_item item) +{ + count_vm_event(item); +} #else /* !CONFIG_BALLOON_COMPACTION */ +/* A macro, to avoid generating references to the undefined COMPACTBALLOON* */ +#define balloon_event_count(item) do { } while (0) + static inline void *balloon_mapping_alloc(void *balloon_device, const struct address_space_operations *a_ops) { diff --git a/include/linux/batch_complete.h b/include/linux/batch_complete.h new file mode 100644 index 000000000000..8167a9d306fb --- /dev/null +++ b/include/linux/batch_complete.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_BATCH_COMPLETE_H +#define _LINUX_BATCH_COMPLETE_H + +#include <linux/rbtree.h> + +/* + * Common stuff to the aio and block code for batch completion. Everything + * important is elsewhere: + */ + +struct bio; + +struct bio_list { + struct bio *head; + struct bio *tail; +}; + +struct batch_complete { + struct bio_list bio; + struct rb_root kiocb; +}; + +#endif diff --git a/include/linux/bio.h b/include/linux/bio.h index ef24466d8f82..5db8a51eebb1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -24,6 +24,7 @@ #include <linux/mempool.h> #include <linux/ioprio.h> #include <linux/bug.h> +#include <linux/batch_complete.h> #ifdef CONFIG_BLOCK @@ -69,6 +70,8 @@ #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio))) +void bio_endio_batch(struct bio *bio, int error, struct batch_complete *batch); + static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio->bi_vcnt) @@ -252,7 +255,25 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } -extern void bio_endio(struct bio *, int); +/** + * bio_endio - end I/O on a bio + * @bio: bio + * @error: error, if any + * + * Description: + * bio_endio() will end I/O on the whole bio. bio_endio() is the + * preferred way to end I/O on a bio, it takes care of clearing + * BIO_UPTODATE on error. @error is 0 on success, and and one of the + * established -Exxxx (-EIO, for instance) error values in case + * something went wrong. No one should call bi_end_io() directly on a + * bio unless they own it and thus know that it has an end_io + * function. + **/ +static inline void bio_endio(struct bio *bio, int error) +{ + bio_endio_batch(bio, error, NULL); +} + struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -404,10 +425,6 @@ static inline bool bio_mergeable(struct bio *bio) * member of the bio. The bio_list also caches the last list member to allow * fast access to the tail. */ -struct bio_list { - struct bio *head; - struct bio *tail; -}; static inline int bio_list_empty(const struct bio_list *bl) { @@ -554,6 +571,15 @@ struct biovec_slab { */ #define BIO_SPLIT_ENTRIES 2 +static inline void batch_complete_init(struct batch_complete *batch) +{ + bio_list_init(&batch->bio); + batch->kiocb = RB_ROOT; +} + +void batch_complete(struct batch_complete *batch); + + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) @@ -580,7 +606,7 @@ extern int bio_integrity_enabled(struct bio *bio); extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); extern int bio_integrity_get_tag(struct bio *, void *, unsigned int); extern int bio_integrity_prep(struct bio *); -extern void bio_integrity_endio(struct bio *, int); +extern void bio_integrity_endio(struct bio *, int, struct batch_complete *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); extern void bio_integrity_split(struct bio *, struct bio_pair *, int); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e8de67053cd4..9d3cafa6bbcd 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -16,7 +16,8 @@ struct page; struct block_device; struct io_context; struct cgroup_subsys_state; -typedef void (bio_end_io_t) (struct bio *, int); +struct batch_complete; +typedef void (bio_end_io_t) (struct bio *, int, struct batch_complete *); typedef void (bio_destructor_t) (struct bio *); /* @@ -42,6 +43,7 @@ struct bio { * top bits priority */ + short bi_error; unsigned short bi_vcnt; /* how many bio_vec's */ unsigned short bi_idx; /* current index into bvl_vec */ @@ -111,13 +113,14 @@ struct bio { #define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ #define BIO_QUIET 10 /* Make BIO Quiet */ #define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ +#define BIO_SNAP_STABLE 12 /* bio data must be snapshotted during write */ /* * Flags starting here get preserved by bio_reset() - this includes * BIO_POOL_IDX() */ -#define BIO_RESET_BITS 12 -#define BIO_OWNS_VEC 12 /* bio_free() should free bvec */ +#define BIO_RESET_BITS 13 +#define BIO_OWNS_VEC 13 /* bio_free() should free bvec */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 89d89c7162aa..07aa5f67c9a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -883,7 +883,8 @@ extern struct request *blk_fetch_request(struct request_queue *q); * This prevents code duplication in drivers. */ extern bool blk_update_request(struct request *rq, int error, - unsigned int nr_bytes); + unsigned int nr_bytes, + struct batch_complete *batch); extern bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, int error); @@ -891,10 +892,17 @@ extern bool blk_end_request_cur(struct request *rq, int error); extern bool blk_end_request_err(struct request *rq, int error); extern bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes); -extern void __blk_end_request_all(struct request *rq, int error); extern bool __blk_end_request_cur(struct request *rq, int error); extern bool __blk_end_request_err(struct request *rq, int error); +extern void blk_end_request_all_batch(struct request *rq, int error, + struct batch_complete *batch); + +static inline void __blk_end_request_all(struct request *rq, int error) +{ + blk_end_request_all_batch(rq, error, NULL); +} + extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 5afc4f94d110..4c16c4a88d47 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -181,6 +181,7 @@ void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int rw); void write_dirty_buffer(struct buffer_head *bh, int rw); +int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2b8dd03bddd5..5f0c96112db1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -28,6 +28,7 @@ struct cgroup_subsys; struct inode; struct cgroup; struct css_id; +struct eventfd_ctx; extern int cgroup_init_early(void); extern int cgroup_init(void); @@ -823,13 +824,6 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id); -/* - * Get a cgroup whose id is greater than or equal to id under tree of root. - * Returning a cgroup_subsys_state or NULL. - */ -struct cgroup_subsys_state *css_get_next(struct cgroup_subsys *ss, int id, - struct cgroup_subsys_state *root, int *foundid); - /* Returns true if root is ancestor of cg */ bool css_is_ancestor(struct cgroup_subsys_state *cg, const struct cgroup_subsys_state *root); diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 42e55deee757..4ce9056b31a8 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -33,7 +33,7 @@ struct cleancache_ops { void (*invalidate_fs)(int); }; -extern struct cleancache_ops +extern struct cleancache_ops * cleancache_register_ops(struct cleancache_ops *ops); extern void __cleancache_init_fs(struct super_block *); extern void __cleancache_init_shared_fs(char *, struct super_block *); @@ -42,9 +42,9 @@ extern void __cleancache_put_page(struct page *); extern void __cleancache_invalidate_page(struct address_space *, struct page *); extern void __cleancache_invalidate_inode(struct address_space *); extern void __cleancache_invalidate_fs(struct super_block *); -extern int cleancache_enabled; #ifdef CONFIG_CLEANCACHE +#define cleancache_enabled (1) static inline bool cleancache_fs_enabled(struct page *page) { return page->mapping->host->i_sb->cleancache_poolid >= 0; diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 68b162d92254..842de225055f 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -13,7 +13,7 @@ #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) -#if GCC_VERSION >= 40100 +#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #endif diff --git a/include/linux/console.h b/include/linux/console.h index d4101cc467c4..c5448f37cd6c 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -142,6 +142,7 @@ struct console { for (con = console_drivers; con != NULL; con = con->next) extern int console_set_on_cmdline; +extern struct console *early_console; extern int add_preferred_console(char *name, int idx, char *options); extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); diff --git a/include/linux/ctype.h b/include/linux/ctype.h index 8acfe312f947..653589e3e30e 100644 --- a/include/linux/ctype.h +++ b/include/linux/ctype.h @@ -61,4 +61,10 @@ static inline char _tolower(const char c) return c | 0x20; } +/* Fast check for octal digit */ +static inline int isodigit(const char c) +{ + return c >= '0' && c <= '7'; +} + #endif diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 3bd46f766751..21ca773f77bf 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -27,7 +27,7 @@ extern int debug_locks_off(void); \ if (!oops_in_progress && unlikely(c)) { \ if (debug_locks_off() && !debug_locks_silent) \ - WARN_ON(1); \ + WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c); \ __ret = 1; \ } \ __ret; \ diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h new file mode 100644 index 000000000000..d5b68bf3ec92 --- /dev/null +++ b/include/linux/decompress/unlz4.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZ4_H +#define DECOMPRESS_UNLZ4_H + +int unlz4(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/include/linux/dmi.h b/include/linux/dmi.h index f156cca25ad0..b6eb7a05d58e 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -99,6 +99,7 @@ extern const char * dmi_get_system_info(int field); extern const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from); extern void dmi_scan_machine(void); +extern void dmi_set_dump_stack_arch_desc(void); extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp); extern int dmi_name_in_vendors(const char *str); extern int dmi_name_in_serial(const char *str); @@ -114,6 +115,7 @@ static inline const char * dmi_get_system_info(int field) { return NULL; } static inline const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from) { return NULL; } static inline void dmi_scan_machine(void) { return; } +static inline void dmi_set_dump_stack_arch_desc(void) { } static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp) { if (yearp) diff --git a/include/linux/errno.h b/include/linux/errno.h index f6bf082d4d4f..89627b9187f9 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -28,6 +28,5 @@ #define EBADTYPE 527 /* Type not supported by server */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ #define EIOCBQUEUED 529 /* iocb queued, will get completion event */ -#define EIOCBRETRY 530 /* iocb queued, will trigger a retry */ #endif diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 5b9b5b317180..41b223a59a63 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -85,6 +85,17 @@ enum fid_type { FILEID_NILFS_WITH_PARENT = 0x62, /* + * 32 bit generation number, 40 bit i_pos. + */ + FILEID_FAT_WITHOUT_PARENT = 0x71, + + /* + * 32 bit generation number, 40 bit i_pos, + * 32 bit parent generation number, 40 bit parent i_pos + */ + FILEID_FAT_WITH_PARENT = 0x72, + + /* * Filesystems must not use 0xff file ID. */ FILEID_INVALID = 0xff, diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 30442547b9e6..8293262401de 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -14,7 +14,7 @@ struct frontswap_ops { }; extern bool frontswap_enabled; -extern struct frontswap_ops +extern struct frontswap_ops * frontswap_register_ops(struct frontswap_ops *ops); extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); @@ -22,33 +22,19 @@ extern void frontswap_writethrough(bool); #define FRONTSWAP_HAS_EXCLUSIVE_GETS extern void frontswap_tmem_exclusive_gets(bool); -extern void __frontswap_init(unsigned type); +extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); +extern void __frontswap_init(unsigned type, unsigned long *map); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); #ifdef CONFIG_FRONTSWAP +#define frontswap_enabled (1) static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) { - bool ret = false; - - if (frontswap_enabled && sis->frontswap_map) - ret = test_bit(offset, sis->frontswap_map); - return ret; -} - -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - set_bit(offset, sis->frontswap_map); -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - clear_bit(offset, sis->frontswap_map); + return __frontswap_test(sis, offset); } static inline void frontswap_map_set(struct swap_info_struct *p, @@ -71,14 +57,6 @@ static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) return false; } -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ -} - static inline void frontswap_map_set(struct swap_info_struct *p, unsigned long *map) { @@ -120,10 +98,10 @@ static inline void frontswap_invalidate_area(unsigned type) __frontswap_invalidate_area(type); } -static inline void frontswap_init(unsigned type) +static inline void frontswap_init(unsigned type, unsigned long *map) { if (frontswap_enabled) - __frontswap_init(type); + __frontswap_init(type, map); } #endif /* _LINUX_FRONTSWAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index b1f28b02ede6..8d47c9afa2d8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -675,9 +675,11 @@ static inline loff_t i_size_read(const struct inode *inode) static inline void i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) + preempt_disable(); write_seqcount_begin(&inode->i_size_seqcount); inode->i_size = i_size; write_seqcount_end(&inode->i_size_seqcount); + preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) preempt_disable(); inode->i_size = i_size; @@ -2444,7 +2446,7 @@ enum { DIO_SKIP_HOLES = 0x02, }; -void dio_end_io(struct bio *bio, int error); +void dio_end_io(struct bio *bio, int error, struct batch_complete *batch); ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d5b0910d4961..4b2ee8d12f5e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -157,7 +157,6 @@ struct fsnotify_group { struct inotify_group_private_data { spinlock_t idr_lock; struct idr idr; - u32 last_wd; struct user_struct *user; } inotify_data; #endif diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index dd7c569aacad..661d374aeb2d 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -29,6 +29,10 @@ #ifndef __GENALLOC_H__ #define __GENALLOC_H__ + +struct device; +struct device_node; + /** * Allocation callback function type definition * @map: Pointer to bitmap @@ -105,4 +109,18 @@ extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data); +extern struct gen_pool *devm_gen_pool_create(struct device *dev, + int min_alloc_order, int nid); +extern struct gen_pool *dev_get_gen_pool(struct device *dev); + +#ifdef CONFIG_OF +extern struct gen_pool *of_get_named_gen_pool(struct device_node *np, + const char *propname, int index); +#else +static inline struct gen_pool *of_get_named_gen_pool(struct device_node *np, + const char *propname, int index) +{ + return NULL; +} +#endif #endif /* __GENALLOC_H__ */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index c1d6555d2567..f3cec6856a4b 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -94,6 +94,11 @@ */ #define in_nmi() (preempt_count() & NMI_MASK) +/* + * Are we in nmi,irq context, or softirq context? + */ +#define in_serving_irq() (in_nmi() || in_irq() || in_serving_softirq()) + #if defined(CONFIG_PREEMPT_COUNT) # define PREEMPT_CHECK_OFFSET 1 #else diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ee1c244a62a1..528454c2caa9 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -99,7 +99,11 @@ extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, extern int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *pte, pmd_t *pmd, unsigned int flags); -extern int split_huge_page(struct page *page); +extern int split_huge_page_to_list(struct page *page, struct list_head *list); +static inline int split_huge_page(struct page *page) +{ + return split_huge_page_to_list(page, NULL); +} extern void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd); #define split_huge_page_pmd(__vma, __address, __pmd) \ @@ -186,6 +190,11 @@ extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vm #define transparent_hugepage_enabled(__vma) 0 #define transparent_hugepage_flags 0UL +static inline int +split_huge_page_to_list(struct page *page, struct list_head *list) +{ + return 0; +} static inline int split_huge_page(struct page *page) { return 0; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 16e4e9a643fb..3a62df310f2e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -58,6 +58,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, int hugetlb_prefault(struct address_space *, struct vm_area_struct *); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(int, char *); +void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); @@ -114,6 +115,9 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) { } #define hugetlb_report_node_meminfo(n, buf) 0 +static inline void hugetlb_show_meminfo(void) +{ +} #define follow_huge_pmd(mm, addr, pmd, write) NULL #define follow_huge_pud(mm, addr, pud, write) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 95d0850584da..c2559847d7ee 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1318,6 +1318,17 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver); 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 \ } /* + * Synthetic Video GUID + * {DA0A7802-E377-4aac-8E77-0558EB1073F8} + */ +#define HV_SYNTHVID_GUID \ + .guid = { \ + 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a, \ + 0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 \ + } + + +/* * Common header for Hyper-V ICs */ diff --git a/include/linux/idr.h b/include/linux/idr.h index 6ece0583362a..871a213a8477 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -42,6 +42,7 @@ struct idr { struct idr_layer *id_free; int layers; /* only valid w/o concurrent changes */ int id_free_cnt; + int cur; /* current pos for cyclic allocation */ spinlock_t lock; }; @@ -75,6 +76,7 @@ struct idr { void *idr_find_slowpath(struct idr *idp, int id); void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); +int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *idp, int *nextid); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 85ac9b9b72a2..89b7c24a36e9 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -192,6 +192,10 @@ extern struct resource * __request_region(struct resource *, extern int __check_region(struct resource *, resource_size_t, resource_size_t); extern void __release_region(struct resource *, resource_size_t, resource_size_t); +#ifdef CONFIG_MEMORY_HOTREMOVE +extern int release_mem_region_adjustable(struct resource *, resource_size_t, + resource_size_t); +#endif static inline int __deprecated check_region(resource_size_t s, resource_size_t n) diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index ae221a7b5092..c4d870b0d5e6 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -43,8 +43,8 @@ struct ipc_namespace { size_t shm_ctlmax; size_t shm_ctlall; + unsigned long shm_tot; int shm_ctlmni; - int shm_tot; /* * Defines whether IPC_RMID is forced for _all_ shm segments regardless * of shmctl() diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a661acf1888b..e96329ceb28c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -791,6 +791,4 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif -extern int do_sysinfo(struct sysinfo *info); - #endif diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 5398d5807075..0555cc66a15b 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -67,16 +67,15 @@ struct subprocess_info { }; extern int -call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data); +call_usermodehelper(char *path, char **argv, char **envp, int wait); -static inline int -call_usermodehelper(char *path, char **argv, char **envp, int wait) -{ - return call_usermodehelper_fns(path, argv, envp, wait, - NULL, NULL, NULL); -} +extern struct subprocess_info * +call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *), void *data); + +extern int +call_usermodehelper_exec(struct subprocess_info *info, int wait); extern struct ctl_table usermodehelper_table[]; diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8d816646f766..7dcef3317689 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -43,6 +43,7 @@ bool kthread_should_stop(void); bool kthread_should_park(void); bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_data(struct task_struct *k); +void *probe_kthread_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f1e877b79ed8..cfc2f119779a 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -365,7 +365,7 @@ extern void lockdep_trace_alloc(gfp_t mask); #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) -#else /* !LOCKDEP */ +#else /* !CONFIG_LOCKDEP */ static inline void lockdep_off(void) { @@ -479,82 +479,36 @@ static inline void print_irqtrace_events(struct task_struct *curr) * on the per lock-class debug mode: */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# ifdef CONFIG_PROVE_LOCKING -# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) -# define spin_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) -# else -# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) -# define spin_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, NULL, i) -# endif -# define spin_release(l, n, i) lock_release(l, n, i) +#ifdef CONFIG_PROVE_LOCKING + #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) + #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 2, n, i) + #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 2, n, i) #else -# define spin_acquire(l, s, t, i) do { } while (0) -# define spin_release(l, n, i) do { } while (0) + #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) + #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) + #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) #endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# ifdef CONFIG_PROVE_LOCKING -# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) -# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, NULL, i) -# else -# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) -# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, NULL, i) -# endif -# define rwlock_release(l, n, i) lock_release(l, n, i) -#else -# define rwlock_acquire(l, s, t, i) do { } while (0) -# define rwlock_acquire_read(l, s, t, i) do { } while (0) -# define rwlock_release(l, n, i) do { } while (0) -#endif +#define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) +#define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) +#define spin_release(l, n, i) lock_release(l, n, i) -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# ifdef CONFIG_PROVE_LOCKING -# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) -# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) -# else -# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) -# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) -# endif -# define mutex_release(l, n, i) lock_release(l, n, i) -#else -# define mutex_acquire(l, s, t, i) do { } while (0) -# define mutex_acquire_nest(l, s, t, n, i) do { } while (0) -# define mutex_release(l, n, i) do { } while (0) -#endif +#define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) +#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define rwlock_release(l, n, i) lock_release(l, n, i) -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# ifdef CONFIG_PROVE_LOCKING -# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) -# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) -# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, NULL, i) -# else -# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) -# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) -# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, NULL, i) -# endif +#define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) +#define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) +#define mutex_release(l, n, i) lock_release(l, n, i) + +#define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) +#define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) +#define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) # define rwsem_release(l, n, i) lock_release(l, n, i) -#else -# define rwsem_acquire(l, s, t, i) do { } while (0) -# define rwsem_acquire_nest(l, s, t, n, i) do { } while (0) -# define rwsem_acquire_read(l, s, t, i) do { } while (0) -# define rwsem_release(l, n, i) do { } while (0) -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# ifdef CONFIG_PROVE_LOCKING -# define lock_map_acquire(l) lock_acquire(l, 0, 0, 0, 2, NULL, _THIS_IP_) -# define lock_map_acquire_read(l) lock_acquire(l, 0, 0, 2, 2, NULL, _THIS_IP_) -# else -# define lock_map_acquire(l) lock_acquire(l, 0, 0, 0, 1, NULL, _THIS_IP_) -# define lock_map_acquire_read(l) lock_acquire(l, 0, 0, 2, 1, NULL, _THIS_IP_) -# endif +#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) +#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) # define lock_map_release(l) lock_release(l, 1, _THIS_IP_) -#else -# define lock_map_acquire(l) do { } while (0) -# define lock_map_acquire_read(l) do { } while (0) -# define lock_map_release(l) do { } while (0) -#endif #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 000000000000..d21c13f10a64 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,87 @@ +#ifndef __LZ4_H__ +#define __LZ4_H__ +/* + * LZ4 Kernel Interface + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *)) +#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *)) + +/* + * lz4_compressbound() + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +static inline size_t lz4_compressbound(size_t isize) +{ + return isize + (isize / 255) + 16; +} + +/* + * lz4_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + + /* + * lz4hc_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + +/* + * lz4_decompress() + * src : source address of the compressed data + * src_len : is the input size, whcih is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + * slightly faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len); + +/* + * lz4_decompress_unknownoutputsize() + * src : source address of the compressed data + * src_len : is the input size, therefore the compressed size + * dest : output buffer address of the decompressed data + * dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after + * decompress done + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + */ +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len); +#endif diff --git a/include/linux/memory.h b/include/linux/memory.h index 45e93b468878..73817af8b480 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -18,6 +18,7 @@ #include <linux/node.h> #include <linux/compiler.h> #include <linux/mutex.h> +#include <linux/notifier.h> #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) @@ -114,9 +115,10 @@ extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern int register_new_memory(int, struct mem_section *); +#ifdef CONFIG_MEMORY_HOTREMOVE extern int unregister_memory_section(struct mem_section *); +#endif extern int memory_dev_init(void); -extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); extern struct memory_block *find_memory_block_hinted(struct mem_section *, @@ -127,13 +129,18 @@ enum mem_add_context { BOOT, HOTPLUG }; #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ #ifdef CONFIG_MEMORY_HOTPLUG -#define hotplug_memory_notifier(fn, pri) { \ +#define hotplug_memory_notifier(fn, pri) ({ \ static __meminitdata struct notifier_block fn##_mem_nb =\ - { .notifier_call = fn, .priority = pri }; \ + { .notifier_call = fn, .priority = pri };\ register_memory_notifier(&fn##_mem_nb); \ -} +}) +#define register_hotmemory_notifier(nb) register_memory_notifier(nb) +#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) #else -#define hotplug_memory_notifier(fn, pri) do { } while (0) +#define hotplug_memory_notifier(fn, pri) (0) +/* These aren't inline functions due to a GCC bug. */ +#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) +#define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) #endif /* diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index b6a3be7d47bf..3e622c610925 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -97,13 +97,13 @@ extern void __online_page_free(struct page *page); #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); extern int arch_remove_memory(u64 start, u64 size); +extern int __remove_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages); #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages in a zone */ extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); -extern int __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); #ifdef CONFIG_NUMA extern int memory_add_physaddr_to_nid(u64 start); diff --git a/include/linux/mm.h b/include/linux/mm.h index 2972d9f45e20..1a7f19e7f1a0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -44,6 +44,9 @@ extern int sysctl_legacy_va_layout; #include <asm/pgtable.h> #include <asm/processor.h> +extern unsigned long sysctl_user_reserve_kbytes; +extern unsigned long sysctl_admin_reserve_kbytes; + #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) /* to align the pointer to the (next) page boundary */ @@ -899,7 +902,8 @@ extern void pagefault_out_of_memory(void); * Flags passed to show_mem() and show_free_areas() to suppress output in * various contexts. */ -#define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */ +#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ +#define SHOW_MEM_FILTER_PAGE_COUNT (0x0002u) /* page type count */ extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); @@ -1291,6 +1295,61 @@ extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); extern void free_initmem(void); +/* + * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) + * into the buddy system. The freed pages will be poisoned with pattern + * "poison" if it's non-zero. + * Return pages freed into the buddy system. + */ +extern unsigned long free_reserved_area(unsigned long start, unsigned long end, + int poison, char *s); +#ifdef CONFIG_HIGHMEM +/* + * Free a highmem page into the buddy system, adjusting totalhigh_pages + * and totalram_pages. + */ +extern void free_highmem_page(struct page *page); +#endif + +static inline void adjust_managed_page_count(struct page *page, long count) +{ + totalram_pages += count; +} + +/* Free the reserved page into the buddy system, so it gets managed. */ +static inline void __free_reserved_page(struct page *page) +{ + ClearPageReserved(page); + init_page_count(page); + __free_page(page); +} + +static inline void free_reserved_page(struct page *page) +{ + __free_reserved_page(page); + adjust_managed_page_count(page, 1); +} + +static inline void mark_page_reserved(struct page *page) +{ + SetPageReserved(page); + adjust_managed_page_count(page, -1); +} + +/* + * Default method to free all the __init memory into the buddy system. + * The freed pages will be poisoned with pattern "poison" if it is + * non-zero. Return pages freed into the buddy system. + */ +static inline unsigned long free_initmem_default(int poison) +{ + extern char __init_begin[], __init_end[]; + + return free_reserved_area(PAGE_ALIGN((unsigned long)&__init_begin) , + ((unsigned long)&__init_end) & PAGE_MASK, + poison, "unused kernel"); +} + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its @@ -1672,8 +1731,12 @@ int in_gate_area_no_mm(unsigned long addr); #define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ +#ifdef CONFIG_SYSCTL +extern int sysctl_drop_caches; int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +#endif + unsigned long shrink_slab(struct shrink_control *shrink, unsigned long nr_pages_scanned, unsigned long lru_pages); @@ -1701,12 +1764,12 @@ pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); void *vmemmap_alloc_block_buf(unsigned long size, int node); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); -int vmemmap_populate_basepages(struct page *start_page, - unsigned long pages, int node); -int vmemmap_populate(struct page *start_page, unsigned long pages, int node); +int vmemmap_populate_basepages(unsigned long start, unsigned long end, + int node); +int vmemmap_populate(unsigned long start, unsigned long end, int node); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG -void vmemmap_free(struct page *memmap, unsigned long nr_pages); +void vmemmap_free(unsigned long start, unsigned long end); #endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long size); @@ -1753,5 +1816,11 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool page_is_guard(struct page *page) { return false; } #endif /* CONFIG_DEBUG_PAGEALLOC */ +#if MAX_NUMNODES > 1 +void __init setup_nr_node_ids(void); +#else +static inline void setup_nr_node_ids(void) {} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ace9a5f01c64..fb425aa16c01 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -330,12 +330,9 @@ struct mm_struct { unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); - void (*unmap_area) (struct mm_struct *mm, unsigned long addr); #endif unsigned long mmap_base; /* base of mmap area */ unsigned long task_size; /* size of task vm space */ - unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ - unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5c76737d836b..8c9f859ab558 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -815,7 +815,10 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. */ -#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) +static inline enum zone_type zone_idx(struct zone *zone) +{ + return zone - zone->zone_pgdat->node_zones; +} static inline int populated_zone(struct zone *zone) { @@ -856,25 +859,18 @@ static inline int is_normal_idx(enum zone_type idx) */ static inline int is_highmem(struct zone *zone) { -#ifdef CONFIG_HIGHMEM - int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones; - return zone_off == ZONE_HIGHMEM * sizeof(*zone) || - (zone_off == ZONE_MOVABLE * sizeof(*zone) && - zone_movable_is_highmem()); -#else - return 0; -#endif + return is_highmem_idx(zone_idx(zone)); } static inline int is_normal(struct zone *zone) { - return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL; + return zone_idx(zone) == ZONE_NORMAL; } static inline int is_dma32(struct zone *zone) { #ifdef CONFIG_ZONE_DMA32 - return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; + return zone_idx(zone) == ZONE_DMA32; #else return 0; #endif @@ -883,7 +879,7 @@ static inline int is_dma32(struct zone *zone) static inline int is_dma(struct zone *zone) { #ifdef CONFIG_ZONE_DMA - return zone == zone->zone_pgdat->node_zones + ZONE_DMA; + return zone_idx(zone) == ZONE_DMA; #else return 0; #endif diff --git a/include/linux/net.h b/include/linux/net.h index aa1673160a45..99c9f0c103c2 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -240,8 +240,8 @@ do { \ #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) -#define net_random() random32() -#define net_srandom(seed) srandom32((__force u32)seed) +#define net_random() prandom_u32() +#define net_srandom(seed) prandom_seed((__force u32)(seed)) extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); diff --git a/include/linux/notifier.h b/include/linux/notifier.h index d65746efc954..d14a4c362465 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -47,8 +47,11 @@ * runtime initialization. */ +typedef int (*notifier_fn_t)(struct notifier_block *nb, + unsigned long action, void *data); + struct notifier_block { - int (*notifier_call)(struct notifier_block *, unsigned long, void *); + notifier_fn_t notifier_call; struct notifier_block __rcu *next; int priority; }; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0e38e13eb249..e3dea75a078b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -149,7 +149,7 @@ static inline int page_cache_get_speculative(struct page *page) { VM_BUG_ON(in_interrupt()); -#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) +#ifdef CONFIG_TINY_RCU # ifdef CONFIG_PREEMPT_COUNT VM_BUG_ON(!in_atomic()); # endif diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h new file mode 100644 index 000000000000..d0cf8872dc43 --- /dev/null +++ b/include/linux/percpu-refcount.h @@ -0,0 +1,114 @@ +/* + * Dynamic percpu refcounts: + * (C) 2012 Google, Inc. + * Author: Kent Overstreet <koverstreet@google.com> + * + * This implements a refcount with similar semantics to atomic_t - atomic_inc(), + * atomic_dec_and_test() - but potentially percpu. + * + * There's one important difference between percpu refs and normal atomic_t + * refcounts; you have to keep track of your initial refcount, and then when you + * start shutting down you call percpu_ref_kill() _before_ dropping the initial + * refcount. + * + * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the + * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill() + * puts the ref back in single atomic_t mode, collecting the per cpu refs and + * issuing the appropriate barriers, and then marks the ref as shutting down so + * that percpu_ref_put() will check for the ref hitting 0. After it returns, + * it's safe to drop the initial ref. + * + * BACKGROUND: + * + * Percpu refcounts are quite useful for performance, but if we blindly + * converted all refcounts to percpu counters we'd waste quite a bit of memory. + * + * Think about all the refcounts embedded in kobjects, files, etc. most of which + * aren't used much. These start out as simple atomic counters - a little bigger + * than a bare atomic_t, 16 bytes instead of 4 - but if we exceed some arbitrary + * number of gets in one second, we then switch to percpu counters. + * + * This heuristic isn't perfect because it'll fire if the refcount was only + * being used on one cpu; ideally we'd be able to count the number of cache + * misses on percpu_ref_get() or something similar, but that'd make the non + * percpu path significantly heavier/more complex. We can count the number of + * gets() without any extra atomic instructions on arches that support + * atomic64_t - simply by changing the atomic_inc() to atomic_add_return(). + * + * USAGE: + * + * See fs/aio.c for some example usage; it's used there for struct kioctx, which + * is created when userspaces calls io_setup(), and destroyed when userspace + * calls io_destroy() or the process exits. + * + * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it + * calls percpu_ref_kill(), then hlist_del_rcu() and sychronize_rcu() to remove + * the kioctx from the proccess's list of kioctxs - after that, there can't be + * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop + * the initial ref with percpu_ref_put(). + * + * Code that does a two stage shutdown like this often needs some kind of + * explicit synchronization to ensure the initial refcount can only be dropped + * once - percpu_ref_kill() does this for you, it returns true once and false if + * someone else already called it. The aio code uses it this way, but it's not + * necessary if the code has some other mechanism to synchronize teardown. + * + * As mentioned previously, we decide when to convert a ref to percpu counters + * in percpu_ref_get(). However, since percpu_ref_get() will often be called + * with rcu_read_lock() held, it's not done there - percpu_ref_get() returns + * true if the ref should be converted to percpu counters. + * + * The caller should then call percpu_ref_alloc() after dropping + * rcu_read_lock(); if there is an uncommonly used codepath where it's + * inconvenient to call percpu_ref_alloc() after get(), it may be safely skipped + * and percpu_ref_get() will return true again the next time the counter wraps + * around. + */ + +#ifndef _LINUX_PERCPU_REFCOUNT_H +#define _LINUX_PERCPU_REFCOUNT_H + +#include <linux/atomic.h> +#include <linux/percpu.h> + +struct percpu_ref { + atomic64_t count; + unsigned long pcpu_count; +}; + +void percpu_ref_init(struct percpu_ref *ref); +void __percpu_ref_get(struct percpu_ref *ref, bool alloc); +int percpu_ref_put(struct percpu_ref *ref); + +int percpu_ref_kill(struct percpu_ref *ref); +int percpu_ref_dead(struct percpu_ref *ref); + +/** + * percpu_ref_get - increment a dynamic percpu refcount + * + * Increments @ref and possibly converts it to percpu counters. Must be called + * with rcu_read_lock() held, and may potentially drop/reacquire rcu_read_lock() + * to allocate percpu counters - if sleeping/allocation isn't safe for some + * other reason (e.g. a spinlock), see percpu_ref_get_noalloc(). + * + * Analagous to atomic_inc(). + */ +static inline void percpu_ref_get(struct percpu_ref *ref) +{ + __percpu_ref_get(ref, true); +} + +/** + * percpu_ref_get_noalloc - increment a dynamic percpu refcount + * + * Increments @ref, to be used when it's not safe to allocate percpu counters. + * Must be called with rcu_read_lock() held. + * + * Analagous to atomic_inc(). + */ +static inline void percpu_ref_get_noalloc(struct percpu_ref *ref) +{ + __percpu_ref_get(ref, false); +} + +#endif diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 5524f8cfa950..e2772666f004 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/bug.h> #include <linux/mm.h> +#include <linux/workqueue.h> #include <linux/threads.h> #include <linux/nsproxy.h> #include <linux/kref.h> @@ -13,7 +14,9 @@ struct pidmap { void *page; }; -#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) +#define BITS_PER_PAGE (PAGE_SIZE * 8) +#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) +#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) struct bsd_acct_struct; diff --git a/include/linux/platform_data/coda.h b/include/linux/platform_data/coda.h new file mode 100644 index 000000000000..6ad4410d9e20 --- /dev/null +++ b/include/linux/platform_data/coda.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2013 Philipp Zabel, Pengutronix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#ifndef PLATFORM_CODA_H +#define PLATFORM_CODA_H + +struct device; + +struct coda_platform_data { + struct device *iram_dev; +}; + +#endif diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 042058fdb0af..92a51df74d12 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -7,14 +7,20 @@ #include <linux/timex.h> #include <linux/alarmtimer.h> -union cpu_time_count { - cputime_t cpu; - unsigned long long sched; -}; + +static inline unsigned long long cputime_to_expires(cputime_t expires) +{ + return (__force unsigned long long)expires; +} + +static inline cputime_t expires_to_cputime(unsigned long long expires) +{ + return (__force cputime_t)expires; +} struct cpu_timer_list { struct list_head entry; - union cpu_time_count expires, incr; + unsigned long long expires, incr; struct task_struct *task; int firing; }; diff --git a/include/linux/printk.h b/include/linux/printk.h index 822171fcb1c8..6af944ab38f0 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -1,6 +1,7 @@ #ifndef __KERNEL_PRINTK__ #define __KERNEL_PRINTK__ +#include <stdarg.h> #include <linux/init.h> #include <linux/kern_levels.h> @@ -95,8 +96,14 @@ int no_printk(const char *fmt, ...) return 0; } +#ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); +void early_vprintk(const char *fmt, va_list ap); +#else +static inline __printf(1, 2) __cold +void early_printk(const char *s, ...) { } +#endif #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) @@ -138,6 +145,9 @@ extern void wake_up_klogd(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); +void dump_stack_set_arch_desc(const char *fmt, ...); +void dump_stack_print_info(const char *log_lvl); +void show_regs_print_info(const char *log_lvl); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -175,6 +185,18 @@ static inline void log_buf_kexec_setup(void) static inline void setup_log_buf(int early) { } + +static inline void dump_stack_set_arch_desc(const char *fmt, ...) +{ +} + +static inline void dump_stack_print_info(const char *log_lvl) +{ +} + +static inline void show_regs_print_info(const char *log_lvl) +{ +} #endif extern void dump_stack(void) __cold; diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 5bf5500db83d..69e37c2d1ea5 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -6,7 +6,13 @@ struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir, extern struct dentry *ramfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); -#ifndef CONFIG_MMU +#ifdef CONFIG_MMU +static inline int +ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) +{ + return 0; +} +#else extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize); extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long addr, diff --git a/include/linux/random.h b/include/linux/random.h index 347ce553a306..3b9377d6b7a5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -29,13 +29,6 @@ u32 prandom_u32(void); void prandom_bytes(void *buf, int nbytes); void prandom_seed(u32 seed); -/* - * These macros are preserved for backward compatibility and should be - * removed as soon as a transition is finished. - */ -#define random32() prandom_u32() -#define srandom32(seed) prandom_seed(seed) - u32 prandom_u32_state(struct rnd_state *); void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); diff --git a/include/linux/relay.h b/include/linux/relay.h index 91cacc34c159..d7c8359693c6 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -20,9 +20,6 @@ #include <linux/poll.h> #include <linux/kref.h> -/* Needs a _much_ better name... */ -#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE) - /* * Tracks changes to rchan/rchan_buf structs */ diff --git a/include/linux/rtc-pxa.h b/include/linux/rtc-pxa.h new file mode 100644 index 000000000000..71bc45f060fc --- /dev/null +++ b/include/linux/rtc-pxa.h @@ -0,0 +1,18 @@ +/* + * include/linux/rtc-pxa.h + * + * RTC PXA Header file + * + * Copyright (C) 2010 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_RTC_PXA_H +#define __LINUX_RTC_PXA_H + +extern int pxa_rtc_sync_time(unsigned int ticks); + +#endif /* __LINUX_RTC_PXA_H */ diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 580b24c8b8ca..c2c28975293c 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -133,7 +133,13 @@ extern struct rtc_device *rtc_device_register(const char *name, struct device *dev, const struct rtc_class_ops *ops, struct module *owner); +extern struct rtc_device *devm_rtc_device_register(struct device *dev, + const char *name, + const struct rtc_class_ops *ops, + struct module *owner); extern void rtc_device_unregister(struct rtc_device *rtc); +extern void devm_rtc_device_unregister(struct device *dev, + struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); diff --git a/include/linux/sched.h b/include/linux/sched.h index 612ae456b9ca..4b09d01e9dfb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -314,8 +314,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; struct user_namespace; -#include <linux/aio.h> - #ifdef CONFIG_MMU extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long @@ -325,8 +323,6 @@ extern unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -extern void arch_unmap_area(struct mm_struct *, unsigned long); -extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif @@ -626,6 +622,7 @@ struct signal_struct { #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ /* * Pending notifications to parent. */ @@ -2250,27 +2247,18 @@ static inline void threadgroup_change_end(struct task_struct *tsk) * * Lock the threadgroup @tsk belongs to. No new task is allowed to enter * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or - * perform exec. This is useful for cases where the threadgroup needs to - * stay stable across blockable operations. + * change ->group_leader/pid. This is useful for cases where the threadgroup + * needs to stay stable across blockable operations. * * fork and exit paths explicitly call threadgroup_change_{begin|end}() for * synchronization. While held, no new task will be added to threadgroup * and no existing live task will have its PF_EXITING set. * - * During exec, a task goes and puts its thread group through unusual - * changes. After de-threading, exclusive access is assumed to resources - * which are usually shared by tasks in the same group - e.g. sighand may - * be replaced with a new one. Also, the exec'ing task takes over group - * leader role including its pid. Exclude these changes while locked by - * grabbing cred_guard_mutex which is used to synchronize exec path. + * de_thread() does threadgroup_change_{begin|end}() when a non-leader + * sub-thread becomes a new leader. */ static inline void threadgroup_lock(struct task_struct *tsk) { - /* - * exec uses exit for de-threading nesting group_rwsem inside - * cred_guard_mutex. Grab cred_guard_mutex first. - */ - mutex_lock(&tsk->signal->cred_guard_mutex); down_write(&tsk->signal->group_rwsem); } @@ -2283,7 +2271,6 @@ static inline void threadgroup_lock(struct task_struct *tsk) static inline void threadgroup_unlock(struct task_struct *tsk) { up_write(&tsk->signal->group_rwsem); - mutex_unlock(&tsk->signal->cred_guard_mutex); } #else static inline void threadgroup_change_begin(struct task_struct *tsk) {} diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index a3eb2f65b656..3eeee9672a4a 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -13,4 +13,62 @@ enum string_size_units { int string_get_size(u64 size, enum string_size_units units, char *buf, int len); +#define UNESCAPE_SPACE 0x01 +#define UNESCAPE_OCTAL 0x02 +#define UNESCAPE_HEX 0x04 +#define UNESCAPE_SPECIAL 0x08 +#define UNESCAPE_ANY \ + (UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL) + +/** + * string_unescape - unquote characters in the given string + * @src: source buffer (escaped) + * @dst: destination buffer (unescaped) + * @size: size of the destination buffer (0 to unlimit) + * @flags: combination of the flags (bitwise OR): + * %UNESCAPE_SPACE: + * '\f' - form feed + * '\n' - new line + * '\r' - carriage return + * '\t' - horizontal tab + * '\v' - vertical tab + * %UNESCAPE_OCTAL: + * '\NNN' - byte with octal value NNN (1 to 3 digits) + * %UNESCAPE_HEX: + * '\xHH' - byte with hexadecimal value HH (1 to 2 digits) + * %UNESCAPE_SPECIAL: + * '\"' - double quote + * '\\' - backslash + * '\a' - alert (BEL) + * '\e' - escape + * %UNESCAPE_ANY: + * all previous together + * + * Returns amount of characters processed to the destination buffer excluding + * trailing '\0'. + * + * Because the size of the output will be the same as or less than the size of + * the input, the transformation may be performed in place. + * + * Caller must provide valid source and destination pointers. Be aware that + * destination buffer will always be NULL-terminated. Source string must be + * NULL-terminated as well. + */ +int string_unescape(char *src, char *dst, size_t size, unsigned int flags); + +static inline int string_unescape_inplace(char *buf, unsigned int flags) +{ + return string_unescape(buf, buf, 0, flags); +} + +static inline int string_unescape_any(char *src, char *dst, size_t size) +{ + return string_unescape(src, dst, size, UNESCAPE_ANY); +} + +static inline int string_unescape_any_inplace(char *buf) +{ + return string_unescape_any(buf, buf, 0); +} + #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index 2818a123f3ea..ca031f7abee4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -236,7 +236,7 @@ extern unsigned long nr_free_pagecache_pages(void); extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); extern void lru_add_page_tail(struct page *page, struct page *page_tail, - struct lruvec *lruvec); + struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); @@ -330,8 +330,14 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); +extern void end_swap_bio_write(struct bio *bio, int err, + struct batch_complete *batch); +extern int __swap_writepage(struct page *page, struct writeback_control *wbc, + void (*end_write_func)(struct bio *bio, int err, + struct batch_complete *batch)); extern int swap_set_page_dirty(struct page *page); -extern void end_swap_bio_read(struct bio *bio, int err); +extern void end_swap_bio_read(struct bio *bio, int err, + struct batch_complete *batch); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); @@ -343,8 +349,9 @@ extern struct address_space swapper_spaces[]; #define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *); +extern int add_to_swap(struct page *, struct list_head *list); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); +extern int __add_to_swap_cache(struct page *page, swp_entry_t entry); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); extern void free_page_and_swap_cache(struct page *); @@ -461,7 +468,7 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } -static inline int add_to_swap(struct page *page) +static inline int add_to_swap(struct page *page, struct list_head *list) { return 0; } diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index bd6cf61142be..d4b7a184f08c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -50,7 +50,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED, COMPACTISOLATED, COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, -#endif +#ifdef CONFIG_BALLOON_COMPACTION + COMPACTBALLOONISOLATED, /* isolated from balloon pagelist */ + COMPACTBALLOONMIGRATED, /* balloon page sucessfully migrated */ + COMPACTBALLOONRETURNED, /* putback to pagelist, not-migrated */ +#endif /* CONFIG_BALLOON_COMPACTION */ +#endif /* CONFIG_COMPACTION */ #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, #endif diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 6071e911c7f4..7d5773a99f20 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -3,7 +3,9 @@ #include <linux/spinlock.h> #include <linux/init.h> +#include <linux/list.h> #include <asm/page.h> /* pgprot_t */ +#include <linux/rbtree.h> struct vm_area_struct; /* vma defining user mapping in mm_types.h */ @@ -35,6 +37,17 @@ struct vm_struct { const void *caller; }; +struct vmap_area { + unsigned long va_start; + unsigned long va_end; + unsigned long flags; + struct rb_node rb_node; /* address sorted rbtree */ + struct list_head list; /* address sorted list */ + struct list_head purge_list; /* "lazy purge" list */ + struct vm_struct *vm; + struct rcu_head rcu_head; +}; + /* * Highlevel APIs for driver use */ @@ -130,8 +143,7 @@ extern long vwrite(char *buf, char *addr, unsigned long count); /* * Internals. Dont't use.. */ -extern rwlock_t vmlist_lock; -extern struct vm_struct *vmlist; +extern struct list_head vmap_area_list; extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); @@ -158,4 +170,22 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) # endif #endif +struct vmalloc_info { + unsigned long used; + unsigned long largest_chunk; +}; + +#ifdef CONFIG_MMU +#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) +extern void get_vmalloc_info(struct vmalloc_info *vmi); +#else + +#define VMALLOC_TOTAL 0UL +#define get_vmalloc_info(vmi) \ +do { \ + (vmi)->used = 0; \ + (vmi)->largest_chunk = 0; \ +} while (0) +#endif + #endif /* _LINUX_VMALLOC_H */ diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h new file mode 100644 index 000000000000..76be077340ea --- /dev/null +++ b/include/linux/vmpressure.h @@ -0,0 +1,47 @@ +#ifndef __LINUX_VMPRESSURE_H +#define __LINUX_VMPRESSURE_H + +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/gfp.h> +#include <linux/types.h> +#include <linux/cgroup.h> + +struct vmpressure { + unsigned long scanned; + unsigned long reclaimed; + /* The lock is used to keep the scanned/reclaimed above in sync. */ + struct mutex sr_lock; + + /* The list of vmpressure_event structs. */ + struct list_head events; + /* Have to grab the lock on events traversal or modifications. */ + struct mutex events_lock; + + struct work_struct work; +}; + +struct mem_cgroup; + +#ifdef CONFIG_MEMCG +extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, + unsigned long scanned, unsigned long reclaimed); +extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); + +extern void vmpressure_init(struct vmpressure *vmpr); +extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); +extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); +extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); +extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, + struct eventfd_ctx *eventfd, + const char *args); +extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, + struct eventfd_ctx *eventfd); +#else +static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, + unsigned long scanned, unsigned long reclaimed) {} +static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, + int prio) {} +#endif /* CONFIG_MEMCG */ +#endif /* __LINUX_VMPRESSURE_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 5fd71a7d0dfd..c586679b6fef 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -48,13 +48,8 @@ static inline void count_vm_events(enum vm_event_item item, long delta) } extern void all_vm_events(unsigned long *); -#ifdef CONFIG_HOTPLUG + extern void vm_events_fold_cpu(int cpu); -#else -static inline void vm_events_fold_cpu(int cpu) -{ -} -#endif #else diff --git a/include/linux/wait.h b/include/linux/wait.h index 7cb64d4b499d..ac38be2692d8 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -330,6 +330,92 @@ do { \ __ret; \ }) +#define __wait_event_hrtimeout(wq, condition, timeout, state) \ +({ \ + int __ret = 0; \ + DEFINE_WAIT(__wait); \ + struct hrtimer_sleeper __t; \ + \ + hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \ + HRTIMER_MODE_REL); \ + hrtimer_init_sleeper(&__t, current); \ + if ((timeout).tv64 != KTIME_MAX) \ + hrtimer_start_range_ns(&__t.timer, timeout, \ + current->timer_slack_ns, \ + HRTIMER_MODE_REL); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, state); \ + if (condition) \ + break; \ + if (state == TASK_INTERRUPTIBLE && \ + signal_pending(current)) { \ + __ret = -ERESTARTSYS; \ + break; \ + } \ + if (!__t.task) { \ + __ret = -ETIME; \ + break; \ + } \ + schedule(); \ + } \ + \ + hrtimer_cancel(&__t.timer); \ + destroy_hrtimer_on_stack(&__t.timer); \ + finish_wait(&wq, &__wait); \ + __ret; \ +}) + +/** + * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, as a ktime_t + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function returns 0 if @condition became true, or -ETIME if the timeout + * elapsed. + */ +#define wait_event_hrtimeout(wq, condition, timeout) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = __wait_event_hrtimeout(wq, condition, timeout, \ + TASK_UNINTERRUPTIBLE); \ + __ret; \ +}) + +/** + * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, as a ktime_t + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function returns 0 if @condition became true, -ERESTARTSYS if it was + * interrupted by a signal, or -ETIME if the timeout elapsed. + */ +#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \ +({ \ + long __ret = 0; \ + if (!(condition)) \ + __ret = __wait_event_hrtimeout(wq, condition, timeout, \ + TASK_INTERRUPTIBLE); \ + __ret; \ +}) + #define __wait_event_interruptible_exclusive(wq, condition, ret) \ do { \ DEFINE_WAIT(__wait); \ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 717975639378..623488fdc1f5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -92,6 +92,9 @@ enum { /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, + + /* maximum string length for set_worker_desc() */ + WORKER_DESC_LEN = 24, }; struct work_struct { @@ -447,6 +450,8 @@ extern void workqueue_set_max_active(struct workqueue_struct *wq, extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); +extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); +extern void print_worker_info(const char *log_lvl, struct task_struct *task); /** * queue_work - queue work on a workqueue diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9a9367c0c076..579a5007c696 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -5,6 +5,7 @@ #define WRITEBACK_H #include <linux/sched.h> +#include <linux/workqueue.h> #include <linux/fs.h> DECLARE_PER_CPU(int, dirty_throttle_leaks); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f9f5b057b480..47323155fc17 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -905,7 +905,7 @@ struct ip_vs_app { struct ipvs_master_sync_state { struct list_head sync_queue; struct ip_vs_sync_buff *sync_buff; - int sync_queue_len; + unsigned long sync_queue_len; unsigned int sync_queue_delay; struct task_struct *master_thread; struct delayed_work master_wakeup_work; @@ -998,7 +998,7 @@ struct netns_ipvs { int sysctl_snat_reroute; int sysctl_sync_ver; int sysctl_sync_ports; - int sysctl_sync_qlen_max; + unsigned long sysctl_sync_qlen_max; int sysctl_sync_sock_size; int sysctl_cache_bypass; int sysctl_expire_nodest_conn; @@ -1085,7 +1085,7 @@ static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) return ACCESS_ONCE(ipvs->sysctl_sync_ports); } -static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs) +static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs) { return ipvs->sysctl_sync_qlen_max; } @@ -1138,7 +1138,7 @@ static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) return 1; } -static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs) +static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs) { return IPVS_SYNC_QLEN_MAX; } diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild deleted file mode 100644 index 562ff9d591b8..000000000000 --- a/include/scsi/Kbuild +++ /dev/null @@ -1 +0,0 @@ -header-y += fc/ diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h new file mode 100644 index 000000000000..0421f49a20f7 --- /dev/null +++ b/include/trace/events/filemap.h @@ -0,0 +1,58 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM filemap + +#if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FILEMAP_H + +#include <linux/types.h> +#include <linux/tracepoint.h> +#include <linux/mm.h> +#include <linux/memcontrol.h> +#include <linux/device.h> +#include <linux/kdev_t.h> + +DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, + + TP_PROTO(struct page *page), + + TP_ARGS(page), + + TP_STRUCT__entry( + __field(struct page *, page) + __field(unsigned long, i_ino) + __field(unsigned long, index) + __field(dev_t, s_dev) + ), + + TP_fast_assign( + __entry->page = page; + __entry->i_ino = page->mapping->host->i_ino; + __entry->index = page->index; + if (page->mapping->host->i_sb) + __entry->s_dev = page->mapping->host->i_sb->s_dev; + else + __entry->s_dev = page->mapping->host->i_rdev; + ), + + TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), + __entry->i_ino, + __entry->page, + page_to_pfn(__entry->page), + __entry->index << PAGE_SHIFT) +); + +DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache, + TP_PROTO(struct page *page), + TP_ARGS(page) + ); + +DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, + TP_PROTO(struct page *page), + TP_ARGS(page) + ); + +#endif /* _TRACE_FILEMAP_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h index 94ec79cc011a..c008bc99f9fa 100644 --- a/include/trace/events/printk.h +++ b/include/trace/events/printk.h @@ -6,31 +6,18 @@ #include <linux/tracepoint.h> -TRACE_EVENT_CONDITION(console, - TP_PROTO(const char *log_buf, unsigned start, unsigned end, - unsigned log_buf_len), +TRACE_EVENT(console, + TP_PROTO(const char *text, size_t len), - TP_ARGS(log_buf, start, end, log_buf_len), - - TP_CONDITION(start != end), + TP_ARGS(text, len), TP_STRUCT__entry( - __dynamic_array(char, msg, end - start + 1) + __dynamic_array(char, msg, len + 1) ), TP_fast_assign( - if ((start & (log_buf_len - 1)) > (end & (log_buf_len - 1))) { - memcpy(__get_dynamic_array(msg), - log_buf + (start & (log_buf_len - 1)), - log_buf_len - (start & (log_buf_len - 1))); - memcpy((char *)__get_dynamic_array(msg) + - log_buf_len - (start & (log_buf_len - 1)), - log_buf, end & (log_buf_len - 1)); - } else - memcpy(__get_dynamic_array(msg), - log_buf + (start & (log_buf_len - 1)), - end - start); - ((char *)__get_dynamic_array(msg))[end - start] = 0; + memcpy(__get_dynamic_array(msg), text, len); + ((char *)__get_dynamic_array(msg))[len] = 0; ), TP_printk("%s", __get_str(msg)) diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index c7fc1e6517c3..a4ed56cf0eac 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -88,7 +88,6 @@ struct inodes_stat_t { #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ /* These sb flags are internal to the kernel */ -#define MS_SNAP_STABLE (1<<27) /* Snapshot pages during writeback, if needed */ #define MS_NOSEC (1<<28) #define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 022ab186a812..52ebcc89f306 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -5,6 +5,7 @@ /* has the defines to get at the registers. */ +#include <linux/types.h> #define PTRACE_TRACEME 0 #define PTRACE_PEEKTEXT 1 @@ -52,6 +53,17 @@ #define PTRACE_INTERRUPT 0x4207 #define PTRACE_LISTEN 0x4208 +#define PTRACE_PEEKSIGINFO 0x4209 + +struct ptrace_peeksiginfo_args { + __u64 off; /* from which siginfo to start */ + __u32 flags; + __s32 nr; /* how may siginfos to take */ +}; + +/* Read signals from a shared (process wide) queue */ +#define PTRACE_PEEKSIGINFO_SHARED (1 << 0) + /* Wait extended result codes for the above trace options. */ #define PTRACE_EVENT_FORK 1 #define PTRACE_EVENT_VFORK 2 diff --git a/include/video/platform_lcd.h b/include/video/platform_lcd.h index ad3bdfe743b2..23864b284147 100644 --- a/include/video/platform_lcd.h +++ b/include/video/platform_lcd.h @@ -15,6 +15,7 @@ struct plat_lcd_data; struct fb_info; struct plat_lcd_data { + int (*probe)(struct plat_lcd_data *); void (*set_power)(struct plat_lcd_data *, unsigned int power); int (*match_fb)(struct plat_lcd_data *, struct fb_info *); }; diff --git a/include/xen/tmem.h b/include/xen/tmem.h index 591550a22ac7..3930a90045ff 100644 --- a/include/xen/tmem.h +++ b/include/xen/tmem.h @@ -3,7 +3,15 @@ #include <linux/types.h> +#ifdef CONFIG_XEN_TMEM_MODULE +#define tmem_enabled true +#else /* defined in drivers/xen/tmem.c */ extern bool tmem_enabled; +#endif + +#ifdef CONFIG_XEN_SELFBALLOONING +extern int xen_selfballoon_init(bool, bool); +#endif #endif /* _XEN_TMEM_H */ diff --git a/init/Kconfig b/init/Kconfig index ec3cb7bd085d..346cd1b069af 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -98,10 +98,13 @@ config HAVE_KERNEL_XZ config HAVE_KERNEL_LZO bool +config HAVE_KERNEL_LZ4 + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -168,6 +171,18 @@ config KERNEL_LZO size is about 10% bigger than gzip; however its speed (both compression and decompression) is the fastest. +config KERNEL_LZ4 + bool "LZ4" + depends on HAVE_KERNEL_LZ4 + help + LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding. + A preliminary version of LZ4 de/compression tool is available at + <https://code.google.com/p/lz4/>. + + Its compression ratio is worse than LZO. The size of the kernel + is about 8% bigger than LZO. But the decompression speed is + faster than LZO. + endchoice config DEFAULT_HOSTNAME @@ -931,6 +946,23 @@ config MEMCG_KMEM the kmem extension can use it to guarantee that no group of processes will ever exhaust kernel resources alone. +config MEMCG_DEBUG_ASYNC_DESTROY + bool "Memory Resource Controller Debug asynchronous object destruction" + depends on MEMCG_KMEM || MEMCG_SWAP + default n + help + When a memcg is destroyed, the memory consumed by it may not be + immediately freed. This is because when some extensions are used, such + as swap or kernel memory, objects can outlive the group and hold a + reference to it. + + If this is the case, the dangling_memcgs file will show information + about what are the memcgs still alive, and which references are still + preventing it to be freed. There is nothing wrong with that, but it is + very useful when debugging, to know where this memory is being held. + This is a developer-oriented debugging facility only, and no + guarantees of interface stability will be given. + config CGROUP_HUGETLB bool "HugeTLB Resource Controller for Control Groups" depends on RESOURCE_COUNTERS && HUGETLB_PAGE @@ -1221,7 +1253,7 @@ config SYSCTL config ANON_INODES bool -menuconfig EXPERT +config EXPERT bool "Configure standard kernel features (expert users)" # Unhide debug options, to make the on-by-default options visible select DEBUG_KERNEL diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index a32ec1ce882b..3e0878e8a80d 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -50,6 +50,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) static void __init handle_initrd(void) { + struct subprocess_info *info; static char *argv[] = { "linuxrc", NULL, }; extern char *envp_init[]; int error; @@ -70,8 +71,11 @@ static void __init handle_initrd(void) */ current->flags |= PF_FREEZER_SKIP; - call_usermodehelper_fns("/linuxrc", argv, envp_init, UMH_WAIT_PROC, - init_linuxrc, NULL, NULL); + info = call_usermodehelper_setup("/linuxrc", argv, envp_init, + GFP_KERNEL, init_linuxrc, NULL, NULL); + if (!info) + return; + call_usermodehelper_exec(info, UMH_WAIT_PROC); current->flags &= ~PF_FREEZER_SKIP; diff --git a/init/main.c b/init/main.c index d9c02e1168f3..b3bc3c95562c 100644 --- a/init/main.c +++ b/init/main.c @@ -174,8 +174,8 @@ static int __init obsolete_checksetup(char *line) if (line[n] == '\0' || line[n] == '=') had_early_param = 1; } else if (!p->setup_func) { - printk(KERN_WARNING "Parameter %s is obsolete," - " ignored\n", p->str); + pr_warn("Parameter %s is obsolete, ignored\n", + p->str); return 1; } else if (p->setup_func(line + n)) return 1; @@ -398,8 +398,7 @@ static int __init do_early_param(char *param, char *val, const char *unused) strcmp(p->str, "earlycon") == 0) ) { if (p->setup_func(val) != 0) - printk(KERN_WARNING - "Malformed early option '%s'\n", param); + pr_warn("Malformed early option '%s'\n", param); } } /* We accept everything at this stage. */ @@ -496,7 +495,7 @@ asmlinkage void __init start_kernel(void) */ boot_cpu_init(); page_address_init(); - printk(KERN_NOTICE "%s", linux_banner); + pr_notice("%s", linux_banner); setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); mm_init_cpumask(&init_mm); @@ -508,7 +507,7 @@ asmlinkage void __init start_kernel(void) build_all_zonelists(NULL, NULL); page_alloc_init(); - printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); + pr_notice("Kernel command line: %s\n", boot_command_line); parse_early_param(); parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, @@ -538,11 +537,8 @@ asmlinkage void __init start_kernel(void) * fragile until we cpu_idle() for the first time. */ preempt_disable(); - if (!irqs_disabled()) { - printk(KERN_WARNING "start_kernel(): bug: interrupts were " - "enabled *very* early, fixing it\n"); + if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); - } idr_init_cache(); perf_event_init(); rcu_init(); @@ -558,9 +554,7 @@ asmlinkage void __init start_kernel(void) time_init(); profile_init(); call_function_init(); - if (!irqs_disabled()) - printk(KERN_CRIT "start_kernel(): bug: interrupts were " - "enabled early\n"); + WARN(!irqs_disabled(), "Interrupts were enabled early\n"); early_boot_irqs_disabled = false; local_irq_enable(); @@ -587,8 +581,7 @@ asmlinkage void __init start_kernel(void) #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { - printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " - "disabling it.\n", + pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n", page_to_pfn(virt_to_page((void *)initrd_start)), min_low_pfn); initrd_start = 0; @@ -667,14 +660,14 @@ static int __init_or_module do_one_initcall_debug(initcall_t fn) unsigned long long duration; int ret; - printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); + pr_debug("calling %pF @ %i\n", fn, task_pid_nr(current)); calltime = ktime_get(); ret = fn(); rettime = ktime_get(); delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn, - ret, duration); + pr_debug("initcall %pF returned %d after %lld usecs\n", + fn, ret, duration); return ret; } @@ -702,9 +695,7 @@ int __init_or_module do_one_initcall(initcall_t fn) strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf)); local_irq_enable(); } - if (msgbuf[0]) { - printk("initcall %pF returned with %s\n", fn, msgbuf); - } + WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf); return ret; } @@ -832,8 +823,7 @@ static int __ref kernel_init(void *unused) if (ramdisk_execute_command) { if (!run_init_process(ramdisk_execute_command)) return 0; - printk(KERN_WARNING "Failed to execute %s\n", - ramdisk_execute_command); + pr_err("Failed to execute %s\n", ramdisk_execute_command); } /* @@ -845,8 +835,8 @@ static int __ref kernel_init(void *unused) if (execute_command) { if (!run_init_process(execute_command)) return 0; - printk(KERN_WARNING "Failed to execute %s. Attempting " - "defaults...\n", execute_command); + pr_err("Failed to execute %s. Attempting defaults...\n", + execute_command); } if (!run_init_process("/sbin/init") || !run_init_process("/etc/init") || @@ -891,7 +881,7 @@ static noinline void __init kernel_init_freeable(void) /* Open the /dev/console on the rootfs, this should never fail */ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - printk(KERN_WARNING "Warning: unable to open an initial console.\n"); + pr_err("Warning: unable to open an initial console.\n"); (void) sys_dup(0); (void) sys_dup(0); diff --git a/ipc/msg.c b/ipc/msg.c index fede1d06ef30..9d11955dc0f3 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -66,6 +66,7 @@ struct msg_sender { #define SEARCH_EQUAL 2 #define SEARCH_NOTEQUAL 3 #define SEARCH_LESSEQUAL 4 +#define SEARCH_NUMBER 5 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) @@ -237,14 +238,9 @@ static inline void ss_del(struct msg_sender *mss) static void ss_wakeup(struct list_head *h, int kill) { - struct list_head *tmp; + struct msg_sender *mss, *t; - tmp = h->next; - while (tmp != h) { - struct msg_sender *mss; - - mss = list_entry(tmp, struct msg_sender, list); - tmp = tmp->next; + list_for_each_entry_safe(mss, t, h, list) { if (kill) mss->list.next = NULL; wake_up_process(mss->tsk); @@ -253,14 +249,9 @@ static void ss_wakeup(struct list_head *h, int kill) static void expunge_all(struct msg_queue *msq, int res) { - struct list_head *tmp; - - tmp = msq->q_receivers.next; - while (tmp != &msq->q_receivers) { - struct msg_receiver *msr; + struct msg_receiver *msr, *t; - msr = list_entry(tmp, struct msg_receiver, r_list); - tmp = tmp->next; + list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { msr->r_msg = NULL; wake_up_process(msr->r_tsk); smp_mb(); @@ -278,7 +269,7 @@ static void expunge_all(struct msg_queue *msq, int res) */ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { - struct list_head *tmp; + struct msg_msg *msg, *t; struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); expunge_all(msq, -EIDRM); @@ -286,11 +277,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) msg_rmid(ns, msq); msg_unlock(msq); - tmp = msq->q_messages.next; - while (tmp != &msq->q_messages) { - struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list); - - tmp = tmp->next; + list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { atomic_dec(&ns->msg_hdrs); free_msg(msg); } @@ -583,6 +570,7 @@ static int testmsg(struct msg_msg *msg, long type, int mode) switch(mode) { case SEARCH_ANY: + case SEARCH_NUMBER: return 1; case SEARCH_LESSEQUAL: if (msg->m_type <=type) @@ -602,14 +590,9 @@ static int testmsg(struct msg_msg *msg, long type, int mode) static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) { - struct list_head *tmp; + struct msg_receiver *msr, *t; - tmp = msq->q_receivers.next; - while (tmp != &msq->q_receivers) { - struct msg_receiver *msr; - - msr = list_entry(tmp, struct msg_receiver, r_list); - tmp = tmp->next; + list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { if (testmsg(msg, msr->r_msgtype, msr->r_mode) && !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, msr->r_msgtype, msr->r_mode)) { @@ -738,6 +721,8 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, static inline int convert_mode(long *msgtyp, int msgflg) { + if (msgflg & MSG_COPY) + return SEARCH_NUMBER; /* * find message of correct type. * msgtyp = 0 => get first. @@ -774,14 +759,10 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) * This function creates new kernel message structure, large enough to store * bufsz message bytes. */ -static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, - int msgflg, long *msgtyp, - unsigned long *copy_number) +static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) { struct msg_msg *copy; - *copy_number = *msgtyp; - *msgtyp = 0; /* * Create dummy message to copy real message to. */ @@ -797,9 +778,7 @@ static inline void free_copy(struct msg_msg *copy) free_msg(copy); } #else -static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, - int msgflg, long *msgtyp, - unsigned long *copy_number) +static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) { return ERR_PTR(-ENOSYS); } @@ -809,6 +788,30 @@ static inline void free_copy(struct msg_msg *copy) } #endif +static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) +{ + struct msg_msg *msg; + long count = 0; + + list_for_each_entry(msg, &msq->q_messages, m_list) { + if (testmsg(msg, *msgtyp, mode) && + !security_msg_queue_msgrcv(msq, msg, current, + *msgtyp, mode)) { + if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { + *msgtyp = msg->m_type - 1; + } else if (mode == SEARCH_NUMBER) { + if (*msgtyp == count) + return msg; + } else + return msg; + count++; + } + } + + return ERR_PTR(-EAGAIN); +} + + long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, long (*msg_handler)(void __user *, struct msg_msg *, size_t)) @@ -818,19 +821,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int mode; struct ipc_namespace *ns; struct msg_msg *copy = NULL; - unsigned long copy_number = 0; - - ns = current->nsproxy->ipc_ns; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; if (msgflg & MSG_COPY) { - copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax), - msgflg, &msgtyp, ©_number); + copy = prepare_copy(buf, bufsz); if (IS_ERR(copy)) return PTR_ERR(copy); } mode = convert_mode(&msgtyp, msgflg); + ns = current->nsproxy->ipc_ns; msq = msg_lock_check(ns, msqid); if (IS_ERR(msq)) { @@ -840,45 +840,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, for (;;) { struct msg_receiver msr_d; - struct list_head *tmp; - long msg_counter = 0; msg = ERR_PTR(-EACCES); if (ipcperms(ns, &msq->q_perm, S_IRUGO)) goto out_unlock; - msg = ERR_PTR(-EAGAIN); - tmp = msq->q_messages.next; - while (tmp != &msq->q_messages) { - struct msg_msg *walk_msg; - - walk_msg = list_entry(tmp, struct msg_msg, m_list); - if (testmsg(walk_msg, msgtyp, mode) && - !security_msg_queue_msgrcv(msq, walk_msg, current, - msgtyp, mode)) { - - msg = walk_msg; - if (mode == SEARCH_LESSEQUAL && - walk_msg->m_type != 1) { - msgtyp = walk_msg->m_type - 1; - } else if (msgflg & MSG_COPY) { - if (copy_number == msg_counter) { - /* - * Found requested message. - * Copy it. - */ - msg = copy_msg(msg, copy); - if (IS_ERR(msg)) - goto out_unlock; - break; - } - msg = ERR_PTR(-EAGAIN); - } else - break; - msg_counter++; - } - tmp = tmp->next; - } + msg = find_msg(msq, &msgtyp, mode); + if (!IS_ERR(msg)) { /* * Found a suitable message. @@ -892,8 +860,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * If we are copying, then do not unlink message and do * not update queue parameters. */ - if (msgflg & MSG_COPY) + if (msgflg & MSG_COPY) { + msg = copy_msg(msg, copy); goto out_unlock; + } list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 5df8e4bf1db0..d43439e6eb47 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -17,7 +17,7 @@ #include <linux/ipc_namespace.h> #include <linux/utsname.h> #include <linux/proc_fs.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "util.h" @@ -37,59 +37,70 @@ struct ipc_namespace init_ipc_ns = { atomic_t nr_ipc_ns = ATOMIC_INIT(1); struct msg_msgseg { - struct msg_msgseg* next; + struct msg_msgseg *next; /* the next part of the message follows immediately */ }; -#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg)) -#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg)) +#define DATALEN_MSG (int)(PAGE_SIZE-sizeof(struct msg_msg)) +#define DATALEN_SEG (int)(PAGE_SIZE-sizeof(struct msg_msgseg)) -struct msg_msg *load_msg(const void __user *src, int len) + +static struct msg_msg *alloc_msg(int len) { struct msg_msg *msg; struct msg_msgseg **pseg; - int err; int alen; - alen = len; - if (alen > DATALEN_MSG) - alen = DATALEN_MSG; - + alen = min(len, DATALEN_MSG); msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL); if (msg == NULL) - return ERR_PTR(-ENOMEM); + return NULL; msg->next = NULL; msg->security = NULL; - if (copy_from_user(msg + 1, src, alen)) { - err = -EFAULT; - goto out_err; - } - len -= alen; - src = ((char __user *)src) + alen; pseg = &msg->next; while (len > 0) { struct msg_msgseg *seg; - alen = len; - if (alen > DATALEN_SEG) - alen = DATALEN_SEG; - seg = kmalloc(sizeof(*seg) + alen, - GFP_KERNEL); - if (seg == NULL) { - err = -ENOMEM; + alen = min(len, DATALEN_SEG); + seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL); + if (seg == NULL) goto out_err; - } *pseg = seg; seg->next = NULL; - if (copy_from_user(seg + 1, src, alen)) { - err = -EFAULT; - goto out_err; - } pseg = &seg->next; len -= alen; - src = ((char __user *)src) + alen; + } + + return msg; + +out_err: + free_msg(msg); + return NULL; +} + +struct msg_msg *load_msg(const void __user *src, int len) +{ + struct msg_msg *msg; + struct msg_msgseg *seg; + int err = -EFAULT; + int alen; + + msg = alloc_msg(len); + if (msg == NULL) + return ERR_PTR(-ENOMEM); + + alen = min(len, DATALEN_MSG); + if (copy_from_user(msg + 1, src, alen)) + goto out_err; + + for (seg = msg->next; seg != NULL; seg = seg->next) { + len -= alen; + src = (char __user *)src + alen; + alen = min(len, DATALEN_SEG); + if (copy_from_user(seg + 1, src, alen)) + goto out_err; } err = security_msg_msg_alloc(msg); @@ -113,23 +124,16 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) if (src->m_ts > dst->m_ts) return ERR_PTR(-EINVAL); - alen = len; - if (alen > DATALEN_MSG) - alen = DATALEN_MSG; - + alen = min(len, DATALEN_MSG); memcpy(dst + 1, src + 1, alen); - len -= alen; - dst_pseg = dst->next; - src_pseg = src->next; - while (len > 0) { - alen = len; - if (alen > DATALEN_SEG) - alen = DATALEN_SEG; - memcpy(dst_pseg + 1, src_pseg + 1, alen); - dst_pseg = dst_pseg->next; + for (dst_pseg = dst->next, src_pseg = src->next; + src_pseg != NULL; + dst_pseg = dst_pseg->next, src_pseg = src_pseg->next) { + len -= alen; - src_pseg = src_pseg->next; + alen = min(len, DATALEN_SEG); + memcpy(dst_pseg + 1, src_pseg + 1, alen); } dst->m_type = src->m_type; @@ -148,24 +152,16 @@ int store_msg(void __user *dest, struct msg_msg *msg, int len) int alen; struct msg_msgseg *seg; - alen = len; - if (alen > DATALEN_MSG) - alen = DATALEN_MSG; + alen = min(len, DATALEN_MSG); if (copy_to_user(dest, msg + 1, alen)) return -1; - len -= alen; - dest = ((char __user *)dest) + alen; - seg = msg->next; - while (len > 0) { - alen = len; - if (alen > DATALEN_SEG) - alen = DATALEN_SEG; + for (seg = msg->next; seg != NULL; seg = seg->next) { + len -= alen; + dest = (char __user *)dest + alen; + alen = min(len, DATALEN_SEG); if (copy_to_user(dest, seg + 1, alen)) return -1; - len -= alen; - dest = ((char __user *)dest) + alen; - seg = seg->next; } return 0; } diff --git a/ipc/sem.c b/ipc/sem.c index 5b167d00efa6..57116161a5be 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -61,8 +61,8 @@ * - A woken up task may not even touch the semaphore array anymore, it may * have been destroyed already by a semctl(RMID). * - The synchronizations between wake-ups due to a timeout/signal and a - * wake-up due to a completed semaphore operation is achieved by using an - * intermediate state (IN_WAKEUP). + * wake-up due to a completed semaphore operation is achieved by using a + * special wakeup scheme (queuewakeup_wait and support functions) * - UNDO values are stored in an array (one per process and per * semaphore array, lazily allocated). For backwards compatibility, multiple * modes for the UNDO variables are supported (per process, per thread) @@ -90,16 +90,145 @@ #include <asm/uaccess.h> #include "util.h" + +#ifdef CONFIG_PREEMPT_RT_BASE + #define SYSVSEM_COMPLETION 1 +#else + #define SYSVSEM_CUSTOM 1 +#endif + +#ifdef SYSVSEM_COMPLETION + /* Using a completion causes some overhead, but avoids a busy loop + * that increases the worst case latency. + */ + struct queue_done { + struct completion done; + }; + + static void queuewakeup_prepare(void) + { + /* no preparation necessary */ + } + + static void queuewakeup_completed(void) + { + /* empty */ + } + + static void queuewakeup_block(struct queue_done *qd) + { + /* empty */ + } + + static void queuewakeup_handsoff(struct queue_done *qd) + { + complete_all(&qd->done); + } + + static void queuewakeup_init(struct queue_done *qd) + { + init_completion(&qd->done); + } + + static void queuewakeup_wait(struct queue_done *qd) + { + wait_for_completion(&qd->done); + } + +#elif defined(SYSVSEM_SPINLOCK) + /* Note: Spinlocks do not work because: + * - lockdep complains [could be fixed] + * - only 255 concurrent spin_lock() calls are permitted, then the + * preempt-counter overflows + */ +#error SYSVSEM_SPINLOCK is a prove of concept, does not work. + struct queue_done { + spinlock_t done; + }; + + static void queuewakeup_prepare(void) + { + /* empty */ + } + + static void queuewakeup_completed(void) + { + /* empty */ + } + + static void queuewakeup_block(struct queue_done *qd) + { + BUG_ON(spin_is_locked(&qd->done)); + spin_lock(&qd->done); + } + + static void queuewakeup_handsoff(struct queue_done *qd) + { + spin_unlock(&qd->done); + } + + static void queuewakeup_init(struct queue_done *qd) + { + spin_lock_init(&qd->done); + } + + static void queuewakeup_wait(struct queue_done *qd) + { + spin_unlock_wait(&qd->done); + } +#else + struct queue_done { + atomic_t done; + }; + + static void queuewakeup_prepare(void) + { + preempt_disable(); + } + + static void queuewakeup_completed(void) + { + preempt_enable(); + } + + static void queuewakeup_block(struct queue_done *qd) + { + BUG_ON(atomic_read(&qd->done) != 1); + atomic_set(&qd->done, 2); + } + + static void queuewakeup_handsoff(struct queue_done *qd) + { + BUG_ON(atomic_read(&qd->done) != 2); + smp_mb(); + atomic_set(&qd->done, 1); + } + + static void queuewakeup_init(struct queue_done *qd) + { + atomic_set(&qd->done, 1); + } + + static void queuewakeup_wait(struct queue_done *qd) + { + while (atomic_read(&qd->done) != 1) + cpu_relax(); + + smp_mb(); + } +#endif + + /* One semaphore structure for each semaphore in the system. */ struct sem { int semval; /* current value */ int sempid; /* pid of last operation */ + spinlock_t lock; /* spinlock for fine-grained semtimedop */ struct list_head sem_pending; /* pending single-sop operations */ }; /* One queue for each sleeping process in the system. */ struct sem_queue { - struct list_head simple_list; /* queue of pending operations */ struct list_head list; /* queue of pending operations */ struct task_struct *sleeper; /* this process */ struct sem_undo *undo; /* undo structure */ @@ -108,6 +237,7 @@ struct sem_queue { struct sembuf *sops; /* array of pending operations */ int nsops; /* number of operations */ int alter; /* does *sops alter the array? */ + struct queue_done done; /* completion synchronization */ }; /* Each task has a list of undo requests. They are executed automatically @@ -138,7 +268,6 @@ struct sem_undo_list { #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) -#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) #define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) static int newary(struct ipc_namespace *, struct ipc_params *); @@ -191,47 +320,147 @@ void __init sem_init (void) } /* + * If the sem_array contains just one semaphore, or if multiple + * semops are performed in one syscall, or if there are complex + * operations pending, the whole sem_array is locked. + * If one semop is performed on an array with multiple semaphores, + * get a shared lock on the array, and lock the individual semaphore. + * + * Carefully guard against sma->complex_count changing between zero + * and non-zero while we are spinning for the lock. The value of + * sma->complex_count cannot change while we are holding the lock, + * so sem_unlock should be fine. + */ +static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, + int nsops) +{ + int locknum; + if (nsops == 1 && !sma->complex_count) { + struct sem *sem = sma->sem_base + sops->sem_num; + + /* Lock just the semaphore we are interested in. */ + spin_lock(&sem->lock); + + /* + * If sma->complex_count was set while we were spinning, + * we may need to look at things we did not lock here. + */ + if (unlikely(sma->complex_count)) { + spin_unlock(&sem->lock); + goto lock_all; + } + locknum = sops->sem_num; + } else { + int i; + /* Lock the sem_array, and all the semaphore locks */ + lock_all: + spin_lock(&sma->sem_perm.lock); + for (i = 0; i < sma->sem_nsems; i++) { + struct sem *sem = sma->sem_base + i; + spin_lock(&sem->lock); + } + locknum = -1; + } + return locknum; +} + +static inline void sem_unlock(struct sem_array *sma, int locknum) +{ + if (locknum == -1) { + int i; + for (i = 0; i < sma->sem_nsems; i++) { + struct sem *sem = sma->sem_base + i; + spin_unlock(&sem->lock); + } + spin_unlock(&sma->sem_perm.lock); + } else { + struct sem *sem = sma->sem_base + locknum; + spin_unlock(&sem->lock); + } + rcu_read_unlock(); +} + +/* * sem_lock_(check_) routines are called in the paths where the rw_mutex * is not held. */ -static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id) +static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, + int id, struct sembuf *sops, int nsops, int *locknum) { - struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id); + struct kern_ipc_perm *ipcp; + struct sem_array *sma; + + rcu_read_lock(); + ipcp = ipc_obtain_object(&sem_ids(ns), id); + if (IS_ERR(ipcp)) { + sma = ERR_CAST(ipcp); + goto err; + } + + sma = container_of(ipcp, struct sem_array, sem_perm); + *locknum = sem_lock(sma, sops, nsops); + + /* ipc_rmid() may have already freed the ID while sem_lock + * was spinning: verify that the structure is still valid + */ + if (!ipcp->deleted) + return container_of(ipcp, struct sem_array, sem_perm); + + sem_unlock(sma, *locknum); + sma = ERR_PTR(-EINVAL); +err: + rcu_read_unlock(); + return sma; +} + +static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) +{ + struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id); if (IS_ERR(ipcp)) - return (struct sem_array *)ipcp; + return ERR_CAST(ipcp); return container_of(ipcp, struct sem_array, sem_perm); } -static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns, - int id) +static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, + int id) { - struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); if (IS_ERR(ipcp)) - return (struct sem_array *)ipcp; + return ERR_CAST(ipcp); return container_of(ipcp, struct sem_array, sem_perm); } static inline void sem_lock_and_putref(struct sem_array *sma) { - ipc_lock_by_ptr(&sma->sem_perm); + rcu_read_lock(); + sem_lock(sma, NULL, -1); ipc_rcu_putref(sma); } static inline void sem_getref_and_unlock(struct sem_array *sma) { ipc_rcu_getref(sma); - ipc_unlock(&(sma)->sem_perm); + sem_unlock(sma, -1); } static inline void sem_putref(struct sem_array *sma) { - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); - ipc_unlock(&(sma)->sem_perm); + sem_lock_and_putref(sma); + sem_unlock(sma, -1); +} + +/* + * Call inside the rcu read section. + */ +static inline void sem_getref(struct sem_array *sma) +{ + sem_lock(sma, NULL, -1); + ipc_rcu_getref(sma); + sem_unlock(sma, -1); } static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) @@ -245,23 +474,27 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * - queue.status is initialized to -EINTR before blocking. * - wakeup is performed by * * unlinking the queue entry from sma->sem_pending - * * setting queue.status to IN_WAKEUP - * This is the notification for the blocked thread that a - * result value is imminent. + * * setting queue.status to the actual result code + * This is the notification for the blocked thread that someone + * (usually: update_queue()) completed the semtimedop() operation. * * call wake_up_process - * * set queue.status to the final value. + * * queuewakeup_handsoff(&q->done); * - the previously blocked thread checks queue.status: - * * if it's IN_WAKEUP, then it must wait until the value changes - * * if it's not -EINTR, then the operation was completed by - * update_queue. semtimedop can return queue.status without - * performing any operation on the sem array. - * * otherwise it must acquire the spinlock and check what's up. + * * if it's not -EINTR, then someone completed the operation. + * First, queuewakeup_wait() must be called. Afterwards, + * semtimedop must return queue.status without performing any + * operation on the sem array. + * - otherwise it must acquire the spinlock and repeat the test + * - If it is still -EINTR, then no update_queue() completed the + * operation, thus semtimedop() can proceed normally. * - * The two-stage algorithm is necessary to protect against the following + * queuewakeup_wait() is necessary to protect against the following * races: * - if queue.status is set after wake_up_process, then the woken up idle * thread could race forward and try (and fail) to acquire sma->lock - * before update_queue had a chance to set queue.status + * before update_queue had a chance to set queue.status. + * More importantly, it would mean that wake_up_process must be done + * while holding sma->lock, i.e. this would reduce the scalability. * - if queue.status is written before wake_up_process and if the * blocked process is woken up by a signal between writing * queue.status and the wake_up_process, then the woken up @@ -271,7 +504,6 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * (yes, this happened on s390 with sysv msg). * */ -#define IN_WAKEUP 1 /** * newary - Create a new semaphore set @@ -324,15 +556,18 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_base = (struct sem *) &sma[1]; - for (i = 0; i < nsems; i++) + for (i = 0; i < nsems; i++) { INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); + spin_lock_init(&sma->sem_base[i].lock); + spin_lock(&sma->sem_base[i].lock); + } sma->complex_count = 0; INIT_LIST_HEAD(&sma->sem_pending); INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); - sem_unlock(sma); + sem_unlock(sma, -1); return sma->sem_perm.id; } @@ -461,17 +696,13 @@ undo: static void wake_up_sem_queue_prepare(struct list_head *pt, struct sem_queue *q, int error) { - if (list_empty(pt)) { - /* - * Hold preempt off so that we don't get preempted and have the - * wakee busy-wait until we're scheduled back on. - */ - preempt_disable(); - } - q->status = IN_WAKEUP; - q->pid = error; + if (list_empty(pt)) + queuewakeup_prepare(); - list_add_tail(&q->simple_list, pt); + queuewakeup_block(&q->done); + q->status = error; + + list_add_tail(&q->list, pt); } /** @@ -480,8 +711,8 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, * * Do the actual wake-up. * The function is called without any locks held, thus the semaphore array - * could be destroyed already and the tasks can disappear as soon as the - * status is set to the actual return code. + * could be destroyed already and the tasks can disappear as soon as + * queuewakeup_handsoff() is called. */ static void wake_up_sem_queue_do(struct list_head *pt) { @@ -489,22 +720,19 @@ static void wake_up_sem_queue_do(struct list_head *pt) int did_something; did_something = !list_empty(pt); - list_for_each_entry_safe(q, t, pt, simple_list) { + list_for_each_entry_safe(q, t, pt, list) { wake_up_process(q->sleeper); - /* q can disappear immediately after writing q->status. */ - smp_wmb(); - q->status = q->pid; + /* q can disappear immediately after completing q->done */ + queuewakeup_handsoff(&q->done); } if (did_something) - preempt_enable(); + queuewakeup_completed(); } static void unlink_queue(struct sem_array *sma, struct sem_queue *q) { list_del(&q->list); - if (q->nsops == 1) - list_del(&q->simple_list); - else + if (q->nsops > 1) sma->complex_count--; } @@ -557,9 +785,9 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) } /* * semval is 0. Check if there are wait-for-zero semops. - * They must be the first entries in the per-semaphore simple queue + * They must be the first entries in the per-semaphore queue */ - h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list); + h = list_first_entry(&curr->sem_pending, struct sem_queue, list); BUG_ON(h->nsops != 1); BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); @@ -579,8 +807,9 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) * @pt: list head for the tasks that must be woken up. * * update_queue must be called after a semaphore in a semaphore array - * was modified. If multiple semaphore were modified, then @semnum - * must be set to -1. + * was modified. If multiple semaphores were modified, update_queue must + * be called with semnum = -1, as well as with the number of each modified + * semaphore. * The tasks that must be woken up are added to @pt. The return code * is stored in q->pid. * The function return 1 if at least one semop was completed successfully. @@ -590,30 +819,19 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) struct sem_queue *q; struct list_head *walk; struct list_head *pending_list; - int offset; int semop_completed = 0; - /* if there are complex operations around, then knowing the semaphore - * that was modified doesn't help us. Assume that multiple semaphores - * were modified. - */ - if (sma->complex_count) - semnum = -1; - - if (semnum == -1) { + if (semnum == -1) pending_list = &sma->sem_pending; - offset = offsetof(struct sem_queue, list); - } else { + else pending_list = &sma->sem_base[semnum].sem_pending; - offset = offsetof(struct sem_queue, simple_list); - } again: walk = pending_list->next; while (walk != pending_list) { int error, restart; - q = (struct sem_queue *)((char *)walk - offset); + q = container_of(walk, struct sem_queue, list); walk = walk->next; /* If we are scanning the single sop, per-semaphore list of @@ -672,9 +890,18 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop if (sma->complex_count || sops == NULL) { if (update_queue(sma, -1, pt)) otime = 1; + } + + if (!sops) { + /* No semops; something special is going on. */ + for (i = 0; i < sma->sem_nsems; i++) { + if (update_queue(sma, i, pt)) + otime = 1; + } goto done; } + /* Check the semaphores that were modified. */ for (i = 0; i < nsops; i++) { if (sops[i].sem_op > 0 || (sops[i].sem_op < 0 && @@ -745,6 +972,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) struct sem_queue *q, *tq; struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); struct list_head tasks; + int i; /* Free the existing undo structures for this semaphore set. */ assert_spin_locked(&sma->sem_perm.lock); @@ -763,10 +991,17 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) unlink_queue(sma, q); wake_up_sem_queue_prepare(&tasks, q, -EIDRM); } + for (i = 0; i < sma->sem_nsems; i++) { + struct sem *sem = sma->sem_base + i; + list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + } /* Remove the semaphore set from the IDR */ sem_rmid(ns, sma); - sem_unlock(sma); + sem_unlock(sma, -1); wake_up_sem_queue_do(&tasks); ns->used_sems -= sma->sem_nsems; @@ -842,18 +1077,25 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, case SEM_STAT: { struct semid64_ds tbuf; - int id; + int id = 0; + + memset(&tbuf, 0, sizeof(tbuf)); if (cmd == SEM_STAT) { - sma = sem_lock(ns, semid); - if (IS_ERR(sma)) - return PTR_ERR(sma); + rcu_read_lock(); + sma = sem_obtain_object(ns, semid); + if (IS_ERR(sma)) { + err = PTR_ERR(sma); + goto out_unlock; + } id = sma->sem_perm.id; } else { - sma = sem_lock_check(ns, semid); - if (IS_ERR(sma)) - return PTR_ERR(sma); - id = 0; + rcu_read_lock(); + sma = sem_obtain_object_check(ns, semid); + if (IS_ERR(sma)) { + err = PTR_ERR(sma); + goto out_unlock; + } } err = -EACCES; @@ -864,13 +1106,11 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, if (err) goto out_unlock; - memset(&tbuf, 0, sizeof(tbuf)); - kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); tbuf.sem_otime = sma->sem_otime; tbuf.sem_ctime = sma->sem_ctime; tbuf.sem_nsems = sma->sem_nsems; - sem_unlock(sma); + rcu_read_unlock(); if (copy_semid_to_user(p, &tbuf, version)) return -EFAULT; return id; @@ -879,7 +1119,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, return -EINVAL; } out_unlock: - sem_unlock(sma); + rcu_read_unlock(); return err; } @@ -890,7 +1130,6 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, struct sem_array *sma; struct sem* curr; int err; - int nsems; struct list_head tasks; int val; #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) @@ -901,31 +1140,39 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, val = arg; #endif - sma = sem_lock_check(ns, semid); - if (IS_ERR(sma)) - return PTR_ERR(sma); + if (val > SEMVMX || val < 0) + return -ERANGE; INIT_LIST_HEAD(&tasks); - nsems = sma->sem_nsems; - err = -EACCES; - if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) - goto out_unlock; + rcu_read_lock(); + sma = sem_obtain_object_check(ns, semid); + if (IS_ERR(sma)) { + rcu_read_unlock(); + return PTR_ERR(sma); + } + + if (semnum < 0 || semnum >= sma->sem_nsems) { + rcu_read_unlock(); + return -EINVAL; + } + + + if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { + rcu_read_unlock(); + return -EACCES; + } err = security_sem_semctl(sma, SETVAL); - if (err) - goto out_unlock; + if (err) { + rcu_read_unlock(); + return -EACCES; + } - err = -EINVAL; - if(semnum < 0 || semnum >= nsems) - goto out_unlock; + sem_lock(sma, NULL, -1); curr = &sma->sem_base[semnum]; - err = -ERANGE; - if (val > SEMVMX || val < 0) - goto out_unlock; - assert_spin_locked(&sma->sem_perm.lock); list_for_each_entry(un, &sma->list_id, list_id) un->semadj[semnum] = 0; @@ -935,11 +1182,9 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ do_smart_update(sma, NULL, 0, 0, &tasks); - err = 0; -out_unlock: - sem_unlock(sma); + sem_unlock(sma, -1); wake_up_sem_queue_do(&tasks); - return err; + return 0; } static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, @@ -947,27 +1192,34 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, { struct sem_array *sma; struct sem* curr; - int err; + int err, nsems; ushort fast_sem_io[SEMMSL_FAST]; ushort* sem_io = fast_sem_io; - int nsems; struct list_head tasks; - sma = sem_lock_check(ns, semid); - if (IS_ERR(sma)) + INIT_LIST_HEAD(&tasks); + + rcu_read_lock(); + sma = sem_obtain_object_check(ns, semid); + if (IS_ERR(sma)) { + rcu_read_unlock(); return PTR_ERR(sma); + } - INIT_LIST_HEAD(&tasks); nsems = sma->sem_nsems; err = -EACCES; if (ipcperms(ns, &sma->sem_perm, - cmd == SETALL ? S_IWUGO : S_IRUGO)) - goto out_unlock; + cmd == SETALL ? S_IWUGO : S_IRUGO)) { + rcu_read_unlock(); + goto out_wakeup; + } err = security_sem_semctl(sma, cmd); - if (err) - goto out_unlock; + if (err) { + rcu_read_unlock(); + goto out_wakeup; + } err = -EACCES; switch (cmd) { @@ -977,7 +1229,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int i; if(nsems > SEMMSL_FAST) { - sem_getref_and_unlock(sma); + sem_getref(sma); sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { @@ -987,15 +1239,17 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { - sem_unlock(sma); + sem_unlock(sma, -1); err = -EIDRM; goto out_free; } + sem_unlock(sma, -1); } + sem_lock(sma, NULL, -1); for (i = 0; i < sma->sem_nsems; i++) sem_io[i] = sma->sem_base[i].semval; - sem_unlock(sma); + sem_unlock(sma, -1); err = 0; if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) err = -EFAULT; @@ -1006,7 +1260,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int i; struct sem_undo *un; - sem_getref_and_unlock(sma); + ipc_rcu_getref(sma); + rcu_read_unlock(); if(nsems > SEMMSL_FAST) { sem_io = ipc_alloc(sizeof(ushort)*nsems); @@ -1031,7 +1286,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, } sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { - sem_unlock(sma); + sem_unlock(sma, -1); err = -EIDRM; goto out_free; } @@ -1053,9 +1308,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ } err = -EINVAL; - if(semnum < 0 || semnum >= nsems) - goto out_unlock; + if (semnum < 0 || semnum >= nsems) { + rcu_read_unlock(); + goto out_wakeup; + } + sem_lock(sma, NULL, -1); curr = &sma->sem_base[semnum]; switch (cmd) { @@ -1072,10 +1330,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, err = count_semzcnt(sma,semnum); goto out_unlock; } + out_unlock: - sem_unlock(sma); + sem_unlock(sma, -1); +out_wakeup: wake_up_sem_queue_do(&tasks); - out_free: if(sem_io != fast_sem_io) ipc_free(sem_io, sizeof(ushort)*nsems); @@ -1126,33 +1385,39 @@ static int semctl_down(struct ipc_namespace *ns, int semid, return -EFAULT; } - ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd, - &semid64.sem_perm, 0); + ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, + &semid64.sem_perm, 0); if (IS_ERR(ipcp)) return PTR_ERR(ipcp); sma = container_of(ipcp, struct sem_array, sem_perm); err = security_sem_semctl(sma, cmd); - if (err) + if (err) { + rcu_read_unlock(); goto out_unlock; + } switch(cmd){ case IPC_RMID: + sem_lock(sma, NULL, -1); freeary(ns, ipcp); goto out_up; case IPC_SET: + sem_lock(sma, NULL, -1); err = ipc_update_perm(&semid64.sem_perm, ipcp); if (err) goto out_unlock; sma->sem_ctime = get_seconds(); break; default: + rcu_read_unlock(); err = -EINVAL; + goto out_up; } out_unlock: - sem_unlock(sma); + sem_unlock(sma, -1); out_up: up_write(&sem_ids(ns).rw_mutex); return err; @@ -1277,16 +1542,18 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) spin_unlock(&ulp->lock); if (likely(un!=NULL)) goto out; - rcu_read_unlock(); /* no undo structure around - allocate one. */ /* step 1: figure out the size of the semaphore array */ - sma = sem_lock_check(ns, semid); - if (IS_ERR(sma)) + sma = sem_obtain_object_check(ns, semid); + if (IS_ERR(sma)) { + rcu_read_unlock(); return ERR_CAST(sma); + } nsems = sma->sem_nsems; - sem_getref_and_unlock(sma); + ipc_rcu_getref(sma); + rcu_read_unlock(); /* step 2: allocate new undo structure */ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); @@ -1298,7 +1565,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) /* step 3: Acquire the lock on semaphore array */ sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { - sem_unlock(sma); + sem_unlock(sma, -1); kfree(new); un = ERR_PTR(-EIDRM); goto out; @@ -1326,38 +1593,11 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) success: spin_unlock(&ulp->lock); rcu_read_lock(); - sem_unlock(sma); + sem_unlock(sma, -1); out: return un; } - -/** - * get_queue_result - Retrieve the result code from sem_queue - * @q: Pointer to queue structure - * - * Retrieve the return code from the pending queue. If IN_WAKEUP is found in - * q->status, then we must loop until the value is replaced with the final - * value: This may happen if a task is woken up by an unrelated event (e.g. - * signal) and in parallel the task is woken up by another task because it got - * the requested semaphores. - * - * The function can be called with or without holding the semaphore spinlock. - */ -static int get_queue_result(struct sem_queue *q) -{ - int error; - - error = q->status; - while (unlikely(error == IN_WAKEUP)) { - cpu_relax(); - error = q->status; - } - - return error; -} - - SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, unsigned, nsops, const struct timespec __user *, timeout) { @@ -1366,7 +1606,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, struct sembuf fast_sops[SEMOPM_FAST]; struct sembuf* sops = fast_sops, *sop; struct sem_undo *un; - int undos = 0, alter = 0, max; + int undos = 0, alter = 0, max, locknum; struct sem_queue queue; unsigned long jiffies_left = 0; struct ipc_namespace *ns; @@ -1421,7 +1661,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, INIT_LIST_HEAD(&tasks); - sma = sem_lock_check(ns, semid); + rcu_read_lock(); + sma = sem_obtain_object_check(ns, semid); if (IS_ERR(sma)) { if (un) rcu_read_unlock(); @@ -1429,6 +1670,24 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, goto out_free; } + error = -EFBIG; + if (max >= sma->sem_nsems) { + rcu_read_unlock(); + goto out_wakeup; + } + + error = -EACCES; + if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { + rcu_read_unlock(); + goto out_wakeup; + } + + error = security_sem_semop(sma, sops, nsops, alter); + if (error) { + rcu_read_unlock(); + goto out_wakeup; + } + /* * semid identifiers are not unique - find_alloc_undo may have * allocated an undo structure, it was invalidated by an RMID @@ -1437,6 +1696,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, * "un" itself is guaranteed by rcu. */ error = -EIDRM; + locknum = sem_lock(sma, sops, nsops); if (un) { if (un->semid == -1) { rcu_read_unlock(); @@ -1454,18 +1714,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, } } - error = -EFBIG; - if (max >= sma->sem_nsems) - goto out_unlock_free; - - error = -EACCES; - if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) - goto out_unlock_free; - - error = security_sem_semop(sma, sops, nsops, alter); - if (error) - goto out_unlock_free; - error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); if (error <= 0) { if (alter && error == 0) @@ -1483,73 +1731,63 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, queue.undo = un; queue.pid = task_tgid_vnr(current); queue.alter = alter; - if (alter) - list_add_tail(&queue.list, &sma->sem_pending); - else - list_add(&queue.list, &sma->sem_pending); if (nsops == 1) { struct sem *curr; curr = &sma->sem_base[sops->sem_num]; if (alter) - list_add_tail(&queue.simple_list, &curr->sem_pending); + list_add_tail(&queue.list, &curr->sem_pending); else - list_add(&queue.simple_list, &curr->sem_pending); + list_add(&queue.list, &curr->sem_pending); } else { - INIT_LIST_HEAD(&queue.simple_list); + if (alter) + list_add_tail(&queue.list, &sma->sem_pending); + else + list_add(&queue.list, &sma->sem_pending); sma->complex_count++; } queue.status = -EINTR; queue.sleeper = current; + queuewakeup_init(&queue.done); sleep_again: current->state = TASK_INTERRUPTIBLE; - sem_unlock(sma); + sem_unlock(sma, locknum); if (timeout) jiffies_left = schedule_timeout(jiffies_left); else schedule(); - error = get_queue_result(&queue); + error = queue.status; if (error != -EINTR) { /* fast path: update_queue already obtained all requested - * resources. - * Perform a smp_mb(): User space could assume that semop() - * is a memory barrier: Without the mb(), the cpu could - * speculatively read in user space stale data that was - * overwritten by the previous owner of the semaphore. + * resources. Just ensure that update_queue completed + * it's access to &queue. */ - smp_mb(); + queuewakeup_wait(&queue.done); goto out_free; } - sma = sem_lock(ns, semid); + sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum); /* * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. */ - error = get_queue_result(&queue); - - /* - * Array removed? If yes, leave without sem_unlock(). - */ - if (IS_ERR(sma)) { - goto out_free; - } - - - /* - * If queue.status != -EINTR we are woken up by another process. - * Leave without unlink_queue(), but with sem_unlock(). - */ - + error = queue.status; if (error != -EINTR) { - goto out_unlock_free; + /* If there is a return code, then we can leave immediately. */ + if (!IS_ERR(sma)) { + /* sem_lock() succeeded - then unlock */ + sem_unlock(sma, locknum); + } + /* Except that we must wait for the hands-off */ + queuewakeup_wait(&queue.done); + goto out_free; } /* @@ -1567,8 +1805,8 @@ sleep_again: unlink_queue(sma, &queue); out_unlock_free: - sem_unlock(sma); - + sem_unlock(sma, locknum); +out_wakeup: wake_up_sem_queue_do(&tasks); out_free: if(sops != fast_sops) @@ -1641,12 +1879,14 @@ void exit_sem(struct task_struct *tsk) semid = -1; else semid = un->semid; - rcu_read_unlock(); - if (semid == -1) + if (semid == -1) { + rcu_read_unlock(); break; + } - sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); + sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid); + sem_lock(sma, NULL, -1); /* exit_sem raced with IPC_RMID, nothing to do */ if (IS_ERR(sma)) @@ -1657,7 +1897,7 @@ void exit_sem(struct task_struct *tsk) /* exit_sem raced with IPC_RMID+semget() that created * exactly the same semid. Nothing to do. */ - sem_unlock(sma); + sem_unlock(sma, -1); continue; } @@ -1697,7 +1937,7 @@ void exit_sem(struct task_struct *tsk) /* maybe some queued-up processes were waiting for this */ INIT_LIST_HEAD(&tasks); do_smart_update(sma, NULL, 0, 1, &tasks); - sem_unlock(sma); + sem_unlock(sma, -1); wake_up_sem_queue_do(&tasks); kfree_rcu(un, rcu); diff --git a/ipc/shm.c b/ipc/shm.c index cb858df061d3..8247c49ec073 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -462,7 +462,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) size_t size = params->u.size; int error; struct shmid_kernel *shp; - int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; + size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; struct file * file; char name[13]; int id; diff --git a/ipc/util.c b/ipc/util.c index b6db68131a0e..18135bcfba38 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -23,6 +23,7 @@ #include <linux/msg.h> #include <linux/vmalloc.h> #include <linux/slab.h> +#include <linux/notifier.h> #include <linux/capability.h> #include <linux/highuid.h> #include <linux/security.h> @@ -47,19 +48,16 @@ struct ipc_proc_iface { int (*show)(struct seq_file *, void *); }; -#ifdef CONFIG_MEMORY_HOTPLUG - static void ipc_memory_notifier(struct work_struct *work) { ipcns_notify(IPCNS_MEMCHANGED); } -static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier); - - static int ipc_memory_callback(struct notifier_block *self, unsigned long action, void *arg) { + static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier); + switch (action) { case MEM_ONLINE: /* memory successfully brought online */ case MEM_OFFLINE: /* or offline: it's time to recompute msgmni */ @@ -85,7 +83,10 @@ static int ipc_memory_callback(struct notifier_block *self, return NOTIFY_OK; } -#endif /* CONFIG_MEMORY_HOTPLUG */ +static struct notifier_block ipc_memory_nb = { + .notifier_call = ipc_memory_callback, + .priority = IPC_CALLBACK_PRI, +}; /** * ipc_init - initialise IPC subsystem @@ -102,7 +103,7 @@ static int __init ipc_init(void) sem_init(); msg_init(); shm_init(); - hotplug_memory_notifier(ipc_memory_callback, IPC_CALLBACK_PRI); + register_hotmemory_notifier(&ipc_memory_nb); register_ipcns_notifier(&init_ipc_ns); return 0; } @@ -668,38 +669,81 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) } /** + * ipc_obtain_object + * @ids: ipc identifier set + * @id: ipc id to look for + * + * Look for an id in the ipc ids idr and return associated ipc object. + * + * Call inside the RCU critical section. + * The ipc object is *not* locked on exit. + */ +struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id) +{ + struct kern_ipc_perm *out; + int lid = ipcid_to_idx(id); + + out = idr_find(&ids->ipcs_idr, lid); + if (!out) + return ERR_PTR(-EINVAL); + + return out; +} + +/** * ipc_lock - Lock an ipc structure without rw_mutex held * @ids: IPC identifier set * @id: ipc id to look for * * Look for an id in the ipc ids idr and lock the associated ipc object. * - * The ipc object is locked on exit. + * The ipc object is locked on successful exit. */ - struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) { struct kern_ipc_perm *out; - int lid = ipcid_to_idx(id); rcu_read_lock(); - out = idr_find(&ids->ipcs_idr, lid); - if (out == NULL) { - rcu_read_unlock(); - return ERR_PTR(-EINVAL); - } + out = ipc_obtain_object(ids, id); + if (IS_ERR(out)) + goto err1; spin_lock(&out->lock); - + /* ipc_rmid() may have already freed the ID while ipc_lock * was spinning: here verify that the structure is still valid */ - if (out->deleted) { - spin_unlock(&out->lock); - rcu_read_unlock(); - return ERR_PTR(-EINVAL); - } + if (!out->deleted) + return out; + + spin_unlock(&out->lock); + out = ERR_PTR(-EINVAL); +err1: + rcu_read_unlock(); + return out; +} +/** + * ipc_obtain_object_check + * @ids: ipc identifier set + * @id: ipc id to look for + * + * Similar to ipc_obtain_object() but also checks + * the ipc object reference counter. + * + * Call inside the RCU critical section. + * The ipc object is *not* locked on exit. + */ +struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id) +{ + struct kern_ipc_perm *out = ipc_obtain_object(ids, id); + + if (IS_ERR(out)) + goto out; + + if (ipc_checkid(out, id)) + return ERR_PTR(-EIDRM); +out: return out; } @@ -780,11 +824,28 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, struct ipc64_perm *perm, int extra_perm) { struct kern_ipc_perm *ipcp; + + ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm); + if (IS_ERR(ipcp)) + goto out; + + spin_lock(&ipcp->lock); +out: + return ipcp; +} + +struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, + struct ipc_ids *ids, int id, int cmd, + struct ipc64_perm *perm, int extra_perm) +{ kuid_t euid; - int err; + int err = -EPERM; + struct kern_ipc_perm *ipcp; down_write(&ids->rw_mutex); - ipcp = ipc_lock_check(ids, id); + rcu_read_lock(); + + ipcp = ipc_obtain_object_check(ids, id); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); goto out_up; @@ -793,17 +854,21 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, audit_ipc_obj(ipcp); if (cmd == IPC_SET) audit_ipc_set_perm(extra_perm, perm->uid, - perm->gid, perm->mode); + perm->gid, perm->mode); euid = current_euid(); if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ipcp; - err = -EPERM; - ipc_unlock(ipcp); out_up: + /* + * Unsuccessful lookup, unlock and return + * the corresponding error. + */ + rcu_read_unlock(); up_write(&ids->rw_mutex); + return ERR_PTR(err); } diff --git a/ipc/util.h b/ipc/util.h index eeb79a1fbd83..c36b9977c957 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -123,10 +123,14 @@ void ipc_rcu_getref(void *ptr); void ipc_rcu_putref(void *ptr); struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); +struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); +struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, + struct ipc_ids *ids, int id, int cmd, + struct ipc64_perm *perm, int extra_perm); struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm); @@ -150,14 +154,9 @@ static inline int ipc_buildid(int id, int seq) return SEQ_MULTIPLIER * seq + id; } -/* - * Must be called with ipcp locked - */ static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid) { - if (uid / SEQ_MULTIPLIER != ipcp->seq) - return 1; - return 0; + return uid / SEQ_MULTIPLIER != ipcp->seq; } static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) @@ -172,7 +171,13 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm) rcu_read_unlock(); } +static inline void ipc_lock_object(struct kern_ipc_perm *perm) +{ + spin_lock(&perm->lock); +} + struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); +struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, struct ipc_ops *ops, struct ipc_params *params); void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, diff --git a/kernel/audit.c b/kernel/audit.c index 488f85f76335..0b084fa44b1f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -660,14 +660,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* As soon as there's any sign of userspace auditd, * start kauditd to talk to it */ - if (!kauditd_task) + if (!kauditd_task) { kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); - if (IS_ERR(kauditd_task)) { - err = PTR_ERR(kauditd_task); - kauditd_task = NULL; - return err; + if (IS_ERR(kauditd_task)) { + err = PTR_ERR(kauditd_task); + kauditd_task = NULL; + return err; + } } - loginuid = audit_get_loginuid(current); sessionid = audit_get_sessionid(current); security_task_getsecid(current, &sid); diff --git a/kernel/audit.h b/kernel/audit.h index d51cba868e1b..11468d99dad0 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -59,10 +59,7 @@ struct audit_entry { struct audit_krule rule; }; -#ifdef CONFIG_AUDIT -extern int audit_enabled; extern int audit_ever_enabled; -#endif extern int audit_pid; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index f9fc54bbe06f..267436826c3b 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -594,6 +594,10 @@ exit_nofree: return entry; exit_free: + if (entry->rule.watch) + audit_put_watch(entry->rule.watch); /* matches initial get */ + if (entry->rule.tree) + audit_put_tree(entry->rule.tree); /* that's the temporary one */ audit_free_rule(entry); return ERR_PTR(err); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index a371f857a0a9..c68229411a7c 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1034,21 +1034,15 @@ static inline void audit_free_aux(struct audit_context *context) } } -static inline void audit_zero_context(struct audit_context *context, - enum audit_state state) -{ - memset(context, 0, sizeof(*context)); - context->state = state; - context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; -} - static inline struct audit_context *audit_alloc_context(enum audit_state state) { struct audit_context *context; - if (!(context = kmalloc(sizeof(*context), GFP_KERNEL))) + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) return NULL; - audit_zero_context(context, state); + context->state = state; + context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; INIT_LIST_HEAD(&context->killed_trees); INIT_LIST_HEAD(&context->names_list); return context; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index dd3384e93cb9..af623f333d33 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5282,55 +5282,6 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id) } EXPORT_SYMBOL_GPL(css_lookup); -/** - * css_get_next - lookup next cgroup under specified hierarchy. - * @ss: pointer to subsystem - * @id: current position of iteration. - * @root: pointer to css. search tree under this. - * @foundid: position of found object. - * - * Search next css under the specified hierarchy of rootid. Calling under - * rcu_read_lock() is necessary. Returns NULL if it reaches the end. - */ -struct cgroup_subsys_state * -css_get_next(struct cgroup_subsys *ss, int id, - struct cgroup_subsys_state *root, int *foundid) -{ - struct cgroup_subsys_state *ret = NULL; - struct css_id *tmp; - int tmpid; - int rootid = css_id(root); - int depth = css_depth(root); - - if (!rootid) - return NULL; - - BUG_ON(!ss->use_id); - WARN_ON_ONCE(!rcu_read_lock_held()); - - /* fill start point for scan */ - tmpid = id; - while (1) { - /* - * scan next entry from bitmap(tree), tmpid is updated after - * idr_get_next(). - */ - tmp = idr_get_next(&ss->idr, &tmpid); - if (!tmp) - break; - if (tmp->depth >= depth && tmp->stack[depth] == rootid) { - ret = rcu_dereference(tmp->css); - if (ret) { - *foundid = tmpid; - break; - } - } - /* continue to scan from next id */ - tmpid = tmpid + 1; - } - return ret; -} - /* * get corresponding css from file open on cgroupfs directory */ diff --git a/kernel/compat.c b/kernel/compat.c index c5620d6435e0..0a09e481b70b 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1119,71 +1119,6 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, } #endif -struct compat_sysinfo { - s32 uptime; - u32 loads[3]; - u32 totalram; - u32 freeram; - u32 sharedram; - u32 bufferram; - u32 totalswap; - u32 freeswap; - u16 procs; - u16 pad; - u32 totalhigh; - u32 freehigh; - u32 mem_unit; - char _f[20-2*sizeof(u32)-sizeof(int)]; -}; - -asmlinkage long -compat_sys_sysinfo(struct compat_sysinfo __user *info) -{ - struct sysinfo s; - - do_sysinfo(&s); - - /* Check to see if any memory value is too large for 32-bit and scale - * down if needed - */ - if ((s.totalram >> 32) || (s.totalswap >> 32)) { - int bitcount = 0; - - while (s.mem_unit < PAGE_SIZE) { - s.mem_unit <<= 1; - bitcount++; - } - - s.totalram >>= bitcount; - s.freeram >>= bitcount; - s.sharedram >>= bitcount; - s.bufferram >>= bitcount; - s.totalswap >>= bitcount; - s.freeswap >>= bitcount; - s.totalhigh >>= bitcount; - s.freehigh >>= bitcount; - } - - if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || - __put_user (s.uptime, &info->uptime) || - __put_user (s.loads[0], &info->loads[0]) || - __put_user (s.loads[1], &info->loads[1]) || - __put_user (s.loads[2], &info->loads[2]) || - __put_user (s.totalram, &info->totalram) || - __put_user (s.freeram, &info->freeram) || - __put_user (s.sharedram, &info->sharedram) || - __put_user (s.bufferram, &info->bufferram) || - __put_user (s.totalswap, &info->totalswap) || - __put_user (s.freeswap, &info->freeswap) || - __put_user (s.procs, &info->procs) || - __put_user (s.totalhigh, &info->totalhigh) || - __put_user (s.freehigh, &info->freehigh) || - __put_user (s.mem_unit, &info->mem_unit)) - return -EFAULT; - - return 0; -} - COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, compat_pid_t, pid, struct compat_timespec __user *, interval) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index a11e1858e9de..f57a3fde2eff 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2201,7 +2201,6 @@ void cpuset_update_active_cpus(bool cpu_online) schedule_work(&cpuset_hotplug_work); } -#ifdef CONFIG_MEMORY_HOTPLUG /* * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY]. * Call this routine anytime after node_states[N_MEMORY] changes. @@ -2213,20 +2212,23 @@ static int cpuset_track_online_nodes(struct notifier_block *self, schedule_work(&cpuset_hotplug_work); return NOTIFY_OK; } -#endif + +static struct notifier_block cpuset_track_online_nodes_nb = { + .notifier_call = cpuset_track_online_nodes, + .priority = 10, /* ??! */ +}; /** * cpuset_init_smp - initialize cpus_allowed * * Description: Finish top cpuset after cpu, node maps are initialized - **/ - + */ void __init cpuset_init_smp(void) { cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); top_cpuset.mems_allowed = node_states[N_MEMORY]; - hotplug_memory_notifier(cpuset_track_online_nodes, 10); + register_hotmemory_notifier(&cpuset_track_online_nodes_nb); cpuset_propagate_hotplug_wq = alloc_ordered_workqueue("cpuset_hotplug", 0); diff --git a/kernel/fork.c b/kernel/fork.c index 0519c9b3261c..6fcc2e24561e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -70,6 +70,7 @@ #include <linux/khugepaged.h> #include <linux/signalfd.h> #include <linux/uprobes.h> +#include <linux/aio.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -364,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->locked_vm = 0; mm->mmap = NULL; mm->mmap_cache = NULL; - mm->free_area_cache = oldmm->mmap_base; - mm->cached_hole_size = ~0UL; mm->map_count = 0; cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; @@ -539,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm->nr_ptes = 0; memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; mm_init_aio(mm); mm_init_owner(mm, p); diff --git a/kernel/kexec.c b/kernel/kexec.c index bddd3d7a74b6..38f5fab4d5eb 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -786,7 +786,7 @@ static int kimage_load_normal_segment(struct kimage *image, struct kexec_segment *segment) { unsigned long maddr; - unsigned long ubytes, mbytes; + size_t ubytes, mbytes; int result; unsigned char __user *buf; @@ -819,13 +819,9 @@ static int kimage_load_normal_segment(struct kimage *image, /* Start with a clear page */ clear_page(ptr); ptr += maddr & ~PAGE_MASK; - mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) - mchunk = mbytes; - - uchunk = mchunk; - if (uchunk > ubytes) - uchunk = ubytes; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); result = copy_from_user(ptr, buf, uchunk); kunmap(page); @@ -850,7 +846,7 @@ static int kimage_load_crash_segment(struct kimage *image, * We do things a page at a time for the sake of kmap. */ unsigned long maddr; - unsigned long ubytes, mbytes; + size_t ubytes, mbytes; int result; unsigned char __user *buf; @@ -871,13 +867,10 @@ static int kimage_load_crash_segment(struct kimage *image, } ptr = kmap(page); ptr += maddr & ~PAGE_MASK; - mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) - mchunk = mbytes; - - uchunk = mchunk; - if (uchunk > ubytes) { - uchunk = ubytes; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); + if (mchunk > uchunk) { /* Zero the trailing part of the page */ memset(ptr + uchunk, 0, mchunk - uchunk); } @@ -1118,12 +1111,8 @@ void __weak crash_free_reserved_phys_range(unsigned long begin, { unsigned long addr; - for (addr = begin; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); - init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); - free_page((unsigned long)__va(addr)); - totalram_pages++; - } + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); } int crash_shrink_memory(unsigned long new_size) @@ -1452,14 +1441,13 @@ void vmcoreinfo_append_str(const char *fmt, ...) { va_list args; char buf[0x50]; - int r; + size_t r; va_start(args, fmt); r = vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); - if (r + vmcoreinfo_size > vmcoreinfo_max_size) - r = vmcoreinfo_max_size - vmcoreinfo_size; + r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); @@ -1489,7 +1477,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(swapper_pg_dir); #endif VMCOREINFO_SYMBOL(_stext); - VMCOREINFO_SYMBOL(vmlist); + VMCOREINFO_SYMBOL(vmap_area_list); #ifndef CONFIG_NEED_MULTIPLE_NODES VMCOREINFO_SYMBOL(mem_map); @@ -1527,7 +1515,8 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_OFFSET(free_area, free_list); VMCOREINFO_OFFSET(list_head, next); VMCOREINFO_OFFSET(list_head, prev); - VMCOREINFO_OFFSET(vm_struct, addr); + VMCOREINFO_OFFSET(vmap_area, va_start); + VMCOREINFO_OFFSET(vmap_area, list); VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); log_buf_kexec_setup(); VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); diff --git a/kernel/kmod.c b/kernel/kmod.c index 56dd34976d7b..1296e72e4161 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -77,6 +77,7 @@ static void free_modprobe_argv(struct subprocess_info *info) static int call_modprobe(char *module_name, int wait) { + struct subprocess_info *info; static char *envp[] = { "HOME=/", "TERM=linux", @@ -98,8 +99,15 @@ static int call_modprobe(char *module_name, int wait) argv[3] = module_name; /* check free_modprobe_argv() */ argv[4] = NULL; - return call_usermodehelper_fns(modprobe_path, argv, envp, - wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL); + info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL, + NULL, free_modprobe_argv, NULL); + if (!info) + goto free_module_name; + + return call_usermodehelper_exec(info, wait | UMH_KILLABLE); + +free_module_name: + kfree(module_name); free_argv: kfree(argv); out: @@ -502,14 +510,28 @@ static void helper_unlock(void) * @argv: arg vector for process * @envp: environment for process * @gfp_mask: gfp mask for memory allocation + * @cleanup: a cleanup function + * @init: an init function + * @data: arbitrary context sensitive data * * Returns either %NULL on allocation failure, or a subprocess_info * structure. This should be passed to call_usermodehelper_exec to * exec the process and free the structure. + * + * The init function is used to customize the helper process prior to + * exec. A non-zero return code causes the process to error out, exit, + * and return the failure to the calling process + * + * The cleanup function is just before ethe subprocess_info is about to + * be freed. This can be used for freeing the argv and envp. The + * Function must be runnable in either a process context or the + * context in which call_usermodehelper_exec is called. */ -static struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, - char **envp, gfp_t gfp_mask) + char **envp, gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *info), + void *data) { struct subprocess_info *sub_info; sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); @@ -520,50 +542,27 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; + + sub_info->cleanup = cleanup; + sub_info->init = init; + sub_info->data = data; out: return sub_info; } - -/** - * call_usermodehelper_setfns - set a cleanup/init function - * @info: a subprocess_info returned by call_usermodehelper_setup - * @cleanup: a cleanup function - * @init: an init function - * @data: arbitrary context sensitive data - * - * The init function is used to customize the helper process prior to - * exec. A non-zero return code causes the process to error out, exit, - * and return the failure to the calling process - * - * The cleanup function is just before ethe subprocess_info is about to - * be freed. This can be used for freeing the argv and envp. The - * Function must be runnable in either a process context or the - * context in which call_usermodehelper_exec is called. - */ -static -void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *info), - void *data) -{ - info->cleanup = cleanup; - info->init = init; - info->data = data; -} +EXPORT_SYMBOL(call_usermodehelper_setup); /** * call_usermodehelper_exec - start a usermode application * @sub_info: information about the subprocessa * @wait: wait for the application to finish and return status. - * when -1 don't wait at all, but you get no useful error back when - * the program couldn't be exec'ed. This makes it safe to call + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call * from interrupt context. * * Runs a user-space application. The application is started * asynchronously if wait is not set, and runs as a child of keventd. * (ie. it runs with full root capabilities). */ -static int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) { DECLARE_COMPLETION_ONSTACK(done); @@ -615,31 +614,34 @@ unlock: helper_unlock(); return retval; } +EXPORT_SYMBOL(call_usermodehelper_exec); -/* - * call_usermodehelper_fns() will not run the caller-provided cleanup function - * if a memory allocation failure is experienced. So the caller might need to - * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform - * the necessaary cleanup within the caller. +/** + * call_usermodehelper() - prepare and start a usermode application + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @wait: wait for the application to finish and return status. + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call + * from interrupt context. + * + * This function is the equivalent to use call_usermodehelper_setup() and + * call_usermodehelper_exec(). */ -int call_usermodehelper_fns( - char *path, char **argv, char **envp, int wait, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data) +int call_usermodehelper(char *path, char **argv, char **envp, int wait) { struct subprocess_info *info; gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - info = call_usermodehelper_setup(path, argv, envp, gfp_mask); - + info = call_usermodehelper_setup(path, argv, envp, gfp_mask, + NULL, NULL, NULL); if (info == NULL) return -ENOMEM; - call_usermodehelper_setfns(info, init, cleanup, data); - return call_usermodehelper_exec(info, wait); } -EXPORT_SYMBOL(call_usermodehelper_fns); +EXPORT_SYMBOL(call_usermodehelper); static int proc_cap_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/kernel/kthread.c b/kernel/kthread.c index d6b72880d0c7..760e86df8c20 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -17,6 +17,7 @@ #include <linux/slab.h> #include <linux/freezer.h> #include <linux/ptrace.h> +#include <linux/uaccess.h> #include <trace/events/sched.h> static DEFINE_SPINLOCK(kthread_create_lock); @@ -52,8 +53,21 @@ enum KTHREAD_BITS { KTHREAD_IS_PARKED, }; -#define to_kthread(tsk) \ - container_of((tsk)->vfork_done, struct kthread, exited) +#define __to_kthread(vfork) \ + container_of(vfork, struct kthread, exited) + +static inline struct kthread *to_kthread(struct task_struct *k) +{ + return __to_kthread(k->vfork_done); +} + +static struct kthread *to_live_kthread(struct task_struct *k) +{ + struct completion *vfork = ACCESS_ONCE(k->vfork_done); + if (likely(vfork)) + return __to_kthread(vfork); + return NULL; +} /** * kthread_should_stop - should this kthread return now? @@ -122,6 +136,24 @@ void *kthread_data(struct task_struct *task) return to_kthread(task)->data; } +/** + * probe_kthread_data - speculative version of kthread_data() + * @task: possible kthread task in question + * + * @task could be a kthread task. Return the data value specified when it + * was created if accessible. If @task isn't a kthread task or its data is + * inaccessible for any reason, %NULL is returned. This function requires + * that @task itself is safe to dereference. + */ +void *probe_kthread_data(struct task_struct *task) +{ + struct kthread *kthread = to_kthread(task); + void *data = NULL; + + probe_kernel_read(&data, &kthread->data, sizeof(data)); + return data; +} + static void __kthread_parkme(struct kthread *self) { __set_current_state(TASK_PARKED); @@ -311,19 +343,6 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), return p; } -static struct kthread *task_get_live_kthread(struct task_struct *k) -{ - struct kthread *kthread; - - get_task_struct(k); - kthread = to_kthread(k); - /* It might have exited */ - barrier(); - if (k->vfork_done != NULL) - return kthread; - return NULL; -} - static void __kthread_unpark(struct task_struct *k, struct kthread *kthread) { clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); @@ -350,11 +369,10 @@ static void __kthread_unpark(struct task_struct *k, struct kthread *kthread) */ void kthread_unpark(struct task_struct *k) { - struct kthread *kthread = task_get_live_kthread(k); + struct kthread *kthread = to_live_kthread(k); if (kthread) __kthread_unpark(k, kthread); - put_task_struct(k); } /** @@ -371,7 +389,7 @@ void kthread_unpark(struct task_struct *k) */ int kthread_park(struct task_struct *k) { - struct kthread *kthread = task_get_live_kthread(k); + struct kthread *kthread = to_live_kthread(k); int ret = -ENOSYS; if (kthread) { @@ -384,7 +402,6 @@ int kthread_park(struct task_struct *k) } ret = 0; } - put_task_struct(k); return ret; } @@ -405,10 +422,13 @@ int kthread_park(struct task_struct *k) */ int kthread_stop(struct task_struct *k) { - struct kthread *kthread = task_get_live_kthread(k); + struct kthread *kthread; int ret; trace_sched_kthread_stop(k); + + get_task_struct(k); + kthread = to_live_kthread(k); if (kthread) { set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); __kthread_unpark(k, kthread); @@ -416,10 +436,9 @@ int kthread_stop(struct task_struct *k) wait_for_completion(&kthread->exited); } ret = k->exit_code; - put_task_struct(k); - trace_sched_kthread_stop_ret(ret); + trace_sched_kthread_stop_ret(ret); return ret; } EXPORT_SYMBOL(kthread_stop); diff --git a/kernel/lglock.c b/kernel/lglock.c index 6535a667a5a7..86ae2aebf004 100644 --- a/kernel/lglock.c +++ b/kernel/lglock.c @@ -21,7 +21,7 @@ void lg_local_lock(struct lglock *lg) arch_spinlock_t *lock; preempt_disable(); - rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); lock = this_cpu_ptr(lg->lock); arch_spin_lock(lock); } @@ -31,7 +31,7 @@ void lg_local_unlock(struct lglock *lg) { arch_spinlock_t *lock; - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock_release(&lg->lock_dep_map, 1, _RET_IP_); lock = this_cpu_ptr(lg->lock); arch_spin_unlock(lock); preempt_enable(); @@ -43,7 +43,7 @@ void lg_local_lock_cpu(struct lglock *lg, int cpu) arch_spinlock_t *lock; preempt_disable(); - rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); lock = per_cpu_ptr(lg->lock, cpu); arch_spin_lock(lock); } @@ -53,7 +53,7 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu) { arch_spinlock_t *lock; - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock_release(&lg->lock_dep_map, 1, _RET_IP_); lock = per_cpu_ptr(lg->lock, cpu); arch_spin_unlock(lock); preempt_enable(); @@ -65,7 +65,7 @@ void lg_global_lock(struct lglock *lg) int i; preempt_disable(); - rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); for_each_possible_cpu(i) { arch_spinlock_t *lock; lock = per_cpu_ptr(lg->lock, i); @@ -78,7 +78,7 @@ void lg_global_unlock(struct lglock *lg) { int i; - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock_release(&lg->lock_dep_map, 1, _RET_IP_); for_each_possible_cpu(i) { arch_spinlock_t *lock; lock = per_cpu_ptr(lg->lock, i); diff --git a/kernel/panic.c b/kernel/panic.c index 7c57cc9eee2c..167ec097ce8b 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -22,7 +22,6 @@ #include <linux/sysrq.h> #include <linux/init.h> #include <linux/nmi.h> -#include <linux/dmi.h> #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -400,13 +399,8 @@ struct slowpath_args { static void warn_slowpath_common(const char *file, int line, void *caller, unsigned taint, struct slowpath_args *args) { - const char *board; - printk(KERN_WARNING "------------[ cut here ]------------\n"); printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (board) - printk(KERN_WARNING "Hardware name: %s\n", board); if (args) vprintk(args->fmt, args->args); diff --git a/kernel/pid.c b/kernel/pid.c index 047dc6264638..6283d6412aff 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -51,9 +51,6 @@ int pid_max = PID_MAX_DEFAULT; int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; -#define BITS_PER_PAGE (PAGE_SIZE*8) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) - static inline int mk_pid(struct pid_namespace *pid_ns, struct pidmap *map, int off) { @@ -183,15 +180,19 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) break; } if (likely(atomic_read(&map->nr_free))) { - do { + for ( ; ; ) { if (!test_and_set_bit(offset, map->page)) { atomic_dec(&map->nr_free); set_last_pid(pid_ns, last, pid); return pid; } offset = find_next_offset(map, offset); + if (offset >= BITS_PER_PAGE) + break; pid = mk_pid(pid_ns, map, offset); - } while (offset < BITS_PER_PAGE && pid < pid_max); + if (pid >= pid_max) + break; + } } if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) { ++map; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index bea15bdf82b0..69473c4a653f 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -19,8 +19,6 @@ #include <linux/reboot.h> #include <linux/export.h> -#define BITS_PER_PAGE (PAGE_SIZE*8) - struct pid_cache { int nr_ids; char name[16]; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 8fd709c9bb58..877439bfcbee 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -49,59 +49,28 @@ static int check_clock(const clockid_t which_clock) return error; } -static inline union cpu_time_count +static inline unsigned long long timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) { - union cpu_time_count ret; - ret.sched = 0; /* high half always zero when .cpu used */ + unsigned long long ret; + + ret = 0; /* high half always zero when .cpu used */ if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; + ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; } else { - ret.cpu = timespec_to_cputime(tp); + ret = cputime_to_expires(timespec_to_cputime(tp)); } return ret; } static void sample_to_timespec(const clockid_t which_clock, - union cpu_time_count cpu, + unsigned long long expires, struct timespec *tp) { if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) - *tp = ns_to_timespec(cpu.sched); + *tp = ns_to_timespec(expires); else - cputime_to_timespec(cpu.cpu, tp); -} - -static inline int cpu_time_before(const clockid_t which_clock, - union cpu_time_count now, - union cpu_time_count then) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - return now.sched < then.sched; - } else { - return now.cpu < then.cpu; - } -} -static inline void cpu_time_add(const clockid_t which_clock, - union cpu_time_count *acc, - union cpu_time_count val) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - acc->sched += val.sched; - } else { - acc->cpu += val.cpu; - } -} -static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, - union cpu_time_count a, - union cpu_time_count b) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - a.sched -= b.sched; - } else { - a.cpu -= b.cpu; - } - return a; + cputime_to_timespec((__force cputime_t)expires, tp); } /* @@ -109,65 +78,49 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, * given the current clock sample. */ static void bump_cpu_timer(struct k_itimer *timer, - union cpu_time_count now) + unsigned long long now) { int i; + unsigned long long delta, incr; - if (timer->it.cpu.incr.sched == 0) + if (timer->it.cpu.incr == 0) return; - if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { - unsigned long long delta, incr; + if (now < timer->it.cpu.expires) + return; - if (now.sched < timer->it.cpu.expires.sched) - return; - incr = timer->it.cpu.incr.sched; - delta = now.sched + incr - timer->it.cpu.expires.sched; - /* Don't use (incr*2 < delta), incr*2 might overflow. */ - for (i = 0; incr < delta - incr; i++) - incr = incr << 1; - for (; i >= 0; incr >>= 1, i--) { - if (delta < incr) - continue; - timer->it.cpu.expires.sched += incr; - timer->it_overrun += 1 << i; - delta -= incr; - } - } else { - cputime_t delta, incr; + incr = timer->it.cpu.incr; + delta = now + incr - timer->it.cpu.expires; - if (now.cpu < timer->it.cpu.expires.cpu) - return; - incr = timer->it.cpu.incr.cpu; - delta = now.cpu + incr - timer->it.cpu.expires.cpu; - /* Don't use (incr*2 < delta), incr*2 might overflow. */ - for (i = 0; incr < delta - incr; i++) - incr += incr; - for (; i >= 0; incr = incr >> 1, i--) { - if (delta < incr) - continue; - timer->it.cpu.expires.cpu += incr; - timer->it_overrun += 1 << i; - delta -= incr; - } + /* Don't use (incr*2 < delta), incr*2 might overflow. */ + for (i = 0; incr < delta - incr; i++) + incr = incr << 1; + + for (; i >= 0; incr >>= 1, i--) { + if (delta < incr) + continue; + + timer->it.cpu.expires += incr; + timer->it_overrun += 1 << i; + delta -= incr; } } -static inline cputime_t prof_ticks(struct task_struct *p) +static inline unsigned long long prof_ticks(struct task_struct *p) { cputime_t utime, stime; task_cputime(p, &utime, &stime); - return utime + stime; + return cputime_to_expires(utime + stime); } -static inline cputime_t virt_ticks(struct task_struct *p) +static inline unsigned long long virt_ticks(struct task_struct *p) { cputime_t utime; task_cputime(p, &utime, NULL); - return utime; + return cputime_to_expires(utime); } static int @@ -208,19 +161,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) * Sample a per-thread clock for the given task. */ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = prof_ticks(p); + *sample = prof_ticks(p); break; case CPUCLOCK_VIRT: - cpu->cpu = virt_ticks(p); + *sample = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = task_sched_runtime(p); + *sample = task_sched_runtime(p); break; } return 0; @@ -267,7 +220,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) */ static int cpu_clock_sample_group(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { struct task_cputime cputime; @@ -276,15 +229,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock, return -EINVAL; case CPUCLOCK_PROF: thread_group_cputime(p, &cputime); - cpu->cpu = cputime.utime + cputime.stime; + *sample = cputime_to_expires(cputime.utime + cputime.stime); break; case CPUCLOCK_VIRT: thread_group_cputime(p, &cputime); - cpu->cpu = cputime.utime; + *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: thread_group_cputime(p, &cputime); - cpu->sched = cputime.sum_exec_runtime; + *sample = cputime.sum_exec_runtime; break; } return 0; @@ -295,7 +248,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int error = -EINVAL; - union cpu_time_count rtn; + unsigned long long rtn; if (pid == 0) { /* @@ -429,6 +382,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) return ret; } +static void cleanup_timers_list(struct list_head *head, + unsigned long long curr) +{ + struct cpu_timer_list *timer, *next; + + list_for_each_entry_safe(timer, next, head, entry) + list_del_init(&timer->entry); +} + /* * Clean out CPU timers still ticking when a thread exited. The task * pointer is cleared, and the expiry time is replaced with the residual @@ -439,37 +401,12 @@ static void cleanup_timers(struct list_head *head, cputime_t utime, cputime_t stime, unsigned long long sum_exec_runtime) { - struct cpu_timer_list *timer, *next; - cputime_t ptime = utime + stime; - - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.cpu < ptime) { - timer->expires.cpu = 0; - } else { - timer->expires.cpu -= ptime; - } - } - ++head; - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.cpu < utime) { - timer->expires.cpu = 0; - } else { - timer->expires.cpu -= utime; - } - } + cputime_t ptime = utime + stime; - ++head; - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.sched < sum_exec_runtime) { - timer->expires.sched = 0; - } else { - timer->expires.sched -= sum_exec_runtime; - } - } + cleanup_timers_list(head, cputime_to_expires(ptime)); + cleanup_timers_list(++head, cputime_to_expires(utime)); + cleanup_timers_list(++head, sum_exec_runtime); } /* @@ -499,17 +436,21 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) tsk->se.sum_exec_runtime + sig->sum_sched_runtime); } -static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) +static void clear_dead_task(struct k_itimer *itimer, unsigned long long now) { + struct cpu_timer_list *timer = &itimer->it.cpu; + /* * That's all for this thread or process. * We leave our residual in expires to be reported. */ - put_task_struct(timer->it.cpu.task); - timer->it.cpu.task = NULL; - timer->it.cpu.expires = cpu_time_sub(timer->it_clock, - timer->it.cpu.expires, - now); + put_task_struct(timer->task); + timer->task = NULL; + if (timer->expires < now) { + timer->expires = 0; + } else { + timer->expires -= now; + } } static inline int expires_gt(cputime_t expires, cputime_t new_exp) @@ -541,14 +482,14 @@ static void arm_timer(struct k_itimer *timer) listpos = head; list_for_each_entry(next, head, entry) { - if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) + if (nt->expires < next->expires) break; listpos = &next->entry; } list_add(&nt->entry, listpos); if (listpos == head) { - union cpu_time_count *exp = &nt->expires; + unsigned long long exp = nt->expires; /* * We are the new earliest-expiring POSIX 1.b timer, hence @@ -559,17 +500,17 @@ static void arm_timer(struct k_itimer *timer) switch (CPUCLOCK_WHICH(timer->it_clock)) { case CPUCLOCK_PROF: - if (expires_gt(cputime_expires->prof_exp, exp->cpu)) - cputime_expires->prof_exp = exp->cpu; + if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp))) + cputime_expires->prof_exp = expires_to_cputime(exp); break; case CPUCLOCK_VIRT: - if (expires_gt(cputime_expires->virt_exp, exp->cpu)) - cputime_expires->virt_exp = exp->cpu; + if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp))) + cputime_expires->virt_exp = expires_to_cputime(exp); break; case CPUCLOCK_SCHED: if (cputime_expires->sched_exp == 0 || - cputime_expires->sched_exp > exp->sched) - cputime_expires->sched_exp = exp->sched; + cputime_expires->sched_exp > exp) + cputime_expires->sched_exp = exp; break; } } @@ -584,20 +525,20 @@ static void cpu_timer_fire(struct k_itimer *timer) /* * User don't want any signal. */ - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; } else if (unlikely(timer->sigq == NULL)) { /* * This a special case for clock_nanosleep, * not a normal timer from sys_timer_create. */ wake_up_process(timer->it_process); - timer->it.cpu.expires.sched = 0; - } else if (timer->it.cpu.incr.sched == 0) { + timer->it.cpu.expires = 0; + } else if (timer->it.cpu.incr == 0) { /* * One-shot timer. Clear it as soon as it's fired. */ posix_timer_event(timer, 0); - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { /* * The signal did not get queued because the signal @@ -615,7 +556,7 @@ static void cpu_timer_fire(struct k_itimer *timer) */ static int cpu_timer_sample_group(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { struct task_cputime cputime; @@ -624,13 +565,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock, default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = cputime.utime + cputime.stime; + *sample = cputime_to_expires(cputime.utime + cputime.stime); break; case CPUCLOCK_VIRT: - cpu->cpu = cputime.utime; + *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: - cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + *sample = cputime.sum_exec_runtime + task_delta_exec(p); break; } return 0; @@ -646,7 +587,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, struct itimerspec *new, struct itimerspec *old) { struct task_struct *p = timer->it.cpu.task; - union cpu_time_count old_expires, new_expires, old_incr, val; + unsigned long long old_expires, new_expires, old_incr, val; int ret; if (unlikely(p == NULL)) { @@ -701,7 +642,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, } if (old) { - if (old_expires.sched == 0) { + if (old_expires == 0) { old->it_value.tv_sec = 0; old->it_value.tv_nsec = 0; } else { @@ -716,11 +657,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, * new setting. */ bump_cpu_timer(timer, val); - if (cpu_time_before(timer->it_clock, val, - timer->it.cpu.expires)) { - old_expires = cpu_time_sub( - timer->it_clock, - timer->it.cpu.expires, val); + if (val < timer->it.cpu.expires) { + old_expires = timer->it.cpu.expires - val; sample_to_timespec(timer->it_clock, old_expires, &old->it_value); @@ -743,8 +681,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, goto out; } - if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) { - cpu_time_add(timer->it_clock, &new_expires, val); + if (new_expires != 0 && !(flags & TIMER_ABSTIME)) { + new_expires += val; } /* @@ -753,8 +691,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, * arm the timer (we'll just fake it for timer_gettime). */ timer->it.cpu.expires = new_expires; - if (new_expires.sched != 0 && - cpu_time_before(timer->it_clock, val, new_expires)) { + if (new_expires != 0 && val < new_expires) { arm_timer(timer); } @@ -778,8 +715,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, timer->it_overrun_last = 0; timer->it_overrun = -1; - if (new_expires.sched != 0 && - !cpu_time_before(timer->it_clock, val, new_expires)) { + if (new_expires != 0 && !(val < new_expires)) { /* * The designated time already passed, so we notify * immediately, even if the thread never runs to @@ -799,7 +735,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) { - union cpu_time_count now; + unsigned long long now; struct task_struct *p = timer->it.cpu.task; int clear_dead; @@ -809,7 +745,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) sample_to_timespec(timer->it_clock, timer->it.cpu.incr, &itp->it_interval); - if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */ + if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */ itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; return; } @@ -841,7 +777,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) */ put_task_struct(p); timer->it.cpu.task = NULL; - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; read_unlock(&tasklist_lock); goto dead; } else { @@ -862,10 +798,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) goto dead; } - if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) { + if (now < timer->it.cpu.expires) { sample_to_timespec(timer->it_clock, - cpu_time_sub(timer->it_clock, - timer->it.cpu.expires, now), + timer->it.cpu.expires - now, &itp->it_value); } else { /* @@ -877,6 +812,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) } } +static unsigned long long +check_timers_list(struct list_head *timers, + struct list_head *firing, + unsigned long long curr) +{ + int maxfire = 20; + + while (!list_empty(timers)) { + struct cpu_timer_list *t; + + t = list_first_entry(timers, struct cpu_timer_list, entry); + + if (!--maxfire || curr < t->expires) + return t->expires; + + t->firing = 1; + list_move_tail(&t->entry, firing); + } + + return 0; +} + /* * Check for any per-thread CPU timers that have fired and move them off * the tsk->cpu_timers[N] list onto the firing list. Here we update the @@ -885,54 +842,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { - int maxfire; struct list_head *timers = tsk->cpu_timers; struct signal_struct *const sig = tsk->signal; + struct task_cputime *tsk_expires = &tsk->cputime_expires; + unsigned long long expires; unsigned long soft; - maxfire = 20; - tsk->cputime_expires.prof_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) { - tsk->cputime_expires.prof_exp = t->expires.cpu; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + expires = check_timers_list(timers, firing, prof_ticks(tsk)); + tsk_expires->prof_exp = expires_to_cputime(expires); - ++timers; - maxfire = 20; - tsk->cputime_expires.virt_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) { - tsk->cputime_expires.virt_exp = t->expires.cpu; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + expires = check_timers_list(++timers, firing, virt_ticks(tsk)); + tsk_expires->virt_exp = expires_to_cputime(expires); - ++timers; - maxfire = 20; - tsk->cputime_expires.sched_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { - tsk->cputime_expires.sched_exp = t->expires.sched; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + tsk_expires->sched_exp = check_timers_list(++timers, firing, + tsk->se.sum_exec_runtime); /* * Check for the special case thread timers. @@ -980,7 +903,8 @@ static void stop_process_timers(struct signal_struct *sig) static u32 onecputick; static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, - cputime_t *expires, cputime_t cur_time, int signo) + unsigned long long *expires, + unsigned long long cur_time, int signo) { if (!it->expires) return; @@ -1031,9 +955,8 @@ static inline int task_cputime_zero(const struct task_cputime *cputime) static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { - int maxfire; struct signal_struct *const sig = tsk->signal; - cputime_t utime, ptime, virt_expires, prof_expires; + unsigned long long utime, ptime, virt_expires, prof_expires; unsigned long long sum_sched_runtime, sched_expires; struct list_head *timers = sig->cpu_timers; struct task_cputime cputime; @@ -1043,52 +966,13 @@ static void check_process_timers(struct task_struct *tsk, * Collect the current process totals. */ thread_group_cputimer(tsk, &cputime); - utime = cputime.utime; - ptime = utime + cputime.stime; + utime = cputime_to_expires(cputime.utime); + ptime = utime + cputime_to_expires(cputime.stime); sum_sched_runtime = cputime.sum_exec_runtime; - maxfire = 20; - prof_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || ptime < tl->expires.cpu) { - prof_expires = tl->expires.cpu; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } - ++timers; - maxfire = 20; - virt_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || utime < tl->expires.cpu) { - virt_expires = tl->expires.cpu; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } - - ++timers; - maxfire = 20; - sched_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || sum_sched_runtime < tl->expires.sched) { - sched_expires = tl->expires.sched; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } + prof_expires = check_timers_list(timers, firing, ptime); + virt_expires = check_timers_list(++timers, firing, utime); + sched_expires = check_timers_list(++timers, firing, sum_sched_runtime); /* * Check for the special case process timers. @@ -1127,8 +1011,8 @@ static void check_process_timers(struct task_struct *tsk, } } - sig->cputime_expires.prof_exp = prof_expires; - sig->cputime_expires.virt_exp = virt_expires; + sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires); + sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires); sig->cputime_expires.sched_exp = sched_expires; if (task_cputime_zero(&sig->cputime_expires)) stop_process_timers(sig); @@ -1141,7 +1025,7 @@ static void check_process_timers(struct task_struct *tsk, void posix_cpu_timer_schedule(struct k_itimer *timer) { struct task_struct *p = timer->it.cpu.task; - union cpu_time_count now; + unsigned long long now; if (unlikely(p == NULL)) /* @@ -1170,7 +1054,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) */ put_task_struct(p); timer->it.cpu.task = p = NULL; - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; goto out_unlock; } else if (unlikely(p->exit_state) && thread_group_empty(p)) { /* @@ -1178,6 +1062,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) * not yet reaped. Take this opportunity to * drop our task ref. */ + cpu_timer_sample_group(timer->it_clock, p, &now); clear_dead_task(timer, now); goto out_unlock; } @@ -1345,7 +1230,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval) { - union cpu_time_count now; + unsigned long long now; BUG_ON(clock_idx == CPUCLOCK_SCHED); cpu_timer_sample_group(clock_idx, tsk, &now); @@ -1357,17 +1242,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, * it to be absolute. */ if (*oldval) { - if (*oldval <= now.cpu) { + if (*oldval <= now) { /* Just about to fire. */ *oldval = cputime_one_jiffy; } else { - *oldval -= now.cpu; + *oldval -= now; } } if (!*newval) return; - *newval += now.cpu; + *newval += now; } /* @@ -1415,7 +1300,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, } while (!signal_pending(current)) { - if (timer.it.cpu.expires.sched == 0) { + if (timer.it.cpu.expires == 0) { /* * Our timer fired and was reset, below * deletion can not fail. diff --git a/kernel/printk.c b/kernel/printk.c index a0584ceb4b4c..a1bdf61d38ba 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -32,6 +32,7 @@ #include <linux/security.h> #include <linux/bootmem.h> #include <linux/memblock.h> +#include <linux/aio.h> #include <linux/syscalls.h> #include <linux/kexec.h> #include <linux/kdb.h> @@ -43,19 +44,13 @@ #include <linux/rculist.h> #include <linux/poll.h> #include <linux/irq_work.h> +#include <linux/utsname.h> #include <asm/uaccess.h> #define CREATE_TRACE_POINTS #include <trace/events/printk.h> -/* - * Architectures can override it: - */ -void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) -{ -} - /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL @@ -368,6 +363,46 @@ static void log_store(int facility, int level, log_next_seq++; } +#ifdef CONFIG_SECURITY_DMESG_RESTRICT +int dmesg_restrict = 1; +#else +int dmesg_restrict; +#endif + +static int syslog_action_restricted(int type) +{ + if (dmesg_restrict) + return 1; + /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ + return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; +} + +static int check_syslog_permissions(int type, bool from_file) +{ + /* + * If this is from /proc/kmsg and we've already opened it, then we've + * already done the capabilities checks at open time. + */ + if (from_file && type != SYSLOG_ACTION_OPEN) + goto ok; + + if (syslog_action_restricted(type)) { + if (capable(CAP_SYSLOG)) + goto ok; + /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ + if (capable(CAP_SYS_ADMIN)) { + printk_once(KERN_WARNING "%s (%d): " + "Attempt to access syslog with CAP_SYS_ADMIN " + "but no CAP_SYSLOG (deprecated).\n", + current->comm, task_pid_nr(current)); + goto ok; + } + return -EPERM; + } +ok: + return security_syslog(type); +} + /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { u64 seq; @@ -443,10 +478,16 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, char cont = '-'; size_t len; ssize_t ret; + int err; if (!user) return -EBADF; + err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, + SYSLOG_FROM_FILE); + if (err) + return err; + ret = mutex_lock_interruptible(&user->lock); if (ret) return ret; @@ -608,7 +649,8 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) /* return error when data has vanished underneath us */ if (user->seq < log_first_seq) ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI; - ret = POLLIN|POLLRDNORM; + else + ret = POLLIN|POLLRDNORM; } raw_spin_unlock_irq(&logbuf_lock); @@ -624,7 +666,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) if ((file->f_flags & O_ACCMODE) == O_WRONLY) return 0; - err = security_syslog(SYSLOG_ACTION_READ_ALL); + err = check_syslog_permissions(SYSLOG_ACTION_OPEN, SYSLOG_FROM_FILE); if (err) return err; @@ -817,45 +859,6 @@ static inline void boot_delay_msec(int level) } #endif -#ifdef CONFIG_SECURITY_DMESG_RESTRICT -int dmesg_restrict = 1; -#else -int dmesg_restrict; -#endif - -static int syslog_action_restricted(int type) -{ - if (dmesg_restrict) - return 1; - /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ - return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; -} - -static int check_syslog_permissions(int type, bool from_file) -{ - /* - * If this is from /proc/kmsg and we've already opened it, then we've - * already done the capabilities checks at open time. - */ - if (from_file && type != SYSLOG_ACTION_OPEN) - return 0; - - if (syslog_action_restricted(type)) { - if (capable(CAP_SYSLOG)) - return 0; - /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ - if (capable(CAP_SYS_ADMIN)) { - printk_once(KERN_WARNING "%s (%d): " - "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated).\n", - current->comm, task_pid_nr(current)); - return 0; - } - return -EPERM; - } - return 0; -} - #if defined(CONFIG_PRINTK_TIME) static bool printk_time = 1; #else @@ -1131,10 +1134,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) if (error) goto out; - error = security_syslog(type); - if (error) - return error; - switch (type) { case SYSLOG_ACTION_CLOSE: /* Close log */ break; @@ -1265,7 +1264,7 @@ static void call_console_drivers(int level, const char *text, size_t len) { struct console *con; - trace_console(text, 0, len, len); + trace_console(text, len); if (!console_drivers) return; @@ -1724,6 +1723,29 @@ static size_t cont_print_text(char *text, size_t size) { return 0; } #endif /* CONFIG_PRINTK */ +#ifdef CONFIG_EARLY_PRINTK +struct console *early_console; + +void early_vprintk(const char *fmt, va_list ap) +{ + if (early_console) { + char buf[512]; + int n = vscnprintf(buf, sizeof(buf), fmt, ap); + + early_console->write(early_console, buf, n); + } +} + +asmlinkage void early_printk(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + early_vprintk(fmt, ap); + va_end(ap); +} +#endif + static int __add_preferred_console(char *name, int idx, char *options, char *brl_options) { @@ -2833,4 +2855,65 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) raw_spin_unlock_irqrestore(&logbuf_lock, flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); + +static char dump_stack_arch_desc_str[128]; + +/** + * dump_stack_set_arch_desc - set arch-specific str to show with task dumps + * @fmt: printf-style format string + * @...: arguments for the format string + * + * The configured string will be printed right after utsname during task + * dumps. Usually used to add arch-specific system identifiers. If an + * arch wants to make use of such an ID string, it should initialize this + * as soon as possible during boot. + */ +void __init dump_stack_set_arch_desc(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(dump_stack_arch_desc_str, sizeof(dump_stack_arch_desc_str), + fmt, args); + va_end(args); +} + +/** + * dump_stack_print_info - print generic debug info for dump_stack() + * @log_lvl: log level + * + * Arch-specific dump_stack() implementations can use this function to + * print out the same debug information as the generic dump_stack(). + */ +void dump_stack_print_info(const char *log_lvl) +{ + printk("%sCPU: %d PID: %d Comm: %.20s %s %s %.*s\n", + log_lvl, raw_smp_processor_id(), current->pid, current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + if (dump_stack_arch_desc_str[0] != '\0') + printk("%sHardware name: %s\n", + log_lvl, dump_stack_arch_desc_str); + + print_worker_info(log_lvl, current); +} + +/** + * show_regs_print_info - print generic debug info for show_regs() + * @log_lvl: log level + * + * show_regs() implementations can use this function to print out generic + * debug information. + */ +void show_regs_print_info(const char *log_lvl) +{ + dump_stack_print_info(log_lvl); + + printk("%stask: %p ti: %p task.ti: %p\n", + log_lvl, current, current_thread_info(), + task_thread_info(current)); +} + #endif diff --git a/kernel/ptrace.c b/kernel/ptrace.c index acbd28424d81..aed981a3f69c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -17,6 +17,7 @@ #include <linux/ptrace.h> #include <linux/security.h> #include <linux/signal.h> +#include <linux/uio.h> #include <linux/audit.h> #include <linux/pid_namespace.h> #include <linux/syscalls.h> @@ -24,6 +25,7 @@ #include <linux/regset.h> #include <linux/hw_breakpoint.h> #include <linux/cn_proc.h> +#include <linux/compat.h> static int ptrace_trapping_sleep_fn(void *flags) @@ -618,6 +620,81 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) return error; } +static int ptrace_peek_siginfo(struct task_struct *child, + unsigned long addr, + unsigned long data) +{ + struct ptrace_peeksiginfo_args arg; + struct sigpending *pending; + struct sigqueue *q; + int ret, i; + + ret = copy_from_user(&arg, (void __user *) addr, + sizeof(struct ptrace_peeksiginfo_args)); + if (ret) + return -EFAULT; + + if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED) + return -EINVAL; /* unknown flags */ + + if (arg.nr < 0) + return -EINVAL; + + if (arg.flags & PTRACE_PEEKSIGINFO_SHARED) + pending = &child->signal->shared_pending; + else + pending = &child->pending; + + for (i = 0; i < arg.nr; ) { + siginfo_t info; + s32 off = arg.off + i; + + spin_lock_irq(&child->sighand->siglock); + list_for_each_entry(q, &pending->list, list) { + if (!off--) { + copy_siginfo(&info, &q->info); + break; + } + } + spin_unlock_irq(&child->sighand->siglock); + + if (off >= 0) /* beyond the end of the list */ + break; + +#ifdef CONFIG_COMPAT + if (unlikely(is_compat_task())) { + compat_siginfo_t __user *uinfo = compat_ptr(data); + + ret = copy_siginfo_to_user32(uinfo, &info); + ret |= __put_user(info.si_code, &uinfo->si_code); + } else +#endif + { + siginfo_t __user *uinfo = (siginfo_t __user *) data; + + ret = copy_siginfo_to_user(uinfo, &info); + ret |= __put_user(info.si_code, &uinfo->si_code); + } + + if (ret) { + ret = -EFAULT; + break; + } + + data += sizeof(siginfo_t); + i++; + + if (signal_pending(current)) + break; + + cond_resched(); + } + + if (i > 0) + return i; + + return ret; +} #ifdef PTRACE_SINGLESTEP #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) @@ -748,6 +825,10 @@ int ptrace_request(struct task_struct *child, long request, ret = put_user(child->ptrace_message, datalp); break; + case PTRACE_PEEKSIGINFO: + ret = ptrace_peek_siginfo(child, addr, data); + break; + case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) diff --git a/kernel/range.c b/kernel/range.c index 9b8ae2d6ed68..071b0ab455cb 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -97,7 +97,8 @@ void subtract_range(struct range *range, int az, u64 start, u64 end) range[i].end = range[j].end; range[i].start = end; } else { - printk(KERN_ERR "run of slot in ranges\n"); + pr_err("%s: run out of slot in ranges\n", + __func__); } range[j].end = start; continue; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2d5f94c1c7fb..d8534308fd05 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1441,7 +1441,7 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); #ifdef CONFIG_PROVE_RCU_DELAY - if ((random32() % (rcu_num_nodes * 8)) == 0 && + if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 && system_state == SYSTEM_RUNNING) schedule_timeout_uninterruptible(2); #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ diff --git a/kernel/relay.c b/kernel/relay.c index 01ab081ac53a..eef0d113b79e 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -588,7 +588,7 @@ struct rchan *relay_open(const char *base_filename, chan->version = RELAYFS_CHANNEL_VERSION; chan->n_subbufs = n_subbufs; chan->subbuf_size = subbuf_size; - chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); + chan->alloc_size = PAGE_ALIGN(subbuf_size * n_subbufs); chan->parent = parent; chan->private_data = private_data; if (base_filename) { @@ -1099,8 +1099,7 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf, static int subbuf_read_actor(size_t read_start, struct rchan_buf *buf, size_t avail, - read_descriptor_t *desc, - read_actor_t actor) + read_descriptor_t *desc) { void *from; int ret = 0; @@ -1121,15 +1120,13 @@ static int subbuf_read_actor(size_t read_start, typedef int (*subbuf_actor_t) (size_t read_start, struct rchan_buf *buf, size_t avail, - read_descriptor_t *desc, - read_actor_t actor); + read_descriptor_t *desc); /* * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries */ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, subbuf_actor_t subbuf_actor, - read_actor_t actor, read_descriptor_t *desc) { struct rchan_buf *buf = filp->private_data; @@ -1150,7 +1147,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, break; avail = min(desc->count, avail); - ret = subbuf_actor(read_start, buf, avail, desc, actor); + ret = subbuf_actor(read_start, buf, avail, desc); if (desc->error < 0) break; @@ -1174,8 +1171,7 @@ static ssize_t relay_file_read(struct file *filp, desc.count = count; desc.arg.buf = buffer; desc.error = 0; - return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, - NULL, &desc); + return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc); } static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) diff --git a/kernel/resource.c b/kernel/resource.c index 73f35d4b30b9..4aef8867fd4b 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -706,24 +706,13 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new) write_unlock(&resource_lock); } -/** - * adjust_resource - modify a resource's start and size - * @res: resource to modify - * @start: new start value - * @size: new size - * - * Given an existing resource, change its start and size to match the - * arguments. Returns 0 on success, -EBUSY if it can't fit. - * Existing children of the resource are assumed to be immutable. - */ -int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) +static int __adjust_resource(struct resource *res, resource_size_t start, + resource_size_t size) { struct resource *tmp, *parent = res->parent; resource_size_t end = start + size - 1; int result = -EBUSY; - write_lock(&resource_lock); - if (!parent) goto skip; @@ -751,6 +740,26 @@ skip: result = 0; out: + return result; +} + +/** + * adjust_resource - modify a resource's start and size + * @res: resource to modify + * @start: new start value + * @size: new size + * + * Given an existing resource, change its start and size to match the + * arguments. Returns 0 on success, -EBUSY if it can't fit. + * Existing children of the resource are assumed to be immutable. + */ +int adjust_resource(struct resource *res, resource_size_t start, + resource_size_t size) +{ + int result; + + write_lock(&resource_lock); + result = __adjust_resource(res, start, size); write_unlock(&resource_lock); return result; } @@ -1012,6 +1021,109 @@ void __release_region(struct resource *parent, resource_size_t start, } EXPORT_SYMBOL(__release_region); +#ifdef CONFIG_MEMORY_HOTREMOVE +/** + * release_mem_region_adjustable - release a previously reserved memory region + * @parent: parent resource descriptor + * @start: resource start address + * @size: resource region size + * + * This interface is intended for memory hot-delete. The requested region + * is released from a currently busy memory resource. The requested region + * must either match exactly or fit into a single busy resource entry. In + * the latter case, the remaining resource is adjusted accordingly. + * Existing children of the busy memory resource must be immutable in the + * request. + * + * Note: + * - Additional release conditions, such as overlapping region, can be + * supported after they are confirmed as valid cases. + * - When a busy memory resource gets split into two entries, the code + * assumes that all children remain in the lower address entry for + * simplicity. Enhance this logic when necessary. + */ +int release_mem_region_adjustable(struct resource *parent, + resource_size_t start, resource_size_t size) +{ + struct resource **p; + struct resource *res; + struct resource *new_res; + resource_size_t end; + int ret = -EINVAL; + + end = start + size - 1; + if ((start < parent->start) || (end > parent->end)) + return ret; + + /* The kzalloc() result gets checked later */ + new_res = kzalloc(sizeof(struct resource), GFP_KERNEL); + + p = &parent->child; + write_lock(&resource_lock); + + while ((res = *p)) { + if (res->start >= end) + break; + + /* look for the next resource if it does not fit into */ + if (res->start > start || res->end < end) { + p = &res->sibling; + continue; + } + + if (!(res->flags & IORESOURCE_MEM)) + break; + + if (!(res->flags & IORESOURCE_BUSY)) { + p = &res->child; + continue; + } + + /* found the target resource; let's adjust accordingly */ + if (res->start == start && res->end == end) { + /* free the whole entry */ + *p = res->sibling; + kfree(res); + ret = 0; + } else if (res->start == start && res->end != end) { + /* adjust the start */ + ret = __adjust_resource(res, end + 1, + res->end - end); + } else if (res->start != start && res->end == end) { + /* adjust the end */ + ret = __adjust_resource(res, res->start, + start - res->start); + } else { + /* split into two entries */ + if (!new_res) { + ret = -ENOMEM; + break; + } + new_res->name = res->name; + new_res->start = end + 1; + new_res->end = res->end; + new_res->flags = res->flags; + new_res->parent = res->parent; + new_res->sibling = res->sibling; + new_res->child = NULL; + + ret = __adjust_resource(res, res->start, + start - res->start); + if (ret) + break; + res->sibling = new_res; + new_res = NULL; + } + + break; + } + + write_unlock(&resource_lock); + kfree(new_res); + return ret; +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ + /* * Managed region resource */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d1110752b48c..7a6cc81c3db1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4649,6 +4649,7 @@ void sched_show_task(struct task_struct *p) task_pid_nr(p), ppid, (unsigned long)task_thread_info(p)->flags); + print_worker_info(KERN_INFO, p); show_stack(p, NULL); } diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 4567fc020fe3..6815171a4fff 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -193,7 +193,7 @@ EXPORT_SYMBOL(up); struct semaphore_waiter { struct list_head list; struct task_struct *task; - int up; + bool up; }; /* @@ -209,12 +209,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state, list_add_tail(&waiter.list, &sem->wait_list); waiter.task = task; - waiter.up = 0; + waiter.up = false; for (;;) { if (signal_pending_state(state, task)) goto interrupted; - if (timeout <= 0) + if (unlikely(timeout <= 0)) goto timed_out; __set_task_state(task, state); raw_spin_unlock_irq(&sem->lock); @@ -258,6 +258,6 @@ static noinline void __sched __up(struct semaphore *sem) struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); list_del(&waiter->list); - waiter->up = 1; + waiter->up = true; wake_up_process(waiter->task); } diff --git a/kernel/signal.c b/kernel/signal.c index 06ff7764ab7c..113411bfe8b1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -855,12 +855,14 @@ static void ptrace_trap_notify(struct task_struct *t) * Returns true if the signal should be actually delivered, otherwise * it should be dropped. */ -static int prepare_signal(int sig, struct task_struct *p, bool force) +static bool prepare_signal(int sig, struct task_struct *p, bool force) { struct signal_struct *signal = p->signal; struct task_struct *t; - if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) { + if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { + if (signal->flags & SIGNAL_GROUP_COREDUMP) + return sig == SIGKILL; /* * The process is in the middle of dying, nothing to do. */ @@ -1161,8 +1163,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, static void print_fatal_signal(int signr) { struct pt_regs *regs = signal_pt_regs(); - printk(KERN_INFO "%s/%d: potentially unexpected fatal signal %d.\n", - current->comm, task_pid_nr(current), signr); + printk(KERN_INFO "potentially unexpected fatal signal %d.\n", signr); #if defined(__i386__) && !defined(__arch_um__) printk(KERN_INFO "code at %08lx: ", regs->ip); diff --git a/kernel/smp.c b/kernel/smp.c index 8e451f3ff51b..31670c8d8f89 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -12,6 +12,7 @@ #include <linux/gfp.h> #include <linux/smp.h> #include <linux/cpu.h> +#include <linux/hardirq.h> #include "smpboot.h" @@ -100,16 +101,16 @@ void __init call_function_init(void) * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ -static void csd_lock_wait(struct call_single_data *data) +static void csd_lock_wait(struct call_single_data *csd) { - while (data->flags & CSD_FLAG_LOCK) + while (csd->flags & CSD_FLAG_LOCK) cpu_relax(); } -static void csd_lock(struct call_single_data *data) +static void csd_lock(struct call_single_data *csd) { - csd_lock_wait(data); - data->flags = CSD_FLAG_LOCK; + csd_lock_wait(csd); + csd->flags = CSD_FLAG_LOCK; /* * prevent CPU from reordering the above assignment @@ -119,16 +120,16 @@ static void csd_lock(struct call_single_data *data) smp_mb(); } -static void csd_unlock(struct call_single_data *data) +static void csd_unlock(struct call_single_data *csd) { - WARN_ON(!(data->flags & CSD_FLAG_LOCK)); + WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); /* * ensure we're all done before releasing data: */ smp_mb(); - data->flags &= ~CSD_FLAG_LOCK; + csd->flags &= ~CSD_FLAG_LOCK; } /* @@ -137,7 +138,7 @@ static void csd_unlock(struct call_single_data *data) * ->func, ->info, and ->flags set. */ static -void generic_exec_single(int cpu, struct call_single_data *data, int wait) +void generic_exec_single(int cpu, struct call_single_data *csd, int wait) { struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; @@ -145,7 +146,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) raw_spin_lock_irqsave(&dst->lock, flags); ipi = list_empty(&dst->list); - list_add_tail(&data->list, &dst->list); + list_add_tail(&csd->list, &dst->list); raw_spin_unlock_irqrestore(&dst->lock, flags); /* @@ -163,7 +164,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) arch_send_call_function_single_ipi(cpu); if (wait) - csd_lock_wait(data); + csd_lock_wait(csd); } /* @@ -173,7 +174,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) void generic_smp_call_function_single_interrupt(void) { struct call_single_queue *q = &__get_cpu_var(call_single_queue); - unsigned int data_flags; LIST_HEAD(list); /* @@ -186,25 +186,26 @@ void generic_smp_call_function_single_interrupt(void) raw_spin_unlock(&q->lock); while (!list_empty(&list)) { - struct call_single_data *data; + struct call_single_data *csd; + unsigned int csd_flags; - data = list_entry(list.next, struct call_single_data, list); - list_del(&data->list); + csd = list_entry(list.next, struct call_single_data, list); + list_del(&csd->list); /* - * 'data' can be invalid after this call if flags == 0 + * 'csd' can be invalid after this call if flags == 0 * (when called through generic_exec_single()), * so save them away before making the call: */ - data_flags = data->flags; + csd_flags = csd->flags; - data->func(data->info); + csd->func(csd->info); /* * Unlocked CSDs are valid through generic_exec_single(): */ - if (data_flags & CSD_FLAG_LOCK) - csd_unlock(data); + if (csd_flags & CSD_FLAG_LOCK) + csd_unlock(csd); } } @@ -240,8 +241,9 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ - WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() - && !oops_in_progress); + WARN_ON_ONCE(cpu_online(this_cpu) + && (irqs_disabled() || in_serving_irq()) + && !oops_in_progress); if (cpu == this_cpu) { local_irq_save(flags); @@ -249,16 +251,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, local_irq_restore(flags); } else { if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { - struct call_single_data *data = &d; + struct call_single_data *csd = &d; if (!wait) - data = &__get_cpu_var(csd_data); + csd = &__get_cpu_var(csd_data); - csd_lock(data); + csd_lock(csd); - data->func = func; - data->info = info; - generic_exec_single(cpu, data, wait); + csd->func = func; + csd->info = info; + generic_exec_single(cpu, csd, wait); } else { err = -ENXIO; /* CPU not online */ } @@ -325,7 +327,7 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ -void __smp_call_function_single(int cpu, struct call_single_data *data, +void __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) { unsigned int this_cpu; @@ -343,11 +345,11 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, if (cpu == this_cpu) { local_irq_save(flags); - data->func(data->info); + csd->func(csd->info); local_irq_restore(flags); } else { - csd_lock(data); - generic_exec_single(cpu, data, wait); + csd_lock(csd); + generic_exec_single(cpu, csd, wait); } put_cpu(); } @@ -369,7 +371,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) { - struct call_function_data *data; + struct call_function_data *cfd; int cpu, next_cpu, this_cpu = smp_processor_id(); /* @@ -378,8 +380,9 @@ void smp_call_function_many(const struct cpumask *mask, * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ - WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() - && !oops_in_progress && !early_boot_irqs_disabled); + WARN_ON_ONCE(cpu_online(this_cpu) + && (irqs_disabled() || in_serving_irq()) + && !oops_in_progress && !early_boot_irqs_disabled); /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */ cpu = cpumask_first_and(mask, cpu_online_mask); @@ -401,24 +404,24 @@ void smp_call_function_many(const struct cpumask *mask, return; } - data = &__get_cpu_var(cfd_data); + cfd = &__get_cpu_var(cfd_data); - cpumask_and(data->cpumask, mask, cpu_online_mask); - cpumask_clear_cpu(this_cpu, data->cpumask); + cpumask_and(cfd->cpumask, mask, cpu_online_mask); + cpumask_clear_cpu(this_cpu, cfd->cpumask); /* Some callers race with other cpus changing the passed mask */ - if (unlikely(!cpumask_weight(data->cpumask))) + if (unlikely(!cpumask_weight(cfd->cpumask))) return; /* - * After we put an entry into the list, data->cpumask - * may be cleared again when another CPU sends another IPI for - * a SMP function call, so data->cpumask will be zero. + * After we put an entry into the list, cfd->cpumask may be cleared + * again when another CPU sends another IPI for a SMP function call, so + * cfd->cpumask will be zero. */ - cpumask_copy(data->cpumask_ipi, data->cpumask); + cpumask_copy(cfd->cpumask_ipi, cfd->cpumask); - for_each_cpu(cpu, data->cpumask) { - struct call_single_data *csd = per_cpu_ptr(data->csd, cpu); + for_each_cpu(cpu, cfd->cpumask) { + struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; @@ -433,12 +436,13 @@ void smp_call_function_many(const struct cpumask *mask, } /* Send a message to all CPUs in the map */ - arch_send_call_function_ipi_mask(data->cpumask_ipi); + arch_send_call_function_ipi_mask(cfd->cpumask_ipi); if (wait) { - for_each_cpu(cpu, data->cpumask) { - struct call_single_data *csd = - per_cpu_ptr(data->csd, cpu); + for_each_cpu(cpu, cfd->cpumask) { + struct call_single_data *csd; + + csd = per_cpu_ptr(cfd->csd, cpu); csd_lock_wait(csd); } } diff --git a/kernel/sys.c b/kernel/sys.c index 4b6fff5e7400..b95d3c72ba21 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,11 @@ #include <linux/user_namespace.h> #include <linux/binfmts.h> +#include <linux/sched.h> +#include <linux/rcupdate.h> +#include <linux/uidgid.h> +#include <linux/cred.h> + #include <linux/kmsg_dump.h> /* Move somewhere else to avoid recompiling? */ #include <generated/utsrelease.h> @@ -1044,6 +1049,67 @@ change_okay: return old_fsgid; } +/** + * sys_getpid - return the thread group id of the current process + * + * Note, despite the name, this returns the tgid not the pid. The tgid and + * the pid are identical unless CLONE_THREAD was specified on clone() in + * which case the tgid is the same in all threads of the same group. + * + * This is SMP safe as current->tgid does not change. + */ +SYSCALL_DEFINE0(getpid) +{ + return task_tgid_vnr(current); +} + +/* Thread ID - the internal kernel "pid" */ +SYSCALL_DEFINE0(gettid) +{ + return task_pid_vnr(current); +} + +/* + * Accessing ->real_parent is not SMP-safe, it could + * change from under us. However, we can use a stale + * value of ->real_parent under rcu_read_lock(), see + * release_task()->call_rcu(delayed_put_task_struct). + */ +SYSCALL_DEFINE0(getppid) +{ + int pid; + + rcu_read_lock(); + pid = task_tgid_vnr(rcu_dereference(current->real_parent)); + rcu_read_unlock(); + + return pid; +} + +SYSCALL_DEFINE0(getuid) +{ + /* Only we change this so SMP safe */ + return from_kuid_munged(current_user_ns(), current_uid()); +} + +SYSCALL_DEFINE0(geteuid) +{ + /* Only we change this so SMP safe */ + return from_kuid_munged(current_user_ns(), current_euid()); +} + +SYSCALL_DEFINE0(getgid) +{ + /* Only we change this so SMP safe */ + return from_kgid_munged(current_user_ns(), current_gid()); +} + +SYSCALL_DEFINE0(getegid) +{ + /* Only we change this so SMP safe */ + return from_kgid_munged(current_user_ns(), current_egid()); +} + void do_sys_times(struct tms *tms) { cputime_t tgutime, tgstime, cutime, cstime; @@ -1805,7 +1871,6 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } -#ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct fd exe; @@ -1999,17 +2064,12 @@ out: return error; } +#ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) { return put_user(me->clear_child_tid, tid_addr); } - -#else /* CONFIG_CHECKPOINT_RESTORE */ -static int prctl_set_mm(int opt, unsigned long addr, - unsigned long arg4, unsigned long arg5) -{ - return -EINVAL; -} +#else static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) { return -EINVAL; @@ -2259,3 +2319,148 @@ int orderly_poweroff(bool force) return 0; } EXPORT_SYMBOL_GPL(orderly_poweroff); + +/** + * do_sysinfo - fill in sysinfo struct + * @info: pointer to buffer to fill + */ +static int do_sysinfo(struct sysinfo *info) +{ + unsigned long mem_total, sav_total; + unsigned int mem_unit, bitcount; + struct timespec tp; + + memset(info, 0, sizeof(struct sysinfo)); + + ktime_get_ts(&tp); + monotonic_to_bootbased(&tp); + info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + + get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); + + info->procs = nr_threads; + + si_meminfo(info); + si_swapinfo(info); + + /* + * If the sum of all the available memory (i.e. ram + swap) + * is less than can be stored in a 32 bit unsigned long then + * we can be binary compatible with 2.2.x kernels. If not, + * well, in that case 2.2.x was broken anyways... + * + * -Erik Andersen <andersee@debian.org> + */ + + mem_total = info->totalram + info->totalswap; + if (mem_total < info->totalram || mem_total < info->totalswap) + goto out; + bitcount = 0; + mem_unit = info->mem_unit; + while (mem_unit > 1) { + bitcount++; + mem_unit >>= 1; + sav_total = mem_total; + mem_total <<= 1; + if (mem_total < sav_total) + goto out; + } + + /* + * If mem_total did not overflow, multiply all memory values by + * info->mem_unit and set it to 1. This leaves things compatible + * with 2.2.x, and also retains compatibility with earlier 2.4.x + * kernels... + */ + + info->mem_unit = 1; + info->totalram <<= bitcount; + info->freeram <<= bitcount; + info->sharedram <<= bitcount; + info->bufferram <<= bitcount; + info->totalswap <<= bitcount; + info->freeswap <<= bitcount; + info->totalhigh <<= bitcount; + info->freehigh <<= bitcount; + +out: + return 0; +} + +SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) +{ + struct sysinfo val; + + do_sysinfo(&val); + + if (copy_to_user(info, &val, sizeof(struct sysinfo))) + return -EFAULT; + + return 0; +} + +#ifdef CONFIG_COMPAT +struct compat_sysinfo { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) +{ + struct sysinfo s; + + do_sysinfo(&s); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + int bitcount = 0; + + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || + __put_user(s.uptime, &info->uptime) || + __put_user(s.loads[0], &info->loads[0]) || + __put_user(s.loads[1], &info->loads[1]) || + __put_user(s.loads[2], &info->loads[2]) || + __put_user(s.totalram, &info->totalram) || + __put_user(s.freeram, &info->freeram) || + __put_user(s.sharedram, &info->sharedram) || + __put_user(s.bufferram, &info->bufferram) || + __put_user(s.totalswap, &info->totalswap) || + __put_user(s.freeswap, &info->freeswap) || + __put_user(s.procs, &info->procs) || + __put_user(s.totalhigh, &info->totalhigh) || + __put_user(s.freehigh, &info->freehigh) || + __put_user(s.mem_unit, &info->mem_unit)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_COMPAT */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index afc1dc60f3f8..9edcf456e0fc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -106,7 +106,6 @@ extern unsigned int core_pipe_limit; #endif extern int pid_max; extern int pid_max_min, pid_max_max; -extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; extern int latencytop_enabled; @@ -1430,6 +1429,20 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif + { + .procname = "user_reserve_kbytes", + .data = &sysctl_user_reserve_kbytes, + .maxlen = sizeof(sysctl_user_reserve_kbytes), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "admin_reserve_kbytes", + .data = &sysctl_admin_reserve_kbytes, + .maxlen = sizeof(sysctl_admin_reserve_kbytes), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, { } }; diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index f8b11a283171..12d6ebbfdd83 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c @@ -365,7 +365,7 @@ int init_test_probes(void) target2 = kprobe_target2; do { - rand1 = random32(); + rand1 = prandom_u32(); } while (rand1 <= div_factor); printk(KERN_INFO "Kprobe smoke test started\n"); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index af5a7e9f164b..3bdf28323012 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -20,6 +20,13 @@ #include <asm/uaccess.h> + +struct timer_list_iter { + int cpu; + bool second_pass; + u64 now; +}; + typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); @@ -133,7 +140,6 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - SEQ_printf(m, "\n"); SEQ_printf(m, "cpu: %d\n", cpu); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { SEQ_printf(m, " clock %d:\n", i); @@ -187,6 +193,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) #undef P #undef P_ns + SEQ_printf(m, "\n"); } #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -195,7 +202,6 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) { struct clock_event_device *dev = td->evtdev; - SEQ_printf(m, "\n"); SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); if (cpu < 0) SEQ_printf(m, "Broadcast device\n"); @@ -230,12 +236,11 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->event_handler); SEQ_printf(m, "\n"); SEQ_printf(m, " retries: %lu\n", dev->retries); + SEQ_printf(m, "\n"); } -static void timer_list_show_tickdevices(struct seq_file *m) +static void timer_list_show_tickdevices_header(struct seq_file *m) { - int cpu; - #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST print_tickdevice(m, tick_get_broadcast_device(), -1); SEQ_printf(m, "tick_broadcast_mask: %08lx\n", @@ -246,47 +251,104 @@ static void timer_list_show_tickdevices(struct seq_file *m) #endif SEQ_printf(m, "\n"); #endif - for_each_online_cpu(cpu) - print_tickdevice(m, tick_get_device(cpu), cpu); - SEQ_printf(m, "\n"); } -#else -static void timer_list_show_tickdevices(struct seq_file *m) { } #endif +static inline void timer_list_header(struct seq_file *m, u64 now) +{ + SEQ_printf(m, "Timer List Version: v0.7\n"); + SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); + SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); + SEQ_printf(m, "\n"); +} + static int timer_list_show(struct seq_file *m, void *v) { + struct timer_list_iter *iter = v; + u64 now = ktime_to_ns(ktime_get()); + + if (iter->cpu == -1 && !iter->second_pass) + timer_list_header(m, now); + else if (!iter->second_pass) + print_cpu(m, iter->cpu, iter->now); +#ifdef CONFIG_GENERIC_CLOCKEVENTS + else if (iter->cpu == -1 && iter->second_pass) + timer_list_show_tickdevices_header(m); + else + print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu); +#endif + return 0; +} + +void sysrq_timer_list_show(void) +{ u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.7\n"); - SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); - SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); + timer_list_header(NULL, now); for_each_online_cpu(cpu) - print_cpu(m, cpu, now); + print_cpu(NULL, cpu, now); - SEQ_printf(m, "\n"); - timer_list_show_tickdevices(m); +#ifdef CONFIG_GENERIC_CLOCKEVENTS + timer_list_show_tickdevices_header(NULL); + for_each_online_cpu(cpu) + print_tickdevice(NULL, tick_get_device(cpu), cpu); +#endif + return; +} - return 0; +static void *timer_list_start(struct seq_file *file, loff_t *offset) +{ + struct timer_list_iter *iter = file->private; + + if (!*offset) { + iter->cpu = -1; + iter->now = ktime_to_ns(ktime_get()); + } else if (iter->cpu >= nr_cpu_ids) { +#ifdef CONFIG_GENERIC_CLOCKEVENTS + if (!iter->second_pass) { + iter->cpu = -1; + iter->second_pass = true; + } else + return NULL; +#else + return NULL; +#endif + } + return iter; } -void sysrq_timer_list_show(void) +static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset) +{ + struct timer_list_iter *iter = file->private; + iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); + ++*offset; + return timer_list_start(file, offset); +} + +static void timer_list_stop(struct seq_file *seq, void *v) { - timer_list_show(NULL, NULL); } +static const struct seq_operations timer_list_sops = { + .start = timer_list_start, + .next = timer_list_next, + .stop = timer_list_stop, + .show = timer_list_show, +}; + static int timer_list_open(struct inode *inode, struct file *filp) { - return single_open(filp, timer_list_show, NULL); + return seq_open_private(filp, &timer_list_sops, + sizeof(struct timer_list_iter)); } static const struct file_operations timer_list_fops = { .open = timer_list_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = seq_release_private, }; static int __init init_timer_list_procfs(void) diff --git a/kernel/timer.c b/kernel/timer.c index 1b7489fdea41..a860bba34412 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1,7 +1,7 @@ /* * linux/kernel/timer.c * - * Kernel internal timers, basic process system calls + * Kernel internal timers * * Copyright (C) 1991, 1992 Linus Torvalds * @@ -41,6 +41,7 @@ #include <linux/sched.h> #include <linux/sched/sysctl.h> #include <linux/slab.h> +#include <linux/compat.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -1395,61 +1396,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) #endif -/** - * sys_getpid - return the thread group id of the current process - * - * Note, despite the name, this returns the tgid not the pid. The tgid and - * the pid are identical unless CLONE_THREAD was specified on clone() in - * which case the tgid is the same in all threads of the same group. - * - * This is SMP safe as current->tgid does not change. - */ -SYSCALL_DEFINE0(getpid) -{ - return task_tgid_vnr(current); -} - -/* - * Accessing ->real_parent is not SMP-safe, it could - * change from under us. However, we can use a stale - * value of ->real_parent under rcu_read_lock(), see - * release_task()->call_rcu(delayed_put_task_struct). - */ -SYSCALL_DEFINE0(getppid) -{ - int pid; - - rcu_read_lock(); - pid = task_tgid_vnr(rcu_dereference(current->real_parent)); - rcu_read_unlock(); - - return pid; -} - -SYSCALL_DEFINE0(getuid) -{ - /* Only we change this so SMP safe */ - return from_kuid_munged(current_user_ns(), current_uid()); -} - -SYSCALL_DEFINE0(geteuid) -{ - /* Only we change this so SMP safe */ - return from_kuid_munged(current_user_ns(), current_euid()); -} - -SYSCALL_DEFINE0(getgid) -{ - /* Only we change this so SMP safe */ - return from_kgid_munged(current_user_ns(), current_gid()); -} - -SYSCALL_DEFINE0(getegid) -{ - /* Only we change this so SMP safe */ - return from_kgid_munged(current_user_ns(), current_egid()); -} - static void process_timeout(unsigned long __data) { wake_up_process((struct task_struct *)__data); @@ -1557,91 +1503,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -/* Thread ID - the internal kernel "pid" */ -SYSCALL_DEFINE0(gettid) -{ - return task_pid_vnr(current); -} - -/** - * do_sysinfo - fill in sysinfo struct - * @info: pointer to buffer to fill - */ -int do_sysinfo(struct sysinfo *info) -{ - unsigned long mem_total, sav_total; - unsigned int mem_unit, bitcount; - struct timespec tp; - - memset(info, 0, sizeof(struct sysinfo)); - - ktime_get_ts(&tp); - monotonic_to_bootbased(&tp); - info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); - - get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); - - info->procs = nr_threads; - - si_meminfo(info); - si_swapinfo(info); - - /* - * If the sum of all the available memory (i.e. ram + swap) - * is less than can be stored in a 32 bit unsigned long then - * we can be binary compatible with 2.2.x kernels. If not, - * well, in that case 2.2.x was broken anyways... - * - * -Erik Andersen <andersee@debian.org> - */ - - mem_total = info->totalram + info->totalswap; - if (mem_total < info->totalram || mem_total < info->totalswap) - goto out; - bitcount = 0; - mem_unit = info->mem_unit; - while (mem_unit > 1) { - bitcount++; - mem_unit >>= 1; - sav_total = mem_total; - mem_total <<= 1; - if (mem_total < sav_total) - goto out; - } - - /* - * If mem_total did not overflow, multiply all memory values by - * info->mem_unit and set it to 1. This leaves things compatible - * with 2.2.x, and also retains compatibility with earlier 2.4.x - * kernels... - */ - - info->mem_unit = 1; - info->totalram <<= bitcount; - info->freeram <<= bitcount; - info->sharedram <<= bitcount; - info->bufferram <<= bitcount; - info->totalswap <<= bitcount; - info->freeswap <<= bitcount; - info->totalhigh <<= bitcount; - info->freehigh <<= bitcount; - -out: - return 0; -} - -SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) -{ - struct sysinfo val; - - do_sysinfo(&val); - - if (copy_to_user(info, &val, sizeof(struct sysinfo))) - return -EFAULT; - - return 0; -} - static int __cpuinit init_timers_cpu(int cpu) { int j; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 05039e348f07..ea741c32d596 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -239,10 +239,12 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; - if (hardlockup_panic) + if (hardlockup_panic) { + trigger_all_cpu_backtrace(); panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu); - else + } else { WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); + } __this_cpu_write(hard_watchdog_warn, true); return; @@ -323,8 +325,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) else dump_stack(); - if (softlockup_panic) + if (softlockup_panic) { + trigger_all_cpu_backtrace(); panic("softlockup: hung tasks"); + } __this_cpu_write(soft_watchdog_warn, true); } else __this_cpu_write(soft_watchdog_warn, false); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 154aa12af48e..4aa9f5bc6b2d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -46,6 +46,7 @@ #include <linux/rculist.h> #include <linux/nodemask.h> #include <linux/moduleparam.h> +#include <linux/uaccess.h> #include "workqueue_internal.h" @@ -2197,6 +2198,7 @@ __acquires(&pool->lock) worker->current_work = NULL; worker->current_func = NULL; worker->current_pwq = NULL; + worker->desc_valid = false; pwq_dec_nr_in_flight(pwq, work_color); } @@ -4365,6 +4367,83 @@ unsigned int work_busy(struct work_struct *work) } EXPORT_SYMBOL_GPL(work_busy); +/** + * set_worker_desc - set description for the current work item + * @fmt: printf-style format string + * @...: arguments for the format string + * + * This function can be called by a running work function to describe what + * the work item is about. If the worker task gets dumped, this + * information will be printed out together to help debugging. The + * description can be at most WORKER_DESC_LEN including the trailing '\0'. + */ +void set_worker_desc(const char *fmt, ...) +{ + struct worker *worker = current_wq_worker(); + va_list args; + + if (worker) { + va_start(args, fmt); + vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); + va_end(args); + worker->desc_valid = true; + } +} + +/** + * print_worker_info - print out worker information and description + * @log_lvl: the log level to use when printing + * @task: target task + * + * If @task is a worker and currently executing a work item, print out the + * name of the workqueue being serviced and worker description set with + * set_worker_desc() by the currently executing work item. + * + * This function can be safely called on any task as long as the + * task_struct itself is accessible. While safe, this function isn't + * synchronized and may print out mixups or garbages of limited length. + */ +void print_worker_info(const char *log_lvl, struct task_struct *task) +{ + work_func_t *fn = NULL; + char name[WQ_NAME_LEN] = { }; + char desc[WORKER_DESC_LEN] = { }; + struct pool_workqueue *pwq = NULL; + struct workqueue_struct *wq = NULL; + bool desc_valid = false; + struct worker *worker; + + if (!(task->flags & PF_WQ_WORKER)) + return; + + /* + * This function is called without any synchronization and @task + * could be in any state. Be careful with dereferences. + */ + worker = probe_kthread_data(task); + + /* + * Carefully copy the associated workqueue's workfn and name. Keep + * the original last '\0' in case the original contains garbage. + */ + probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); + probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); + probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); + probe_kernel_read(name, wq->name, sizeof(name) - 1); + + /* copy worker description */ + probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid)); + if (desc_valid) + probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + + if (fn || name[0] || desc[0]) { + printk("%sWorkqueue: %s %pf", log_lvl, name, fn); + if (desc[0]) + pr_cont(" (%s)", desc); + pr_cont("\n"); + } +} + /* * CPU hotplug. * diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 84ab6e1dc6fb..ad83c96b2ece 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -29,15 +29,25 @@ struct worker { struct work_struct *current_work; /* L: work being processed */ work_func_t current_func; /* L: current_work's fn */ struct pool_workqueue *current_pwq; /* L: current_work's pwq */ + bool desc_valid; /* ->desc is valid */ struct list_head scheduled; /* L: scheduled works */ + + /* 64 bytes boundary on 64bit, 32 on 32bit */ + struct task_struct *task; /* I: worker task */ struct worker_pool *pool; /* I: the associated pool */ /* L: for rescuers */ - /* 64 bytes boundary on 64bit, 32 on 32bit */ + unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ int id; /* I: worker id */ + /* + * Opaque string set with work_set_desc(). Printed out with task + * dump for debugging - WARN, BUG, panic or sysrq. + */ + char desc[WORKER_DESC_LEN]; + /* used only by rescuers to point to the target workqueue */ struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ }; diff --git a/lib/Kconfig b/lib/Kconfig index 3958dc4389f9..7bfdbbcb6abd 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -189,6 +189,15 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +config LZ4_COMPRESS + tristate + +config LZ4HC_COMPRESS + tristate + +config LZ4_DECOMPRESS + tristate + source "lib/xz/Kconfig" # @@ -213,6 +222,10 @@ config DECOMPRESS_LZO select LZO_DECOMPRESS tristate +config DECOMPRESS_LZ4 + select LZ4_DECOMPRESS + tristate + # # Generic allocator support is selected if needed # diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 28be08c09bab..770a422a42e8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1292,6 +1292,24 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. +config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + bool + +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict user copy size checks" + depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING + help + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, say N. + source mm/Kconfig.debug source kernel/trace/Kconfig @@ -1463,5 +1481,8 @@ source "lib/Kconfig.kgdb" source "lib/Kconfig.kmemcheck" +config TEST_STRING_HELPERS + tristate "Test functions located in the string_helpers module at runtime" + config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index d7946ff75b2e..370a1d6e8608 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,8 +13,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o + earlycpio.o percpu-refcount.o +obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -22,8 +23,10 @@ lib-y += kobject.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o +obj-y += string_helpers.o +obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o @@ -72,6 +75,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_LZ4_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ @@ -80,6 +86,7 @@ lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o +lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o diff --git a/lib/argv_split.c b/lib/argv_split.c index 1e9a6cbc3689..e927ed0e18a8 100644 --- a/lib/argv_split.c +++ b/lib/argv_split.c @@ -8,23 +8,17 @@ #include <linux/slab.h> #include <linux/export.h> -static const char *skip_arg(const char *cp) -{ - while (*cp && !isspace(*cp)) - cp++; - - return cp; -} - static int count_argc(const char *str) { int count = 0; + bool was_space; - while (*str) { - str = skip_spaces(str); - if (*str) { + for (was_space = true; *str; str++) { + if (isspace(*str)) { + was_space = true; + } else if (was_space) { + was_space = false; count++; - str = skip_arg(str); } } @@ -39,10 +33,8 @@ static int count_argc(const char *str) */ void argv_free(char **argv) { - char **p; - for (p = argv; *p; p++) - kfree(*p); - + argv--; + kfree(argv[0]); kfree(argv); } EXPORT_SYMBOL(argv_free); @@ -59,43 +51,44 @@ EXPORT_SYMBOL(argv_free); * considered to be a single argument separator. The returned array * is always NULL-terminated. Returns NULL on memory allocation * failure. + * + * The source string at `str' may be undergoing concurrent alteration via + * userspace sysctl activity (at least). The argv_split() implementation + * attempts to handle this gracefully by taking a local copy to work on. */ char **argv_split(gfp_t gfp, const char *str, int *argcp) { - int argc = count_argc(str); - char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp); - char **argvp; - - if (argv == NULL) - goto out; - - if (argcp) - *argcp = argc; - - argvp = argv; - - while (*str) { - str = skip_spaces(str); - - if (*str) { - const char *p = str; - char *t; - - str = skip_arg(str); + char *argv_str; + bool was_space; + char **argv, **argv_ret; + int argc; + + argv_str = kstrndup(str, KMALLOC_MAX_SIZE - 1, gfp); + if (!argv_str) + return NULL; + + argc = count_argc(argv_str); + argv = kmalloc(sizeof(*argv) * (argc + 2), gfp); + if (!argv) { + kfree(argv_str); + return NULL; + } - t = kstrndup(p, str-p, gfp); - if (t == NULL) - goto fail; - *argvp++ = t; + *argv = argv_str; + argv_ret = ++argv; + for (was_space = true; *argv_str; argv_str++) { + if (isspace(*argv_str)) { + was_space = true; + *argv_str = 0; + } else if (was_space) { + was_space = false; + *argv++ = argv_str; } } - *argvp = NULL; - - out: - return argv; + *argv = NULL; - fail: - argv_free(argv); - return NULL; + if (argcp) + *argcp = argc; + return argv_ret; } EXPORT_SYMBOL(argv_split); diff --git a/lib/decompress.c b/lib/decompress.c index 31a804277282..c70810ea8590 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -11,6 +11,7 @@ #include <linux/decompress/unxz.h> #include <linux/decompress/inflate.h> #include <linux/decompress/unlzo.h> +#include <linux/decompress/unlz4.h> #include <linux/types.h> #include <linux/string.h> @@ -31,6 +32,9 @@ #ifndef CONFIG_DECOMPRESS_LZO # define unlzo NULL #endif +#ifndef CONFIG_DECOMPRESS_LZ4 +# define unlz4 NULL +#endif struct compress_format { unsigned char magic[2]; @@ -45,6 +49,7 @@ static const struct compress_format compressed_formats[] __initdata = { { {0x5d, 0x00}, "lzma", unlzma }, { {0xfd, 0x37}, "xz", unxz }, { {0x89, 0x4c}, "lzo", unlzo }, + { {0x02, 0x21}, "lz4", unlz4 }, { {0, 0}, NULL, NULL } }; diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c new file mode 100644 index 000000000000..3e67cfad16ad --- /dev/null +++ b/lib/decompress_unlz4.c @@ -0,0 +1,187 @@ +/* + * Wrapper for decompressing LZ4-compressed kernel, initramfs, and initrd + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef STATIC +#define PREBOOT +#include "lz4/lz4_decompress.c" +#else +#include <linux/decompress/unlz4.h> +#endif +#include <linux/types.h> +#include <linux/lz4.h> +#include <linux/decompress/mm.h> +#include <linux/compiler.h> + +#include <asm/unaligned.h> + +/* + * Note: Uncompressed chunk size is used in the compressor side + * (userspace side for compression). + * It is hardcoded because there is not proper way to extract it + * from the binary stream which is generated by the preliminary + * version of LZ4 tool so far. + */ +#define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20) +#define ARCHIVE_MAGICNUMBER 0x184C2102 + +STATIC inline int INIT unlz4(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error) (char *x)) +{ + int ret = -1; + size_t chunksize = 0; + size_t uncomp_chunksize = LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE; + u8 *inp; + u8 *inp_start; + u8 *outp; + int size = in_len; +#ifdef PREBOOT + size_t out_len = get_unaligned_le32(input + in_len); +#endif + size_t dest_len; + + + if (output) { + outp = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit_0; + } else { + outp = large_malloc(uncomp_chunksize); + if (!outp) { + error("Could not allocate output buffer"); + goto exit_0; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided,"); + goto exit_1; + } else if (input) { + inp = input; + } else if (!fill) { + error("NULL input pointer and missing fill function"); + goto exit_1; + } else { + inp = large_malloc(lz4_compressbound(uncomp_chunksize)); + if (!inp) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + inp_start = inp; + + if (posp) + *posp = 0; + + if (fill) + fill(inp, 4); + + chunksize = get_unaligned_le32(inp); + if (chunksize == ARCHIVE_MAGICNUMBER) { + inp += 4; + size -= 4; + } else { + error("invalid header"); + goto exit_2; + } + + if (posp) + *posp += 4; + + for (;;) { + + if (fill) + fill(inp, 4); + + chunksize = get_unaligned_le32(inp); + if (chunksize == ARCHIVE_MAGICNUMBER) { + inp += 4; + size -= 4; + if (posp) + *posp += 4; + continue; + } + inp += 4; + size -= 4; + + if (posp) + *posp += 4; + + if (fill) { + if (chunksize > lz4_compressbound(uncomp_chunksize)) { + error("chunk length is longer than allocated"); + goto exit_2; + } + fill(inp, chunksize); + } +#ifdef PREBOOT + if (out_len >= uncomp_chunksize) { + dest_len = uncomp_chunksize; + out_len -= dest_len; + } else + dest_len = out_len; + ret = lz4_decompress(inp, &chunksize, outp, dest_len); +#else + dest_len = uncomp_chunksize; + ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp, + &dest_len); +#endif + if (ret < 0) { + error("Decoding failed"); + goto exit_2; + } + + if (flush && flush(outp, dest_len) != dest_len) + goto exit_2; + if (output) + outp += dest_len; + if (posp) + *posp += chunksize; + + size -= chunksize; + + if (size == 0) + break; + else if (size < 0) { + error("data corrupted"); + goto exit_2; + } + + inp += chunksize; + if (fill) + inp = inp_start; + } + + ret = 0; +exit_2: + if (!input) + large_free(inp_start); +exit_1: + if (!output) + large_free(outp); +exit_0: + return ret; +} + +#ifdef PREBOOT +STATIC int INIT decompress(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x) + ) +{ + return unlz4(buf, in_len - 4, fill, flush, output, posp, error); +} +#endif diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 42f4f55c9458..53bad099ebd6 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -5,11 +5,16 @@ #include <linux/kernel.h> #include <linux/export.h> +#include <linux/sched.h> +/** + * dump_stack - dump the current task information and its stack trace + * + * Architectures can override this implementation by implementing its own. + */ void dump_stack(void) { - printk(KERN_NOTICE - "This architecture does not implement dump_stack()\n"); + dump_stack_print_info(KERN_DEFAULT); + show_stack(NULL, NULL); } - EXPORT_SYMBOL(dump_stack); diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 5276b99ca650..99fec3ae405a 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -24,6 +24,7 @@ #include <linux/sysctl.h> #include <linux/ctype.h> #include <linux/string.h> +#include <linux/string_helpers.h> #include <linux/uaccess.h> #include <linux/dynamic_debug.h> #include <linux/debugfs.h> @@ -276,48 +277,6 @@ static inline int parse_lineno(const char *str, unsigned int *val) return 0; } -/* - * Undo octal escaping in a string, inplace. This is useful to - * allow the user to express a query which matches a format - * containing embedded spaces. - */ -#define isodigit(c) ((c) >= '0' && (c) <= '7') -static char *unescape(char *str) -{ - char *in = str; - char *out = str; - - while (*in) { - if (*in == '\\') { - if (in[1] == '\\') { - *out++ = '\\'; - in += 2; - continue; - } else if (in[1] == 't') { - *out++ = '\t'; - in += 2; - continue; - } else if (in[1] == 'n') { - *out++ = '\n'; - in += 2; - continue; - } else if (isodigit(in[1]) && - isodigit(in[2]) && - isodigit(in[3])) { - *out++ = (((in[1] - '0') << 6) | - ((in[2] - '0') << 3) | - (in[3] - '0')); - in += 4; - continue; - } - } - *out++ = *in++; - } - *out = '\0'; - - return str; -} - static int check_set(const char **dest, char *src, char *name) { int rc = 0; @@ -371,8 +330,10 @@ static int ddebug_parse_query(char *words[], int nwords, } else if (!strcmp(words[i], "module")) { rc = check_set(&query->module, words[i+1], "module"); } else if (!strcmp(words[i], "format")) { - rc = check_set(&query->format, unescape(words[i+1]), - "format"); + string_unescape_inplace(words[i+1], UNESCAPE_SPACE | + UNESCAPE_OCTAL | + UNESCAPE_SPECIAL); + rc = check_set(&query->format, words[i+1], "format"); } else if (!strcmp(words[i], "line")) { char *first = words[i+1]; char *last = strchr(first, '-'); diff --git a/lib/fault-inject.c b/lib/fault-inject.c index f7210ad6cffd..c5c7a762b850 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -122,7 +122,7 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; } - if (attr->probability <= random32() % 100) + if (attr->probability <= prandom_u32() % 100) return false; if (!fail_stacktrace(attr)) diff --git a/lib/genalloc.c b/lib/genalloc.c index 54920433705a..b35cfa9bc3d4 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -34,6 +34,8 @@ #include <linux/rculist.h> #include <linux/interrupt.h> #include <linux/genalloc.h> +#include <linux/of_address.h> +#include <linux/of_device.h> static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) { @@ -480,3 +482,82 @@ unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, return start_bit; } EXPORT_SYMBOL(gen_pool_best_fit); + +static void devm_gen_pool_release(struct device *dev, void *res) +{ + gen_pool_destroy(*(struct gen_pool **)res); +} + +/** + * devm_gen_pool_create - managed gen_pool_create + * @dev: device that provides the gen_pool + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. The pool will be + * automatically destroyed by the device management code. + */ +struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, + int nid) +{ + struct gen_pool **ptr, *pool; + + ptr = devres_alloc(devm_gen_pool_release, sizeof(*ptr), GFP_KERNEL); + + pool = gen_pool_create(min_alloc_order, nid); + if (pool) { + *ptr = pool; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return pool; +} + +/** + * dev_get_gen_pool - Obtain the gen_pool (if any) for a device + * @dev: device to retrieve the gen_pool from + * @name: Optional name for the gen_pool, usually NULL + * + * Returns the gen_pool for the device if one is present, or NULL. + */ +struct gen_pool *dev_get_gen_pool(struct device *dev) +{ + struct gen_pool **p = devres_find(dev, devm_gen_pool_release, NULL, + NULL); + + if (!p) + return NULL; + return *p; +} +EXPORT_SYMBOL_GPL(dev_get_gen_pool); + +#ifdef CONFIG_OF +/** + * of_get_named_gen_pool - find a pool by phandle property + * @np: device node + * @propname: property name containing phandle(s) + * @index: index into the phandle array + * + * Returns the pool that contains the chunk starting at the physical + * address of the device tree node pointed at by the phandle property, + * or NULL if not found. + */ +struct gen_pool *of_get_named_gen_pool(struct device_node *np, + const char *propname, int index) +{ + struct platform_device *pdev; + struct device_node *np_pool; + + np_pool = of_parse_phandle(np, propname, index); + if (!np_pool) + return NULL; + pdev = of_find_device_by_node(np_pool); + if (!pdev) + return NULL; + return dev_get_gen_pool(&pdev->dev); +} +EXPORT_SYMBOL_GPL(of_get_named_gen_pool); +#endif /* CONFIG_OF */ diff --git a/lib/idr.c b/lib/idr.c index 322e2816f2fb..cca4b9302a71 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -495,6 +495,33 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(idr_alloc); +/** + * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion + * @idr: the (initialized) idr + * @ptr: pointer to be associated with the new id + * @start: the minimum id (inclusive) + * @end: the maximum id (exclusive, <= 0 for max) + * @gfp_mask: memory allocation flags + * + * Essentially the same as idr_alloc, but prefers to allocate progressively + * higher ids if it can. If the "cur" counter wraps, then it will start again + * at the "start" end of the range and allocate one that has already been used. + */ +int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, + gfp_t gfp_mask) +{ + int id; + + id = idr_alloc(idr, ptr, max(start, idr->cur), end, gfp_mask); + if (id == -ENOSPC) + id = idr_alloc(idr, ptr, start, end, gfp_mask); + + if (likely(id >= 0)) + idr->cur = id + 1; + return id; +} +EXPORT_SYMBOL(idr_alloc_cyclic); + static void idr_remove_warning(int id) { printk(KERN_WARNING diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index fc2eeb7cb2ea..1ef4cc344977 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -1,3 +1,9 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@hp.com> + * + * Based on the shift-and-subtract algorithm for computing integer + * square root from Guy L. Steele. + */ #include <linux/kernel.h> #include <linux/export.h> @@ -10,23 +16,23 @@ */ unsigned long int_sqrt(unsigned long x) { - unsigned long op, res, one; + unsigned long b, m, y = 0; - op = x; - res = 0; + if (x <= 1) + return x; - one = 1UL << (BITS_PER_LONG - 2); - while (one > op) - one >>= 2; + m = 1UL << (BITS_PER_LONG - 2); + while (m != 0) { + b = y + m; + y >>= 1; - while (one != 0) { - if (op >= res + one) { - op = op - (res + one); - res = res + 2 * one; + if (x >= b) { + x -= b; + y += m; } - res /= 2; - one /= 4; + m >>= 2; } - return res; + + return y; } EXPORT_SYMBOL(int_sqrt); diff --git a/lib/list_sort.c b/lib/list_sort.c index d7325c6b103f..1183fa70a44d 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -229,7 +229,7 @@ static int __init list_sort_test(void) goto exit; } /* force some equivalencies */ - el->value = random32() % (TEST_LIST_LEN/3); + el->value = prandom_u32() % (TEST_LIST_LEN / 3); el->serial = i; el->poison1 = TEST_POISON1; el->poison2 = TEST_POISON2; diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile new file mode 100644 index 000000000000..8085d04e9309 --- /dev/null +++ b/lib/lz4/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c new file mode 100644 index 000000000000..fd94058bd7f9 --- /dev/null +++ b/lib/lz4/lz4_compress.c @@ -0,0 +1,443 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min <chanho.min@lge.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/lz4.h> +#include <asm/unaligned.h> +#include "lz4defs.h" + +/* + * LZ4_compressCtx : + * ----------------- + * Compress 'isize' bytes from 'source' into an output buffer 'dest' of + * maximum size 'maxOutputSize'. * If it cannot achieve it, compression + * will stop, and result of the function will be zero. + * return : the number of bytes written in buffer 'dest', or 0 if the + * compression fails + */ +static inline int lz4_compressctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + HTYPE *hashtable = (HTYPE *)ctx; + const u8 *ip = (u8 *)source; +#if LZ4_ARCH64 + const BYTE * const base = ip; +#else + const int base = 0; +#endif + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; + forwardh = LZ4_HASH_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (unlikely(forwardip > mflimit)) + goto _last_literals; + + forwardh = LZ4_HASH_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = ip - base; + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + + if (length >= (int)RUN_MASK) { + int len; + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + /* Encode MatchLength */ + length = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) + return 0; + if (length >= (int)ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length -= 510) { + *op++ = 255; + *op++ = 255; + } + if (length > 254) { + length -= 255; + *op++ = 255; + } + *op++ = (u8)length; + } else + *token += length; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + + /* Test next position */ + ref = base + hashtable[LZ4_HASH_VALUE(ip)]; + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (((char *)op - dest) + lastrun + 1 + + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) + return 0; + + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + + /* End */ + return (int)(((char *)op) - dest); +} + +static inline int lz4_compress64kctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + u16 *hashtable = (u16 *)ctx; + const u8 *ip = (u8 *) source; + const u8 *anchor = ip; + const u8 *const base = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int len, length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (forwardip > mflimit) + goto _last_literals; + + forwardh = LZ4_HASH64K_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = (u16)(ip - base); + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) + && (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + + while (ip < MATCHLIMIT - (STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + + /* Encode MatchLength */ + len = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return 0; + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (u8)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + + /* Test next position */ + ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; + hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) + return 0; + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int)(((char *)op) - dest); +} + +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + if (src_len < LZ4_64KLIMIT) + out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + else + out_len = lz4_compressctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + + return 0; +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c new file mode 100644 index 000000000000..d3414eae73a1 --- /dev/null +++ b/lib/lz4/lz4_decompress.c @@ -0,0 +1,326 @@ +/* + * LZ4 Decompressor for Linux kernel + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * Based on LZ4 implementation by Yann Collet. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#ifndef STATIC +#include <linux/module.h> +#include <linux/kernel.h> +#endif +#include <linux/lz4.h> + +#include <asm/unaligned.h> + +#include "lz4defs.h" + +static int lz4_uncompress(const char *source, char *dest, int osize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *ref; + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + osize; + BYTE *cpy; + unsigned token; + size_t length; + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + while (1) { + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + size_t len; + + len = *ip++; + for (; len == 255; length += 255) + len = *ip++; + length += len; + } + + /* copy literals */ + cpy = op + length; + if (unlikely(cpy > oend - COPYLENGTH)) { + /* + * Error: not enough place for another match + * (min 4) + 5 literals + */ + if (cpy != oend) + goto _output_error; + + memcpy(op, ip, length); + ip += length; + break; /* EOF */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + + /* Error: offset create reference outside destination buffer */ + if (unlikely(ref < (BYTE *const) dest)) + goto _output_error; + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + for (; *ip == 255; length += 255) + ip++; + length += *ip++; + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op-ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > (oend - COPYLENGTH)) { + + /* Error: request to write beyond destination buffer */ + if (cpy > oend) + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)ip) - source); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, + int isize, size_t maxoutputsize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *ref; + + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + maxoutputsize; + BYTE *cpy; + + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + /* Main Loop */ + while (ip < iend) { + + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + + if (cpy > oend) + goto _output_error;/* writes beyond buffer */ + + if (ip + length != iend) + goto _output_error;/* + * Error: LZ4 format requires + * to consume all input + * at this stage + */ + memcpy(op, ip, length); + op += length; + break;/* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const) dest) + goto _output_error; + /* + * Error : offset creates reference + * outside of destination buffer + */ + + /* get matchlength */ + length = (token & ML_MASK); + if (length == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op - ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE-4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + goto _output_error; /* write outside of buf */ + + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *) op) - dest); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *) ip) - source)); +} + +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len) +{ + int ret = -1; + int input_len = 0; + + input_len = lz4_uncompress(src, dest, actual_dest_len); + if (input_len < 0) + goto exit_0; + *src_len = input_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress); +#endif + +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len) +{ + int ret = -1; + int out_len = 0; + + out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, + *dest_len); + if (out_len < 0) + goto exit_0; + *dest_len = out_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Decompressor"); +#endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h new file mode 100644 index 000000000000..abcecdc2d0f2 --- /dev/null +++ b/lib/lz4/lz4defs.h @@ -0,0 +1,156 @@ +/* + * lz4defs.h -- architecture specific defines + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Detects 64 bits mode + */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ + || defined(__ppc64__) || defined(__LP64__)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Architecture-specific macros + */ +#define BYTE u8 +typedef struct _U16_S { u16 v; } U16_S; +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ + || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ + && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) + +#define A16(x) (((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + +#define PUT4(s, d) (A32(d) = A32(s)) +#define PUT8(s, d) (A64(d) = A64(s)) +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + A16(p) = v; \ + p += 2; \ + } while (0) +#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) +#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) +#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) + +#define PUT4(s, d) \ + put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) +#define PUT8(s, d) \ + put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned(v, (u16 *)(p)); \ + p += 2; \ + } while (0) +#endif + +#define COPYLENGTH 8 +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) +#define MEMORY_USAGE 14 +#define MINMATCH 4 +#define SKIPSTRENGTH 6 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) +#define MAXD_LOG 16 +#define MAXD (1 << MAXD_LOG) +#define MAXD_MASK (u32)(MAXD - 1) +#define MAX_DISTANCE (MAXD - 1) +#define HASH_LOG (MAXD_LOG - 1) +#define HASHTABLESIZE (1 << HASH_LOG) +#define MAX_NB_ATTEMPTS 256 +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) +#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - (MEMORY_USAGE-2))) +#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASHLOG64K)) +#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASH_LOG)) + +#if LZ4_ARCH64/* 64-bit */ +#define STEPSIZE 8 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT8(s, d); \ + d += 8; \ + s += 8; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) + +#define LZ4_SECURECOPY(s, d, e) \ + do { \ + if (d < e) { \ + LZ4_WILDCOPY(s, d, e); \ + } \ + } while (0) +#define HTYPE u32 + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) +#endif + +#else /* 32-bit */ +#define STEPSIZE 4 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT4(s, d); \ + d += 4; \ + s += 4; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) \ + do { \ + LZ4_COPYSTEP(s, d); \ + LZ4_COPYSTEP(s, d); \ + } while (0) + +#define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const u8* + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) +#endif + +#endif + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) + +#define LZ4_WILDCOPY(s, d, e) \ + do { \ + LZ4_COPYPACKET(s, d); \ + } while (d < e) + +#define LZ4_BLINDCOPY(s, d, l) \ + do { \ + u8 *e = (d) + l; \ + LZ4_WILDCOPY(s, d, e); \ + d = e; \ + } while (0) diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c new file mode 100644 index 000000000000..eb1a74f5e368 --- /dev/null +++ b/lib/lz4/lz4hc_compress.c @@ -0,0 +1,539 @@ +/* + * LZ4 HC - High Compression Mode of LZ4 + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min <chanho.min@lge.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/lz4.h> +#include <asm/unaligned.h> +#include "lz4defs.h" + +struct lz4hc_data { + const u8 *base; + HTYPE hashtable[HASHTABLESIZE]; + u16 chaintable[MAXD]; + const u8 *nexttoupdate; +} __attribute__((__packed__)); + +static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) +{ + memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); + memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); + +#if LZ4_ARCH64 + hc4->nexttoupdate = base + 1; +#else + hc4->nexttoupdate = base; +#endif + hc4->base = base; + return 1; +} + +/* Update chains up to ip (excluded) */ +static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) +{ + u16 *chaintable = hc4->chaintable; + HTYPE *hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + + while (hc4->nexttoupdate < ip) { + const u8 *p = hc4->nexttoupdate; + size_t delta = p - (hashtable[HASH_VALUE(p)] + base); + if (delta > MAX_DISTANCE) + delta = MAX_DISTANCE; + chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; + hashtable[HASH_VALUE(p)] = (p) - base; + hc4->nexttoupdate++; + } +} + +static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, + const u8 *const matchlimit) +{ + const u8 *p1t = p1; + + while (p1t < matchlimit - (STEPSIZE - 1)) { +#if LZ4_ARCH64 + u64 diff = A64(p2) ^ A64(p1t); +#else + u32 diff = A32(p2) ^ A32(p1t); +#endif + if (!diff) { + p1t += STEPSIZE; + p2 += STEPSIZE; + continue; + } + p1t += LZ4_NBCOMMONBYTES(diff); + return p1t - p1; + } +#if LZ4_ARCH64 + if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { + p1t += 4; + p2 += 4; + } +#endif + + if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { + p1t += 2; + p2 += 2; + } + if ((p1t < matchlimit) && (*p2 == *p1t)) + p1t++; + return p1t - p1; +} + +static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; + const u8 *ref; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + int nbattempts = MAX_NB_ATTEMPTS; + size_t repl = 0, ml = 0; + u16 delta; + + /* HC4 match finder */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + /* potential repetition */ + if (ref >= ip-4) { + /* confirmed */ + if (A32(ref) == A32(ip)) { + delta = (u16)(ip-ref); + repl = ml = lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + *matchpos = ref; + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + while ((ref >= ip - MAX_DISTANCE) && nbattempts) { + nbattempts--; + if (*(ref + ml) == *(ip + ml)) { + if (A32(ref) == A32(ip)) { + size_t mlt = + lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { + ml = mlt; + *matchpos = ref; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + /* Complete table */ + if (repl) { + const BYTE *ptr = ip; + const BYTE *end; + end = ip + repl - (MINMATCH-1); + /* Pre-Load */ + while (ptr < end - delta) { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + ptr++; + } + do { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + /* Head of chain */ + hashtable[HASH_VALUE(ptr)] = (ptr) - base; + ptr++; + } while (ptr < end); + hc4->nexttoupdate = end; + } + + return (int)ml; +} + +static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, + const u8 **matchpos, const u8 **startpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + const u8 *ref; + int nbattempts = MAX_NB_ATTEMPTS; + int delta = (int)(ip - startlimit); + + /* First Match */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) + && (nbattempts)) { + nbattempts--; + if (*(startlimit + longest) == *(ref - delta + longest)) { + if (A32(ref) == A32(ip)) { + const u8 *reft = ref + MINMATCH; + const u8 *ipt = ip + MINMATCH; + const u8 *startt = ip; + + while (ipt < matchlimit-(STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(reft) ^ A64(ipt); + #else + u32 diff = A32(reft) ^ A32(ipt); + #endif + + if (!diff) { + ipt += STEPSIZE; + reft += STEPSIZE; + continue; + } + ipt += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ipt < (matchlimit - 3)) + && (A32(reft) == A32(ipt))) { + ipt += 4; + reft += 4; + } + ipt += 2; + #endif + if ((ipt < (matchlimit - 1)) + && (A16(reft) == A16(ipt))) { + reft += 2; + } + if ((ipt < matchlimit) && (*reft == *ipt)) + ipt++; +_endcount: + reft = ref; + + while ((startt > startlimit) + && (reft > hc4->base) + && (startt[-1] == reft[-1])) { + startt--; + reft--; + } + + if ((ipt - startt) > longest) { + longest = (int)(ipt - startt); + *matchpos = reft; + *startpos = startt; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + return longest; +} + +static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, + int ml, const u8 *ref) +{ + int length, len; + u8 *token; + + /* Encode Literal length */ + length = (int)(*ip - *anchor); + token = (*op)++; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *(*op)++ = 255; + *(*op)++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(*anchor, *op, length); + + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); + + /* Encode MatchLength */ + len = (int)(ml - MINMATCH); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *(*op)++ = 255; + *(*op)++ = 255; + } + if (len > 254) { + len -= 255; + *(*op)++ = 255; + } + *(*op)++ = (u8)len; + } else + *token += len; + + /* Prepare next loop */ + *ip += ml; + *anchor = *ip; + + return 0; +} + +static int lz4_compresshcctx(struct lz4hc_data *ctx, + const char *source, + char *dest, + int isize) +{ + const u8 *ip = (const u8 *)source; + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + const u8 *const matchlimit = (iend - LASTLITERALS); + + u8 *op = (u8 *)dest; + + int ml, ml2, ml3, ml0; + const u8 *ref = NULL; + const u8 *start2 = NULL; + const u8 *ref2 = NULL; + const u8 *start3 = NULL; + const u8 *ref3 = NULL; + const u8 *start0; + const u8 *ref0; + int lastrun; + + ip++; + + /* Main Loop */ + while (ip < mflimit) { + ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); + if (!ml) { + ip++; + continue; + } + + /* saved, in case we would skip too much */ + start0 = ip; + ref0 = ref; + ml0 = ml; +_search2: + if (ip+ml < mflimit) + ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, + ip + 1, matchlimit, ml, &ref2, &start2); + else + ml2 = ml; + /* No better match */ + if (ml2 == ml) { + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + continue; + } + + if (start0 < ip) { + /* empirical */ + if (start2 < ip + ml0) { + ip = start0; + ref = ref0; + ml = ml0; + } + } + /* + * Here, start0==ip + * First Match too small : removed + */ + if ((start2 - ip) < 3) { + ml = ml2; + ip = start2; + ref = ref2; + goto _search2; + } + +_search3: + /* + * Currently we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) + */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) + new_ml = OPTIMAL_ML; + if (ip + new_ml > start2 + ml2 - MINMATCH) + new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* + * Now, we have start2 = ip+new_ml, + * with new_ml=min(ml, OPTIMAL_ML=18) + */ + if (start2 + ml2 < mflimit) + ml3 = lz4hc_insertandgetwidermatch(ctx, + start2 + ml2 - 3, start2, matchlimit, + ml2, &ref3, &start3); + else + ml3 = ml2; + + /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) + ml = (int)(start2 - ip); + + /* Now, encode 2 sequences */ + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start2; + lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); + continue; + } + + /* Not enough space for match 2 : remove it */ + if (start3 < ip + ml + 3) { + /* + * can write Seq1 immediately ==> Seq2 is removed, + * so Seq3 becomes Seq1 + */ + if (start3 >= (ip + ml)) { + if (start2 < ip + ml) { + int correction = + (int)(ip + ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least + * the first one ip & ref are known; Now for ml + */ + if (start2 < ip + ml) { + if ((start2 - ip) < (int)ML_MASK) { + int correction; + if (ml > OPTIMAL_ML) + ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) + ml = (int)(start2 - ip) + ml2 + - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else + ml = (int)(start2 - ip); + } + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _search3; + } + + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8) lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int) (((char *)op) - dest); +} + +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; + lz4hc_init(hc4, (const u8 *)src); + out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, + (char *)dst, (int)src_len); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + return 0; + +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4hc_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC compressor"); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c new file mode 100644 index 000000000000..b2bfa1057f7b --- /dev/null +++ b/lib/percpu-refcount.c @@ -0,0 +1,251 @@ +#define pr_fmt(fmt) "%s: " fmt "\n", __func__ + +#include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/percpu-refcount.h> +#include <linux/rcupdate.h> + +/* + * A percpu refcount can be in 4 different modes. The state is tracked in the + * low two bits of percpu_ref->pcpu_count: + * + * PCPU_REF_NONE - the initial state, no percpu counters allocated. + * + * PCPU_REF_PTR - using percpu counters for the refcount. + * + * PCPU_REF_DYING - we're shutting down so get()/put() should use the embedded + * atomic counter, but we're not finished updating the atomic counter from the + * percpu counters - this means that percpu_ref_put() can't check for the ref + * hitting 0 yet. + * + * PCPU_REF_DEAD - we've finished the teardown sequence, percpu_ref_put() should + * now check for the ref hitting 0. + * + * In PCPU_REF_NONE mode, we need to count the number of times percpu_ref_get() + * is called; this is done with the high bits of the raw atomic counter. We also + * track the time, in jiffies, when the get count last wrapped - this is done + * with the remaining bits of percpu_ref->percpu_count. + * + * So, when percpu_ref_get() is called it increments the get count and checks if + * it wrapped; if it did, it checks if the last time it wrapped was less than + * one second ago; if so, we want to allocate percpu counters. + * + * PCPU_COUNT_BITS determines the threshold where we convert to percpu: of the + * raw 64 bit counter, we use PCPU_COUNT_BITS for the refcount, and the + * remaining (high) bits to count the number of times percpu_ref_get() has been + * called. It's currently (completely arbitrarily) 16384 times in one second. + * + * Percpu mode (PCPU_REF_PTR): + * + * In percpu mode all we do on get and put is increment or decrement the cpu + * local counter, which is a 32 bit unsigned int. + * + * Note that all the gets() could be happening on one cpu, and all the puts() on + * another - the individual cpu counters can wrap (potentially many times). + * + * But this is fine because we don't need to check for the ref hitting 0 in + * percpu mode; before we set the state to PCPU_REF_DEAD we simply sum up all + * the percpu counters and add them to the atomic counter. Since addition and + * subtraction in modular arithmatic is still associative, the result will be + * correct. + */ + +#define PCPU_COUNT_BITS 50 +#define PCPU_COUNT_MASK ((1LL << PCPU_COUNT_BITS) - 1) + +#define PCPU_STATUS_BITS 2 +#define PCPU_STATUS_MASK ((1 << PCPU_STATUS_BITS) - 1) + +#define PCPU_REF_PTR 0 +#define PCPU_REF_NONE 1 +#define PCPU_REF_DYING 2 +#define PCPU_REF_DEAD 3 + +#define REF_STATUS(count) (count & PCPU_STATUS_MASK) + +/** + * percpu_ref_init - initialize a dynamic percpu refcount + * + * Initializes the refcount in single atomic counter mode with a refcount of 1; + * analagous to atomic_set(ref, 1). + */ +void percpu_ref_init(struct percpu_ref *ref) +{ + unsigned long now = jiffies; + + atomic64_set(&ref->count, 1); + + now <<= PCPU_STATUS_BITS; + now |= PCPU_REF_NONE; + + ref->pcpu_count = now; +} + +static void percpu_ref_alloc(struct percpu_ref *ref, unsigned long pcpu_count) +{ + unsigned long new, now = jiffies; + + now <<= PCPU_STATUS_BITS; + now |= PCPU_REF_NONE; + + if (now - pcpu_count <= HZ << PCPU_STATUS_BITS) { + rcu_read_unlock(); + new = (unsigned long) alloc_percpu(unsigned); + rcu_read_lock(); + + if (!new) + goto update_time; + + BUG_ON(new & PCPU_STATUS_MASK); + + if (cmpxchg(&ref->pcpu_count, pcpu_count, new) != pcpu_count) + free_percpu((void __percpu *) new); + else + pr_debug("created"); + } else { +update_time: + new = now; + cmpxchg(&ref->pcpu_count, pcpu_count, new); + } +} + +void __percpu_ref_get(struct percpu_ref *ref, bool alloc) +{ + unsigned long pcpu_count; + uint64_t v; + + pcpu_count = ACCESS_ONCE(ref->pcpu_count); + + if (REF_STATUS(pcpu_count) == PCPU_REF_PTR) { + /* for rcu - we're not using rcu_dereference() */ + smp_read_barrier_depends(); + __this_cpu_inc(*((unsigned __percpu *) pcpu_count)); + } else { + v = atomic64_add_return(1 + (1ULL << PCPU_COUNT_BITS), + &ref->count); + + /* + * The high bits of the counter count the number of gets() that + * have occured; we check for overflow to call + * percpu_ref_alloc() every (1 << (64 - PCPU_COUNT_BITS)) + * iterations. + */ + + if (!(v >> PCPU_COUNT_BITS) && + REF_STATUS(pcpu_count) == PCPU_REF_NONE && alloc) + percpu_ref_alloc(ref, pcpu_count); + } +} + +/** + * percpu_ref_put - decrement a dynamic percpu refcount + * + * Returns true if the result is 0, otherwise false; only checks for the ref + * hitting 0 after percpu_ref_kill() has been called. Analagous to + * atomic_dec_and_test(). + */ +int percpu_ref_put(struct percpu_ref *ref) +{ + unsigned long pcpu_count; + uint64_t v; + int ret = 0; + + rcu_read_lock(); + + pcpu_count = ACCESS_ONCE(ref->pcpu_count); + + switch (REF_STATUS(pcpu_count)) { + case PCPU_REF_PTR: + /* for rcu - we're not using rcu_dereference() */ + smp_read_barrier_depends(); + __this_cpu_dec(*((unsigned __percpu *) pcpu_count)); + break; + case PCPU_REF_NONE: + case PCPU_REF_DYING: + atomic64_dec(&ref->count); + break; + case PCPU_REF_DEAD: + v = atomic64_dec_return(&ref->count); + v &= PCPU_COUNT_MASK; + + ret = v == 0; + break; + } + + rcu_read_unlock(); + + return ret; +} + +/** + * percpu_ref_kill - prepare a dynamic percpu refcount for teardown + * + * Must be called before dropping the initial ref, so that percpu_ref_put() + * knows to check for the refcount hitting 0. If the refcount was in percpu + * mode, converts it back to single atomic counter mode. + * + * Returns true the first time called on @ref and false if @ref is already + * shutting down, so it may be used by the caller for synchronizing other parts + * of a two stage shutdown. + */ +int percpu_ref_kill(struct percpu_ref *ref) +{ + unsigned long old, new, status, pcpu_count; + + pcpu_count = ACCESS_ONCE(ref->pcpu_count); + + do { + status = REF_STATUS(pcpu_count); + + switch (status) { + case PCPU_REF_PTR: + new = PCPU_REF_DYING; + break; + case PCPU_REF_NONE: + new = PCPU_REF_DEAD; + break; + case PCPU_REF_DYING: + case PCPU_REF_DEAD: + return 0; + } + + old = pcpu_count; + pcpu_count = cmpxchg(&ref->pcpu_count, old, new); + } while (pcpu_count != old); + + if (status == PCPU_REF_PTR) { + unsigned count = 0, cpu; + + synchronize_rcu(); + + for_each_possible_cpu(cpu) + count += *per_cpu_ptr((unsigned __percpu *)pcpu_count, + cpu); + + pr_debug("global %lli pcpu %i", + atomic64_read(&ref->count) & PCPU_COUNT_MASK, + (int) count); + + atomic64_add((int) count, &ref->count); + smp_wmb(); + /* Between setting global count and setting PCPU_REF_DEAD */ + ref->pcpu_count = PCPU_REF_DEAD; + + free_percpu((unsigned __percpu *) pcpu_count); + } + + return 1; +} + +/** + * percpu_ref_dead - check if a dynamic percpu refcount is shutting down + * + * Returns true if percpu_ref_kill() has been called on @ref, false otherwise. + */ +int percpu_ref_dead(struct percpu_ref *ref) +{ + unsigned status = REF_STATUS(ref->pcpu_count); + + return status == PCPU_REF_DYING || + status == PCPU_REF_DEAD; +} diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index af38aedbd874..122f02f9941b 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -117,8 +117,7 @@ static int black_path_count(struct rb_node *rb) static void check(int nr_nodes) { struct rb_node *rb; - int count = 0; - int blacks = 0; + int count = 0, blacks = 0; u32 prev_key = 0; for (rb = rb_first(&root); rb; rb = rb_next(rb)) { @@ -134,7 +133,9 @@ static void check(int nr_nodes) prev_key = node->key; count++; } + WARN_ON_ONCE(count != nr_nodes); + WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); } static void check_augmented(int nr_nodes) @@ -148,7 +149,7 @@ static void check_augmented(int nr_nodes) } } -static int rbtree_test_init(void) +static int __init rbtree_test_init(void) { int i, j; cycles_t time1, time2, time; @@ -221,7 +222,7 @@ static int rbtree_test_init(void) return -EAGAIN; /* Fail will directly unload the module */ } -static void rbtree_test_exit(void) +static void __exit rbtree_test_exit(void) { printk(KERN_ALERT "test exit\n"); } diff --git a/lib/show_mem.c b/lib/show_mem.c index 4407f8c9b1f7..b7c72311ad0c 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -18,6 +18,9 @@ void show_mem(unsigned int filter) printk("Mem-Info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; + for_each_online_pgdat(pgdat) { unsigned long i, flags; diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 1cffc223bff5..ed5c1454dd62 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -2,10 +2,12 @@ * Helpers for formatting and printing strings * * Copyright 31 August 2008 James Bottomley + * Copyright (C) 2013, Intel Corporation */ #include <linux/kernel.h> #include <linux/math64.h> #include <linux/export.h> +#include <linux/ctype.h> #include <linux/string_helpers.h> /** @@ -66,3 +68,134 @@ int string_get_size(u64 size, const enum string_size_units units, return 0; } EXPORT_SYMBOL(string_get_size); + +static bool unescape_space(char **src, char **dst) +{ + char *p = *dst, *q = *src; + + switch (*q) { + case 'n': + *p = '\n'; + break; + case 'r': + *p = '\r'; + break; + case 't': + *p = '\t'; + break; + case 'v': + *p = '\v'; + break; + case 'f': + *p = '\f'; + break; + default: + return false; + } + *dst += 1; + *src += 1; + return true; +} + +static bool unescape_octal(char **src, char **dst) +{ + char *p = *dst, *q = *src; + u8 num; + + if (isodigit(*q) == 0) + return false; + + num = (*q++) & 7; + while (num < 32 && isodigit(*q) && (q - *src < 3)) { + num <<= 3; + num += (*q++) & 7; + } + *p = num; + *dst += 1; + *src = q; + return true; +} + +static bool unescape_hex(char **src, char **dst) +{ + char *p = *dst, *q = *src; + int digit; + u8 num; + + if (*q++ != 'x') + return false; + + num = digit = hex_to_bin(*q++); + if (digit < 0) + return false; + + digit = hex_to_bin(*q); + if (digit >= 0) { + q++; + num = (num << 4) | digit; + } + *p = num; + *dst += 1; + *src = q; + return true; +} + +static bool unescape_special(char **src, char **dst) +{ + char *p = *dst, *q = *src; + + switch (*q) { + case '\"': + *p = '\"'; + break; + case '\\': + *p = '\\'; + break; + case 'a': + *p = '\a'; + break; + case 'e': + *p = '\e'; + break; + default: + return false; + } + *dst += 1; + *src += 1; + return true; +} + +int string_unescape(char *src, char *dst, size_t size, unsigned int flags) +{ + char *out = dst; + + while (*src && --size) { + if (src[0] == '\\' && src[1] != '\0' && size > 1) { + src++; + size--; + + if (flags & UNESCAPE_SPACE && + unescape_space(&src, &out)) + continue; + + if (flags & UNESCAPE_OCTAL && + unescape_octal(&src, &out)) + continue; + + if (flags & UNESCAPE_HEX && + unescape_hex(&src, &out)) + continue; + + if (flags & UNESCAPE_SPECIAL && + unescape_special(&src, &out)) + continue; + + *out++ = '\\'; + } + *out++ = *src++; + } + *out = '\0'; + + return out - dst; +} +EXPORT_SYMBOL(string_unescape); diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c new file mode 100644 index 000000000000..6ac48de04c0e --- /dev/null +++ b/lib/test-string_helpers.c @@ -0,0 +1,103 @@ +/* + * Test cases for lib/string_helpers.c module. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/string.h> +#include <linux/string_helpers.h> + +struct test_string { + const char *in; + const char *out; + unsigned int flags; +}; + +static const struct test_string strings[] __initconst = { + { + .in = "\\f\\ \\n\\r\\t\\v", + .out = "\f\\ \n\r\t\v", + .flags = UNESCAPE_SPACE, + }, + { + .in = "\\40\\1\\387\\0064\\05\\040\\8a\\110\\777", + .out = " \001\00387\0064\005 \\8aH?7", + .flags = UNESCAPE_OCTAL, + }, + { + .in = "\\xv\\xa\\x2c\\xD\\x6f2", + .out = "\\xv\n,\ro2", + .flags = UNESCAPE_HEX, + }, + { + .in = "\\h\\\\\\\"\\a\\e\\", + .out = "\\h\\\"\a\e\\", + .flags = UNESCAPE_SPECIAL, + }, +}; + +static void __init test_string_unescape(unsigned int flags, bool inplace) +{ + char in[256]; + char out_test[256]; + char out_real[256]; + int i, p = 0, q_test = 0, q_real = sizeof(out_real); + + for (i = 0; i < ARRAY_SIZE(strings); i++) { + const char *s = strings[i].in; + int len = strlen(strings[i].in); + + /* Copy string to in buffer */ + memcpy(&in[p], s, len); + p += len; + + /* Copy expected result for given flags */ + if (flags & strings[i].flags) { + s = strings[i].out; + len = strlen(strings[i].out); + } + memcpy(&out_test[q_test], s, len); + q_test += len; + } + in[p++] = '\0'; + + /* Call string_unescape and compare result */ + if (inplace) { + memcpy(out_real, in, p); + if (flags == UNESCAPE_ANY) + q_real = string_unescape_any_inplace(out_real); + else + q_real = string_unescape_inplace(out_real, flags); + } else if (flags == UNESCAPE_ANY) { + q_real = string_unescape_any(in, out_real, q_real); + } else { + q_real = string_unescape(in, out_real, q_real, flags); + } + + if (q_real != q_test || memcmp(out_test, out_real, q_test)) { + pr_warn("Test failed: flags = %u\n", flags); + print_hex_dump(KERN_WARNING, "Input: ", + DUMP_PREFIX_NONE, 16, 1, in, p - 1, true); + print_hex_dump(KERN_WARNING, "Expected: ", + DUMP_PREFIX_NONE, 16, 1, out_test, q_test, true); + print_hex_dump(KERN_WARNING, "Got: ", + DUMP_PREFIX_NONE, 16, 1, out_real, q_real, true); + } +} + +static int __init test_string_helpers_init(void) +{ + unsigned int i; + + pr_info("Running tests...\n"); + for (i = 0; i < UNESCAPE_ANY + 1; i++) + test_string_unescape(i, false); + test_string_unescape(get_random_int() % (UNESCAPE_ANY + 1), true); + + return -EINVAL; +} +module_init(test_string_helpers_init); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/arch/s390/lib/usercopy.c b/lib/usercopy.c index 14b363fec8a2..4f5b1ddbcd25 100644 --- a/arch/s390/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,5 +1,6 @@ -#include <linux/module.h> +#include <linux/export.h> #include <linux/bug.h> +#include <linux/uaccess.h> void copy_from_user_overflow(void) { diff --git a/lib/uuid.c b/lib/uuid.c index 52a6fe6387de..398821e4dce1 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -25,13 +25,7 @@ static void __uuid_gen_common(__u8 b[16]) { - int i; - u32 r; - - for (i = 0; i < 4; i++) { - r = random32(); - memcpy(b + i * 4, &r, 4); - } + prandom_bytes(b, 16); /* reversion 0b10 */ b[8] = (b[8] & 0x3F) | 0x80; } diff --git a/mm/Kconfig b/mm/Kconfig index 3bea74f1ccfe..7deac661a418 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -471,3 +471,15 @@ config FRONTSWAP and swap data is stored as normal on the matching swap device. If unsure, say Y to enable frontswap. + +config MEM_SOFT_DIRTY + bool "Track memory changes" + depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY + select PROC_PAGE_MONITOR + help + This option enables memory changes tracking by introducing a + soft-dirty bit on pte-s. This bit it set when someone writes + into a page just as regular dirty bit, but unlike the latter + it can be cleared by hands. + + See Documentation/vm/soft-dirty.txt for more details. diff --git a/mm/Makefile b/mm/Makefile index 3a4628751f89..72c5acb9345f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -50,7 +50,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o -obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o +obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 07dbc8ec46cf..2c8ce496804f 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -242,6 +242,7 @@ bool balloon_page_isolate(struct page *page) if (__is_movable_balloon_page(page) && page_count(page) == 2) { __isolate_balloon_page(page); + balloon_event_count(COMPACTBALLOONISOLATED); unlock_page(page); return true; } @@ -265,6 +266,7 @@ void balloon_page_putback(struct page *page) __putback_balloon_page(page); /* drop the extra ref count taken for page isolation */ put_page(page); + balloon_event_count(COMPACTBALLOONRETURNED); } else { WARN_ON(1); dump_page(page); diff --git a/mm/bounce.c b/mm/bounce.c index f5326b24d65d..708c1e99f57b 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -147,12 +147,14 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) bio_put(bio); } -static void bounce_end_io_write(struct bio *bio, int err) +static void bounce_end_io_write(struct bio *bio, int err, + struct batch_complete *batch) { bounce_end_io(bio, page_pool, err); } -static void bounce_end_io_write_isa(struct bio *bio, int err) +static void bounce_end_io_write_isa(struct bio *bio, int err, + struct batch_complete *batch) { bounce_end_io(bio, isa_page_pool, err); @@ -168,12 +170,14 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err) bounce_end_io(bio, pool, err); } -static void bounce_end_io_read(struct bio *bio, int err) +static void bounce_end_io_read(struct bio *bio, int err, + struct batch_complete *batch) { __bounce_end_io_read(bio, page_pool, err); } -static void bounce_end_io_read_isa(struct bio *bio, int err) +static void bounce_end_io_read_isa(struct bio *bio, int err, + struct batch_complete *batch) { __bounce_end_io_read(bio, isa_page_pool, err); } @@ -181,32 +185,13 @@ static void bounce_end_io_read_isa(struct bio *bio, int err) #ifdef CONFIG_NEED_BOUNCE_POOL static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) { - struct page *page; - struct backing_dev_info *bdi; - struct address_space *mapping; - struct bio_vec *from; - int i; - if (bio_data_dir(bio) != WRITE) return 0; if (!bdi_cap_stable_pages_required(&q->backing_dev_info)) return 0; - /* - * Based on the first page that has a valid mapping, decide whether or - * not we have to employ bounce buffering to guarantee stable pages. - */ - bio_for_each_segment(from, bio, i) { - page = from->bv_page; - mapping = page_mapping(page); - if (!mapping) - continue; - bdi = mapping->backing_dev_info; - return mapping->host->i_sb->s_flags & MS_SNAP_STABLE; - } - - return 0; + return test_bit(BIO_SNAP_STABLE, &bio->bi_flags); } #else static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) diff --git a/mm/cleancache.c b/mm/cleancache.c index d76ba74be2d0..5875f48ce279 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -19,20 +19,10 @@ #include <linux/cleancache.h> /* - * This global enablement flag may be read thousands of times per second - * by cleancache_get/put/invalidate even on systems where cleancache_ops - * is not claimed (e.g. cleancache is config'ed on but remains - * disabled), so is preferred to the slower alternative: a function - * call that checks a non-global. - */ -int cleancache_enabled __read_mostly; -EXPORT_SYMBOL(cleancache_enabled); - -/* * cleancache_ops is set by cleancache_ops_register to contain the pointers * to the cleancache "backend" implementation functions. */ -static struct cleancache_ops cleancache_ops __read_mostly; +static struct cleancache_ops *cleancache_ops __read_mostly; /* * Counters available via /sys/kernel/debug/frontswap (if debugfs is @@ -45,15 +35,101 @@ static u64 cleancache_puts; static u64 cleancache_invalidates; /* - * register operations for cleancache, returning previous thus allowing - * detection of multiple backends and possible nesting + * When no backend is registered all calls to init_fs and init_shared_fs + * are registered and fake poolids (FAKE_FS_POOLID_OFFSET or + * FAKE_SHARED_FS_POOLID_OFFSET, plus offset in the respective array + * [shared_|]fs_poolid_map) are given to the respective super block + * (sb->cleancache_poolid) and no tmem_pools are created. When a backend + * registers with cleancache the previous calls to init_fs and init_shared_fs + * are executed to create tmem_pools and set the respective poolids. While no + * backend is registered all "puts", "gets" and "flushes" are ignored or failed. + */ +#define MAX_INITIALIZABLE_FS 32 +#define FAKE_FS_POOLID_OFFSET 1000 +#define FAKE_SHARED_FS_POOLID_OFFSET 2000 + +#define FS_NO_BACKEND (-1) +#define FS_UNKNOWN (-2) +static int fs_poolid_map[MAX_INITIALIZABLE_FS]; +static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS]; +static char *uuids[MAX_INITIALIZABLE_FS]; +/* + * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads + * invoking umount (and ending in __cleancache_invalidate_fs) and also multiple + * threads calling mount (and ending up in __cleancache_init_[shared|]fs). + */ +static DEFINE_MUTEX(poolid_mutex); +/* + * When set to false (default) all calls to the cleancache functions, except + * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded + * by the if (!cleancache_ops) return. This means multiple threads (from + * different filesystems) will be checking cleancache_ops. The usage of a + * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are + * OK if the time between the backend's have been initialized (and + * cleancache_ops has been set to not NULL) and when the filesystems start + * actually calling the backends. The inverse (when unloading) is obviously + * not good - but this shim does not do that (yet). + */ + +/* + * The backends and filesystems work all asynchronously. This is b/c the + * backends can be built as modules. + * The usual sequence of events is: + * a) mount / -> __cleancache_init_fs is called. We set the + * [shared_|]fs_poolid_map and uuids for. + * + * b). user does I/Os -> we call the rest of __cleancache_* functions + * which return immediately as cleancache_ops is false. + * + * c). modprobe zcache -> cleancache_register_ops. We init the backend + * and set cleancache_ops to true, and for any fs_poolid_map + * (which is set by __cleancache_init_fs) we initialize the poolid. + * + * d). user does I/Os -> now that cleancache_ops is true all the + * __cleancache_* functions can call the backend. They all check + * that fs_poolid_map is valid and if so invoke the backend. + * + * e). umount / -> __cleancache_invalidate_fs, the fs_poolid_map is + * reset (which is the second check in the __cleancache_* ops + * to call the backend). + * + * The sequence of event could also be c), followed by a), and d). and e). The + * c) would not happen anymore. There is also the chance of c), and one thread + * doing a) + d), and another doing e). For that case we depend on the + * filesystem calling __cleancache_invalidate_fs in the proper sequence (so + * that it handles all I/Os before it invalidates the fs (which is last part + * of unmounting process). + * + * Note: The acute reader will notice that there is no "rmmod zcache" case. + * This is b/c the functionality for that is not yet implemented and when + * done, will require some extra locking not yet devised. + */ + +/* + * Register operations for cleancache, returning previous thus allowing + * detection of multiple backends and possible nesting. */ -struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) +struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops) { - struct cleancache_ops old = cleancache_ops; + struct cleancache_ops *old = cleancache_ops; + int i; - cleancache_ops = *ops; - cleancache_enabled = 1; + mutex_lock(&poolid_mutex); + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (fs_poolid_map[i] == FS_NO_BACKEND) + fs_poolid_map[i] = ops->init_fs(PAGE_SIZE); + if (shared_fs_poolid_map[i] == FS_NO_BACKEND) + shared_fs_poolid_map[i] = ops->init_shared_fs + (uuids[i], PAGE_SIZE); + } + /* + * We MUST set cleancache_ops _after_ we have called the backends + * init_fs or init_shared_fs functions. Otherwise the compiler might + * re-order where cleancache_ops is set in this function. + */ + barrier(); + cleancache_ops = ops; + mutex_unlock(&poolid_mutex); return old; } EXPORT_SYMBOL(cleancache_register_ops); @@ -61,15 +137,42 @@ EXPORT_SYMBOL(cleancache_register_ops); /* Called by a cleancache-enabled filesystem at time of mount */ void __cleancache_init_fs(struct super_block *sb) { - sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); + int i; + + mutex_lock(&poolid_mutex); + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (fs_poolid_map[i] == FS_UNKNOWN) { + sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET; + if (cleancache_ops) + fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE); + else + fs_poolid_map[i] = FS_NO_BACKEND; + break; + } + } + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_init_fs); /* Called by a cleancache-enabled clustered filesystem at time of mount */ void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) { - sb->cleancache_poolid = - (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); + int i; + + mutex_lock(&poolid_mutex); + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (shared_fs_poolid_map[i] == FS_UNKNOWN) { + sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET; + uuids[i] = uuid; + if (cleancache_ops) + shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs + (uuid, PAGE_SIZE); + else + shared_fs_poolid_map[i] = FS_NO_BACKEND; + break; + } + } + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_init_shared_fs); @@ -99,27 +202,53 @@ static int cleancache_get_key(struct inode *inode, } /* + * Returns a pool_id that is associated with a given fake poolid. + */ +static int get_poolid_from_fake(int fake_pool_id) +{ + if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) + return shared_fs_poolid_map[fake_pool_id - + FAKE_SHARED_FS_POOLID_OFFSET]; + else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) + return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET]; + return FS_NO_BACKEND; +} + +/* * "Get" data from cleancache associated with the poolid/inode/index * that were specified when the data was put to cleanache and, if * successful, use it to fill the specified page with data and return 0. * The pageframe is unchanged and returns -1 if the get fails. * Page must be locked by caller. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ int __cleancache_get_page(struct page *page) { int ret = -1; int pool_id; + int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!cleancache_ops) { + cleancache_failed_gets++; + goto out; + } + VM_BUG_ON(!PageLocked(page)); - pool_id = page->mapping->host->i_sb->cleancache_poolid; - if (pool_id < 0) + fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (fake_pool_id < 0) goto out; + pool_id = get_poolid_from_fake(fake_pool_id); if (cleancache_get_key(page->mapping->host, &key) < 0) goto out; - ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); + if (pool_id >= 0) + ret = cleancache_ops->get_page(pool_id, + key, page->index, page); if (ret == 0) cleancache_succ_gets++; else @@ -134,17 +263,32 @@ EXPORT_SYMBOL(__cleancache_get_page); * (previously-obtained per-filesystem) poolid and the page's, * inode and page index. Page must be locked. Note that a put_page * always "succeeds", though a subsequent get_page may succeed or fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_put_page(struct page *page) { int pool_id; + int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!cleancache_ops) { + cleancache_puts++; + return; + } + VM_BUG_ON(!PageLocked(page)); - pool_id = page->mapping->host->i_sb->cleancache_poolid; + fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (fake_pool_id < 0) + return; + + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id >= 0 && - cleancache_get_key(page->mapping->host, &key) >= 0) { - (*cleancache_ops.put_page)(pool_id, key, page->index, page); + cleancache_get_key(page->mapping->host, &key) >= 0) { + cleancache_ops->put_page(pool_id, key, page->index, page); cleancache_puts++; } } @@ -153,19 +297,31 @@ EXPORT_SYMBOL(__cleancache_put_page); /* * Invalidate any data from cleancache associated with the poolid and the * page's inode and page index so that a subsequent "get" will fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_invalidate_page(struct address_space *mapping, struct page *page) { /* careful... page->mapping is NULL sometimes when this is called */ - int pool_id = mapping->host->i_sb->cleancache_poolid; + int pool_id; + int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (pool_id >= 0) { + if (!cleancache_ops) + return; + + if (fake_pool_id >= 0) { + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id < 0) + return; + VM_BUG_ON(!PageLocked(page)); if (cleancache_get_key(mapping->host, &key) >= 0) { - (*cleancache_ops.invalidate_page)(pool_id, - key, page->index); + cleancache_ops->invalidate_page(pool_id, + key, page->index); cleancache_invalidates++; } } @@ -176,34 +332,63 @@ EXPORT_SYMBOL(__cleancache_invalidate_page); * Invalidate all data from cleancache associated with the poolid and the * mappings's inode so that all subsequent gets to this poolid/inode * will fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_invalidate_inode(struct address_space *mapping) { - int pool_id = mapping->host->i_sb->cleancache_poolid; + int pool_id; + int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!cleancache_ops) + return; + + if (fake_pool_id < 0) + return; + + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) - (*cleancache_ops.invalidate_inode)(pool_id, key); + cleancache_ops->invalidate_inode(pool_id, key); } EXPORT_SYMBOL(__cleancache_invalidate_inode); /* * Called by any cleancache-enabled filesystem at time of unmount; - * note that pool_id is surrendered and may be reutrned by a subsequent - * cleancache_init_fs or cleancache_init_shared_fs + * note that pool_id is surrendered and may be returned by a subsequent + * cleancache_init_fs or cleancache_init_shared_fs. */ void __cleancache_invalidate_fs(struct super_block *sb) { - if (sb->cleancache_poolid >= 0) { - int old_poolid = sb->cleancache_poolid; - sb->cleancache_poolid = -1; - (*cleancache_ops.invalidate_fs)(old_poolid); + int index; + int fake_pool_id = sb->cleancache_poolid; + int old_poolid = fake_pool_id; + + mutex_lock(&poolid_mutex); + if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) { + index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET; + old_poolid = shared_fs_poolid_map[index]; + shared_fs_poolid_map[index] = FS_UNKNOWN; + uuids[index] = NULL; + } else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) { + index = fake_pool_id - FAKE_FS_POOLID_OFFSET; + old_poolid = fs_poolid_map[index]; + fs_poolid_map[index] = FS_UNKNOWN; } + sb->cleancache_poolid = -1; + if (cleancache_ops) + cleancache_ops->invalidate_fs(old_poolid); + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_invalidate_fs); static int __init init_cleancache(void) { + int i; + #ifdef CONFIG_DEBUG_FS struct dentry *root = debugfs_create_dir("cleancache", NULL); if (root == NULL) @@ -215,6 +400,10 @@ static int __init init_cleancache(void) debugfs_create_u64("invalidates", S_IRUGO, root, &cleancache_invalidates); #endif + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + fs_poolid_map[i] = FS_UNKNOWN; + shared_fs_poolid_map[i] = FS_UNKNOWN; + } return 0; } module_init(init_cleancache) diff --git a/mm/dmapool.c b/mm/dmapool.c index c69781e97cf9..668f26316e2e 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -132,6 +132,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev, { struct dma_pool *retval; size_t allocation; + int node; if (align == 0) { align = 1; @@ -156,7 +157,9 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev, return NULL; } - retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, dev_to_node(dev)); + node = WARN_ON(!dev) ? -1 : dev_to_node(dev); + + retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, node); if (!retval) return retval; diff --git a/mm/filemap.c b/mm/filemap.c index cbde8842a374..7905fe721aa8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -35,6 +35,9 @@ #include <linux/cleancache.h> #include "internal.h" +#define CREATE_TRACE_POINTS +#include <trace/events/filemap.h> + /* * FIXME: remove all knowledge of the buffer layer from the core VM */ @@ -113,6 +116,7 @@ void __delete_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; + trace_mm_filemap_delete_from_page_cache(page); /* * if we're uptodate, flush out into the cleancache, otherwise * invalidate any existing cleancache entries. We can't leave @@ -184,6 +188,17 @@ static int sleep_on_page_killable(void *word) return fatal_signal_pending(current) ? -EINTR : 0; } +static int filemap_check_errors(struct address_space *mapping) +{ + int ret = 0; + /* Check for outstanding write errors */ + if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) + ret = -ENOSPC; + if (test_and_clear_bit(AS_EIO, &mapping->flags)) + ret = -EIO; + return ret; +} + /** * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range * @mapping: address space structure to write @@ -265,10 +280,10 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, pgoff_t end = end_byte >> PAGE_CACHE_SHIFT; struct pagevec pvec; int nr_pages; - int ret = 0; + int ret2, ret = 0; if (end_byte < start_byte) - return 0; + goto out; pagevec_init(&pvec, 0); while ((index <= end) && @@ -291,12 +306,10 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, pagevec_release(&pvec); cond_resched(); } - - /* Check for outstanding write errors */ - if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) - ret = -ENOSPC; - if (test_and_clear_bit(AS_EIO, &mapping->flags)) - ret = -EIO; +out: + ret2 = filemap_check_errors(mapping); + if (!ret) + ret = ret2; return ret; } @@ -337,6 +350,8 @@ int filemap_write_and_wait(struct address_space *mapping) if (!err) err = err2; } + } else { + err = filemap_check_errors(mapping); } return err; } @@ -368,6 +383,8 @@ int filemap_write_and_wait_range(struct address_space *mapping, if (!err) err = err2; } + } else { + err = filemap_check_errors(mapping); } return err; } @@ -464,6 +481,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); spin_unlock_irq(&mapping->tree_lock); + trace_mm_filemap_add_to_page_cache(page); } else { page->mapping = NULL; /* Leave page->index set: truncation relies upon it */ diff --git a/mm/frontswap.c b/mm/frontswap.c index 2890e67d6026..538367ef1372 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -24,15 +24,7 @@ * frontswap_ops is set by frontswap_register_ops to contain the pointers * to the frontswap "backend" implementation functions. */ -static struct frontswap_ops frontswap_ops __read_mostly; - -/* - * This global enablement flag reduces overhead on systems where frontswap_ops - * has not been registered, so is preferred to the slower alternative: a - * function call that checks a non-global. - */ -bool frontswap_enabled __read_mostly; -EXPORT_SYMBOL(frontswap_enabled); +static struct frontswap_ops *frontswap_ops __read_mostly; /* * If enabled, frontswap_store will return failure even on success. As @@ -80,16 +72,70 @@ static inline void inc_frontswap_succ_stores(void) { } static inline void inc_frontswap_failed_stores(void) { } static inline void inc_frontswap_invalidates(void) { } #endif + +/* + * Due to the asynchronous nature of the backends loading potentially + * _after_ the swap system has been activated, we have chokepoints + * on all frontswap functions to not call the backend until the backend + * has registered. + * + * Specifically when no backend is registered (nobody called + * frontswap_register_ops) all calls to frontswap_init (which is done via + * swapon -> enable_swap_info -> frontswap_init) are registered and remembered + * (via the setting of need_init bitmap) but fail to create tmem_pools. When a + * backend registers with frontswap at some later point the previous + * calls to frontswap_init are executed (by iterating over the need_init + * bitmap) to create tmem_pools and set the respective poolids. All of that is + * guarded by us using atomic bit operations on the 'need_init' bitmap. + * + * This would not guards us against the user deciding to call swapoff right as + * we are calling the backend to initialize (so swapon is in action). + * Fortunatly for us, the swapon_mutex has been taked by the callee so we are + * OK. The other scenario where calls to frontswap_store (called via + * swap_writepage) is racing with frontswap_invalidate_area (called via + * swapoff) is again guarded by the swap subsystem. + * + * While no backend is registered all calls to frontswap_[store|load| + * invalidate_area|invalidate_page] are ignored or fail. + * + * The time between the backend being registered and the swap file system + * calling the backend (via the frontswap_* functions) is indeterminate as + * frontswap_ops is not atomic_t (or a value guarded by a spinlock). + * That is OK as we are comfortable missing some of these calls to the newly + * registered backend. + * + * Obviously the opposite (unloading the backend) must be done after all + * the frontswap_[store|load|invalidate_area|invalidate_page] start + * ignorning or failing the requests - at which point frontswap_ops + * would have to be made in some fashion atomic. + */ +static DECLARE_BITMAP(need_init, MAX_SWAPFILES); + /* * Register operations for frontswap, returning previous thus allowing * detection of multiple backends and possible nesting. */ -struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) +struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) { - struct frontswap_ops old = frontswap_ops; - - frontswap_ops = *ops; - frontswap_enabled = true; + struct frontswap_ops *old = frontswap_ops; + int i; + + for (i = 0; i < MAX_SWAPFILES; i++) { + if (test_and_clear_bit(i, need_init)) { + struct swap_info_struct *sis = swap_info[i]; + /* __frontswap_init _should_ have set it! */ + if (!sis->frontswap_map) + return ERR_PTR(-EINVAL); + ops->init(i); + } + } + /* + * We MUST have frontswap_ops set _after_ the frontswap_init's + * have been called. Otherwise __frontswap_store might fail. Hence + * the barrier to make sure compiler does not re-order us. + */ + barrier(); + frontswap_ops = ops; return old; } EXPORT_SYMBOL(frontswap_register_ops); @@ -115,20 +161,48 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets); /* * Called when a swap device is swapon'd. */ -void __frontswap_init(unsigned type) +void __frontswap_init(unsigned type, unsigned long *map) { struct swap_info_struct *sis = swap_info[type]; BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) + + /* + * p->frontswap is a bitmap that we MUST have to figure out which page + * has gone in frontswap. Without it there is no point of continuing. + */ + if (WARN_ON(!map)) return; - frontswap_ops.init(type); + /* + * Irregardless of whether the frontswap backend has been loaded + * before this function or it will be later, we _MUST_ have the + * p->frontswap set to something valid to work properly. + */ + frontswap_map_set(sis, map); + if (frontswap_ops) + frontswap_ops->init(type); + else { + BUG_ON(type > MAX_SWAPFILES); + set_bit(type, need_init); + } } EXPORT_SYMBOL(__frontswap_init); -static inline void __frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +bool __frontswap_test(struct swap_info_struct *sis, + pgoff_t offset) +{ + bool ret = false; + + if (frontswap_ops && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} +EXPORT_SYMBOL(__frontswap_test); + +static inline void __frontswap_clear(struct swap_info_struct *sis, + pgoff_t offset) { - frontswap_clear(sis, offset); + clear_bit(offset, sis->frontswap_map); atomic_dec(&sis->frontswap_pages); } @@ -147,13 +221,20 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); + /* + * Return if no backend registed. + * Don't need to inc frontswap_failed_stores here. + */ + if (!frontswap_ops) + return ret; + BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) + if (__frontswap_test(sis, offset)) dup = 1; - ret = frontswap_ops.store(type, offset, page); + ret = frontswap_ops->store(type, offset, page); if (ret == 0) { - frontswap_set(sis, offset); + set_bit(offset, sis->frontswap_map); inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); @@ -188,13 +269,16 @@ int __frontswap_load(struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) - ret = frontswap_ops.load(type, offset, page); + /* + * __frontswap_test() will check whether there is backend registered + */ + if (__frontswap_test(sis, offset)) + ret = frontswap_ops->load(type, offset, page); if (ret == 0) { inc_frontswap_loads(); if (frontswap_tmem_exclusive_gets_enabled) { SetPageDirty(page); - frontswap_clear(sis, offset); + __frontswap_clear(sis, offset); } } return ret; @@ -210,8 +294,11 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) struct swap_info_struct *sis = swap_info[type]; BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) { - frontswap_ops.invalidate_page(type, offset); + /* + * __frontswap_test() will check whether there is backend registered + */ + if (__frontswap_test(sis, offset)) { + frontswap_ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); } @@ -226,12 +313,15 @@ void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; - frontswap_ops.invalidate_area(type); - atomic_set(&sis->frontswap_pages, 0); - memset(sis->frontswap_map, 0, sis->max / sizeof(long)); + if (frontswap_ops) { + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + frontswap_ops->invalidate_area(type); + atomic_set(&sis->frontswap_pages, 0); + memset(sis->frontswap_map, 0, sis->max / sizeof(long)); + } + clear_bit(type, need_init); } EXPORT_SYMBOL(__frontswap_invalidate_area); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e2f7f5aaaafb..4cb16844d66d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -163,25 +163,24 @@ static int start_khugepaged(void) } static atomic_t huge_zero_refcount; -static unsigned long huge_zero_pfn __read_mostly; +static struct page *huge_zero_page __read_mostly; -static inline bool is_huge_zero_pfn(unsigned long pfn) +static inline bool is_huge_zero_page(struct page *page) { - unsigned long zero_pfn = ACCESS_ONCE(huge_zero_pfn); - return zero_pfn && pfn == zero_pfn; + return ACCESS_ONCE(huge_zero_page) == page; } static inline bool is_huge_zero_pmd(pmd_t pmd) { - return is_huge_zero_pfn(pmd_pfn(pmd)); + return is_huge_zero_page(pmd_page(pmd)); } -static unsigned long get_huge_zero_page(void) +static struct page *get_huge_zero_page(void) { struct page *zero_page; retry: if (likely(atomic_inc_not_zero(&huge_zero_refcount))) - return ACCESS_ONCE(huge_zero_pfn); + return ACCESS_ONCE(huge_zero_page); zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE, HPAGE_PMD_ORDER); @@ -191,7 +190,7 @@ retry: } count_vm_event(THP_ZERO_PAGE_ALLOC); preempt_disable(); - if (cmpxchg(&huge_zero_pfn, 0, page_to_pfn(zero_page))) { + if (cmpxchg(&huge_zero_page, NULL, zero_page)) { preempt_enable(); __free_page(zero_page); goto retry; @@ -200,7 +199,7 @@ retry: /* We take additional reference here. It will be put back by shrinker */ atomic_set(&huge_zero_refcount, 2); preempt_enable(); - return ACCESS_ONCE(huge_zero_pfn); + return ACCESS_ONCE(huge_zero_page); } static void put_huge_zero_page(void) @@ -220,9 +219,9 @@ static int shrink_huge_zero_page(struct shrinker *shrink, return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { - unsigned long zero_pfn = xchg(&huge_zero_pfn, 0); - BUG_ON(zero_pfn == 0); - __free_page(__pfn_to_page(zero_pfn)); + struct page *zero_page = xchg(&huge_zero_page, NULL); + BUG_ON(zero_page == NULL); + __free_page(zero_page); } return 0; @@ -713,6 +712,11 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, return VM_FAULT_OOM; clear_huge_page(page, haddr, HPAGE_PMD_NR); + /* + * The memory barrier inside __SetPageUptodate makes sure that + * clear_huge_page writes become visible before the set_pmd_at() + * write. + */ __SetPageUptodate(page); spin_lock(&mm->page_table_lock); @@ -724,12 +728,6 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, } else { pmd_t entry; entry = mk_huge_pmd(page, vma); - /* - * The spinlocking to take the lru_lock inside - * page_add_new_anon_rmap() acts as a full memory - * barrier to be sure clear_huge_page writes become - * visible after the set_pmd_at() write. - */ page_add_new_anon_rmap(page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); pgtable_trans_huge_deposit(mm, pgtable); @@ -765,12 +763,12 @@ static inline struct page *alloc_hugepage(int defrag) static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, - unsigned long zero_pfn) + struct page *zero_page) { pmd_t entry; if (!pmd_none(*pmd)) return false; - entry = pfn_pmd(zero_pfn, vma->vm_page_prot); + entry = mk_pmd(zero_page, vma->vm_page_prot); entry = pmd_wrprotect(entry); entry = pmd_mkhuge(entry); set_pmd_at(mm, haddr, pmd, entry); @@ -795,20 +793,20 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!(flags & FAULT_FLAG_WRITE) && transparent_hugepage_use_zero_page()) { pgtable_t pgtable; - unsigned long zero_pfn; + struct page *zero_page; bool set; pgtable = pte_alloc_one(mm, haddr); if (unlikely(!pgtable)) return VM_FAULT_OOM; - zero_pfn = get_huge_zero_page(); - if (unlikely(!zero_pfn)) { + zero_page = get_huge_zero_page(); + if (unlikely(!zero_page)) { pte_free(mm, pgtable); count_vm_event(THP_FAULT_FALLBACK); goto out; } spin_lock(&mm->page_table_lock); set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, - zero_pfn); + zero_page); spin_unlock(&mm->page_table_lock); if (!set) { pte_free(mm, pgtable); @@ -887,16 +885,16 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, * a page table. */ if (is_huge_zero_pmd(pmd)) { - unsigned long zero_pfn; + struct page *zero_page; bool set; /* * get_huge_zero_page() will never allocate a new page here, * since we already have a zero page to copy. It just takes a * reference. */ - zero_pfn = get_huge_zero_page(); + zero_page = get_huge_zero_page(); set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd, - zero_pfn); + zero_page); BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */ ret = 0; goto out_unlock; @@ -1431,7 +1429,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, if (ret == 1) { pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); - set_pmd_at(mm, new_addr, new_pmd, pmd); + set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); spin_unlock(&mm->page_table_lock); } out: @@ -1560,7 +1558,8 @@ static int __split_huge_page_splitting(struct page *page, return ret; } -static void __split_huge_page_refcount(struct page *page) +static void __split_huge_page_refcount(struct page *page, + struct list_head *list) { int i; struct zone *zone = page_zone(page); @@ -1646,7 +1645,7 @@ static void __split_huge_page_refcount(struct page *page) BUG_ON(!PageDirty(page_tail)); BUG_ON(!PageSwapBacked(page_tail)); - lru_add_page_tail(page, page_tail, lruvec); + lru_add_page_tail(page, page_tail, lruvec, list); } atomic_sub(tail_count, &page->_count); BUG_ON(atomic_read(&page->_count) <= 0); @@ -1753,7 +1752,8 @@ static int __split_huge_page_map(struct page *page, /* must be called with anon_vma->root->rwsem held */ static void __split_huge_page(struct page *page, - struct anon_vma *anon_vma) + struct anon_vma *anon_vma, + struct list_head *list) { int mapcount, mapcount2; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); @@ -1784,7 +1784,7 @@ static void __split_huge_page(struct page *page, mapcount, page_mapcount(page)); BUG_ON(mapcount != page_mapcount(page)); - __split_huge_page_refcount(page); + __split_huge_page_refcount(page, list); mapcount2 = 0; anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { @@ -1799,12 +1799,19 @@ static void __split_huge_page(struct page *page, BUG_ON(mapcount != mapcount2); } -int split_huge_page(struct page *page) +/* + * Split a hugepage into normal pages. This doesn't change the position of head + * page. If @list is null, tail pages will be added to LRU list, otherwise, to + * @list. Both head page and tail pages will inherit mapping, flags, and so on + * from the hugepage. + * Return 0 if the hugepage is split successfully otherwise return 1. + */ +int split_huge_page_to_list(struct page *page, struct list_head *list) { struct anon_vma *anon_vma; int ret = 1; - BUG_ON(is_huge_zero_pfn(page_to_pfn(page))); + BUG_ON(is_huge_zero_page(page)); BUG_ON(!PageAnon(page)); /* @@ -1824,7 +1831,7 @@ int split_huge_page(struct page *page) goto out_unlock; BUG_ON(!PageSwapBacked(page)); - __split_huge_page(page, anon_vma); + __split_huge_page(page, anon_vma, list); count_vm_event(THP_SPLIT); BUG_ON(PageCompound(page)); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4e3cf4f38b87..f8feeeca6686 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2121,6 +2121,21 @@ int hugetlb_report_node_meminfo(int nid, char *buf) nid, h->surplus_huge_pages_node[nid]); } +void hugetlb_show_meminfo(void) +{ + struct hstate *h; + int nid; + + for_each_node_state(nid, N_MEMORY) + for_each_hstate(h) + pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n", + nid, + h->nr_huge_pages_node[nid], + h->free_huge_pages_node[nid], + h->surplus_huge_pages_node[nid], + 1UL << (huge_page_order(h) + PAGE_SHIFT - 10)); +} + /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ unsigned long hugetlb_total_pages(void) { @@ -2247,10 +2262,11 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, pte_t entry; if (writable) { - entry = - pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + entry = huge_pte_mkwrite(huge_pte_mkdirty(mk_huge_pte(page, + vma->vm_page_prot))); } else { - entry = huge_pte_wrprotect(mk_pte(page, vma->vm_page_prot)); + entry = huge_pte_wrprotect(mk_huge_pte(page, + vma->vm_page_prot)); } entry = pte_mkyoung(entry); entry = pte_mkhuge(entry); @@ -2264,7 +2280,7 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma, { pte_t entry; - entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep))); + entry = huge_pte_mkwrite(huge_pte_mkdirty(huge_ptep_get(ptep))); if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) update_mmu_cache(vma, address, ptep); } @@ -2379,7 +2395,7 @@ again: * HWPoisoned hugepage is already unmapped and dropped reference */ if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { - pte_clear(mm, address, ptep); + huge_pte_clear(mm, address, ptep); continue; } @@ -2403,7 +2419,7 @@ again: pte = huge_ptep_get_and_clear(mm, address, ptep); tlb_remove_tlb_entry(tlb, ptep, address); - if (pte_dirty(pte)) + if (huge_pte_dirty(pte)) set_page_dirty(page); page_remove_rmap(page); @@ -2856,7 +2872,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * page now as it is used to determine if a reservation has been * consumed. */ - if ((flags & FAULT_FLAG_WRITE) && !pte_write(entry)) { + if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) { if (vma_needs_reservation(h, vma, address) < 0) { ret = VM_FAULT_OOM; goto out_mutex; @@ -2886,12 +2902,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (flags & FAULT_FLAG_WRITE) { - if (!pte_write(entry)) { + if (!huge_pte_write(entry)) { ret = hugetlb_cow(mm, vma, address, ptep, entry, pagecache_page); goto out_page_table_lock; } - entry = pte_mkdirty(entry); + entry = huge_pte_mkdirty(entry); } entry = pte_mkyoung(entry); if (huge_ptep_set_access_flags(vma, address, ptep, entry, @@ -2972,7 +2988,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * directly from any kind of swap entries. */ if (absent || is_swap_pte(huge_ptep_get(pte)) || - ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) { + ((flags & FOLL_WRITE) && + !huge_pte_write(huge_ptep_get(pte)))) { int ret; spin_unlock(&mm->page_table_lock); @@ -3042,7 +3059,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, } if (!huge_pte_none(huge_ptep_get(ptep))) { pte = huge_ptep_get_and_clear(mm, address, ptep); - pte = pte_mkhuge(pte_modify(pte, newprot)); + pte = pte_mkhuge(huge_pte_modify(pte, newprot)); pte = arch_make_huge_pte(pte, vma, NULL, 0); set_huge_pte_at(mm, address, ptep, pte); pages++; diff --git a/mm/madvise.c b/mm/madvise.c index c58c94b56c3d..7055883e6e25 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -473,27 +473,27 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) if (!madvise_behavior_valid(behavior)) return error; - write = madvise_need_mmap_write(behavior); - if (write) - down_write(¤t->mm->mmap_sem); - else - down_read(¤t->mm->mmap_sem); - if (start & ~PAGE_MASK) - goto out; + return error; len = (len_in + ~PAGE_MASK) & PAGE_MASK; /* Check to see whether len was rounded up from small -ve to zero */ if (len_in && !len) - goto out; + return error; end = start + len; if (end < start) - goto out; + return error; error = 0; if (end == start) - goto out; + return error; + + write = madvise_need_mmap_write(behavior); + if (write) + down_write(¤t->mm->mmap_sem); + else + down_read(¤t->mm->mmap_sem); /* * If the interval [start,end) covers some unmapped address @@ -509,14 +509,14 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) /* Still start < end. */ error = -ENOMEM; if (!vma) - goto out_plug; + goto out; /* Here start < (end|vma->vm_end). */ if (start < vma->vm_start) { unmapped_error = -ENOMEM; start = vma->vm_start; if (start >= end) - goto out_plug; + goto out; } /* Here vma->vm_start <= start < (end|vma->vm_end) */ @@ -527,21 +527,20 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ error = madvise_vma(vma, &prev, start, tmp, behavior); if (error) - goto out_plug; + goto out; start = tmp; if (prev && start < prev->vm_end) start = prev->vm_end; error = unmapped_error; if (start >= end) - goto out_plug; + goto out; if (prev) vma = prev->vm_next; else /* madvise_remove dropped mmap_sem */ vma = find_vma(current->mm, start); } -out_plug: - blk_finish_plug(&plug); out: + blk_finish_plug(&plug); if (write) up_write(¤t->mm->mmap_sem); else diff --git a/mm/memblock.c b/mm/memblock.c index b8d9147e5c08..c5fad932fa51 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -322,10 +322,11 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type) /** * memblock_insert_region - insert new memblock region - * @type: memblock type to insert into - * @idx: index for the insertion point - * @base: base address of the new region - * @size: size of the new region + * @type: memblock type to insert into + * @idx: index for the insertion point + * @base: base address of the new region + * @size: size of the new region + * @nid: node id of the new region * * Insert new memblock region [@base,@base+@size) into @type at @idx. * @type must already have extra room to accomodate the new region. @@ -771,6 +772,9 @@ static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, { phys_addr_t found; + if (WARN_ON(!align)) + align = __alignof__(long long); + /* align @size to avoid excessive fragmentation on reserved array */ size = round_up(size, align); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fb7440dd7ecc..374f632f2ea8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -49,6 +49,7 @@ #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> +#include <linux/vmpressure.h> #include <linux/mm_inline.h> #include <linux/page_cgroup.h> #include <linux/cpu.h> @@ -152,8 +153,13 @@ struct mem_cgroup_stat_cpu { }; struct mem_cgroup_reclaim_iter { - /* css_id of the last scanned hierarchy member */ - int position; + /* + * last scanned hierarchy member. Valid only if last_dead_count + * matches memcg->dead_count of the hierarchy root group. + */ + struct mem_cgroup *last_visited; + unsigned long last_dead_count; + /* scan generation, increased every round-trip */ unsigned int generation; }; @@ -256,6 +262,9 @@ struct mem_cgroup { */ struct res_counter res; + /* vmpressure notifications */ + struct vmpressure vmpressure; + union { /* * the counter to account for mem+swap usage. @@ -310,14 +319,31 @@ struct mem_cgroup { /* thresholds for mem+swap usage. RCU-protected */ struct mem_cgroup_thresholds memsw_thresholds; - /* For oom notifier event fd */ - struct list_head oom_notify; + union { + /* For oom notifier event fd */ + struct list_head oom_notify; + /* + * we can only trigger an oom event if the memcg is alive. + * so we will reuse this field to hook the memcg in the list + * of dead memcgs. + */ + struct list_head dead; + }; - /* - * Should we move charges of a task when a task is moved into this - * mem_cgroup ? And what type of charges should we move ? - */ - unsigned long move_charge_at_immigrate; + union { + /* + * Should we move charges of a task when a task is moved into + * this mem_cgroup ? And what type of charges should we move ? + */ + unsigned long move_charge_at_immigrate; + + /* + * We are no longer concerned about moving charges after memcg + * is dead. So we will fill this up with its name, to aid + * debugging. + */ + char *memcg_name; + }; /* * set > 0 if pages under this cgroup are moving to other cgroup. */ @@ -335,6 +361,7 @@ struct mem_cgroup { struct mem_cgroup_stat_cpu nocpu_base; spinlock_t pcp_counter_lock; + atomic_t dead_count; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct tcp_memcontrol tcp_mem; #endif @@ -353,6 +380,7 @@ struct mem_cgroup { atomic_t numainfo_events; atomic_t numainfo_updating; #endif + /* * Per cgroup active and inactive list, similar to the * per zone LRU lists. @@ -369,6 +397,55 @@ static size_t memcg_size(void) nr_node_ids * sizeof(struct mem_cgroup_per_node); } +#ifdef CONFIG_MEMCG_DEBUG_ASYNC_DESTROY +static LIST_HEAD(dangling_memcgs); +static DEFINE_MUTEX(dangling_memcgs_mutex); + +static inline void memcg_dangling_free(struct mem_cgroup *memcg) +{ + mutex_lock(&dangling_memcgs_mutex); + list_del(&memcg->dead); + mutex_unlock(&dangling_memcgs_mutex); + free_pages((unsigned long)memcg->memcg_name, 0); +} + +static inline void memcg_dangling_add(struct mem_cgroup *memcg) +{ + /* + * cgroup.c will do page-sized allocations most of the time, + * so we'll just follow the pattern. Also, __get_free_pages + * is a better interface than kmalloc for us here, because + * we'd like this memory to be always billed to the root cgroup, + * not to the process removing the memcg. While kmalloc would + * require us to wrap it into memcg_stop/resume_kmem_account, + * with __get_free_pages we just don't pass the memcg flag. + */ + memcg->memcg_name = (char *)__get_free_pages(GFP_KERNEL, 0); + + /* + * we will, in general, just ignore failures. No need to go crazy, + * being this just a debugging interface. It is nice to copy a memcg + * name over, but if we (unlikely) can't, just the address will do + */ + if (!memcg->memcg_name) + goto add_list; + + if (cgroup_path(memcg->css.cgroup, memcg->memcg_name, PAGE_SIZE) < 0) { + free_pages((unsigned long)memcg->memcg_name, 0); + memcg->memcg_name = NULL; + } + +add_list: + INIT_LIST_HEAD(&memcg->dead); + mutex_lock(&dangling_memcgs_mutex); + list_add(&memcg->dead, &dangling_memcgs); + mutex_unlock(&dangling_memcgs_mutex); +} +#else +static inline void memcg_dangling_free(struct mem_cgroup *memcg) {} +static inline void memcg_dangling_add(struct mem_cgroup *memcg) {} +#endif + /* internal only representation about the status of kmem accounting. */ enum { KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */ @@ -504,6 +581,24 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s) return container_of(s, struct mem_cgroup, css); } +/* Some nice accessors for the vmpressure. */ +struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg) +{ + if (!memcg) + memcg = root_mem_cgroup; + return &memcg->vmpressure; +} + +struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) +{ + return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; +} + +struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css) +{ + return &mem_cgroup_from_css(css)->vmpressure; +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); @@ -1067,6 +1162,51 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) return memcg; } +/* + * Returns a next (in a pre-order walk) alive memcg (with elevated css + * ref. count) or NULL if the whole root's subtree has been visited. + * + * helper function to be used by mem_cgroup_iter + */ +static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, + struct mem_cgroup *last_visited) +{ + struct cgroup *prev_cgroup, *next_cgroup; + + /* + * Root is not visited by cgroup iterators so it needs an + * explicit visit. + */ + if (!last_visited) + return root; + + prev_cgroup = (last_visited == root) ? NULL + : last_visited->css.cgroup; +skip_node: + next_cgroup = cgroup_next_descendant_pre( + prev_cgroup, root->css.cgroup); + + /* + * Even if we found a group we have to make sure it is + * alive. css && !memcg means that the groups should be + * skipped and we should continue the tree walk. + * last_visited css is safe to use because it is + * protected by css_get and the tree walk is rcu safe. + */ + if (next_cgroup) { + struct mem_cgroup *mem = mem_cgroup_from_cont( + next_cgroup); + if (css_tryget(&mem->css)) + return mem; + else { + prev_cgroup = next_cgroup; + goto skip_node; + } + } + + return NULL; +} + /** * mem_cgroup_iter - iterate over memory cgroup hierarchy * @root: hierarchy root @@ -1089,7 +1229,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup_reclaim_cookie *reclaim) { struct mem_cgroup *memcg = NULL; - int id = 0; + struct mem_cgroup *last_visited = NULL; + unsigned long uninitialized_var(dead_count); if (mem_cgroup_disabled()) return NULL; @@ -1098,20 +1239,17 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, root = root_mem_cgroup; if (prev && !reclaim) - id = css_id(&prev->css); - - if (prev && prev != root) - css_put(&prev->css); + last_visited = prev; if (!root->use_hierarchy && root != root_mem_cgroup) { if (prev) - return NULL; + goto out_css_put; return root; } + rcu_read_lock(); while (!memcg) { struct mem_cgroup_reclaim_iter *uninitialized_var(iter); - struct cgroup_subsys_state *css; if (reclaim) { int nid = zone_to_nid(reclaim->zone); @@ -1120,31 +1258,60 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, mz = mem_cgroup_zoneinfo(root, nid, zid); iter = &mz->reclaim_iter[reclaim->priority]; - if (prev && reclaim->generation != iter->generation) - return NULL; - id = iter->position; + last_visited = iter->last_visited; + if (prev && reclaim->generation != iter->generation) { + iter->last_visited = NULL; + goto out_unlock; + } + + /* + * If the dead_count mismatches, a destruction + * has happened or is happening concurrently. + * If the dead_count matches, a destruction + * might still happen concurrently, but since + * we checked under RCU, that destruction + * won't free the object until we release the + * RCU reader lock. Thus, the dead_count + * check verifies the pointer is still valid, + * css_tryget() verifies the cgroup pointed to + * is alive. + */ + dead_count = atomic_read(&root->dead_count); + smp_rmb(); + last_visited = iter->last_visited; + if (last_visited) { + if ((dead_count != iter->last_dead_count) || + !css_tryget(&last_visited->css)) { + last_visited = NULL; + } + } } - rcu_read_lock(); - css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id); - if (css) { - if (css == &root->css || css_tryget(css)) - memcg = mem_cgroup_from_css(css); - } else - id = 0; - rcu_read_unlock(); + memcg = __mem_cgroup_iter_next(root, last_visited); if (reclaim) { - iter->position = id; - if (!css) + if (last_visited) + css_put(&last_visited->css); + + iter->last_visited = memcg; + smp_wmb(); + iter->last_dead_count = dead_count; + + if (!memcg) iter->generation++; else if (!prev && memcg) reclaim->generation = iter->generation; } - if (prev && !css) - return NULL; + if (prev && !memcg) + goto out_unlock; } +out_unlock: + rcu_read_unlock(); +out_css_put: + if (prev && prev != root) + css_put(&prev->css); + return memcg; } @@ -3114,12 +3281,12 @@ void memcg_release_cache(struct kmem_cache *s) root = s->memcg_params->root_cache; root->memcg_params->memcg_caches[id] = NULL; - mem_cgroup_put(memcg); mutex_lock(&memcg->slab_caches_mutex); list_del(&s->memcg_params->list); mutex_unlock(&memcg->slab_caches_mutex); + mem_cgroup_put(memcg); out: kfree(s->memcg_params); } @@ -4948,9 +5115,6 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); - if (!do_swap_account && type == _MEMSWAP) - return -EOPNOTSUPP; - switch (type) { case _MEM: if (name == RES_USAGE) @@ -4975,6 +5139,107 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, return simple_read_from_buffer(buf, nbytes, ppos, str, len); } +#ifdef CONFIG_MEMCG_DEBUG_ASYNC_DESTROY +static void +mem_cgroup_dangling_swap(struct mem_cgroup *memcg, struct seq_file *m) +{ +#ifdef CONFIG_MEMCG_SWAP + u64 kmem; + u64 memsw; + + /* + * kmem will also propagate here, so we are only interested in the + * difference. See comment in mem_cgroup_reparent_charges for details. + * + * We could save this value for later consumption by kmem reports, but + * there is not a lot of problem if the figures differ slightly. + */ + kmem = res_counter_read_u64(&memcg->kmem, RES_USAGE); + memsw = res_counter_read_u64(&memcg->memsw, RES_USAGE) - kmem; + seq_printf(m, "\t%llu swap bytes\n", memsw); +#endif +} + + +static void +mem_cgroup_dangling_tcp(struct mem_cgroup *memcg, struct seq_file *m) +{ +#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) + struct tcp_memcontrol *tcp = &memcg->tcp_mem; + s64 tcp_socks; + u64 tcp_bytes; + + tcp_socks = percpu_counter_sum_positive(&tcp->tcp_sockets_allocated); + tcp_bytes = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); + seq_printf(m, "\t%llu tcp bytes", tcp_bytes); + /* + * if tcp_bytes == 0, tcp_socks != 0 is a bug. One more reason to print + * it! + */ + if (tcp_bytes || tcp_socks) + seq_printf(m, ", in %lld sockets", tcp_socks); + seq_printf(m, "\n"); + +#endif +} + +static void +mem_cgroup_dangling_kmem(struct mem_cgroup *memcg, struct seq_file *m) +{ +#ifdef CONFIG_MEMCG_KMEM + u64 kmem; + struct memcg_cache_params *params; + + kmem = res_counter_read_u64(&memcg->kmem, RES_USAGE); + seq_printf(m, "\t%llu kmem bytes", kmem); + + /* list below may not be initialized, so not even try */ + if (!kmem) + return; + + seq_printf(m, " in caches"); + mutex_lock(&memcg->slab_caches_mutex); + list_for_each_entry(params, &memcg->memcg_slab_caches, list) { + struct kmem_cache *s = memcg_params_to_cache(params); + + seq_printf(m, " %s", s->name); + } + mutex_unlock(&memcg->slab_caches_mutex); + seq_printf(m, "\n"); +#endif +} + +/* + * After a memcg is destroyed, it may still be kept around in memory. + * Currently, the two main reasons for it are swap entries, and kernel memory. + * Because they will be freed assynchronously, they will pin the memcg structure + * and its resources until the last reference goes away. + * + * This root-only file will show information about which users + */ +static int mem_cgroup_dangling_read(struct cgroup *cont, struct cftype *cft, + struct seq_file *m) +{ + struct mem_cgroup *memcg; + + mutex_lock(&dangling_memcgs_mutex); + + list_for_each_entry(memcg, &dangling_memcgs, dead) { + if (memcg->memcg_name) + seq_printf(m, "%s:\n", memcg->memcg_name); + else + seq_printf(m, "%p (name lost):\n", memcg); + + mem_cgroup_dangling_swap(memcg, m); + mem_cgroup_dangling_tcp(memcg, m); + mem_cgroup_dangling_kmem(memcg, m); + } + + mutex_unlock(&dangling_memcgs_mutex); + return 0; +} +#endif + static int memcg_update_kmem_limit(struct cgroup *cont, u64 val) { int ret = -EINVAL; @@ -5085,9 +5350,6 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); - if (!do_swap_account && type == _MEMSWAP) - return -EOPNOTSUPP; - switch (name) { case RES_LIMIT: if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ @@ -5164,9 +5426,6 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) type = MEMFILE_TYPE(event); name = MEMFILE_ATTR(event); - if (!do_swap_account && type == _MEMSWAP) - return -EOPNOTSUPP; - switch (name) { case RES_MAX_USAGE: if (type == _MEM) @@ -5745,7 +6004,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) return ret; return mem_cgroup_sockets_init(memcg, ss); -}; +} static void kmem_cgroup_destroy(struct mem_cgroup *memcg) { @@ -5840,6 +6099,11 @@ static struct cftype mem_cgroup_files[] = { .unregister_event = mem_cgroup_oom_unregister_event, .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), }, + { + .name = "pressure_level", + .register_event = vmpressure_register_event, + .unregister_event = vmpressure_unregister_event, + }, #ifdef CONFIG_NUMA { .name = "numa_stat", @@ -5877,6 +6141,14 @@ static struct cftype mem_cgroup_files[] = { }, #endif #endif + +#ifdef CONFIG_MEMCG_DEBUG_ASYNC_DESTROY + { + .name = "dangling_memcgs", + .read_seq_string = mem_cgroup_dangling_read, + .flags = CFTYPE_ONLY_ON_ROOT, + }, +#endif { }, /* terminate */ }; @@ -6026,6 +6298,8 @@ static void free_work(struct work_struct *work) struct mem_cgroup *memcg; memcg = container_of(work, struct mem_cgroup, work_freeing); + + memcg_dangling_free(memcg); __mem_cgroup_free(memcg); } @@ -6121,6 +6395,7 @@ mem_cgroup_css_alloc(struct cgroup *cont) memcg->move_charge_at_immigrate = 0; mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); + vmpressure_init(&memcg->vmpressure); return &memcg->css; @@ -6186,10 +6461,29 @@ mem_cgroup_css_online(struct cgroup *cont) return error; } +/* + * Announce all parents that a group from their hierarchy is gone. + */ +static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) +{ + struct mem_cgroup *parent = memcg; + + while ((parent = parent_mem_cgroup(parent))) + atomic_inc(&parent->dead_count); + + /* + * if the root memcg is not hierarchical we have to check it + * explicitely. + */ + if (!root_mem_cgroup->use_hierarchy) + atomic_inc(&root_mem_cgroup->dead_count); +} + static void mem_cgroup_css_offline(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + mem_cgroup_invalidate_reclaim_iterators(memcg); mem_cgroup_reparent_charges(memcg); mem_cgroup_destroy_all_caches(memcg); } @@ -6200,6 +6494,7 @@ static void mem_cgroup_css_free(struct cgroup *cont) kmem_cgroup_destroy(memcg); + memcg_dangling_add(memcg); mem_cgroup_put(memcg); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index df0694c6adef..ceb0c7f1932f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -785,10 +785,10 @@ static struct page_state { { sc|dirty, sc, "clean swapcache", me_swapcache_clean }, { mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty }, - { mlock, mlock, "clean mlocked LRU", me_pagecache_clean }, + { mlock|dirty, mlock, "clean mlocked LRU", me_pagecache_clean }, { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty }, - { unevict, unevict, "clean unevictable LRU", me_pagecache_clean }, + { unevict|dirty, unevict, "clean unevictable LRU", me_pagecache_clean }, { lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty }, { lru|dirty, lru, "clean LRU", me_pagecache_clean }, diff --git a/mm/memory.c b/mm/memory.c index ba94dec5b259..f7a1fba85d14 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3244,6 +3244,11 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, page = alloc_zeroed_user_highpage_movable(vma, address); if (!page) goto oom; + /* + * The memory barrier inside __SetPageUptodate makes sure that + * preceeding stores to the page contents become visible before + * the set_pte_at() write. + */ __SetPageUptodate(page); if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ee3765760818..60f6daad1076 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -436,6 +436,40 @@ static int __meminit __add_section(int nid, struct zone *zone, return register_new_memory(nid, __pfn_to_section(phys_start_pfn)); } +/* + * Reasonably generic function for adding memory. It is + * expected that archs that support memory hotplug will + * call this function after deciding the zone to which to + * add the new pages. + */ +int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, + unsigned long nr_pages) +{ + unsigned long i; + int err = 0; + int start_sec, end_sec; + /* during initialize mem_map, align hot-added range to section */ + start_sec = pfn_to_section_nr(phys_start_pfn); + end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); + + for (i = start_sec; i <= end_sec; i++) { + err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); + + /* + * EEXIST is finally dealt with by ioresource collision + * check. see add_memory() => register_memory_resource() + * Warning will be printed if there is collision. + */ + if (err && (err != -EEXIST)) + break; + err = 0; + } + + return err; +} +EXPORT_SYMBOL_GPL(__add_pages); + +#ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static int find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, @@ -658,39 +692,6 @@ static int __remove_section(struct zone *zone, struct mem_section *ms) return 0; } -/* - * Reasonably generic function for adding memory. It is - * expected that archs that support memory hotplug will - * call this function after deciding the zone to which to - * add the new pages. - */ -int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages) -{ - unsigned long i; - int err = 0; - int start_sec, end_sec; - /* during initialize mem_map, align hot-added range to section */ - start_sec = pfn_to_section_nr(phys_start_pfn); - end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - - for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); - - /* - * EEXIST is finally dealt with by ioresource collision - * check. see add_memory() => register_memory_resource() - * Warning will be printed if there is collision. - */ - if (err && (err != -EEXIST)) - break; - err = 0; - } - - return err; -} -EXPORT_SYMBOL_GPL(__add_pages); - /** * __remove_pages() - remove sections of pages from a zone * @zone: zone from which pages need to be removed @@ -705,8 +706,10 @@ EXPORT_SYMBOL_GPL(__add_pages); int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, unsigned long nr_pages) { - unsigned long i, ret = 0; + unsigned long i; int sections_to_remove; + resource_size_t start, size; + int ret = 0; /* * We can only remove entire sections @@ -714,7 +717,12 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK); BUG_ON(nr_pages % PAGES_PER_SECTION); - release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE); + start = phys_start_pfn << PAGE_SHIFT; + size = nr_pages * PAGE_SIZE; + ret = release_mem_region_adjustable(&iomem_resource, start, size); + if (ret) + pr_warn("Unable to release resource <%016llx-%016llx> (%d)\n", + start, start + size - 1, ret); sections_to_remove = nr_pages / PAGES_PER_SECTION; for (i = 0; i < sections_to_remove; i++) { @@ -726,6 +734,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, return ret; } EXPORT_SYMBOL_GPL(__remove_pages); +#endif /* CONFIG_MEMORY_HOTREMOVE */ int set_online_page_callback(online_page_callback_t callback) { @@ -1613,7 +1622,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) /** * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn) * @start_pfn: start pfn of the memory range - * @end_pfn: end pft of the memory range + * @end_pfn: end pfn of the memory range * @arg: argument passed to func * @func: callback for each memory section walked * diff --git a/mm/migrate.c b/mm/migrate.c index 3bbaf5d230b0..c9c5eeebe5a4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -736,7 +736,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, if (PageWriteback(page)) { /* - * Only in the case of a full syncronous migration is it + * Only in the case of a full synchronous migration is it * necessary to wait for PageWriteback. In the async case, * the retry loop is too short and in the sync-light case, * the overhead of stalling is too much @@ -876,6 +876,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); balloon_page_free(page); + balloon_event_count(COMPACTBALLOONMIGRATED); return MIGRATEPAGE_SUCCESS; } out: @@ -973,19 +974,23 @@ out: } /* - * migrate_pages + * migrate_pages - migrate the pages specified in a list, to the free pages + * supplied as the target for the page migration * - * The function takes one list of pages to migrate and a function - * that determines from the page to be migrated and the private data - * the target of the move and allocates the page. + * @from: The list of pages to be migrated. + * @get_new_page: The function used to allocate free pages to be used + * as the target of the page migration. + * @private: Private data to be passed on to get_new_page() + * @mode: The migration mode that specifies the constraints for + * page migration, if any. + * @reason: The reason for page migration. * - * The function returns after 10 attempts or if no pages - * are movable anymore because to has become empty - * or no retryable pages exist anymore. - * Caller should call putback_lru_pages to return pages to the LRU + * The function returns after 10 attempts or if no pages are movable any more + * because the list has become empty or no retryable pages exist any more. + * The caller should call putback_lru_pages() to return pages to the LRU * or free list only if ret != 0. * - * Return: Number of pages not migrated or error code. + * Returns the number of pages that were not migrated, or an error code. */ int migrate_pages(struct list_head *from, new_page_t get_new_page, unsigned long private, enum migrate_mode mode, int reason) diff --git a/mm/mmap.c b/mm/mmap.c index 0db0de1c2fbe..ae488f02220f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -6,6 +6,7 @@ * Address space accounting code <alan@lxorguk.ukuu.org.uk> */ +#include <linux/kernel.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/mm.h> @@ -33,6 +34,8 @@ #include <linux/uprobes.h> #include <linux/rbtree_augmented.h> #include <linux/sched/sysctl.h> +#include <linux/notifier.h> +#include <linux/memory.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -84,6 +87,8 @@ EXPORT_SYMBOL(vm_get_page_prot); int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; +unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ +unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ /* * Make sure vm_committed_as in one cacheline and not cacheline shared with * other variables. It can be updated by several CPUs frequently. @@ -122,7 +127,7 @@ EXPORT_SYMBOL_GPL(vm_memory_committed); */ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { - unsigned long free, allowed; + unsigned long free, allowed, reserve; vm_acct_memory(pages); @@ -163,10 +168,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) free -= totalreserve_pages; /* - * Leave the last 3% for root + * Reserve some for root */ if (!cap_sys_admin) - free -= free / 32; + free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); if (free > pages) return 0; @@ -177,16 +182,19 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) allowed = (totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100; /* - * Leave the last 3% for root + * Reserve some for root */ if (!cap_sys_admin) - allowed -= allowed / 32; + allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); allowed += total_swap_pages; - /* Don't let a single process grow too big: - leave 3% of the size of this process for other processes */ - if (mm) - allowed -= mm->total_vm / 32; + /* + * Don't let a single process grow so big a user can't recover + */ + if (mm) { + reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); + allowed -= min(mm->total_vm / 32, reserve); + } if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; @@ -543,6 +551,34 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr, return 0; } +static unsigned long count_vma_pages_range(struct mm_struct *mm, + unsigned long addr, unsigned long end) +{ + unsigned long nr_pages = 0; + struct vm_area_struct *vma; + + /* Find first overlaping mapping */ + vma = find_vma_intersection(mm, addr, end); + if (!vma) + return 0; + + nr_pages = (min(end, vma->vm_end) - + max(addr, vma->vm_start)) >> PAGE_SHIFT; + + /* Iterate over the rest of the overlaps */ + for (vma = vma->vm_next; vma; vma = vma->vm_next) { + unsigned long overlap_len; + + if (vma->vm_start > end) + break; + + overlap_len = min(end, vma->vm_end) - vma->vm_start; + nr_pages += overlap_len >> PAGE_SHIFT; + } + + return nr_pages; +} + void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { @@ -829,7 +865,7 @@ again: remove_next = 1 + (end > next->vm_end); if (next->anon_vma) anon_vma_merge(vma, next); mm->map_count--; - mpol_put(vma_policy(next)); + vma_set_policy(vma, vma_policy(next)); kmem_cache_free(vm_area_cachep, next); /* * In mprotect's case 6 (see comments on vma_merge), @@ -1435,6 +1471,23 @@ unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long charged = 0; struct inode *inode = file ? file_inode(file) : NULL; + /* Check against address space limit. */ + if (!may_expand_vm(mm, len >> PAGE_SHIFT)) { + unsigned long nr_pages; + + /* + * MAP_FIXED may remove pages of mappings that intersects with + * requested mapping. Account for the pages it would unmap. + */ + if (!(vm_flags & MAP_FIXED)) + return -ENOMEM; + + nr_pages = count_vma_pages_range(mm, addr, addr + len); + + if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages)) + return -ENOMEM; + } + /* Clear old maps */ error = -ENOMEM; munmap_back: @@ -1444,10 +1497,6 @@ munmap_back: goto munmap_back; } - /* Check against address space limit. */ - if (!may_expand_vm(mm, len >> PAGE_SHIFT)) - return -ENOMEM; - /* * Private writable mapping: check memory availability */ @@ -1818,15 +1867,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, } #endif -void arch_unmap_area(struct mm_struct *mm, unsigned long addr) -{ - /* - * Is this a new hole at the lowest possible address? - */ - if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) - mm->free_area_cache = addr; -} - /* * This mmap-allocator allocates new areas top-down from below the * stack's low limit (the base): @@ -1883,19 +1923,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, } #endif -void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr) -{ - /* - * Is this a new hole at the highest possible address? - */ - if (addr > mm->free_area_cache) - mm->free_area_cache = addr; - - /* dont allow allocations above current base */ - if (mm->free_area_cache > mm->mmap_base) - mm->free_area_cache = mm->mmap_base; -} - unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -1935,9 +1962,6 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma = NULL; - if (WARN_ON_ONCE(!mm)) /* Remove this in linux-3.6 */ - return NULL; - /* Check the cache first. */ /* (Cache hit rate is typically around 35%.) */ vma = ACCESS_ONCE(mm->mmap_cache); @@ -2305,7 +2329,7 @@ static void unmap_region(struct mm_struct *mm, update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end); free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, - next ? next->vm_start : 0); + next ? next->vm_start : USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, start, end); } @@ -2319,7 +2343,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, { struct vm_area_struct **insertion_point; struct vm_area_struct *tail_vma = NULL; - unsigned long addr; insertion_point = (prev ? &prev->vm_next : &mm->mmap); vma->vm_prev = NULL; @@ -2336,11 +2359,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, } else mm->highest_vm_end = prev ? prev->vm_end : 0; tail_vma->vm_next = NULL; - if (mm->unmap_area == arch_unmap_area) - addr = prev ? prev->vm_end : mm->mmap_base; - else - addr = vma ? vma->vm_start : mm->mmap_base; - mm->unmap_area(mm, addr); mm->mmap_cache = NULL; /* Kill the cache. */ } @@ -2685,7 +2703,7 @@ void exit_mmap(struct mm_struct *mm) /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); - free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); + free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, 0, -1); /* @@ -3097,3 +3115,115 @@ void __init mmap_init(void) ret = percpu_counter_init(&vm_committed_as, 0); VM_BUG_ON(ret); } + +/* + * Initialise sysctl_user_reserve_kbytes. + * + * This is intended to prevent a user from starting a single memory hogging + * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER + * mode. + * + * The default value is min(3% of free memory, 128MB) + * 128MB is enough to recover with sshd/login, bash, and top/kill. + */ +static int init_user_reserve(void) +{ + unsigned long free_kbytes; + + free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + + sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); + return 0; +} +module_init(init_user_reserve) + +/* + * Initialise sysctl_admin_reserve_kbytes. + * + * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin + * to log in and kill a memory hogging process. + * + * Systems with more than 256MB will reserve 8MB, enough to recover + * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will + * only reserve 3% of free pages by default. + */ +static int init_admin_reserve(void) +{ + unsigned long free_kbytes; + + free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + + sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); + return 0; +} +module_init(init_admin_reserve) + +/* + * Reinititalise user and admin reserves if memory is added or removed. + * + * The default user reserve max is 128MB, and the default max for the + * admin reserve is 8MB. These are usually, but not always, enough to + * enable recovery from a memory hogging process using login/sshd, a shell, + * and tools like top. It may make sense to increase or even disable the + * reserve depending on the existence of swap or variations in the recovery + * tools. So, the admin may have changed them. + * + * If memory is added and the reserves have been eliminated or increased above + * the default max, then we'll trust the admin. + * + * If memory is removed and there isn't enough free memory, then we + * need to reset the reserves. + * + * Otherwise keep the reserve set by the admin. + */ +static int reserve_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + unsigned long tmp, free_kbytes; + + switch (action) { + case MEM_ONLINE: + /* Default max is 128MB. Leave alone if modified by operator. */ + tmp = sysctl_user_reserve_kbytes; + if (0 < tmp && tmp < (1UL << 17)) + init_user_reserve(); + + /* Default max is 8MB. Leave alone if modified by operator. */ + tmp = sysctl_admin_reserve_kbytes; + if (0 < tmp && tmp < (1UL << 13)) + init_admin_reserve(); + + break; + case MEM_OFFLINE: + free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + + if (sysctl_user_reserve_kbytes > free_kbytes) { + init_user_reserve(); + pr_info("vm.user_reserve_kbytes reset to %lu\n", + sysctl_user_reserve_kbytes); + } + + if (sysctl_admin_reserve_kbytes > free_kbytes) { + init_admin_reserve(); + pr_info("vm.admin_reserve_kbytes reset to %lu\n", + sysctl_admin_reserve_kbytes); + } + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block reserve_mem_nb = { + .notifier_call = reserve_mem_notifier, +}; + +int __meminit init_reserve_notifier(void) +{ + if (register_hotmemory_notifier(&reserve_mem_nb)) + printk("Failed registering memory add/remove notifier for admin reserve"); + + return 0; +} +module_init(init_reserve_notifier) diff --git a/mm/mmu_context.c b/mm/mmu_context.c index 3dcfaf4ed355..8a8cd0265e52 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -14,9 +14,6 @@ * use_mm * Makes the calling kernel thread take on the specified * mm context. - * Called by the retry thread execute retries within the - * iocb issuer's mm context, so that copy_from/to_user - * operations work seamlessly for aio. * (Note: this routine is intended to be called only * from a kernel thread context) */ diff --git a/mm/mremap.c b/mm/mremap.c index 463a25705ac6..3708655378e9 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -126,7 +126,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, continue; pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); - set_pte_at(mm, new_addr, new_pte, pte); + set_pte_at(mm, new_addr, new_pte, pte_mksoft_dirty(pte)); } arch_leave_lazy_mmu_mode(); diff --git a/mm/nommu.c b/mm/nommu.c index 02ed648585e7..247ef84e5639 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -63,6 +63,8 @@ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; +unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ +unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ int heap_stack_gap = 0; atomic_long_t mmap_pages_allocated; @@ -228,8 +230,7 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL(follow_pfn); -DEFINE_RWLOCK(vmlist_lock); -struct vm_struct *vmlist; +LIST_HEAD(vmap_area_list); void vfree(const void *addr) { @@ -1858,10 +1859,6 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; } -void arch_unmap_area(struct mm_struct *mm, unsigned long addr) -{ -} - void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows) @@ -1887,7 +1884,7 @@ EXPORT_SYMBOL(unmap_mapping_range); */ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { - unsigned long free, allowed; + unsigned long free, allowed, reserve; vm_acct_memory(pages); @@ -1928,10 +1925,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) free -= totalreserve_pages; /* - * Leave the last 3% for root + * Reserve some for root */ if (!cap_sys_admin) - free -= free / 32; + free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); if (free > pages) return 0; @@ -1941,16 +1938,19 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) allowed = totalram_pages * sysctl_overcommit_ratio / 100; /* - * Leave the last 3% for root + * Reserve some 3% for root */ if (!cap_sys_admin) - allowed -= allowed / 32; + allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); allowed += total_swap_pages; - /* Don't let a single process grow too big: - leave 3% of the size of this process for other processes */ - if (mm) - allowed -= mm->total_vm / 32; + /* + * Don't let a single process grow so big a user can't recover + */ + if (mm) { + reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); + allowed -= min(mm->total_vm / 32, reserve); + } if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; @@ -2112,3 +2112,45 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, up_write(&nommu_region_sem); return 0; } + +/* + * Initialise sysctl_user_reserve_kbytes. + * + * This is intended to prevent a user from starting a single memory hogging + * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER + * mode. + * + * The default value is min(3% of free memory, 128MB) + * 128MB is enough to recover with sshd/login, bash, and top/kill. + */ +static int __meminit init_user_reserve(void) +{ + unsigned long free_kbytes; + + free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + + sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); + return 0; +} +module_init(init_user_reserve) + +/* + * Initialise sysctl_admin_reserve_kbytes. + * + * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin + * to log in and kill a memory hogging process. + * + * Systems with more than 256MB will reserve 8MB, enough to recover + * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will + * only reserve 3% of free pages by default. + */ +static int __meminit init_admin_reserve(void) +{ + unsigned long free_kbytes; + + free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + + sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); + return 0; +} +module_init(init_admin_reserve) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index efe68148f621..4514ad7415c3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2311,10 +2311,6 @@ void wait_for_stable_page(struct page *page) if (!bdi_cap_stable_pages_required(bdi)) return; -#ifdef CONFIG_NEED_BOUNCE_POOL - if (mapping->host->i_sb->s_flags & MS_SNAP_STABLE) - return; -#endif /* CONFIG_NEED_BOUNCE_POOL */ wait_on_page_writeback(page); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7ff1536f01b8..76350aacdfe5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -58,6 +58,7 @@ #include <linux/prefetch.h> #include <linux/migrate.h> #include <linux/page-debug-flags.h> +#include <linux/hugetlb.h> #include <linux/sched/rt.h> #include <asm/tlbflush.h> @@ -1941,9 +1942,24 @@ zonelist_scan: continue; default: /* did we reclaim enough */ - if (!zone_watermark_ok(zone, order, mark, + if (zone_watermark_ok(zone, order, mark, classzone_idx, alloc_flags)) + goto try_this_zone; + + /* + * Failed to reclaim enough to meet watermark. + * Only mark the zone full if checking the min + * watermark or if we failed to reclaim just + * 1<<order pages or else the page allocator + * fastpath will prematurely mark zones full + * when the watermark is between the low and + * min watermarks. + */ + if (((alloc_flags & ALLOC_WMARK_MASK) == ALLOC_WMARK_MIN) || + ret == ZONE_RECLAIM_SOME) goto this_zone_full; + + continue; } } @@ -2003,6 +2019,13 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) return; /* + * Walking all memory to count page types is very expensive and should + * be inhibited in non-blockable contexts. + */ + if (!(gfp_mask & __GFP_WAIT)) + filter |= SHOW_MEM_FILTER_PAGE_COUNT; + + /* * This documents exceptions given to allocations in certain * contexts that are allowed to allocate outside current's set * of allowed nodes. @@ -3106,6 +3129,8 @@ void show_free_areas(unsigned int filter) printk("= %lukB\n", K(total)); } + hugetlb_show_meminfo(); + printk("%ld total pagecache pages\n", global_page_state(NR_FILE_PAGES)); show_swap_cache_info(); @@ -3901,8 +3926,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, * exist on hotplugged memory. */ if (context == MEMMAP_EARLY) { - if (!early_pfn_valid(pfn)) + if (!early_pfn_valid(pfn)) { + pfn = ALIGN(pfn + MAX_ORDER_NR_PAGES, + MAX_ORDER_NR_PAGES) - 1; continue; + } if (!early_pfn_in_nid(pfn, nid)) continue; } @@ -4162,10 +4190,23 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) { unsigned long start_pfn, end_pfn; int i, nid; + /* + * NOTE: The following SMP-unsafe globals are only used early in boot + * when the kernel is running single-threaded. + */ + static unsigned long __meminitdata last_start_pfn, last_end_pfn; + static int __meminitdata last_nid; + + if (last_start_pfn <= pfn && pfn < last_end_pfn) + return last_nid; for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) - if (start_pfn <= pfn && pfn < end_pfn) + if (start_pfn <= pfn && pfn < end_pfn) { + last_start_pfn = start_pfn; + last_end_pfn = end_pfn; + last_nid = nid; return nid; + } /* This is a memory hole */ return -1; } @@ -4711,7 +4752,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, /* * Figure out the number of possible node ids. */ -static void __init setup_nr_node_ids(void) +void __init setup_nr_node_ids(void) { unsigned int node; unsigned int highest = 0; @@ -4720,10 +4761,6 @@ static void __init setup_nr_node_ids(void) highest = node; nr_node_ids = highest + 1; } -#else -static inline void setup_nr_node_ids(void) -{ -} #endif /** @@ -5114,6 +5151,35 @@ early_param("movablecore", cmdline_parse_movablecore); #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +unsigned long free_reserved_area(unsigned long start, unsigned long end, + int poison, char *s) +{ + unsigned long pages, pos; + + pos = start = PAGE_ALIGN(start); + end &= PAGE_MASK; + for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) { + if (poison) + memset((void *)pos, poison, PAGE_SIZE); + free_reserved_page(virt_to_page(pos)); + } + + if (pages && s) + pr_info("Freeing %s memory: %ldK (%lx - %lx)\n", + s, pages << (PAGE_SHIFT - 10), start, end); + + return pages; +} + +#ifdef CONFIG_HIGHMEM +void free_highmem_page(struct page *page) +{ + __free_reserved_page(page); + totalram_pages++; + totalhigh_pages++; +} +#endif + /** * set_dma_reserve - set the specified number of pages reserved in the first zone * @new_dma_reserve: The number of pages to mark reserved diff --git a/mm/page_io.c b/mm/page_io.c index 8d3c0c088105..50bb6efa4976 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -20,6 +20,7 @@ #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/frontswap.h> +#include <linux/aio.h> #include <asm/pgtable.h> static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -41,7 +42,8 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, return bio; } -static void end_swap_bio_write(struct bio *bio, int err) +void end_swap_bio_write(struct bio *bio, int err, + struct batch_complete *batch) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; @@ -67,7 +69,7 @@ static void end_swap_bio_write(struct bio *bio, int err) bio_put(bio); } -void end_swap_bio_read(struct bio *bio, int err) +void end_swap_bio_read(struct bio *bio, int err, struct batch_complete *batch) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; @@ -184,9 +186,7 @@ bad_bmap: */ int swap_writepage(struct page *page, struct writeback_control *wbc) { - struct bio *bio; - int ret = 0, rw = WRITE; - struct swap_info_struct *sis = page_swap_info(page); + int ret = 0; if (try_to_free_swap(page)) { unlock_page(page); @@ -198,6 +198,18 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) end_page_writeback(page); goto out; } + ret = __swap_writepage(page, wbc, end_swap_bio_write); +out: + return ret; +} + +int __swap_writepage(struct page *page, struct writeback_control *wbc, + void (*end_write_func)(struct bio *bio, int err, + struct batch_complete *batch)) +{ + struct bio *bio; + int ret = 0, rw = WRITE; + struct swap_info_struct *sis = page_swap_info(page); if (sis->flags & SWP_FILE) { struct kiocb kiocb; @@ -225,7 +237,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) return ret; } - bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); + bio = get_swap_bio(GFP_NOIO, page, end_write_func); if (bio == NULL) { set_page_dirty(page); unlock_page(page); diff --git a/mm/rmap.c b/mm/rmap.c index 807c96bf0dc6..6280da86b5d6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1513,6 +1513,9 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) unsigned long max_nl_size = 0; unsigned int mapcount; + if (PageHuge(page)) + pgoff = page->index << compound_order(page); + mutex_lock(&mapping->i_mmap_mutex); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); diff --git a/mm/shmem.c b/mm/shmem.c index 1c44af71fcf5..5e6a8422658b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -25,11 +25,13 @@ #include <linux/init.h> #include <linux/vfs.h> #include <linux/mount.h> +#include <linux/ramfs.h> #include <linux/pagemap.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/export.h> #include <linux/swap.h> +#include <linux/aio.h> static struct vfsmount *shm_mnt; @@ -2830,8 +2832,6 @@ out4: * effectively equivalent, but much lighter weight. */ -#include <linux/ramfs.h> - static struct file_system_type shmem_fs_type = { .name = "tmpfs", .mount = ramfs_mount, @@ -2931,11 +2931,9 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags d_instantiate(path.dentry, inode); inode->i_size = size; clear_nlink(inode); /* It is unlinked */ -#ifndef CONFIG_MMU res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); if (IS_ERR(res)) goto put_dentry; -#endif res = alloc_file(&path, FMODE_WRITE | FMODE_READ, &shmem_file_operations); diff --git a/mm/slub.c b/mm/slub.c index 2f6ea01d79c1..57707f01bcfb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include "slab.h" #include <linux/proc_fs.h> +#include <linux/notifier.h> #include <linux/seq_file.h> #include <linux/kmemcheck.h> #include <linux/cpu.h> @@ -3425,7 +3426,6 @@ int kmem_cache_shrink(struct kmem_cache *s) } EXPORT_SYMBOL(kmem_cache_shrink); -#if defined(CONFIG_MEMORY_HOTPLUG) static int slab_mem_going_offline_callback(void *arg) { struct kmem_cache *s; @@ -3540,7 +3540,10 @@ static int slab_memory_callback(struct notifier_block *self, return ret; } -#endif /* CONFIG_MEMORY_HOTPLUG */ +static struct notifier_block slab_memory_callback_nb = { + .notifier_call = slab_memory_callback, + .priority = SLAB_CALLBACK_PRI, +}; /******************************************************************** * Basic setup of slabs @@ -3597,7 +3600,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN); - hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); + register_hotmemory_notifier(&slab_memory_callback_nb); /* Able to allocate the per node structures */ slab_state = PARTIAL; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 1b7e22ab9b09..27eeab3be757 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -53,10 +53,12 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) struct page *page; if (node_state(node, N_HIGH_MEMORY)) - page = alloc_pages_node(node, - GFP_KERNEL | __GFP_ZERO, get_order(size)); + page = alloc_pages_node( + node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT, + get_order(size)); else - page = alloc_pages(GFP_KERNEL | __GFP_ZERO, + page = alloc_pages( + GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT, get_order(size)); if (page) return page_address(page); @@ -145,11 +147,10 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) return pgd; } -int __meminit vmemmap_populate_basepages(struct page *start_page, - unsigned long size, int node) +int __meminit vmemmap_populate_basepages(unsigned long start, + unsigned long end, int node) { - unsigned long addr = (unsigned long)start_page; - unsigned long end = (unsigned long)(start_page + size); + unsigned long addr = start; pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -176,9 +177,15 @@ int __meminit vmemmap_populate_basepages(struct page *start_page, struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) { - struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION); - int error = vmemmap_populate(map, PAGES_PER_SECTION, nid); - if (error) + unsigned long start; + unsigned long end; + struct page *map; + + map = pfn_to_page(pnum * PAGES_PER_SECTION); + start = (unsigned long)map; + end = (unsigned long)(map + PAGES_PER_SECTION); + + if (vmemmap_populate(start, end, nid)) return NULL; return map; diff --git a/mm/sparse.c b/mm/sparse.c index 7ca6dc847947..1c91f0d3f6ab 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -615,12 +615,20 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, } static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) { - vmemmap_free(memmap, nr_pages); + unsigned long start = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + + vmemmap_free(start, end); } +#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { - vmemmap_free(memmap, nr_pages); + unsigned long start = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + + vmemmap_free(start, end); } +#endif /* CONFIG_MEMORY_HOTREMOVE */ #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) { @@ -658,6 +666,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) get_order(sizeof(struct page) * nr_pages)); } +#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { unsigned long maps_section_nr, removing_section_nr, i; @@ -684,40 +693,9 @@ static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) put_page_bootmem(page); } } +#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -static void free_section_usemap(struct page *memmap, unsigned long *usemap) -{ - struct page *usemap_page; - unsigned long nr_pages; - - if (!usemap) - return; - - usemap_page = virt_to_page(usemap); - /* - * Check to see if allocation came from hot-plug-add - */ - if (PageSlab(usemap_page) || PageCompound(usemap_page)) { - kfree(usemap); - if (memmap) - __kfree_section_memmap(memmap, PAGES_PER_SECTION); - return; - } - - /* - * The usemap came from bootmem. This is packed with other usemaps - * on the section which has pgdat at boot time. Just keep it as is now. - */ - - if (memmap) { - nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) - >> PAGE_SHIFT; - - free_map_bootmem(memmap, nr_pages); - } -} - /* * returns the number of sections whose mem_maps were properly * set. If this is <=0, then that means that the passed-in @@ -794,6 +772,39 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif +#ifdef CONFIG_MEMORY_HOTREMOVE +static void free_section_usemap(struct page *memmap, unsigned long *usemap) +{ + struct page *usemap_page; + unsigned long nr_pages; + + if (!usemap) + return; + + usemap_page = virt_to_page(usemap); + /* + * Check to see if allocation came from hot-plug-add + */ + if (PageSlab(usemap_page) || PageCompound(usemap_page)) { + kfree(usemap); + if (memmap) + __kfree_section_memmap(memmap, PAGES_PER_SECTION); + return; + } + + /* + * The usemap came from bootmem. This is packed with other usemaps + * on the section which has pgdat at boot time. Just keep it as is now. + */ + + if (memmap) { + nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) + >> PAGE_SHIFT; + + free_map_bootmem(memmap, nr_pages); + } +} + void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) { struct page *memmap = NULL; @@ -813,4 +824,5 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) clear_hwpoisoned_pages(memmap, PAGES_PER_SECTION); free_section_usemap(memmap, usemap); } -#endif +#endif /* CONFIG_MEMORY_HOTREMOVE */ +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/mm/swap.c b/mm/swap.c index 8a529a01e8fc..dfd7d71d6841 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -30,6 +30,7 @@ #include <linux/backing-dev.h> #include <linux/memcontrol.h> #include <linux/gfp.h> +#include <linux/uio.h> #include "internal.h" @@ -737,7 +738,7 @@ EXPORT_SYMBOL(__pagevec_release); #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* used by __split_huge_page_refcount() */ void lru_add_page_tail(struct page *page, struct page *page_tail, - struct lruvec *lruvec) + struct lruvec *lruvec, struct list_head *list) { int uninitialized_var(active); enum lru_list lru; @@ -749,7 +750,8 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&lruvec_zone(lruvec)->lru_lock)); - SetPageLRU(page_tail); + if (!list) + SetPageLRU(page_tail); if (page_evictable(page_tail)) { if (PageActive(page)) { @@ -767,7 +769,11 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, if (likely(PageLRU(page))) list_add_tail(&page_tail->lru, &page->lru); - else { + else if (list) { + /* page reclaim is reclaiming a huge page */ + get_page(page_tail); + list_add_tail(&page_tail->lru, list); + } else { struct list_head *list_head; /* * Head page has not yet been counted, as an hpage, diff --git a/mm/swap_state.c b/mm/swap_state.c index 7efcf1525921..b3d40dcf3624 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -78,7 +78,7 @@ void show_swap_cache_info(void) * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ -static int __add_to_swap_cache(struct page *page, swp_entry_t entry) +int __add_to_swap_cache(struct page *page, swp_entry_t entry) { int error; struct address_space *address_space; @@ -160,7 +160,7 @@ void __delete_from_swap_cache(struct page *page) * Allocate swap space for the page and add the page to the * swap cache. Caller needs to hold the page lock. */ -int add_to_swap(struct page *page) +int add_to_swap(struct page *page, struct list_head *list) { swp_entry_t entry; int err; @@ -173,7 +173,7 @@ int add_to_swap(struct page *page) return 0; if (unlikely(PageTransHuge(page))) - if (unlikely(split_huge_page(page))) { + if (unlikely(split_huge_page_to_list(page, list))) { swapcache_free(entry, NULL); return 0; } diff --git a/mm/swapfile.c b/mm/swapfile.c index a1f7772a01fc..6c340d908b27 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1509,8 +1509,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) } static void _enable_swap_info(struct swap_info_struct *p, int prio, - unsigned char *swap_map, - unsigned long *frontswap_map) + unsigned char *swap_map) { int i, prev; @@ -1519,7 +1518,6 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio, else p->prio = --least_priority; p->swap_map = swap_map; - frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; atomic_long_add(p->pages, &nr_swap_pages); total_swap_pages += p->pages; @@ -1542,10 +1540,10 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, unsigned char *swap_map, unsigned long *frontswap_map) { + frontswap_init(p->type, frontswap_map); spin_lock(&swap_lock); spin_lock(&p->lock); - _enable_swap_info(p, prio, swap_map, frontswap_map); - frontswap_init(p->type); + _enable_swap_info(p, prio, swap_map); spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1554,7 +1552,7 @@ static void reinsert_swap_info(struct swap_info_struct *p) { spin_lock(&swap_lock); spin_lock(&p->lock); - _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); + _enable_swap_info(p, p->prio, p->swap_map); spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1563,6 +1561,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; unsigned char *swap_map; + unsigned long *frontswap_map; struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; @@ -1662,12 +1661,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; - frontswap_invalidate_area(type); + frontswap_map = frontswap_map_get(p); + frontswap_map_set(p, NULL); spin_unlock(&p->lock); spin_unlock(&swap_lock); + frontswap_invalidate_area(type); mutex_unlock(&swapon_mutex); vfree(swap_map); - vfree(frontswap_map_get(p)); + vfree(frontswap_map); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); @@ -2120,7 +2121,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (p->bdev) { if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { p->flags |= SWP_SOLIDSTATE; - p->cluster_next = 1 + (random32() % p->highest_bit); + p->cluster_next = 1 + (prandom_u32() % p->highest_bit); } if ((swap_flags & SWAP_FLAG_DISCARD) && discard_swap(p) == 0) p->flags |= SWP_DISCARDABLE; diff --git a/mm/util.c b/mm/util.c index ab1424dbe2e6..7441c41d00f6 100644 --- a/mm/util.c +++ b/mm/util.c @@ -295,7 +295,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } #endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0f751f2068c3..72043d6c88c0 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -249,19 +249,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn); #define VM_LAZY_FREEING 0x02 #define VM_VM_AREA 0x04 -struct vmap_area { - unsigned long va_start; - unsigned long va_end; - unsigned long flags; - struct rb_node rb_node; /* address sorted rbtree */ - struct list_head list; /* address sorted list */ - struct list_head purge_list; /* "lazy purge" list */ - struct vm_struct *vm; - struct rcu_head rcu_head; -}; - static DEFINE_SPINLOCK(vmap_area_lock); -static LIST_HEAD(vmap_area_list); +/* Export for kexec only */ +LIST_HEAD(vmap_area_list); static struct rb_root vmap_area_root = RB_ROOT; /* The vmap cache globals are protected by vmap_area_lock */ @@ -313,7 +303,7 @@ static void __insert_vmap_area(struct vmap_area *va) rb_link_node(&va->rb_node, parent, p); rb_insert_color(&va->rb_node, &vmap_area_root); - /* address-sort this list so it is usable like the vmlist */ + /* address-sort this list */ tmp = rb_prev(&va->rb_node); if (tmp) { struct vmap_area *prev; @@ -1125,6 +1115,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro } EXPORT_SYMBOL(vm_map_ram); +static struct vm_struct *vmlist __initdata; /** * vm_area_add_early - add vmap area early during boot * @vm: vm_struct to add @@ -1283,41 +1274,35 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) } EXPORT_SYMBOL_GPL(map_vm_area); -/*** Old vmalloc interfaces ***/ -DEFINE_RWLOCK(vmlist_lock); -struct vm_struct *vmlist; - static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { + spin_lock(&vmap_area_lock); vm->flags = flags; vm->addr = (void *)va->va_start; vm->size = va->va_end - va->va_start; vm->caller = caller; va->vm = vm; va->flags |= VM_VM_AREA; + spin_unlock(&vmap_area_lock); } -static void insert_vmalloc_vmlist(struct vm_struct *vm) +static void clear_vm_unlist(struct vm_struct *vm) { - struct vm_struct *tmp, **p; - + /* + * Before removing VM_UNLIST, + * we should make sure that vm has proper values. + * Pair with smp_rmb() in show_numa_info(). + */ + smp_wmb(); vm->flags &= ~VM_UNLIST; - write_lock(&vmlist_lock); - for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { - if (tmp->addr >= vm->addr) - break; - } - vm->next = *p; - *p = vm; - write_unlock(&vmlist_lock); } static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { setup_vmalloc_vm(vm, va, flags, caller); - insert_vmalloc_vmlist(vm); + clear_vm_unlist(vm); } static struct vm_struct *__get_vm_area_node(unsigned long size, @@ -1360,10 +1345,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, /* * When this function is called from __vmalloc_node_range, - * we do not add vm_struct to vmlist here to avoid - * accessing uninitialized members of vm_struct such as - * pages and nr_pages fields. They will be set later. - * To distinguish it from others, we use a VM_UNLIST flag. + * we add VM_UNLIST flag to avoid accessing uninitialized + * members of vm_struct such as pages and nr_pages fields. + * They will be set later. */ if (flags & VM_UNLIST) setup_vmalloc_vm(area, va, flags, caller); @@ -1447,19 +1431,10 @@ struct vm_struct *remove_vm_area(const void *addr) if (va && va->flags & VM_VM_AREA) { struct vm_struct *vm = va->vm; - if (!(vm->flags & VM_UNLIST)) { - struct vm_struct *tmp, **p; - /* - * remove from list and disallow access to - * this vm_struct before unmap. (address range - * confliction is maintained by vmap.) - */ - write_lock(&vmlist_lock); - for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) - ; - *p = tmp->next; - write_unlock(&vmlist_lock); - } + spin_lock(&vmap_area_lock); + va->vm = NULL; + va->flags &= ~VM_VM_AREA; + spin_unlock(&vmap_area_lock); vmap_debug_free_range(va->va_start, va->va_end); free_unmap_vmap_area(va); @@ -1680,10 +1655,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, return NULL; /* - * In this function, newly allocated vm_struct is not added - * to vmlist at __get_vm_area_node(). so, it is added here. + * In this function, newly allocated vm_struct has VM_UNLIST flag. + * It means that vm_struct is not fully initialized. + * Now, it is fully initialized, so remove this flag here. */ - insert_vmalloc_vmlist(area); + clear_vm_unlist(area); /* * A ref_count = 3 is needed because the vm_struct and vmap_area @@ -2005,7 +1981,8 @@ static int aligned_vwrite(char *buf, char *addr, unsigned long count) long vread(char *buf, char *addr, unsigned long count) { - struct vm_struct *tmp; + struct vmap_area *va; + struct vm_struct *vm; char *vaddr, *buf_start = buf; unsigned long buflen = count; unsigned long n; @@ -2014,10 +1991,17 @@ long vread(char *buf, char *addr, unsigned long count) if ((unsigned long) addr + count < count) count = -(unsigned long) addr; - read_lock(&vmlist_lock); - for (tmp = vmlist; count && tmp; tmp = tmp->next) { - vaddr = (char *) tmp->addr; - if (addr >= vaddr + tmp->size - PAGE_SIZE) + spin_lock(&vmap_area_lock); + list_for_each_entry(va, &vmap_area_list, list) { + if (!count) + break; + + if (!(va->flags & VM_VM_AREA)) + continue; + + vm = va->vm; + vaddr = (char *) vm->addr; + if (addr >= vaddr + vm->size - PAGE_SIZE) continue; while (addr < vaddr) { if (count == 0) @@ -2027,10 +2011,10 @@ long vread(char *buf, char *addr, unsigned long count) addr++; count--; } - n = vaddr + tmp->size - PAGE_SIZE - addr; + n = vaddr + vm->size - PAGE_SIZE - addr; if (n > count) n = count; - if (!(tmp->flags & VM_IOREMAP)) + if (!(vm->flags & VM_IOREMAP)) aligned_vread(buf, addr, n); else /* IOREMAP area is treated as memory hole */ memset(buf, 0, n); @@ -2039,7 +2023,7 @@ long vread(char *buf, char *addr, unsigned long count) count -= n; } finished: - read_unlock(&vmlist_lock); + spin_unlock(&vmap_area_lock); if (buf == buf_start) return 0; @@ -2078,7 +2062,8 @@ finished: long vwrite(char *buf, char *addr, unsigned long count) { - struct vm_struct *tmp; + struct vmap_area *va; + struct vm_struct *vm; char *vaddr; unsigned long n, buflen; int copied = 0; @@ -2088,10 +2073,17 @@ long vwrite(char *buf, char *addr, unsigned long count) count = -(unsigned long) addr; buflen = count; - read_lock(&vmlist_lock); - for (tmp = vmlist; count && tmp; tmp = tmp->next) { - vaddr = (char *) tmp->addr; - if (addr >= vaddr + tmp->size - PAGE_SIZE) + spin_lock(&vmap_area_lock); + list_for_each_entry(va, &vmap_area_list, list) { + if (!count) + break; + + if (!(va->flags & VM_VM_AREA)) + continue; + + vm = va->vm; + vaddr = (char *) vm->addr; + if (addr >= vaddr + vm->size - PAGE_SIZE) continue; while (addr < vaddr) { if (count == 0) @@ -2100,10 +2092,10 @@ long vwrite(char *buf, char *addr, unsigned long count) addr++; count--; } - n = vaddr + tmp->size - PAGE_SIZE - addr; + n = vaddr + vm->size - PAGE_SIZE - addr; if (n > count) n = count; - if (!(tmp->flags & VM_IOREMAP)) { + if (!(vm->flags & VM_IOREMAP)) { aligned_vwrite(buf, addr, n); copied++; } @@ -2112,7 +2104,7 @@ long vwrite(char *buf, char *addr, unsigned long count) count -= n; } finished: - read_unlock(&vmlist_lock); + spin_unlock(&vmap_area_lock); if (!copied) return 0; return buflen; @@ -2519,19 +2511,19 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) - __acquires(&vmlist_lock) + __acquires(&vmap_area_lock) { loff_t n = *pos; - struct vm_struct *v; + struct vmap_area *va; - read_lock(&vmlist_lock); - v = vmlist; - while (n > 0 && v) { + spin_lock(&vmap_area_lock); + va = list_entry((&vmap_area_list)->next, typeof(*va), list); + while (n > 0 && &va->list != &vmap_area_list) { n--; - v = v->next; + va = list_entry(va->list.next, typeof(*va), list); } - if (!n) - return v; + if (!n && &va->list != &vmap_area_list) + return va; return NULL; @@ -2539,16 +2531,20 @@ static void *s_start(struct seq_file *m, loff_t *pos) static void *s_next(struct seq_file *m, void *p, loff_t *pos) { - struct vm_struct *v = p; + struct vmap_area *va = p, *next; ++*pos; - return v->next; + next = list_entry(va->list.next, typeof(*va), list); + if (&next->list != &vmap_area_list) + return next; + + return NULL; } static void s_stop(struct seq_file *m, void *p) - __releases(&vmlist_lock) + __releases(&vmap_area_lock) { - read_unlock(&vmlist_lock); + spin_unlock(&vmap_area_lock); } static void show_numa_info(struct seq_file *m, struct vm_struct *v) @@ -2559,6 +2555,11 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) if (!counters) return; + /* Pair with smp_wmb() in clear_vm_unlist() */ + smp_rmb(); + if (v->flags & VM_UNLIST) + return; + memset(counters, 0, nr_node_ids * sizeof(unsigned int)); for (nr = 0; nr < v->nr_pages; nr++) @@ -2572,7 +2573,20 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) static int s_show(struct seq_file *m, void *p) { - struct vm_struct *v = p; + struct vmap_area *va = p; + struct vm_struct *v; + + if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING)) + return 0; + + if (!(va->flags & VM_VM_AREA)) { + seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", + (void *)va->va_start, (void *)va->va_end, + va->va_end - va->va_start); + return 0; + } + + v = va->vm; seq_printf(m, "0x%pK-0x%pK %7ld", v->addr, v->addr + v->size, v->size); @@ -2645,5 +2659,53 @@ static int __init proc_vmalloc_init(void) return 0; } module_init(proc_vmalloc_init); + +void get_vmalloc_info(struct vmalloc_info *vmi) +{ + struct vmap_area *va; + unsigned long free_area_size; + unsigned long prev_end; + + vmi->used = 0; + vmi->largest_chunk = 0; + + prev_end = VMALLOC_START; + + spin_lock(&vmap_area_lock); + + if (list_empty(&vmap_area_list)) { + vmi->largest_chunk = VMALLOC_TOTAL; + goto out; + } + + list_for_each_entry(va, &vmap_area_list, list) { + unsigned long addr = va->va_start; + + /* + * Some archs keep another range for modules in vmalloc space + */ + if (addr < VMALLOC_START) + continue; + if (addr >= VMALLOC_END) + break; + + if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING)) + continue; + + vmi->used += (va->va_end - va->va_start); + + free_area_size = addr - prev_end; + if (vmi->largest_chunk < free_area_size) + vmi->largest_chunk = free_area_size; + + prev_end = va->va_end; + } + + if (VMALLOC_END - prev_end > vmi->largest_chunk) + vmi->largest_chunk = VMALLOC_END - prev_end; + +out: + spin_unlock(&vmap_area_lock); +} #endif diff --git a/mm/vmpressure.c b/mm/vmpressure.c new file mode 100644 index 000000000000..736a6011c2c8 --- /dev/null +++ b/mm/vmpressure.c @@ -0,0 +1,374 @@ +/* + * Linux VM pressure + * + * Copyright 2012 Linaro Ltd. + * Anton Vorontsov <anton.vorontsov@linaro.org> + * + * Based on ideas from Andrew Morton, David Rientjes, KOSAKI Motohiro, + * Leonid Moiseichuk, Mel Gorman, Minchan Kim and Pekka Enberg. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/cgroup.h> +#include <linux/fs.h> +#include <linux/log2.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmstat.h> +#include <linux/eventfd.h> +#include <linux/swap.h> +#include <linux/printk.h> +#include <linux/vmpressure.h> + +/* + * The window size (vmpressure_win) is the number of scanned pages before + * we try to analyze scanned/reclaimed ratio. So the window is used as a + * rate-limit tunable for the "low" level notification, and also for + * averaging the ratio for medium/critical levels. Using small window + * sizes can cause lot of false positives, but too big window size will + * delay the notifications. + * + * As the vmscan reclaimer logic works with chunks which are multiple of + * SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well. + * + * TODO: Make the window size depend on machine size, as we do for vmstat + * thresholds. Currently we set it to 512 pages (2MB for 4KB pages). + */ +static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; + +/* + * These thresholds are used when we account memory pressure through + * scanned/reclaimed ratio. The current values were chosen empirically. In + * essence, they are percents: the higher the value, the more number + * unsuccessful reclaims there were. + */ +static const unsigned int vmpressure_level_med = 60; +static const unsigned int vmpressure_level_critical = 95; + +/* + * When there are too little pages left to scan, vmpressure() may miss the + * critical pressure as number of pages will be less than "window size". + * However, in that case the vmscan priority will raise fast as the + * reclaimer will try to scan LRUs more deeply. + * + * The vmscan logic considers these special priorities: + * + * prio == DEF_PRIORITY (12): reclaimer starts with that value + * prio <= DEF_PRIORITY - 2 : kswapd becomes somewhat overwhelmed + * prio == 0 : close to OOM, kernel scans every page in an lru + * + * Any value in this range is acceptable for this tunable (i.e. from 12 to + * 0). Current value for the vmpressure_level_critical_prio is chosen + * empirically, but the number, in essence, means that we consider + * critical level when scanning depth is ~10% of the lru size (vmscan + * scans 'lru_size >> prio' pages, so it is actually 12.5%, or one + * eights). + */ +static const unsigned int vmpressure_level_critical_prio = ilog2(100 / 10); + +static struct vmpressure *work_to_vmpressure(struct work_struct *work) +{ + return container_of(work, struct vmpressure, work); +} + +static struct vmpressure *cg_to_vmpressure(struct cgroup *cg) +{ + return css_to_vmpressure(cgroup_subsys_state(cg, mem_cgroup_subsys_id)); +} + +static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr) +{ + struct cgroup *cg = vmpressure_to_css(vmpr)->cgroup; + struct mem_cgroup *memcg = mem_cgroup_from_cont(cg); + + memcg = parent_mem_cgroup(memcg); + if (!memcg) + return NULL; + return memcg_to_vmpressure(memcg); +} + +enum vmpressure_levels { + VMPRESSURE_LOW = 0, + VMPRESSURE_MEDIUM, + VMPRESSURE_CRITICAL, + VMPRESSURE_NUM_LEVELS, +}; + +static const char * const vmpressure_str_levels[] = { + [VMPRESSURE_LOW] = "low", + [VMPRESSURE_MEDIUM] = "medium", + [VMPRESSURE_CRITICAL] = "critical", +}; + +static enum vmpressure_levels vmpressure_level(unsigned long pressure) +{ + if (pressure >= vmpressure_level_critical) + return VMPRESSURE_CRITICAL; + else if (pressure >= vmpressure_level_med) + return VMPRESSURE_MEDIUM; + return VMPRESSURE_LOW; +} + +static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, + unsigned long reclaimed) +{ + unsigned long scale = scanned + reclaimed; + unsigned long pressure; + + /* + * We calculate the ratio (in percents) of how many pages were + * scanned vs. reclaimed in a given time frame (window). Note that + * time is in VM reclaimer's "ticks", i.e. number of pages + * scanned. This makes it possible to set desired reaction time + * and serves as a ratelimit. + */ + pressure = scale - (reclaimed * scale / scanned); + pressure = pressure * 100 / scale; + + pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure, + scanned, reclaimed); + + return vmpressure_level(pressure); +} + +struct vmpressure_event { + struct eventfd_ctx *efd; + enum vmpressure_levels level; + struct list_head node; +}; + +static bool vmpressure_event(struct vmpressure *vmpr, + unsigned long scanned, unsigned long reclaimed) +{ + struct vmpressure_event *ev; + enum vmpressure_levels level; + bool signalled = false; + + level = vmpressure_calc_level(scanned, reclaimed); + + mutex_lock(&vmpr->events_lock); + + list_for_each_entry(ev, &vmpr->events, node) { + if (level >= ev->level) { + eventfd_signal(ev->efd, 1); + signalled = true; + } + } + + mutex_unlock(&vmpr->events_lock); + + return signalled; +} + +static void vmpressure_work_fn(struct work_struct *work) +{ + struct vmpressure *vmpr = work_to_vmpressure(work); + unsigned long scanned; + unsigned long reclaimed; + + /* + * Several contexts might be calling vmpressure(), so it is + * possible that the work was rescheduled again before the old + * work context cleared the counters. In that case we will run + * just after the old work returns, but then scanned might be zero + * here. No need for any locks here since we don't care if + * vmpr->reclaimed is in sync. + */ + if (!vmpr->scanned) + return; + + mutex_lock(&vmpr->sr_lock); + scanned = vmpr->scanned; + reclaimed = vmpr->reclaimed; + vmpr->scanned = 0; + vmpr->reclaimed = 0; + mutex_unlock(&vmpr->sr_lock); + + do { + if (vmpressure_event(vmpr, scanned, reclaimed)) + break; + /* + * If not handled, propagate the event upward into the + * hierarchy. + */ + } while ((vmpr = vmpressure_parent(vmpr))); +} + +/** + * vmpressure() - Account memory pressure through scanned/reclaimed ratio + * @gfp: reclaimer's gfp mask + * @memcg: cgroup memory controller handle + * @scanned: number of pages scanned + * @reclaimed: number of pages reclaimed + * + * This function should be called from the vmscan reclaim path to account + * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw + * pressure index is then further refined and averaged over time. + * + * This function does not return any value. + */ +void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, + unsigned long scanned, unsigned long reclaimed) +{ + struct vmpressure *vmpr = memcg_to_vmpressure(memcg); + + /* + * Here we only want to account pressure that userland is able to + * help us with. For example, suppose that DMA zone is under + * pressure; if we notify userland about that kind of pressure, + * then it will be mostly a waste as it will trigger unnecessary + * freeing of memory by userland (since userland is more likely to + * have HIGHMEM/MOVABLE pages instead of the DMA fallback). That + * is why we include only movable, highmem and FS/IO pages. + * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so + * we account it too. + */ + if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS))) + return; + + /* + * If we got here with no pages scanned, then that is an indicator + * that reclaimer was unable to find any shrinkable LRUs at the + * current scanning depth. But it does not mean that we should + * report the critical pressure, yet. If the scanning priority + * (scanning depth) goes too high (deep), we will be notified + * through vmpressure_prio(). But so far, keep calm. + */ + if (!scanned) + return; + + mutex_lock(&vmpr->sr_lock); + vmpr->scanned += scanned; + vmpr->reclaimed += reclaimed; + scanned = vmpr->scanned; + mutex_unlock(&vmpr->sr_lock); + + if (scanned < vmpressure_win || work_pending(&vmpr->work)) + return; + schedule_work(&vmpr->work); +} + +/** + * vmpressure_prio() - Account memory pressure through reclaimer priority level + * @gfp: reclaimer's gfp mask + * @memcg: cgroup memory controller handle + * @prio: reclaimer's priority + * + * This function should be called from the reclaim path every time when + * the vmscan's reclaiming priority (scanning depth) changes. + * + * This function does not return any value. + */ +void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) +{ + /* + * We only use prio for accounting critical level. For more info + * see comment for vmpressure_level_critical_prio variable above. + */ + if (prio > vmpressure_level_critical_prio) + return; + + /* + * OK, the prio is below the threshold, updating vmpressure + * information before shrinker dives into long shrinking of long + * range vmscan. Passing scanned = vmpressure_win, reclaimed = 0 + * to the vmpressure() basically means that we signal 'critical' + * level. + */ + vmpressure(gfp, memcg, vmpressure_win, 0); +} + +/** + * vmpressure_register_event() - Bind vmpressure notifications to an eventfd + * @cg: cgroup that is interested in vmpressure notifications + * @cft: cgroup control files handle + * @eventfd: eventfd context to link notifications with + * @args: event arguments (used to set up a pressure level threshold) + * + * This function associates eventfd context with the vmpressure + * infrastructure, so that the notifications will be delivered to the + * @eventfd. The @args parameter is a string that denotes pressure level + * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or + * "critical"). + * + * This function should not be used directly, just pass it to (struct + * cftype).register_event, and then cgroup core will handle everything by + * itself. + */ +int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, + struct eventfd_ctx *eventfd, const char *args) +{ + struct vmpressure *vmpr = cg_to_vmpressure(cg); + struct vmpressure_event *ev; + int level; + + for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) { + if (!strcmp(vmpressure_str_levels[level], args)) + break; + } + + if (level >= VMPRESSURE_NUM_LEVELS) + return -EINVAL; + + ev = kzalloc(sizeof(*ev), GFP_KERNEL); + if (!ev) + return -ENOMEM; + + ev->efd = eventfd; + ev->level = level; + + mutex_lock(&vmpr->events_lock); + list_add(&ev->node, &vmpr->events); + mutex_unlock(&vmpr->events_lock); + + return 0; +} + +/** + * vmpressure_unregister_event() - Unbind eventfd from vmpressure + * @cg: cgroup handle + * @cft: cgroup control files handle + * @eventfd: eventfd context that was used to link vmpressure with the @cg + * + * This function does internal manipulations to detach the @eventfd from + * the vmpressure notifications, and then frees internal resources + * associated with the @eventfd (but the @eventfd itself is not freed). + * + * This function should not be used directly, just pass it to (struct + * cftype).unregister_event, and then cgroup core will handle everything + * by itself. + */ +void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, + struct eventfd_ctx *eventfd) +{ + struct vmpressure *vmpr = cg_to_vmpressure(cg); + struct vmpressure_event *ev; + + mutex_lock(&vmpr->events_lock); + list_for_each_entry(ev, &vmpr->events, node) { + if (ev->efd != eventfd) + continue; + list_del(&ev->node); + kfree(ev); + break; + } + mutex_unlock(&vmpr->events_lock); +} + +/** + * vmpressure_init() - Initialize vmpressure control structure + * @vmpr: Structure to be initialized + * + * This function should be called on every allocated vmpressure structure + * before any usage. + */ +void vmpressure_init(struct vmpressure *vmpr) +{ + mutex_init(&vmpr->sr_lock); + mutex_init(&vmpr->events_lock); + INIT_LIST_HEAD(&vmpr->events); + INIT_WORK(&vmpr->work, vmpressure_work_fn); +} diff --git a/mm/vmscan.c b/mm/vmscan.c index 669fba39be1a..fa6a85378ee4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -19,6 +19,7 @@ #include <linux/pagemap.h> #include <linux/init.h> #include <linux/highmem.h> +#include <linux/vmpressure.h> #include <linux/vmstat.h> #include <linux/file.h> #include <linux/writeback.h> @@ -780,7 +781,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageAnon(page) && !PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; - if (!add_to_swap(page)) + if (!add_to_swap(page, page_list)) goto activate_locked; may_enter_fs = 1; } @@ -1982,6 +1983,11 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) } memcg = mem_cgroup_iter(root, memcg, &reclaim); } while (memcg); + + vmpressure(sc->gfp_mask, sc->target_mem_cgroup, + sc->nr_scanned - nr_scanned, + sc->nr_reclaimed - nr_reclaimed); + } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); } @@ -2167,6 +2173,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, count_vm_event(ALLOCSTALL); do { + vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, + sc->priority); sc->nr_scanned = 0; aborted_reclaim = shrink_zones(zonelist, sc); @@ -2619,7 +2627,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, bool pgdat_is_balanced = false; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ - unsigned long total_scanned; struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; @@ -2639,7 +2646,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, .gfp_mask = sc.gfp_mask, }; loop_again: - total_scanned = 0; sc.priority = DEF_PRIORITY; sc.nr_reclaimed = 0; sc.may_writepage = !laptop_mode; @@ -2730,7 +2736,6 @@ loop_again: order, sc.gfp_mask, &nr_soft_scanned); sc.nr_reclaimed += nr_soft_reclaimed; - total_scanned += nr_soft_scanned; /* * We put equal pressure on every zone, unless @@ -2765,7 +2770,6 @@ loop_again: reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; - total_scanned += sc.nr_scanned; if (nr_slab == 0 && !zone_reclaimable(zone)) zone->all_unreclaimable = 1; diff --git a/mm/vmstat.c b/mm/vmstat.c index e1d8ed172c42..7a35116462a7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -52,7 +52,6 @@ void all_vm_events(unsigned long *ret) } EXPORT_SYMBOL_GPL(all_vm_events); -#ifdef CONFIG_HOTPLUG /* * Fold the foreign cpu events into our own. * @@ -69,7 +68,6 @@ void vm_events_fold_cpu(int cpu) fold_state->event[i] = 0; } } -#endif /* CONFIG_HOTPLUG */ #endif /* CONFIG_VM_EVENT_COUNTERS */ @@ -495,6 +493,10 @@ void refresh_cpu_vm_stats(int cpu) atomic_long_add(global_diff[i], &vm_stat[i]); } +/* + * this is only called if !populated_zone(zone), which implies no other users of + * pset->vm_stat_diff[] exsist. + */ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) { int i; @@ -792,7 +794,14 @@ const char * const vmstat_text[] = { "compact_stall", "compact_fail", "compact_success", -#endif + +#ifdef CONFIG_BALLOON_COMPACTION + "compact_balloon_isolated", + "compact_balloon_migrated", + "compact_balloon_returned", +#endif /* CONFIG_BALLOON_COMPACTION */ + +#endif /* CONFIG_COMPACTION */ #ifdef CONFIG_HUGETLB_PAGE "htlb_buddy_alloc_success", diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f6af4fe59f2e..5bab10016067 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2198,7 +2198,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) pkt_dev->curfl = 0; /*reset */ } } else { - flow = random32() % pkt_dev->cflows; + flow = prandom_u32() % pkt_dev->cflows; pkt_dev->curfl = flow; if (pkt_dev->flows[flow].count > pkt_dev->lflow) { @@ -2246,7 +2246,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev) else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) { __u16 t; if (pkt_dev->flags & F_QUEUE_MAP_RND) { - t = random32() % + t = prandom_u32() % (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) + pkt_dev->queue_map_min; @@ -2278,7 +2278,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 tmp; if (pkt_dev->flags & F_MACSRC_RND) - mc = random32() % pkt_dev->src_mac_count; + mc = prandom_u32() % pkt_dev->src_mac_count; else { mc = pkt_dev->cur_src_mac_offset++; if (pkt_dev->cur_src_mac_offset >= @@ -2304,7 +2304,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 tmp; if (pkt_dev->flags & F_MACDST_RND) - mc = random32() % pkt_dev->dst_mac_count; + mc = prandom_u32() % pkt_dev->dst_mac_count; else { mc = pkt_dev->cur_dst_mac_offset++; @@ -2331,21 +2331,21 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | - ((__force __be32)random32() & + ((__force __be32)prandom_u32() & htonl(0x000fffff)); } if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) { - pkt_dev->vlan_id = random32() & (4096-1); + pkt_dev->vlan_id = prandom_u32() & (4096 - 1); } if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) { - pkt_dev->svlan_id = random32() & (4096 - 1); + pkt_dev->svlan_id = prandom_u32() & (4096 - 1); } if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { if (pkt_dev->flags & F_UDPSRC_RND) - pkt_dev->cur_udp_src = random32() % + pkt_dev->cur_udp_src = prandom_u32() % (pkt_dev->udp_src_max - pkt_dev->udp_src_min) + pkt_dev->udp_src_min; @@ -2358,7 +2358,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) { if (pkt_dev->flags & F_UDPDST_RND) { - pkt_dev->cur_udp_dst = random32() % + pkt_dev->cur_udp_dst = prandom_u32() % (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min) + pkt_dev->udp_dst_min; } else { @@ -2375,7 +2375,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (imn < imx) { __u32 t; if (pkt_dev->flags & F_IPSRC_RND) - t = random32() % (imx - imn) + imn; + t = prandom_u32() % (imx - imn) + imn; else { t = ntohl(pkt_dev->cur_saddr); t++; @@ -2396,17 +2396,15 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __be32 s; if (pkt_dev->flags & F_IPDST_RND) { - t = random32() % (imx - imn) + imn; - s = htonl(t); - - while (ipv4_is_loopback(s) || - ipv4_is_multicast(s) || - ipv4_is_lbcast(s) || - ipv4_is_zeronet(s) || - ipv4_is_local_multicast(s)) { - t = random32() % (imx - imn) + imn; + do { + t = prandom_u32() % + (imx - imn) + imn; s = htonl(t); - } + } while (ipv4_is_loopback(s) || + ipv4_is_multicast(s) || + ipv4_is_lbcast(s) || + ipv4_is_zeronet(s) || + ipv4_is_local_multicast(s)); pkt_dev->cur_daddr = s; } else { t = ntohl(pkt_dev->cur_daddr); @@ -2437,7 +2435,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < 4; i++) { pkt_dev->cur_in6_daddr.s6_addr32[i] = - (((__force __be32)random32() | + (((__force __be32)prandom_u32() | pkt_dev->min_in6_daddr.s6_addr32[i]) & pkt_dev->max_in6_daddr.s6_addr32[i]); } @@ -2447,7 +2445,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { __u32 t; if (pkt_dev->flags & F_TXSIZE_RND) { - t = random32() % + t = prandom_u32() % (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size) + pkt_dev->min_pkt_size; } else { diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 9e4074c26dc2..1b185235de0a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1705,9 +1705,9 @@ static struct ctl_table vs_vars[] = { }, { .procname = "sync_qlen_max", - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "sync_sock_size", diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 007e8c43d19a..f993ffcd00e7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -265,7 +265,7 @@ static void death_by_event(unsigned long ul_conntrack) if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { /* bad luck, let's retry again */ ecache->timeout.expires = jiffies + - (random32() % net->ct.sysctl_events_retry_timeout); + (prandom_u32() % net->ct.sysctl_events_retry_timeout); add_timer(&ecache->timeout); return; } @@ -284,7 +284,7 @@ void nf_ct_dying_timeout(struct nf_conn *ct) /* set a new timer to retry event delivery */ setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); ecache->timeout.expires = jiffies + - (random32() % net->ct.sysctl_events_retry_timeout); + (prandom_u32() % net->ct.sysctl_events_retry_timeout); add_timer(&ecache->timeout); } EXPORT_SYMBOL_GPL(nf_ct_dying_timeout); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index cc37dd52ecf9..ef53ab8d0aae 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -80,7 +80,7 @@ struct choke_sched_data { /* deliver a random number between 0 and N - 1 */ static u32 random_N(unsigned int N) { - return reciprocal_divide(random32(), N); + return reciprocal_divide(prandom_u32(), N); } /* number of elements in queue including holes */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 423549a714e5..91cfd8f94a19 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -66,13 +66,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work); static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc); -/* Keep track of the new idr low so that we don't re-use association id - * numbers too fast. It is protected by they idr spin lock is in the - * range of 1 - INT_MAX. - */ -static u32 idr_low = 1; - - /* 1st Level Abstractions. */ /* Initialize a new association from provided memory. */ @@ -1597,13 +1590,8 @@ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) if (preload) idr_preload(gfp); spin_lock_bh(&sctp_assocs_id_lock); - /* 0 is not a valid id, idr_low is always >= 1 */ - ret = idr_alloc(&sctp_assocs_id, asoc, idr_low, 0, GFP_NOWAIT); - if (ret >= 0) { - idr_low = ret + 1; - if (idr_low == INT_MAX) - idr_low = 1; - } + /* 0 is not a valid assoc_id, must be >= 1 */ + ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT); spin_unlock_bh(&sctp_assocs_id_lock); if (preload) idr_preload_end(); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 88edec929d73..1da52d1406fc 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -130,8 +130,8 @@ gss_krb5_make_confounder(char *p, u32 conflen) /* initialize to random value */ if (i == 0) { - i = random32(); - i = (i << 32) | random32(); + i = prandom_u32(); + i = (i << 32) | prandom_u32(); } switch (conflen) { diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 05ea9ac8848c..80fe5c86efd1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1210,7 +1210,6 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); * key and content are both parsed by cache */ -#define isodigit(c) (isdigit(c) && c <= '7') int qword_get(char **bpp, char *dest, int bufsize) { /* return bytes copied, or -1 on error */ diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 51bb3de680b6..a0ab6d7599f1 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -311,6 +311,11 @@ cmd_lzo = (cat $(filter-out FORCE,$^) | \ lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ (rm -f $@ ; false) +quiet_cmd_lz4 = LZ4 $@ +cmd_lz4 = (cat $(filter-out FORCE,$^) | \ + lz4demo -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + # U-Boot mkimage # --------------------------------------------------------------------------- diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4de4bc48493b..3a7600d12783 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -628,6 +628,13 @@ sub sanitise_line { return $res; } +sub get_quoted_string { + my ($line, $rawline) = @_; + + return "" if ($line !~ m/(\"[X]+\")/g); + return substr($rawline, $-[0], $+[0] - $-[0]); +} + sub ctx_statement_block { my ($linenr, $remain, $off) = @_; my $line = $linenr - 1; @@ -1576,7 +1583,8 @@ sub process { # Check for incorrect file permissions if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) { my $permhere = $here . "FILE: $realfile\n"; - if ($realfile =~ /(Makefile|Kconfig|\.c|\.h|\.S|\.tmpl)$/) { + if ($realfile !~ m@scripts/@ && + $realfile !~ /\.(py|pl|awk|sh)$/) { ERROR("EXECUTE_PERMISSIONS", "do not set execute permissions for source files\n" . $permhere); } @@ -2514,8 +2522,8 @@ sub process { # check for whitespace before a non-naked semicolon if ($line =~ /^\+.*\S\s+;/) { - CHK("SPACING", - "space prohibited before semicolon\n" . $herecurr); + WARN("SPACING", + "space prohibited before semicolon\n" . $herecurr); } # Check operator spacing. @@ -3221,7 +3229,7 @@ sub process { } # check for unnecessary blank lines around braces - if (($line =~ /^..*}\s*$/ && $prevline =~ /^.\s*$/)) { + if (($line =~ /^.\s*}\s*$/ && $prevline =~ /^.\s*$/)) { CHK("BRACES", "Blank lines aren't necessary before a close brace '}'\n" . $hereprev); } @@ -3373,6 +3381,15 @@ sub process { "struct spinlock should be spinlock_t\n" . $herecurr); } +# check for seq_printf uses that could be seq_puts + if ($line =~ /\bseq_printf\s*\(/) { + my $fmt = get_quoted_string($line, $rawline); + if ($fmt !~ /[^\\]\%/) { + WARN("PREFER_SEQ_PUTS", + "Prefer seq_puts to seq_printf\n" . $herecurr); + } + } + # Check for misused memsets if ($^V && $^V ge 5.10.0 && defined $stat && @@ -3477,6 +3494,13 @@ sub process { "unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr); } +# check for krealloc arg reuse + if ($^V && $^V ge 5.10.0 && + $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*krealloc\s*\(\s*\1\s*,/) { + WARN("KREALLOC_ARG_REUSE", + "Reusing the krealloc arg is almost always a bug\n" . $herecurr); + } + # check for alloc argument mismatch if ($line =~ /\b(kcalloc|kmalloc_array)\s*\(\s*sizeof\b/) { WARN("ALLOC_ARRAY_ARGS", diff --git a/scripts/decodecode b/scripts/decodecode index 4f8248d5a11f..d8824f37acce 100755 --- a/scripts/decodecode +++ b/scripts/decodecode @@ -89,10 +89,16 @@ echo $code >> $T.s disas $T cat $T.dis >> $T.aa +# (lines of whole $T.oo) - (lines of $T.aa, i.e. "Code starting") + 3, +# i.e. the title + the "===..=" line (sed is counting from 1, 0 address is +# special) +faultlinenum=$(( $(wc -l $T.oo | cut -d" " -f1) - \ + $(wc -l $T.aa | cut -d" " -f1) + 3)) + faultline=`cat $T.dis | head -1 | cut -d":" -f2-` faultline=`echo "$faultline" | sed -e 's/\[/\\\[/g; s/\]/\\\]/g'` -cat $T.oo | sed -e "s/\($faultline\)/\*\1 <-- trapping instruction/g" +cat $T.oo | sed -e "${faultlinenum}s/^\(.*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/" echo cat $T.aa cleanup diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index ce4cc837b748..5e4fb144a04f 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -611,7 +611,7 @@ sub get_maintainers { $hash{$tvi} = $value_pd; } } - } elsif ($type eq 'K') { + } elsif ($type eq 'N') { if ($file =~ m/$value/x) { $hash{$tvi} = 0; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 8bbefc3b55d4..d4f1468b9b50 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -16,6 +16,8 @@ #include <linux/key-type.h> #include <linux/task_work.h> +struct iovec; + #ifdef __KDEBUG #define kenter(FMT, ...) \ printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 4b5c948eb414..33cfd27b4de2 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -22,6 +22,7 @@ #include <linux/err.h> #include <linux/vmalloc.h> #include <linux/security.h> +#include <linux/uio.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 4bd6bdb74193..c411f9bb156b 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -93,9 +93,16 @@ static void umh_keys_cleanup(struct subprocess_info *info) static int call_usermodehelper_keys(char *path, char **argv, char **envp, struct key *session_keyring, int wait) { - return call_usermodehelper_fns(path, argv, envp, wait, - umh_keys_init, umh_keys_cleanup, - key_get(session_keyring)); + struct subprocess_info *info; + + info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL, + umh_keys_init, umh_keys_cleanup, + session_keyring); + if (!info) + return -ENOMEM; + + key_get(session_keyring); + return call_usermodehelper_exec(info, wait); } /* diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 5bce9152b64e..479e0a581797 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -25,7 +25,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/pm_qos.h> -#include <linux/uio.h> +#include <linux/aio.h> #include <linux/dma-mapping.h> #include <sound/core.h> #include <sound/control.h> diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index a4805932972b..c25eec5d4a5d 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,11 +1,14 @@ TARGETS = breakpoints -TARGETS += kcmp -TARGETS += mqueue -TARGETS += vm TARGETS += cpu-hotplug -TARGETS += memory-hotplug TARGETS += efivarfs +TARGETS += kcmp +TARGETS += memory-hotplug +TARGETS += mqueue TARGETS += net +TARGETS += ptrace +TARGETS += soft-dirty +TARGETS += timers +TARGETS += vm all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile new file mode 100644 index 000000000000..47ae2d385ce8 --- /dev/null +++ b/tools/testing/selftests/ptrace/Makefile @@ -0,0 +1,10 @@ +CFLAGS += -iquote../../../../include/uapi -Wall +peeksiginfo: peeksiginfo.c + +all: peeksiginfo + +clean: + rm -f peeksiginfo + +run_tests: all + @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]" diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c new file mode 100644 index 000000000000..d46558b1f58d --- /dev/null +++ b/tools/testing/selftests/ptrace/peeksiginfo.c @@ -0,0 +1,214 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <linux/types.h> +#include <sys/wait.h> +#include <sys/syscall.h> +#include <sys/user.h> +#include <sys/mman.h> + +#include "linux/ptrace.h" + +static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo) +{ + return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo); +} + +static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid, + int sig, siginfo_t *uinfo) +{ + return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo); +} + +static int sys_ptrace(int request, pid_t pid, void *addr, void *data) +{ + return syscall(SYS_ptrace, request, pid, addr, data); +} + +#define SIGNR 10 +#define TEST_SICODE_PRIV -1 +#define TEST_SICODE_SHARE -2 + +#define err(fmt, ...) \ + fprintf(stderr, \ + "Error (%s:%d): " fmt, \ + __FILE__, __LINE__, ##__VA_ARGS__) + +static int check_error_paths(pid_t child) +{ + struct ptrace_peeksiginfo_args arg; + int ret, exit_code = -1; + void *addr_rw, *addr_ro; + + /* + * Allocate two contiguous pages. The first one is for read-write, + * another is for read-only. + */ + addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr_rw == MAP_FAILED) { + err("mmap() failed: %m\n"); + return 1; + } + + addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (addr_ro == MAP_FAILED) { + err("mmap() failed: %m\n"); + goto out; + } + + arg.nr = SIGNR; + arg.off = 0; + + /* Unsupported flags */ + arg.flags = ~0; + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw); + if (ret != -1 || errno != EINVAL) { + err("sys_ptrace() returns %d (expected -1)," + " errno %d (expected %d): %m\n", + ret, errno, EINVAL); + goto out; + } + arg.flags = 0; + + /* A part of the buffer is read-only */ + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, + addr_ro - sizeof(siginfo_t) * 2); + if (ret != 2) { + err("sys_ptrace() returns %d (expected 2): %m\n", ret); + goto out; + } + + /* Read-only buffer */ + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro); + if (ret != -1 && errno != EFAULT) { + err("sys_ptrace() returns %d (expected -1)," + " errno %d (expected %d): %m\n", + ret, errno, EFAULT); + goto out; + } + + exit_code = 0; +out: + munmap(addr_rw, 2 * PAGE_SIZE); + return exit_code; +} + +int check_direct_path(pid_t child, int shared, int nr) +{ + struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0}; + int i, j, ret, exit_code = -1; + siginfo_t siginfo[SIGNR]; + int si_code; + + if (shared == 1) { + arg.flags = PTRACE_PEEKSIGINFO_SHARED; + si_code = TEST_SICODE_SHARE; + } else { + arg.flags = 0; + si_code = TEST_SICODE_PRIV; + } + + for (i = 0; i < SIGNR; ) { + arg.off = i; + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo); + if (ret == -1) { + err("ptrace() failed: %m\n"); + goto out; + } + + if (ret == 0) + break; + + for (j = 0; j < ret; j++, i++) { + if (siginfo[j].si_code == si_code && + siginfo[j].si_int == i) + continue; + + err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n", + shared, i, siginfo[j].si_code, siginfo[j].si_int); + goto out; + } + } + + if (i != SIGNR) { + err("Only %d signals were read\n", i); + goto out; + } + + exit_code = 0; +out: + return exit_code; +} + +int main(int argc, char *argv[]) +{ + siginfo_t siginfo[SIGNR]; + int i, exit_code = 1; + sigset_t blockmask; + pid_t child; + + sigemptyset(&blockmask); + sigaddset(&blockmask, SIGRTMIN); + sigprocmask(SIG_BLOCK, &blockmask, NULL); + + child = fork(); + if (child == -1) { + err("fork() failed: %m"); + return 1; + } else if (child == 0) { + pid_t ppid = getppid(); + while (1) { + if (ppid != getppid()) + break; + sleep(1); + } + return 1; + } + + /* Send signals in process-wide and per-thread queues */ + for (i = 0; i < SIGNR; i++) { + siginfo->si_code = TEST_SICODE_SHARE; + siginfo->si_int = i; + sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo); + + siginfo->si_code = TEST_SICODE_PRIV; + siginfo->si_int = i; + sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo); + } + + if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1) + return 1; + + waitpid(child, NULL, 0); + + /* Dump signals one by one*/ + if (check_direct_path(child, 0, 1)) + goto out; + /* Dump all signals for one call */ + if (check_direct_path(child, 0, SIGNR)) + goto out; + + /* + * Dump signal from the process-wide queue. + * The number of signals is not multible to the buffer size + */ + if (check_direct_path(child, 1, 3)) + goto out; + + if (check_error_paths(child)) + goto out; + + printf("PASS\n"); + exit_code = 0; +out: + if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1) + return 1; + + waitpid(child, NULL, 0); + + return exit_code; +} diff --git a/tools/testing/selftests/soft-dirty/Makefile b/tools/testing/selftests/soft-dirty/Makefile new file mode 100644 index 000000000000..a9cdc823d6e0 --- /dev/null +++ b/tools/testing/selftests/soft-dirty/Makefile @@ -0,0 +1,10 @@ +CFLAGS += -iquote../../../../include/uapi -Wall +soft-dirty: soft-dirty.c + +all: soft-dirty + +clean: + rm -f soft-dirty + +run_tests: all + @./soft-dirty || echo "soft-dirty selftests: [FAIL]" diff --git a/tools/testing/selftests/soft-dirty/soft-dirty.c b/tools/testing/selftests/soft-dirty/soft-dirty.c new file mode 100644 index 000000000000..aba4f87f87f0 --- /dev/null +++ b/tools/testing/selftests/soft-dirty/soft-dirty.c @@ -0,0 +1,114 @@ +#include <stdlib.h> +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> + +typedef unsigned long long u64; + +#define PME_PRESENT (1ULL << 63) +#define PME_SOFT_DIRTY (1Ull << 55) + +#define PAGES_TO_TEST 3 +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +static void get_pagemap2(char *mem, u64 *map) +{ + int fd; + + fd = open("/proc/self/pagemap2", O_RDONLY); + if (fd < 0) { + perror("Can't open pagemap2"); + exit(1); + } + + lseek(fd, (unsigned long)mem / PAGE_SIZE * sizeof(u64), SEEK_SET); + read(fd, map, sizeof(u64) * PAGES_TO_TEST); + close(fd); +} + +static inline char map_p(u64 map) +{ + return map & PME_PRESENT ? 'p' : '-'; +} + +static inline char map_sd(u64 map) +{ + return map & PME_SOFT_DIRTY ? 'd' : '-'; +} + +static int check_pte(int step, int page, u64 *map, u64 want) +{ + if ((map[page] & want) != want) { + printf("Step %d Page %d has %c%c, want %c%c\n", + step, page, + map_p(map[page]), map_sd(map[page]), + map_p(want), map_sd(want)); + return 1; + } + + return 0; +} + +static void clear_refs(void) +{ + int fd; + char *v = "4"; + + fd = open("/proc/self/clear_refs", O_WRONLY); + if (write(fd, v, 3) < 3) { + perror("Can't clear soft-dirty bit"); + exit(1); + } + close(fd); +} + +int main(void) +{ + char *mem, x; + u64 map[PAGES_TO_TEST]; + + mem = mmap(NULL, PAGES_TO_TEST * PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 0, 0); + + x = mem[0]; + mem[2 * PAGE_SIZE] = 'c'; + get_pagemap2(mem, map); + + if (check_pte(1, 0, map, PME_PRESENT)) + return 1; + if (check_pte(1, 1, map, 0)) + return 1; + if (check_pte(1, 2, map, PME_PRESENT | PME_SOFT_DIRTY)) + return 1; + + clear_refs(); + get_pagemap2(mem, map); + + if (check_pte(2, 0, map, PME_PRESENT)) + return 1; + if (check_pte(2, 1, map, 0)) + return 1; + if (check_pte(2, 2, map, PME_PRESENT)) + return 1; + + mem[0] = 'a'; + mem[PAGE_SIZE] = 'b'; + x = mem[2 * PAGE_SIZE]; + get_pagemap2(mem, map); + + if (check_pte(3, 0, map, PME_PRESENT | PME_SOFT_DIRTY)) + return 1; + if (check_pte(3, 1, map, PME_PRESENT | PME_SOFT_DIRTY)) + return 1; + if (check_pte(3, 2, map, PME_PRESENT)) + return 1; + + (void)x; /* gcc warn */ + + printf("PASS\n"); + return 0; +} diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile new file mode 100644 index 000000000000..eb2859f4ad21 --- /dev/null +++ b/tools/testing/selftests/timers/Makefile @@ -0,0 +1,8 @@ +all: + gcc posix_timers.c -o posix_timers -lrt + +run_tests: all + ./posix_timers + +clean: + rm -f ./posix_timers diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c new file mode 100644 index 000000000000..4fa655d68a81 --- /dev/null +++ b/tools/testing/selftests/timers/posix_timers.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2013 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com> + * + * Licensed under the terms of the GNU GPL License version 2 + * + * Selftests for a few posix timers interface. + * + * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com> + */ + +#include <sys/time.h> +#include <stdio.h> +#include <signal.h> +#include <unistd.h> +#include <time.h> +#include <pthread.h> + +#define DELAY 2 +#define USECS_PER_SEC 1000000 + +static volatile int done; + +/* Busy loop in userspace to elapse ITIMER_VIRTUAL */ +static void user_loop(void) +{ + while (!done); +} + +/* + * Try to spend as much time as possible in kernelspace + * to elapse ITIMER_PROF. + */ +static void kernel_loop(void) +{ + void *addr = sbrk(0); + + while (!done) { + brk(addr + 4096); + brk(addr); + } +} + +/* + * Sleep until ITIMER_REAL expiration. + */ +static void idle_loop(void) +{ + pause(); +} + +static void sig_handler(int nr) +{ + done = 1; +} + +/* + * Check the expected timer expiration matches the GTOD elapsed delta since + * we armed the timer. Keep a 0.5 sec error margin due to various jitter. + */ +static int check_diff(struct timeval start, struct timeval end) +{ + long long diff; + + diff = end.tv_usec - start.tv_usec; + diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; + + if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + printf("Diff too high: %lld..", diff); + return -1; + } + + return 0; +} + +static int check_itimer(int which) +{ + int err; + struct timeval start, end; + struct itimerval val = { + .it_value.tv_sec = DELAY, + }; + + printf("Check itimer "); + + if (which == ITIMER_VIRTUAL) + printf("virtual... "); + else if (which == ITIMER_PROF) + printf("prof... "); + else if (which == ITIMER_REAL) + printf("real... "); + + fflush(stdout); + + done = 0; + + if (which == ITIMER_VIRTUAL) + signal(SIGVTALRM, sig_handler); + else if (which == ITIMER_PROF) + signal(SIGPROF, sig_handler); + else if (which == ITIMER_REAL) + signal(SIGALRM, sig_handler); + + err = gettimeofday(&start, NULL); + if (err < 0) { + perror("Can't call gettimeofday()\n"); + return -1; + } + + err = setitimer(which, &val, NULL); + if (err < 0) { + perror("Can't set timer\n"); + return -1; + } + + if (which == ITIMER_VIRTUAL) + user_loop(); + else if (which == ITIMER_PROF) + kernel_loop(); + else if (which == ITIMER_REAL) + idle_loop(); + + gettimeofday(&end, NULL); + if (err < 0) { + perror("Can't call gettimeofday()\n"); + return -1; + } + + if (!check_diff(start, end)) + printf("[OK]\n"); + else + printf("[FAIL]\n"); + + return 0; +} + +static int check_timer_create(int which) +{ + int err; + timer_t id; + struct timeval start, end; + struct itimerspec val = { + .it_value.tv_sec = DELAY, + }; + + printf("Check timer_create() "); + if (which == CLOCK_THREAD_CPUTIME_ID) { + printf("per thread... "); + } else if (which == CLOCK_PROCESS_CPUTIME_ID) { + printf("per process... "); + } + fflush(stdout); + + done = 0; + timer_create(which, NULL, &id); + if (err < 0) { + perror("Can't create timer\n"); + return -1; + } + signal(SIGALRM, sig_handler); + + err = gettimeofday(&start, NULL); + if (err < 0) { + perror("Can't call gettimeofday()\n"); + return -1; + } + + err = timer_settime(id, 0, &val, NULL); + if (err < 0) { + perror("Can't set timer\n"); + return -1; + } + + user_loop(); + + gettimeofday(&end, NULL); + if (err < 0) { + perror("Can't call gettimeofday()\n"); + return -1; + } + + if (!check_diff(start, end)) + printf("[OK]\n"); + else + printf("[FAIL]\n"); + + return 0; +} + +int main(int argc, char **argv) +{ + int err; + + printf("Testing posix timers. False negative may happen on CPU execution \n"); + printf("based timers if other threads run on the CPU...\n"); + + if (check_itimer(ITIMER_VIRTUAL) < 0) + return -1; + + if (check_itimer(ITIMER_PROF) < 0) + return -1; + + if (check_itimer(ITIMER_REAL) < 0) + return -1; + + if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0) + return -1; + + /* + * It's unfortunately hard to reliably test a timer expiration + * on parallel multithread cputime. We could arm it to expire + * on DELAY * nr_threads, with nr_threads busy looping, then wait + * the normal DELAY since the time is elapsing nr_threads faster. + * But for that we need to ensure we have real physical free CPUs + * to ensure true parallelism. So test only one thread until we + * find a better solution. + */ + if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0) + return -1; + + return 0; +} diff --git a/usr/Kconfig b/usr/Kconfig index 085872bb2bb5..642f503d3e9f 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -90,6 +90,15 @@ config RD_LZO Support loading of a LZO encoded initial ramdisk or cpio buffer If unsure, say N. +config RD_LZ4 + bool "Support initial ramdisks compressed using LZ4" if EXPERT + default !EXPERT + depends on BLK_DEV_INITRD + select DECOMPRESS_LZ4 + help + Support loading of a LZ4 encoded initial ramdisk or cpio buffer + If unsure, say N. + choice prompt "Built-in initramfs compression mode" if INITRAMFS_SOURCE!="" help |