summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2014-05-27 18:24:54 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2014-05-27 18:24:54 +1000
commit247264072bc5d7552e3fb517e542f0d28312d5f0 (patch)
tree4bd71b975c5d769170b20d13ec7f9662bdb31c2d
parente4b1ebcec3000118eafe25944feddba130c1441a (diff)
parentca1139488da3dcf267b2ce8490f778d69af0d0e4 (diff)
Merge branch 'akpm/master'
-rw-r--r--Documentation/ABI/testing/sysfs-class-bdi8
-rw-r--r--Documentation/cgroups/memory.txt54
-rw-r--r--Documentation/filesystems/vfat.txt10
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--Documentation/kmemleak.txt1
-rw-r--r--Documentation/sysctl/kernel.txt17
-rw-r--r--Documentation/vm/remap_file_pages.txt27
-rw-r--r--MAINTAINERS25
-rw-r--r--arch/arm/kernel/isa.c6
-rw-r--r--arch/blackfin/kernel/ptrace.c8
-rw-r--r--arch/ia64/kernel/crash.c4
-rw-r--r--arch/ia64/kernel/perfmon.c6
-rw-r--r--arch/sparc/include/asm/irq_64.h2
-rw-r--r--arch/sparc/kernel/process_64.c18
-rw-r--r--arch/tile/kernel/proc.c4
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c18
-rw-r--r--block/bio.c48
-rw-r--r--block/bounce.c7
-rw-r--r--drivers/cdrom/cdrom.c10
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/gpio/gpio-zevio.c4
-rw-r--r--drivers/parport/procfs.c58
-rw-r--r--drivers/rtc/Kconfig4
-rw-r--r--drivers/rtc/rtc-s5m.c310
-rw-r--r--drivers/scsi/scsi_sysctl.c6
-rw-r--r--drivers/tty/sysrq.c21
-rw-r--r--drivers/w1/w1_int.c5
-rw-r--r--fs/coda/sysctl.c4
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/dlm/config.c26
-rw-r--r--fs/dlm/debug_fs.c34
-rw-r--r--fs/dlm/lockspace.c21
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/fat/cache.c70
-rw-r--r--fs/fat/fat.h6
-rw-r--r--fs/fat/file.c78
-rw-r--r--fs/fat/inode.c87
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fscache/main.c4
-rw-r--r--fs/inode.c2
-rw-r--r--fs/lockd/svc.c6
-rw-r--r--fs/nfs/nfs4sysctl.c6
-rw-r--r--fs/nfs/sysctl.c6
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/ntfs/sysctl.c4
-rw-r--r--fs/reiserfs/bitmap.c13
-rw-r--r--fs/reiserfs/stree.c8
-rw-r--r--fs/ufs/balloc.c30
-rw-r--r--fs/ufs/ialloc.c17
-rw-r--r--fs/ufs/super.c28
-rw-r--r--fs/ufs/ufs.h1
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/key.h2
-rw-r--r--include/linux/kmemleak.h4
-rw-r--r--include/linux/memcontrol.h14
-rw-r--r--include/linux/mfd/samsung/rtc.h86
-rw-r--r--include/linux/nmi.h12
-rw-r--r--include/linux/res_counter.h40
-rw-r--r--include/linux/rwsem.h25
-rw-r--r--init/main.c23
-rw-r--r--ipc/ipc_sysctl.c14
-rw-r--r--ipc/mq_sysctl.c12
-rw-r--r--kernel/kprobes.c36
-rw-r--r--kernel/locking/rwsem-xadd.c229
-rw-r--r--kernel/locking/rwsem.c31
-rw-r--r--kernel/res_counter.c2
-rw-r--r--kernel/sysctl.c13
-rw-r--r--kernel/utsname_sysctl.c6
-rw-r--r--kernel/watchdog.c49
-rw-r--r--lib/radix-tree.c6
-rw-r--r--mm/Makefile2
-rw-r--r--mm/backing-dev.c35
-rw-r--r--mm/fremap.c279
-rw-r--r--mm/kmemleak-test.c36
-rw-r--r--mm/kmemleak.c36
-rw-r--r--mm/memblock.c10
-rw-r--r--mm/memcontrol.c192
-rw-r--r--mm/mempolicy.c5
-rw-r--r--mm/mempool.c6
-rw-r--r--mm/mmap.c90
-rw-r--r--mm/nobootmem.c2
-rw-r--r--mm/nommu.c13
-rw-r--r--mm/page-writeback.c2
-rw-r--r--mm/page_alloc.c12
-rw-r--r--mm/page_io.c4
-rw-r--r--mm/vmscan.c71
88 files changed, 1659 insertions, 903 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index d773d5697cf5..3187a18af6da 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -53,3 +53,11 @@ stable_pages_required (read-only)
If set, the backing device requires that all pages comprising a write
request must not be changed until writeout is complete.
+
+strictlimit (read-write)
+
+ Forces per-BDI checks for the share of given device in the write-back
+ cache even before the global background dirty limit is reached. This
+ is useful in situations where the global limit is much higher than
+ affordable for given relatively slow (or untrusted) device. Turning
+ strictlimit on has no visible effect if max_ratio is equal to 100%.
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 2d70ceabadb7..5b3fdda3ad6b 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -57,6 +57,7 @@ Brief summary of control files.
memory.memsw.usage_in_bytes # show current res_counter usage for memory+Swap
(See 5.5 for details)
memory.limit_in_bytes # set/show limit of memory usage
+ memory.low_limit_in_bytes # set/show low limit for memory reclaim
memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage
memory.failcnt # show the number of memory usage hits limits
memory.memsw.failcnt # show the number of memory+Swap hits limits
@@ -236,23 +237,26 @@ it by cgroup.
2.5 Reclaim
Each cgroup maintains a per cgroup LRU which has the same structure as
-global VM. When a cgroup goes over its limit, we first try
-to reclaim memory from the cgroup so as to make space for the new
-pages that the cgroup has touched. If the reclaim is unsuccessful,
-an OOM routine is invoked to select and kill the bulkiest task in the
-cgroup. (See 10. OOM Control below.)
-
-The reclaim algorithm has not been modified for cgroups, except that
-pages that are selected for reclaiming come from the per-cgroup LRU
-list.
-
-NOTE: Reclaim does not work for the root cgroup, since we cannot set any
-limits on the root cgroup.
-
-Note2: When panic_on_oom is set to "2", the whole system will panic.
-
-When oom event notifier is registered, event will be delivered.
-(See oom_control section)
+global VM. Cgroups can get reclaimed basically under two conditions
+ - under global memory pressure when all cgroups are reclaimed
+ proportionally wrt. their LRU size in a round robin fashion
+ - when a cgroup or its hierarchical parent (see 6. Hierarchical support)
+ hits hard limit. If the reclaim is unsuccessful, an OOM routine is invoked
+ to select and kill the bulkiest task in the hiearchy. (See 10. OOM Control
+ below.)
+
+Groups might be also protected from both global and limit reclaim by
+low_limit_in_bytes knob. If the limit is non-zero the reclaim logic
+doesn't include groups (and their subgroups - see 6. Hierarchy support)
+which are bellow the low limit if there is other eligible cgroup in the
+reclaimed hierarchy. If all groups which participate reclaim are under
+their low limits then all of them are reclaimed and the low limit is
+ignored.
+
+Note: When panic_on_oom is set to "2", the whole system will panic.
+
+When oom event notifier is registered, event will be delivered to the root
+of the memory pressure which cannot be handled (See oom_control section)
2.6 Locking
@@ -473,6 +477,9 @@ About use_hierarchy, see Section 6.
write will still return success. In this case, it is expected that
memory.kmem.usage_in_bytes == memory.usage_in_bytes.
+ Please note that this knob is considered deprecated and will be removed
+ in the future.
+
About use_hierarchy, see Section 6.
5.2 stat file
@@ -536,14 +543,13 @@ Note:
5.3 swappiness
-Similar to /proc/sys/vm/swappiness, but only affecting reclaim that is
-triggered by this cgroup's hard limit. The tunable in the root cgroup
-corresponds to the global swappiness setting.
+Overrides /proc/sys/vm/swappiness for the particular group. The tunable
+in the root cgroup corresponds to the global swappiness setting.
-Please note that unlike the global swappiness, memcg knob set to 0
-really prevents from any swapping even if there is a swap storage
-available. This might lead to memcg OOM killer if there are no file
-pages to reclaim.
+Please note that unlike during the global reclaim, limit reclaim
+enforces that 0 swappiness really prevents from any swapping even if
+there is a swap storage available. This might lead to memcg OOM killer
+if there are no file pages to reclaim.
5.4 failcnt
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index ce1126aceed8..223c32171dcc 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -180,6 +180,16 @@ dos1xfloppy -- If set, use a fallback default BIOS Parameter Block
<bool>: 0,1,yes,no,true,false
+LIMITATION
+---------------------------------------------------------------------
+* The fallocated region of file is discarded at umount/evict time
+ when using fallocate with FALLOC_FL_KEEP_SIZE.
+ So, User should assume that fallocated region can be discarded at
+ last close if there is memory pressure resulting in eviction of
+ the inode from the memory. As a result, for any dependency on
+ the fallocated region, user should make sure to recheck fallocate
+ after reopening the file.
+
TODO
----------------------------------------------------------------------
* Need to get rid of the raw scanning stuff. Instead, always use
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ba1c2768a906..909bcd5647eb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3119,6 +3119,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
[KNL] Should the soft-lockup detector generate panics.
Format: <integer>
+ softlockup_all_cpu_backtrace=
+ [KNL] Should the soft-lockup detector generate
+ backtraces on all cpus.
+ Format: <integer>
+
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/laptops/sonypi.txt
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index a7563ec4ea7b..b772418bf064 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -142,6 +142,7 @@ kmemleak_alloc_percpu - notify of a percpu memory block allocation
kmemleak_free - notify of a memory block freeing
kmemleak_free_part - notify of a partial memory block freeing
kmemleak_free_percpu - notify of a percpu memory block freeing
+kmemleak_update_trace - update object allocation stack trace
kmemleak_not_leak - mark an object as not a leak
kmemleak_ignore - do not scan or report an object as leak
kmemleak_scan_area - add scan areas inside a memory block
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 708bb7f1b7e0..c14374e71775 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -75,6 +75,7 @@ show up in /proc/sys/kernel:
- shmall
- shmmax [ sysv ipc ]
- shmmni
+- softlockup_all_cpu_backtrace
- stop-a [ SPARC only ]
- sysrq ==> Documentation/sysrq.txt
- sysctl_writes_strict
@@ -783,6 +784,22 @@ via the /proc/sys interface:
==============================================================
+softlockup_all_cpu_backtrace:
+
+This value controls the soft lockup detector thread's behavior
+when a soft lockup condition is detected as to whether or not
+to gather further debug information. If enabled, each cpu will
+be issued an NMI and instructed to capture stack trace.
+
+This feature is only applicable for architectures which support
+NMI.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+==============================================================
+
tainted:
Non-zero if the kernel has been tainted. Numeric values, which
diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt
new file mode 100644
index 000000000000..f609142f406a
--- /dev/null
+++ b/Documentation/vm/remap_file_pages.txt
@@ -0,0 +1,27 @@
+The remap_file_pages() system call is used to create a nonlinear mapping,
+that is, a mapping in which the pages of the file are mapped into a
+nonsequential order in memory. The advantage of using remap_file_pages()
+over using repeated calls to mmap(2) is that the former approach does not
+require the kernel to create additional VMA (Virtual Memory Area) data
+structures.
+
+Supporting of nonlinear mapping requires significant amount of non-trivial
+code in kernel virtual memory subsystem including hot paths. Also to get
+nonlinear mapping work kernel need a way to distinguish normal page table
+entries from entries with file offset (pte_file). Kernel reserves flag in
+PTE for this purpose. PTE flags are scarce resource especially on some CPU
+architectures. It would be nice to free up the flag for other usage.
+
+Fortunately, there are not many users of remap_file_pages() in the wild.
+It's only known that one enterprise RDBMS implementation uses the syscall
+on 32-bit systems to map files bigger than can linearly fit into 32-bit
+virtual address space. This use-case is not critical anymore since 64-bit
+systems are widely available.
+
+The syscall is deprecated and replaced it with an emulation now. The
+emulation creates new VMAs instead of nonlinear mappings. It's going to
+work slower for rare users of remap_file_pages() but ABI is preserved.
+
+One side effect of emulation (apart from performance) is that user can hit
+vm.max_map_count limit more easily due to additional VMAs. See comment for
+DEFAULT_MAX_MAP_COUNT for more details on the limit.
diff --git a/MAINTAINERS b/MAINTAINERS
index b9fbd059bdcd..c0d1e360a9a1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -210,6 +210,13 @@ S: Supported
F: Documentation/scsi/aacraid.txt
F: drivers/scsi/aacraid/
+ABI/API
+L: linux-api@vger.kernel.org
+F: Documentation/ABI/
+F: include/linux/syscalls.h
+F: include/uapi/
+F: kernel/sys_ni.c
+
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
M: Hans de Goede <hdegoede@redhat.com>
L: lm-sensors@lm-sensors.org
@@ -647,7 +654,7 @@ F: sound/soc/codecs/ssm*
F: sound/soc/codecs/sigmadsp.*
ANALOG DEVICES INC ASOC DRIVERS
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
W: http://blackfin.uclinux.org/
S: Supported
@@ -1758,54 +1765,54 @@ F: include/uapi/linux/bfs_fs.h
BLACKFIN ARCHITECTURE
M: Steven Miao <realmz6@gmail.com>
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
T: git git://git.code.sf.net/p/adi-linux/code
W: http://blackfin.uclinux.org
S: Supported
F: arch/blackfin/
BLACKFIN EMAC DRIVER
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
F: drivers/net/ethernet/adi/
BLACKFIN RTC DRIVER
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
F: drivers/rtc/rtc-bfin.c
BLACKFIN SDH DRIVER
M: Sonic Zhang <sonic.zhang@analog.com>
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
F: drivers/mmc/host/bfin_sdh.c
BLACKFIN SERIAL DRIVER
M: Sonic Zhang <sonic.zhang@analog.com>
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
F: drivers/tty/serial/bfin_uart.c
BLACKFIN WATCHDOG DRIVER
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org
S: Supported
F: drivers/watchdog/bfin_wdt.c
BLACKFIN I2C TWI DRIVER
M: Sonic Zhang <sonic.zhang@analog.com>
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org/
S: Supported
F: drivers/i2c/busses/i2c-bfin-twi.c
BLACKFIN MEDIA DRIVER
M: Scott Jiang <scott.jiang.linux@gmail.com>
-L: adi-buildroot-devel@lists.sourceforge.net
+L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://blackfin.uclinux.org/
S: Supported
F: drivers/media/platform/blackfin/
diff --git a/arch/arm/kernel/isa.c b/arch/arm/kernel/isa.c
index 346485910732..9d1cf7156895 100644
--- a/arch/arm/kernel/isa.c
+++ b/arch/arm/kernel/isa.c
@@ -20,7 +20,7 @@
static unsigned int isa_membase, isa_portbase, isa_portshift;
-static ctl_table ctl_isa_vars[4] = {
+static struct ctl_table ctl_isa_vars[4] = {
{
.procname = "membase",
.data = &isa_membase,
@@ -44,7 +44,7 @@ static ctl_table ctl_isa_vars[4] = {
static struct ctl_table_header *isa_sysctl_header;
-static ctl_table ctl_isa[2] = {
+static struct ctl_table ctl_isa[2] = {
{
.procname = "isa",
.mode = 0555,
@@ -52,7 +52,7 @@ static ctl_table ctl_isa[2] = {
}, {}
};
-static ctl_table ctl_bus[2] = {
+static struct ctl_table ctl_bus[2] = {
{
.procname = "bus",
.mode = 0555,
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index e1f88e028cfe..8b8fe671b1a6 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -117,6 +117,7 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
int
is_user_addr_valid(struct task_struct *child, unsigned long start, unsigned long len)
{
+ bool valid;
struct vm_area_struct *vma;
struct sram_list_struct *sraml;
@@ -124,9 +125,12 @@ is_user_addr_valid(struct task_struct *child, unsigned long start, unsigned long
if (start + len < start)
return -EIO;
+ down_read(&child->mm->mmap_sem);
vma = find_vma(child->mm, start);
- if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
- return 0;
+ valid = vma && start >= vma->vm_start && start + len <= vma->vm_end;
+ up_read(&child->mm->mmap_sem);
+ if (valid)
+ return 0;
for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
if (start >= (unsigned long)sraml->addr
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index b942f4032d7a..2955f359e2a7 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -237,7 +237,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
}
#ifdef CONFIG_SYSCTL
-static ctl_table kdump_ctl_table[] = {
+static struct ctl_table kdump_ctl_table[] = {
{
.procname = "kdump_on_init",
.data = &kdump_on_init,
@@ -255,7 +255,7 @@ static ctl_table kdump_ctl_table[] = {
{ }
};
-static ctl_table sys_table[] = {
+static struct ctl_table sys_table[] = {
{
.procname = "kernel",
.mode = 0555,
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index d841c4bd6864..5845ffea67c3 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -521,7 +521,7 @@ static pmu_config_t *pmu_conf;
pfm_sysctl_t pfm_sysctl;
EXPORT_SYMBOL(pfm_sysctl);
-static ctl_table pfm_ctl_table[]={
+static struct ctl_table pfm_ctl_table[] = {
{
.procname = "debug",
.data = &pfm_sysctl.debug,
@@ -552,7 +552,7 @@ static ctl_table pfm_ctl_table[]={
},
{}
};
-static ctl_table pfm_sysctl_dir[] = {
+static struct ctl_table pfm_sysctl_dir[] = {
{
.procname = "perfmon",
.mode = 0555,
@@ -560,7 +560,7 @@ static ctl_table pfm_sysctl_dir[] = {
},
{}
};
-static ctl_table pfm_sysctl_root[] = {
+static struct ctl_table pfm_sysctl_root[] = {
{
.procname = "kernel",
.mode = 0555,
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index 375cffcf7dbd..91d219381306 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -89,7 +89,7 @@ static inline unsigned long get_softint(void)
return retval;
}
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
extern void *hardirq_stack[NR_CPUS];
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index b2988f25e230..027e09986194 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
}
}
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
{
struct thread_info *tp = current_thread_info();
struct pt_regs *regs = get_irq_regs();
@@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void)
spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
- memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
-
this_cpu = raw_smp_processor_id();
- __global_reg_self(tp, regs, this_cpu);
+ memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+ if (include_self)
+ __global_reg_self(tp, regs, this_cpu);
smp_fetch_global_regs();
for_each_online_cpu(cpu) {
- struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg;
+ struct global_reg_snapshot *gp;
+
+ if (!include_self && cpu == this_cpu)
+ continue;
+
+ gp = &global_cpu_snapshot[cpu].reg;
__global_reg_poll(gp);
@@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void)
static void sysrq_handle_globreg(int key)
{
- arch_trigger_all_cpu_backtrace();
+ arch_trigger_all_cpu_backtrace(true);
}
static struct sysrq_key_op sparc_globalreg_op = {
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 681100c59fda..6829a9508649 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -113,7 +113,7 @@ arch_initcall(proc_tile_init);
* Support /proc/sys/tile directory
*/
-static ctl_table unaligned_subtable[] = {
+static struct ctl_table unaligned_subtable[] = {
{
.procname = "enabled",
.data = &unaligned_fixup,
@@ -138,7 +138,7 @@ static ctl_table unaligned_subtable[] = {
{}
};
-static ctl_table unaligned_table[] = {
+static struct ctl_table unaligned_table[] = {
{
.procname = "unaligned_fixup",
.mode = 0555,
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index cb6cfcd034cf..a80cbb88ea91 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
extern void init_ISA_irqs(void);
#ifdef CONFIG_X86_LOCAL_APIC
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
#endif
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c3fcb5de5083..6a1e71bde323 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
/* "in progress" flag of arch_trigger_all_cpu_backtrace */
static unsigned long backtrace_flag;
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
{
int i;
+ int cpu = get_cpu();
- if (test_and_set_bit(0, &backtrace_flag))
+ if (test_and_set_bit(0, &backtrace_flag)) {
/*
* If there is already a trigger_all_cpu_backtrace() in progress
* (backtrace_flag == 1), don't output double cpu dump infos.
*/
+ put_cpu();
return;
+ }
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+ if (!include_self)
+ cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
- printk(KERN_INFO "sending NMI to all CPUs:\n");
- apic->send_IPI_all(NMI_VECTOR);
+ if (!cpumask_empty(to_cpumask(backtrace_mask))) {
+ pr_info("sending NMI to %s CPUs:\n",
+ (include_self ? "all" : "other"));
+ apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
+ }
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
if (cpumask_empty(to_cpumask(backtrace_mask)))
break;
mdelay(1);
+ touch_softlockup_watchdog();
}
clear_bit(0, &backtrace_flag);
smp_mb__after_atomic();
+ put_cpu();
}
static int
diff --git a/block/bio.c b/block/bio.c
index 1ba33657160f..0443694ccbb4 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -752,29 +752,31 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
return 0;
/*
- * we might lose a segment or two here, but rather that than
- * make this too complex.
+ * setup the new entry, we might clear it again later if we
+ * cannot add the page
+ */
+ bvec = &bio->bi_io_vec[bio->bi_vcnt];
+ bvec->bv_page = page;
+ bvec->bv_len = len;
+ bvec->bv_offset = offset;
+ bio->bi_vcnt++;
+ bio->bi_phys_segments++;
+
+ /*
+ * Perform a recount if the number of segments is greater
+ * than queue_max_segments(q).
*/
- while (bio->bi_phys_segments >= queue_max_segments(q)) {
+ while (bio->bi_phys_segments > queue_max_segments(q)) {
if (retried_segments)
- return 0;
+ goto failed;
retried_segments = 1;
blk_recount_segments(q, bio);
}
/*
- * setup the new entry, we might clear it again later if we
- * cannot add the page
- */
- bvec = &bio->bi_io_vec[bio->bi_vcnt];
- bvec->bv_page = page;
- bvec->bv_len = len;
- bvec->bv_offset = offset;
-
- /*
* if queue has other restrictions (eg varying max sector size
* depending on offset), it can specify a merge_bvec_fn in the
* queue to get further control
@@ -791,23 +793,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
* merge_bvec_fn() returns number of bytes it can accept
* at this offset
*/
- if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
- bvec->bv_page = NULL;
- bvec->bv_len = 0;
- bvec->bv_offset = 0;
- return 0;
- }
+ if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
+ goto failed;
}
/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
+ if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
bio->bi_flags &= ~(1 << BIO_SEG_VALID);
- bio->bi_vcnt++;
- bio->bi_phys_segments++;
done:
bio->bi_iter.bi_size += len;
return len;
+
+ failed:
+ bvec->bv_page = NULL;
+ bvec->bv_len = 0;
+ bvec->bv_offset = 0;
+ bio->bi_vcnt--;
+ blk_recount_segments(q, bio);
+ return 0;
}
/**
diff --git a/block/bounce.c b/block/bounce.c
index 523918b8c6dc..ab21ba203d5c 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -3,6 +3,8 @@
* - Split from highmem.c
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
@@ -15,6 +17,7 @@
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/bootmem.h>
+#include <linux/printk.h>
#include <asm/tlbflush.h>
#include <trace/events/block.h>
@@ -34,7 +37,7 @@ static __init int init_emergency_pool(void)
page_pool = mempool_create_page_pool(POOL_SIZE, 0);
BUG_ON(!page_pool);
- printk("bounce pool size: %d pages\n", POOL_SIZE);
+ pr_info("pool size: %d pages\n", POOL_SIZE);
return 0;
}
@@ -86,7 +89,7 @@ int init_emergency_isa_pool(void)
mempool_free_pages, (void *) 0);
BUG_ON(!isa_page_pool);
- printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
+ pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
return 0;
}
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 49ac5662585b..2a44767891f5 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3470,7 +3470,7 @@ static int cdrom_print_info(const char *header, int val, char *info,
return 0;
}
-static int cdrom_sysctl_info(ctl_table *ctl, int write,
+static int cdrom_sysctl_info(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int pos;
@@ -3583,7 +3583,7 @@ static void cdrom_update_settings(void)
mutex_unlock(&cdrom_mutex);
}
-static int cdrom_sysctl_handler(ctl_table *ctl, int write,
+static int cdrom_sysctl_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -3609,7 +3609,7 @@ static int cdrom_sysctl_handler(ctl_table *ctl, int write,
}
/* Place files in /proc/sys/dev/cdrom */
-static ctl_table cdrom_table[] = {
+static struct ctl_table cdrom_table[] = {
{
.procname = "info",
.data = &cdrom_sysctl_settings.info,
@@ -3655,7 +3655,7 @@ static ctl_table cdrom_table[] = {
{ }
};
-static ctl_table cdrom_cdrom_table[] = {
+static struct ctl_table cdrom_cdrom_table[] = {
{
.procname = "cdrom",
.maxlen = 0,
@@ -3666,7 +3666,7 @@ static ctl_table cdrom_cdrom_table[] = {
};
/* Make sure that /proc/sys/dev is there */
-static ctl_table cdrom_root_table[] = {
+static struct ctl_table cdrom_root_table[] = {
{
.procname = "dev",
.maxlen = 0,
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 06cea7ff3a7c..4ad71ef2cd59 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1582,10 +1582,10 @@ static int proc_do_uuid(struct ctl_table *table, int write,
/*
* Return entropy available scaled to integral bits
*/
-static int proc_do_entropy(ctl_table *table, int write,
+static int proc_do_entropy(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- ctl_table fake_table;
+ struct ctl_table fake_table;
int entropy_count;
entropy_count = *(int *)table->data >> ENTROPY_SHIFT;
diff --git a/drivers/gpio/gpio-zevio.c b/drivers/gpio/gpio-zevio.c
index 54e54e4cc6c4..cd71e5769a76 100644
--- a/drivers/gpio/gpio-zevio.c
+++ b/drivers/gpio/gpio-zevio.c
@@ -18,6 +18,10 @@
#include <linux/slab.h>
#include <linux/gpio.h>
+#ifndef IOMEM
+#define IOMEM(x) ((void __force __iomem *)(x))
+#endif
+
/*
* Memory layout:
* This chip has four gpio sections, each controls 8 GPIOs.
diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index 92ed045a5f93..3b470801a04f 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -31,7 +31,7 @@
#define PARPORT_MIN_SPINTIME_VALUE 1
#define PARPORT_MAX_SPINTIME_VALUE 1000
-static int do_active_device(ctl_table *table, int write,
+static int do_active_device(struct ctl_table *table, int write,
void __user *result, size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
@@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write,
}
#ifdef CONFIG_PARPORT_1284
-static int do_autoprobe(ctl_table *table, int write,
+static int do_autoprobe(struct ctl_table *table, int write,
void __user *result, size_t *lenp, loff_t *ppos)
{
struct parport_device_info *info = table->extra2;
@@ -110,9 +110,9 @@ static int do_autoprobe(ctl_table *table, int write,
}
#endif /* IEEE1284.3 support. */
-static int do_hardware_base_addr (ctl_table *table, int write,
- void __user *result,
- size_t *lenp, loff_t *ppos)
+static int do_hardware_base_addr(struct ctl_table *table, int write,
+ void __user *result,
+ size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
char buffer[20];
@@ -138,9 +138,9 @@ static int do_hardware_base_addr (ctl_table *table, int write,
return copy_to_user(result, buffer, len) ? -EFAULT : 0;
}
-static int do_hardware_irq (ctl_table *table, int write,
- void __user *result,
- size_t *lenp, loff_t *ppos)
+static int do_hardware_irq(struct ctl_table *table, int write,
+ void __user *result,
+ size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
char buffer[20];
@@ -166,9 +166,9 @@ static int do_hardware_irq (ctl_table *table, int write,
return copy_to_user(result, buffer, len) ? -EFAULT : 0;
}
-static int do_hardware_dma (ctl_table *table, int write,
- void __user *result,
- size_t *lenp, loff_t *ppos)
+static int do_hardware_dma(struct ctl_table *table, int write,
+ void __user *result,
+ size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
char buffer[20];
@@ -194,9 +194,9 @@ static int do_hardware_dma (ctl_table *table, int write,
return copy_to_user(result, buffer, len) ? -EFAULT : 0;
}
-static int do_hardware_modes (ctl_table *table, int write,
- void __user *result,
- size_t *lenp, loff_t *ppos)
+static int do_hardware_modes(struct ctl_table *table, int write,
+ void __user *result,
+ size_t *lenp, loff_t *ppos)
{
struct parport *port = (struct parport *)table->extra1;
char buffer[40];
@@ -255,11 +255,11 @@ PARPORT_MAX_SPINTIME_VALUE;
struct parport_sysctl_table {
struct ctl_table_header *sysctl_header;
- ctl_table vars[12];
- ctl_table device_dir[2];
- ctl_table port_dir[2];
- ctl_table parport_dir[2];
- ctl_table dev_dir[2];
+ struct ctl_table vars[12];
+ struct ctl_table device_dir[2];
+ struct ctl_table port_dir[2];
+ struct ctl_table parport_dir[2];
+ struct ctl_table dev_dir[2];
};
static const struct parport_sysctl_table parport_sysctl_template = {
@@ -369,12 +369,12 @@ static const struct parport_sysctl_table parport_sysctl_template = {
struct parport_device_sysctl_table
{
struct ctl_table_header *sysctl_header;
- ctl_table vars[2];
- ctl_table device_dir[2];
- ctl_table devices_root_dir[2];
- ctl_table port_dir[2];
- ctl_table parport_dir[2];
- ctl_table dev_dir[2];
+ struct ctl_table vars[2];
+ struct ctl_table device_dir[2];
+ struct ctl_table devices_root_dir[2];
+ struct ctl_table port_dir[2];
+ struct ctl_table parport_dir[2];
+ struct ctl_table dev_dir[2];
};
static const struct parport_device_sysctl_table
@@ -422,10 +422,10 @@ parport_device_sysctl_template = {
struct parport_default_sysctl_table
{
struct ctl_table_header *sysctl_header;
- ctl_table vars[3];
- ctl_table default_dir[2];
- ctl_table parport_dir[2];
- ctl_table dev_dir[2];
+ struct ctl_table vars[3];
+ struct ctl_table default_dir[2];
+ struct ctl_table parport_dir[2];
+ struct ctl_table dev_dir[2];
};
static struct parport_default_sysctl_table
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 71988b69eca6..0754f5c7cb3b 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -530,11 +530,11 @@ config RTC_DRV_RV3029C2
will be called rtc-rv3029c2.
config RTC_DRV_S5M
- tristate "Samsung S5M series"
+ tristate "Samsung S2M/S5M series"
depends on MFD_SEC_CORE
help
If you say yes here you will get support for the
- RTC of Samsung S5M PMIC series.
+ RTC of Samsung S2MPS14 and S5M PMIC series.
This driver can also be built as a module. If so, the module
will be called rtc-s5m.
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 8ec2d6a1dbe1..8f06250a0389 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 Samsung Electronics Co., Ltd
+ * Copyright (c) 2013-2014 Samsung Electronics Co., Ltd
* http://www.samsung.com
*
* Copyright (C) 2013 Google, Inc
@@ -17,27 +17,76 @@
#include <linux/module.h>
#include <linux/i2c.h>
-#include <linux/slab.h>
#include <linux/bcd.h>
-#include <linux/bitops.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
-#include <linux/delay.h>
#include <linux/platform_device.h>
#include <linux/mfd/samsung/core.h>
#include <linux/mfd/samsung/irq.h>
#include <linux/mfd/samsung/rtc.h>
+#include <linux/mfd/samsung/s2mps14.h>
/*
* Maximum number of retries for checking changes in UDR field
- * of SEC_RTC_UDR_CON register (to limit possible endless loop).
+ * of S5M_RTC_UDR_CON register (to limit possible endless loop).
*
* After writing to RTC registers (setting time or alarm) read the UDR field
- * in SEC_RTC_UDR_CON register. UDR is auto-cleared when data have
+ * in S5M_RTC_UDR_CON register. UDR is auto-cleared when data have
* been transferred.
*/
#define UDR_READ_RETRY_CNT 5
+/* Registers used by the driver which are different between chipsets. */
+struct s5m_rtc_reg_config {
+ /* Number of registers used for setting time/alarm0/alarm1 */
+ unsigned int regs_count;
+ /* First register for time, seconds */
+ unsigned int time;
+ /* RTC control register */
+ unsigned int ctrl;
+ /* First register for alarm 0, seconds */
+ unsigned int alarm0;
+ /* First register for alarm 1, seconds */
+ unsigned int alarm1;
+ /* SMPL/WTSR register */
+ unsigned int smpl_wtsr;
+ /*
+ * Register for update flag (UDR). Typically setting UDR field to 1
+ * will enable update of time or alarm register. Then it will be
+ * auto-cleared after successful update.
+ */
+ unsigned int rtc_udr_update;
+ /* Mask for UDR field in 'rtc_udr_update' register */
+ unsigned int rtc_udr_mask;
+};
+
+/* Register map for S5M8763 and S5M8767 */
+static const struct s5m_rtc_reg_config s5m_rtc_regs = {
+ .regs_count = 8,
+ .time = S5M_RTC_SEC,
+ .ctrl = S5M_ALARM1_CONF,
+ .alarm0 = S5M_ALARM0_SEC,
+ .alarm1 = S5M_ALARM1_SEC,
+ .smpl_wtsr = S5M_WTSR_SMPL_CNTL,
+ .rtc_udr_update = S5M_RTC_UDR_CON,
+ .rtc_udr_mask = S5M_RTC_UDR_MASK,
+};
+
+/*
+ * Register map for S2MPS14.
+ * It may be also suitable for S2MPS11 but this was not tested.
+ */
+static const struct s5m_rtc_reg_config s2mps_rtc_regs = {
+ .regs_count = 7,
+ .time = S2MPS_RTC_SEC,
+ .ctrl = S2MPS_RTC_CTRL,
+ .alarm0 = S2MPS_ALARM0_SEC,
+ .alarm1 = S2MPS_ALARM1_SEC,
+ .smpl_wtsr = S2MPS_WTSR_SMPL_CNTL,
+ .rtc_udr_update = S2MPS_RTC_UDR_CON,
+ .rtc_udr_mask = S2MPS_RTC_WUDR_MASK,
+};
+
struct s5m_rtc_info {
struct device *dev;
struct i2c_client *i2c;
@@ -48,13 +97,14 @@ struct s5m_rtc_info {
int device_type;
int rtc_24hr_mode;
bool wtsr_smpl;
+ const struct s5m_rtc_reg_config *regs;
};
static const struct regmap_config s5m_rtc_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
- .max_register = SEC_RTC_REG_MAX,
+ .max_register = S5M_RTC_REG_MAX,
};
static const struct regmap_config s2mps14_rtc_regmap_config = {
@@ -119,8 +169,9 @@ static inline int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info)
unsigned int data;
do {
- ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
- } while (--retry && (data & RTC_UDR_MASK) && !ret);
+ ret = regmap_read(info->regmap, info->regs->rtc_udr_update,
+ &data);
+ } while (--retry && (data & info->regs->rtc_udr_mask) && !ret);
if (!retry)
dev_err(info->dev, "waiting for UDR update, reached max number of retries\n");
@@ -128,21 +179,53 @@ static inline int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info)
return ret;
}
+static inline int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info,
+ struct rtc_wkalrm *alarm)
+{
+ int ret;
+ unsigned int val;
+
+ switch (info->device_type) {
+ case S5M8767X:
+ case S5M8763X:
+ ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val);
+ val &= S5M_ALARM0_STATUS;
+ break;
+ case S2MPS14X:
+ ret = regmap_read(info->s5m87xx->regmap_pmic, S2MPS14_REG_ST2,
+ &val);
+ val &= S2MPS_ALARM0_STATUS;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (ret < 0)
+ return ret;
+
+ if (val)
+ alarm->pending = 1;
+ else
+ alarm->pending = 0;
+
+ return 0;
+}
+
static inline int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
{
int ret;
unsigned int data;
- ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
+ ret = regmap_read(info->regmap, info->regs->rtc_udr_update, &data);
if (ret < 0) {
dev_err(info->dev, "failed to read update reg(%d)\n", ret);
return ret;
}
- data |= RTC_TIME_EN_MASK;
- data |= RTC_UDR_MASK;
+ data |= info->regs->rtc_udr_mask;
+ if (info->device_type == S5M8763X || info->device_type == S5M8767X)
+ data |= S5M_RTC_TIME_EN_MASK;
- ret = regmap_write(info->regmap, SEC_RTC_UDR_CON, data);
+ ret = regmap_write(info->regmap, info->regs->rtc_udr_update, data);
if (ret < 0) {
dev_err(info->dev, "failed to write update reg(%d)\n", ret);
return ret;
@@ -158,17 +241,27 @@ static inline int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
int ret;
unsigned int data;
- ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
+ ret = regmap_read(info->regmap, info->regs->rtc_udr_update, &data);
if (ret < 0) {
dev_err(info->dev, "%s: fail to read update reg(%d)\n",
__func__, ret);
return ret;
}
- data &= ~RTC_TIME_EN_MASK;
- data |= RTC_UDR_MASK;
+ data |= info->regs->rtc_udr_mask;
+ switch (info->device_type) {
+ case S5M8763X:
+ case S5M8767X:
+ data &= ~S5M_RTC_TIME_EN_MASK;
+ break;
+ case S2MPS14X:
+ data |= S2MPS_RTC_RUDR_MASK;
+ break;
+ default:
+ return -EINVAL;
+ }
- ret = regmap_write(info->regmap, SEC_RTC_UDR_CON, data);
+ ret = regmap_write(info->regmap, info->regs->rtc_udr_update, data);
if (ret < 0) {
dev_err(info->dev, "%s: fail to write update reg(%d)\n",
__func__, ret);
@@ -215,10 +308,22 @@ static void s5m8763_tm_to_data(struct rtc_time *tm, u8 *data)
static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
struct s5m_rtc_info *info = dev_get_drvdata(dev);
- u8 data[8];
+ u8 data[info->regs->regs_count];
int ret;
- ret = regmap_bulk_read(info->regmap, SEC_RTC_SEC, data, 8);
+ if (info->device_type == S2MPS14X) {
+ ret = regmap_update_bits(info->regmap,
+ info->regs->rtc_udr_update,
+ S2MPS_RTC_RUDR_MASK, S2MPS_RTC_RUDR_MASK);
+ if (ret) {
+ dev_err(dev,
+ "Failed to prepare registers for time reading: %d\n",
+ ret);
+ return ret;
+ }
+ }
+ ret = regmap_bulk_read(info->regmap, info->regs->time, data,
+ info->regs->regs_count);
if (ret < 0)
return ret;
@@ -228,6 +333,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
break;
case S5M8767X:
+ case S2MPS14X:
s5m8767_data_to_tm(data, tm, info->rtc_24hr_mode);
break;
@@ -245,7 +351,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
{
struct s5m_rtc_info *info = dev_get_drvdata(dev);
- u8 data[8];
+ u8 data[info->regs->regs_count];
int ret = 0;
switch (info->device_type) {
@@ -253,6 +359,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
s5m8763_tm_to_data(tm, data);
break;
case S5M8767X:
+ case S2MPS14X:
ret = s5m8767_tm_to_data(tm, data);
break;
default:
@@ -266,7 +373,8 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
- ret = regmap_raw_write(info->regmap, SEC_RTC_SEC, data, 8);
+ ret = regmap_raw_write(info->regmap, info->regs->time, data,
+ info->regs->regs_count);
if (ret < 0)
return ret;
@@ -278,70 +386,60 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
struct s5m_rtc_info *info = dev_get_drvdata(dev);
- u8 data[8];
+ u8 data[info->regs->regs_count];
unsigned int val;
int ret, i;
- ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+ ret = regmap_bulk_read(info->regmap, info->regs->alarm0, data,
+ info->regs->regs_count);
if (ret < 0)
return ret;
switch (info->device_type) {
case S5M8763X:
s5m8763_data_to_tm(data, &alrm->time);
- ret = regmap_read(info->regmap, SEC_ALARM0_CONF, &val);
+ ret = regmap_read(info->regmap, S5M_ALARM0_CONF, &val);
if (ret < 0)
return ret;
alrm->enabled = !!val;
-
- ret = regmap_read(info->regmap, SEC_RTC_STATUS, &val);
- if (ret < 0)
- return ret;
-
break;
case S5M8767X:
+ case S2MPS14X:
s5m8767_data_to_tm(data, &alrm->time, info->rtc_24hr_mode);
- dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
- 1900 + alrm->time.tm_year, 1 + alrm->time.tm_mon,
- alrm->time.tm_mday, alrm->time.tm_hour,
- alrm->time.tm_min, alrm->time.tm_sec,
- alrm->time.tm_wday);
-
alrm->enabled = 0;
- for (i = 0; i < 7; i++) {
+ for (i = 0; i < info->regs->regs_count; i++) {
if (data[i] & ALARM_ENABLE_MASK) {
alrm->enabled = 1;
break;
}
}
-
- alrm->pending = 0;
- ret = regmap_read(info->regmap, SEC_RTC_STATUS, &val);
- if (ret < 0)
- return ret;
break;
default:
return -EINVAL;
}
- if (val & ALARM0_STATUS)
- alrm->pending = 1;
- else
- alrm->pending = 0;
+ dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+ 1900 + alrm->time.tm_year, 1 + alrm->time.tm_mon,
+ alrm->time.tm_mday, alrm->time.tm_hour,
+ alrm->time.tm_min, alrm->time.tm_sec,
+ alrm->time.tm_wday);
+
+ ret = s5m_check_peding_alarm_interrupt(info, alrm);
return 0;
}
static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
{
- u8 data[8];
+ u8 data[info->regs->regs_count];
int ret, i;
struct rtc_time tm;
- ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+ ret = regmap_bulk_read(info->regmap, info->regs->alarm0, data,
+ info->regs->regs_count);
if (ret < 0)
return ret;
@@ -352,14 +450,16 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
switch (info->device_type) {
case S5M8763X:
- ret = regmap_write(info->regmap, SEC_ALARM0_CONF, 0);
+ ret = regmap_write(info->regmap, S5M_ALARM0_CONF, 0);
break;
case S5M8767X:
- for (i = 0; i < 7; i++)
+ case S2MPS14X:
+ for (i = 0; i < info->regs->regs_count; i++)
data[i] &= ~ALARM_ENABLE_MASK;
- ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+ ret = regmap_raw_write(info->regmap, info->regs->alarm0, data,
+ info->regs->regs_count);
if (ret < 0)
return ret;
@@ -377,11 +477,12 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
{
int ret;
- u8 data[8];
+ u8 data[info->regs->regs_count];
u8 alarm0_conf;
struct rtc_time tm;
- ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+ ret = regmap_bulk_read(info->regmap, info->regs->alarm0, data,
+ info->regs->regs_count);
if (ret < 0)
return ret;
@@ -393,10 +494,11 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
switch (info->device_type) {
case S5M8763X:
alarm0_conf = 0x77;
- ret = regmap_write(info->regmap, SEC_ALARM0_CONF, alarm0_conf);
+ ret = regmap_write(info->regmap, S5M_ALARM0_CONF, alarm0_conf);
break;
case S5M8767X:
+ case S2MPS14X:
data[RTC_SEC] |= ALARM_ENABLE_MASK;
data[RTC_MIN] |= ALARM_ENABLE_MASK;
data[RTC_HOUR] |= ALARM_ENABLE_MASK;
@@ -408,7 +510,8 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
if (data[RTC_YEAR1] & 0x7f)
data[RTC_YEAR1] |= ALARM_ENABLE_MASK;
- ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+ ret = regmap_raw_write(info->regmap, info->regs->alarm0, data,
+ info->regs->regs_count);
if (ret < 0)
return ret;
ret = s5m8767_rtc_set_alarm_reg(info);
@@ -425,7 +528,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
struct s5m_rtc_info *info = dev_get_drvdata(dev);
- u8 data[8];
+ u8 data[info->regs->regs_count];
int ret;
switch (info->device_type) {
@@ -434,6 +537,7 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
break;
case S5M8767X:
+ case S2MPS14X:
s5m8767_tm_to_data(&alrm->time, data);
break;
@@ -450,7 +554,8 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
if (ret < 0)
return ret;
- ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+ ret = regmap_raw_write(info->regmap, info->regs->alarm0, data,
+ info->regs->regs_count);
if (ret < 0)
return ret;
@@ -495,7 +600,7 @@ static const struct rtc_class_ops s5m_rtc_ops = {
static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
{
int ret;
- ret = regmap_update_bits(info->regmap, SEC_WTSR_SMPL_CNTL,
+ ret = regmap_update_bits(info->regmap, info->regs->smpl_wtsr,
WTSR_ENABLE_MASK,
enable ? WTSR_ENABLE_MASK : 0);
if (ret < 0)
@@ -506,7 +611,7 @@ static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
static void s5m_rtc_enable_smpl(struct s5m_rtc_info *info, bool enable)
{
int ret;
- ret = regmap_update_bits(info->regmap, SEC_WTSR_SMPL_CNTL,
+ ret = regmap_update_bits(info->regmap, info->regs->smpl_wtsr,
SMPL_ENABLE_MASK,
enable ? SMPL_ENABLE_MASK : 0);
if (ret < 0)
@@ -517,50 +622,41 @@ static void s5m_rtc_enable_smpl(struct s5m_rtc_info *info, bool enable)
static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
{
u8 data[2];
- unsigned int tp_read;
int ret;
- struct rtc_time tm;
- ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &tp_read);
- if (ret < 0) {
- dev_err(info->dev, "%s: fail to read control reg(%d)\n",
- __func__, ret);
- return ret;
- }
+ switch (info->device_type) {
+ case S5M8763X:
+ case S5M8767X:
+ /* UDR update time. Default of 7.32 ms is too long. */
+ ret = regmap_update_bits(info->regmap, S5M_RTC_UDR_CON,
+ S5M_RTC_UDR_T_MASK, S5M_RTC_UDR_T_450_US);
+ if (ret < 0)
+ dev_err(info->dev, "%s: fail to change UDR time: %d\n",
+ __func__, ret);
- /* Set RTC control register : Binary mode, 24hour mode */
- data[0] = (1 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
- data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+ /* Set RTC control register : Binary mode, 24hour mode */
+ data[0] = (1 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+ data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+
+ ret = regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2);
+ break;
+
+ case S2MPS14X:
+ data[0] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+ ret = regmap_write(info->regmap, info->regs->ctrl, data[0]);
+ break;
+
+ default:
+ return -EINVAL;
+ }
info->rtc_24hr_mode = 1;
- ret = regmap_raw_write(info->regmap, SEC_ALARM0_CONF, data, 2);
if (ret < 0) {
dev_err(info->dev, "%s: fail to write controlm reg(%d)\n",
__func__, ret);
return ret;
}
- /* In first boot time, Set rtc time to 1/1/2012 00:00:00(SUN) */
- if ((tp_read & RTC_TCON_MASK) == 0) {
- dev_dbg(info->dev, "rtc init\n");
- tm.tm_sec = 0;
- tm.tm_min = 0;
- tm.tm_hour = 0;
- tm.tm_wday = 0;
- tm.tm_mday = 1;
- tm.tm_mon = 0;
- tm.tm_year = 112;
- tm.tm_yday = 0;
- tm.tm_isdst = 0;
- ret = s5m_rtc_set_time(info->dev, &tm);
- }
-
- ret = regmap_update_bits(info->regmap, SEC_RTC_UDR_CON,
- RTC_TCON_MASK, tp_read | RTC_TCON_MASK);
- if (ret < 0)
- dev_err(info->dev, "%s: fail to update TCON reg(%d)\n",
- __func__, ret);
-
return ret;
}
@@ -570,7 +666,7 @@ static int s5m_rtc_probe(struct platform_device *pdev)
struct sec_platform_data *pdata = s5m87xx->pdata;
struct s5m_rtc_info *info;
const struct regmap_config *regmap_cfg;
- int ret;
+ int ret, alarm_irq;
if (!pdata) {
dev_err(pdev->dev.parent, "Platform data not supplied\n");
@@ -584,12 +680,18 @@ static int s5m_rtc_probe(struct platform_device *pdev)
switch (pdata->device_type) {
case S2MPS14X:
regmap_cfg = &s2mps14_rtc_regmap_config;
+ info->regs = &s2mps_rtc_regs;
+ alarm_irq = S2MPS14_IRQ_RTCA0;
break;
case S5M8763X:
regmap_cfg = &s5m_rtc_regmap_config;
+ info->regs = &s5m_rtc_regs;
+ alarm_irq = S5M8763_IRQ_ALARM0;
break;
case S5M8767X:
regmap_cfg = &s5m_rtc_regmap_config;
+ info->regs = &s5m_rtc_regs;
+ alarm_irq = S5M8767_IRQ_RTCA1;
break;
default:
dev_err(&pdev->dev, "Device type is not supported by RTC driver\n");
@@ -615,20 +717,11 @@ static int s5m_rtc_probe(struct platform_device *pdev)
info->device_type = s5m87xx->device_type;
info->wtsr_smpl = s5m87xx->wtsr_smpl;
- switch (pdata->device_type) {
- case S5M8763X:
- info->irq = regmap_irq_get_virq(s5m87xx->irq_data,
- S5M8763_IRQ_ALARM0);
- break;
-
- case S5M8767X:
- info->irq = regmap_irq_get_virq(s5m87xx->irq_data,
- S5M8767_IRQ_RTCA1);
- break;
-
- default:
+ info->irq = regmap_irq_get_virq(s5m87xx->irq_data, alarm_irq);
+ if (info->irq <= 0) {
ret = -EINVAL;
- dev_err(&pdev->dev, "Unsupported device type: %d\n", ret);
+ dev_err(&pdev->dev, "Failed to get virtual IRQ %d\n",
+ alarm_irq);
goto err;
}
@@ -676,7 +769,7 @@ static void s5m_rtc_shutdown(struct platform_device *pdev)
if (info->wtsr_smpl) {
for (i = 0; i < 3; i++) {
s5m_rtc_enable_wtsr(info, false);
- regmap_read(info->regmap, SEC_WTSR_SMPL_CNTL, &val);
+ regmap_read(info->regmap, info->regs->smpl_wtsr, &val);
pr_debug("%s: WTSR_SMPL reg(0x%02x)\n", __func__, val);
if (val & WTSR_ENABLE_MASK)
pr_emerg("%s: fail to disable WTSR\n",
@@ -730,7 +823,8 @@ static int s5m_rtc_suspend(struct device *dev)
static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume);
static const struct platform_device_id s5m_rtc_id[] = {
- { "s5m-rtc", 0 },
+ { "s5m-rtc", S5M8767X },
+ { "s2mps14-rtc", S2MPS14X },
};
static struct platform_driver s5m_rtc_driver = {
@@ -749,6 +843,6 @@ module_platform_driver(s5m_rtc_driver);
/* Module information */
MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
-MODULE_DESCRIPTION("Samsung S5M RTC driver");
+MODULE_DESCRIPTION("Samsung S5M/S2MPS14 RTC driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:s5m-rtc");
diff --git a/drivers/scsi/scsi_sysctl.c b/drivers/scsi/scsi_sysctl.c
index 2b6b93f7d8ef..546f16299ef9 100644
--- a/drivers/scsi/scsi_sysctl.c
+++ b/drivers/scsi/scsi_sysctl.c
@@ -12,7 +12,7 @@
#include "scsi_priv.h"
-static ctl_table scsi_table[] = {
+static struct ctl_table scsi_table[] = {
{ .procname = "logging_level",
.data = &scsi_logging_level,
.maxlen = sizeof(scsi_logging_level),
@@ -21,14 +21,14 @@ static ctl_table scsi_table[] = {
{ }
};
-static ctl_table scsi_dir_table[] = {
+static struct ctl_table scsi_dir_table[] = {
{ .procname = "scsi",
.mode = 0555,
.child = scsi_table },
{ }
};
-static ctl_table scsi_root_table[] = {
+static struct ctl_table scsi_root_table[] = {
{ .procname = "dev",
.mode = 0555,
.child = scsi_dir_table },
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index b767a64e49d9..454b65898e2c 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -46,6 +46,7 @@
#include <linux/jiffies.h>
#include <linux/syscalls.h>
#include <linux/of.h>
+#include <linux/rcupdate.h>
#include <asm/ptrace.h>
#include <asm/irq_regs.h>
@@ -510,9 +511,9 @@ void __handle_sysrq(int key, bool check_mask)
struct sysrq_key_op *op_p;
int orig_log_level;
int i;
- unsigned long flags;
- spin_lock_irqsave(&sysrq_key_table_lock, flags);
+ rcu_sysrq_start();
+ rcu_read_lock();
/*
* Raise the apparent loglevel to maximum so that the sysrq header
* is shown to provide the user with positive feedback. We do not
@@ -554,7 +555,8 @@ void __handle_sysrq(int key, bool check_mask)
printk("\n");
console_loglevel = orig_log_level;
}
- spin_unlock_irqrestore(&sysrq_key_table_lock, flags);
+ rcu_read_unlock();
+ rcu_sysrq_end();
}
void handle_sysrq(int key)
@@ -1043,16 +1045,23 @@ static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p,
struct sysrq_key_op *remove_op_p)
{
int retval;
- unsigned long flags;
- spin_lock_irqsave(&sysrq_key_table_lock, flags);
+ spin_lock(&sysrq_key_table_lock);
if (__sysrq_get_key_op(key) == remove_op_p) {
__sysrq_put_key_op(key, insert_op_p);
retval = 0;
} else {
retval = -1;
}
- spin_unlock_irqrestore(&sysrq_key_table_lock, flags);
+ spin_unlock(&sysrq_key_table_lock);
+
+ /*
+ * A concurrent __handle_sysrq either got the old op or the new op.
+ * Wait for it to go away before returning, so the code for an old
+ * op is not freed (eg. on module unload) while it is in use.
+ */
+ synchronize_rcu();
+
return retval;
}
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c
index 9b084db739c7..3aaa15f6b7e9 100644
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -92,9 +92,8 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl,
err = device_register(&dev->dev);
if (err) {
printk(KERN_ERR "Failed to register master device. err=%d\n", err);
- memset(dev, 0, sizeof(struct w1_master));
- kfree(dev);
- dev = NULL;
+ put_device(&dev->dev);
+ return NULL;
}
return dev;
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index af56ad56a89a..34218a8a28cd 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -14,7 +14,7 @@
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *fs_table_header;
-static ctl_table coda_table[] = {
+static struct ctl_table coda_table[] = {
{
.procname = "timeout",
.data = &coda_timeout,
@@ -39,7 +39,7 @@ static ctl_table coda_table[] = {
{}
};
-static ctl_table fs_table[] = {
+static struct ctl_table fs_table[] = {
{
.procname = "coda",
.mode = 0555,
diff --git a/fs/dcache.c b/fs/dcache.c
index 17010fd1cecd..ac6db5cd6b70 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -150,7 +150,7 @@ static long get_nr_dentry_unused(void)
return sum < 0 ? 0 : sum;
}
-int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
+int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
size_t *lenp, loff_t *ppos)
{
dentry_stat.nr_dentry = get_nr_dentry();
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 76feb4b60fa6..d521bddf876d 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -157,11 +157,13 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
const char *buf, size_t len)
{
unsigned int x;
+ int rc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
-
- x = simple_strtoul(buf, NULL, 0);
+ rc = kstrtouint(buf, 0, &x);
+ if (rc)
+ return rc;
if (check_zero && !x)
return -EINVAL;
@@ -730,7 +732,10 @@ static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf)
static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf,
size_t len)
{
- cm->nodeid = simple_strtol(buf, NULL, 0);
+ int rc = kstrtoint(buf, 0, &cm->nodeid);
+
+ if (rc)
+ return rc;
return len;
}
@@ -742,7 +747,10 @@ static ssize_t comm_local_read(struct dlm_comm *cm, char *buf)
static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
size_t len)
{
- cm->local= simple_strtol(buf, NULL, 0);
+ int rc = kstrtoint(buf, 0, &cm->local);
+
+ if (rc)
+ return rc;
if (cm->local && !local_comm)
local_comm = cm;
return len;
@@ -846,7 +854,10 @@ static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
size_t len)
{
uint32_t seq = 0;
- nd->nodeid = simple_strtol(buf, NULL, 0);
+ int rc = kstrtoint(buf, 0, &nd->nodeid);
+
+ if (rc)
+ return rc;
dlm_comm_seq(nd->nodeid, &seq);
nd->comm_seq = seq;
return len;
@@ -860,7 +871,10 @@ static ssize_t node_weight_read(struct dlm_node *nd, char *buf)
static ssize_t node_weight_write(struct dlm_node *nd, const char *buf,
size_t len)
{
- nd->weight = simple_strtol(buf, NULL, 0);
+ int rc = kstrtoint(buf, 0, &nd->weight);
+
+ if (rc)
+ return rc;
return len;
}
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index b969deef9ebb..8d77ba7b1756 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -68,7 +68,7 @@ static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
if (lkb->lkb_wait_type)
seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
- return seq_printf(s, "\n");
+ return seq_puts(s, "\n");
}
static int print_format1(struct dlm_rsb *res, struct seq_file *s)
@@ -92,31 +92,31 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
}
if (res->res_nodeid > 0)
- rv = seq_printf(s, "\" \nLocal Copy, Master is node %d\n",
+ rv = seq_printf(s, "\"\nLocal Copy, Master is node %d\n",
res->res_nodeid);
else if (res->res_nodeid == 0)
- rv = seq_printf(s, "\" \nMaster Copy\n");
+ rv = seq_puts(s, "\"\nMaster Copy\n");
else if (res->res_nodeid == -1)
- rv = seq_printf(s, "\" \nLooking up master (lkid %x)\n",
+ rv = seq_printf(s, "\"\nLooking up master (lkid %x)\n",
res->res_first_lkid);
else
- rv = seq_printf(s, "\" \nInvalid master %d\n",
+ rv = seq_printf(s, "\"\nInvalid master %d\n",
res->res_nodeid);
if (rv)
goto out;
/* Print the LVB: */
if (res->res_lvbptr) {
- seq_printf(s, "LVB: ");
+ seq_puts(s, "LVB: ");
for (i = 0; i < lvblen; i++) {
if (i == lvblen / 2)
- seq_printf(s, "\n ");
+ seq_puts(s, "\n ");
seq_printf(s, "%02x ",
(unsigned char) res->res_lvbptr[i]);
}
if (rsb_flag(res, RSB_VALNOTVALID))
- seq_printf(s, " (INVALID)");
- rv = seq_printf(s, "\n");
+ seq_puts(s, " (INVALID)");
+ rv = seq_puts(s, "\n");
if (rv)
goto out;
}
@@ -133,21 +133,21 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
}
/* Print the locks attached to this resource */
- seq_printf(s, "Granted Queue\n");
+ seq_puts(s, "Granted Queue\n");
list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) {
rv = print_format1_lock(s, lkb, res);
if (rv)
goto out;
}
- seq_printf(s, "Conversion Queue\n");
+ seq_puts(s, "Conversion Queue\n");
list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) {
rv = print_format1_lock(s, lkb, res);
if (rv)
goto out;
}
- seq_printf(s, "Waiting Queue\n");
+ seq_puts(s, "Waiting Queue\n");
list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) {
rv = print_format1_lock(s, lkb, res);
if (rv)
@@ -157,13 +157,13 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
if (list_empty(&res->res_lookup))
goto out;
- seq_printf(s, "Lookup Queue\n");
+ seq_puts(s, "Lookup Queue\n");
list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) {
rv = seq_printf(s, "%08x %s", lkb->lkb_id,
print_lockmode(lkb->lkb_rqmode));
if (lkb->lkb_wait_type)
seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
- rv = seq_printf(s, "\n");
+ rv = seq_puts(s, "\n");
}
out:
unlock_rsb(res);
@@ -300,7 +300,7 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
else
seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
}
- rv = seq_printf(s, "\n");
+ rv = seq_puts(s, "\n");
if (rv)
goto out;
@@ -311,7 +311,7 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
for (i = 0; i < lvblen; i++)
seq_printf(s, " %02x", (unsigned char)r->res_lvbptr[i]);
- rv = seq_printf(s, "\n");
+ rv = seq_puts(s, "\n");
if (rv)
goto out;
@@ -377,7 +377,7 @@ static int print_format4(struct dlm_rsb *r, struct seq_file *s)
else
seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
}
- rv = seq_printf(s, "\n");
+ rv = seq_puts(s, "\n");
out:
unlock_rsb(r);
return rv;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 04d6398c1f1c..f3e72787e7f9 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -35,8 +35,11 @@ static struct task_struct * scand_task;
static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
{
ssize_t ret = len;
- int n = simple_strtol(buf, NULL, 0);
+ int n;
+ int rc = kstrtoint(buf, 0, &n);
+ if (rc)
+ return rc;
ls = dlm_find_lockspace_local(ls->ls_local_handle);
if (!ls)
return -EINVAL;
@@ -57,7 +60,10 @@ static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
{
- ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
+ int rc = kstrtoint(buf, 0, &ls->ls_uevent_result);
+
+ if (rc)
+ return rc;
set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
wake_up(&ls->ls_uevent_wait);
return len;
@@ -70,7 +76,10 @@ static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
{
- ls->ls_global_id = simple_strtoul(buf, NULL, 0);
+ int rc = kstrtouint(buf, 0, &ls->ls_global_id);
+
+ if (rc)
+ return rc;
return len;
}
@@ -81,7 +90,11 @@ static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
{
- int val = simple_strtoul(buf, NULL, 0);
+ int val;
+ int rc = kstrtoint(buf, 0, &val);
+
+ if (rc)
+ return rc;
if (val == 1)
set_bit(LSFL_NODIR, &ls->ls_flags);
return len;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 9280202e488c..1de7294aad20 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -50,7 +50,7 @@ static void drop_slab(void)
} while (nr_objects > 10);
}
-int drop_caches_sysctl_handler(ctl_table *table, int write,
+int drop_caches_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int ret;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index af903128891c..b73e0621ce9e 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -293,7 +293,7 @@ static LIST_HEAD(tfile_check_list);
static long zero;
static long long_max = LONG_MAX;
-ctl_table epoll_table[] = {
+struct ctl_table epoll_table[] = {
{
.procname = "max_user_watches",
.data = &max_user_watches,
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 91ad9e1c9441..e26bc9a22ac9 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -303,6 +303,31 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
return dclus;
}
+static int fat_get_mapped_cluster(struct inode *inode, sector_t sector,
+ sector_t last_block,
+ unsigned long *mapped_blocks, sector_t *bmap)
+{
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int cluster, offset;
+
+ cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
+ offset = sector & (sbi->sec_per_clus - 1);
+ cluster = fat_bmap_cluster(inode, cluster);
+
+ if (cluster < 0)
+ return cluster;
+
+ else if (cluster) {
+ *bmap = fat_clus_to_blknr(sbi, cluster) + offset;
+ *mapped_blocks = sbi->sec_per_clus - offset;
+ if (*mapped_blocks > last_block - sector)
+ *mapped_blocks = last_block - sector;
+ }
+
+ return 0;
+}
+
int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
unsigned long *mapped_blocks, int create)
{
@@ -311,7 +336,6 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
const unsigned long blocksize = sb->s_blocksize;
const unsigned char blocksize_bits = sb->s_blocksize_bits;
sector_t last_block;
- int cluster, offset;
*phys = 0;
*mapped_blocks = 0;
@@ -329,25 +353,39 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
return 0;
/*
- * ->mmu_private can access on only allocation path.
- * (caller must hold ->i_mutex)
+ * Both ->mmu_private and ->i_disksize can access
+ * on only allocation path. (caller must hold ->i_mutex)
*/
- last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
>> blocksize_bits;
if (sector >= last_block)
return 0;
}
- cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
- offset = sector & (sbi->sec_per_clus - 1);
- cluster = fat_bmap_cluster(inode, cluster);
- if (cluster < 0)
- return cluster;
- else if (cluster) {
- *phys = fat_clus_to_blknr(sbi, cluster) + offset;
- *mapped_blocks = sbi->sec_per_clus - offset;
- if (*mapped_blocks > last_block - sector)
- *mapped_blocks = last_block - sector;
- }
- return 0;
+ return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks,
+ phys);
+}
+
+int fat_bmap2(struct inode *inode, sector_t sector,
+ unsigned long *mapped_blocks, struct buffer_head *bh_result,
+ int create, sector_t *bmap)
+{
+ struct super_block *sb = inode->i_sb;
+ sector_t last_block;
+ const unsigned long blocksize = sb->s_blocksize;
+ const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ BUG_ON(create != 0);
+
+ *bmap = 0;
+ *mapped_blocks = 0;
+
+ last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
+ >> blocksize_bits;
+
+ if (sector >= last_block)
+ return 0;
+
+ return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks,
+ bmap);
}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e0c4ba39a377..13b7202bd651 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -119,7 +119,8 @@ struct msdos_inode_info {
unsigned int cache_valid_id;
/* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
- loff_t mmu_private; /* physically allocated size */
+ loff_t mmu_private; /* physically allocated size (initialized) */
+ loff_t i_disksize; /* physically allocated size (uninitialized) */
int i_start; /* first cluster or 0 */
int i_logstart; /* logical first cluster */
@@ -290,6 +291,9 @@ extern int fat_get_cluster(struct inode *inode, int cluster,
int *fclus, int *dclus);
extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
unsigned long *mapped_blocks, int create);
+extern int fat_bmap2(struct inode *inode, sector_t sector,
+ unsigned long *mapped_blocks,
+ struct buffer_head *bh_result, int create, sector_t *bmap);
/* fat/dir.c */
extern const struct file_operations fat_dir_operations;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 85f79a89e747..92e9e753b554 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -17,8 +17,12 @@
#include <linux/blkdev.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
+#include <linux/falloc.h>
#include "fat.h"
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len);
+
static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
{
u32 attr;
@@ -182,6 +186,7 @@ const struct file_operations fat_file_operations = {
#endif
.fsync = fat_file_fsync,
.splice_read = generic_file_splice_read,
+ .fallocate = fat_fallocate,
};
static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -220,6 +225,75 @@ out:
return err;
}
+/*
+ * Preallocate space for a file. This implements fat's fallocate file
+ * operation, which gets called from sys_fallocate system call. User
+ * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ */
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ int cluster;
+ int nr_cluster; /* Number of clusters to be allocated */
+ loff_t mm_bytes; /* Number of bytes to be allocated for file */
+ struct inode *inode = file->f_mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int err = 0;
+
+ /* No support for hole punch or other fallocate flags. */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /* No support for dir */
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&inode->i_mutex);
+ if ((offset + len) <= MSDOS_I(inode)->i_disksize)
+ goto error;
+
+ err = inode_newsize_ok(inode, (len + offset));
+ if (err)
+ goto error;
+
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ /* First compute the number of clusters to be allocated */
+ mm_bytes = offset + len - round_up(MSDOS_I(inode)->i_disksize,
+ sbi->cluster_size);
+ nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >>
+ sbi->cluster_bits;
+
+ /* Start the allocation.We are not zeroing out the clusters */
+ while (nr_cluster-- > 0) {
+ err = fat_alloc_clusters(inode, &cluster, 1);
+ if (err) {
+ fat_msg(sb, KERN_ERR,
+ "fat_fallocate(): fat_alloc_clusters() error");
+ goto error;
+ }
+ err = fat_chain_add(inode, cluster, 1);
+ if (err) {
+ fat_free_clusters(inode, cluster);
+ goto error;
+ }
+ MSDOS_I(inode)->i_disksize += sbi->cluster_size;
+ }
+ } else {
+ /* This is just an expanding truncate */
+ err = fat_cont_expand(inode, (offset + len));
+ if (err)
+ fat_msg(sb, KERN_ERR,
+ "fat_fallocate(): fat_cont_expand() error");
+ }
+
+error:
+ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
/* Free all clusters after the skip'th cluster. */
static int fat_free(struct inode *inode, int skip)
{
@@ -300,8 +374,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
* This protects against truncating a file bigger than it was then
* trying to write into the hole.
*/
- if (MSDOS_I(inode)->mmu_private > offset)
+ if (MSDOS_I(inode)->i_disksize > offset) {
MSDOS_I(inode)->mmu_private = offset;
+ MSDOS_I(inode)->i_disksize = offset;
+ }
nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 17c099a8bf9c..473143203da8 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -116,6 +116,25 @@ static int fat_add_cluster(struct inode *inode)
return err;
}
+static void check_fallocated_region(struct inode *inode, sector_t iblock,
+ unsigned long *max_blocks, struct buffer_head *bh_result)
+{
+ struct super_block *sb = inode->i_sb;
+ sector_t last_block, disk_block;
+ const unsigned long blocksize = sb->s_blocksize;
+ const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ >> blocksize_bits;
+ disk_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
+ >> blocksize_bits;
+ if (iblock >= last_block && iblock <= disk_block) {
+ MSDOS_I(inode)->mmu_private += *max_blocks << blocksize_bits;
+ set_buffer_new(bh_result);
+ }
+
+}
+
static inline int __fat_get_block(struct inode *inode, sector_t iblock,
unsigned long *max_blocks,
struct buffer_head *bh_result, int create)
@@ -130,8 +149,11 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
if (err)
return err;
if (phys) {
- map_bh(bh_result, sb, phys);
*max_blocks = min(mapped_blocks, *max_blocks);
+ if (create)
+ check_fallocated_region(inode, iblock, max_blocks,
+ bh_result);
+ map_bh(bh_result, sb, phys);
return 0;
}
if (!create)
@@ -155,6 +177,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+ MSDOS_I(inode)->i_disksize = MSDOS_I(inode)->mmu_private;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
if (err)
@@ -269,6 +292,13 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
loff_t size = offset + count;
if (MSDOS_I(inode)->mmu_private < size)
return 0;
+
+ /*
+ * In case of writing in fallocated region, return 0 and
+ * fallback to buffered write.
+ */
+ if (MSDOS_I(inode)->i_disksize > MSDOS_I(inode)->mmu_private)
+ return 0;
}
/*
@@ -282,13 +312,36 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
return ret;
}
+static int fat_get_block_bmap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ struct super_block *sb = inode->i_sb;
+ unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ int err;
+ sector_t bmap;
+ unsigned long mapped_blocks;
+
+ err = fat_bmap2(inode, iblock, &mapped_blocks, bh_result, create,
+ &bmap);
+ if (err)
+ return err;
+
+ if (bmap) {
+ map_bh(bh_result, sb, bmap);
+ max_blocks = min(mapped_blocks, max_blocks);
+ }
+
+ bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+ return 0;
+}
+
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&MSDOS_I(mapping->host)->truncate_lock);
- blocknr = generic_block_bmap(mapping, block, fat_get_block);
+ blocknr = generic_block_bmap(mapping, block, fat_get_block_bmap);
up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
@@ -469,7 +522,6 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
error = fat_calc_dir_size(inode);
if (error < 0)
return error;
- MSDOS_I(inode)->mmu_private = inode->i_size;
set_nlink(inode, fat_subdirs(inode));
} else { /* not a directory */
@@ -484,8 +536,12 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_op = &fat_file_inode_operations;
inode->i_fop = &fat_file_operations;
inode->i_mapping->a_ops = &fat_aops;
- MSDOS_I(inode)->mmu_private = inode->i_size;
}
+
+ MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = round_up(inode->i_size,
+ inode->i_sb->s_blocksize);
+
if (de->attr & ATTR_SYS) {
if (sbi->options.sys_immutable)
inode->i_flags |= S_IMMUTABLE;
@@ -550,12 +606,34 @@ out:
EXPORT_SYMBOL_GPL(fat_build_inode);
+static int __fat_write_inode(struct inode *inode, int wait);
static void fat_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
+ } else {
+ /* Release unwritten fallocated blocks on inode eviction. */
+ if (MSDOS_I(inode)->i_disksize >
+ round_up(MSDOS_I(inode)->mmu_private,
+ inode->i_sb->s_blocksize)) {
+ int err;
+ fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private);
+ /* Fallocate results in updating the i_start/iogstart
+ * for the zero byte file. So, make it return to
+ * original state during evict and commit it to avoid
+ * any corruption on the next access to the cluster
+ * chain for the file.
+ */
+ err = __fat_write_inode(inode, inode_needs_sync(inode));
+ if (err) {
+ fat_msg(inode->i_sb, KERN_WARNING, "Failed to "
+ "update on disk inode for unused fallocated "
+ "blocks, inode could be corrupted. Please run "
+ "fsck");
+ }
+ }
}
invalidate_inode_buffers(inode);
clear_inode(inode);
@@ -1293,6 +1371,7 @@ static int fat_read_root(struct inode *inode)
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
MSDOS_I(inode)->i_logstart = 0;
MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = inode->i_size;
fat_save_attrs(inode, ATTR_DIR);
inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
diff --git a/fs/file_table.c b/fs/file_table.c
index f8cc881fbbfb..385bfd31512a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -76,14 +76,14 @@ EXPORT_SYMBOL_GPL(get_max_files);
* Handle nr_files sysctl
*/
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(ctl_table *table, int write,
+int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#else
-int proc_nr_files(ctl_table *table, int write,
+int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index acd4bf1fc277..63f868e869b9 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
return ret;
}
-ctl_table fscache_sysctls[] = {
+struct ctl_table fscache_sysctls[] = {
{
.procname = "object_max_active",
.data = &fscache_object_max_active,
@@ -87,7 +87,7 @@ ctl_table fscache_sysctls[] = {
{}
};
-ctl_table fscache_sysctls_root[] = {
+struct ctl_table fscache_sysctls_root[] = {
{
.procname = "fscache",
.mode = 0555,
diff --git a/fs/inode.c b/fs/inode.c
index f96d2a6f88cc..2feb9b69f1be 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -105,7 +105,7 @@ long get_nr_dirty_inodes(void)
* Handle nr_inode sysctl
*/
#ifdef CONFIG_SYSCTL
-int proc_nr_inodes(ctl_table *table, int write,
+int proc_nr_inodes(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2b431f7266c3..8f27c93f8d2e 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -436,7 +436,7 @@ EXPORT_SYMBOL_GPL(lockd_down);
* Sysctl parameters (same as module parameters, different interface).
*/
-static ctl_table nlm_sysctls[] = {
+static struct ctl_table nlm_sysctls[] = {
{
.procname = "nlm_grace_period",
.data = &nlm_grace_period,
@@ -490,7 +490,7 @@ static ctl_table nlm_sysctls[] = {
{ }
};
-static ctl_table nlm_sysctl_dir[] = {
+static struct ctl_table nlm_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
@@ -499,7 +499,7 @@ static ctl_table nlm_sysctl_dir[] = {
{ }
};
-static ctl_table nlm_sysctl_root[] = {
+static struct ctl_table nlm_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 2628d921b7e3..b6ebe7e445f6 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -16,7 +16,7 @@ static const int nfs_set_port_min = 0;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table;
-static ctl_table nfs4_cb_sysctls[] = {
+static struct ctl_table nfs4_cb_sysctls[] = {
{
.procname = "nfs_callback_tcpport",
.data = &nfs_callback_set_tcpport,
@@ -36,7 +36,7 @@ static ctl_table nfs4_cb_sysctls[] = {
{ }
};
-static ctl_table nfs4_cb_sysctl_dir[] = {
+static struct ctl_table nfs4_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
@@ -45,7 +45,7 @@ static ctl_table nfs4_cb_sysctl_dir[] = {
{ }
};
-static ctl_table nfs4_cb_sysctl_root[] = {
+static struct ctl_table nfs4_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 6b3f2535a3ec..bb6ed810fa6f 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -13,7 +13,7 @@
static struct ctl_table_header *nfs_callback_sysctl_table;
-static ctl_table nfs_cb_sysctls[] = {
+static struct ctl_table nfs_cb_sysctls[] = {
{
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
@@ -31,7 +31,7 @@ static ctl_table nfs_cb_sysctls[] = {
{ }
};
-static ctl_table nfs_cb_sysctl_dir[] = {
+static struct ctl_table nfs_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
@@ -40,7 +40,7 @@ static ctl_table nfs_cb_sysctl_dir[] = {
{ }
};
-static ctl_table nfs_cb_sysctl_root[] = {
+static struct ctl_table nfs_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 78a2ca3966c3..cc423a30a0c8 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -57,7 +57,7 @@ static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
static int zero;
-ctl_table inotify_table[] = {
+struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &inotify_max_user_instances,
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 1927170a35ce..a503156ec15f 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -34,7 +34,7 @@
#include "debug.h"
/* Definition of the ntfs sysctl. */
-static ctl_table ntfs_sysctls[] = {
+static struct ctl_table ntfs_sysctls[] = {
{
.procname = "ntfs-debug",
.data = &debug_msgs, /* Data pointer and size. */
@@ -46,7 +46,7 @@ static ctl_table ntfs_sysctls[] = {
};
/* Define the parent directory /proc/sys/fs. */
-static ctl_table sysctls_root[] = {
+static struct ctl_table sysctls_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index dc9a6829f7c6..1bcffeab713c 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -142,7 +142,6 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
int org = *beg;
BUG_ON(!th->t_trans_id);
-
RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
"range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
PROC_INFO_INC(s, scan_bitmap.bmap);
@@ -321,7 +320,6 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
unsigned int off_max = s->s_blocksize << 3;
BUG_ON(!th->t_trans_id);
-
PROC_INFO_INC(s, scan_bitmap.call);
if (SB_FREE_BLOCKS(s) <= 0)
return 0; // No point in looking for more free blocks
@@ -388,9 +386,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
unsigned int nr, offset;
BUG_ON(!th->t_trans_id);
-
PROC_INFO_INC(s, free_block);
-
rs = SB_DISK_SUPER_BLOCK(s);
sbh = SB_BUFFER_WITH_SB(s);
apbi = SB_AP_BITMAP(s);
@@ -435,8 +431,8 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
int for_unformatted)
{
struct super_block *s = th->t_super;
- BUG_ON(!th->t_trans_id);
+ BUG_ON(!th->t_trans_id);
RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
if (!is_reusable(s, block, 1))
return;
@@ -471,6 +467,7 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
unsigned long save = ei->i_prealloc_block;
int dirty = 0;
struct inode *inode = &ei->vfs_inode;
+
BUG_ON(!th->t_trans_id);
#ifdef CONFIG_REISERFS_CHECK
if (ei->i_prealloc_count < 0)
@@ -494,6 +491,7 @@ void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
struct inode *inode)
{
struct reiserfs_inode_info *ei = REISERFS_I(inode);
+
BUG_ON(!th->t_trans_id);
if (ei->i_prealloc_count)
__discard_prealloc(th, ei);
@@ -504,7 +502,6 @@ void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
BUG_ON(!th->t_trans_id);
-
while (!list_empty(plist)) {
struct reiserfs_inode_info *ei;
ei = list_entry(plist->next, struct reiserfs_inode_info,
@@ -562,7 +559,7 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
if (!strcmp(this_char, "displacing_new_packing_localities")) {
SET_OPTION(displacing_new_packing_localities);
continue;
- };
+ }
if (!strcmp(this_char, "old_hashed_relocation")) {
SET_OPTION(old_hashed_relocation);
@@ -729,6 +726,7 @@ void show_alloc_options(struct seq_file *seq, struct super_block *s)
static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
{
char *hash_in;
+
if (hint->formatted_node) {
hash_in = (char *)&hint->key.k_dir_id;
} else {
@@ -757,6 +755,7 @@ static void dirid_groups(reiserfs_blocknr_hint_t * hint)
__u32 dirid = 0;
int bm = 0;
struct super_block *sb = hint->th->t_super;
+
if (hint->inode)
dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
else if (hint->formatted_node)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index b14706a05d52..615cd9ab7940 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -228,10 +228,10 @@ const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
/* Maximal possible key. It is never in the tree. */
static const struct reiserfs_key MAX_KEY = {
- __constant_cpu_to_le32(0xffffffff),
- __constant_cpu_to_le32(0xffffffff),
- {{__constant_cpu_to_le32(0xffffffff),
- __constant_cpu_to_le32(0xffffffff)},}
+ cpu_to_le32(0xffffffff),
+ cpu_to_le32(0xffffffff),
+ {{cpu_to_le32(0xffffffff),
+ cpu_to_le32(0xffffffff)},}
};
/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 184323cac1a4..7bc20809c99e 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -52,7 +52,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
if (ufs_fragnum(fragment) + count > uspi->s_fpg)
ufs_error (sb, "ufs_free_fragments", "internal error");
- mutex_lock(&UFS_SB(sb)->s_lock);
+ lock_ufs(sb);
cgno = ufs_dtog(uspi, fragment);
bit = ufs_dtogd(uspi, fragment);
@@ -116,12 +116,12 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT\n");
return;
failed:
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT (FAILED)\n");
return;
}
@@ -151,7 +151,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
goto failed;
}
- mutex_lock(&UFS_SB(sb)->s_lock);
+ lock_ufs(sb);
do_more:
overflow = 0;
@@ -211,12 +211,12 @@ do_more:
}
ufs_mark_sb_dirty(sb);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT\n");
return;
failed_unlock:
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
failed:
UFSD("EXIT (FAILED)\n");
return;
@@ -357,7 +357,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
usb1 = ubh_get_usb_first(uspi);
*err = -ENOSPC;
- mutex_lock(&UFS_SB(sb)->s_lock);
+ lock_ufs(sb);
tmp = ufs_data_ptr_to_cpu(sb, p);
if (count + ufs_fragnum(fragment) > uspi->s_fpb) {
@@ -378,19 +378,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
"fragment %llu, tmp %llu\n",
(unsigned long long)fragment,
(unsigned long long)tmp);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return INVBLOCK;
}
if (fragment < UFS_I(inode)->i_lastfrag) {
UFSD("EXIT (ALREADY ALLOCATED)\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return 0;
}
}
else {
if (tmp) {
UFSD("EXIT (ALREADY ALLOCATED)\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return 0;
}
}
@@ -399,7 +399,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
* There is not enough space for user on the device
*/
if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT (FAILED)\n");
return 0;
}
@@ -424,7 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
ufs_clear_frags(inode, result + oldcount,
newcount - oldcount, locked_page != NULL);
}
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
@@ -439,7 +439,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
fragment + count);
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
@@ -477,7 +477,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
*err = 0;
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
ufs_free_fragments (inode, tmp, oldcount);
@@ -485,7 +485,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
return result;
}
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT (FAILED)\n");
return 0;
}
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 98f7211599ff..a9cc75ffa925 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -69,11 +69,11 @@ void ufs_free_inode (struct inode * inode)
ino = inode->i_ino;
- mutex_lock(&UFS_SB(sb)->s_lock);
+ lock_ufs(sb);
if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) {
ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return;
}
@@ -81,7 +81,7 @@ void ufs_free_inode (struct inode * inode)
bit = ufs_inotocgoff (ino);
ucpi = ufs_load_cylinder (sb, cg);
if (!ucpi) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return;
}
ucg = ubh_get_ucg(UCPI_UBH(ucpi));
@@ -115,7 +115,7 @@ void ufs_free_inode (struct inode * inode)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT\n");
}
@@ -193,7 +193,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
sbi = UFS_SB(sb);
uspi = sbi->s_uspi;
- mutex_lock(&sbi->s_lock);
+ lock_ufs(sb);
/*
* Try to place the inode in its parent directory
@@ -328,21 +328,20 @@ cg_found:
sync_dirty_buffer(bh);
brelse(bh);
}
-
- mutex_unlock(&sbi->s_lock);
+ unlock_ufs(sb);
UFSD("allocating inode %lu\n", inode->i_ino);
UFSD("EXIT\n");
return inode;
fail_remove_inode:
- mutex_unlock(&sbi->s_lock);
+ unlock_ufs(sb);
clear_nlink(inode);
iput(inode);
UFSD("EXIT (FAILED): err %d\n", err);
return ERR_PTR(err);
failed:
- mutex_unlock(&sbi->s_lock);
+ unlock_ufs(sb);
make_bad_inode(inode);
iput (inode);
UFSD("EXIT (FAILED): err %d\n", err);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index c1183f9f69dc..b879f1ba3439 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -697,7 +697,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
unsigned flags;
lock_ufs(sb);
- mutex_lock(&UFS_SB(sb)->s_lock);
UFSD("ENTER\n");
@@ -715,7 +714,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
ufs_put_cstotal(sb);
UFSD("EXIT\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0;
@@ -760,6 +758,7 @@ static void ufs_put_super(struct super_block *sb)
ubh_brelse_uspi (sbi->s_uspi);
kfree (sbi->s_uspi);
+ mutex_destroy(&sbi->mutex);
kfree (sbi);
sb->s_fs_info = NULL;
UFSD("EXIT\n");
@@ -786,6 +785,14 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags = 0;
UFSD("ENTER\n");
+
+#ifndef CONFIG_UFS_FS_WRITE
+ if (!(sb->s_flags & MS_RDONLY)) {
+ printk("ufs was compiled with read-only support, "
+ "can't be mounted as read-write\n");
+ return -EROFS;
+ }
+#endif
sbi = kzalloc(sizeof(struct ufs_sb_info), GFP_KERNEL);
if (!sbi)
@@ -795,15 +802,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
-#ifndef CONFIG_UFS_FS_WRITE
- if (!(sb->s_flags & MS_RDONLY)) {
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
- goto failed;
- }
-#endif
mutex_init(&sbi->mutex);
- mutex_init(&sbi->s_lock);
spin_lock_init(&sbi->work_lock);
INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
/*
@@ -1257,6 +1256,7 @@ magic_found:
return 0;
failed:
+ mutex_destroy(&sbi->mutex);
if (ubh)
ubh_brelse_uspi (uspi);
kfree (uspi);
@@ -1280,7 +1280,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
sync_filesystem(sb);
lock_ufs(sb);
- mutex_lock(&UFS_SB(sb)->s_lock);
uspi = UFS_SB(sb)->s_uspi;
flags = UFS_SB(sb)->s_flags;
usb1 = ubh_get_usb_first(uspi);
@@ -1294,7 +1293,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
new_mount_opt = 0;
ufs_set_opt (new_mount_opt, ONERROR_LOCK);
if (!ufs_parse_options (data, &new_mount_opt)) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL;
}
@@ -1302,14 +1300,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
new_mount_opt |= ufstype;
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
printk("ufstype can't be changed during remount\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL;
}
if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
UFS_SB(sb)->s_mount_opt = new_mount_opt;
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0;
}
@@ -1334,7 +1330,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
#ifndef CONFIG_UFS_FS_WRITE
printk("ufs was compiled with read-only support, "
"can't be mounted as read-write\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL;
#else
@@ -1344,13 +1339,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
printk("this ufstype is read-only supported\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL;
}
if (!ufs_read_cylinder_structures(sb)) {
printk("failed during remounting\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EPERM;
}
@@ -1358,7 +1351,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
#endif
}
UFS_SB(sb)->s_mount_opt = new_mount_opt;
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0;
}
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index ff2c15ab81aa..343e6fc571e5 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -24,7 +24,6 @@ struct ufs_sb_info {
int work_queued; /* non-zero if the delayed work is queued */
struct delayed_work sync_work; /* FS sync delayed work */
spinlock_t work_lock; /* protects sync_work and work_queued */
- struct mutex s_lock;
};
struct ufs_inode_info {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 338e6f758c6d..1cab2f8148c0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2408,8 +2408,12 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
-extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
- unsigned long size, pgoff_t pgoff);
+static inline int generic_file_remap_pages(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long size, pgoff_t pgoff)
+{
+ BUG();
+ return 0;
+}
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/include/linux/key.h b/include/linux/key.h
index cd0abb8c9c33..017b0826642f 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -343,7 +343,7 @@ do { \
} while (0)
#ifdef CONFIG_SYSCTL
-extern ctl_table key_sysctls[];
+extern struct ctl_table key_sysctls[];
#endif
/*
* the userspace interface
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 5bb424659c04..057e95971014 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -30,6 +30,7 @@ extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref;
extern void kmemleak_free(const void *ptr) __ref;
extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
+extern void kmemleak_update_trace(const void *ptr) __ref;
extern void kmemleak_not_leak(const void *ptr) __ref;
extern void kmemleak_ignore(const void *ptr) __ref;
extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
@@ -83,6 +84,9 @@ static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
static inline void kmemleak_free_percpu(const void __percpu *ptr)
{
}
+static inline void kmemleak_update_trace(const void *ptr)
+{
+}
static inline void kmemleak_not_leak(const void *ptr)
{
}
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca..a5cf853129ec 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -92,6 +92,10 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
bool task_in_mem_cgroup(struct task_struct *task,
const struct mem_cgroup *memcg);
+extern bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
+ struct mem_cgroup *root);
+extern bool mem_cgroup_all_within_guarantee(struct mem_cgroup *root);
+
extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -288,6 +292,16 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
return &zone->lruvec;
}
+static inline bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
+ struct mem_cgroup *root)
+{
+ return false;
+}
+static inline bool mem_cgroup_all_within_guarantee(struct mem_cgroup *root)
+{
+ return false;
+}
+
static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
return NULL;
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 3e02b768d537..b6401e7661c7 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -18,38 +18,38 @@
#ifndef __LINUX_MFD_SEC_RTC_H
#define __LINUX_MFD_SEC_RTC_H
-enum sec_rtc_reg {
- SEC_RTC_SEC,
- SEC_RTC_MIN,
- SEC_RTC_HOUR,
- SEC_RTC_WEEKDAY,
- SEC_RTC_DATE,
- SEC_RTC_MONTH,
- SEC_RTC_YEAR1,
- SEC_RTC_YEAR2,
- SEC_ALARM0_SEC,
- SEC_ALARM0_MIN,
- SEC_ALARM0_HOUR,
- SEC_ALARM0_WEEKDAY,
- SEC_ALARM0_DATE,
- SEC_ALARM0_MONTH,
- SEC_ALARM0_YEAR1,
- SEC_ALARM0_YEAR2,
- SEC_ALARM1_SEC,
- SEC_ALARM1_MIN,
- SEC_ALARM1_HOUR,
- SEC_ALARM1_WEEKDAY,
- SEC_ALARM1_DATE,
- SEC_ALARM1_MONTH,
- SEC_ALARM1_YEAR1,
- SEC_ALARM1_YEAR2,
- SEC_ALARM0_CONF,
- SEC_ALARM1_CONF,
- SEC_RTC_STATUS,
- SEC_WTSR_SMPL_CNTL,
- SEC_RTC_UDR_CON,
+enum s5m_rtc_reg {
+ S5M_RTC_SEC,
+ S5M_RTC_MIN,
+ S5M_RTC_HOUR,
+ S5M_RTC_WEEKDAY,
+ S5M_RTC_DATE,
+ S5M_RTC_MONTH,
+ S5M_RTC_YEAR1,
+ S5M_RTC_YEAR2,
+ S5M_ALARM0_SEC,
+ S5M_ALARM0_MIN,
+ S5M_ALARM0_HOUR,
+ S5M_ALARM0_WEEKDAY,
+ S5M_ALARM0_DATE,
+ S5M_ALARM0_MONTH,
+ S5M_ALARM0_YEAR1,
+ S5M_ALARM0_YEAR2,
+ S5M_ALARM1_SEC,
+ S5M_ALARM1_MIN,
+ S5M_ALARM1_HOUR,
+ S5M_ALARM1_WEEKDAY,
+ S5M_ALARM1_DATE,
+ S5M_ALARM1_MONTH,
+ S5M_ALARM1_YEAR1,
+ S5M_ALARM1_YEAR2,
+ S5M_ALARM0_CONF,
+ S5M_ALARM1_CONF,
+ S5M_RTC_STATUS,
+ S5M_WTSR_SMPL_CNTL,
+ S5M_RTC_UDR_CON,
- SEC_RTC_REG_MAX,
+ S5M_RTC_REG_MAX,
};
enum s2mps_rtc_reg {
@@ -88,9 +88,9 @@ enum s2mps_rtc_reg {
#define HOUR_12 (1 << 7)
#define HOUR_AMPM (1 << 6)
#define HOUR_PM (1 << 5)
-#define ALARM0_STATUS (1 << 1)
-#define ALARM1_STATUS (1 << 2)
-#define UPDATE_AD (1 << 0)
+#define S5M_ALARM0_STATUS (1 << 1)
+#define S5M_ALARM1_STATUS (1 << 2)
+#define S5M_UPDATE_AD (1 << 0)
#define S2MPS_ALARM0_STATUS (1 << 2)
#define S2MPS_ALARM1_STATUS (1 << 1)
@@ -101,16 +101,26 @@ enum s2mps_rtc_reg {
#define MODEL24_SHIFT 1
#define MODEL24_MASK (1 << MODEL24_SHIFT)
/* RTC Update Register1 */
-#define RTC_UDR_SHIFT 0
-#define RTC_UDR_MASK (1 << RTC_UDR_SHIFT)
+#define S5M_RTC_UDR_SHIFT 0
+#define S5M_RTC_UDR_MASK (1 << S5M_RTC_UDR_SHIFT)
#define S2MPS_RTC_WUDR_SHIFT 4
#define S2MPS_RTC_WUDR_MASK (1 << S2MPS_RTC_WUDR_SHIFT)
#define S2MPS_RTC_RUDR_SHIFT 0
#define S2MPS_RTC_RUDR_MASK (1 << S2MPS_RTC_RUDR_SHIFT)
#define RTC_TCON_SHIFT 1
#define RTC_TCON_MASK (1 << RTC_TCON_SHIFT)
-#define RTC_TIME_EN_SHIFT 3
-#define RTC_TIME_EN_MASK (1 << RTC_TIME_EN_SHIFT)
+#define S5M_RTC_TIME_EN_SHIFT 3
+#define S5M_RTC_TIME_EN_MASK (1 << S5M_RTC_TIME_EN_SHIFT)
+/*
+ * UDR_T field in S5M_RTC_UDR_CON register determines the time needed
+ * for updating alarm and time registers. Default is 7.32 ms.
+ */
+#define S5M_RTC_UDR_T_SHIFT 6
+#define S5M_RTC_UDR_T_MASK (0x3 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_7320_US (0x0 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_1830_US (0x1 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_3660_US (0x2 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_450_US (0x3 << S5M_RTC_UDR_T_SHIFT)
/* RTC Hour register */
#define HOUR_PM_SHIFT 6
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 6a45fb583ff1..447775ee2c4b 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -32,15 +32,24 @@ static inline void touch_nmi_watchdog(void)
#ifdef arch_trigger_all_cpu_backtrace
static inline bool trigger_all_cpu_backtrace(void)
{
- arch_trigger_all_cpu_backtrace();
+ arch_trigger_all_cpu_backtrace(true);
return true;
}
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+ arch_trigger_all_cpu_backtrace(false);
+ return true;
+}
#else
static inline bool trigger_all_cpu_backtrace(void)
{
return false;
}
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+ return false;
+}
#endif
#ifdef CONFIG_LOCKUP_DETECTOR
@@ -48,6 +57,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *);
u64 hw_nmi_get_sample_period(int watchdog_thresh);
extern int watchdog_user_enabled;
extern int watchdog_thresh;
+extern int sysctl_softlockup_all_cpu_backtrace;
struct ctl_table;
extern int proc_dowatchdog(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 56b7bc32db4f..b810855024f9 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -40,6 +40,11 @@ struct res_counter {
*/
unsigned long long soft_limit;
/*
+ * the limit under which the usage cannot be pushed
+ * due to external pressure.
+ */
+ unsigned long long low_limit;
+ /*
* the number of unsuccessful attempts to consume the resource
*/
unsigned long long failcnt;
@@ -88,6 +93,7 @@ enum {
RES_LIMIT,
RES_FAILCNT,
RES_SOFT_LIMIT,
+ RES_LOW_LIMIT,
};
/*
@@ -175,6 +181,28 @@ res_counter_soft_limit_excess(struct res_counter *cnt)
return excess;
}
+/**
+ * Get the difference between the usage and the low limit
+ * @cnt: The counter
+ *
+ * Returns 0 if usage is less than or equal to low limit
+ * The difference between usage and low limit, otherwise.
+ */
+static inline unsigned long long
+res_counter_low_limit_excess(struct res_counter *cnt)
+{
+ unsigned long long excess;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ if (cnt->usage <= cnt->low_limit)
+ excess = 0;
+ else
+ excess = cnt->usage - cnt->low_limit;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return excess;
+}
+
static inline void res_counter_reset_max(struct res_counter *cnt)
{
unsigned long flags;
@@ -220,4 +248,16 @@ res_counter_set_soft_limit(struct res_counter *cnt,
return 0;
}
+static inline int
+res_counter_set_low_limit(struct res_counter *cnt,
+ unsigned long long low_limit)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ cnt->low_limit = low_limit;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return 0;
+}
+
#endif
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 03f3b05e8ec1..091d9935c247 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -16,6 +16,7 @@
#include <linux/atomic.h>
+struct optimistic_spin_queue;
struct rw_semaphore;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
@@ -23,9 +24,17 @@ struct rw_semaphore;
#else
/* All arch specific implementations share the same struct */
struct rw_semaphore {
- long count;
- raw_spinlock_t wait_lock;
- struct list_head wait_list;
+ long count;
+ raw_spinlock_t wait_lock;
+ struct list_head wait_list;
+#ifdef CONFIG_SMP
+ /*
+ * Write owner. Used as a speculative check to see
+ * if the owner is running on the cpu.
+ */
+ struct task_struct *owner;
+ struct optimistic_spin_queue *osq; /* spinner MCS lock */
+#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
@@ -55,11 +64,21 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
+#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+#define __RWSEM_INITIALIZER(name) \
+ { RWSEM_UNLOCKED_VALUE, \
+ __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
+ LIST_HEAD_INIT((name).wait_list), \
+ NULL, /* owner */ \
+ NULL /* mcs lock */ \
+ __RWSEM_DEP_MAP_INIT(name) }
+#else
#define __RWSEM_INITIALIZER(name) \
{ RWSEM_UNLOCKED_VALUE, \
__RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
LIST_HEAD_INIT((name).wait_list) \
__RWSEM_DEP_MAP_INIT(name) }
+#endif
#define DECLARE_RWSEM(name) \
struct rw_semaphore name = __RWSEM_INITIALIZER(name)
diff --git a/init/main.c b/init/main.c
index e8ae1fef0908..bb1aed928f21 100644
--- a/init/main.c
+++ b/init/main.c
@@ -6,7 +6,7 @@
* GK 2/5/95 - Changed to support mounting root fs via NFS
* Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
* Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
- * Simplified starting of init: Michael A. Griffith <grif@acm.org>
+ * Simplified starting of init: Michael A. Griffith <grif@acm.org>
*/
#define DEBUG /* Enable initcall_debug */
@@ -136,7 +136,7 @@ static char *ramdisk_execute_command;
* Used to generate warnings if static_key manipulation functions are used
* before jump_label_init is called.
*/
-bool static_key_initialized __read_mostly = false;
+bool static_key_initialized __read_mostly;
EXPORT_SYMBOL_GPL(static_key_initialized);
/*
@@ -159,8 +159,8 @@ static int __init set_reset_devices(char *str)
__setup("reset_devices", set_reset_devices);
-static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
extern const struct obs_kernel_param __setup_start[], __setup_end[];
@@ -199,7 +199,6 @@ static int __init obsolete_checksetup(char *line)
* still work even if initially too large, it will just take slightly longer
*/
unsigned long loops_per_jiffy = (1<<12);
-
EXPORT_SYMBOL(loops_per_jiffy);
static int __init debug_kernel(char *str)
@@ -376,8 +375,8 @@ static void __init setup_command_line(char *command_line)
initcall_command_line =
memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
- strcpy (saved_command_line, boot_command_line);
- strcpy (static_command_line, command_line);
+ strcpy(saved_command_line, boot_command_line);
+ strcpy(static_command_line, command_line);
}
/*
@@ -445,8 +444,8 @@ void __init parse_early_options(char *cmdline)
/* Arch code calls this early on, or if not, just before other parsing. */
void __init parse_early_param(void)
{
- static __initdata int done = 0;
- static __initdata char tmp_cmdline[COMMAND_LINE_SIZE];
+ static int done __initdata;
+ static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
if (done)
return;
@@ -500,7 +499,8 @@ static void __init mm_init(void)
asmlinkage __visible void __init start_kernel(void)
{
- char * command_line, *after_dashes;
+ char *command_line;
+ char *after_dashes;
extern const struct kernel_param __start___param[], __stop___param[];
/*
@@ -572,7 +572,8 @@ asmlinkage __visible void __init start_kernel(void)
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
- if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
+ if (WARN(!irqs_disabled(),
+ "Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
rcu_init();
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 998d31b230f1..c3f0326e98db 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -18,7 +18,7 @@
#include <linux/msg.h>
#include "util.h"
-static void *get_ipc(ctl_table *table)
+static void *get_ipc(struct ctl_table *table)
{
char *which = table->data;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
@@ -27,7 +27,7 @@ static void *get_ipc(ctl_table *table)
}
#ifdef CONFIG_PROC_SYSCTL
-static int proc_ipc_dointvec(ctl_table *table, int write,
+static int proc_ipc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
@@ -38,7 +38,7 @@ static int proc_ipc_dointvec(ctl_table *table, int write,
return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
}
-static int proc_ipc_dointvec_minmax(ctl_table *table, int write,
+static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
@@ -49,7 +49,7 @@ static int proc_ipc_dointvec_minmax(ctl_table *table, int write,
return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
}
-static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
+static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ipc_namespace *ns = current->nsproxy->ipc_ns;
@@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
return err;
}
-static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write,
+static int proc_ipc_callback_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
@@ -85,7 +85,7 @@ static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write,
return rc;
}
-static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
+static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
@@ -119,7 +119,7 @@ static void ipc_auto_callback(int val)
}
}
-static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
+static int proc_ipcauto_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 5bb8bfe67149..68d4e953762c 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -14,7 +14,7 @@
#include <linux/sysctl.h>
#ifdef CONFIG_PROC_SYSCTL
-static void *get_mq(ctl_table *table)
+static void *get_mq(struct ctl_table *table)
{
char *which = table->data;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
@@ -22,7 +22,7 @@ static void *get_mq(ctl_table *table)
return which;
}
-static int proc_mq_dointvec(ctl_table *table, int write,
+static int proc_mq_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table mq_table;
@@ -32,7 +32,7 @@ static int proc_mq_dointvec(ctl_table *table, int write,
return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
}
-static int proc_mq_dointvec_minmax(ctl_table *table, int write,
+static int proc_mq_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table mq_table;
@@ -53,7 +53,7 @@ static int msg_max_limit_max = HARD_MSGMAX;
static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
static int msg_maxsize_limit_max = HARD_MSGSIZEMAX;
-static ctl_table mq_sysctls[] = {
+static struct ctl_table mq_sysctls[] = {
{
.procname = "queues_max",
.data = &init_ipc_ns.mq_queues_max,
@@ -100,7 +100,7 @@ static ctl_table mq_sysctls[] = {
{}
};
-static ctl_table mq_sysctl_dir[] = {
+static struct ctl_table mq_sysctl_dir[] = {
{
.procname = "mqueue",
.mode = 0555,
@@ -109,7 +109,7 @@ static ctl_table mq_sysctl_dir[] = {
{}
};
-static ctl_table mq_sysctl_root[] = {
+static struct ctl_table mq_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3214289df5a7..2ac9f133c4da 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -76,7 +76,7 @@ static bool kprobes_all_disarmed;
/* This protects kprobe_table and optimizing_list */
static DEFINE_MUTEX(kprobe_mutex);
-static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
static struct {
raw_spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];
@@ -297,9 +297,9 @@ static inline void reset_kprobe_instance(void)
/*
* This routine is called either:
- * - under the kprobe_mutex - during kprobe_[un]register()
- * OR
- * - with preemption disabled - from arch/xxx/kernel/kprobes.c
+ * - under the kprobe_mutex - during kprobe_[un]register()
+ * OR
+ * - with preemption disabled - from arch/xxx/kernel/kprobes.c
*/
struct kprobe *get_kprobe(void *addr)
{
@@ -567,7 +567,8 @@ static void wait_for_kprobe_optimizer(void)
{
mutex_lock(&kprobe_mutex);
- while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
+ while (!list_empty(&optimizing_list) ||
+ !list_empty(&unoptimizing_list)) {
mutex_unlock(&kprobe_mutex);
/* this will also make optimizing_work execute immmediately */
@@ -676,8 +677,8 @@ static void reuse_unused_kprobe(struct kprobe *ap)
*/
op = container_of(ap, struct optimized_kprobe, kp);
if (unlikely(list_empty(&op->list)))
- printk(KERN_WARNING "Warning: found a stray unused "
- "aggrprobe@%p\n", ap->addr);
+ pr_warn("Warning: found a stray unused aggrprobe@%p\n",
+ ap->addr);
/* Enable the probe again */
ap->flags &= ~KPROBE_FLAG_DISABLED;
/* Optimize it again (remove from op->list) */
@@ -794,7 +795,7 @@ static void optimize_all_kprobes(void)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
- printk(KERN_INFO "Kprobes globally optimized\n");
+ pr_info("Kprobes globally optimized\n");
out:
mutex_unlock(&kprobe_mutex);
}
@@ -824,7 +825,7 @@ static void unoptimize_all_kprobes(void)
/* Wait for unoptimizing completion */
wait_for_kprobe_optimizer();
- printk(KERN_INFO "Kprobes globally unoptimized\n");
+ pr_info("Kprobes globally unoptimized\n");
}
static DEFINE_MUTEX(kprobe_sysctl_mutex);
@@ -896,7 +897,7 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt)
/* There should be no unused kprobes can be reused without optimization */
static void reuse_unused_kprobe(struct kprobe *ap)
{
- printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+ pr_err("Error: There should be no unused kprobe here.\n");
BUG_ON(kprobe_unused(ap));
}
@@ -955,7 +956,8 @@ static void disarm_kprobe_ftrace(struct kprobe *p)
}
ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
(unsigned long)p->addr, 1, 0);
- WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n",
+ p->addr, ret);
}
#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
@@ -1389,7 +1391,7 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p)
if (p != ap) {
list_for_each_entry_rcu(list_p, &ap->list, list)
if (list_p == p)
- /* kprobe p is a valid probe */
+ /* kprobe p is a valid probe */
goto valid;
return NULL;
}
@@ -2018,8 +2020,8 @@ EXPORT_SYMBOL_GPL(enable_kprobe);
void dump_kprobe(struct kprobe *kp)
{
- printk(KERN_WARNING "Dumping kprobe:\n");
- printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
+ pr_warn("Dumping kprobe:\n");
+ pr_warn("Name: %s\nAddress: %p\nOffset: %x\n",
kp->symbol_name, kp->addr, kp->offset);
}
NOKPROBE_SYMBOL(dump_kprobe);
@@ -2128,7 +2130,7 @@ static int __init init_kprobes(void)
kprobe_lookup_name(kretprobe_blacklist[i].name,
kretprobe_blacklist[i].addr);
if (!kretprobe_blacklist[i].addr)
- printk("kretprobe: lookup failed: %s\n",
+ pr_warn("kretprobe: lookup failed: %s\n",
kretprobe_blacklist[i].name);
}
}
@@ -2310,7 +2312,7 @@ static void arm_all_kprobes(void)
}
kprobes_all_disarmed = false;
- printk(KERN_INFO "Kprobes globally enabled\n");
+ pr_info("Kprobes globally enabled\n");
already_enabled:
mutex_unlock(&kprobe_mutex);
@@ -2332,7 +2334,7 @@ static void disarm_all_kprobes(void)
}
kprobes_all_disarmed = true;
- printk(KERN_INFO "Kprobes globally disabled\n");
+ pr_info("Kprobes globally disabled\n");
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b4219ff87b8c..1f99664b7f87 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -5,12 +5,18 @@
*
* Writer lock-stealing by Alex Shi <alex.shi@intel.com>
* and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
*/
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
+#include <linux/sched/rt.h>
#include <linux/export.h>
+#include "mcs_spinlock.h"
+
/*
* Guide to the rw_semaphore's count field for common values.
* (32-bit case illustrated, similar for 64-bit)
@@ -60,9 +66,7 @@
*
*/
-/*
- * Initialize an rwsem:
- */
+/* initialize a rwsem */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
struct lock_class_key *key)
{
@@ -76,6 +80,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
sem->count = RWSEM_UNLOCKED_VALUE;
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
+#ifdef CONFIG_SMP
+ sem->owner = NULL;
+ sem->osq = NULL;
+#endif
}
EXPORT_SYMBOL(__init_rwsem);
@@ -190,7 +198,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
}
/*
- * wait for the read lock to be granted
+ * Wait for the read lock to be granted
*/
__visible
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -237,64 +245,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
return sem;
}
+static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
+{
+ if (!(count & RWSEM_ACTIVE_MASK)) {
+ /* try acquiring the write lock */
+ if (sem->count == RWSEM_WAITING_BIAS &&
+ cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+ RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
+ if (!list_is_singular(&sem->wait_list))
+ rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+ return true;
+ }
+ }
+ return false;
+}
+
+#ifdef CONFIG_SMP
/*
- * wait until we successfully acquire the write lock
+ * Try to acquire write lock before the writer has been put on wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+ long old, count = ACCESS_ONCE(sem->count);
+
+ while (true) {
+ if (!(count == 0 || count == RWSEM_WAITING_BIAS))
+ return false;
+
+ old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+ if (old == count)
+ return true;
+
+ count = old;
+ }
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+{
+ struct task_struct *owner;
+ bool on_cpu = true;
+
+ if (need_resched())
+ return 0;
+
+ rcu_read_lock();
+ owner = ACCESS_ONCE(sem->owner);
+ if (owner)
+ on_cpu = owner->on_cpu;
+ rcu_read_unlock();
+
+ /*
+ * If sem->owner is not set, the rwsem owner may have
+ * just acquired it and not set the owner yet or the rwsem
+ * has been released.
+ */
+ return on_cpu;
+}
+
+static inline bool owner_running(struct rw_semaphore *sem,
+ struct task_struct *owner)
+{
+ if (sem->owner != owner)
+ return false;
+
+ /*
+ * Ensure we emit the owner->on_cpu, dereference _after_ checking
+ * sem->owner still matches owner, if that fails, owner might
+ * point to free()d memory, if it still matches, the rcu_read_lock()
+ * ensures the memory stays valid.
+ */
+ barrier();
+
+ return owner->on_cpu;
+}
+
+static noinline
+bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+{
+ rcu_read_lock();
+ while (owner_running(sem, owner)) {
+ if (need_resched())
+ break;
+
+ arch_mutex_cpu_relax();
+ }
+ rcu_read_unlock();
+
+ /*
+ * We break out the loop above on need_resched() or when the
+ * owner changed, which is a sign for heavy contention. Return
+ * success only when sem->owner is NULL.
+ */
+ return sem->owner == NULL;
+}
+
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+ struct task_struct *owner;
+ bool taken = false;
+
+ preempt_disable();
+
+ /* sem->wait_lock should not be held when doing optimistic spinning */
+ if (!rwsem_can_spin_on_owner(sem))
+ goto done;
+
+ if (!osq_lock(&sem->osq))
+ goto done;
+
+ while (true) {
+ owner = ACCESS_ONCE(sem->owner);
+ if (owner && !rwsem_spin_on_owner(sem, owner))
+ break;
+
+ /* wait_lock will be acquired if write_lock is obtained */
+ if (rwsem_try_write_lock_unqueued(sem)) {
+ taken = true;
+ break;
+ }
+
+ /*
+ * When there's no owner, we might have preempted between the
+ * owner acquiring the lock and setting the owner field. If
+ * we're an RT task that will live-lock because we won't let
+ * the owner complete.
+ */
+ if (!owner && (need_resched() || rt_task(current)))
+ break;
+
+ /*
+ * The cpu_relax() call is a compiler barrier which forces
+ * everything in this loop to be re-loaded. We don't need
+ * memory barriers as we'll eventually observe the right
+ * values at the cost of a few extra spins.
+ */
+ arch_mutex_cpu_relax();
+ }
+ osq_unlock(&sem->osq);
+done:
+ preempt_enable();
+ return taken;
+}
+
+#else
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+ return false;
+}
+#endif
+
+/*
+ * Wait until we successfully acquire the write lock
*/
__visible
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
- long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
+ long count;
+ bool waiting = true; /* any queued threads before us */
struct rwsem_waiter waiter;
- struct task_struct *tsk = current;
- /* set up my own style of waitqueue */
- waiter.task = tsk;
+ /* undo write bias from down_write operation, stop active locking */
+ count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+
+ /* do optimistic spinning and steal lock if possible */
+ if (rwsem_optimistic_spin(sem))
+ return sem;
+
+ /*
+ * Optimistic spinning failed, proceed to the slowpath
+ * and block until we can acquire the sem.
+ */
+ waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_WRITE;
raw_spin_lock_irq(&sem->wait_lock);
+
+ /* account for this before adding a new element to the list */
if (list_empty(&sem->wait_list))
- adjustment += RWSEM_WAITING_BIAS;
+ waiting = false;
+
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
- count = rwsem_atomic_update(adjustment, sem);
+ if (waiting) {
+ count = ACCESS_ONCE(sem->count);
- /* If there were already threads queued before us and there are no
- * active writers, the lock must be read owned; so we try to wake
- * any read locks that were queued ahead of us. */
- if (count > RWSEM_WAITING_BIAS &&
- adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
- sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+ /*
+ * If there were already threads queued before us and there are
+ * no active writers, the lock must be read owned; so we try to
+ * wake any read locks that were queued ahead of us.
+ */
+ if (count > RWSEM_WAITING_BIAS)
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+
+ } else
+ count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
/* wait until we successfully acquire the lock */
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
while (true) {
- if (!(count & RWSEM_ACTIVE_MASK)) {
- /* Try acquiring the write lock. */
- count = RWSEM_ACTIVE_WRITE_BIAS;
- if (!list_is_singular(&sem->wait_list))
- count += RWSEM_WAITING_BIAS;
-
- if (sem->count == RWSEM_WAITING_BIAS &&
- cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
- RWSEM_WAITING_BIAS)
- break;
- }
-
+ if (rwsem_try_write_lock(count, sem))
+ break;
raw_spin_unlock_irq(&sem->wait_lock);
/* Block until there are no active lockers. */
do {
schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
raw_spin_lock_irq(&sem->wait_lock);
}
+ __set_current_state(TASK_RUNNING);
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
- tsk->state = TASK_RUNNING;
return sem;
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cfff1435bdfb..42f806de49d4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,6 +12,27 @@
#include <linux/atomic.h>
+#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+ sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+ sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
+
/*
* lock for reading
*/
@@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem)
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+ rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write);
@@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem)
{
int ret = __down_write_trylock(sem);
- if (ret == 1)
+ if (ret == 1) {
rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
+ rwsem_set_owner(sem);
+ }
+
return ret;
}
@@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
+ rwsem_clear_owner(sem);
__up_write(sem);
}
@@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem)
* lockdep: a downgraded write will live on as a write
* dependency.
*/
+ rwsem_clear_owner(sem);
__downgrade_write(sem);
}
@@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+ rwsem_set_owner(sem);
}
EXPORT_SYMBOL(_down_write_nest_lock);
@@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+ rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write_nested);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index e791130f85a7..85ea94305015 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -136,6 +136,8 @@ res_counter_member(struct res_counter *counter, int member)
return &counter->failcnt;
case RES_SOFT_LIMIT:
return &counter->soft_limit;
+ case RES_LOW_LIMIT:
+ return &counter->low_limit;
};
BUG();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 156ae0104cba..da51641b6841 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -198,7 +198,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
/* Note: sysrq code uses it's own private copy */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
-static int sysrq_sysctl_handler(ctl_table *table, int write,
+static int sysrq_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -861,6 +861,17 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+#ifdef CONFIG_SMP
+ {
+ .procname = "softlockup_all_cpu_backtrace",
+ .data = &sysctl_softlockup_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif /* CONFIG_SMP */
{
.procname = "nmi_watchdog",
.data = &watchdog_user_enabled,
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 6fbe811c7ad1..c8eac43267e9 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -17,7 +17,7 @@
#ifdef CONFIG_PROC_SYSCTL
-static void *get_uts(ctl_table *table, int write)
+static void *get_uts(struct ctl_table *table, int write)
{
char *which = table->data;
struct uts_namespace *uts_ns;
@@ -32,7 +32,7 @@ static void *get_uts(ctl_table *table, int write)
return which;
}
-static void put_uts(ctl_table *table, int write, void *which)
+static void put_uts(struct ctl_table *table, int write, void *which)
{
if (!write)
up_read(&uts_sem);
@@ -44,7 +44,7 @@ static void put_uts(ctl_table *table, int write, void *which)
* Special case of dostring for the UTS structure. This has locks
* to observe. Should this be in kernel/sys.c ????
*/
-static int proc_do_uts_string(ctl_table *table, int write,
+static int proc_do_uts_string(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table uts_table;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 516203e665fc..5d2c48f5083b 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,12 @@
int watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
+#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#endif
+
static int __read_mostly watchdog_running;
static u64 __read_mostly sample_period;
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
+static unsigned long soft_lockup_nmi_warn;
/* boot commands */
/*
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
}
__setup("nosoftlockup", nosoftlockup_setup);
/* */
+#ifdef CONFIG_SMP
+static int __init softlockup_all_cpu_backtrace_setup(char *str)
+{
+ sysctl_softlockup_all_cpu_backtrace =
+ !!simple_strtol(str, NULL, 0);
+ return 1;
+}
+__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#endif
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -244,9 +260,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
return;
if (hardlockup_panic)
- panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ panic("Watchdog detected hard LOCKUP on cpu %d",
+ this_cpu);
else
- WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
+ this_cpu);
__this_cpu_write(hard_watchdog_warn, true);
return;
@@ -271,6 +289,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
+ int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
/* kick the hardlockup detector */
watchdog_interrupt_count();
@@ -317,7 +336,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
if (__this_cpu_read(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
- printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+ if (softlockup_all_cpu_backtrace) {
+ /* Prevent multiple soft-lockup reports if one cpu is already
+ * engaged in dumping cpu back traces
+ */
+ if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
+ /* Someone else will report us. Let's give up */
+ __this_cpu_write(soft_watchdog_warn, true);
+ return HRTIMER_RESTART;
+ }
+ }
+
+ pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
@@ -327,6 +357,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
else
dump_stack();
+ if (softlockup_all_cpu_backtrace) {
+ /* Avoid generating two back traces for current
+ * given that one is already made above
+ */
+ trigger_allbutself_cpu_backtrace();
+
+ clear_bit(0, &soft_lockup_nmi_warn);
+ /* Barrier to sync with other cpus */
+ smp_mb__after_atomic();
+ }
+
if (softlockup_panic)
panic("softlockup: hung tasks");
__this_cpu_write(soft_watchdog_warn, true);
@@ -445,7 +486,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
if (PTR_ERR(event) == -EOPNOTSUPP)
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
else if (PTR_ERR(event) == -ENOENT)
- pr_warning("disabled (cpu%i): hardware events not enabled\n",
+ pr_warn("disabled (cpu%i): hardware events not enabled\n",
cpu);
else
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d64815651e90..3291a8e37490 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -27,6 +27,7 @@
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
+#include <linux/kmemleak.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/string.h>
@@ -200,6 +201,11 @@ radix_tree_node_alloc(struct radix_tree_root *root)
rtp->nodes[rtp->nr - 1] = NULL;
rtp->nr--;
}
+ /*
+ * Update the allocation stack trace as this is more useful
+ * for debugging.
+ */
+ kmemleak_update_trace(ret);
}
if (ret == NULL)
ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..1eaa70bed7a4 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,7 +3,7 @@
#
mmu-y := nommu.o
-mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o madvise.o memory.o mincore.o \
+mmu-$(CONFIG_MMU) := gup.o highmem.o madvise.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
vmalloc.o pagewalk.o pgtable-generic.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1706cbbdf5f0..382f8ece1cb2 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -234,11 +234,46 @@ static ssize_t stable_pages_required_show(struct device *dev,
}
static DEVICE_ATTR_RO(stable_pages_required);
+static ssize_t strictlimit_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+ unsigned int val;
+ ssize_t ret;
+
+ ret = kstrtouint(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ switch (val) {
+ case 0:
+ bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;
+ break;
+ case 1:
+ bdi->capabilities |= BDI_CAP_STRICTLIMIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return count;
+}
+static ssize_t strictlimit_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+
+ return snprintf(page, PAGE_SIZE-1, "%d\n",
+ !!(bdi->capabilities & BDI_CAP_STRICTLIMIT));
+}
+static DEVICE_ATTR_RW(strictlimit);
+
static struct attribute *bdi_dev_attrs[] = {
&dev_attr_read_ahead_kb.attr,
&dev_attr_min_ratio.attr,
&dev_attr_max_ratio.attr,
&dev_attr_stable_pages_required.attr,
+ &dev_attr_strictlimit.attr,
NULL,
};
ATTRIBUTE_GROUPS(bdi_dev);
diff --git a/mm/fremap.c b/mm/fremap.c
deleted file mode 100644
index 2c5646f11f41..000000000000
--- a/mm/fremap.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * linux/mm/fremap.c
- *
- * Explicit pagetable population and nonlinear (random) mappings support.
- *
- * started by Ingo Molnar, Copyright (C) 2002, 2003
- */
-#include <linux/export.h>
-#include <linux/backing-dev.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/file.h>
-#include <linux/mman.h>
-#include <linux/pagemap.h>
-#include <linux/swapops.h>
-#include <linux/rmap.h>
-#include <linux/syscalls.h>
-#include <linux/mmu_notifier.h>
-
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-#include "internal.h"
-
-static int mm_counter(struct page *page)
-{
- return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES;
-}
-
-static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
-{
- pte_t pte = *ptep;
- struct page *page;
- swp_entry_t entry;
-
- if (pte_present(pte)) {
- flush_cache_page(vma, addr, pte_pfn(pte));
- pte = ptep_clear_flush(vma, addr, ptep);
- page = vm_normal_page(vma, addr, pte);
- if (page) {
- if (pte_dirty(pte))
- set_page_dirty(page);
- update_hiwater_rss(mm);
- dec_mm_counter(mm, mm_counter(page));
- page_remove_rmap(page);
- page_cache_release(page);
- }
- } else { /* zap_pte() is not called when pte_none() */
- if (!pte_file(pte)) {
- update_hiwater_rss(mm);
- entry = pte_to_swp_entry(pte);
- if (non_swap_entry(entry)) {
- if (is_migration_entry(entry)) {
- page = migration_entry_to_page(entry);
- dec_mm_counter(mm, mm_counter(page));
- }
- } else {
- free_swap_and_cache(entry);
- dec_mm_counter(mm, MM_SWAPENTS);
- }
- }
- pte_clear_not_present_full(mm, addr, ptep, 0);
- }
-}
-
-/*
- * Install a file pte to a given virtual memory address, release any
- * previously existing mapping.
- */
-static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, unsigned long pgoff, pgprot_t prot)
-{
- int err = -ENOMEM;
- pte_t *pte, ptfile;
- spinlock_t *ptl;
-
- pte = get_locked_pte(mm, addr, &ptl);
- if (!pte)
- goto out;
-
- ptfile = pgoff_to_pte(pgoff);
-
- if (!pte_none(*pte))
- zap_pte(mm, vma, addr, pte);
-
- set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile));
- /*
- * We don't need to run update_mmu_cache() here because the "file pte"
- * being installed by install_file_pte() is not a real pte - it's a
- * non-present entry (like a swap entry), noting what file offset should
- * be mapped there when there's a fault (in a non-linear vma where
- * that's not obvious).
- */
- pte_unmap_unlock(pte, ptl);
- err = 0;
-out:
- return err;
-}
-
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff)
-{
- struct mm_struct *mm = vma->vm_mm;
- int err;
-
- do {
- err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
- if (err)
- return err;
-
- size -= PAGE_SIZE;
- addr += PAGE_SIZE;
- pgoff++;
- } while (size);
-
- return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
-/**
- * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
- * @start: start of the remapped virtual memory range
- * @size: size of the remapped virtual memory range
- * @prot: new protection bits of the range (see NOTE)
- * @pgoff: to-be-mapped page of the backing store file
- * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO.
- *
- * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
- * (shared backing store file).
- *
- * This syscall works purely via pagetables, so it's the most efficient
- * way to map the same (large) file into a given virtual window. Unlike
- * mmap()/mremap() it does not create any new vmas. The new mappings are
- * also safe across swapout.
- *
- * NOTE: the @prot parameter right now is ignored (but must be zero),
- * and the vma's default protection is used. Arbitrary protections
- * might be implemented in the future.
- */
-SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
- unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
-{
- struct mm_struct *mm = current->mm;
- struct address_space *mapping;
- struct vm_area_struct *vma;
- int err = -EINVAL;
- int has_write_lock = 0;
- vm_flags_t vm_flags = 0;
-
- if (prot)
- return err;
- /*
- * Sanitize the syscall parameters:
- */
- start = start & PAGE_MASK;
- size = size & PAGE_MASK;
-
- /* Does the address range wrap, or is the span zero-sized? */
- if (start + size <= start)
- return err;
-
- /* Does pgoff wrap? */
- if (pgoff + (size >> PAGE_SHIFT) < pgoff)
- return err;
-
- /* Can we represent this offset inside this architecture's pte's? */
-#if PTE_FILE_MAX_BITS < BITS_PER_LONG
- if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
- return err;
-#endif
-
- /* We need down_write() to change vma->vm_flags. */
- down_read(&mm->mmap_sem);
- retry:
- vma = find_vma(mm, start);
-
- /*
- * Make sure the vma is shared, that it supports prefaulting,
- * and that the remapped range is valid and fully within
- * the single existing vma.
- */
- if (!vma || !(vma->vm_flags & VM_SHARED))
- goto out;
-
- if (!vma->vm_ops || !vma->vm_ops->remap_pages)
- goto out;
-
- if (start < vma->vm_start || start + size > vma->vm_end)
- goto out;
-
- /* Must set VM_NONLINEAR before any pages are populated. */
- if (!(vma->vm_flags & VM_NONLINEAR)) {
- /*
- * vm_private_data is used as a swapout cursor
- * in a VM_NONLINEAR vma.
- */
- if (vma->vm_private_data)
- goto out;
-
- /* Don't need a nonlinear mapping, exit success */
- if (pgoff == linear_page_index(vma, start)) {
- err = 0;
- goto out;
- }
-
- if (!has_write_lock) {
-get_write_lock:
- up_read(&mm->mmap_sem);
- down_write(&mm->mmap_sem);
- has_write_lock = 1;
- goto retry;
- }
- mapping = vma->vm_file->f_mapping;
- /*
- * page_mkclean doesn't work on nonlinear vmas, so if
- * dirty pages need to be accounted, emulate with linear
- * vmas.
- */
- if (mapping_cap_account_dirty(mapping)) {
- unsigned long addr;
- struct file *file = get_file(vma->vm_file);
- /* mmap_region may free vma; grab the info now */
- vm_flags = vma->vm_flags;
-
- addr = mmap_region(file, start, size, vm_flags, pgoff);
- fput(file);
- if (IS_ERR_VALUE(addr)) {
- err = addr;
- } else {
- BUG_ON(addr != start);
- err = 0;
- }
- goto out_freed;
- }
- mutex_lock(&mapping->i_mmap_mutex);
- flush_dcache_mmap_lock(mapping);
- vma->vm_flags |= VM_NONLINEAR;
- vma_interval_tree_remove(vma, &mapping->i_mmap);
- vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
- flush_dcache_mmap_unlock(mapping);
- mutex_unlock(&mapping->i_mmap_mutex);
- }
-
- if (vma->vm_flags & VM_LOCKED) {
- /*
- * drop PG_Mlocked flag for over-mapped range
- */
- if (!has_write_lock)
- goto get_write_lock;
- vm_flags = vma->vm_flags;
- munlock_vma_pages_range(vma, start, start + size);
- vma->vm_flags = vm_flags;
- }
-
- mmu_notifier_invalidate_range_start(mm, start, start + size);
- err = vma->vm_ops->remap_pages(vma, start, size, pgoff);
- mmu_notifier_invalidate_range_end(mm, start, start + size);
-
- /*
- * We can't clear VM_NONLINEAR because we'd have to do
- * it after ->populate completes, and that would prevent
- * downgrading the lock. (Locks can't be upgraded).
- */
-
-out:
- if (vma)
- vm_flags = vma->vm_flags;
-out_freed:
- if (likely(!has_write_lock))
- up_read(&mm->mmap_sem);
- else
- up_write(&mm->mmap_sem);
- if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK)))
- mm_populate(start, size);
-
- return err;
-}
diff --git a/mm/kmemleak-test.c b/mm/kmemleak-test.c
index ff0d9779cec8..dcdcadb69533 100644
--- a/mm/kmemleak-test.c
+++ b/mm/kmemleak-test.c
@@ -18,6 +18,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "kmemleak: " fmt
+
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -50,25 +52,25 @@ static int __init kmemleak_test_init(void)
printk(KERN_INFO "Kmemleak testing\n");
/* make some orphan objects */
- pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
- pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
- pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
- pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
- pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
- pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
- pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
- pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+ pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+ pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+ pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+ pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+ pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+ pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+ pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+ pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
#ifndef CONFIG_MODULES
- pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n",
+ pr_info("kmem_cache_alloc(files_cachep) = %p\n",
kmem_cache_alloc(files_cachep, GFP_KERNEL));
- pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n",
+ pr_info("kmem_cache_alloc(files_cachep) = %p\n",
kmem_cache_alloc(files_cachep, GFP_KERNEL));
#endif
- pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
- pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
- pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
- pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
- pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
/*
* Add elements to a list. They should only appear as orphan
@@ -76,7 +78,7 @@ static int __init kmemleak_test_init(void)
*/
for (i = 0; i < 10; i++) {
elem = kzalloc(sizeof(*elem), GFP_KERNEL);
- pr_info("kmemleak: kzalloc(sizeof(*elem)) = %p\n", elem);
+ pr_info("kzalloc(sizeof(*elem)) = %p\n", elem);
if (!elem)
return -ENOMEM;
INIT_LIST_HEAD(&elem->list);
@@ -85,7 +87,7 @@ static int __init kmemleak_test_init(void)
for_each_possible_cpu(i) {
per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
- pr_info("kmemleak: kmalloc(129) = %p\n",
+ pr_info("kmalloc(129) = %p\n",
per_cpu(kmemleak_test_pointer, i));
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 736ade31d1dc..3cda50c1e394 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -387,7 +387,7 @@ static void dump_object_info(struct kmemleak_object *object)
pr_notice(" min_count = %d\n", object->min_count);
pr_notice(" count = %d\n", object->count);
pr_notice(" flags = 0x%lx\n", object->flags);
- pr_notice(" checksum = %d\n", object->checksum);
+ pr_notice(" checksum = %u\n", object->checksum);
pr_notice(" backtrace:\n");
print_stack_trace(&trace, 4);
}
@@ -990,6 +990,40 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr)
EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
/**
+ * kmemleak_update_trace - update object allocation stack trace
+ * @ptr: pointer to beginning of the object
+ *
+ * Override the object allocation stack trace for cases where the actual
+ * allocation place is not always useful.
+ */
+void __ref kmemleak_update_trace(const void *ptr)
+{
+ struct kmemleak_object *object;
+ unsigned long flags;
+
+ pr_debug("%s(0x%p)\n", __func__, ptr);
+
+ if (!kmemleak_enabled || IS_ERR_OR_NULL(ptr))
+ return;
+
+ object = find_and_get_object((unsigned long)ptr, 1);
+ if (!object) {
+#ifdef DEBUG
+ kmemleak_warn("Updating stack trace for unknown object at %p\n",
+ ptr);
+#endif
+ return;
+ }
+
+ spin_lock_irqsave(&object->lock, flags);
+ object->trace_len = __save_stack_trace(object->trace);
+ spin_unlock_irqrestore(&object->lock, flags);
+
+ put_object(object);
+}
+EXPORT_SYMBOL(kmemleak_update_trace);
+
+/**
* kmemleak_not_leak - mark an allocated object as false positive
* @ptr: pointer to beginning of the object
*
diff --git a/mm/memblock.c b/mm/memblock.c
index 0aa0d2b07624..6d2f219a48b0 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -691,6 +691,7 @@ int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
(unsigned long long)base + size - 1,
(void *)_RET_IP_);
+ kmemleak_free_part(__va(base), size);
return memblock_remove_range(&memblock.reserved, base, size);
}
@@ -1043,9 +1044,14 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
align = SMP_CACHE_BYTES;
found = memblock_find_in_range_node(size, align, start, end, nid);
- if (found && !memblock_reserve(found, size))
+ if (found && !memblock_reserve(found, size)) {
+ /*
+ * The min_count is set to 0 so that memblock allocations are
+ * never reported as leaks.
+ */
+ kmemleak_alloc(__va(found), size, 0, 0);
return found;
-
+ }
return 0;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 39a8e90ac78d..15bda8133ff9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -673,9 +673,11 @@ static void disarm_static_keys(struct mem_cgroup *memcg)
static void drain_all_stock_async(struct mem_cgroup *memcg);
static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
+mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
{
- VM_BUG_ON((unsigned)nid >= nr_node_ids);
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
+
return &memcg->nodeinfo[nid]->zoneinfo[zid];
}
@@ -685,12 +687,12 @@ struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
}
static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
+mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
{
int nid = page_to_nid(page);
int zid = page_zonenum(page);
- return mem_cgroup_zoneinfo(memcg, nid, zid);
+ return &memcg->nodeinfo[nid]->zoneinfo[zid];
}
static struct mem_cgroup_tree_per_zone *
@@ -708,11 +710,9 @@ soft_limit_tree_from_page(struct page *page)
return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
}
-static void
-__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
- struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz,
- unsigned long long new_usage_in_excess)
+static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz,
+ unsigned long long new_usage_in_excess)
{
struct rb_node **p = &mctz->rb_root.rb_node;
struct rb_node *parent = NULL;
@@ -742,10 +742,8 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
mz->on_tree = true;
}
-static void
-__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
- struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz)
+static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
{
if (!mz->on_tree)
return;
@@ -753,13 +751,11 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
mz->on_tree = false;
}
-static void
-mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
- struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz)
+static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
{
spin_lock(&mctz->lock);
- __mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ __mem_cgroup_remove_exceeded(mz, mctz);
spin_unlock(&mctz->lock);
}
@@ -769,16 +765,14 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
unsigned long long excess;
struct mem_cgroup_per_zone *mz;
struct mem_cgroup_tree_per_zone *mctz;
- int nid = page_to_nid(page);
- int zid = page_zonenum(page);
- mctz = soft_limit_tree_from_page(page);
+ mctz = soft_limit_tree_from_page(page);
/*
* Necessary to update all ancestors when hierarchy is used.
* because their event counter is not touched.
*/
for (; memcg; memcg = parent_mem_cgroup(memcg)) {
- mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+ mz = mem_cgroup_page_zoneinfo(memcg, page);
excess = res_counter_soft_limit_excess(&memcg->res);
/*
* We have to update the tree if mz is on RB-tree or
@@ -788,12 +782,12 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
spin_lock(&mctz->lock);
/* if on-tree, remove it */
if (mz->on_tree)
- __mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ __mem_cgroup_remove_exceeded(mz, mctz);
/*
* Insert again. mz->usage_in_excess will be updated.
* If excess is 0, no tree ops.
*/
- __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
+ __mem_cgroup_insert_exceeded(mz, mctz, excess);
spin_unlock(&mctz->lock);
}
}
@@ -801,15 +795,15 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
{
- int node, zone;
- struct mem_cgroup_per_zone *mz;
struct mem_cgroup_tree_per_zone *mctz;
+ struct mem_cgroup_per_zone *mz;
+ int nid, zid;
- for_each_node(node) {
- for (zone = 0; zone < MAX_NR_ZONES; zone++) {
- mz = mem_cgroup_zoneinfo(memcg, node, zone);
- mctz = soft_limit_tree_node_zone(node, zone);
- mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ for_each_node(nid) {
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
+ mctz = soft_limit_tree_node_zone(nid, zid);
+ mem_cgroup_remove_exceeded(mz, mctz);
}
}
}
@@ -832,7 +826,7 @@ retry:
* we will to add it back at the end of reclaim to its correct
* position in the tree.
*/
- __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+ __mem_cgroup_remove_exceeded(mz, mctz);
if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
!css_tryget_online(&mz->memcg->css))
goto retry;
@@ -943,8 +937,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
}
-unsigned long
-mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
+unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
struct mem_cgroup_per_zone *mz;
@@ -952,46 +945,38 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
return mz->lru_size[lru];
}
-static unsigned long
-mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
- unsigned int lru_mask)
+static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
+ int nid,
+ unsigned int lru_mask)
{
- struct mem_cgroup_per_zone *mz;
- enum lru_list lru;
- unsigned long ret = 0;
-
- mz = mem_cgroup_zoneinfo(memcg, nid, zid);
-
- for_each_lru(lru) {
- if (BIT(lru) & lru_mask)
- ret += mz->lru_size[lru];
- }
- return ret;
-}
-
-static unsigned long
-mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
- int nid, unsigned int lru_mask)
-{
- u64 total = 0;
+ unsigned long nr = 0;
int zid;
- for (zid = 0; zid < MAX_NR_ZONES; zid++)
- total += mem_cgroup_zone_nr_lru_pages(memcg,
- nid, zid, lru_mask);
+ VM_BUG_ON((unsigned)nid >= nr_node_ids);
- return total;
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ struct mem_cgroup_per_zone *mz;
+ enum lru_list lru;
+
+ for_each_lru(lru) {
+ if (!(BIT(lru) & lru_mask))
+ continue;
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
+ nr += mz->lru_size[lru];
+ }
+ }
+ return nr;
}
static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
unsigned int lru_mask)
{
+ unsigned long nr = 0;
int nid;
- u64 total = 0;
for_each_node_state(nid, N_MEMORY)
- total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
- return total;
+ nr += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
+ return nr;
}
static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
@@ -1240,11 +1225,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
int uninitialized_var(seq);
if (reclaim) {
- int nid = zone_to_nid(reclaim->zone);
- int zid = zone_idx(reclaim->zone);
struct mem_cgroup_per_zone *mz;
- mz = mem_cgroup_zoneinfo(root, nid, zid);
+ mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone);
iter = &mz->reclaim_iter[reclaim->priority];
if (prev && reclaim->generation != iter->generation) {
iter->last_visited = NULL;
@@ -1351,7 +1334,7 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
goto out;
}
- mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
+ mz = mem_cgroup_zone_zoneinfo(memcg, zone);
lruvec = &mz->lruvec;
out:
/*
@@ -1410,7 +1393,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
pc->mem_cgroup = memcg = root_mem_cgroup;
- mz = page_cgroup_zoneinfo(memcg, page);
+ mz = mem_cgroup_page_zoneinfo(memcg, page);
lruvec = &mz->lruvec;
out:
/*
@@ -1683,8 +1666,9 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
rcu_read_unlock();
- pr_info("memory: usage %llukB, limit %llukB, failcnt %llu\n",
+ pr_info("memory: usage %llukB, low_limit %llukB limit %llukB, failcnt %llu\n",
res_counter_read_u64(&memcg->res, RES_USAGE) >> 10,
+ res_counter_read_u64(&memcg->res, RES_LOW_LIMIT) >> 10,
res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10,
res_counter_read_u64(&memcg->res, RES_FAILCNT));
pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %llu\n",
@@ -2796,6 +2780,43 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
return mem_cgroup_from_id(id);
}
+/**
+ * mem_cgroup_within_guarantee - checks whether given memcg is within its
+ * memory guarantee
+ * @memcg: target memcg for the reclaim
+ * @root: root of the reclaim hierarchy (null for the global reclaim)
+ *
+ * The given group is within its reclaim gurantee if it is below its low limit
+ * or the same applies for any parent up the hierarchy until root (including).
+ * Such a group might be excluded from the reclaim.
+ */
+bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
+ struct mem_cgroup *root)
+{
+ do {
+ if (!res_counter_low_limit_excess(&memcg->res))
+ return true;
+ if (memcg == root)
+ break;
+
+ } while ((memcg = parent_mem_cgroup(memcg)));
+
+ return false;
+}
+
+bool mem_cgroup_all_within_guarantee(struct mem_cgroup *root)
+{
+ struct mem_cgroup *iter;
+
+ for_each_mem_cgroup_tree(iter, root)
+ if (!mem_cgroup_within_guarantee(iter, root)) {
+ mem_cgroup_iter_break(root, iter);
+ return false;
+ }
+
+ return true;
+}
+
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
struct mem_cgroup *memcg = NULL;
@@ -4595,7 +4616,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
break;
} while (1);
}
- __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+ __mem_cgroup_remove_exceeded(mz, mctz);
excess = res_counter_soft_limit_excess(&mz->memcg->res);
/*
* One school of thought says that we should not add
@@ -4606,7 +4627,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
* term TODO.
*/
/* If excess == 0, no tree ops */
- __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+ __mem_cgroup_insert_exceeded(mz, mctz, excess);
spin_unlock(&mctz->lock);
css_put(&mz->memcg->css);
loop++;
@@ -4790,6 +4811,10 @@ static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of,
if (mem_cgroup_is_root(memcg))
return -EINVAL;
+ pr_info_once("%s (%d): memory.force_empty is deprecated and will be "
+ "removed. Let us know if it is needed in your usecase at "
+ "linux-mm@kvack.org\n",
+ current->comm, task_pid_nr(current));
return mem_cgroup_force_empty(memcg) ?: nbytes;
}
@@ -5073,6 +5098,24 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
else
return -EINVAL;
break;
+ case RES_LOW_LIMIT:
+ if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
+ ret = -EINVAL;
+ break;
+ }
+ ret = res_counter_memparse_write_strategy(buf, &val);
+ if (ret)
+ break;
+ if (type == _MEM) {
+ ret = res_counter_set_low_limit(&memcg->res, val);
+ break;
+ }
+ /*
+ * memsw low limit doesn't make any sense and kmem is not
+ * implemented yet - if ever
+ */
+ return -EINVAL;
+
case RES_SOFT_LIMIT:
ret = res_counter_memparse_write_strategy(buf, &val);
if (ret)
@@ -5310,7 +5353,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
for_each_online_node(nid)
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
rstat = &mz->lruvec.reclaim_stat;
recent_rotated[0] += rstat->recent_rotated[0];
@@ -5999,6 +6042,12 @@ static struct cftype mem_cgroup_files[] = {
.read_u64 = mem_cgroup_read_u64,
},
{
+ .name = "low_limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_MEM, RES_LOW_LIMIT),
+ .write = mem_cgroup_write,
+ .read_u64 = mem_cgroup_read_u64,
+ },
+ {
.name = "soft_limit_in_bytes",
.private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
.write = mem_cgroup_write,
@@ -6016,6 +6065,7 @@ static struct cftype mem_cgroup_files[] = {
},
{
.name = "force_empty",
+ .flags = CFTYPE_INSANE,
.write = mem_cgroup_force_empty_write,
},
{
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index dbde41699272..8bc119909e1c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -65,6 +65,8 @@
kernel is not always grateful with that.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/highmem.h>
@@ -91,6 +93,7 @@
#include <linux/ctype.h>
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
+#include <linux/printk.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
@@ -2592,7 +2595,7 @@ void __init numa_policy_init(void)
node_set(prefer, interleave_nodes);
if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
- printk("numa_policy_init: interleaving failed\n");
+ pr_err("%s: interleaving failed\n", __func__);
check_numabalancing_enable();
}
diff --git a/mm/mempool.c b/mm/mempool.c
index 455d468c3a5d..e209c98c7203 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/kmemleak.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
@@ -222,6 +223,11 @@ repeat_alloc:
spin_unlock_irqrestore(&pool->lock, flags);
/* paired with rmb in mempool_free(), read comment there */
smp_wmb();
+ /*
+ * Update the allocation stack trace as this is more useful
+ * for debugging.
+ */
+ kmemleak_update_trace(element);
return element;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index ced5efcdd4b6..af82b67c9f4d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -6,6 +6,8 @@
* Address space accounting code <alan@lxorguk.ukuu.org.uk>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
@@ -37,6 +39,7 @@
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>
+#include <linux/printk.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -361,20 +364,20 @@ static int browse_rb(struct rb_root *root)
struct vm_area_struct *vma;
vma = rb_entry(nd, struct vm_area_struct, vm_rb);
if (vma->vm_start < prev) {
- printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
+ pr_info("vm_start %lx prev %lx\n", vma->vm_start, prev);
bug = 1;
}
if (vma->vm_start < pend) {
- printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
+ pr_info("vm_start %lx pend %lx\n", vma->vm_start, pend);
bug = 1;
}
if (vma->vm_start > vma->vm_end) {
- printk("vm_end %lx < vm_start %lx\n",
+ pr_info("vm_end %lx < vm_start %lx\n",
vma->vm_end, vma->vm_start);
bug = 1;
}
if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
- printk("free gap %lx, correct %lx\n",
+ pr_info("free gap %lx, correct %lx\n",
vma->rb_subtree_gap,
vma_compute_subtree_gap(vma));
bug = 1;
@@ -388,7 +391,7 @@ static int browse_rb(struct rb_root *root)
for (nd = pn; nd; nd = rb_prev(nd))
j++;
if (i != j) {
- printk("backwards %d, forwards %d\n", j, i);
+ pr_info("backwards %d, forwards %d\n", j, i);
bug = 1;
}
return bug ? -1 : i;
@@ -423,17 +426,17 @@ static void validate_mm(struct mm_struct *mm)
i++;
}
if (i != mm->map_count) {
- printk("map_count %d vm_next %d\n", mm->map_count, i);
+ pr_info("map_count %d vm_next %d\n", mm->map_count, i);
bug = 1;
}
if (highest_address != mm->highest_vm_end) {
- printk("mm->highest_vm_end %lx, found %lx\n",
+ pr_info("mm->highest_vm_end %lx, found %lx\n",
mm->highest_vm_end, highest_address);
bug = 1;
}
i = browse_rb(&mm->mm_rb);
if (i != mm->map_count) {
- printk("map_count %d rb %d\n", mm->map_count, i);
+ pr_info("map_count %d rb %d\n", mm->map_count, i);
bug = 1;
}
BUG_ON(bug);
@@ -2578,6 +2581,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
return vm_munmap(addr, len);
}
+
+/*
+ * Emulation of deprecated remap_file_pages() syscall.
+ */
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+ unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
+{
+
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long populate = 0;
+ unsigned long ret = -EINVAL;
+ struct file *file;
+
+ pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
+ "See Documentation/vm/remap_file_pages.txt.\n",
+ current->comm, current->pid);
+
+ if (prot)
+ return ret;
+ start = start & PAGE_MASK;
+ size = size & PAGE_MASK;
+
+ if (start + size <= start)
+ return ret;
+
+ /* Does pgoff wrap? */
+ if (pgoff + (size >> PAGE_SHIFT) < pgoff)
+ return ret;
+
+ down_write(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+
+ if (!vma || !(vma->vm_flags & VM_SHARED))
+ goto out;
+
+ if (start < vma->vm_start || start + size > vma->vm_end)
+ goto out;
+
+ if (pgoff == linear_page_index(vma, start)) {
+ ret = 0;
+ goto out;
+ }
+
+ prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+ prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+ prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+ flags &= MAP_NONBLOCK;
+ flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+ if (vma->vm_flags & VM_LOCKED) {
+ flags |= MAP_LOCKED;
+ /* drop PG_Mlocked flag for over-mapped range */
+ munlock_vma_pages_range(vma, start, start + size);
+ }
+
+ file = get_file(vma->vm_file);
+ ret = do_mmap_pgoff(vma->vm_file, start, size,
+ prot, flags, pgoff, &populate);
+ fput(file);
+out:
+ up_write(&mm->mmap_sem);
+ if (populate)
+ mm_populate(ret, populate);
+ if (!IS_ERR_VALUE(ret))
+ ret = 0;
+ return ret;
+}
+
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
@@ -3280,7 +3352,7 @@ static struct notifier_block reserve_mem_nb = {
static int __meminit init_reserve_notifier(void)
{
if (register_hotmemory_notifier(&reserve_mem_nb))
- printk("Failed registering memory add/remove notifier for admin reserve");
+ pr_err("Failed registering memory add/remove notifier for admin reserve\n");
return 0;
}
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 04a9d94333a5..7ed58602e71b 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -197,7 +197,6 @@ unsigned long __init free_all_bootmem(void)
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size)
{
- kmemleak_free_part(__va(physaddr), size);
memblock_free(physaddr, size);
}
@@ -212,7 +211,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
*/
void __init free_bootmem(unsigned long addr, unsigned long size)
{
- kmemleak_free_part(__va(addr), size);
memblock_free(addr, size);
}
diff --git a/mm/nommu.c b/mm/nommu.c
index 85f8d6698d48..e6ced9d836dd 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -13,6 +13,8 @@
* Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
@@ -32,6 +34,7 @@
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/sched/sysctl.h>
+#include <linux/printk.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
@@ -1246,7 +1249,7 @@ error_free:
return ret;
enomem:
- printk("Allocation of length %lu from process %d (%s) failed\n",
+ pr_err("Allocation of length %lu from process %d (%s) failed\n",
len, current->pid, current->comm);
show_free_areas(0);
return -ENOMEM;
@@ -1996,14 +1999,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_map_pages);
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff)
-{
- BUG();
- return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, int write)
{
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 537d91cd3d28..ba037e32b8f3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1682,7 +1682,7 @@ void throttle_vm_writeout(gfp_t gfp_mask)
/*
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
*/
-int dirty_writeback_centisecs_handler(ctl_table *table, int write,
+int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, buffer, length, ppos);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 964130a74cff..7f9776747bde 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3389,7 +3389,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
/*
* sysctl handler for numa_zonelist_order
*/
-int numa_zonelist_order_handler(ctl_table *table, int write,
+int numa_zonelist_order_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
loff_t *ppos)
{
@@ -5810,7 +5810,7 @@ module_init(init_per_zone_wmark_min)
* that we can call two helper functions whenever min_free_kbytes
* changes.
*/
-int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
+int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5827,7 +5827,7 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
}
#ifdef CONFIG_NUMA
-int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
@@ -5843,7 +5843,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
return 0;
}
-int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
@@ -5869,7 +5869,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
* minimum watermarks. The lowmem reserve ratio can only make sense
* if in function of the boot time zone sizes.
*/
-int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec_minmax(table, write, buffer, length, ppos);
@@ -5882,7 +5882,7 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
* cpu. It is the fraction of total pages in each zone that a hot per cpu
* pagelist can have before it gets flushed back to buddy allocator.
*/
-int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
diff --git a/mm/page_io.c b/mm/page_io.c
index 243a9b76e5ce..1ee120adbb45 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -274,9 +274,11 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
.count = PAGE_SIZE,
.iov_offset = 0,
.nr_segs = 1,
- .bvec = &bv
};
+ /* Do this by hand because old gcc messes up the initializer */
+ from.bvec = &bv;
+
init_sync_kiocb(&kiocb, swap_file);
kiocb.ki_pos = page_file_offset(page);
kiocb.ki_nbytes = PAGE_SIZE;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3c2caadbdb0f..7c4eebdc6d44 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -11,6 +11,8 @@
* Multiqueue VM started 5.8.00, Rik van Riel.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/gfp.h>
@@ -43,6 +45,7 @@
#include <linux/sysctl.h>
#include <linux/oom.h>
#include <linux/prefetch.h>
+#include <linux/printk.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -83,6 +86,9 @@ struct scan_control {
/* Scan (total_size >> priority) pages at once */
int priority;
+ /* anon vs. file LRUs scanning "ratio" */
+ int swappiness;
+
/*
* The memory cgroup that hit its limit and as a result is the
* primary target of this reclaim invocation.
@@ -477,7 +483,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
if (page_has_private(page)) {
if (try_to_free_buffers(page)) {
ClearPageDirty(page);
- printk("%s: orphaned page\n", __func__);
+ pr_info("%s: orphaned page\n", __func__);
return PAGE_CLEAN;
}
}
@@ -1845,13 +1851,6 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
}
-static int vmscan_swappiness(struct scan_control *sc)
-{
- if (global_reclaim(sc))
- return vm_swappiness;
- return mem_cgroup_swappiness(sc->target_mem_cgroup);
-}
-
enum scan_balance {
SCAN_EQUAL,
SCAN_FRACT,
@@ -1912,7 +1911,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* using the memory controller's swap limit feature would be
* too expensive.
*/
- if (!global_reclaim(sc) && !vmscan_swappiness(sc)) {
+ if (!global_reclaim(sc) && !sc->swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -1922,7 +1921,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
- if (!sc->priority && vmscan_swappiness(sc)) {
+ if (!sc->priority && sc->swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
@@ -1965,7 +1964,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = vmscan_swappiness(sc);
+ anon_prio = sc->swappiness;
file_prio = 200 - anon_prio;
/*
@@ -2230,9 +2229,21 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+/**
+ * __shrink_zone - shrinks a given zone
+ *
+ * @zone: zone to shrink
+ * @sc: scan control with additional reclaim parameters
+ * @honor_memcg_guarantee: do not reclaim memcgs which are within their memory
+ * guarantee
+ *
+ * Returns the number of reclaimed memcgs.
+ */
+static unsigned __shrink_zone(struct zone *zone, struct scan_control *sc,
+ bool honor_memcg_guarantee)
{
unsigned long nr_reclaimed, nr_scanned;
+ unsigned nr_scanned_groups = 0;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2249,8 +2260,22 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
do {
struct lruvec *lruvec;
+ /* Memcg might be protected from the reclaim */
+ if (honor_memcg_guarantee &&
+ mem_cgroup_within_guarantee(memcg, root)) {
+ /*
+ * It would be more optimal to skip the memcg
+ * subtree now but we do not have a memcg iter
+ * helper for that. Anyone?
+ */
+ memcg = mem_cgroup_iter(root, memcg, &reclaim);
+ continue;
+ }
+
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ nr_scanned_groups++;
+ sc->swappiness = mem_cgroup_swappiness(memcg);
shrink_lruvec(lruvec, sc);
/*
@@ -2277,6 +2302,27 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+
+ return nr_scanned_groups;
+}
+
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
+{
+ bool honor_guarantee = true;
+
+ while (!__shrink_zone(zone, sc, honor_guarantee)) {
+ /*
+ * The previous round of reclaim didn't find anything to scan
+ * because
+ * a) the whole reclaimed hierarchy is within guarantee so
+ * we fallback to ignore the guarantee because other option
+ * would be the OOM
+ * b) multiple reclaimers are racing and so the first round
+ * should be retried
+ */
+ if (mem_cgroup_all_within_guarantee(sc->target_mem_cgroup))
+ honor_guarantee = false;
+ }
}
/* Returns true if compaction should go ahead for a high-order request */
@@ -2717,6 +2763,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
.may_swap = !noswap,
.order = 0,
.priority = 0,
+ .swappiness = mem_cgroup_swappiness(memcg),
.target_mem_cgroup = memcg,
};
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);