summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2018-11-21 14:52:07 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2018-11-21 14:52:07 +1100
commitabf3208a5392a47a17711cd8b4502d5c9b07c64f (patch)
tree26022f578ed63ab05f8e3f639309888008558441
parenta1cfa885bb22689f348185f5944ebe8ba0348bf8 (diff)
parent379c80d931fec0edccb6ee39d8122b53ebb5c602 (diff)
Merge branch 'akpm-current/current'
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt3
-rw-r--r--Documentation/filesystems/proc.txt7
-rw-r--r--Documentation/process/coding-style.rst3
-rw-r--r--arch/Kconfig5
-rw-r--r--arch/alpha/include/asm/bitops.h4
-rw-r--r--arch/alpha/include/asm/pgalloc.h6
-rw-r--r--arch/arc/include/asm/bitops.h4
-rw-r--r--arch/arc/include/asm/pgalloc.h5
-rw-r--r--arch/arm/include/asm/page.h2
-rw-r--r--arch/arm/include/asm/pgalloc.h4
-rw-r--r--arch/arm64/include/asm/pgalloc.h4
-rw-r--r--arch/c6x/include/asm/bitops.h2
-rw-r--r--arch/csky/include/asm/bitops.h2
-rw-r--r--arch/csky/mm/init.c4
-rw-r--r--arch/hexagon/include/asm/bitops.h2
-rw-r--r--arch/hexagon/include/asm/pgalloc.h6
-rw-r--r--arch/ia64/include/asm/bitops.h3
-rw-r--r--arch/ia64/include/asm/pgalloc.h5
-rw-r--r--arch/m68k/include/asm/bitops.h2
-rw-r--r--arch/m68k/include/asm/mcf_pgalloc.h8
-rw-r--r--arch/m68k/include/asm/motorola_pgalloc.h4
-rw-r--r--arch/m68k/include/asm/sun3_pgalloc.h6
-rw-r--r--arch/microblaze/include/asm/pgalloc.h19
-rw-r--r--arch/microblaze/mm/pgtable.c3
-rw-r--r--arch/mips/include/asm/bitops.h2
-rw-r--r--arch/mips/include/asm/pgalloc.h6
-rw-r--r--arch/nds32/include/asm/pgalloc.h5
-rw-r--r--arch/nios2/include/asm/pgalloc.h6
-rw-r--r--arch/openrisc/include/asm/bitops/fls.h2
-rw-r--r--arch/openrisc/include/asm/pgalloc.h5
-rw-r--r--arch/openrisc/mm/ioremap.c3
-rw-r--r--arch/parisc/include/asm/bitops.h2
-rw-r--r--arch/parisc/include/asm/pgalloc.h4
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgalloc.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h12
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgalloc.h4
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgalloc.h6
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c2
-rw-r--r--arch/powerpc/mm/pgtable_32.c4
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c10
-rw-r--r--arch/riscv/include/asm/pgalloc.h6
-rw-r--r--arch/s390/include/asm/bitops.h4
-rw-r--r--arch/s390/include/asm/pgalloc.h4
-rw-r--r--arch/s390/mm/init.c2
-rw-r--r--arch/sh/boards/mach-kfr2r09/setup.c1
-rw-r--r--arch/sh/include/asm/io.h1
-rw-r--r--arch/sh/include/asm/pgalloc.h6
-rw-r--r--arch/sparc/include/asm/pgalloc_32.h5
-rw-r--r--arch/sparc/include/asm/pgalloc_64.h6
-rw-r--r--arch/sparc/include/asm/pgtable_64.h30
-rw-r--r--arch/sparc/mm/init_64.c6
-rw-r--r--arch/sparc/mm/srmmu.c4
-rw-r--r--arch/um/include/asm/pgalloc.h4
-rw-r--r--arch/um/kernel/mem.c8
-rw-r--r--arch/unicore32/include/asm/bitops.h2
-rw-r--r--arch/unicore32/include/asm/pgalloc.h4
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/pgalloc.h4
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c5
-rw-r--r--arch/x86/mm/pgtable.c4
-rw-r--r--arch/xtensa/include/asm/pgalloc.h8
-rw-r--r--block/genhd.c2
-rw-r--r--drivers/char/agp/backend.c4
-rw-r--r--drivers/dma-buf/udmabuf.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c4
-rw-r--r--drivers/hv/hv_balloon.c25
-rw-r--r--drivers/md/dm-bufio.c2
-rw-r--r--drivers/md/dm-crypt.c2
-rw-r--r--drivers/md/dm-integrity.c2
-rw-r--r--drivers/md/dm-stats.c2
-rw-r--r--drivers/media/platform/mtk-vpu/mtk_vpu.c2
-rw-r--r--drivers/misc/vmw_balloon.c2
-rw-r--r--drivers/parisc/ccio-dma.c4
-rw-r--r--drivers/parisc/sba_iommu.c4
-rw-r--r--drivers/staging/android/ion/ion_system_heap.c2
-rw-r--r--drivers/xen/balloon.c23
-rw-r--r--drivers/xen/xen-selfballoon.c6
-rw-r--r--fs/bfs/bfs.h11
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c65
-rw-r--r--fs/binfmt_script.c10
-rw-r--r--fs/buffer.c50
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/eventpoll.c218
-rw-r--r--fs/exec.c103
-rw-r--r--fs/file_table.c7
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/hfsplus/dir.c1
-rw-r--r--fs/hfsplus/hfsplus_fs.h2
-rw-r--r--fs/hfsplus/inode.c21
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/ntfs/malloc.h2
-rw-r--r--fs/ocfs2/Makefile2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c17
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c3
-rw-r--r--fs/ocfs2/localalloc.c3
-rw-r--r--fs/ocfs2/move_extents.c47
-rw-r--r--fs/proc/base.c14
-rw-r--r--fs/proc/task_mmu.c14
-rw-r--r--fs/proc/util.c1
-rw-r--r--fs/userfaultfd.c11
-rw-r--r--include/asm-generic/bitops/builtin-fls.h2
-rw-r--r--include/asm-generic/bitops/fls.h2
-rw-r--r--include/linux/binfmts.h1
-rw-r--r--include/linux/compiler-gcc.h2
-rw-r--r--include/linux/genalloc.h13
-rw-r--r--include/linux/highmem.h28
-rw-r--r--include/linux/ipc_namespace.h1
-rw-r--r--include/linux/kernel.h8
-rw-r--r--include/linux/memblock.h38
-rw-r--r--include/linux/memory_hotplug.h2
-rw-r--r--include/linux/mm.h90
-rw-r--r--include/linux/mmu_notifier.h1
-rw-r--r--include/linux/mmzone.h15
-rw-r--r--include/linux/sched.h7
-rw-r--r--include/linux/sched/task.h2
-rw-r--r--include/linux/swap.h31
-rw-r--r--include/linux/xxhash.h23
-rw-r--r--include/uapi/linux/bfs_fs.h2
-rw-r--r--include/uapi/linux/binfmts.h2
-rw-r--r--include/uapi/linux/fcntl.h1
-rw-r--r--init/initramfs.c6
-rw-r--r--init/main.c1
-rw-r--r--ipc/ipc_sysctl.c14
-rw-r--r--ipc/util.c29
-rw-r--r--ipc/util.h46
-rw-r--r--kernel/fork.c10
-rw-r--r--kernel/kexec_core.c5
-rw-r--r--kernel/power/snapshot.c2
-rw-r--r--lib/Kconfig.debug6
-rw-r--r--lib/genalloc.c20
-rw-r--r--lib/show_mem.c2
-rw-r--r--mm/Kconfig1
-rw-r--r--mm/debug.c23
-rw-r--r--mm/highmem.c5
-rw-r--r--mm/huge_memory.c10
-rw-r--r--mm/internal.h11
-rw-r--r--mm/kasan/kasan_init.c2
-rw-r--r--mm/kasan/quarantine.c2
-rw-r--r--mm/ksm.c51
-rw-r--r--mm/memblock.c69
-rw-r--r--mm/memfd.c22
-rw-r--r--mm/memory.c21
-rw-r--r--mm/memory_hotplug.c124
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mincore.c12
-rw-r--r--mm/mm_init.c2
-rw-r--r--mm/mmap.c16
-rw-r--r--mm/mmu_notifier.c7
-rw-r--r--mm/mremap.c66
-rw-r--r--mm/oom_kill.c2
-rw-r--r--mm/page_alloc.c650
-rw-r--r--mm/page_owner.c5
-rw-r--r--mm/readahead.c12
-rw-r--r--mm/shmem.c8
-rw-r--r--mm/slab.c2
-rw-r--r--mm/slab_common.c4
-rw-r--r--mm/slub.c31
-rw-r--r--mm/sparse.c9
-rw-r--r--mm/swap.c2
-rw-r--r--mm/swap_state.c16
-rw-r--r--mm/swapfile.c159
-rw-r--r--mm/truncate.c8
-rw-r--r--mm/userfaultfd.c2
-rw-r--r--mm/util.c2
-rw-r--r--mm/vmalloc.c4
-rw-r--r--mm/vmscan.c23
-rw-r--r--mm/vmstat.c4
-rw-r--r--mm/workingset.c2
-rw-r--r--mm/zswap.c4
-rw-r--r--net/dccp/proto.c7
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c7
-rw-r--r--net/netfilter/xt_hashlimit.c5
-rw-r--r--net/sctp/protocol.c7
-rwxr-xr-xscripts/bloat-o-meter1
-rwxr-xr-xscripts/checkpatch.pl13
-rw-r--r--security/integrity/ima/ima_kexec.c2
-rw-r--r--tools/include/asm-generic/bitops/fls.h2
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c74
-rw-r--r--virt/kvm/arm/mmu.c2
189 files changed, 1889 insertions, 1061 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 24fb02d7d9ce..7fae41af1e6d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1799,6 +1799,9 @@
ip= [IP_PNP]
See Documentation/filesystems/nfs/nfsroot.txt.
+ ipcmni_extend [KNL] Extend the maximum number of unique System V
+ IPC identifiers from 32,768 to 8,388,608.
+
irqaffinity= [SMP] Set the default irq affinity mask
The argument is a cpu list, as described above.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index af88fa238786..23605c9167dd 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -502,9 +502,14 @@ manner. The codes are the following:
sd - soft-dirty flag
mm - mixed map area
hg - huge page advise flag
- nh - no-huge page advise flag
+ nh - no-huge page advise flag [*]
mg - mergable advise flag
+ [*] A process mapping may be advised to not be backed by transparent hugepages
+ by either madvise(MADV_NOHUGEPAGE) or prctl(PR_SET_THP_DISABLE). See
+ Documentation/admin-guide/mm/transhuge.rst for system-wide and process
+ mapping policies.
+
Note that there is no guarantee that every flag and associated mnemonic will
be present in all further kernel releases. Things get changed, the flags may
be vanished or the reverse -- new added.
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index 4e7c0a1c427a..4734619d6dc1 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -443,6 +443,9 @@ In function prototypes, include parameter names with their data types.
Although this is not required by the C language, it is preferred in Linux
because it is a simple way to add valuable information for the reader.
+Do not use the `extern' keyword with function prototypes as this makes
+lines longer and isn't strictly necessary.
+
7) Centralized exiting of functions
-----------------------------------
diff --git a/arch/Kconfig b/arch/Kconfig
index e1e540ffa979..b70c952ac838 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -535,6 +535,11 @@ config HAVE_IRQ_TIME_ACCOUNTING
Archs need to ensure they use a high enough resolution clock to
support irq time accounting and then call enable_sched_clock_irqtime().
+config HAVE_MOVE_PMD
+ bool
+ help
+ Archs that select this are able to move page tables at the PMD level.
+
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
bool
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h
index ca43f4d0b937..5adca78830b5 100644
--- a/arch/alpha/include/asm/bitops.h
+++ b/arch/alpha/include/asm/bitops.h
@@ -391,9 +391,9 @@ static inline unsigned long __fls(unsigned long x)
return fls64(x) - 1;
}
-static inline int fls(int x)
+static inline int fls(unsigned int x)
{
- return fls64((unsigned int) x);
+ return fls64(x);
}
/*
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index ab3e3a8638fb..02f9f91bb4f0 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -52,7 +52,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
}
static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
@@ -65,9 +65,9 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
}
static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
{
- pte_t *pte = pte_alloc_one_kernel(mm, address);
+ pte_t *pte = pte_alloc_one_kernel(mm);
struct page *page;
if (!pte)
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 8da87feec59a..ee9246184033 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -278,7 +278,7 @@ static inline __attribute__ ((const)) int clz(unsigned int x)
return res;
}
-static inline int constant_fls(int x)
+static inline int constant_fls(unsigned int x)
{
int r = 32;
@@ -312,7 +312,7 @@ static inline int constant_fls(int x)
* @result: [1-32]
* fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
*/
-static inline __attribute__ ((const)) int fls(unsigned long x)
+static inline __attribute__ ((const)) int fls(unsigned int x)
{
if (__builtin_constant_p(x))
return constant_fls(x);
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 3749234b7419..9c9b5a5ebf2e 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -90,8 +90,7 @@ static inline int __get_order_pte(void)
return get_order(PTRS_PER_PTE * sizeof(pte_t));
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
@@ -102,7 +101,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
}
static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
{
pgtable_t pte_pg;
struct page *page;
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 4355f0ec44d6..f98baaec0a15 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -17,6 +17,8 @@
#ifndef __ASSEMBLY__
+#include <linux/personality.h> /* For READ_IMPLIES_EXEC */
+
#ifndef CONFIG_MMU
#include <asm/page-nommu.h>
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 2d7344f0e208..17ab72f0cc4e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void clean_pte_table(pte_t *pte)
* +------------+
*/
static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
@@ -93,7 +93,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
}
static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 2e05bcd944c8..52fa47c73bf0 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_page(PGALLOC_GFP);
}
static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
diff --git a/arch/c6x/include/asm/bitops.h b/arch/c6x/include/asm/bitops.h
index f0ab012401b6..8b68234ace18 100644
--- a/arch/c6x/include/asm/bitops.h
+++ b/arch/c6x/include/asm/bitops.h
@@ -54,7 +54,7 @@ static inline unsigned long __ffs(unsigned long x)
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static inline int fls(int x)
+static inline int fls(unsigned int x)
{
if (!x)
return 0;
diff --git a/arch/csky/include/asm/bitops.h b/arch/csky/include/asm/bitops.h
index 335f2883fb1e..43b9838bff63 100644
--- a/arch/csky/include/asm/bitops.h
+++ b/arch/csky/include/asm/bitops.h
@@ -40,7 +40,7 @@ static __always_inline unsigned long __ffs(unsigned long x)
/*
* asm-generic/bitops/fls.h
*/
-static __always_inline int fls(int x)
+static __always_inline int fls(unsigned int x)
{
asm volatile(
"ff1 %0\n"
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index dc07c078f9b8..66e597053488 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -71,7 +71,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
ClearPageReserved(virt_to_page(start));
init_page_count(virt_to_page(start));
free_page(start);
- totalram_pages++;
+ totalram_pages_inc();
}
}
#endif
@@ -88,7 +88,7 @@ void free_initmem(void)
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
free_page(addr);
- totalram_pages++;
+ totalram_pages_inc();
addr += PAGE_SIZE;
}
diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 2691a1857d20..bee974262387 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -211,7 +211,7 @@ static inline long ffz(int x)
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static inline int fls(int x)
+static inline int fls(unsigned int x)
{
int r;
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index eeebf862c46c..d36183887b60 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -59,8 +59,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
}
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
@@ -75,8 +74,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
}
/* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
gfp_t flags = GFP_KERNEL | __GFP_ZERO;
return (pte_t *) __get_free_page(flags);
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index 56a774bf13fa..2f24ee6459d2 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -388,8 +388,7 @@ ia64_fls (unsigned long x)
* Find the last (most significant) bit set. Returns 0 for x==0 and
* bits are numbered from 1..32 (e.g., fls(9) == 4).
*/
-static inline int
-fls (int t)
+static inline int fls(unsigned int t)
{
unsigned long x = t & 0xffffffffu;
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 3ee5362f2661..c9e481023c25 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -83,7 +83,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
pmd_val(*pmd_entry) = __pa(pte);
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *page;
void *pg;
@@ -99,8 +99,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
return page;
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return quicklist_alloc(0, GFP_KERNEL, NULL);
}
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index d979f38af751..10133a968c8e 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -502,7 +502,7 @@ static inline unsigned long __ffs(unsigned long x)
/*
* fls: find last bit set.
*/
-static inline int fls(int x)
+static inline int fls(unsigned int x)
{
int cnt;
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 12fe700632f4..4399d712f6db 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -12,8 +12,7 @@ extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
extern const char bad_pmd_string[];
-extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
unsigned long page = __get_free_page(GFP_DMA);
@@ -32,8 +31,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); })
-#define pte_alloc_one_fast(mm, addr) pte_alloc_one(mm, addr)
-
#define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
(unsigned long)(page_address(page)))
@@ -50,8 +47,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
#define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *page = alloc_pages(GFP_DMA, 0);
pte_t *pte;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 7859a86319cf..d04d9ba9b976 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -8,7 +8,7 @@
extern pmd_t *get_pointer_table(void);
extern int free_pointer_table(pmd_t *);
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
@@ -28,7 +28,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
free_page((unsigned long) pte);
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *page;
pte_t *pte;
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 11485d38de4e..1456c5eecbd9 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -35,8 +35,7 @@ do { \
tlb_remove_page((tlb), pte); \
} while (0)
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
unsigned long page = __get_free_page(GFP_KERNEL);
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
return (pte_t *) (page);
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *page = alloc_pages(GFP_KERNEL, 0);
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7c89390c0c13..f4cc9ffc449e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -108,10 +108,9 @@ static inline void free_pgd_slow(pgd_t *pgd)
#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); })
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *ptepage;
@@ -132,20 +131,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
return ptepage;
}
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
- unsigned long address)
-{
- unsigned long *ret;
-
- ret = pte_quicklist;
- if (ret != NULL) {
- pte_quicklist = (unsigned long *)(*ret);
- ret[0] = 0;
- pgtable_cache_size--;
- }
- return (pte_t *)ret;
-}
-
static inline void pte_free_fast(pte_t *pte)
{
*(unsigned long **)pte = pte_quicklist;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 7f525962cdfa..c2ce1e42b888 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -235,8 +235,7 @@ unsigned long iopa(unsigned long addr)
return pa;
}
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
if (mem_init_done) {
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index f2a840fb6a9a..c4675957b21b 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -555,7 +555,7 @@ static inline unsigned long __ffs(unsigned long word)
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static inline int fls(int x)
+static inline int fls(unsigned int x)
{
int r;
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 39b9f311c4ef..27808d9461f4 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -50,14 +50,12 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
}
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 27448869131a..3c5fee5b5759 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -22,8 +22,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
#define check_pgt_cache() do { } while (0)
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
@@ -34,7 +33,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
return pte;
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
pgtable_t pte;
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index bb47d08c8ef7..3a149ead1207 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -37,8 +37,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
@@ -47,8 +46,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
return pte;
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
diff --git a/arch/openrisc/include/asm/bitops/fls.h b/arch/openrisc/include/asm/bitops/fls.h
index 9efbf9ad86c4..57de5a1115bf 100644
--- a/arch/openrisc/include/asm/bitops/fls.h
+++ b/arch/openrisc/include/asm/bitops/fls.h
@@ -15,7 +15,7 @@
#ifdef CONFIG_OPENRISC_HAVE_INST_FL1
-static inline int fls(int x)
+static inline int fls(unsigned int x)
{
int ret;
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 8999b9226512..149c82ee4b8b 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -70,10 +70,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long)pgd);
}
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_pages(GFP_KERNEL, 0);
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index c9697529b3f0..270d1c9bc0d6 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -118,8 +118,7 @@ EXPORT_SYMBOL(iounmap);
* the memblock infrastructure.
*/
-pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h
index 53252d4f9a57..a09eaebfdfd0 100644
--- a/arch/parisc/include/asm/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
@@ -188,7 +188,7 @@ static __inline__ int ffs(int x)
* fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __inline__ int fls(int x)
+static __inline__ int fls(unsigned int x)
{
int ret;
if (!x)
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index cf13275f7c6d..d05c678c77c4 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
#define pmd_pgtable(pmd) pmd_page(pmd)
static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pte_alloc_one(struct mm_struct *mm)
{
struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
if (!page)
@@ -135,7 +135,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
}
static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..af9e13555d95 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -82,8 +82,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
#define pmd_pgtable(pmd) pmd_page(pmd)
#endif
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 391ed2c3b697..8f1d92e99fe5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -42,7 +42,7 @@ extern struct kmem_cache *pgtable_cache[];
pgtable_cache[(shift) - 1]; \
})
-extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
+extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
extern void pte_fragment_free(unsigned long *, int);
extern void pmd_fragment_free(unsigned long *);
@@ -192,16 +192,14 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
return (pgtable_t)pmd_page_vaddr(pmd);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
- return (pte_t *)pte_fragment_alloc(mm, address, 1);
+ return (pte_t *)pte_fragment_alloc(mm, 1);
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- return (pgtable_t)pte_fragment_alloc(mm, address, 0);
+ return (pgtable_t)pte_fragment_alloc(mm, 0);
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..16623f53f0d4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -83,8 +83,8 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
#define pmd_pgtable(pmd) pmd_page(pmd)
#endif
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm);
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..2e7e0230edf4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -96,14 +96,12 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *page;
pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 9f93c9f985c5..6f9d434b0a4c 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -384,7 +384,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
return (pte_t *)ret;
}
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
{
pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index bda3c6f1bd32..1d8e2d98db98 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
extern char etext[], _stext[], _sinittext[], _einittext[];
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
@@ -57,7 +57,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
return pte;
}
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *ptepage;
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 25427a48feae..e8d63a6a9002 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -208,7 +208,7 @@ static long cmm_alloc_pages(long nr)
pa->page[pa->index++] = addr;
loaned_pages++;
- totalram_pages--;
+ totalram_pages_dec();
spin_unlock(&cmm_lock);
nr--;
}
@@ -247,7 +247,7 @@ static long cmm_free_pages(long nr)
free_page(addr);
loaned_pages--;
nr--;
- totalram_pages++;
+ totalram_pages_inc();
}
spin_unlock(&cmm_lock);
cmm_dbg("End request with %ld pages unfulfilled\n", nr);
@@ -291,7 +291,7 @@ static void cmm_get_mpp(void)
int rc;
struct hvcall_mpp_data mpp_data;
signed long active_pages_target, page_loan_request, target;
- signed long total_pages = totalram_pages + loaned_pages;
+ signed long total_pages = totalram_pages() + loaned_pages;
signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
rc = h_get_mpp(&mpp_data);
@@ -322,7 +322,7 @@ static void cmm_get_mpp(void)
cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
page_loan_request, loaned_pages, loaned_pages_target,
- oom_freed_pages, totalram_pages);
+ oom_freed_pages, totalram_pages());
}
static struct notifier_block cmm_oom_nb = {
@@ -581,7 +581,7 @@ static int cmm_mem_going_offline(void *arg)
free_page(pa_curr->page[idx]);
freed++;
loaned_pages--;
- totalram_pages++;
+ totalram_pages_inc();
pa_curr->page[idx] = pa_last->page[--pa_last->index];
if (pa_last->index == 0) {
if (pa_curr == pa_last)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index a79ed5faff3a..94043cf83c90 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,15 +82,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#endif /* __PAGETABLE_PMD_FOLDED */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_page(
GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
}
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 86e5b2fdee3c..d1f8a4d94cca 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -397,9 +397,9 @@ static inline int fls64(unsigned long word)
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static inline int fls(int word)
+static inline int fls(unsigned int word)
{
- return fls64((unsigned int)word);
+ return fls64(word);
}
#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 5ee733720a57..bccb8f4a63e2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -139,8 +139,8 @@ static inline void pmd_populate(struct mm_struct *mm,
/*
* page table entry allocation/free routines.
*/
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 76d0708438e9..50388190b393 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -59,7 +59,7 @@ static void __init setup_zero_pages(void)
order = 7;
/* Limit number of empty zero pages for small memory sizes */
- while (order > 2 && (totalram_pages >> 10) < (1UL << order))
+ while (order > 2 && (totalram_pages() >> 10) < (1UL << order))
order--;
empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index e59c577ed871..c70bc7809dda 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -25,7 +25,6 @@
#include <linux/memblock.h>
#include <linux/mfd/tmio.h>
#include <linux/mmc/host.h>
-#include <linux/mtd/onenand.h>
#include <linux/mtd/physmap.h>
#include <linux/platform_data/lv5207lp.h>
#include <linux/platform_device.h>
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 98cb8c802b1a..4f7f235f15f8 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -24,6 +24,7 @@
#define __IO_PREFIX generic
#include <asm/io_generic.h>
#include <asm/io_trapped.h>
+#include <asm-generic/pci_iomap.h>
#include <mach/mangle-port.h>
#define __raw_writeb(v,a) (__chk_io_ptr(a), *(volatile u8 __force *)(a) = (v))
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index ed053a359ab7..8ad73cb31121 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -32,14 +32,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
/*
* Allocate and free page tables.
*/
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *page;
void *pg;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 90459481c6c7..282be50a4adf 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -58,10 +58,9 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
void pmd_set(pmd_t *pmdp, pte_t *ptep);
#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
}
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 874632f34f62..48abccba4991 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -60,10 +60,8 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(pgtable_cache, pmd);
}
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address);
-pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address);
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
+pgtable_t pte_alloc_one(struct mm_struct *mm);
void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
void pte_free(struct mm_struct *mm, pgtable_t ptepage);
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 1393a8ac596b..22500c3be7a9 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -231,36 +231,6 @@ extern unsigned long _PAGE_ALL_SZ_BITS;
extern struct page *mem_map_zero;
#define ZERO_PAGE(vaddr) (mem_map_zero)
-/* This macro must be updated when the size of struct page grows above 80
- * or reduces below 64.
- * The idea that compiler optimizes out switch() statement, and only
- * leaves clrx instructions
- */
-#define mm_zero_struct_page(pp) do { \
- unsigned long *_pp = (void *)(pp); \
- \
- /* Check that struct page is either 64, 72, or 80 bytes */ \
- BUILD_BUG_ON(sizeof(struct page) & 7); \
- BUILD_BUG_ON(sizeof(struct page) < 64); \
- BUILD_BUG_ON(sizeof(struct page) > 80); \
- \
- switch (sizeof(struct page)) { \
- case 80: \
- _pp[9] = 0; /* fallthrough */ \
- case 72: \
- _pp[8] = 0; /* fallthrough */ \
- default: \
- _pp[7] = 0; \
- _pp[6] = 0; \
- _pp[5] = 0; \
- _pp[4] = 0; \
- _pp[3] = 0; \
- _pp[2] = 0; \
- _pp[1] = 0; \
- _pp[0] = 0; \
- } \
-} while (0)
-
/* PFNs are real physical page numbers. However, mem_map only begins to record
* per-page information starting at pfn_base. This is to handle systems where
* the first physical page in the machine is at some huge physical address,
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 3c8aac21f426..b4221d3727d0 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2925,8 +2925,7 @@ void __flush_tlb_all(void)
: : "r" (pstate));
}
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
pte_t *pte = NULL;
@@ -2937,8 +2936,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
return pte;
}
-pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index a6142c5abf61..b609362e846f 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -364,12 +364,12 @@ pgd_t *get_pgd_fast(void)
* Alignments up to the page size are the same for physical and virtual
* addresses of the nocache area.
*/
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
{
unsigned long pte;
struct page *page;
- if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+ if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
return NULL;
page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bf90b2aa2002..99eb5682792a 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,8 +25,8 @@
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 1067469ba2ea..799b571a8f88 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -51,8 +51,8 @@ void __init mem_init(void)
/* this will put all low memory onto the freelists */
memblock_free_all();
- max_low_pfn = totalram_pages;
- max_pfn = totalram_pages;
+ max_low_pfn = totalram_pages();
+ max_pfn = max_low_pfn;
mem_init_print_info(NULL);
kmalloc_ok = 1;
}
@@ -199,7 +199,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
}
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
@@ -207,7 +207,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
return pte;
}
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
diff --git a/arch/unicore32/include/asm/bitops.h b/arch/unicore32/include/asm/bitops.h
index c0cbdbe17168..de5853761c22 100644
--- a/arch/unicore32/include/asm/bitops.h
+++ b/arch/unicore32/include/asm/bitops.h
@@ -22,7 +22,7 @@
* the cntlz instruction for much better code efficiency.
*/
-static inline int fls(int x)
+static inline int fls(unsigned int x)
{
int ret;
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index f0fdb268f8f2..7cceabecf4e3 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
* Allocate one PTE table.
*/
static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
@@ -46,7 +46,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
}
static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c467fd5aeb37..f38e57d65e35 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -173,6 +173,7 @@ config X86
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MIXED_BREAKPOINTS_REGS
select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_MOVE_PMD
select HAVE_NMI
select HAVE_OPROFILE
select HAVE_OPTPROBES
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 124f9195eb3e..ad7b210aa3f6 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -448,7 +448,7 @@ static __always_inline int ffs(int x)
* set bit if value is nonzero. The last (most significant) bit is
* at position 32.
*/
-static __always_inline int fls(int x)
+static __always_inline int fls(unsigned int x)
{
int r;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index ec7f43327033..f6861b700f5e 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -47,8 +47,8 @@ extern gfp_t __userpte_alloc_gfp;
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
+extern pgtable_t pte_alloc_one(struct mm_struct *);
/* Should really implement gc for free page table pages. This could be
done with a reference count in struct page. */
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 2637ff09d6a0..97f9ada9ceda 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -434,9 +434,10 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
ssize_t ret = -EINVAL;
+ unsigned long nr_pages = totalram_pages();
- if ((len >> PAGE_SHIFT) > totalram_pages) {
- pr_err("too much data (max %ld pages)\n", totalram_pages);
+ if ((len >> PAGE_SHIFT) > nr_pages) {
+ pr_err("too much data (max %ld pages)\n", nr_pages);
return ret;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 59274e2c1ac4..27f63a74b45d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -23,12 +23,12 @@ EXPORT_SYMBOL(physical_mask);
gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
}
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1065bc8bcae5..b3b388ff2f01 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,8 +38,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long)pgd);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *ptep;
int i;
@@ -52,13 +51,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
return ptep;
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
pte_t *pte;
struct page *page;
- pte = pte_alloc_one_kernel(mm, addr);
+ pte = pte_alloc_one_kernel(mm);
if (!pte)
return NULL;
page = virt_to_page(pte);
diff --git a/block/genhd.c b/block/genhd.c
index 0145bcb0cc76..26f27db5cae9 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1012,7 +1012,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+ if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 38ffb281df97..004a3ce8ba72 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -115,9 +115,9 @@ static int agp_find_max(void)
long memory, index, result;
#if PAGE_SHIFT < 20
- memory = totalram_pages >> (20 - PAGE_SHIFT);
+ memory = totalram_pages() >> (20 - PAGE_SHIFT);
#else
- memory = totalram_pages << (PAGE_SHIFT - 20);
+ memory = totalram_pages() << (PAGE_SHIFT - 20);
#endif
index = 1;
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index fc359ca4503d..cd57747286f2 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -20,7 +20,7 @@ struct udmabuf {
struct page **pages;
};
-static int udmabuf_vm_fault(struct vm_fault *vmf)
+static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct udmabuf *ubuf = vma->vm_private_data;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 3783d122f283..6fb9af76e1ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -847,7 +847,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
*/
pgdat = NODE_DATA(numa_node_id);
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
- mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
+ mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
mem_in_bytes <<= PAGE_SHIFT;
sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c55b1f75c980..939f16a438a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2559,7 +2559,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
* If there's no chance of allocating enough pages for the whole
* object, bail early.
*/
- if (page_count > totalram_pages)
+ if (page_count > totalram_pages())
return -ENOMEM;
st = kmalloc(sizeof(*st), GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 69fe86b30fbb..a9ed0ecc94e2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -170,7 +170,7 @@ static int igt_ppgtt_alloc(void *arg)
* This should ensure that we do not run into the oomkiller during
* the test and take down the machine wilfully.
*/
- limit = totalram_pages << PAGE_SHIFT;
+ limit = totalram_pages() << PAGE_SHIFT;
limit = min(ppgtt->vm.total, limit);
/* Check we can allocate the entire range */
@@ -1244,7 +1244,7 @@ static int exercise_mock(struct drm_i915_private *i915,
u64 hole_start, u64 hole_end,
unsigned long end_time))
{
- const u64 limit = totalram_pages << PAGE_SHIFT;
+ const u64 limit = totalram_pages() << PAGE_SHIFT;
struct i915_gem_context *ctx;
struct i915_hw_ppgtt *ppgtt;
IGT_TIMEOUT(end_time);
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 41631512ae97..211f3fe3a038 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -771,7 +771,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
}
}
-static void hv_online_page(struct page *pg)
+static int hv_online_page(struct page *pg, unsigned int order)
{
struct hv_hotadd_state *has;
unsigned long flags;
@@ -783,10 +783,12 @@ static void hv_online_page(struct page *pg)
if ((pfn < has->start_pfn) || (pfn >= has->end_pfn))
continue;
- hv_page_online_one(has, pg);
+ hv_bring_pgs_online(has, pfn, (1UL << order));
break;
}
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
+
+ return 0;
}
static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
@@ -1090,6 +1092,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
static unsigned long compute_balloon_floor(void)
{
unsigned long min_pages;
+ unsigned long nr_pages = totalram_pages();
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
/* Simple continuous piecewiese linear function:
* max MiB -> min MiB gradient
@@ -1102,16 +1105,16 @@ static unsigned long compute_balloon_floor(void)
* 8192 744 (1/16)
* 32768 1512 (1/32)
*/
- if (totalram_pages < MB2PAGES(128))
- min_pages = MB2PAGES(8) + (totalram_pages >> 1);
- else if (totalram_pages < MB2PAGES(512))
- min_pages = MB2PAGES(40) + (totalram_pages >> 2);
- else if (totalram_pages < MB2PAGES(2048))
- min_pages = MB2PAGES(104) + (totalram_pages >> 3);
- else if (totalram_pages < MB2PAGES(8192))
- min_pages = MB2PAGES(232) + (totalram_pages >> 4);
+ if (nr_pages < MB2PAGES(128))
+ min_pages = MB2PAGES(8) + (nr_pages >> 1);
+ else if (nr_pages < MB2PAGES(512))
+ min_pages = MB2PAGES(40) + (nr_pages >> 2);
+ else if (nr_pages < MB2PAGES(2048))
+ min_pages = MB2PAGES(104) + (nr_pages >> 3);
+ else if (nr_pages < MB2PAGES(8192))
+ min_pages = MB2PAGES(232) + (nr_pages >> 4);
else
- min_pages = MB2PAGES(488) + (totalram_pages >> 5);
+ min_pages = MB2PAGES(488) + (nr_pages >> 5);
#undef MB2PAGES
return min_pages;
}
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index dc385b70e4c3..8b0b628e5d1c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1887,7 +1887,7 @@ static int __init dm_bufio_init(void)
dm_bufio_allocated_vmalloc = 0;
dm_bufio_current_allocated = 0;
- mem = (__u64)mult_frac(totalram_pages - totalhigh_pages,
+ mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(),
DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;
if (mem > ULONG_MAX)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index a7195eb5b8d8..a8c32de29e3f 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2158,7 +2158,7 @@ static int crypt_wipe_key(struct crypt_config *cc)
static void crypt_calculate_pages_per_client(void)
{
- unsigned long pages = (totalram_pages - totalhigh_pages) * DM_CRYPT_MEMORY_PERCENT / 100;
+ unsigned long pages = (totalram_pages() - totalhigh_pages()) * DM_CRYPT_MEMORY_PERCENT / 100;
if (!dm_crypt_clients_n)
return;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index d4ad0bfee251..62baa3214cc7 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2843,7 +2843,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors,
PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT);
journal_desc_size = journal_pages * sizeof(struct page_list);
- if (journal_pages >= totalram_pages - totalhigh_pages || journal_desc_size > ULONG_MAX) {
+ if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) {
*error = "Journal doesn't fit into memory";
r = -ENOMEM;
goto bad;
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 21de30b4e2a1..45b92a3d9d8e 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -85,7 +85,7 @@ static bool __check_shared_memory(size_t alloc_size)
a = shared_memory_amount + alloc_size;
if (a < shared_memory_amount)
return false;
- if (a >> PAGE_SHIFT > totalram_pages / DM_STATS_MEMORY_FACTOR)
+ if (a >> PAGE_SHIFT > totalram_pages() / DM_STATS_MEMORY_FACTOR)
return false;
#ifdef CONFIG_MMU
if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR)
diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c
index 616f78b24a79..b6602490a247 100644
--- a/drivers/media/platform/mtk-vpu/mtk_vpu.c
+++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c
@@ -855,7 +855,7 @@ static int mtk_vpu_probe(struct platform_device *pdev)
/* Set PTCM to 96K and DTCM to 32K */
vpu_cfg_writel(vpu, 0x2, VPU_TCM_CFG);
- vpu->enable_4GB = !!(totalram_pages > (SZ_2G >> PAGE_SHIFT));
+ vpu->enable_4GB = !!(totalram_pages() > (SZ_2G >> PAGE_SHIFT));
dev_info(dev, "4GB mode %u\n", vpu->enable_4GB);
if (vpu->enable_4GB) {
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 9b0b3fa4f836..e6126a4b95d3 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -570,7 +570,7 @@ static int vmballoon_send_get_target(struct vmballoon *b)
unsigned long status;
unsigned long limit;
- limit = totalram_pages;
+ limit = totalram_pages();
/* Ensure limit fits in 32-bits */
if (limit != (u32)limit)
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 701a7d6a74d5..358e380eb7fa 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1251,7 +1251,7 @@ ccio_ioc_init(struct ioc *ioc)
** Hot-Plug/Removal of PCI cards. (aka PCI OLARD).
*/
- iova_space_size = (u32) (totalram_pages / count_parisc_driver(&ccio_driver));
+ iova_space_size = (u32) (totalram_pages() / count_parisc_driver(&ccio_driver));
/* limit IOVA space size to 1MB-1GB */
@@ -1290,7 +1290,7 @@ ccio_ioc_init(struct ioc *ioc)
DBG_INIT("%s() hpa 0x%p mem %luMB IOV %dMB (%d bits)\n",
__func__, ioc->ioc_regs,
- (unsigned long) totalram_pages >> (20 - PAGE_SHIFT),
+ (unsigned long) totalram_pages() >> (20 - PAGE_SHIFT),
iova_space_size>>20,
iov_order + PAGE_SHIFT);
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index c1e599a429af..e0655949480a 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1414,7 +1414,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
** for DMA hints - ergo only 30 bits max.
*/
- iova_space_size = (u32) (totalram_pages/global_ioc_cnt);
+ iova_space_size = (u32) (totalram_pages()/global_ioc_cnt);
/* limit IOVA space size to 1MB-1GB */
if (iova_space_size < (1 << (20 - PAGE_SHIFT))) {
@@ -1439,7 +1439,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
DBG_INIT("%s() hpa 0x%lx mem %ldMB IOV %dMB (%d bits)\n",
__func__,
ioc->ioc_hpa,
- (unsigned long) totalram_pages >> (20 - PAGE_SHIFT),
+ (unsigned long) totalram_pages() >> (20 - PAGE_SHIFT),
iova_space_size>>20,
iov_order + PAGE_SHIFT);
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
index 548bb02c0ca6..6cb0eebdff89 100644
--- a/drivers/staging/android/ion/ion_system_heap.c
+++ b/drivers/staging/android/ion/ion_system_heap.c
@@ -110,7 +110,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
unsigned long size_remaining = PAGE_ALIGN(size);
unsigned int max_order = orders[0];
- if (size / PAGE_SIZE > totalram_pages / 2)
+ if (size / PAGE_SIZE > totalram_pages() / 2)
return -ENOMEM;
INIT_LIST_HEAD(&pages);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index fdfc64f5acea..12148289debd 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -390,8 +390,8 @@ static enum bp_state reserve_additional_memory(void)
/*
* add_memory_resource() will call online_pages() which in its turn
- * will call xen_online_page() callback causing deadlock if we don't
- * release balloon_mutex here. Unlocking here is safe because the
+ * will call xen_bring_pgs_online() callback causing deadlock if we
+ * don't release balloon_mutex here. Unlocking here is safe because the
* callers drop the mutex before trying again.
*/
mutex_unlock(&balloon_mutex);
@@ -414,15 +414,22 @@ static enum bp_state reserve_additional_memory(void)
return BP_ECANCELED;
}
-static void xen_online_page(struct page *page)
+static int xen_bring_pgs_online(struct page *pg, unsigned int order)
{
- __online_page_set_limits(page);
+ unsigned long i, size = (1 << order);
+ unsigned long start_pfn = page_to_pfn(pg);
+ struct page *p;
+ pr_debug("Online %lu pages starting at pfn 0x%lx\n", size, start_pfn);
mutex_lock(&balloon_mutex);
-
- __balloon_append(page);
-
+ for (i = 0; i < size; i++) {
+ p = pfn_to_page(start_pfn + i);
+ __online_page_set_limits(p);
+ __balloon_append(p);
+ }
mutex_unlock(&balloon_mutex);
+
+ return 0;
}
static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
@@ -747,7 +754,7 @@ static int __init balloon_init(void)
balloon_stats.max_retry_count = RETRY_UNLIMITED;
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
- set_online_page_callback(&xen_online_page);
+ set_online_page_callback(&xen_bring_pgs_online);
register_memory_notifier(&xen_memory_nb);
register_sysctl_table(xen_root);
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 5165aa82bf7d..246f6122c9ee 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -189,7 +189,7 @@ static void selfballoon_process(struct work_struct *work)
bool reset_timer = false;
if (xen_selfballooning_enabled) {
- cur_pages = totalram_pages;
+ cur_pages = totalram_pages();
tgt_pages = cur_pages; /* default is no change */
goal_pages = vm_memory_committed() +
totalreserve_pages +
@@ -227,7 +227,7 @@ static void selfballoon_process(struct work_struct *work)
if (tgt_pages < floor_pages)
tgt_pages = floor_pages;
balloon_set_new_target(tgt_pages +
- balloon_stats.current_pages - totalram_pages);
+ balloon_stats.current_pages - totalram_pages());
reset_timer = true;
}
#ifdef CONFIG_FRONTSWAP
@@ -569,7 +569,7 @@ int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)
* much more reliably and response faster in some cases.
*/
if (!selfballoon_reserved_mb) {
- reserve_pages = totalram_pages / 10;
+ reserve_pages = totalram_pages() / 10;
selfballoon_reserved_mb = PAGES2MB(reserve_pages);
}
schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 67aef3bb89e4..606f9378b2f0 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -1,13 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/bfs/bfs.h
- * Copyright (C) 1999 Tigran Aivazian <tigran@veritas.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
*/
#ifndef _FS_BFS_BFS_H
#define _FS_BFS_BFS_H
#include <linux/bfs_fs.h>
+/* In theory BFS supports up to 512 inodes, numbered from 2 (for /) up to 513 inclusive.
+ In actual fact, attempting to create the 512th inode (i.e. inode No. 513 or file No. 511)
+ will fail with ENOSPC in bfs_add_entry(): the root directory cannot contain so many entries, counting '..'.
+ So, mkfs.bfs(8) should really limit its -N option to 511 and not 512. For now, we just print a warning
+ if a filesystem is mounted with such "impossible to fill up" number of inodes */
+#define BFS_MAX_LASTI 513
+
/*
* BFS file system in-core superblock info
*/
@@ -17,7 +24,7 @@ struct bfs_sb_info {
unsigned long si_freei;
unsigned long si_lf_eblk;
unsigned long si_lasti;
- unsigned long *si_imap;
+ DECLARE_BITMAP(si_imap, BFS_MAX_LASTI+1);
struct mutex bfs_lock;
};
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index f32f21c3bbc7..d8dfe3a0cb39 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,8 +2,8 @@
/*
* fs/bfs/dir.c
* BFS directory operations.
- * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
- * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
*/
#include <linux/time.h>
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 1476cdd90cfb..0dceefc54b48 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -2,7 +2,7 @@
/*
* fs/bfs/file.c
* BFS file operations.
- * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
*
* Make the file block allocation algorithm understand the size
* of the underlying block device.
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f7ef2913bd9d..ea2055d78858 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -1,10 +1,9 @@
/*
* fs/bfs/inode.c
* BFS superblock and inode operations.
- * Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
* From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
- *
- * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
*/
#include <linux/module.h>
@@ -118,12 +117,12 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct bfs_sb_info *info = BFS_SB(inode->i_sb);
unsigned int ino = (u16)inode->i_ino;
- unsigned long i_sblock;
+ unsigned long i_sblock;
struct bfs_inode *di;
struct buffer_head *bh;
int err = 0;
- dprintf("ino=%08x\n", ino);
+ dprintf("ino=%08x\n", ino);
di = find_inode(inode->i_sb, ino, &bh);
if (IS_ERR(di))
@@ -144,7 +143,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
- i_sblock = BFS_I(inode)->i_sblock;
+ i_sblock = BFS_I(inode)->i_sblock;
di->i_sblock = cpu_to_le32(i_sblock);
di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
@@ -188,13 +187,13 @@ static void bfs_evict_inode(struct inode *inode)
mark_buffer_dirty(bh);
brelse(bh);
- if (bi->i_dsk_ino) {
+ if (bi->i_dsk_ino) {
if (bi->i_sblock)
info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
info->si_freei++;
clear_bit(ino, info->si_imap);
- bfs_dump_imap("delete_inode", s);
- }
+ bfs_dump_imap("evict_inode", s);
+ }
/*
* If this was the last file, make the previous block
@@ -214,7 +213,6 @@ static void bfs_put_super(struct super_block *s)
return;
mutex_destroy(&info->bfs_lock);
- kfree(info->si_imap);
kfree(info);
s->s_fs_info = NULL;
}
@@ -311,8 +309,7 @@ void bfs_dump_imap(const char *prefix, struct super_block *s)
else
strcat(tmpbuf, "0");
}
- printf("BFS-fs: %s: lasti=%08lx <%s>\n",
- prefix, BFS_SB(s)->si_lasti, tmpbuf);
+ printf("%s: lasti=%08lx <%s>\n", prefix, BFS_SB(s)->si_lasti, tmpbuf);
free_page((unsigned long)tmpbuf);
#endif
}
@@ -323,7 +320,7 @@ static int bfs_fill_super(struct super_block *s, void *data, size_t data_size,
struct buffer_head *bh, *sbh;
struct bfs_super_block *bfs_sb;
struct inode *inode;
- unsigned i, imap_len;
+ unsigned i;
struct bfs_sb_info *info;
int ret = -EINVAL;
unsigned long i_sblock, i_eblock, i_eoff, s_size;
@@ -342,8 +339,7 @@ static int bfs_fill_super(struct super_block *s, void *data, size_t data_size,
bfs_sb = (struct bfs_super_block *)sbh->b_data;
if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
if (!silent)
- printf("No BFS filesystem on %s (magic=%08x)\n",
- s->s_id, le32_to_cpu(bfs_sb->s_magic));
+ printf("No BFS filesystem on %s (magic=%08x)\n", s->s_id, le32_to_cpu(bfs_sb->s_magic));
goto out1;
}
if (BFS_UNCLEAN(bfs_sb, s) && !silent)
@@ -352,18 +348,16 @@ static int bfs_fill_super(struct super_block *s, void *data, size_t data_size,
s->s_magic = BFS_MAGIC;
if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
- le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
- printf("Superblock is corrupted\n");
+ le32_to_cpu(bfs_sb->s_start) < sizeof(struct bfs_super_block) + sizeof(struct bfs_dirent)) {
+ printf("Superblock is corrupted on %s\n", s->s_id);
goto out1;
}
- info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
- sizeof(struct bfs_inode)
- + BFS_ROOT_INO - 1;
- imap_len = (info->si_lasti / 8) + 1;
- info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
- if (!info->si_imap) {
- printf("Cannot allocate %u bytes\n", imap_len);
+ info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / sizeof(struct bfs_inode) + BFS_ROOT_INO - 1;
+ if (info->si_lasti == BFS_MAX_LASTI)
+ printf("WARNING: filesystem %s was created with 512 inodes, the real maximum is 511, mounting anyway\n", s->s_id);
+ else if (info->si_lasti > BFS_MAX_LASTI) {
+ printf("Impossible last inode number %lu > %d on %s\n", info->si_lasti, BFS_MAX_LASTI, s->s_id);
goto out1;
}
for (i = 0; i < BFS_ROOT_INO; i++)
@@ -373,26 +367,25 @@ static int bfs_fill_super(struct super_block *s, void *data, size_t data_size,
inode = bfs_iget(s, BFS_ROOT_INO);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
- goto out2;
+ goto out1;
}
s->s_root = d_make_root(inode);
if (!s->s_root) {
ret = -ENOMEM;
- goto out2;
+ goto out1;
}
info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS;
- info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1
- - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS;
+ info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS;
info->si_freei = 0;
info->si_lf_eblk = 0;
/* can we read the last block? */
bh = sb_bread(s, info->si_blocks - 1);
if (!bh) {
- printf("Last block not available: %lu\n", info->si_blocks - 1);
+ printf("Last block not available on %s: %lu\n", s->s_id, info->si_blocks - 1);
ret = -EIO;
- goto out3;
+ goto out2;
}
brelse(bh);
@@ -426,11 +419,11 @@ static int bfs_fill_super(struct super_block *s, void *data, size_t data_size,
(i_eoff != le32_to_cpu(-1) && i_eoff > s_size) ||
i_sblock * BFS_BSIZE > i_eoff) {
- printf("Inode 0x%08x corrupted\n", i);
+ printf("Inode 0x%08x corrupted on %s\n", i, s->s_id);
brelse(bh);
ret = -EIO;
- goto out3;
+ goto out2;
}
if (!di->i_ino) {
@@ -446,14 +439,12 @@ static int bfs_fill_super(struct super_block *s, void *data, size_t data_size,
}
brelse(bh);
brelse(sbh);
- bfs_dump_imap("read_super", s);
+ bfs_dump_imap("fill_super", s);
return 0;
-out3:
+out2:
dput(s->s_root);
s->s_root = NULL;
-out2:
- kfree(info->si_imap);
out1:
brelse(sbh);
out:
@@ -484,7 +475,7 @@ static int __init init_bfs_fs(void)
int err = init_inodecache();
if (err)
goto out1;
- err = register_filesystem(&bfs_fs_type);
+ err = register_filesystem(&bfs_fs_type);
if (err)
goto out;
return 0;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 7cde3f46ad26..d0078cbb718b 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -42,10 +42,14 @@ static int load_script(struct linux_binprm *bprm)
fput(bprm->file);
bprm->file = NULL;
- bprm->buf[BINPRM_BUF_SIZE - 1] = '\0';
- if ((cp = strchr(bprm->buf, '\n')) == NULL)
- cp = bprm->buf+BINPRM_BUF_SIZE-1;
+ for (cp = bprm->buf+2;; cp++) {
+ if (cp >= bprm->buf + BINPRM_BUF_SIZE)
+ return -ENOEXEC;
+ if (!*cp || (*cp == '\n'))
+ break;
+ }
*cp = '\0';
+
while (cp > bprm->buf) {
cp--;
if ((*cp == ' ') || (*cp == '\t'))
diff --git a/fs/buffer.c b/fs/buffer.c
index 1286c2b95498..3618ce2c4541 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -954,10 +954,20 @@ grow_dev_page(struct block_device *bdev, sector_t block,
end_block = init_page_buffers(page, bdev,
(sector_t)index << sizebits,
size);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x01;
+#endif
goto done;
}
- if (!try_to_free_buffers(page))
+ if (!try_to_free_buffers(page)) {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x02;
+#endif
goto failed;
+ }
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x04;
+#endif
}
/*
@@ -977,6 +987,9 @@ grow_dev_page(struct block_device *bdev, sector_t block,
spin_unlock(&inode->i_mapping->private_lock);
done:
ret = (block < end_block) ? 1 : -ENXIO;
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x08;
+#endif
failed:
unlock_page(page);
put_page(page);
@@ -1032,6 +1045,12 @@ __getblk_slow(struct block_device *bdev, sector_t block,
return NULL;
}
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_stamp = jiffies;
+ current->getblk_executed = 0;
+ current->getblk_bh_count = 0;
+ current->getblk_bh_state = 0;
+#endif
for (;;) {
struct buffer_head *bh;
int ret;
@@ -1043,6 +1062,18 @@ __getblk_slow(struct block_device *bdev, sector_t block,
ret = grow_buffers(bdev, block, size, gfp);
if (ret < 0)
return NULL;
+
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ if (!time_after(jiffies, current->getblk_stamp + 3 * HZ))
+ continue;
+ printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx\n",
+ current->comm, current->pid, current->getblk_executed,
+ current->getblk_bh_count, current->getblk_bh_state);
+ current->getblk_executed = 0;
+ current->getblk_bh_count = 0;
+ current->getblk_bh_state = 0;
+ current->getblk_stamp = jiffies;
+#endif
}
}
@@ -3215,6 +3246,11 @@ EXPORT_SYMBOL(sync_dirty_buffer);
*/
static inline int buffer_busy(struct buffer_head *bh)
{
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x80;
+ current->getblk_bh_count = atomic_read(&bh->b_count);
+ current->getblk_bh_state = bh->b_state;
+#endif
return atomic_read(&bh->b_count) |
(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}
@@ -3253,11 +3289,18 @@ int try_to_free_buffers(struct page *page)
int ret = 0;
BUG_ON(!PageLocked(page));
- if (PageWriteback(page))
+ if (PageWriteback(page)) {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x10;
+#endif
return 0;
+ }
if (mapping == NULL) { /* can this still happen? */
ret = drop_buffers(page, &buffers_to_free);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x20;
+#endif
goto out;
}
@@ -3281,6 +3324,9 @@ int try_to_free_buffers(struct page *page)
if (ret)
cancel_dirty_page(page);
spin_unlock(&mapping->private_lock);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x40;
+#endif
out:
if (buffers_to_free) {
struct buffer_head *bh = buffers_to_free;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c005a5400f2e..9a2d86191793 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -808,7 +808,7 @@ static inline int default_congestion_kb(void)
* This allows larger machines to have larger/more transfers.
* Limit the default to 256M
*/
- congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+ congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
if (congestion_kb > 256*1024)
congestion_kb = 256*1024;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 42bbe6824b4b..0627454298d6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -381,7 +381,8 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
*/
static inline int ep_events_available(struct eventpoll *ep)
{
- return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
+ return !list_empty_careful(&ep->rdllist) ||
+ READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -471,7 +472,6 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
* no re-entered.
*
* @ncalls: Pointer to the nested_calls structure to be used for this call.
- * @max_nests: Maximum number of allowed nesting calls.
* @nproc: Nested call core function pointer.
* @priv: Opaque data to be passed to the @nproc callback.
* @cookie: Cookie to be used to identify this nested call.
@@ -480,7 +480,7 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
* Returns: Returns the code returned by the @nproc callback, or -1 if
* the maximum recursion limit has been exceeded.
*/
-static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
+static int ep_call_nested(struct nested_calls *ncalls,
int (*nproc)(void *, void *, int), void *priv,
void *cookie, void *ctx)
{
@@ -499,7 +499,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
*/
list_for_each_entry(tncur, lsthead, llink) {
if (tncur->ctx == ctx &&
- (tncur->cookie == cookie || ++call_nests > max_nests)) {
+ (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) {
/*
* Ops ... loop detected or maximum nest level reached.
* We abort this wake by breaking the cycle itself.
@@ -573,7 +573,7 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
{
int this_cpu = get_cpu();
- ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+ ep_call_nested(&poll_safewake_ncalls,
ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
put_cpu();
@@ -699,7 +699,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
*/
spin_lock_irq(&ep->wq.lock);
list_splice_init(&ep->rdllist, &txlist);
- ep->ovflist = NULL;
+ WRITE_ONCE(ep->ovflist, NULL);
spin_unlock_irq(&ep->wq.lock);
/*
@@ -713,7 +713,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* other events might have been queued by the poll callback.
* We re-insert them inside the main ready-list here.
*/
- for (nepi = ep->ovflist; (epi = nepi) != NULL;
+ for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
/*
* We need to check if the item is already in the list.
@@ -731,7 +731,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* releasing the lock, events will be queued in the normal way inside
* ep->rdllist.
*/
- ep->ovflist = EP_UNACTIVE_PTR;
+ WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
/*
* Quickly re-inject items left on "txlist".
@@ -1154,10 +1154,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
- if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
if (epi->next == EP_UNACTIVE_PTR) {
- epi->next = ep->ovflist;
- ep->ovflist = epi;
+ epi->next = READ_ONCE(ep->ovflist);
+ WRITE_ONCE(ep->ovflist, epi);
if (epi->ws) {
/*
* Activate ep->ws since epi->ws may get
@@ -1333,7 +1333,6 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
}
} else {
error = ep_call_nested(&poll_loop_ncalls,
- EP_MAX_NESTS,
reverse_path_check_proc,
child_file, child_file,
current);
@@ -1367,7 +1366,7 @@ static int reverse_path_check(void)
/* let's call this for all tfiles */
list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
path_count_init();
- error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ error = ep_call_nested(&poll_loop_ncalls,
reverse_path_check_proc, current_file,
current_file, current);
if (error)
@@ -1626,21 +1625,24 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head
{
struct ep_send_events_data *esed = priv;
__poll_t revents;
- struct epitem *epi;
- struct epoll_event __user *uevent;
+ struct epitem *epi, *tmp;
+ struct epoll_event __user *uevent = esed->events;
struct wakeup_source *ws;
poll_table pt;
init_poll_funcptr(&pt, NULL);
+ esed->res = 0;
/*
* We can loop without lock because we are passed a task private list.
* Items cannot vanish during the loop because ep_scan_ready_list() is
* holding "mtx" during this call.
*/
- for (esed->res = 0, uevent = esed->events;
- !list_empty(head) && esed->res < esed->maxevents;) {
- epi = list_first_entry(head, struct epitem, rdllink);
+ lockdep_assert_held(&ep->mtx);
+
+ list_for_each_entry_safe(epi, tmp, head, rdllink) {
+ if (esed->res >= esed->maxevents)
+ break;
/*
* Activate ep->ws before deactivating epi->ws to prevent
@@ -1660,42 +1662,42 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head
list_del_init(&epi->rdllink);
- revents = ep_item_poll(epi, &pt, 1);
-
/*
* If the event mask intersect the caller-requested one,
* deliver the event to userspace. Again, ep_scan_ready_list()
- * is holding "mtx", so no operations coming from userspace
+ * is holding ep->mtx, so no operations coming from userspace
* can change the item.
*/
- if (revents) {
- if (__put_user(revents, &uevent->events) ||
- __put_user(epi->event.data, &uevent->data)) {
- list_add(&epi->rdllink, head);
- ep_pm_stay_awake(epi);
- if (!esed->res)
- esed->res = -EFAULT;
- return 0;
- }
- esed->res++;
- uevent++;
- if (epi->event.events & EPOLLONESHOT)
- epi->event.events &= EP_PRIVATE_BITS;
- else if (!(epi->event.events & EPOLLET)) {
- /*
- * If this file has been added with Level
- * Trigger mode, we need to insert back inside
- * the ready list, so that the next call to
- * epoll_wait() will check again the events
- * availability. At this point, no one can insert
- * into ep->rdllist besides us. The epoll_ctl()
- * callers are locked out by
- * ep_scan_ready_list() holding "mtx" and the
- * poll callback will queue them in ep->ovflist.
- */
- list_add_tail(&epi->rdllink, &ep->rdllist);
- ep_pm_stay_awake(epi);
- }
+ revents = ep_item_poll(epi, &pt, 1);
+ if (!revents)
+ continue;
+
+ if (__put_user(revents, &uevent->events) ||
+ __put_user(epi->event.data, &uevent->data)) {
+ list_add(&epi->rdllink, head);
+ ep_pm_stay_awake(epi);
+ if (!esed->res)
+ esed->res = -EFAULT;
+ return 0;
+ }
+ esed->res++;
+ uevent++;
+ if (epi->event.events & EPOLLONESHOT)
+ epi->event.events &= EP_PRIVATE_BITS;
+ else if (!(epi->event.events & EPOLLET)) {
+ /*
+ * If this file has been added with Level
+ * Trigger mode, we need to insert back inside
+ * the ready list, so that the next call to
+ * epoll_wait() will check again the events
+ * availability. At this point, no one can insert
+ * into ep->rdllist besides us. The epoll_ctl()
+ * callers are locked out by
+ * ep_scan_ready_list() holding "mtx" and the
+ * poll callback will queue them in ep->ovflist.
+ */
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake(epi);
}
}
@@ -1747,6 +1749,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
u64 slack = 0;
+ bool waiter = false;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
@@ -1761,11 +1764,18 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
} else if (timeout == 0) {
/*
* Avoid the unnecessary trip to the wait queue loop, if the
- * caller specified a non blocking operation.
+ * caller specified a non blocking operation. We still need
+ * lock because we could race and not see an epi being added
+ * to the ready list while in irq callback. Thus incorrectly
+ * returning 0 back to userspace.
*/
timed_out = 1;
+
spin_lock_irq(&ep->wq.lock);
- goto check_events;
+ eavail = ep_events_available(ep);
+ spin_unlock_irq(&ep->wq.lock);
+
+ goto send_events;
}
fetch_events:
@@ -1773,64 +1783,66 @@ fetch_events:
if (!ep_events_available(ep))
ep_busy_loop(ep, timed_out);
- spin_lock_irq(&ep->wq.lock);
+ eavail = ep_events_available(ep);
+ if (eavail)
+ goto send_events;
- if (!ep_events_available(ep)) {
- /*
- * Busy poll timed out. Drop NAPI ID for now, we can add
- * it back in when we have moved a socket with a valid NAPI
- * ID onto the ready list.
- */
- ep_reset_busy_poll_napi_id(ep);
+ /*
+ * Busy poll timed out. Drop NAPI ID for now, we can add
+ * it back in when we have moved a socket with a valid NAPI
+ * ID onto the ready list.
+ */
+ ep_reset_busy_poll_napi_id(ep);
- /*
- * We don't have any available event to return to the caller.
- * We need to sleep here, and we will be wake up by
- * ep_poll_callback() when events will become available.
- */
+ /*
+ * We don't have any available event to return to the caller. We need
+ * to sleep here, and we will be woken by ep_poll_callback() when events
+ * become available.
+ */
+ if (!waiter) {
+ waiter = true;
init_waitqueue_entry(&wait, current);
- __add_wait_queue_exclusive(&ep->wq, &wait);
- for (;;) {
- /*
- * We don't want to sleep if the ep_poll_callback() sends us
- * a wakeup in between. That's why we set the task state
- * to TASK_INTERRUPTIBLE before doing the checks.
- */
- set_current_state(TASK_INTERRUPTIBLE);
- /*
- * Always short-circuit for fatal signals to allow
- * threads to make a timely exit without the chance of
- * finding more events available and fetching
- * repeatedly.
- */
- if (fatal_signal_pending(current)) {
- res = -EINTR;
- break;
- }
- if (ep_events_available(ep) || timed_out)
- break;
- if (signal_pending(current)) {
- res = -EINTR;
- break;
- }
+ spin_lock_irq(&ep->wq.lock);
+ __add_wait_queue_exclusive(&ep->wq, &wait);
+ spin_unlock_irq(&ep->wq.lock);
+ }
- spin_unlock_irq(&ep->wq.lock);
- if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
- timed_out = 1;
+ for (;;) {
+ /*
+ * We don't want to sleep if the ep_poll_callback() sends us
+ * a wakeup in between. That's why we set the task state
+ * to TASK_INTERRUPTIBLE before doing the checks.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ /*
+ * Always short-circuit for fatal signals to allow
+ * threads to make a timely exit without the chance of
+ * finding more events available and fetching
+ * repeatedly.
+ */
+ if (fatal_signal_pending(current)) {
+ res = -EINTR;
+ break;
+ }
- spin_lock_irq(&ep->wq.lock);
+ eavail = ep_events_available(ep);
+ if (eavail)
+ break;
+ if (signal_pending(current)) {
+ res = -EINTR;
+ break;
}
- __remove_wait_queue(&ep->wq, &wait);
- __set_current_state(TASK_RUNNING);
+ if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
+ timed_out = 1;
+ break;
+ }
}
-check_events:
- /* Is it worth to try to dig for events ? */
- eavail = ep_events_available(ep);
- spin_unlock_irq(&ep->wq.lock);
+ __set_current_state(TASK_RUNNING);
+send_events:
/*
* Try to transfer events to user space. In case we get 0 events and
* there's still timeout left over, we go trying again in search of
@@ -1840,6 +1852,12 @@ check_events:
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto fetch_events;
+ if (waiter) {
+ spin_lock_irq(&ep->wq.lock);
+ __remove_wait_queue(&ep->wq, &wait);
+ spin_unlock_irq(&ep->wq.lock);
+ }
+
return res;
}
@@ -1876,7 +1894,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
ep_tovisit = epi->ffd.file->private_data;
if (ep_tovisit->visited)
continue;
- error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ error = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, epi->ffd.file,
ep_tovisit, current);
if (error != 0)
@@ -1916,7 +1934,7 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
int ret;
struct eventpoll *ep_cur, *ep_next;
- ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ret = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, file, ep, current);
/* clear visited list */
list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
diff --git a/fs/exec.c b/fs/exec.c
index acc3a5536384..c1f118898242 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -219,55 +219,10 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
if (ret <= 0)
return NULL;
- if (write) {
- unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
- unsigned long ptr_size, limit;
-
- /*
- * Since the stack will hold pointers to the strings, we
- * must account for them as well.
- *
- * The size calculation is the entire vma while each arg page is
- * built, so each time we get here it's calculating how far it
- * is currently (rather than each call being just the newly
- * added size from the arg page). As a result, we need to
- * always add the entire size of the pointers, so that on the
- * last call to get_arg_page() we'll actually have the entire
- * correct size.
- */
- ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
- if (ptr_size > ULONG_MAX - size)
- goto fail;
- size += ptr_size;
-
- acct_arg_size(bprm, size / PAGE_SIZE);
-
- /*
- * We've historically supported up to 32 pages (ARG_MAX)
- * of argument strings even with small stacks
- */
- if (size <= ARG_MAX)
- return page;
-
- /*
- * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
- * (whichever is smaller) for the argv+env strings.
- * This ensures that:
- * - the remaining binfmt code will not run out of stack space,
- * - the program will have a reasonable amount of stack left
- * to work from.
- */
- limit = _STK_LIM / 4 * 3;
- limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
- if (size > limit)
- goto fail;
- }
+ if (write)
+ acct_arg_size(bprm, vma_pages(bprm->vma));
return page;
-
-fail:
- put_page(page);
- return NULL;
}
static void put_arg_page(struct page *page)
@@ -493,6 +448,50 @@ static int count(struct user_arg_ptr argv, int max)
return i;
}
+static int prepare_arg_pages(struct linux_binprm *bprm,
+ struct user_arg_ptr argv, struct user_arg_ptr envp)
+{
+ unsigned long limit, ptr_size;
+
+ bprm->argc = count(argv, MAX_ARG_STRINGS);
+ if (bprm->argc < 0)
+ return bprm->argc;
+
+ bprm->envc = count(envp, MAX_ARG_STRINGS);
+ if (bprm->envc < 0)
+ return bprm->envc;
+
+ /*
+ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
+ * (whichever is smaller) for the argv+env strings.
+ * This ensures that:
+ * - the remaining binfmt code will not run out of stack space,
+ * - the program will have a reasonable amount of stack left
+ * to work from.
+ */
+ limit = _STK_LIM / 4 * 3;
+ limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
+ /*
+ * We've historically supported up to 32 pages (ARG_MAX)
+ * of argument strings even with small stacks
+ */
+ limit = max_t(unsigned long, limit, ARG_MAX);
+ /*
+ * We must account for the size of all the argv and envp pointers to
+ * the argv and envp strings, since they will also take up space in
+ * the stack. They aren't stored until much later when we can't
+ * signal to the parent that the child has run out of stack space.
+ * Instead, calculate it here so it's possible to fail gracefully.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (limit <= ptr_size)
+ return -E2BIG;
+ limit -= ptr_size;
+
+ bprm->argmin = bprm->p - limit;
+ return 0;
+}
+
/*
* 'copy_strings()' copies argument/environment strings from the old
* processes's memory to the new process's stack. The call to get_user_pages()
@@ -528,6 +527,8 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
pos = bprm->p;
str += len;
bprm->p -= len;
+ if (bprm->p < bprm->argmin)
+ goto out;
while (len > 0) {
int offset, bytes_to_copy;
@@ -1790,12 +1791,8 @@ static int __do_execve_file(int fd, struct filename *filename,
if (retval)
goto out_unmark;
- bprm->argc = count(argv, MAX_ARG_STRINGS);
- if ((retval = bprm->argc) < 0)
- goto out;
-
- bprm->envc = count(envp, MAX_ARG_STRINGS);
- if ((retval = bprm->envc) < 0)
+ retval = prepare_arg_pages(bprm, argv, envp);
+ if (retval < 0)
goto out;
retval = prepare_binprm(bprm);
diff --git a/fs/file_table.c b/fs/file_table.c
index e03c8d121c6c..10e0a3dcea4d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -383,10 +383,11 @@ void __init files_init(void)
void __init files_maxfiles_init(void)
{
unsigned long n;
- unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2;
+ unsigned long nr_pages = totalram_pages();
+ unsigned long memreserve = (nr_pages - nr_free_pages()) * 3/2;
- memreserve = min(memreserve, totalram_pages - 1);
- n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
+ memreserve = min(memreserve, nr_pages - 1);
+ n = ((nr_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4727ef612019..f0fe74fa4c85 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -825,7 +825,7 @@ static const struct super_operations fuse_super_operations = {
static void sanitize_global_limit(unsigned *limit)
{
if (*limit == 0)
- *limit = ((totalram_pages << PAGE_SHIFT) >> 13) /
+ *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) /
sizeof(struct fuse_req);
if (*limit >= 1 << 16)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index f37662675c3a..29a9dcfbe81f 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -565,6 +565,7 @@ const struct inode_operations hfsplus_dir_inode_operations = {
.symlink = hfsplus_symlink,
.mknod = hfsplus_mknod,
.rename = hfsplus_rename,
+ .getattr = hfsplus_getattr,
.listxattr = hfsplus_listxattr,
};
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index dd7ad9f13e3a..b8471bf05def 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -488,6 +488,8 @@ void hfsplus_inode_write_fork(struct inode *inode,
struct hfsplus_fork_raw *fork);
int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd);
int hfsplus_cat_write_inode(struct inode *inode);
+int hfsplus_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags);
int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d7ab9d8c4b67..d131c8ea7eb6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -270,6 +270,26 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
}
+int hfsplus_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+ struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+
+ if (inode->i_flags & S_APPEND)
+ stat->attributes |= STATX_ATTR_APPEND;
+ if (inode->i_flags & S_IMMUTABLE)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+ if (hip->userflags & HFSPLUS_FLG_NODUMP)
+ stat->attributes |= STATX_ATTR_NODUMP;
+
+ stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
+ STATX_ATTR_NODUMP;
+
+ generic_fillattr(inode, stat);
+ return 0;
+}
+
int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
{
@@ -329,6 +349,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
static const struct inode_operations hfsplus_file_inode_operations = {
.setattr = hfsplus_setattr,
+ .getattr = hfsplus_getattr,
.listxattr = hfsplus_listxattr,
};
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 586726a590d8..4f15665f0ad1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -2121,7 +2121,7 @@ int __init nfs_init_writepagecache(void)
* This allows larger machines to have larger/more transfers.
* Limit the default to 256M
*/
- nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+ nfs_congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
if (nfs_congestion_kb > 256*1024)
nfs_congestion_kb = 256*1024;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index e2fe0e9ce0df..da52b594362a 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -99,7 +99,7 @@ static unsigned int
nfsd_cache_size_limit(void)
{
unsigned int limit;
- unsigned long low_pages = totalram_pages - totalhigh_pages;
+ unsigned long low_pages = totalram_pages() - totalhigh_pages();
limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10);
return min_t(unsigned int, limit, 256*1024);
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index ab172e5f51d9..5becc8acc8f4 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
/* return (void *)__get_free_page(gfp_mask); */
}
- if (likely((size >> PAGE_SHIFT) < totalram_pages))
+ if (likely((size >> PAGE_SHIFT) < totalram_pages()))
return __vmalloc(size, gfp_mask, PAGE_KERNEL);
return NULL;
}
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 99ee093182cb..cc9b32b9db7c 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)
obj-$(CONFIG_OCFS2_FS) += \
ocfs2.o \
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9b2ed62dd638..f3c20b279eb2 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -582,9 +582,10 @@ bail:
}
static int o2hb_read_slots(struct o2hb_region *reg,
+ unsigned int begin_slot,
unsigned int max_slots)
{
- unsigned int current_slot=0;
+ unsigned int current_slot = begin_slot;
int status;
struct o2hb_bio_wait_ctxt wc;
struct bio *bio;
@@ -1093,9 +1094,14 @@ static int o2hb_highest_node(unsigned long *nodes, int numbits)
return find_last_bit(nodes, numbits);
}
+static int o2hb_lowest_node(unsigned long *nodes, int numbits)
+{
+ return find_first_bit(nodes, numbits);
+}
+
static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
{
- int i, ret, highest_node;
+ int i, ret, highest_node, lowest_node;
int membership_change = 0, own_slot_ok = 0;
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -1120,7 +1126,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
}
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
- if (highest_node >= O2NM_MAX_NODES) {
+ lowest_node = o2hb_lowest_node(configured_nodes, O2NM_MAX_NODES);
+ if (highest_node >= O2NM_MAX_NODES || lowest_node >= O2NM_MAX_NODES) {
mlog(ML_NOTICE, "o2hb: No configured nodes found!\n");
ret = -EINVAL;
goto bail;
@@ -1130,7 +1137,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
* yet. Of course, if the node definitions have holes in them
* then we're reading an empty slot anyway... Consider this
* best-effort. */
- ret = o2hb_read_slots(reg, highest_node + 1);
+ ret = o2hb_read_slots(reg, lowest_node, highest_node + 1);
if (ret < 0) {
mlog_errno(ret);
goto bail;
@@ -1801,7 +1808,7 @@ static int o2hb_populate_slot_data(struct o2hb_region *reg)
struct o2hb_disk_slot *slot;
struct o2hb_disk_heartbeat_block *hb_block;
- ret = o2hb_read_slots(reg, reg->hr_blocks);
+ ret = o2hb_read_slots(reg, 0, reg->hr_blocks);
if (ret)
goto out;
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index bd1aab1f49a4..ef2854422a6e 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,4 +1,4 @@
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)/..
obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
index eed3db8c5b49..33431a0296a3 100644
--- a/fs/ocfs2/dlmfs/Makefile
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -1,4 +1,4 @@
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)/..
obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 642e471a6472..b03dd46237ce 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -179,7 +179,7 @@ bail:
static int dlmfs_file_release(struct inode *inode,
struct file *file)
{
- int level, status;
+ int level;
struct dlmfs_inode_private *ip = DLMFS_I(inode);
struct dlmfs_filp_private *fp = file->private_data;
@@ -188,7 +188,6 @@ static int dlmfs_file_release(struct inode *inode,
mlog(0, "close called on inode %lu\n", inode->i_ino);
- status = 0;
if (fp) {
level = fp->fp_lock_level;
if (level != DLM_LOCK_IV)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 7642b6712c39..308f05be107c 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -835,7 +835,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
u32 *numbits,
struct ocfs2_alloc_reservation *resv)
{
- int numfound = 0, bitoff, left, startoff, lastzero;
+ int numfound = 0, bitoff, left, startoff;
int local_resv = 0;
struct ocfs2_alloc_reservation r;
void *bitmap = NULL;
@@ -873,7 +873,6 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
numfound = bitoff = startoff = 0;
- lastzero = -1;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
if (bitoff == left) {
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 3f1685d7d43b..1565dd8e8856 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -157,18 +157,14 @@ out:
}
/*
- * lock allocators, and reserving appropriate number of bits for
- * meta blocks and data clusters.
- *
- * in some cases, we don't need to reserve clusters, just let data_ac
- * be NULL.
+ * lock allocator, and reserve appropriate number of bits for
+ * meta blocks.
*/
-static int ocfs2_lock_allocators_move_extents(struct inode *inode,
+static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters_to_move,
u32 extents_to_split,
struct ocfs2_alloc_context **meta_ac,
- struct ocfs2_alloc_context **data_ac,
int extra_blocks,
int *credits)
{
@@ -193,13 +189,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
goto out;
}
- if (data_ac) {
- ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
- }
*credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
@@ -259,10 +248,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
- &context->meta_ac,
- &context->data_ac,
- extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ *len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
@@ -285,6 +274,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
+ /*
+ * Make sure ocfs2_reserve_cluster is called after
+ * __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
+ *
+ * If ocfs2_reserve_cluster is called
+ * before __ocfs2_flush_truncate_log, dead lock on global bitmap
+ * may happen.
+ *
+ */
+ ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_mutex;
+ }
+
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -617,9 +621,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
- &context->meta_ac,
- NULL, extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ce3465479447..58a8dc3fd6c6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -530,7 +530,7 @@ static const struct file_operations proc_lstats_operations = {
static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- unsigned long totalpages = totalram_pages + total_swap_pages;
+ unsigned long totalpages = totalram_pages() + total_swap_pages;
unsigned long points = 0;
points = oom_badness(task, NULL, NULL, totalpages) *
@@ -2356,10 +2356,13 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
return -ESRCH;
if (p != current) {
- if (!capable(CAP_SYS_NICE)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
count = -EPERM;
goto out;
}
+ rcu_read_unlock();
err = security_task_setscheduler(p);
if (err) {
@@ -2392,11 +2395,14 @@ static int timerslack_ns_show(struct seq_file *m, void *v)
return -ESRCH;
if (p != current) {
-
- if (!capable(CAP_SYS_NICE)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
+
err = security_task_getscheduler(p);
if (err)
goto out;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 47c3764c469b..39e96a21366e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -653,13 +653,25 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
};
+ unsigned long flags = vma->vm_flags;
size_t i;
+ /*
+ * Disabling thp is possible through both MADV_NOHUGEPAGE and
+ * PR_SET_THP_DISABLE. Both historically used VM_NOHUGEPAGE. Since
+ * the introduction of MMF_DISABLE_THP, however, userspace needs the
+ * ability to detect vmas where thp is not eligible in the same manner.
+ */
+ if (vma->vm_mm && test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) {
+ flags &= ~VM_HUGEPAGE;
+ flags |= VM_NOHUGEPAGE;
+ }
+
seq_puts(m, "VmFlags: ");
for (i = 0; i < BITS_PER_LONG; i++) {
if (!mnemonics[i][0])
continue;
- if (vma->vm_flags & (1UL << i)) {
+ if (flags & (1UL << i)) {
seq_putc(m, mnemonics[i][0]);
seq_putc(m, mnemonics[i][1]);
seq_putc(m, ' ');
diff --git a/fs/proc/util.c b/fs/proc/util.c
index b161cfa0f9fa..98f8adc17345 100644
--- a/fs/proc/util.c
+++ b/fs/proc/util.c
@@ -1,4 +1,5 @@
#include <linux/dcache.h>
+#include "internal.h"
unsigned name_to_int(const struct qstr *qstr)
{
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 681881dc8a9d..11ce3379abbb 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -53,7 +53,7 @@ struct userfaultfd_ctx {
/* a refile sequence protected by fault_pending_wqh lock */
struct seqcount refile_seq;
/* pseudo fd refcounting */
- atomic_t refcount;
+ refcount_t refcount;
/* userfaultfd syscall flags */
unsigned int flags;
/* features requested from the userspace */
@@ -140,8 +140,7 @@ out:
*/
static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
{
- if (!atomic_inc_not_zero(&ctx->refcount))
- BUG();
+ refcount_inc(&ctx->refcount);
}
/**
@@ -154,7 +153,7 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
*/
static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
{
- if (atomic_dec_and_test(&ctx->refcount)) {
+ if (refcount_dec_and_test(&ctx->refcount)) {
VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock));
VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
@@ -686,7 +685,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
return -ENOMEM;
}
- atomic_set(&ctx->refcount, 1);
+ refcount_set(&ctx->refcount, 1);
ctx->flags = octx->flags;
ctx->state = UFFD_STATE_RUNNING;
ctx->features = octx->features;
@@ -1911,7 +1910,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
if (!ctx)
return -ENOMEM;
- atomic_set(&ctx->refcount, 1);
+ refcount_set(&ctx->refcount, 1);
ctx->flags = flags;
ctx->features = 0;
ctx->state = UFFD_STATE_WAIT_API;
diff --git a/include/asm-generic/bitops/builtin-fls.h b/include/asm-generic/bitops/builtin-fls.h
index 62daf940989d..c8455cc28841 100644
--- a/include/asm-generic/bitops/builtin-fls.h
+++ b/include/asm-generic/bitops/builtin-fls.h
@@ -9,7 +9,7 @@
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __always_inline int fls(int x)
+static __always_inline int fls(unsigned int x)
{
return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
}
diff --git a/include/asm-generic/bitops/fls.h b/include/asm-generic/bitops/fls.h
index 753aecaab641..b168bb10e1be 100644
--- a/include/asm-generic/bitops/fls.h
+++ b/include/asm-generic/bitops/fls.h
@@ -10,7 +10,7 @@
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __always_inline int fls(int x)
+static __always_inline int fls(unsigned int x)
{
int r = 32;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e9f5fe69df31..03200a8c0178 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -25,6 +25,7 @@ struct linux_binprm {
#endif
struct mm_struct *mm;
unsigned long p; /* current top of mem */
+ unsigned long argmin; /* rlimit marker for copy_strings() */
unsigned int
/*
* True after the bprm_set_creds hook has been called once
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index a437a2b79ebc..e5b76874b8fb 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -145,7 +145,7 @@
#define KASAN_ABI_VERSION 3
#endif
-#if GCC_VERSION >= 50100
+#if GCC_VERSION >= 50100 && !defined(__CHECKER__)
#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
#endif
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 872f930f1b06..dd0a452373e7 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -51,7 +51,8 @@ typedef unsigned long (*genpool_algo_t)(unsigned long *map,
unsigned long size,
unsigned long start,
unsigned int nr,
- void *data, struct gen_pool *pool);
+ void *data, struct gen_pool *pool,
+ unsigned long start_addr);
/*
* General purpose special memory pool descriptor.
@@ -131,24 +132,24 @@ extern void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo,
extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
- struct gen_pool *pool);
+ struct gen_pool *pool, unsigned long start_addr);
extern unsigned long gen_pool_fixed_alloc(unsigned long *map,
unsigned long size, unsigned long start, unsigned int nr,
- void *data, struct gen_pool *pool);
+ void *data, struct gen_pool *pool, unsigned long start_addr);
extern unsigned long gen_pool_first_fit_align(unsigned long *map,
unsigned long size, unsigned long start, unsigned int nr,
- void *data, struct gen_pool *pool);
+ void *data, struct gen_pool *pool, unsigned long start_addr);
extern unsigned long gen_pool_first_fit_order_align(unsigned long *map,
unsigned long size, unsigned long start, unsigned int nr,
- void *data, struct gen_pool *pool);
+ void *data, struct gen_pool *pool, unsigned long start_addr);
extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
- struct gen_pool *pool);
+ struct gen_pool *pool, unsigned long start_addr);
extern struct gen_pool *devm_gen_pool_create(struct device *dev,
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 0690679832d4..ea5cdbd8c2c3 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -36,7 +36,31 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
/* declarations for linux/mm/highmem.c */
unsigned int nr_free_highpages(void);
-extern unsigned long totalhigh_pages;
+extern atomic_long_t _totalhigh_pages;
+static inline unsigned long totalhigh_pages(void)
+{
+ return (unsigned long)atomic_long_read(&_totalhigh_pages);
+}
+
+static inline void totalhigh_pages_inc(void)
+{
+ atomic_long_inc(&_totalhigh_pages);
+}
+
+static inline void totalhigh_pages_dec(void)
+{
+ atomic_long_dec(&_totalhigh_pages);
+}
+
+static inline void totalhigh_pages_add(long count)
+{
+ atomic_long_add(count, &_totalhigh_pages);
+}
+
+static inline void totalhigh_pages_set(long val)
+{
+ atomic_long_set(&_totalhigh_pages, val);
+}
void kmap_flush_unused(void);
@@ -51,7 +75,7 @@ static inline struct page *kmap_to_page(void *addr)
return virt_to_page(addr);
}
-#define totalhigh_pages 0UL
+static inline unsigned long totalhigh_pages(void) { return 0UL; }
#ifndef ARCH_HAS_KMAP
static inline void *kmap(struct page *page)
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6ab8c1bada3f..7d5f553a3b24 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -16,6 +16,7 @@ struct user_namespace;
struct ipc_ids {
int in_use;
unsigned short seq;
+ unsigned short deleted;
struct rw_semaphore rwsem;
struct idr ipcs_idr;
int max_idx;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d6aac75b51ba..d4d2233f95c9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -844,6 +844,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#define __typecheck(x, y) \
(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
+#ifndef __CHECKER__
/*
* This returns a constant expression while determining if an argument is
* a constant expression, most importantly without evaluating the argument.
@@ -851,6 +852,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
*/
#define __is_constexpr(x) \
(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+#else
+/*
+ * We don't really care about the check when running sparse and the
+ * above expression causes a warning due to sizeof(void).
+ */
+#define __is_constexpr(x) 1
+#endif
#define __no_side_effects(x, y) \
(__is_constexpr(x) && __is_constexpr(y))
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index aee299a6aa76..5ba52a7878a0 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -248,6 +248,44 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
+ unsigned long *out_spfn,
+ unsigned long *out_epfn);
+/**
+ * for_each_free_mem_range_in_zone - iterate through zone specific free
+ * memblock areas
+ * @i: u64 used as loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ *
+ * Walks over free (memory && !reserved) areas of memblock in a specific
+ * zone. Available as soon as memblock is initialized.
+ */
+#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \
+ for (i = 0, \
+ __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \
+ i != (u64)ULLONG_MAX; \
+ __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
+
+/**
+ * for_each_free_mem_range_in_zone_from - iterate through zone specific
+ * free memblock areas from a given point
+ * @i: u64 used as loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ *
+ * Walks over free (memory && !reserved) areas of memblock in a specific
+ * zone, continuing from current position. Available as soon as memblock is
+ * initialized.
+ */
+#define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \
+ for (; i != (u64)ULLONG_MAX; \
+ __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
/**
* for_each_free_mem_range - iterate through free memblock areas
* @i: u64 used as loop variable
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index ffd9cd10fcf3..84e9ae205930 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -87,7 +87,7 @@ extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
unsigned long *valid_start, unsigned long *valid_end);
extern void __offline_isolated_pages(unsigned long, unsigned long);
-typedef void (*online_page_callback_t)(struct page *page);
+typedef int (*online_page_callback_t)(struct page *page, unsigned int order);
extern int set_online_page_callback(online_page_callback_t callback);
extern int restore_online_page_callback(online_page_callback_t callback);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5411de93a363..d557d15fc195 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -48,7 +48,32 @@ static inline void set_max_mapnr(unsigned long limit)
static inline void set_max_mapnr(unsigned long limit) { }
#endif
-extern unsigned long totalram_pages;
+extern atomic_long_t _totalram_pages;
+static inline unsigned long totalram_pages(void)
+{
+ return (unsigned long)atomic_long_read(&_totalram_pages);
+}
+
+static inline void totalram_pages_inc(void)
+{
+ atomic_long_inc(&_totalram_pages);
+}
+
+static inline void totalram_pages_dec(void)
+{
+ atomic_long_dec(&_totalram_pages);
+}
+
+static inline void totalram_pages_add(long count)
+{
+ atomic_long_add(count, &_totalram_pages);
+}
+
+static inline void totalram_pages_set(long val)
+{
+ atomic_long_set(&_totalram_pages, val);
+}
+
extern void * high_memory;
extern int page_cluster;
@@ -98,10 +123,45 @@ extern int mmap_rnd_compat_bits __read_mostly;
/*
* On some architectures it is expensive to call memset() for small sizes.
- * Those architectures should provide their own implementation of "struct page"
- * zeroing by defining this macro in <asm/pgtable.h>.
+ * If an architecture decides to implement their own version of
+ * mm_zero_struct_page they should wrap the defines below in a #ifndef and
+ * define their own version of this macro in <asm/pgtable.h>
+ */
+#if BITS_PER_LONG == 64
+/* This function must be updated when the size of struct page grows above 80
+ * or reduces below 56. The idea that compiler optimizes out switch()
+ * statement, and only leaves move/store instructions. Also the compiler can
+ * combine write statments if they are both assignments and can be reordered,
+ * this can result in several of the writes here being dropped.
*/
-#ifndef mm_zero_struct_page
+#define mm_zero_struct_page(pp) __mm_zero_struct_page(pp)
+static inline void __mm_zero_struct_page(struct page *page)
+{
+ unsigned long *_pp = (void *)page;
+
+ /* Check that struct page is either 56, 64, 72, or 80 bytes */
+ BUILD_BUG_ON(sizeof(struct page) & 7);
+ BUILD_BUG_ON(sizeof(struct page) < 56);
+ BUILD_BUG_ON(sizeof(struct page) > 80);
+
+ switch (sizeof(struct page)) {
+ case 80:
+ _pp[9] = 0; /* fallthrough */
+ case 72:
+ _pp[8] = 0; /* fallthrough */
+ case 64:
+ _pp[7] = 0; /* fallthrough */
+ case 56:
+ _pp[6] = 0;
+ _pp[5] = 0;
+ _pp[4] = 0;
+ _pp[3] = 0;
+ _pp[2] = 0;
+ _pp[1] = 0;
+ _pp[0] = 0;
+ }
+}
+#else
#define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page)))
#endif
@@ -1136,11 +1196,18 @@ static inline void set_page_node(struct page *page, unsigned long node)
page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
}
+static inline void set_page_reserved(struct page *page, bool reserved)
+{
+ page->flags &= ~(1ul << PG_reserved);
+ page->flags |= (unsigned long)(!!reserved) << PG_reserved;
+}
+
static inline void set_page_links(struct page *page, enum zone_type zone,
- unsigned long node, unsigned long pfn)
+ unsigned long node, unsigned long pfn, bool reserved)
{
set_page_zone(page, zone);
set_page_node(page, node);
+ set_page_reserved(page, reserved);
#ifdef SECTION_IN_PAGE_FLAGS
set_page_section(page, pfn_to_section_nr(pfn));
#endif
@@ -1817,8 +1884,8 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
#endif
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd);
/*
* The following ifdef needed to get the 4level-fixup.h header to work.
@@ -1956,18 +2023,17 @@ static inline void pgtable_page_dtor(struct page *page)
pte_unmap(pte); \
} while (0)
-#define pte_alloc(mm, pmd, address) \
- (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address))
+#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
#define pte_alloc_map(mm, pmd, address) \
- (pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
+ (pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
- (pte_alloc(mm, pmd, address) ? \
+ (pte_alloc(mm, pmd) ? \
NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
#define pte_alloc_kernel(pmd, address) \
- ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
#if USE_SPLIT_PMD_PTLOCKS
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 9893a6432adf..913c3c13e36e 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -420,7 +420,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
void (*func)(struct rcu_head *rcu));
-extern void mmu_notifier_synchronize(void);
#else /* CONFIG_MMU_NOTIFIER */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 847705a6d0ec..c71b4d911ebb 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -428,14 +428,8 @@ struct zone {
* Write access to present_pages at runtime should be protected by
* mem_hotplug_begin/end(). Any reader who can't tolerant drift of
* present_pages should get_online_mems() to get a stable value.
- *
- * Read access to managed_pages should be safe because it's unsigned
- * long. Write access to zone->managed_pages and totalram_pages are
- * protected by managed_page_count_lock at runtime. Idealy only
- * adjust_managed_page_count() should be used instead of directly
- * touching zone->managed_pages and totalram_pages.
*/
- unsigned long managed_pages;
+ atomic_long_t managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
@@ -524,6 +518,11 @@ enum pgdat_flags {
PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
};
+static inline unsigned long zone_managed_pages(struct zone *zone)
+{
+ return (unsigned long)atomic_long_read(&zone->managed_pages);
+}
+
static inline unsigned long zone_end_pfn(const struct zone *zone)
{
return zone->zone_start_pfn + zone->spanned_pages;
@@ -814,7 +813,7 @@ static inline bool is_dev_zone(const struct zone *zone)
*/
static inline bool managed_zone(struct zone *zone)
{
- return zone->managed_pages;
+ return zone_managed_pages(zone);
}
/* Returns true if a zone has memory */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e4c7b6241088..cac27dd9f375 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1207,6 +1207,13 @@ struct task_struct {
unsigned long prev_lowest_stack;
#endif
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ unsigned long getblk_stamp;
+ unsigned int getblk_executed;
+ unsigned int getblk_bh_count;
+ unsigned long getblk_bh_state;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 108ede99e533..44c6f15800ff 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -39,6 +39,8 @@ void __noreturn do_task_dead(void);
extern void proc_caches_init(void);
+extern void fork_init(void);
+
extern void release_task(struct task_struct * p);
#ifdef CONFIG_HAVE_COPY_THREAD_TLS
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a8f6d5d89524..928550bd28f3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -175,8 +175,9 @@ enum {
SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */
SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */
SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */
+ SWP_VALID = (1 << 13), /* swap is valid to be operated on? */
/* add others here before... */
- SWP_SCANNING = (1 << 13), /* refcount in scan_swap_map */
+ SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */
};
#define SWAP_CLUSTER_MAX 32UL
@@ -235,7 +236,6 @@ struct swap_info_struct {
unsigned long flags; /* SWP_USED etc: see above */
signed short prio; /* swap priority of this type */
struct plist_node list; /* entry in swap_active_head */
- struct plist_node avail_lists[MAX_NUMNODES];/* entry in swap_avail_heads */
signed char type; /* strange name for an index */
unsigned int max; /* extent of the swap_map */
unsigned char *swap_map; /* vmalloc'ed array of usage counts */
@@ -276,6 +276,16 @@ struct swap_info_struct {
*/
struct work_struct discard_work; /* discard worker */
struct swap_cluster_list discard_clusters; /* discard clusters list */
+ struct plist_node avail_lists[0]; /*
+ * entries in swap_avail_heads, one
+ * entry per node.
+ * Must be last as the number of the
+ * array is nr_node_ids, which is not
+ * a fixed value so have to allocate
+ * dynamically.
+ * And it has to be an array so that
+ * plist_for_each_* can work.
+ */
};
#ifdef CONFIG_64BIT
@@ -310,7 +320,6 @@ void workingset_update_node(struct xa_node *node);
} while (0)
/* linux/mm/page_alloc.c */
-extern unsigned long totalram_pages;
extern unsigned long totalreserve_pages;
extern unsigned long nr_free_buffer_pages(void);
extern unsigned long nr_free_pagecache_pages(void);
@@ -360,14 +369,8 @@ extern unsigned long vm_total_pages;
extern int node_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
extern int sysctl_min_slab_ratio;
-extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
#else
#define node_reclaim_mode 0
-static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
- unsigned int order)
-{
- return 0;
-}
#endif
extern int page_evictable(struct page *page);
@@ -458,7 +461,7 @@ extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
extern int page_swapcount(struct page *);
-extern int __swap_count(struct swap_info_struct *si, swp_entry_t entry);
+extern int __swap_count(swp_entry_t entry);
extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
@@ -468,6 +471,12 @@ extern int try_to_free_swap(struct page *);
struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
extern void exit_swap_address_space(unsigned int type);
+extern struct swap_info_struct *get_swap_device(swp_entry_t entry);
+
+static inline void put_swap_device(struct swap_info_struct *si)
+{
+ preempt_enable();
+}
#else /* CONFIG_SWAP */
@@ -574,7 +583,7 @@ static inline int page_swapcount(struct page *page)
return 0;
}
-static inline int __swap_count(struct swap_info_struct *si, swp_entry_t entry)
+static inline int __swap_count(swp_entry_t entry)
{
return 0;
}
diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h
index 9e1f42cb57e9..52b073fea17f 100644
--- a/include/linux/xxhash.h
+++ b/include/linux/xxhash.h
@@ -107,6 +107,29 @@ uint32_t xxh32(const void *input, size_t length, uint32_t seed);
*/
uint64_t xxh64(const void *input, size_t length, uint64_t seed);
+/**
+ * xxhash() - calculate wordsize hash of the input with a given seed
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * If the hash does not need to be comparable between machines with
+ * different word sizes, this function will call whichever of xxh32()
+ * or xxh64() is faster.
+ *
+ * Return: wordsize hash of the data.
+ */
+
+static inline unsigned long xxhash(const void *input, size_t length,
+ uint64_t seed)
+{
+#if BITS_PER_LONG == 64
+ return xxh64(input, length, seed);
+#else
+ return xxh32(input, length, seed);
+#endif
+}
+
/*-****************************
* Streaming Hash Functions
*****************************/
diff --git a/include/uapi/linux/bfs_fs.h b/include/uapi/linux/bfs_fs.h
index 940b04772af8..08f6b4956359 100644
--- a/include/uapi/linux/bfs_fs.h
+++ b/include/uapi/linux/bfs_fs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* include/linux/bfs_fs.h - BFS data structures on disk.
- * Copyright (C) 1999 Tigran Aivazian <tigran@veritas.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
*/
#ifndef _LINUX_BFS_FS_H
diff --git a/include/uapi/linux/binfmts.h b/include/uapi/linux/binfmts.h
index 4abad03a8853..689025d9c185 100644
--- a/include/uapi/linux/binfmts.h
+++ b/include/uapi/linux/binfmts.h
@@ -16,6 +16,6 @@ struct pt_regs;
#define MAX_ARG_STRINGS 0x7FFFFFFF
/* sizeof(linux_binprm->buf) */
-#define BINPRM_BUF_SIZE 128
+#define BINPRM_BUF_SIZE 256
#endif /* _UAPI_LINUX_BINFMTS_H */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 594b85f7cb86..1d338357df8a 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -41,6 +41,7 @@
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
/* (1U << 31) is reserved for signed error codes */
/*
diff --git a/init/initramfs.c b/init/initramfs.c
index 96af18fec4d0..907233ed7293 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -548,7 +548,6 @@ skip:
initrd_end = 0;
}
-#ifdef CONFIG_BLK_DEV_RAM
#define BUF_SIZE 1024
static void __init clean_rootfs(void)
{
@@ -595,7 +594,6 @@ static void __init clean_rootfs(void)
ksys_close(fd);
kfree(buf);
}
-#endif
static int __init populate_rootfs(void)
{
@@ -638,8 +636,10 @@ static int __init populate_rootfs(void)
printk(KERN_INFO "Unpacking initramfs...\n");
err = unpack_to_rootfs((char *)initrd_start,
initrd_end - initrd_start);
- if (err)
+ if (err) {
printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
+ clean_rootfs();
+ }
free_initrd();
#endif
}
diff --git a/init/main.c b/init/main.c
index 9c616034bb7e..bc1dcc037e01 100644
--- a/init/main.c
+++ b/init/main.c
@@ -105,7 +105,6 @@
static int kernel_init(void *);
extern void init_IRQ(void);
-extern void fork_init(void);
extern void radix_tree_init(void);
/*
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 49f9bf4ffc7f..d9ac6caf6a5e 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -120,7 +120,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
static int zero;
static int one = 1;
static int int_max = INT_MAX;
-static int ipc_mni = IPCMNI;
+int ipc_mni = IPCMNI;
+int ipc_mni_shift = IPCMNI_SHIFT;
+bool ipc_mni_extended;
static struct ctl_table ipc_kern_table[] = {
{
@@ -246,3 +248,13 @@ static int __init ipc_sysctl_init(void)
}
device_initcall(ipc_sysctl_init);
+
+static int __init ipc_mni_extend(char *str)
+{
+ ipc_mni = IPCMNI_EXTEND;
+ ipc_mni_shift = IPCMNI_EXTEND_SHIFT;
+ ipc_mni_extended = true;
+ pr_info("IPCMNI extended to %d.\n", ipc_mni);
+ return 0;
+}
+early_param("ipcmni_extend", ipc_mni_extend);
diff --git a/ipc/util.c b/ipc/util.c
index 0af05752969f..3f11a81cf9b7 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -110,12 +110,13 @@ static const struct rhashtable_params ipc_kht_params = {
* @ids: ipc identifier set
*
* Set up the sequence range to use for the ipc identifier range (limited
- * below IPCMNI) then initialise the keys hashtable and ids idr.
+ * below ipc_mni) then initialise the keys hashtable and ids idr.
*/
void ipc_init_ids(struct ipc_ids *ids)
{
ids->in_use = 0;
- ids->seq = 0;
+ ids->deleted = false;
+ ids->seq = ipc_mni_extended ? 0 : -1; /* seq # is pre-incremented */
init_rwsem(&ids->rwsem);
rhashtable_init(&ids->key_ht, &ipc_kht_params);
idr_init(&ids->ipcs_idr);
@@ -198,6 +199,11 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
{
int idx, next_id = -1;
+/*
+ * To conserve sequence number space with extended ipc_mni when new ID
+ * is built, the sequence number is incremented only when one or more
+ * IDs have been removed previously.
+ */
#ifdef CONFIG_CHECKPOINT_RESTORE
next_id = ids->next_id;
ids->next_id = -1;
@@ -216,9 +222,13 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
*/
if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
- new->seq = ids->seq++;
- if (ids->seq > IPCID_SEQ_MAX)
- ids->seq = 0;
+ if (!ipc_mni_extended || ids->deleted) {
+ ids->seq++;
+ if (ids->seq > IPCID_SEQ_MAX)
+ ids->seq = 0;
+ ids->deleted = false;
+ }
+ new->seq = ids->seq;
idx = idr_alloc(&ids->ipcs_idr, new, 0, 0, GFP_NOWAIT);
} else {
new->seq = ipcid_to_seqx(next_id);
@@ -226,7 +236,7 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
0, GFP_NOWAIT);
}
if (idx >= 0)
- new->id = SEQ_MULTIPLIER * new->seq + idx;
+ new->id = (new->seq << IPCMNI_SEQ_SHIFT) + idx;
return idx;
}
@@ -254,8 +264,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
/* 1) Initialize the refcount so that ipc_rcu_putref works */
refcount_set(&new->refcount, 1);
- if (limit > IPCMNI)
- limit = IPCMNI;
+ if (limit > ipc_mni)
+ limit = ipc_mni;
if (ids->in_use >= limit)
return -ENOSPC;
@@ -436,6 +446,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
idr_remove(&ids->ipcs_idr, idx);
ipc_kht_remove(ids, ipcp);
ids->in_use--;
+ ids->deleted = true;
ipcp->deleted = true;
if (unlikely(idx == ids->max_idx)) {
@@ -738,7 +749,7 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
if (total >= ids->in_use)
return NULL;
- for (; pos < IPCMNI; pos++) {
+ for (; pos < ipc_mni; pos++) {
ipc = idr_find(&ids->ipcs_idr, pos);
if (ipc != NULL) {
*new_pos = pos + 1;
diff --git a/ipc/util.h b/ipc/util.h
index d768fdbed515..127c0fedbd09 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -15,8 +15,36 @@
#include <linux/err.h>
#include <linux/ipc_namespace.h>
-#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */
-#define SEQ_MULTIPLIER (IPCMNI)
+/*
+ * The IPC ID contains 2 separate numbers - index and sequence number.
+ * By default,
+ * bits 0-14: index (32k, 15 bits)
+ * bits 15-30: sequence number (64k, 16 bits)
+ *
+ * When IPCMNI extension mode is turned on, the composition changes:
+ * bits 0-22: index (8M, 23 bits)
+ * bits 23-30: sequence number (256, 8 bits)
+ */
+#define IPCMNI_SHIFT 15
+#define IPCMNI_EXTEND_SHIFT 23
+#define IPCMNI (1 << IPCMNI_SHIFT)
+#define IPCMNI_EXTEND (1 << IPCMNI_EXTEND_SHIFT)
+
+#ifdef CONFIG_SYSVIPC_SYSCTL
+extern int ipc_mni;
+extern int ipc_mni_shift;
+extern bool ipc_mni_extended;
+
+#define IPCMNI_SEQ_SHIFT ipc_mni_shift
+#define IPCMNI_IDX_MASK ((1 << ipc_mni_shift) - 1)
+
+#else /* CONFIG_SYSVIPC_SYSCTL */
+
+#define ipc_mni IPCMNI
+#define ipc_mni_extended false
+#define IPCMNI_SEQ_SHIFT IPCMNI_SHIFT
+#define IPCMNI_IDX_MASK ((1 << IPCMNI_SHIFT) - 1)
+#endif /* CONFIG_SYSVIPC_SYSCTL */
void sem_init(void);
void msg_init(void);
@@ -96,9 +124,9 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *);
#define IPC_MSG_IDS 1
#define IPC_SHM_IDS 2
-#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
-#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
-#define IPCID_SEQ_MAX min_t(int, INT_MAX/SEQ_MULTIPLIER, USHRT_MAX)
+#define ipcid_to_idx(id) ((id) & IPCMNI_IDX_MASK)
+#define ipcid_to_seqx(id) ((id) >> IPCMNI_SEQ_SHIFT)
+#define IPCID_SEQ_MAX (INT_MAX >> IPCMNI_SEQ_SHIFT)
/* must be called with ids->rwsem acquired for writing */
int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
@@ -123,8 +151,8 @@ static inline int ipc_get_maxidx(struct ipc_ids *ids)
if (ids->in_use == 0)
return -1;
- if (ids->in_use == IPCMNI)
- return IPCMNI - 1;
+ if (ids->in_use == ipc_mni)
+ return ipc_mni - 1;
return ids->max_idx;
}
@@ -219,10 +247,10 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
static inline int sem_check_semmni(struct ipc_namespace *ns) {
/*
- * Check semmni range [0, IPCMNI]
+ * Check semmni range [0, ipc_mni]
* semmni is the last element of sem_ctls[4] array
*/
- return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > IPCMNI))
+ return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > ipc_mni))
? -ERANGE : 0;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 07cddff89c7b..aaed316a1fd6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -164,10 +164,6 @@ static inline void free_task_struct(struct task_struct *tsk)
}
#endif
-void __weak arch_release_thread_stack(unsigned long *stack)
-{
-}
-
#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
/*
@@ -417,7 +413,6 @@ static void release_task_stack(struct task_struct *tsk)
return; /* Better to leak the stack than to free prematurely */
account_kernel_stack(tsk, -1);
- arch_release_thread_stack(tsk->stack);
free_thread_stack(tsk);
tsk->stack = NULL;
#ifdef CONFIG_VMAP_STACK
@@ -739,15 +734,16 @@ void __init __weak arch_task_cache_init(void) { }
static void set_max_threads(unsigned int max_threads_suggested)
{
u64 threads;
+ unsigned long nr_pages = totalram_pages();
/*
* The number of threads shall be limited such that the thread
* structures may only consume a small part of the available memory.
*/
- if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
+ if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64)
threads = MAX_THREADS;
else
- threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
+ threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE,
(u64) THREAD_SIZE * 8UL);
if (threads > max_threads_suggested)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 86ef06d3dbe3..d7140447be75 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -152,6 +152,7 @@ int sanity_check_segment_list(struct kimage *image)
int i;
unsigned long nr_segments = image->nr_segments;
unsigned long total_pages = 0;
+ unsigned long nr_pages = totalram_pages();
/*
* Verify we have good destination addresses. The caller is
@@ -217,13 +218,13 @@ int sanity_check_segment_list(struct kimage *image)
* wasted allocating pages, which can cause a soft lockup.
*/
for (i = 0; i < nr_segments; i++) {
- if (PAGE_COUNT(image->segment[i].memsz) > totalram_pages / 2)
+ if (PAGE_COUNT(image->segment[i].memsz) > nr_pages / 2)
return -EINVAL;
total_pages += PAGE_COUNT(image->segment[i].memsz);
}
- if (total_pages > totalram_pages / 2)
+ if (total_pages > nr_pages / 2)
return -EINVAL;
/*
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index b0308a2c6000..640b2034edd6 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -105,7 +105,7 @@ unsigned long image_size;
void __init hibernate_image_size_init(void)
{
- image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
+ image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE;
}
/*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f27f20e45547..d31218806bc1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2071,6 +2071,12 @@ config IO_STRICT_DEVMEM
If in doubt, say Y.
+config DEBUG_AID_FOR_SYZBOT
+ bool "Additional debug code for syzbot"
+ default n
+ help
+ This option is intended for testing by syzbot.
+
source "arch/$(SRCARCH)/Kconfig.debug"
endmenu # Kernel hacking
diff --git a/lib/genalloc.c b/lib/genalloc.c
index ca06adc4f445..5deb25c40a5a 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -311,7 +311,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
end_bit = chunk_size(chunk) >> order;
retry:
start_bit = algo(chunk->bits, end_bit, start_bit,
- nbits, data, pool);
+ nbits, data, pool, chunk->start_addr);
if (start_bit >= end_bit)
continue;
remain = bitmap_set_ll(chunk->bits, start_bit, nbits);
@@ -525,7 +525,7 @@ EXPORT_SYMBOL(gen_pool_set_algo);
*/
unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
- struct gen_pool *pool)
+ struct gen_pool *pool, unsigned long start_addr)
{
return bitmap_find_next_zero_area(map, size, start, nr, 0);
}
@@ -543,16 +543,19 @@ EXPORT_SYMBOL(gen_pool_first_fit);
*/
unsigned long gen_pool_first_fit_align(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
- struct gen_pool *pool)
+ struct gen_pool *pool, unsigned long start_addr)
{
struct genpool_data_align *alignment;
- unsigned long align_mask;
+ unsigned long align_mask, align_off;
int order;
alignment = data;
order = pool->min_alloc_order;
align_mask = ((alignment->align + (1UL << order) - 1) >> order) - 1;
- return bitmap_find_next_zero_area(map, size, start, nr, align_mask);
+ align_off = (start_addr & (alignment->align - 1)) >> order;
+
+ return bitmap_find_next_zero_area_off(map, size, start, nr,
+ align_mask, align_off);
}
EXPORT_SYMBOL(gen_pool_first_fit_align);
@@ -567,7 +570,7 @@ EXPORT_SYMBOL(gen_pool_first_fit_align);
*/
unsigned long gen_pool_fixed_alloc(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
- struct gen_pool *pool)
+ struct gen_pool *pool, unsigned long start_addr)
{
struct genpool_data_fixed *fixed_data;
int order;
@@ -601,7 +604,8 @@ EXPORT_SYMBOL(gen_pool_fixed_alloc);
*/
unsigned long gen_pool_first_fit_order_align(unsigned long *map,
unsigned long size, unsigned long start,
- unsigned int nr, void *data, struct gen_pool *pool)
+ unsigned int nr, void *data, struct gen_pool *pool,
+ unsigned long start_addr)
{
unsigned long align_mask = roundup_pow_of_two(nr) - 1;
@@ -624,7 +628,7 @@ EXPORT_SYMBOL(gen_pool_first_fit_order_align);
*/
unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
- struct gen_pool *pool)
+ struct gen_pool *pool, unsigned long start_addr)
{
unsigned long start_bit = size;
unsigned long len = size + 1;
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 0beaa1d899aa..eefe67d50e84 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -28,7 +28,7 @@ void show_mem(unsigned int filter, nodemask_t *nodemask)
continue;
total += zone->present_pages;
- reserved += zone->present_pages - zone->managed_pages;
+ reserved += zone->present_pages - zone_managed_pages(zone);
if (is_highmem_idx(zoneid))
highmem += zone->present_pages;
diff --git a/mm/Kconfig b/mm/Kconfig
index d85e39da47ae..25c71eb8a7db 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -291,6 +291,7 @@ config MMU_NOTIFIER
config KSM
bool "Enable KSM for page merging"
depends on MMU
+ select XXHASH
help
Enable Kernel Samepage Merging: KSM periodically scans those areas
of an application's address space that an app has advised may be
diff --git a/mm/debug.c b/mm/debug.c
index cdacba12e09a..d18c5cea3320 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -44,6 +44,7 @@ const struct trace_print_flags vmaflag_names[] = {
void __dump_page(struct page *page, const char *reason)
{
+ struct address_space *mapping = page_mapping(page);
bool page_poisoned = PagePoisoned(page);
int mapcount;
@@ -53,7 +54,7 @@ void __dump_page(struct page *page, const char *reason)
* dump_page() when detected.
*/
if (page_poisoned) {
- pr_emerg("page:%px is uninitialized and poisoned", page);
+ pr_warn("page:%px is uninitialized and poisoned", page);
goto hex_only;
}
@@ -64,15 +65,27 @@ void __dump_page(struct page *page, const char *reason)
*/
mapcount = PageSlab(page) ? 0 : page_mapcount(page);
- pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
+ pr_warn("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
page, page_ref_count(page), mapcount,
page->mapping, page_to_pgoff(page));
if (PageCompound(page))
pr_cont(" compound_mapcount: %d", compound_mapcount(page));
pr_cont("\n");
+ if (PageAnon(page))
+ pr_warn("anon ");
+ else if (PageKsm(page))
+ pr_warn("ksm ");
+ else if (mapping) {
+ pr_warn("%ps ", mapping->a_ops);
+ if (mapping->host->i_dentry.first) {
+ struct dentry *dentry;
+ dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
+ pr_warn("name:\"%*s\" ", dentry->d_name.len, dentry->d_name.name);
+ }
+ }
BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
- pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags);
+ pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags);
hex_only:
print_hex_dump(KERN_ALERT, "raw: ", DUMP_PREFIX_NONE, 32,
@@ -80,11 +93,11 @@ hex_only:
sizeof(struct page), false);
if (reason)
- pr_alert("page dumped because: %s\n", reason);
+ pr_warn("page dumped because: %s\n", reason);
#ifdef CONFIG_MEMCG
if (!page_poisoned && page->mem_cgroup)
- pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
+ pr_warn("page->mem_cgroup:%px\n", page->mem_cgroup);
#endif
}
diff --git a/mm/highmem.c b/mm/highmem.c
index 59db3223a5d6..107b10f9878e 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -105,9 +105,8 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
}
#endif
-unsigned long totalhigh_pages __read_mostly;
-EXPORT_SYMBOL(totalhigh_pages);
-
+atomic_long_t _totalhigh_pages __read_mostly;
+EXPORT_SYMBOL(_totalhigh_pages);
EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 55478ab3c83b..c3072e9b21fb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -420,7 +420,7 @@ static int __init hugepage_init(void)
* where the extra memory used could hurt more than TLB overhead
* is likely to save. The admin can still enable it through /sys.
*/
- if (totalram_pages < (512 << (20 - PAGE_SHIFT))) {
+ if (totalram_pages() < (512 << (20 - PAGE_SHIFT))) {
transparent_hugepage_flags = 0;
return 0;
}
@@ -558,7 +558,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
return VM_FAULT_FALLBACK;
}
- pgtable = pte_alloc_one(vma->vm_mm, haddr);
+ pgtable = pte_alloc_one(vma->vm_mm);
if (unlikely(!pgtable)) {
ret = VM_FAULT_OOM;
goto release;
@@ -702,7 +702,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
struct page *zero_page;
bool set;
vm_fault_t ret;
- pgtable = pte_alloc_one(vma->vm_mm, haddr);
+ pgtable = pte_alloc_one(vma->vm_mm);
if (unlikely(!pgtable))
return VM_FAULT_OOM;
zero_page = mm_get_huge_zero_page(vma->vm_mm);
@@ -791,7 +791,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
return VM_FAULT_SIGBUS;
if (arch_needs_pgtable_deposit()) {
- pgtable = pte_alloc_one(vma->vm_mm, addr);
+ pgtable = pte_alloc_one(vma->vm_mm);
if (!pgtable)
return VM_FAULT_OOM;
}
@@ -927,7 +927,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
if (!vma_is_anonymous(vma))
return 0;
- pgtable = pte_alloc_one(dst_mm, addr);
+ pgtable = pte_alloc_one(dst_mm);
if (unlikely(!pgtable))
goto out;
diff --git a/mm/internal.h b/mm/internal.h
index 291eb2b6d1d8..22c43448a602 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -163,6 +163,7 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order);
+extern void __free_pages_core(struct page *page, unsigned int order);
extern void prep_compound_page(struct page *page, unsigned int order);
extern void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags);
@@ -444,6 +445,16 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
#define NODE_RECLAIM_SOME 0
#define NODE_RECLAIM_SUCCESS 1
+#ifdef CONFIG_NUMA
+extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
+#else
+static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
+ unsigned int order)
+{
+ return NODE_RECLAIM_NOSCAN;
+}
+#endif
+
extern int hwpoison_filter(struct page *p);
extern u32 hwpoison_filter_dev_major;
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index c7550eb65922..5edb8b29a827 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -120,7 +120,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
pte_t *p;
if (slab_is_available())
- p = pte_alloc_one_kernel(&init_mm, addr);
+ p = pte_alloc_one_kernel(&init_mm);
else
p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
if (!p)
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index b209dbaefde8..5835c0f583d3 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -236,7 +236,7 @@ void quarantine_reduce(void)
* Update quarantine size in case of hotplug. Allocate a fraction of
* the installed memory to quarantine minus per-cpu queue limits.
*/
- total_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
+ total_size = (totalram_pages() << PAGE_SHIFT) /
QUARANTINE_FRACTION;
percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
new_quarantine_size = (total_size < percpu_quarantines) ?
diff --git a/mm/ksm.c b/mm/ksm.c
index 5b0894b45ee5..04ace68ef514 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -25,7 +25,7 @@
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/spinlock.h>
-#include <linux/jhash.h>
+#include <linux/xxhash.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/wait.h>
@@ -667,7 +667,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
}
/*
- * get_ksm_page: checks if the page indicated by the stable node
+ * __get_ksm_page: checks if the page indicated by the stable node
* is still its ksm page, despite having held no reference to it.
* In which case we can trust the content of the page, and it
* returns the gotten page; but if the page has now been zapped,
@@ -685,7 +685,8 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
* a page to put something that might look like our key in page->mapping.
* is on its way to being freed; but it is an anomaly to bear in mind.
*/
-static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
+static struct page *__get_ksm_page(struct stable_node *stable_node,
+ bool lock_it, bool async)
{
struct page *page;
void *expected_mapping;
@@ -728,7 +729,15 @@ again:
}
if (lock_it) {
- lock_page(page);
+ if (async) {
+ if (!trylock_page(page)) {
+ put_page(page);
+ return ERR_PTR(-EBUSY);
+ }
+ } else {
+ lock_page(page);
+ }
+
if (READ_ONCE(page->mapping) != expected_mapping) {
unlock_page(page);
put_page(page);
@@ -751,6 +760,11 @@ stale:
return NULL;
}
+static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
+{
+ return __get_ksm_page(stable_node, lock_it, false);
+}
+
/*
* Removing rmap_item from stable or unstable tree.
* This function will clean the information from the stable/unstable tree.
@@ -1009,7 +1023,7 @@ static u32 calc_checksum(struct page *page)
{
u32 checksum;
void *addr = kmap_atomic(page);
- checksum = jhash2(addr, PAGE_SIZE / 4, 17);
+ checksum = xxhash(addr, PAGE_SIZE, 0);
kunmap_atomic(addr);
return checksum;
}
@@ -1321,6 +1335,23 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
{
int err;
+ if (IS_ENABLED(CONFIG_COMPACTION)) {
+ unsigned long pfn;
+
+ /*
+ * Find neighbour of @page containing 1-order pair in buddy
+ * allocator and check whether its count is 0. If so, we
+ * consider the neighbour as a free page (this is more
+ * probable than it's freezed via page_ref_freeze()), and
+ * we try to use @tree_page as ksm page and to free @page.
+ */
+ pfn = page_to_pfn(page) ^ 1;
+ if (pfn_valid(pfn) && page_count(pfn_to_page(pfn)) == 0) {
+ swap(rmap_item, tree_rmap_item);
+ swap(page, tree_page);
+ }
+ }
+
err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
if (!err) {
err = try_to_merge_with_ksm_page(tree_rmap_item,
@@ -1675,7 +1706,11 @@ again:
* It would be more elegant to return stable_node
* than kpage, but that involves more changes.
*/
- tree_page = get_ksm_page(stable_node_dup, true);
+ tree_page = __get_ksm_page(stable_node_dup, true, true);
+
+ if (PTR_ERR(tree_page) == -EBUSY)
+ return ERR_PTR(-EBUSY);
+
if (unlikely(!tree_page))
/*
* The tree may have been rebalanced,
@@ -2062,6 +2097,10 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
/* We first start with searching the page inside the stable tree */
kpage = stable_tree_search(page);
+
+ if (PTR_ERR(kpage) == -EBUSY)
+ return;
+
if (kpage == page && rmap_item->head == stable_node) {
put_page(kpage);
return;
diff --git a/mm/memblock.c b/mm/memblock.c
index 9a2d5ae81ae1..f09710644b4b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1239,6 +1239,69 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
return 0;
}
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/**
+ * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
+ *
+ * @idx: pointer to u64 loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @out_start: ptr to ulong for start pfn of the range, can be %NULL
+ * @out_end: ptr to ulong for end pfn of the range, can be %NULL
+ *
+ * This function is meant to be a zone/pfn specific wrapper for the
+ * for_each_mem_range type iterators. Specifically they are used in the
+ * deferred memory init routines and as such we were duplicating much of
+ * this logic throughout the code. So instead of having it in multiple
+ * locations it seemed like it would make more sense to centralize this to
+ * one new iterator that does everything they need.
+ */
+void __init_memblock
+__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
+ unsigned long *out_spfn, unsigned long *out_epfn)
+{
+ int zone_nid = zone_to_nid(zone);
+ phys_addr_t spa, epa;
+ int nid;
+
+ __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
+ &memblock.memory, &memblock.reserved,
+ &spa, &epa, &nid);
+
+ while (*idx != ULLONG_MAX) {
+ unsigned long epfn = PFN_DOWN(epa);
+ unsigned long spfn = PFN_UP(spa);
+
+ /*
+ * Verify the end is at least past the start of the zone and
+ * that we have at least one PFN to initialize.
+ */
+ if (zone->zone_start_pfn < epfn && spfn < epfn) {
+ /* if we went too far just stop searching */
+ if (zone_end_pfn(zone) <= spfn)
+ break;
+
+ if (out_spfn)
+ *out_spfn = max(zone->zone_start_pfn, spfn);
+ if (out_epfn)
+ *out_epfn = min(zone_end_pfn(zone), epfn);
+
+ return;
+ }
+
+ __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
+ &memblock.memory, &memblock.reserved,
+ &spa, &epa, &nid);
+ }
+
+ /* signal end of iteration */
+ *idx = ULLONG_MAX;
+ if (out_spfn)
+ *out_spfn = ULONG_MAX;
+ if (out_epfn)
+ *out_epfn = 0;
+}
+
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
@@ -1576,7 +1639,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
for (; cursor < end; cursor++) {
memblock_free_pages(pfn_to_page(cursor), cursor, 0);
- totalram_pages++;
+ totalram_pages_inc();
}
}
@@ -1950,7 +2013,7 @@ void reset_node_managed_pages(pg_data_t *pgdat)
struct zone *z;
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
- z->managed_pages = 0;
+ atomic_long_set(&z->managed_pages, 0);
}
void __init reset_all_zones_managed_pages(void)
@@ -1978,7 +2041,7 @@ unsigned long __init memblock_free_all(void)
reset_all_zones_managed_pages();
pages = free_low_memory_core_early();
- totalram_pages += pages;
+ totalram_pages_add(pages);
return pages;
}
diff --git a/mm/memfd.c b/mm/memfd.c
index 97264c79d2cd..63fff5e77114 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -131,7 +131,8 @@ static unsigned int *memfd_file_seals_ptr(struct file *file)
#define F_ALL_SEALS (F_SEAL_SEAL | \
F_SEAL_SHRINK | \
F_SEAL_GROW | \
- F_SEAL_WRITE)
+ F_SEAL_WRITE | \
+ F_SEAL_FUTURE_WRITE)
static int memfd_add_seals(struct file *file, unsigned int seals)
{
@@ -200,6 +201,25 @@ static int memfd_add_seals(struct file *file, unsigned int seals)
}
}
+ if ((seals & F_SEAL_FUTURE_WRITE) &&
+ !(*file_seals & F_SEAL_FUTURE_WRITE)) {
+ /*
+ * The FUTURE_WRITE seal also prevents growing and shrinking
+ * so we need them to be already set, or requested now.
+ */
+ int test_seals = (seals | *file_seals) &
+ (F_SEAL_GROW | F_SEAL_SHRINK);
+
+ if (test_seals != (F_SEAL_GROW | F_SEAL_SHRINK)) {
+ error = -EINVAL;
+ goto unlock;
+ }
+
+ spin_lock(&file->f_lock);
+ file->f_mode &= ~(FMODE_WRITE | FMODE_PWRITE);
+ spin_unlock(&file->f_lock);
+ }
+
*file_seals |= seals;
error = 0;
diff --git a/mm/memory.c b/mm/memory.c
index 4ad2d293ddc2..1f7e6eef6ae4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -400,10 +400,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
}
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
{
spinlock_t *ptl;
- pgtable_t new = pte_alloc_one(mm, address);
+ pgtable_t new = pte_alloc_one(mm);
if (!new)
return -ENOMEM;
@@ -434,9 +434,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
return 0;
}
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
{
- pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+ pte_t *new = pte_alloc_one_kernel(&init_mm);
if (!new)
return -ENOMEM;
@@ -2698,7 +2698,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
struct swap_info_struct *si = swp_swap_info(entry);
if (si->flags & SWP_SYNCHRONOUS_IO &&
- __swap_count(si, entry) == 1) {
+ __swap_count(entry) == 1) {
/* skip swapcache */
page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
vmf->address);
@@ -2808,7 +2808,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
flush_icache_page(vma, page);
if (pte_swp_soft_dirty(vmf->orig_pte))
pte = pte_mksoft_dirty(pte);
- set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
vmf->orig_pte = pte;
@@ -2822,6 +2821,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
mem_cgroup_commit_charge(page, memcg, true, false);
activate_page(page);
}
+ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
swap_free(entry);
if (mem_cgroup_swap_full(page) ||
@@ -2895,7 +2895,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
*
* Here we only have down_read(mmap_sem).
*/
- if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+ if (pte_alloc(vma->vm_mm, vmf->pmd))
return VM_FAULT_OOM;
/* See the comment in pte_alloc_one_map() */
@@ -3042,7 +3042,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
spin_unlock(vmf->ptl);
vmf->prealloc_pte = NULL;
- } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+ } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
return VM_FAULT_OOM;
}
map_pte:
@@ -3121,7 +3121,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
* related to pte entry. Use the preallocated table for that.
*/
if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
- vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+ vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
smp_wmb(); /* See comment in __pte_alloc() */
@@ -3359,8 +3359,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
start_pgoff + nr_pages - 1);
if (pmd_none(*vmf->pmd)) {
- vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
- vmf->address);
+ vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
if (!vmf->prealloc_pte)
goto out;
smp_wmb(); /* See comment in __pte_alloc() */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2b2b3ccbbfb5..7b4317ae8318 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -46,7 +46,7 @@
* and restore_online_page_callback() for generic callback restore.
*/
-static void generic_online_page(struct page *page);
+static int generic_online_page(struct page *page, unsigned int order);
static online_page_callback_t online_page_callback = generic_online_page;
static DEFINE_MUTEX(online_page_callback_lock);
@@ -655,26 +655,44 @@ void __online_page_free(struct page *page)
}
EXPORT_SYMBOL_GPL(__online_page_free);
-static void generic_online_page(struct page *page)
+static int generic_online_page(struct page *page, unsigned int order)
{
- __online_page_set_limits(page);
- __online_page_increment_counters(page);
- __online_page_free(page);
+ __free_pages_core(page, order);
+ totalram_pages_add(1UL << order);
+#ifdef CONFIG_HIGHMEM
+ if (PageHighMem(page))
+ totalhigh_pages_add(1UL << order);
+#endif
+ return 0;
+}
+
+static int online_pages_blocks(unsigned long start, unsigned long nr_pages)
+{
+ unsigned long end = start + nr_pages;
+ int order, ret, onlined_pages = 0;
+
+ while (start < end) {
+ order = min(MAX_ORDER - 1,
+ get_order(PFN_PHYS(end) - PFN_PHYS(start)));
+
+ ret = (*online_page_callback)(pfn_to_page(start), order);
+ if (!ret)
+ onlined_pages += (1UL << order);
+ else if (ret > 0)
+ onlined_pages += ret;
+
+ start += (1UL << order);
+ }
+ return onlined_pages;
}
static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg)
{
- unsigned long i;
unsigned long onlined_pages = *(unsigned long *)arg;
- struct page *page;
if (PageReserved(pfn_to_page(start_pfn)))
- for (i = 0; i < nr_pages; i++) {
- page = pfn_to_page(start_pfn + i);
- (*online_page_callback)(page);
- onlined_pages++;
- }
+ onlined_pages = online_pages_blocks(start_pfn, nr_pages);
online_mem_sections(start_pfn, start_pfn + nr_pages);
@@ -1388,10 +1406,8 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
page_is_file_cache(page));
} else {
-#ifdef CONFIG_DEBUG_VM
- pr_alert("failed to isolate pfn %lx\n", pfn);
+ pr_warn("failed to isolate pfn %lx\n", pfn);
dump_page(page, "isolation failed");
-#endif
put_page(page);
/* Because we don't have big zone->lock. we should
check this again here. */
@@ -1411,8 +1427,14 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
/* Allocate a new page from the nearest neighbor node */
ret = migrate_pages(&source, new_node_page, NULL, 0,
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
- if (ret)
+ if (ret) {
+ list_for_each_entry(page, &source, lru) {
+ pr_warn("migrating pfn %lx failed: %d",
+ page_to_pfn(page), ret);
+ dump_page(page, NULL);
+ }
putback_movable_pages(&source);
+ }
}
out:
return ret;
@@ -1553,12 +1575,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
unsigned long valid_start, valid_end;
struct zone *zone;
struct memory_notify arg;
-
- /* at least, alignment against pageblock is necessary */
- if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
- return -EINVAL;
- if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
- return -EINVAL;
+ char *reason;
mem_hotplug_begin();
@@ -1567,7 +1584,9 @@ static int __ref __offline_pages(unsigned long start_pfn,
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
&valid_end)) {
mem_hotplug_done();
- return -EINVAL;
+ ret = -EINVAL;
+ reason = "multizone range";
+ goto failed_removal;
}
zone = page_zone(pfn_to_page(valid_start));
@@ -1579,7 +1598,8 @@ static int __ref __offline_pages(unsigned long start_pfn,
MIGRATE_MOVABLE, true);
if (ret) {
mem_hotplug_done();
- return ret;
+ reason = "failure to isolate range";
+ goto failed_removal;
}
arg.start_pfn = start_pfn;
@@ -1588,15 +1608,19 @@ static int __ref __offline_pages(unsigned long start_pfn,
ret = memory_notify(MEM_GOING_OFFLINE, &arg);
ret = notifier_to_errno(ret);
- if (ret)
- goto failed_removal;
+ if (ret) {
+ reason = "notifier failure";
+ goto failed_removal_isolated;
+ }
pfn = start_pfn;
repeat:
/* start memory hot removal */
ret = -EINTR;
- if (signal_pending(current))
- goto failed_removal;
+ if (signal_pending(current)) {
+ reason = "signal backoff";
+ goto failed_removal_isolated;
+ }
cond_resched();
lru_add_drain_all();
@@ -1613,8 +1637,10 @@ repeat:
* actually in order to make hugetlbfs's object counting consistent.
*/
ret = dissolve_free_huge_pages(start_pfn, end_pfn);
- if (ret)
- goto failed_removal;
+ if (ret) {
+ reason = "failure to dissolve huge pages";
+ goto failed_removal_isolated;
+ }
/* check again */
offlined_pages = check_pages_isolated(start_pfn, end_pfn);
if (offlined_pages < 0)
@@ -1654,13 +1680,15 @@ repeat:
mem_hotplug_done();
return 0;
+failed_removal_isolated:
+ undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
failed_removal:
- pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n",
+ pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
(unsigned long long) start_pfn << PAGE_SHIFT,
- ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
+ ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
+ reason);
memory_notify(MEM_CANCEL_OFFLINE, &arg);
/* pushback to free area */
- undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
mem_hotplug_done();
return ret;
}
@@ -1753,34 +1781,6 @@ static int check_cpu_on_node(pg_data_t *pgdat)
return 0;
}
-static void unmap_cpu_on_node(pg_data_t *pgdat)
-{
-#ifdef CONFIG_ACPI_NUMA
- int cpu;
-
- for_each_possible_cpu(cpu)
- if (cpu_to_node(cpu) == pgdat->node_id)
- numa_clear_node(cpu);
-#endif
-}
-
-static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
-{
- int ret;
-
- ret = check_cpu_on_node(pgdat);
- if (ret)
- return ret;
-
- /*
- * the node will be offlined when we come here, so we can clear
- * the cpu_to_node() now.
- */
-
- unmap_cpu_on_node(pgdat);
- return 0;
-}
-
/**
* try_offline_node
* @nid: the node ID
@@ -1813,7 +1813,7 @@ void try_offline_node(int nid)
return;
}
- if (check_and_unmap_cpu_on_node(pgdat))
+ if (check_cpu_on_node(pgdat))
return;
/*
diff --git a/mm/migrate.c b/mm/migrate.c
index f7e4bfdc13b7..1777a9327dbf 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2599,7 +2599,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
*
* Here we only have down_read(mmap_sem).
*/
- if (pte_alloc(mm, pmdp, addr))
+ if (pte_alloc(mm, pmdp))
goto abort;
/* See the comment in pte_alloc_one_map() */
diff --git a/mm/mincore.c b/mm/mincore.c
index 4985965aa20a..aa0e542569f9 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -68,8 +68,16 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
*/
if (xa_is_value(page)) {
swp_entry_t swp = radix_to_swp_entry(page);
- page = find_get_page(swap_address_space(swp),
- swp_offset(swp));
+ struct swap_info_struct *si;
+
+ /* Prevent swap device to being swapoff under us */
+ si = get_swap_device(swp);
+ if (si) {
+ page = find_get_page(swap_address_space(swp),
+ swp_offset(swp));
+ put_swap_device(si);
+ } else
+ page = NULL;
}
} else
page = find_get_page(mapping, pgoff);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 6838a530789b..33917105a3a2 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -146,7 +146,7 @@ static void __meminit mm_compute_batch(void)
s32 batch = max_t(s32, nr*2, 32);
/* batch size set to 0.4% of (total memory/#cpus), or max int32 */
- memsized_batch = min_t(u64, (totalram_pages/nr)/256, 0x7fffffff);
+ memsized_batch = min_t(u64, (totalram_pages()/nr)/256, 0x7fffffff);
vm_committed_as_batch = max_t(s32, memsized_batch, batch);
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 6c04292e16a7..eaba1f93bc0a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2962,16 +2962,6 @@ out:
return ret;
}
-static inline void verify_mm_writelocked(struct mm_struct *mm)
-{
-#ifdef CONFIG_DEBUG_VM
- if (unlikely(down_read_trylock(&mm->mmap_sem))) {
- WARN_ON(1);
- up_read(&mm->mmap_sem);
- }
-#endif
-}
-
/*
* this is really a simplified "do_mmap". it only handles
* anonymous maps. eventually we may be able to do some
@@ -2999,12 +2989,6 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
return error;
/*
- * mm->mmap_sem is required to protect against another thread
- * changing the mappings in case we sleep.
- */
- verify_mm_writelocked(mm);
-
- /*
* Clear old maps. this also does some error checking for us
*/
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 5119ff846769..755466cd289a 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -35,13 +35,6 @@ void mmu_notifier_call_srcu(struct rcu_head *rcu,
}
EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
-void mmu_notifier_synchronize(void)
-{
- /* Wait for any running method to finish. */
- srcu_barrier(&srcu);
-}
-EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
-
/*
* This function can't run concurrently against mmu_notifier_register
* because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
diff --git a/mm/mremap.c b/mm/mremap.c
index 7f9f9180e401..feff311637f1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,6 +191,52 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
drop_rmap_locks(vma);
}
+#ifdef CONFIG_HAVE_MOVE_PMD
+static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, unsigned long old_end,
+ pmd_t *old_pmd, pmd_t *new_pmd)
+{
+ spinlock_t *old_ptl, *new_ptl;
+ struct mm_struct *mm = vma->vm_mm;
+ pmd_t pmd;
+
+ if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
+ || old_end - old_addr < PMD_SIZE)
+ return false;
+
+ /*
+ * The destination pmd shouldn't be established, free_pgtables()
+ * should have release it.
+ */
+ if (WARN_ON(!pmd_none(*new_pmd)))
+ return false;
+
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * ptlocks because exclusive mmap_sem prevents deadlock.
+ */
+ old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+ new_ptl = pmd_lockptr(mm, new_pmd);
+ if (new_ptl != old_ptl)
+ spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+ /* Clear the pmd */
+ pmd = *old_pmd;
+ pmd_clear(old_pmd);
+
+ VM_BUG_ON(!pmd_none(*new_pmd));
+
+ /* Set the new pmd */
+ set_pmd_at(mm, new_addr, new_pmd, pmd);
+ flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
+ spin_unlock(old_ptl);
+
+ return true;
+}
+#endif
+
unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
@@ -237,8 +283,26 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
split_huge_pmd(vma, old_pmd, old_addr);
if (pmd_trans_unstable(old_pmd))
continue;
+ } else if (extent == PMD_SIZE) {
+#ifdef CONFIG_HAVE_MOVE_PMD
+ /*
+ * If the extent is PMD-sized, try to speed the move by
+ * moving at the PMD level if possible.
+ */
+ bool moved;
+
+ if (need_rmap_locks)
+ take_rmap_locks(vma);
+ moved = move_normal_pmd(vma, old_addr, new_addr,
+ old_end, old_pmd, new_pmd);
+ if (need_rmap_locks)
+ drop_rmap_locks(vma);
+ if (moved)
+ continue;
+#endif
}
- if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
+
+ if (pte_alloc(new_vma->vm_mm, new_pmd))
break;
next = (new_addr + PMD_SIZE) & PMD_MASK;
if (extent > next - new_addr)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6589f60d5018..21d487749e1d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -269,7 +269,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
}
/* Default to all available memory */
- oc->totalpages = totalram_pages + total_swap_pages;
+ oc->totalpages = totalram_pages() + total_swap_pages;
if (!IS_ENABLED(CONFIG_NUMA))
return CONSTRAINT_NONE;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6847177dc4a1..421c5b652708 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -16,6 +16,7 @@
#include <linux/stddef.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
@@ -121,10 +122,8 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
};
EXPORT_SYMBOL(node_states);
-/* Protect totalram_pages and zone->managed_pages */
-static DEFINE_SPINLOCK(managed_page_count_lock);
-
-unsigned long totalram_pages __read_mostly;
+atomic_long_t _totalram_pages __read_mostly;
+EXPORT_SYMBOL(_totalram_pages);
unsigned long totalreserve_pages __read_mostly;
unsigned long totalcma_pages __read_mostly;
@@ -1179,7 +1178,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long zone, int nid)
{
mm_zero_struct_page(page);
- set_page_links(page, zone, nid, pfn);
+ set_page_links(page, zone, nid, pfn, false);
init_page_count(page);
page_mapcount_reset(page);
page_cpupid_reset_last(page);
@@ -1192,6 +1191,90 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
#endif
}
+static void __meminit __init_pageblock(unsigned long start_pfn,
+ unsigned long nr_pages,
+ unsigned long zone, int nid,
+ struct dev_pagemap *pgmap,
+ bool is_reserved)
+{
+ unsigned long nr_pgmask = pageblock_nr_pages - 1;
+ struct page *start_page = pfn_to_page(start_pfn);
+ unsigned long pfn = start_pfn + nr_pages - 1;
+#ifdef WANT_PAGE_VIRTUAL
+ bool is_highmem = is_highmem_idx(zone);
+#endif
+ struct page *page;
+
+ /*
+ * Enforce the following requirements:
+ * size > 0
+ * size < pageblock_nr_pages
+ * start_pfn -> pfn does not cross pageblock_nr_pages boundary
+ */
+ VM_BUG_ON(((start_pfn ^ pfn) | (nr_pages - 1)) > nr_pgmask);
+
+ /*
+ * Work from highest page to lowest, this way we will still be
+ * warm in the cache when we call set_pageblock_migratetype
+ * below.
+ *
+ * The loop is based around the page pointer as the main index
+ * instead of the pfn because pfn is not used inside the loop if
+ * the section number is not in page flags and WANT_PAGE_VIRTUAL
+ * is not defined.
+ */
+ for (page = start_page + nr_pages; page-- != start_page; pfn--) {
+ mm_zero_struct_page(page);
+
+ /*
+ * We use the start_pfn instead of pfn in the set_page_links
+ * call because of the fact that the pfn number is used to
+ * get the section_nr and this function should not be
+ * spanning more than a single section.
+ *
+ * We can use a non-atomic operation for setting the
+ * PG_reserved flag as we are still initializing the pages.
+ */
+ set_page_links(page, zone, nid, start_pfn, is_reserved);
+ init_page_count(page);
+ page_mapcount_reset(page);
+ page_cpupid_reset_last(page);
+
+ /*
+ * ZONE_DEVICE pages union ->lru with a ->pgmap back
+ * pointer and hmm_data. It is a bug if a ZONE_DEVICE
+ * page is ever freed or placed on a driver-private list.
+ */
+ page->pgmap = pgmap;
+ if (!pgmap)
+ INIT_LIST_HEAD(&page->lru);
+
+#ifdef WANT_PAGE_VIRTUAL
+ /* The shift won't overflow because ZONE_NORMAL is below 4G. */
+ if (!is_highmem)
+ set_page_address(page, __va(pfn << PAGE_SHIFT));
+#endif
+ }
+
+ /*
+ * Mark the block movable so that blocks are reserved for
+ * movable at startup. This will force kernel allocations
+ * to reserve their blocks rather than leaking throughout
+ * the address space during boot when many long-lived
+ * kernel allocations are made.
+ *
+ * bitmap is created for zone's valid pfn range. but memmap
+ * can be created for invalid pages (for alignment)
+ * check here not to call set_pageblock_migratetype() against
+ * pfn out of zone.
+ *
+ * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
+ * because this is done early in sparse_add_one_section
+ */
+ if (!(start_pfn & nr_pgmask))
+ set_pageblock_migratetype(start_page, MIGRATE_MOVABLE);
+}
+
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
static void __meminit init_reserved_page(unsigned long pfn)
{
@@ -1264,22 +1347,18 @@ static void __free_pages_ok(struct page *page, unsigned int order)
local_irq_restore(flags);
}
-static void __init __free_pages_boot_core(struct page *page, unsigned int order)
+void __free_pages_core(struct page *page, unsigned int order)
{
unsigned int nr_pages = 1 << order;
struct page *p = page;
unsigned int loop;
- prefetchw(p);
- for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
- prefetchw(p + 1);
+ for (loop = 0; loop < nr_pages ; loop++, p++) {
__ClearPageReserved(p);
set_page_count(p, 0);
}
- __ClearPageReserved(p);
- set_page_count(p, 0);
- page_zone(page)->managed_pages += nr_pages;
+ atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
set_page_refcounted(page);
__free_pages(page, order);
}
@@ -1305,36 +1384,22 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
#endif
#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-static inline bool __meminit __maybe_unused
-meminit_pfn_in_nid(unsigned long pfn, int node,
- struct mminit_pfnnid_cache *state)
+/* Only safe to use early in boot when initialisation is single-threaded */
+static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
{
int nid;
- nid = __early_pfn_to_nid(pfn, state);
+ nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
if (nid >= 0 && nid != node)
return false;
return true;
}
-/* Only safe to use early in boot when initialisation is single-threaded */
-static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
-{
- return meminit_pfn_in_nid(pfn, node, &early_pfnnid_cache);
-}
-
#else
-
static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
{
return true;
}
-static inline bool __meminit __maybe_unused
-meminit_pfn_in_nid(unsigned long pfn, int node,
- struct mminit_pfnnid_cache *state)
-{
- return true;
-}
#endif
@@ -1343,7 +1408,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
{
if (early_page_uninitialised(pfn))
return;
- return __free_pages_boot_core(page, order);
+ __free_pages_core(page, order);
}
/*
@@ -1418,31 +1483,6 @@ void clear_zone_contiguous(struct zone *zone)
}
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-static void __init deferred_free_range(unsigned long pfn,
- unsigned long nr_pages)
-{
- struct page *page;
- unsigned long i;
-
- if (!nr_pages)
- return;
-
- page = pfn_to_page(pfn);
-
- /* Free a large naturally-aligned chunk if possible */
- if (nr_pages == pageblock_nr_pages &&
- (pfn & (pageblock_nr_pages - 1)) == 0) {
- set_pageblock_migratetype(page, MIGRATE_MOVABLE);
- __free_pages_boot_core(page, pageblock_order);
- return;
- }
-
- for (i = 0; i < nr_pages; i++, page++, pfn++) {
- if ((pfn & (pageblock_nr_pages - 1)) == 0)
- set_pageblock_migratetype(page, MIGRATE_MOVABLE);
- __free_pages_boot_core(page, 0);
- }
-}
/* Completion tracking for deferred_init_memmap() threads */
static atomic_t pgdat_init_n_undone __initdata;
@@ -1455,57 +1495,77 @@ static inline void __init pgdat_init_report_one_done(void)
}
/*
- * Returns true if page needs to be initialized or freed to buddy allocator.
+ * Returns count if page range needs to be initialized or freed
*
- * First we check if pfn is valid on architectures where it is possible to have
- * holes within pageblock_nr_pages. On systems where it is not possible, this
- * function is optimized out.
+ * First, we check if a current large page is valid by only checking the
+ * validity of the head pfn.
*
- * Then, we check if a current large page is valid by only checking the validity
- * of the head pfn.
- *
- * Finally, meminit_pfn_in_nid is checked on systems where pfns can interleave
- * within a node: a pfn is between start and end of a node, but does not belong
- * to this memory node.
+ * Then we check if the contiguous pfns are valid on architectures where it
+ * is possible to have holes within pageblock_nr_pages. On systems where it
+ * is not possible, this function is optimized out.
*/
-static inline bool __init
-deferred_pfn_valid(int nid, unsigned long pfn,
- struct mminit_pfnnid_cache *nid_init_state)
+static unsigned long __next_pfn_valid_range(unsigned long *i,
+ unsigned long end_pfn)
{
- if (!pfn_valid_within(pfn))
- return false;
- if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
- return false;
- if (!meminit_pfn_in_nid(pfn, nid, nid_init_state))
- return false;
- return true;
+ unsigned long pfn = *i;
+ unsigned long count;
+
+ while (pfn < end_pfn) {
+ unsigned long t = ALIGN(pfn + 1, pageblock_nr_pages);
+ unsigned long pageblock_pfn = min(t, end_pfn);
+
+#ifndef CONFIG_HOLES_IN_ZONE
+ count = pageblock_pfn - pfn;
+ pfn = pageblock_pfn;
+ if (!pfn_valid(pfn))
+ continue;
+#else
+ for (count = 0; pfn < pageblock_pfn; pfn++) {
+ if (pfn_valid_within(pfn)) {
+ count++;
+ continue;
+ }
+
+ if (count)
+ break;
+ }
+
+ if (!count)
+ continue;
+#endif
+ *i = pfn;
+ return count;
+ }
+
+ return 0;
}
+#define for_each_deferred_pfn_valid_range(i, start_pfn, end_pfn, pfn, count) \
+ for (i = (start_pfn), \
+ count = __next_pfn_valid_range(&i, (end_pfn)); \
+ count && ({ pfn = i - count; 1; }); \
+ count = __next_pfn_valid_range(&i, (end_pfn)))
/*
* Free pages to buddy allocator. Try to free aligned pages in
* pageblock_nr_pages sizes.
*/
-static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
+static void __init deferred_free_pages(unsigned long start_pfn,
unsigned long end_pfn)
{
- struct mminit_pfnnid_cache nid_init_state = { };
- unsigned long nr_pgmask = pageblock_nr_pages - 1;
- unsigned long nr_free = 0;
-
- for (; pfn < end_pfn; pfn++) {
- if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
- deferred_free_range(pfn - nr_free, nr_free);
- nr_free = 0;
- } else if (!(pfn & nr_pgmask)) {
- deferred_free_range(pfn - nr_free, nr_free);
- nr_free = 1;
- touch_nmi_watchdog();
+ unsigned long i, pfn, count;
+
+ for_each_deferred_pfn_valid_range(i, start_pfn, end_pfn, pfn, count) {
+ struct page *page = pfn_to_page(pfn);
+
+ if (count == pageblock_nr_pages) {
+ __free_pages_core(page, pageblock_order);
} else {
- nr_free++;
+ while (count--)
+ __free_pages_core(page++, 0);
}
+
+ touch_nmi_watchdog();
}
- /* Free the last block of pages to allocator */
- deferred_free_range(pfn - nr_free, nr_free);
}
/*
@@ -1513,43 +1573,121 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
* by performing it only once every pageblock_nr_pages.
* Return number of pages initialized.
*/
-static unsigned long __init deferred_init_pages(int nid, int zid,
- unsigned long pfn,
+static unsigned long __init deferred_init_pages(struct zone *zone,
+ unsigned long start_pfn,
unsigned long end_pfn)
{
- struct mminit_pfnnid_cache nid_init_state = { };
- unsigned long nr_pgmask = pageblock_nr_pages - 1;
+ unsigned long i, pfn, count;
+ int nid = zone_to_nid(zone);
unsigned long nr_pages = 0;
- struct page *page = NULL;
+ int zid = zone_idx(zone);
+
+ for_each_deferred_pfn_valid_range(i, start_pfn, end_pfn, pfn, count) {
+ nr_pages += count;
+ __init_pageblock(pfn, count, zid, nid, NULL, false);
+
+ touch_nmi_watchdog();
+ }
- for (; pfn < end_pfn; pfn++) {
- if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
- page = NULL;
+ return nr_pages;
+}
+
+/*
+ * This function is meant to pre-load the iterator for the zone init.
+ * Specifically it walks through the ranges until we are caught up to the
+ * first_init_pfn value and exits there. If we never encounter the value we
+ * return false indicating there are no valid ranges left.
+ */
+static bool __init
+deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
+ unsigned long *spfn, unsigned long *epfn,
+ unsigned long first_init_pfn)
+{
+ u64 j;
+
+ /*
+ * Start out by walking through the ranges in this zone that have
+ * already been initialized. We don't need to do anything with them
+ * so we just need to flush them out of the system.
+ */
+ for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
+ if (*epfn <= first_init_pfn)
continue;
- } else if (!page || !(pfn & nr_pgmask)) {
- page = pfn_to_page(pfn);
- touch_nmi_watchdog();
- } else {
- page++;
- }
- __init_single_page(page, pfn, zid, nid);
- nr_pages++;
+ if (*spfn < first_init_pfn)
+ *spfn = first_init_pfn;
+ *i = j;
+ return true;
}
- return (nr_pages);
+
+ return false;
+}
+
+/*
+ * Initialize and free pages. We do it in two loops: first we initialize
+ * struct page, than free to buddy allocator, because while we are
+ * freeing pages we can access pages that are ahead (computing buddy
+ * page in __free_one_page()).
+ *
+ * In order to try and keep some memory in the cache we have the loop
+ * broken along max page order boundaries. This way we will not cause
+ * any issues with the buddy page computation.
+ */
+static unsigned long __init
+deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
+ unsigned long *end_pfn)
+{
+ unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES);
+ unsigned long spfn = *start_pfn, epfn = *end_pfn;
+ unsigned long nr_pages = 0;
+ u64 j = *i;
+
+ /* First we loop through and initialize the page values */
+ for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) {
+ unsigned long t;
+
+ if (mo_pfn <= spfn)
+ break;
+
+ t = min(mo_pfn, epfn);
+ nr_pages += deferred_init_pages(zone, spfn, t);
+
+ if (mo_pfn <= epfn)
+ break;
+ }
+
+ /* Reset values and now loop through freeing pages as needed */
+ j = *i;
+
+ for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) {
+ unsigned long t;
+
+ if (mo_pfn <= *start_pfn)
+ break;
+
+ t = min(mo_pfn, *end_pfn);
+ deferred_free_pages(*start_pfn, t);
+ *start_pfn = t;
+
+ if (mo_pfn < *end_pfn)
+ break;
+ }
+
+ /* Store our current values to be reused on the next iteration */
+ *i = j;
+
+ return nr_pages;
}
/* Initialise remaining memory on a node */
static int __init deferred_init_memmap(void *data)
{
pg_data_t *pgdat = data;
- int nid = pgdat->node_id;
+ const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
+ unsigned long spfn = 0, epfn = 0, nr_pages = 0;
+ unsigned long first_init_pfn, flags;
unsigned long start = jiffies;
- unsigned long nr_pages = 0;
- unsigned long spfn, epfn, first_init_pfn, flags;
- phys_addr_t spa, epa;
- int zid;
struct zone *zone;
- const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
+ int zid;
u64 i;
/* Bind memory initialisation thread to a local node if possible */
@@ -1575,31 +1713,30 @@ static int __init deferred_init_memmap(void *data)
if (first_init_pfn < zone_end_pfn(zone))
break;
}
- first_init_pfn = max(zone->zone_start_pfn, first_init_pfn);
+
+ /* If the zone is empty somebody else may have cleared out the zone */
+ if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
+ first_init_pfn)) {
+ pgdat_resize_unlock(pgdat, &flags);
+ pgdat_init_report_one_done();
+ return 0;
+ }
/*
- * Initialize and free pages. We do it in two loops: first we initialize
- * struct page, than free to buddy allocator, because while we are
- * freeing pages we can access pages that are ahead (computing buddy
- * page in __free_one_page()).
+ * Initialize and free pages in MAX_ORDER sized increments so
+ * that we can avoid introducing any issues with the buddy
+ * allocator.
*/
- for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
- spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
- epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
- nr_pages += deferred_init_pages(nid, zid, spfn, epfn);
- }
- for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
- spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
- epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
- deferred_free_pages(nid, zid, spfn, epfn);
- }
+ while (spfn < epfn)
+ nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
+
pgdat_resize_unlock(pgdat, &flags);
/* Sanity check that the next zone really is unpopulated */
WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
- pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
- jiffies_to_msecs(jiffies - start));
+ pr_info("node %d initialised, %lu pages in %ums\n",
+ pgdat->node_id, nr_pages, jiffies_to_msecs(jiffies - start));
pgdat_init_report_one_done();
return 0;
@@ -1630,14 +1767,11 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
static noinline bool __init
deferred_grow_zone(struct zone *zone, unsigned int order)
{
- int zid = zone_idx(zone);
- int nid = zone_to_nid(zone);
- pg_data_t *pgdat = NODE_DATA(nid);
unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
- unsigned long nr_pages = 0;
- unsigned long first_init_pfn, spfn, epfn, t, flags;
+ pg_data_t *pgdat = zone->zone_pgdat;
unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
- phys_addr_t spa, epa;
+ unsigned long spfn, epfn, flags;
+ unsigned long nr_pages = 0;
u64 i;
/* Only the last zone may have deferred pages */
@@ -1666,37 +1800,23 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
return true;
}
- first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn);
-
- if (first_init_pfn >= pgdat_end_pfn(pgdat)) {
+ /* If the zone is empty somebody else may have cleared out the zone */
+ if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
+ first_deferred_pfn)) {
pgdat_resize_unlock(pgdat, &flags);
- return false;
+ return true;
}
- for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
- spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
- epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
-
- while (spfn < epfn && nr_pages < nr_pages_needed) {
- t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
- first_deferred_pfn = min(t, epfn);
- nr_pages += deferred_init_pages(nid, zid, spfn,
- first_deferred_pfn);
- spfn = first_deferred_pfn;
- }
-
- if (nr_pages >= nr_pages_needed)
- break;
+ /*
+ * Initialize and free pages in MAX_ORDER sized increments so
+ * that we can avoid introducing any issues with the buddy
+ * allocator.
+ */
+ while (spfn < epfn && nr_pages < nr_pages_needed) {
+ nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
+ first_deferred_pfn = spfn;
}
- for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
- spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
- epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
- deferred_free_pages(nid, zid, spfn, epfn);
-
- if (first_deferred_pfn == epfn)
- break;
- }
pgdat->first_deferred_pfn = first_deferred_pfn;
pgdat_resize_unlock(pgdat, &flags);
@@ -2258,7 +2378,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
* Limit the number reserved to 1 pageblock or roughly 1% of a zone.
* Check is race-prone but harmless.
*/
- max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+ max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
if (zone->nr_reserved_highatomic >= max_managed)
return;
@@ -4660,7 +4780,7 @@ static unsigned long nr_free_zone_pages(int offset)
struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
for_each_zone_zonelist(zone, z, zonelist, offset) {
- unsigned long size = zone->managed_pages;
+ unsigned long size = zone_managed_pages(zone);
unsigned long high = high_wmark_pages(zone);
if (size > high)
sum += size - high;
@@ -4746,11 +4866,11 @@ EXPORT_SYMBOL_GPL(si_mem_available);
void si_meminfo(struct sysinfo *val)
{
- val->totalram = totalram_pages;
+ val->totalram = totalram_pages();
val->sharedram = global_node_page_state(NR_SHMEM);
val->freeram = global_zone_page_state(NR_FREE_PAGES);
val->bufferram = nr_blockdev_pages();
- val->totalhigh = totalhigh_pages;
+ val->totalhigh = totalhigh_pages();
val->freehigh = nr_free_highpages();
val->mem_unit = PAGE_SIZE;
}
@@ -4767,7 +4887,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
pg_data_t *pgdat = NODE_DATA(nid);
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
- managed_pages += pgdat->node_zones[zone_type].managed_pages;
+ managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
val->totalram = managed_pages;
val->sharedram = node_page_state(pgdat, NR_SHMEM);
val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
@@ -4776,7 +4896,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
struct zone *zone = &pgdat->node_zones[zone_type];
if (is_highmem(zone)) {
- managed_highpages += zone->managed_pages;
+ managed_highpages += zone_managed_pages(zone);
free_highpages += zone_page_state(zone, NR_FREE_PAGES);
}
}
@@ -4983,7 +5103,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
K(zone->present_pages),
- K(zone->managed_pages),
+ K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
K(zone_page_state(zone, NR_PAGETABLE)),
@@ -5471,6 +5591,36 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
return false;
}
+static void __meminit __memmap_init_hotplug(unsigned long size, int nid,
+ unsigned long zone,
+ unsigned long start_pfn,
+ struct dev_pagemap *pgmap)
+{
+ unsigned long pfn = start_pfn + size;
+
+ while (pfn != start_pfn) {
+ unsigned long stride = pfn;
+
+ pfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn);
+ stride -= pfn;
+
+ /*
+ * The last argument of __init_pageblock is a boolean
+ * value indicating if the page will be marked as reserved.
+ *
+ * Mark page reserved as it will need to wait for onlining
+ * phase for it to be fully associated with a zone.
+ *
+ * Under certain circumstances ZONE_DEVICE pages may not
+ * need to be marked as reserved, however there is still
+ * code that is depending on this being set for now.
+ */
+ __init_pageblock(pfn, stride, zone, nid, pgmap, true);
+
+ cond_resched();
+ }
+}
+
/*
* Initially all pages are reserved - free ones are freed
* up by memblock_free_all() once the early boot process is
@@ -5481,49 +5631,59 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
struct vmem_altmap *altmap)
{
unsigned long pfn, end_pfn = start_pfn + size;
- struct page *page;
if (highest_memmap_pfn < end_pfn - 1)
highest_memmap_pfn = end_pfn - 1;
+ if (context == MEMMAP_HOTPLUG) {
#ifdef CONFIG_ZONE_DEVICE
- /*
- * Honor reservation requested by the driver for this ZONE_DEVICE
- * memory. We limit the total number of pages to initialize to just
- * those that might contain the memory mapping. We will defer the
- * ZONE_DEVICE page initialization until after we have released
- * the hotplug lock.
- */
- if (zone == ZONE_DEVICE) {
- if (!altmap)
- return;
+ /*
+ * Honor reservation requested by the driver for this
+ * ZONE_DEVICE memory. We limit the total number of pages to
+ * initialize to just those that might contain the memory
+ * mapping. We will defer the ZONE_DEVICE page initialization
+ * until after we have released the hotplug lock.
+ */
+ if (zone == ZONE_DEVICE) {
+ if (!altmap)
+ return;
+
+ if (start_pfn == altmap->base_pfn)
+ start_pfn += altmap->reserve;
+ end_pfn = altmap->base_pfn +
+ vmem_altmap_offset(altmap);
+ }
+#endif
+ /*
+ * For these ZONE_DEVICE pages we don't need to record the
+ * pgmap as they should represent only those pages used to
+ * store the memory map. The actual ZONE_DEVICE pages will
+ * be initialized later.
+ */
+ __memmap_init_hotplug(end_pfn - start_pfn, nid, zone,
+ start_pfn, NULL);
- if (start_pfn == altmap->base_pfn)
- start_pfn += altmap->reserve;
- end_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
+ return;
}
-#endif
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ struct page *page;
+
/*
* There can be holes in boot-time mem_map[]s handed to this
* function. They do not exist on hotplugged memory.
*/
- if (context == MEMMAP_EARLY) {
- if (!early_pfn_valid(pfn))
- continue;
- if (!early_pfn_in_nid(pfn, nid))
- continue;
- if (overlap_memmap_init(zone, &pfn))
- continue;
- if (defer_init(nid, pfn, end_pfn))
- break;
- }
+ if (!early_pfn_valid(pfn))
+ continue;
+ if (!early_pfn_in_nid(pfn, nid))
+ continue;
+ if (overlap_memmap_init(zone, &pfn))
+ continue;
+ if (defer_init(nid, pfn, end_pfn))
+ break;
page = pfn_to_page(pfn);
__init_single_page(page, pfn, zone, nid);
- if (context == MEMMAP_HOTPLUG)
- __SetPageReserved(page);
/*
* Mark the block movable so that blocks are reserved for
@@ -5550,7 +5710,6 @@ void __ref memmap_init_zone_device(struct zone *zone,
unsigned long size,
struct dev_pagemap *pgmap)
{
- unsigned long pfn, end_pfn = start_pfn + size;
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long zone_idx = zone_idx(zone);
unsigned long start = jiffies;
@@ -5566,53 +5725,13 @@ void __ref memmap_init_zone_device(struct zone *zone,
*/
if (pgmap->altmap_valid) {
struct vmem_altmap *altmap = &pgmap->altmap;
+ unsigned long end_pfn = start_pfn + size;
start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
size = end_pfn - start_pfn;
}
- for (pfn = start_pfn; pfn < end_pfn; pfn++) {
- struct page *page = pfn_to_page(pfn);
-
- __init_single_page(page, pfn, zone_idx, nid);
-
- /*
- * Mark page reserved as it will need to wait for onlining
- * phase for it to be fully associated with a zone.
- *
- * We can use the non-atomic __set_bit operation for setting
- * the flag as we are still initializing the pages.
- */
- __SetPageReserved(page);
-
- /*
- * ZONE_DEVICE pages union ->lru with a ->pgmap back
- * pointer and hmm_data. It is a bug if a ZONE_DEVICE
- * page is ever freed or placed on a driver-private list.
- */
- page->pgmap = pgmap;
- page->hmm_data = 0;
-
- /*
- * Mark the block movable so that blocks are reserved for
- * movable at startup. This will force kernel allocations
- * to reserve their blocks rather than leaking throughout
- * the address space during boot when many long-lived
- * kernel allocations are made.
- *
- * bitmap is created for zone's valid pfn range. but memmap
- * can be created for invalid pages (for alignment)
- * check here not to call set_pageblock_migratetype() against
- * pfn out of zone.
- *
- * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
- * because this is done early in sparse_add_one_section
- */
- if (!(pfn & (pageblock_nr_pages - 1))) {
- set_pageblock_migratetype(page, MIGRATE_MOVABLE);
- cond_resched();
- }
- }
+ __memmap_init_hotplug(size, nid, zone_idx, start_pfn, pgmap);
pr_info("%s initialised, %lu pages in %ums\n", dev_name(pgmap->dev),
size, jiffies_to_msecs(jiffies - start));
@@ -5643,7 +5762,7 @@ static int zone_batchsize(struct zone *zone)
* The per-cpu-pages pools are set to around 1000th of the
* size of the zone.
*/
- batch = zone->managed_pages / 1024;
+ batch = zone_managed_pages(zone) / 1024;
/* But no more than a meg. */
if (batch * PAGE_SIZE > 1024 * 1024)
batch = (1024 * 1024) / PAGE_SIZE;
@@ -5724,7 +5843,6 @@ static void pageset_init(struct per_cpu_pageset *p)
memset(p, 0, sizeof(*p));
pcp = &p->pcp;
- pcp->count = 0;
for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
INIT_LIST_HEAD(&pcp->lists[migratetype]);
}
@@ -5754,7 +5872,7 @@ static void pageset_set_high_and_batch(struct zone *zone,
{
if (percpu_pagelist_fraction)
pageset_set_high(pcp,
- (zone->managed_pages /
+ (zone_managed_pages(zone) /
percpu_pagelist_fraction));
else
pageset_set_batch(pcp, zone_batchsize(zone));
@@ -6309,7 +6427,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
unsigned long remaining_pages)
{
- zone->managed_pages = remaining_pages;
+ atomic_long_set(&zone->managed_pages, remaining_pages);
zone_set_nid(zone, nid);
zone->name = zone_names[idx];
zone->zone_pgdat = NODE_DATA(nid);
@@ -7061,14 +7179,12 @@ early_param("movablecore", cmdline_parse_movablecore);
void adjust_managed_page_count(struct page *page, long count)
{
- spin_lock(&managed_page_count_lock);
- page_zone(page)->managed_pages += count;
- totalram_pages += count;
+ atomic_long_add(count, &page_zone(page)->managed_pages);
+ totalram_pages_add(count);
#ifdef CONFIG_HIGHMEM
if (PageHighMem(page))
- totalhigh_pages += count;
+ totalhigh_pages_add(count);
#endif
- spin_unlock(&managed_page_count_lock);
}
EXPORT_SYMBOL(adjust_managed_page_count);
@@ -7109,9 +7225,9 @@ EXPORT_SYMBOL(free_reserved_area);
void free_highmem_page(struct page *page)
{
__free_reserved_page(page);
- totalram_pages++;
- page_zone(page)->managed_pages++;
- totalhigh_pages++;
+ totalram_pages_inc();
+ atomic_long_inc(&page_zone(page)->managed_pages);
+ totalhigh_pages_inc();
}
#endif
@@ -7160,10 +7276,10 @@ void __init mem_init_print_info(const char *str)
physpages << (PAGE_SHIFT - 10),
codesize >> 10, datasize >> 10, rosize >> 10,
(init_data_size + init_code_size) >> 10, bss_size >> 10,
- (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10),
+ (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
totalcma_pages << (PAGE_SHIFT - 10),
#ifdef CONFIG_HIGHMEM
- totalhigh_pages << (PAGE_SHIFT - 10),
+ totalhigh_pages() << (PAGE_SHIFT - 10),
#endif
str ? ", " : "", str ? str : "");
}
@@ -7243,6 +7359,7 @@ static void calculate_totalreserve_pages(void)
for (i = 0; i < MAX_NR_ZONES; i++) {
struct zone *zone = pgdat->node_zones + i;
long max = 0;
+ unsigned long managed_pages = zone_managed_pages(zone);
/* Find valid and maximum lowmem_reserve in the zone */
for (j = i; j < MAX_NR_ZONES; j++) {
@@ -7253,8 +7370,8 @@ static void calculate_totalreserve_pages(void)
/* we treat the high watermark as reserved pages. */
max += high_wmark_pages(zone);
- if (max > zone->managed_pages)
- max = zone->managed_pages;
+ if (max > managed_pages)
+ max = managed_pages;
pgdat->totalreserve_pages += max;
@@ -7278,7 +7395,7 @@ static void setup_per_zone_lowmem_reserve(void)
for_each_online_pgdat(pgdat) {
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
- unsigned long managed_pages = zone->managed_pages;
+ unsigned long managed_pages = zone_managed_pages(zone);
zone->lowmem_reserve[j] = 0;
@@ -7296,7 +7413,7 @@ static void setup_per_zone_lowmem_reserve(void)
lower_zone->lowmem_reserve[j] =
managed_pages / sysctl_lowmem_reserve_ratio[idx];
}
- managed_pages += lower_zone->managed_pages;
+ managed_pages += zone_managed_pages(lower_zone);
}
}
}
@@ -7315,14 +7432,14 @@ static void __setup_per_zone_wmarks(void)
/* Calculate total number of !ZONE_HIGHMEM pages */
for_each_zone(zone) {
if (!is_highmem(zone))
- lowmem_pages += zone->managed_pages;
+ lowmem_pages += zone_managed_pages(zone);
}
for_each_zone(zone) {
u64 tmp;
spin_lock_irqsave(&zone->lock, flags);
- tmp = (u64)pages_min * zone->managed_pages;
+ tmp = (u64)pages_min * zone_managed_pages(zone);
do_div(tmp, lowmem_pages);
if (is_highmem(zone)) {
/*
@@ -7336,7 +7453,7 @@ static void __setup_per_zone_wmarks(void)
*/
unsigned long min_pages;
- min_pages = zone->managed_pages / 1024;
+ min_pages = zone_managed_pages(zone) / 1024;
min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
zone->watermark[WMARK_MIN] = min_pages;
} else {
@@ -7353,7 +7470,7 @@ static void __setup_per_zone_wmarks(void)
* ensure a minimum size on small systems.
*/
tmp = max_t(u64, tmp >> 2,
- mult_frac(zone->managed_pages,
+ mult_frac(zone_managed_pages(zone),
watermark_scale_factor, 10000));
zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
@@ -7483,8 +7600,8 @@ static void setup_min_unmapped_ratio(void)
pgdat->min_unmapped_pages = 0;
for_each_zone(zone)
- zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
- sysctl_min_unmapped_ratio) / 100;
+ zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) *
+ sysctl_min_unmapped_ratio) / 100;
}
@@ -7511,8 +7628,8 @@ static void setup_min_slab_ratio(void)
pgdat->min_slab_pages = 0;
for_each_zone(zone)
- zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
- sysctl_min_slab_ratio) / 100;
+ zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) *
+ sysctl_min_slab_ratio) / 100;
}
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
@@ -7851,6 +7968,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
return false;
unmovable:
WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
+ dump_page(pfn_to_page(pfn+iter), "has_unmovable_pages");
return true;
}
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 87bc0dfdb52b..3bba0ddd3ee4 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -274,7 +274,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
*/
for (; pfn < end_pfn; ) {
if (!pfn_valid(pfn)) {
- pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+ pfn = ALIGN(pfn + 1, pageblock_nr_pages);
continue;
}
@@ -351,6 +351,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
.skip = 0
};
+ count = min_t(size_t, count, PAGE_SIZE);
kbuf = kmalloc(count, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
@@ -541,7 +542,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
unsigned long block_end_pfn;
if (!pfn_valid(pfn)) {
- pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+ pfn = ALIGN(pfn + 1, pageblock_nr_pages);
continue;
}
diff --git a/mm/readahead.c b/mm/readahead.c
index f3d6f9656a3c..1ae16522412a 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -270,17 +270,15 @@ static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
* return it as the new window size.
*/
static unsigned long get_next_ra_size(struct file_ra_state *ra,
- unsigned long max)
+ unsigned long max)
{
unsigned long cur = ra->size;
- unsigned long newsize;
if (cur < max / 16)
- newsize = 4 * cur;
- else
- newsize = 2 * cur;
-
- return min(newsize, max);
+ return 4 * cur;
+ if (cur <= max / 2)
+ return 2 * cur;
+ return max;
}
/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 8400af95a58f..32eb29bd72c6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -109,12 +109,14 @@ struct shmem_falloc {
#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
- return totalram_pages / 2;
+ return totalram_pages() / 2;
}
static unsigned long shmem_default_max_inodes(void)
{
- return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
+ unsigned long nr_pages = totalram_pages();
+
+ return min(nr_pages - totalhigh_pages(), nr_pages / 2);
}
#endif
@@ -3272,7 +3274,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
size = memparse(value,&rest);
if (*rest == '%') {
size <<= PAGE_SHIFT;
- size *= totalram_pages;
+ size *= totalram_pages();
do_div(size, 100);
rest++;
}
diff --git a/mm/slab.c b/mm/slab.c
index 3abb9feb3818..13ea08fdbfd2 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1248,7 +1248,7 @@ void __init kmem_cache_init(void)
* page orders on machines with more than 32MB of memory if
* not overridden on the command line.
*/
- if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+ if (!slab_max_order_set && totalram_pages() > (32 << 20) >> PAGE_SHIFT)
slab_max_order = SLAB_MAX_ORDER_HI;
/* Bootstrap is tricky, because several objects are allocated
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 9c11e8a937d2..7d646e17cc2e 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1029,10 +1029,8 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
index = size_index[size_index_elem(size)];
} else {
- if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
- WARN_ON(1);
+ if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
return NULL;
- }
index = fls(size - 1);
}
diff --git a/mm/slub.c b/mm/slub.c
index e3629cd7aff1..255a07baafcc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2127,26 +2127,15 @@ redo:
}
if (l != m) {
-
if (l == M_PARTIAL)
-
remove_partial(n, page);
-
else if (l == M_FULL)
-
remove_full(s, n, page);
- if (m == M_PARTIAL) {
-
+ if (m == M_PARTIAL)
add_partial(n, page, tail);
- stat(s, tail);
-
- } else if (m == M_FULL) {
-
- stat(s, DEACTIVATE_FULL);
+ else if (m == M_FULL)
add_full(s, n, page);
-
- }
}
l = m;
@@ -2159,7 +2148,11 @@ redo:
if (lock)
spin_unlock(&n->list_lock);
- if (m == M_FREE) {
+ if (m == M_PARTIAL)
+ stat(s, tail);
+ else if (m == M_FULL)
+ stat(s, DEACTIVATE_FULL);
+ else if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, page);
stat(s, FREE_SLAB);
@@ -2313,12 +2306,10 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
- if (likely(c)) {
- if (c->page)
- flush_slab(s, c);
+ if (c->page)
+ flush_slab(s, c);
- unfreeze_partials(s, c);
- }
+ unfreeze_partials(s, c);
}
static void flush_cpu_slab(void *d)
@@ -2367,7 +2358,7 @@ static int slub_cpu_dead(unsigned int cpu)
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
- if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
+ if (node != NUMA_NO_NODE && page_to_nid(page) != node)
return 0;
#endif
return 1;
diff --git a/mm/sparse.c b/mm/sparse.c
index 33307fc05c4d..b6ed029a2f46 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -724,6 +724,15 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
if (!memmap)
return;
+ /*
+ * A further optimization is to have per section refcounted
+ * num_poisoned_pages. But that would need more space per memmap, so
+ * for now just do a quick global check to speed up this routine in the
+ * absence of bad pages.
+ */
+ if (atomic_long_read(&num_poisoned_pages) == 0)
+ return;
+
for (i = 0; i < nr_pages; i++) {
if (PageHWPoison(&memmap[i])) {
atomic_long_sub(1, &num_poisoned_pages);
diff --git a/mm/swap.c b/mm/swap.c
index 5d786019eab9..4d8a1f1afaab 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1022,7 +1022,7 @@ EXPORT_SYMBOL(pagevec_lookup_range_nr_tag);
*/
void __init swap_setup(void)
{
- unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
+ unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);
/* Use a smaller cluster for small-memory machines */
if (megs < 16)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index fd2f21e1c60a..5a1cc9387151 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -310,8 +310,13 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
unsigned long addr)
{
struct page *page;
+ struct swap_info_struct *si;
+ si = get_swap_device(entry);
+ if (!si)
+ return NULL;
page = find_get_page(swap_address_space(entry), swp_offset(entry));
+ put_swap_device(si);
INC_CACHE_INFO(find_total);
if (page) {
@@ -354,8 +359,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma, unsigned long addr,
bool *new_page_allocated)
{
- struct page *found_page, *new_page = NULL;
- struct address_space *swapper_space = swap_address_space(entry);
+ struct page *found_page = NULL, *new_page = NULL;
+ struct swap_info_struct *si;
int err;
*new_page_allocated = false;
@@ -365,7 +370,12 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* called after lookup_swap_cache() failed, re-calling
* that would confuse statistics.
*/
- found_page = find_get_page(swapper_space, swp_offset(entry));
+ si = get_swap_device(entry);
+ if (!si)
+ break;
+ found_page = find_get_page(swap_address_space(entry),
+ swp_offset(entry));
+ put_swap_device(si);
if (found_page)
break;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8688ae65ef58..ec210be02c3b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -38,6 +38,7 @@
#include <linux/export.h>
#include <linux/swap_slots.h>
#include <linux/sort.h>
+#include <linux/stop_machine.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -1174,6 +1175,64 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
return usage;
}
+/*
+ * Check whether swap entry is valid in the swap device. If so,
+ * return pointer to swap_info_struct, and keep the swap entry valid
+ * via preventing the swap device from being swapoff, until
+ * put_swap_device() is called. Otherwise return NULL.
+ *
+ * Notice that swapoff or swapoff+swapon can still happen before the
+ * preempt_disable() in get_swap_device() or after the
+ * preempt_enable() in put_swap_device() if there isn't any other way
+ * to prevent swapoff, such as page lock, page table lock, etc. The
+ * caller must be prepared for that. For example, the following
+ * situation is possible.
+ *
+ * CPU1 CPU2
+ * do_swap_page()
+ * ... swapoff+swapon
+ * __read_swap_cache_async()
+ * swapcache_prepare()
+ * __swap_duplicate()
+ * // check swap_map
+ * // verify PTE not changed
+ *
+ * In __swap_duplicate(), the swap_map need to be checked before
+ * changing partly because the specified swap entry may be for another
+ * swap device which has been swapoff. And in do_swap_page(), after
+ * the page is read from the swap device, the PTE is verified not
+ * changed with the page table locked to check whether the swap device
+ * has been swapoff or swapoff+swapon.
+ */
+struct swap_info_struct *get_swap_device(swp_entry_t entry)
+{
+ struct swap_info_struct *si;
+ unsigned long type, offset;
+
+ if (!entry.val)
+ goto out;
+ type = swp_type(entry);
+ if (type >= nr_swapfiles)
+ goto bad_nofile;
+ si = swap_info[type];
+
+ preempt_disable();
+ if (!(si->flags & SWP_VALID))
+ goto unlock_out;
+ offset = swp_offset(entry);
+ if (offset >= si->max)
+ goto unlock_out;
+
+ return si;
+bad_nofile:
+ pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);
+out:
+ return NULL;
+unlock_out:
+ preempt_enable();
+ return NULL;
+}
+
static unsigned char __swap_entry_free(struct swap_info_struct *p,
swp_entry_t entry, unsigned char usage)
{
@@ -1345,11 +1404,18 @@ int page_swapcount(struct page *page)
return count;
}
-int __swap_count(struct swap_info_struct *si, swp_entry_t entry)
+int __swap_count(swp_entry_t entry)
{
+ struct swap_info_struct *si;
pgoff_t offset = swp_offset(entry);
+ int count = 0;
- return swap_count(si->swap_map[offset]);
+ si = get_swap_device(entry);
+ if (si) {
+ count = swap_count(si->swap_map[offset]);
+ put_swap_device(si);
+ }
+ return count;
}
static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
@@ -1374,9 +1440,11 @@ int __swp_swapcount(swp_entry_t entry)
int count = 0;
struct swap_info_struct *si;
- si = __swap_info_get(entry);
- if (si)
+ si = get_swap_device(entry);
+ if (si) {
count = swap_swapcount(si, entry);
+ put_swap_device(si);
+ }
return count;
}
@@ -1772,8 +1840,6 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
get_page(page);
- set_pte_at(vma->vm_mm, addr, pte,
- pte_mkold(mk_pte(page, vma->vm_page_prot)));
if (page == swapcache) {
page_add_anon_rmap(page, vma, addr, false);
mem_cgroup_commit_charge(page, memcg, true, false);
@@ -1782,6 +1848,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma);
}
+ set_pte_at(vma->vm_mm, addr, pte,
+ pte_mkold(mk_pte(page, vma->vm_page_prot)));
swap_free(entry);
/*
* Move the page to the active list so it is not
@@ -2427,9 +2495,9 @@ static int swap_node(struct swap_info_struct *p)
return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE;
}
-static void _enable_swap_info(struct swap_info_struct *p, int prio,
- unsigned char *swap_map,
- struct swap_cluster_info *cluster_info)
+static void setup_swap_info(struct swap_info_struct *p, int prio,
+ unsigned char *swap_map,
+ struct swap_cluster_info *cluster_info)
{
int i;
@@ -2454,7 +2522,11 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
}
p->swap_map = swap_map;
p->cluster_info = cluster_info;
- p->flags |= SWP_WRITEOK;
+}
+
+static void _enable_swap_info(struct swap_info_struct *p)
+{
+ p->flags |= SWP_WRITEOK | SWP_VALID;
atomic_long_add(p->pages, &nr_swap_pages);
total_swap_pages += p->pages;
@@ -2473,6 +2545,11 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
add_to_avail_list(p);
}
+static int swap_onoff_stop(void *arg)
+{
+ return 0;
+}
+
static void enable_swap_info(struct swap_info_struct *p, int prio,
unsigned char *swap_map,
struct swap_cluster_info *cluster_info,
@@ -2481,7 +2558,17 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
frontswap_init(p->type, frontswap_map);
spin_lock(&swap_lock);
spin_lock(&p->lock);
- _enable_swap_info(p, prio, swap_map, cluster_info);
+ setup_swap_info(p, prio, swap_map, cluster_info);
+ spin_unlock(&p->lock);
+ spin_unlock(&swap_lock);
+ /*
+ * Guarantee swap_map, cluster_info, etc. fields are used
+ * between get/put_swap_device() only if SWP_VALID bit is set
+ */
+ stop_machine(swap_onoff_stop, NULL, cpu_online_mask);
+ spin_lock(&swap_lock);
+ spin_lock(&p->lock);
+ _enable_swap_info(p);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
}
@@ -2490,7 +2577,8 @@ static void reinsert_swap_info(struct swap_info_struct *p)
{
spin_lock(&swap_lock);
spin_lock(&p->lock);
- _enable_swap_info(p, p->prio, p->swap_map, p->cluster_info);
+ setup_swap_info(p, p->prio, p->swap_map, p->cluster_info);
+ _enable_swap_info(p);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
}
@@ -2593,6 +2681,17 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
reenable_swap_slots_cache_unlock();
+ spin_lock(&swap_lock);
+ spin_lock(&p->lock);
+ p->flags &= ~SWP_VALID; /* mark swap device as invalid */
+ spin_unlock(&p->lock);
+ spin_unlock(&swap_lock);
+ /*
+ * wait for swap operations protected by get/put_swap_device()
+ * to complete
+ */
+ stop_machine(swap_onoff_stop, NULL, cpu_online_mask);
+
flush_work(&p->discard_work);
destroy_swap_extents(p);
@@ -2812,8 +2911,9 @@ static struct swap_info_struct *alloc_swap_info(void)
struct swap_info_struct *p;
unsigned int type;
int i;
+ int size = sizeof(*p) + nr_node_ids * sizeof(struct plist_node);
- p = kvzalloc(sizeof(*p), GFP_KERNEL);
+ p = kvzalloc(size, GFP_KERNEL);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -3356,22 +3456,16 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
{
struct swap_info_struct *p;
struct swap_cluster_info *ci;
- unsigned long offset, type;
+ unsigned long offset;
unsigned char count;
unsigned char has_cache;
int err = -EINVAL;
- if (non_swap_entry(entry))
+ p = get_swap_device(entry);
+ if (!p)
goto out;
- type = swp_type(entry);
- if (type >= nr_swapfiles)
- goto bad_file;
- p = swap_info[type];
offset = swp_offset(entry);
- if (unlikely(offset >= p->max))
- goto out;
-
ci = lock_cluster_or_swap_info(p, offset);
count = p->swap_map[offset];
@@ -3417,11 +3511,9 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
unlock_out:
unlock_cluster_or_swap_info(p, ci);
out:
+ if (p)
+ put_swap_device(p);
return err;
-
-bad_file:
- pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val);
- goto out;
}
/*
@@ -3513,6 +3605,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
struct page *list_page;
pgoff_t offset;
unsigned char count;
+ int ret = 0;
/*
* When debugging, it's easier to use __GFP_ZERO here; but it's better
@@ -3520,15 +3613,15 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
*/
page = alloc_page(gfp_mask | __GFP_HIGHMEM);
- si = swap_info_get(entry);
+ si = get_swap_device(entry);
if (!si) {
/*
* An acceptable race has occurred since the failing
- * __swap_duplicate(): the swap entry has been freed,
- * perhaps even the whole swap_map cleared for swapoff.
+ * __swap_duplicate(): the swap device may be swapoff
*/
goto outer;
}
+ spin_lock(&si->lock);
offset = swp_offset(entry);
@@ -3546,9 +3639,8 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
}
if (!page) {
- unlock_cluster(ci);
- spin_unlock(&si->lock);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
/*
@@ -3600,10 +3692,11 @@ out_unlock_cont:
out:
unlock_cluster(ci);
spin_unlock(&si->lock);
+ put_swap_device(si);
outer:
if (page)
__free_page(page);
- return 0;
+ return ret;
}
/*
diff --git a/mm/truncate.c b/mm/truncate.c
index 45d68e90b703..798e7ccfb030 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -517,9 +517,13 @@ void truncate_inode_pages_final(struct address_space *mapping)
*/
xa_lock_irq(&mapping->i_pages);
xa_unlock_irq(&mapping->i_pages);
-
- truncate_inode_pages(mapping, 0);
}
+
+ /*
+ * Cleancache needs notification even if there are no pages or shadow
+ * entries.
+ */
+ truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f05c8bc38ca5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -513,7 +513,7 @@ retry:
break;
}
if (unlikely(pmd_none(dst_pmdval)) &&
- unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
+ unlikely(__pte_alloc(dst_mm, dst_pmd))) {
err = -ENOMEM;
break;
}
diff --git a/mm/util.c b/mm/util.c
index 8bf08b5b5760..4df23d64aac7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -593,7 +593,7 @@ unsigned long vm_commit_limit(void)
if (sysctl_overcommit_kbytes)
allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
else
- allowed = ((totalram_pages - hugetlb_total_pages())
+ allowed = ((totalram_pages() - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100);
allowed += total_swap_pages;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 97d4b25d0373..871e41c55e23 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1634,7 +1634,7 @@ void *vmap(struct page **pages, unsigned int count,
might_sleep();
- if (count > totalram_pages)
+ if (count > totalram_pages())
return NULL;
size = (unsigned long)count << PAGE_SHIFT;
@@ -1739,7 +1739,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
unsigned long real_size = size;
size = PAGE_ALIGN(size);
- if (!size || (size >> PAGE_SHIFT) > totalram_pages)
+ if (!size || (size >> PAGE_SHIFT) > totalram_pages())
goto fail;
area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 24ab1f7394ab..edc024e3aea7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1261,8 +1261,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
}
}
- if (!force_reclaim)
- references = page_check_references(page, sc);
+ if (!force_reclaim) {
+ /*
+ * Don't try to reclaim KSM page in direct reclaim if
+ * the priority is not high enough.
+ */
+ if (PageKsm(page) && !current_is_kswapd() &&
+ sc->priority > (DEF_PRIORITY - 2))
+ references = PAGEREF_KEEP;
+ else
+ references = page_check_references(page, sc);
+ }
switch (references) {
case PAGEREF_ACTIVATE:
@@ -2137,6 +2146,16 @@ static void shrink_active_list(unsigned long nr_to_scan,
}
}
+ /*
+ * Skip KSM page in direct reclaim if priority is not
+ * high enough.
+ */
+ if (PageKsm(page) && !current_is_kswapd() &&
+ sc->priority > (DEF_PRIORITY - 2)) {
+ putback_lru_page(page);
+ continue;
+ }
+
if (page_referenced(page, 0, sc->target_mem_cgroup,
&vm_flags)) {
nr_rotated += hpage_nr_pages(page);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9c624595e904..83b30edc2f7f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -227,7 +227,7 @@ int calculate_normal_threshold(struct zone *zone)
* 125 1024 10 16-32 GB 9
*/
- mem = zone->managed_pages >> (27 - PAGE_SHIFT);
+ mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
@@ -1569,7 +1569,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
high_wmark_pages(zone),
zone->spanned_pages,
zone->present_pages,
- zone->managed_pages);
+ zone_managed_pages(zone));
seq_printf(m,
"\n protection: (%ld",
diff --git a/mm/workingset.c b/mm/workingset.c
index d46f8c92aa2f..dcb994f2acc2 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -549,7 +549,7 @@ static int __init workingset_init(void)
* double the initial memory by using totalram_pages as-is.
*/
timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT;
- max_order = fls_long(totalram_pages - 1);
+ max_order = fls_long(totalram_pages() - 1);
if (max_order > timestamp_bits)
bucket_order = max_order - timestamp_bits;
pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
diff --git a/mm/zswap.c b/mm/zswap.c
index cd91fd9d96b8..a4e4d36ec085 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -219,8 +219,8 @@ static const struct zpool_ops zswap_zpool_ops = {
static bool zswap_is_full(void)
{
- return totalram_pages * zswap_max_pool_percent / 100 <
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
+ return totalram_pages() * zswap_max_pool_percent / 100 <
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
static void zswap_update_total_size(void)
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 658cd32bb7b3..26e51d5d91fb 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1131,6 +1131,7 @@ EXPORT_SYMBOL_GPL(dccp_debug);
static int __init dccp_init(void)
{
unsigned long goal;
+ unsigned long nr_pages = totalram_pages();
int ehash_order, bhash_order, i;
int rc;
@@ -1154,10 +1155,10 @@ static int __init dccp_init(void)
*
* The methodology is similar to that of the buffer cache.
*/
- if (totalram_pages >= (128 * 1024))
- goal = totalram_pages >> (21 - PAGE_SHIFT);
+ if (nr_pages >= (128 * 1024))
+ goal = nr_pages >> (21 - PAGE_SHIFT);
else
- goal = totalram_pages >> (23 - PAGE_SHIFT);
+ goal = nr_pages >> (23 - PAGE_SHIFT);
if (thash_entries)
goal = (thash_entries *
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1c002c0fb712..950613ee7881 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1866,7 +1866,7 @@ void __init dn_route_init(void)
dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
add_timer(&dn_route_timer);
- goal = totalram_pages >> (26 - PAGE_SHIFT);
+ goal = totalram_pages() >> (26 - PAGE_SHIFT);
for(order = 0; (1UL << order) < goal; order++)
/* NOTHING */;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03b51cdcc731..b467a7cabf40 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1000,7 +1000,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
slots = tcpmhash_entries;
if (!slots) {
- if (totalram_pages >= 128 * 1024)
+ if (totalram_pages() >= 128 * 1024)
slots = 16 * 1024;
else
slots = 8 * 1024;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e92e749aff53..2764218e987c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2251,6 +2251,7 @@ static __always_inline unsigned int total_extension_size(void)
int nf_conntrack_init_start(void)
{
+ unsigned long nr_pages = totalram_pages();
int max_factor = 8;
int ret = -ENOMEM;
int i;
@@ -2270,11 +2271,11 @@ int nf_conntrack_init_start(void)
* >= 4GB machines have 65536 buckets.
*/
nf_conntrack_htable_size
- = (((totalram_pages << PAGE_SHIFT) / 16384)
+ = (((nr_pages << PAGE_SHIFT) / 16384)
/ sizeof(struct hlist_head));
- if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
+ if (nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
nf_conntrack_htable_size = 65536;
- else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
+ else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
nf_conntrack_htable_size = 16384;
if (nf_conntrack_htable_size < 32)
nf_conntrack_htable_size = 32;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 28e27a32d9b9..8d86e39d6280 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -274,14 +274,15 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
struct xt_hashlimit_htable *hinfo;
const struct seq_operations *ops;
unsigned int size, i;
+ unsigned long nr_pages = totalram_pages();
int ret;
if (cfg->size) {
size = cfg->size;
} else {
- size = (totalram_pages << PAGE_SHIFT) / 16384 /
+ size = (nr_pages << PAGE_SHIFT) / 16384 /
sizeof(struct hlist_head);
- if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
+ if (nr_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
size = 8192;
if (size < 16)
size = 16;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 9b277bd36d1a..d5878ae55840 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1368,6 +1368,7 @@ static __init int sctp_init(void)
int status = -EINVAL;
unsigned long goal;
unsigned long limit;
+ unsigned long nr_pages = totalram_pages();
int max_share;
int order;
int num_entries;
@@ -1426,10 +1427,10 @@ static __init int sctp_init(void)
* The methodology is similar to that of the tcp hash tables.
* Though not identical. Start by getting a goal size
*/
- if (totalram_pages >= (128 * 1024))
- goal = totalram_pages >> (22 - PAGE_SHIFT);
+ if (nr_pages >= (128 * 1024))
+ goal = nr_pages >> (22 - PAGE_SHIFT);
else
- goal = totalram_pages >> (24 - PAGE_SHIFT);
+ goal = nr_pages >> (24 - PAGE_SHIFT);
/* Then compute the page order for said goal */
order = get_order(goal);
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter
index a923f05edb36..8c965f6a9881 100755
--- a/scripts/bloat-o-meter
+++ b/scripts/bloat-o-meter
@@ -32,6 +32,7 @@ def getsizes(file, format):
if name.startswith("__mod_"): continue
if name.startswith("__se_sys"): continue
if name.startswith("__se_compat_sys"): continue
+ if name.startswith("__addressable_"): continue
if name == "linux_banner": continue
# statics and some other optimizations adds random .NUMBER
name = re_NUMBER.sub('', name)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 377f373db6c0..93e84c9504a1 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3890,14 +3890,23 @@ sub process {
WARN("STATIC_CONST_CHAR_ARRAY",
"static const char * array should probably be static const char * const\n" .
$herecurr);
- }
+ }
+
+# check for initialized const char arrays that should be static const
+ if ($line =~ /^\+\s*const\s+(char|unsigned\s+char|_*u8|(?:[us]_)?int8_t)\s+\w+\s*\[\s*(?:\w+\s*)?\]\s*=\s*"/) {
+ if (WARN("STATIC_CONST_CHAR_ARRAY",
+ "const array should probably be static const\n" . $herecurr) &&
+ $fix) {
+ $fixed[$fixlinenr] =~ s/(^.\s*)const\b/${1}static const/;
+ }
+ }
# check for static char foo[] = "bar" declarations.
if ($line =~ /\bstatic\s+char\s+(\w+)\s*\[\s*\]\s*=\s*"/) {
WARN("STATIC_CONST_CHAR_ARRAY",
"static char array declaration should probably be static const char\n" .
$herecurr);
- }
+ }
# check for const <foo> const where <foo> is not a pointer or array type
if ($sline =~ /\bconst\s+($BasicType)\s+const\b/) {
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 16bd18747cfa..d6f32807b347 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -106,7 +106,7 @@ void ima_add_kexec_buffer(struct kimage *image)
kexec_segment_size = ALIGN(ima_get_binary_runtime_size() +
PAGE_SIZE / 2, PAGE_SIZE);
if ((kexec_segment_size == ULONG_MAX) ||
- ((kexec_segment_size >> PAGE_SHIFT) > totalram_pages / 2)) {
+ ((kexec_segment_size >> PAGE_SHIFT) > totalram_pages() / 2)) {
pr_err("Binary measurement list too large.\n");
return;
}
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
index 753aecaab641..b168bb10e1be 100644
--- a/tools/include/asm-generic/bitops/fls.h
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -10,7 +10,7 @@
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __always_inline int fls(int x)
+static __always_inline int fls(unsigned int x)
{
int r = 32;
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 10baa1652fc2..32b207ca7372 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -693,6 +693,79 @@ static void test_seal_write(void)
}
/*
+ * Test SEAL_FUTURE_WRITE
+ * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
+ */
+static void test_seal_future_write(void)
+{
+ int fd;
+ void *p;
+
+ printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_future_write",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ p = mfd_assert_mmap_shared(fd);
+
+ mfd_assert_has_seals(fd, 0);
+ /* Not adding grow/shrink seals makes the future write
+ * seal fail to get added
+ */
+ mfd_fail_add_seals(fd, F_SEAL_FUTURE_WRITE);
+
+ mfd_assert_add_seals(fd, F_SEAL_GROW);
+ mfd_assert_has_seals(fd, F_SEAL_GROW);
+
+ /* Should still fail since shrink seal has
+ * not yet been added
+ */
+ mfd_fail_add_seals(fd, F_SEAL_FUTURE_WRITE);
+
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_GROW |
+ F_SEAL_SHRINK);
+
+ /* Now should succeed, also verifies that the seal
+ * could be added with an existing writable mmap
+ */
+ mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_FUTURE_WRITE);
+
+ /* read should pass, writes should fail */
+ mfd_assert_read(fd);
+ mfd_fail_write(fd);
+
+ munmap(p, mfd_def_size);
+ close(fd);
+
+ /* Test adding all seals (grow, shrink, future write) at once */
+ fd = mfd_assert_new("kern_memfd_seal_future_write2",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ p = mfd_assert_mmap_shared(fd);
+
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_FUTURE_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_FUTURE_WRITE);
+
+ /* read should pass, writes should fail */
+ mfd_assert_read(fd);
+ mfd_fail_write(fd);
+
+ munmap(p, mfd_def_size);
+ close(fd);
+}
+
+/*
* Test SEAL_SHRINK
* Test whether SEAL_SHRINK actually prevents shrinking
*/
@@ -945,6 +1018,7 @@ int main(int argc, char **argv)
test_basic();
test_seal_write();
+ test_seal_future_write();
test_seal_shrink();
test_seal_grow();
test_seal_resize();
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 5eca48bdb1a6..0a36b1708b76 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -628,7 +628,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
BUG_ON(pmd_sect(*pmd));
if (pmd_none(*pmd)) {
- pte = pte_alloc_one_kernel(NULL, addr);
+ pte = pte_alloc_one_kernel(NULL);
if (!pte) {
kvm_err("Cannot allocate Hyp pte\n");
return -ENOMEM;