author     Linus Torvalds <torvalds@linux-foundation.org>   2020-06-02 12:21:36 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2020-06-02 12:21:36 -0700
commit     94709049fb8442fb2f7b91fbec3c2897a75e18df (patch)
tree       f1d38ea6bc9db6d5a15ba4821c83abeb7ce7fd35 /arch
parent     17839856fd588f4ab6b789f482ed3ffd7c403e1f (diff)
parent     4fba37586e4e73f9f9a855e610e151ef7da2b481 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
 "A few little subsystems and a start of a lot of MM patches.

  Subsystems affected by this patch series: squashfs, ocfs2, parisc,
  vfs. With mm subsystems: slab-generic, slub, debug, pagecache, gup,
  swap, memcg, pagemap, memory-failure, vmalloc, kasan"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (128 commits)
  kasan: move kasan_report() into report.c
  mm/mm_init.c: report kasan-tag information stored in page->flags
  ubsan: entirely disable alignment checks under UBSAN_TRAP
  kasan: fix clang compilation warning due to stack protector
  x86/mm: remove vmalloc faulting
  mm: remove vmalloc_sync_(un)mappings()
  x86/mm/32: implement arch_sync_kernel_mappings()
  x86/mm/64: implement arch_sync_kernel_mappings()
  mm/ioremap: track which page-table levels were modified
  mm/vmalloc: track which page-table levels were modified
  mm: add functions to track page directory modifications
  s390: use __vmalloc_node in stack_alloc
  powerpc: use __vmalloc_node in alloc_vm_stack
  arm64: use __vmalloc_node in arch_alloc_vmap_stack
  mm: remove vmalloc_user_node_flags
  mm: switch the test_vmalloc module to use __vmalloc_node
  mm: remove __vmalloc_node_flags_caller
  mm: remove both instances of __vmalloc_node_flags
  mm: remove the prot argument to __vmalloc_node
  mm: remove the pgprot argument to __vmalloc
  ...
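The headline vmalloc change for arch code is visible throughout the diff below: __vmalloc() loses its pgprot argument and __vmalloc_node() becomes the general-purpose entry point. A minimal sketch of the before/after signatures (as of this series; nearly all callers passed PAGE_KERNEL anyway):

    /* Before this series: every caller spelled out a protection. */
    void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);

    /* After this series: PAGE_KERNEL is implied. */
    void *__vmalloc(unsigned long size, gfp_t gfp_mask);

    /* __vmalloc_node() replaces most __vmalloc_node_range() call sites:
     * it uses the default vmalloc range and PAGE_KERNEL internally. */
    void *__vmalloc_node(unsigned long size, unsigned long align,
                         gfp_t gfp_mask, int node, const void *caller);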
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/configs/omap2plus_defconfig         |   2
-rw-r--r--  arch/arm64/include/asm/pgtable.h             |   3
-rw-r--r--  arch/arm64/include/asm/vmap_stack.h          |   6
-rw-r--r--  arch/arm64/mm/dump.c                         |   2
-rw-r--r--  arch/parisc/include/asm/pgtable.h            |   2
-rw-r--r--  arch/powerpc/include/asm/io.h                |  10
-rw-r--r--  arch/powerpc/include/asm/pci-bridge.h        |   2
-rw-r--r--  arch/powerpc/kernel/irq.c                    |   5
-rw-r--r--  arch/powerpc/kernel/isa-bridge.c             |  28
-rw-r--r--  arch/powerpc/kernel/pci_64.c                 |  54
-rw-r--r--  arch/powerpc/mm/ioremap_64.c                 |  50
-rw-r--r--  arch/riscv/include/asm/pgtable.h             |   4
-rw-r--r--  arch/riscv/mm/ptdump.c                       |   2
-rw-r--r--  arch/s390/kernel/setup.c                     |   9
-rw-r--r--  arch/sh/kernel/cpu/sh4/sq.c                  |   3
-rw-r--r--  arch/x86/hyperv/hv_init.c                    |   5
-rw-r--r--  arch/x86/include/asm/kvm_host.h              |   3
-rw-r--r--  arch/x86/include/asm/pgtable-2level_types.h  |   2
-rw-r--r--  arch/x86/include/asm/pgtable-3level_types.h  |   2
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h      |   2
-rw-r--r--  arch/x86/include/asm/pgtable_types.h         |   8
-rw-r--r--  arch/x86/include/asm/switch_to.h             |  23
-rw-r--r--  arch/x86/kernel/irq_64.c                     |   2
-rw-r--r--  arch/x86/kernel/setup_percpu.c               |   6
-rw-r--r--  arch/x86/kvm/svm/sev.c                       |   3
-rw-r--r--  arch/x86/mm/dump_pagetables.c                |  35
-rw-r--r--  arch/x86/mm/fault.c                          | 176
-rw-r--r--  arch/x86/mm/init_64.c                        |   5
-rw-r--r--  arch/x86/mm/pti.c                            |   8
-rw-r--r--  arch/x86/mm/tlb.c                            |  37
30 files changed, 130 insertions, 369 deletions
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 8b83d4a5d309..fe383f5a92fb 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -81,7 +81,7 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_BINFMT_MISC=y
CONFIG_CMA=y
CONFIG_ZSMALLOC=m
-CONFIG_PGTABLE_MAPPING=y
+CONFIG_ZSMALLOC_PGTABLE_MAPPING=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e50e4dda90c2..dae0466d19d6 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -407,6 +407,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define __pgprot_modify(prot,mask,bits) \
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+#define pgprot_nx(prot) \
+ __pgprot_modify(prot, 0, PTE_PXN)
+
/*
* Mark the prot value as uncacheable and unbufferable.
*/
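pgprot_nx() gives generic code a portable way to strip execute permission from a protection value; elsewhere in this series it backs the rule that vmap() never creates executable mappings. A hypothetical illustration (PTE_PXN is arm64's privileged-execute-never bit, set by the macro above):

    /* Illustration only: derive a non-executable variant of an
     * executable kernel protection. Architectures that don't define
     * pgprot_nx() get a no-op fallback from the generic headers. */
    pgprot_t prot = pgprot_nx(PAGE_KERNEL_EXEC);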
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
index 0a12115d9638..0cc6636e3f15 100644
--- a/arch/arm64/include/asm/vmap_stack.h
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -19,10 +19,8 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
{
BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
- return __vmalloc_node_range(stack_size, THREAD_ALIGN,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP, PAGE_KERNEL, 0, node,
- __builtin_return_address(0));
+ return __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+ __builtin_return_address(0));
}
#endif /* __ASM_VMAP_STACK_H */
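The vmap-stack allocators on arm64, powerpc and s390 all wanted the same defaults, so the explicit range and protection arguments fold away. Under this series' definitions the two calls below should be equivalent (a sketch, not part of the diff):

    /* Old form: full argument list with the defaults spelled out. */
    stack = __vmalloc_node_range(stack_size, THREAD_ALIGN,
                                 VMALLOC_START, VMALLOC_END,
                                 THREADINFO_GFP, PAGE_KERNEL, 0, node,
                                 __builtin_return_address(0));

    /* New form: __vmalloc_node() fills in the range and PAGE_KERNEL. */
    stack = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
                           __builtin_return_address(0));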
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 78163b7a7dde..0da020c563e6 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -252,7 +252,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
}
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- unsigned long val)
+ u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
static const char units[] = "KMGTPE";
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 9832c73a7021..cd7df48dc874 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -93,10 +93,8 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define set_pte_at(mm, addr, ptep, pteval) \
do { \
- pte_t old_pte; \
unsigned long flags; \
spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
- old_pte = *ptep; \
set_pte(ptep, pteval); \
purge_tlb_entries(mm, addr); \
spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 635969b5b58e..13f90dd03450 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -699,10 +699,6 @@ static inline void iosync(void)
*
* * iounmap undoes such a mapping and can be hooked
*
- * * __ioremap_at (and the pending __iounmap_at) are low level functions to
- * create hand-made mappings for use only by the PCI code and cannot
- * currently be hooked. Must be page aligned.
- *
* * __ioremap_caller is the same as above but takes an explicit caller
* reference rather than using __builtin_return_address(0)
*
@@ -719,6 +715,8 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size);
+
int early_ioremap_range(unsigned long ea, phys_addr_t pa,
unsigned long size, pgprot_t prot);
void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
@@ -727,10 +725,6 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
pgprot_t prot, void *caller);
-extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
- unsigned long size, pgprot_t prot);
-extern void __iounmap_at(void *ea, unsigned long size);
-
/*
* When CONFIG_PPC_INDIRECT_PIO is set, we use the generic iomap implementation
* which needs some additional definitions here. They basically allow PIO
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 69f4cb3b7c56..b92e81b256e5 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -66,7 +66,7 @@ struct pci_controller {
void __iomem *io_base_virt;
#ifdef CONFIG_PPC64
- void *io_base_alloc;
+ void __iomem *io_base_alloc;
#endif
resource_size_t io_base_phys;
resource_size_t pci_io_size;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 1f1169856dc8..112d150354b2 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -748,9 +748,8 @@ void do_IRQ(struct pt_regs *regs)
static void *__init alloc_vm_stack(void)
{
- return __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START,
- VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL,
- 0, NUMA_NO_NODE, (void*)_RET_IP_);
+ return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP,
+ NUMA_NO_NODE, (void *)_RET_IP_);
}
static void __init vmap_irqstack_init(void)
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 773671b512df..2257d24e6a26 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
+#include <linux/vmalloc.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -38,6 +39,22 @@ EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
#define ISA_SPACE_MASK 0x1
#define ISA_SPACE_IO 0x1
+static void remap_isa_base(phys_addr_t pa, unsigned long size)
+{
+ WARN_ON_ONCE(ISA_IO_BASE & ~PAGE_MASK);
+ WARN_ON_ONCE(pa & ~PAGE_MASK);
+ WARN_ON_ONCE(size & ~PAGE_MASK);
+
+ if (slab_is_available()) {
+ if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
+ pgprot_noncached(PAGE_KERNEL)))
+ unmap_kernel_range(ISA_IO_BASE, size);
+ } else {
+ early_ioremap_range(ISA_IO_BASE, pa, size,
+ pgprot_noncached(PAGE_KERNEL));
+ }
+}
+
static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
unsigned long phb_io_base_phys)
{
@@ -105,15 +122,13 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
if (size > 0x10000)
size = 0x10000;
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- size, pgprot_noncached(PAGE_KERNEL));
+ remap_isa_base(phb_io_base_phys, size);
return;
inval_range:
printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
"mapping 64k\n");
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- 0x10000, pgprot_noncached(PAGE_KERNEL));
+ remap_isa_base(phb_io_base_phys, 0x10000);
}
@@ -248,8 +263,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
* and map it
*/
isa_io_base = ISA_IO_BASE;
- __ioremap_at(pbase, (void *)ISA_IO_BASE,
- size, pgprot_noncached(PAGE_KERNEL));
+ remap_isa_base(pbase, size);
pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
@@ -297,7 +311,7 @@ static void isa_bridge_remove(void)
isa_bridge_pcidev = NULL;
/* Unmap the ISA area */
- __iounmap_at((void *)ISA_IO_BASE, 0x10000);
+ unmap_kernel_range(ISA_IO_BASE, 0x10000);
}
/**
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index f83d1f69b1dd..d9ac980c398c 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -109,23 +109,47 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
/* Get the host bridge */
hose = pci_bus_to_host(bus);
- /* Check if we have IOs allocated */
- if (hose->io_base_alloc == NULL)
- return 0;
-
pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
- /* This is a PHB, we fully unmap the IO area */
- vunmap(hose->io_base_alloc);
-
+ iounmap(hose->io_base_alloc);
return 0;
}
EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
-static int pcibios_map_phb_io_space(struct pci_controller *hose)
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size)
{
struct vm_struct *area;
+ unsigned long addr;
+
+ WARN_ON_ONCE(paddr & ~PAGE_MASK);
+ WARN_ON_ONCE(size & ~PAGE_MASK);
+
+ /*
+ * Let's allocate some IO space for that guy. We don't pass VM_IOREMAP
+ * because we don't care about alignment tricks that the core does in
+ * that case. Maybe we should due to stupid card with incomplete
+ * address decoding but I'd rather not deal with those outside of the
+ * reserved 64K legacy region.
+ */
+ area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END,
+ __builtin_return_address(0));
+ if (!area)
+ return NULL;
+
+ addr = (unsigned long)area->addr;
+ if (ioremap_page_range(addr, addr + size, paddr,
+ pgprot_noncached(PAGE_KERNEL))) {
+ unmap_kernel_range(addr, size);
+ return NULL;
+ }
+
+ return (void __iomem *)addr;
+}
+EXPORT_SYMBOL_GPL(ioremap_phb);
+
+static int pcibios_map_phb_io_space(struct pci_controller *hose)
+{
unsigned long phys_page;
unsigned long size_page;
unsigned long io_virt_offset;
@@ -146,12 +170,11 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
* with incomplete address decoding but I'd rather not deal with
* those outside of the reserved 64K legacy region.
*/
- area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
- if (area == NULL)
+ hose->io_base_alloc = ioremap_phb(phys_page, size_page);
+ if (!hose->io_base_alloc)
return -ENOMEM;
- hose->io_base_alloc = area->addr;
- hose->io_base_virt = (void __iomem *)(area->addr +
- hose->io_base_phys - phys_page);
+ hose->io_base_virt = hose->io_base_alloc +
+ hose->io_base_phys - phys_page;
pr_debug("IO mapping for PHB %pOF\n", hose->dn);
pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
@@ -159,11 +182,6 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
hose->pci_io_size, size_page);
- /* Establish the mapping */
- if (__ioremap_at(phys_page, area->addr, size_page,
- pgprot_noncached(PAGE_KERNEL)) == NULL)
- return -ENOMEM;
-
/* Fixup hose IO resource */
io_virt_offset = pcibios_io_space_offset(hose);
hose->io_resource.start += io_virt_offset;
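ioremap_phb() is exported so drivers that manage their own PHB IO windows can use this path rather than the removed __ioremap_at(). A hypothetical caller, with phb_io_phys and phb_io_size standing in for a real bridge's window:

    void __iomem *io = ioremap_phb(phb_io_phys, phb_io_size);
    if (!io)
        return -ENOMEM;
    /* ... access the IO window ... */
    iounmap(io);    /* pairs with ioremap_phb(), as pcibios_unmap_io_space() now does */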
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
index 50a99d9684f7..ba5cbb0d66bd 100644
--- a/arch/powerpc/mm/ioremap_64.c
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -4,56 +4,6 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
-/**
- * Low level function to establish the page tables for an IO mapping
- */
-void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
-{
- int ret;
- unsigned long va = (unsigned long)ea;
-
- /* We don't support the 4K PFN hack with ioremap */
- if (pgprot_val(prot) & H_PAGE_4K_PFN)
- return NULL;
-
- if ((ea + size) >= (void *)IOREMAP_END) {
- pr_warn("Outside the supported range\n");
- return NULL;
- }
-
- WARN_ON(pa & ~PAGE_MASK);
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- if (slab_is_available()) {
- ret = ioremap_page_range(va, va + size, pa, prot);
- if (ret)
- unmap_kernel_range(va, size);
- } else {
- ret = early_ioremap_range(va, pa, size, prot);
- }
-
- if (ret)
- return NULL;
-
- return (void __iomem *)ea;
-}
-EXPORT_SYMBOL(__ioremap_at);
-
-/**
- * Low level function to tear down the page tables for an IO mapping. This is
- * used for mappings that are manipulated manually, like partial unmapping of
- * PCI IOs or ISA space.
- */
-void __iounmap_at(void *ea, unsigned long size)
-{
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- unmap_kernel_range((unsigned long)ea, size);
-}
-EXPORT_SYMBOL(__iounmap_at);
-
void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
pgprot_t prot, void *caller)
{
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 35b60035b6b0..d50706ea1c94 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -473,9 +473,9 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define PAGE_SHARED __pgprot(0)
#define PAGE_KERNEL __pgprot(0)
#define swapper_pg_dir NULL
+#define TASK_SIZE 0xffffffffUL
#define VMALLOC_START 0
-
-#define TASK_SIZE 0xffffffffUL
+#define VMALLOC_END TASK_SIZE
static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {}
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 7eab76a93106..070505d79b06 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -204,7 +204,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
}
static void note_page(struct ptdump_state *pt_st, unsigned long addr,
- int level, unsigned long val)
+ int level, u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
u64 pa = PFN_PHYS(pte_pfn(__pte(val)));
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 36445dd40fdb..0f0b140b5558 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,12 +305,9 @@ void *restart_stack __section(.data);
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
- return (unsigned long)
- __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
+ THREADINFO_GFP, NUMA_NO_NODE,
+ __builtin_return_address(0));
#else
return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 934ff84844fa..d432164b23b7 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -103,7 +103,8 @@ static int __sq_remap(struct sq_mapping *map, pgprot_t prot)
#if defined(CONFIG_MMU)
struct vm_struct *vma;
- vma = __get_vm_area(map->size, VM_ALLOC, map->sq_addr, SQ_ADDRMAX);
+ vma = __get_vm_area_caller(map->size, VM_ALLOC, map->sq_addr,
+ SQ_ADDRMAX, __builtin_return_address(0));
if (!vma)
return -ENOMEM;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index acf76b466db6..e2137070386a 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -97,8 +97,7 @@ static int hv_cpu_init(unsigned int cpu)
* not be stopped in the case of CPU offlining and the VM will hang.
*/
if (!*hvp) {
- *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
- PAGE_KERNEL);
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
}
if (*hvp) {
@@ -379,7 +378,7 @@ void __init hyperv_init(void)
guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
+ hv_hypercall_pg = vmalloc_exec(PAGE_SIZE);
if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
goto remove_cpuhp_state;
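With __PAGE_KERNEL_RX/PAGE_KERNEL_RX removed later in this diff, the hypercall page switches to vmalloc_exec(). Roughly, and assuming the helper as it existed at the time of this series:

    /* vmalloc_exec() returns 'size' bytes of vmalloc space mapped with
     * an executable kernel protection (PAGE_KERNEL_EXEC), so callers
     * no longer name a pgprot explicitly. */
    void *vmalloc_exec(unsigned long size);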
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0a6b35353fc7..e94b3de564d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1279,8 +1279,7 @@ extern struct kmem_cache *x86_fpu_cache;
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
- return __vmalloc(kvm_x86_ops.vm_size,
- GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL);
+ return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}
void kvm_arch_free_vm(struct kvm *kvm);
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 6deb6cd236e3..7f6ccff0ba72 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -20,6 +20,8 @@ typedef union {
#define SHARED_KERNEL_PMD 0
+#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
+
/*
* traditional i386 two-level paging structure:
*/
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 33845d36897c..80fbb4a9ed87 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -27,6 +27,8 @@ typedef union {
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
#endif
+#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 52e5f5f2240d..8f63efb2a2cc 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -159,4 +159,6 @@ extern unsigned int ptrs_per_p4d;
#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
+#define ARCH_PAGE_TABLE_SYNC_MASK (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
+
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
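ARCH_PAGE_TABLE_SYNC_MASK tells the vmalloc/ioremap core which modified page-table levels require a call into arch_sync_kernel_mappings(). A condensed sketch of the core-side pattern this series introduces (variable names illustrative):

    unsigned long mask = 0;

    /* Population helpers OR PGTBL_*_MODIFIED bits into 'mask' as they
     * instantiate new levels for [start, end). */

    if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
        arch_sync_kernel_mappings(start, end);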
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b6606fe6cfdf..2e7c442cc618 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -194,7 +194,6 @@ enum page_cache_mode {
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
-#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G)
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
@@ -220,7 +219,6 @@ enum page_cache_mode {
#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
-#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC)
#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
@@ -284,6 +282,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef struct { pgdval_t pgd; } pgd_t;
+static inline pgprot_t pgprot_nx(pgprot_t prot)
+{
+ return __pgprot(pgprot_val(prot) | _PAGE_NX);
+}
+#define pgprot_nx pgprot_nx
+
#ifdef CONFIG_X86_PAE
/*
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 0e059b73437b..9f69cc497f4b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -12,27 +12,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
-/* This runs runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *next)
-{
-#ifdef CONFIG_VMAP_STACK
- /*
- * If we switch to a stack that has a top-level paging entry
- * that is not present in the current mm, the resulting #PF will
- * will be promoted to a double-fault and we'll panic. Probe
- * the new stack now so that vmalloc_fault can fix up the page
- * tables if needed. This can only happen if we use a stack
- * in vmap space.
- *
- * We assume that the stack is aligned so that it never spans
- * more than one top-level paging entry.
- *
- * To minimize cache pollution, just follow the stack pointer.
- */
- READ_ONCE(*(unsigned char *)next->thread.sp);
-#endif
-}
-
asmlinkage void ret_from_fork(void);
/*
@@ -67,8 +46,6 @@ struct fork_frame {
#define switch_to(prev, next, last) \
do { \
- prepare_switch_to(next); \
- \
((last) = __switch_to_asm((prev), (next))); \
} while (0)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 12df3a4abfdd..6b32ab009c19 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -43,7 +43,7 @@ static int map_irq_stack(unsigned int cpu)
pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
}
- va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL);
if (!va)
return -ENOMEM;
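The irq_64.c hunk is a straight bug fix: vmap()'s third parameter takes VM_* flags, not a GFP mask, so GFP_KERNEL was the wrong constant there. The calling convention, for reference (nr_pages is a placeholder):

    /* vmap() maps an existing page array; pass VM_MAP as the flags and
     * a pgprot for the mapping. Tear down with vunmap(). */
    void *va = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
    if (!va)
        return -ENOMEM;
    /* ... */
    vunmap(va);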
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e6d7894ad127..fd945ce78554 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -287,9 +287,9 @@ void __init setup_per_cpu_areas(void)
/*
* Sync back kernel address range again. We already did this in
* setup_arch(), but percpu data also needs to be available in
- * the smpboot asm. We can't reliably pick up percpu mappings
- * using vmalloc_fault(), because exception dispatch needs
- * percpu data.
+ * the smpboot asm and arch_sync_kernel_mappings() doesn't sync to
+ * swapper_pg_dir on 32-bit. The per-cpu mappings need to be available
+ * there too.
*
* FIXME: Can the later sync in setup_cpu_entry_areas() replace
* this call?
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 89f7f3aebd31..5573a97f1520 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -336,8 +336,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
- pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO,
- PAGE_KERNEL);
+ pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
else
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 69309cd56fdf..ea9010113f69 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr)
(void *)st->start_address);
}
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
{
- return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
- ((prot1 | prot2) & _PAGE_NX);
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ pgprotval_t prot = val & PTE_FLAGS_MASK;
+ pgprotval_t effective;
+
+ if (level > 0) {
+ pgprotval_t higher_prot = st->prot_levels[level - 1];
+
+ effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
+ ((higher_prot | prot) & _PAGE_NX);
+ } else {
+ effective = prot;
+ }
+
+ st->prot_levels[level] = effective;
}
/*
@@ -261,7 +273,7 @@ static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
* print what we collected so far.
*/
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- unsigned long val)
+ u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
pgprotval_t new_prot, new_eff;
@@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
struct seq_file *m = st->seq;
new_prot = val & PTE_FLAGS_MASK;
-
- if (level > 0) {
- new_eff = effective_prot(st->prot_levels[level - 1],
- new_prot);
- } else {
- new_eff = new_prot;
- }
-
- if (level >= 0)
- st->prot_levels[level] = new_eff;
+ if (!val)
+ new_eff = 0;
+ else
+ new_eff = st->prot_levels[level];
/*
* If we have a "break" in the series, we need to flush the state that
@@ -374,6 +380,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
struct pg_state st = {
.ptdump = {
.note_page = note_page,
+ .effective_prot = effective_prot,
.range = ptdump_ranges
},
.level = -1,
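The effective-permission folding moves out of note_page() into a dedicated ptdump callback, so the generic walker can compute inherited USER/RW/NX bits per level before reporting. A sketch of the per-entry sequence, assuming the ptdump core as extended by this series:

    /* For each present entry the walker first lets the arch fold the
     * parent level's permissions into prot_levels[level], then reports
     * the entry as before. */
    if (st->effective_prot)
        st->effective_prot(st, level, val);
    st->note_page(st, addr, level, val);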
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a51df516b87b..dffe8e4d3140 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,16 +190,13 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
-static void vmalloc_sync(void)
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
- unsigned long address;
-
- if (SHARED_KERNEL_PMD)
- return;
+ unsigned long addr;
- for (address = VMALLOC_START & PMD_MASK;
- address >= TASK_SIZE_MAX && address < VMALLOC_END;
- address += PMD_SIZE) {
+ for (addr = start & PMD_MASK;
+ addr >= TASK_SIZE_MAX && addr < VMALLOC_END;
+ addr += PMD_SIZE) {
struct page *page;
spin_lock(&pgd_lock);
@@ -210,61 +207,13 @@ static void vmalloc_sync(void)
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- vmalloc_sync_one(page_address(page), address);
+ vmalloc_sync_one(page_address(page), addr);
spin_unlock(pgt_lock);
}
spin_unlock(&pgd_lock);
}
}
-void vmalloc_sync_mappings(void)
-{
- vmalloc_sync();
-}
-
-void vmalloc_sync_unmappings(void)
-{
- vmalloc_sync();
-}
-
-/*
- * 32-bit:
- *
- * Handle a fault on the vmalloc or module mapping area
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
- unsigned long pgd_paddr;
- pmd_t *pmd_k;
- pte_t *pte_k;
-
- /* Make sure we are in vmalloc area: */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "current" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- pgd_paddr = read_cr3_pa();
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
- if (!pmd_k)
- return -1;
-
- if (pmd_large(*pmd_k))
- return 0;
-
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- return -1;
-
- return 0;
-}
-NOKPROBE_SYMBOL(vmalloc_fault);
-
/*
* Did it hit the DOS screen memory VA from vm86 mode?
*/
@@ -329,96 +278,6 @@ out:
#else /* CONFIG_X86_64: */
-void vmalloc_sync_mappings(void)
-{
- /*
- * 64-bit mappings might allocate new p4d/pud pages
- * that need to be propagated to all tasks' PGDs.
- */
- sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
-}
-
-void vmalloc_sync_unmappings(void)
-{
- /*
- * Unmappings never allocate or free p4d/pud pages.
- * No work is required here.
- */
-}
-
-/*
- * 64-bit:
- *
- * Handle a fault on the vmalloc area
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
- pgd_t *pgd, *pgd_k;
- p4d_t *p4d, *p4d_k;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /* Make sure we are in vmalloc area: */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /*
- * Copy kernel mappings over when needed. This can also
- * happen within a race in page table update. In the later
- * case just flush:
- */
- pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
- pgd_k = pgd_offset_k(address);
- if (pgd_none(*pgd_k))
- return -1;
-
- if (pgtable_l5_enabled()) {
- if (pgd_none(*pgd)) {
- set_pgd(pgd, *pgd_k);
- arch_flush_lazy_mmu_mode();
- } else {
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k));
- }
- }
-
- /* With 4-level paging, copying happens on the p4d level. */
- p4d = p4d_offset(pgd, address);
- p4d_k = p4d_offset(pgd_k, address);
- if (p4d_none(*p4d_k))
- return -1;
-
- if (p4d_none(*p4d) && !pgtable_l5_enabled()) {
- set_p4d(p4d, *p4d_k);
- arch_flush_lazy_mmu_mode();
- } else {
- BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k));
- }
-
- BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);
-
- pud = pud_offset(p4d, address);
- if (pud_none(*pud))
- return -1;
-
- if (pud_large(*pud))
- return 0;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd))
- return -1;
-
- if (pmd_large(*pmd))
- return 0;
-
- pte = pte_offset_kernel(pmd, address);
- if (!pte_present(*pte))
- return -1;
-
- return 0;
-}
-NOKPROBE_SYMBOL(vmalloc_fault);
-
#ifdef CONFIG_CPU_SUP_AMD
static const char errata93_warning[] =
KERN_ERR
@@ -1257,29 +1116,6 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
*/
WARN_ON_ONCE(hw_error_code & X86_PF_PK);
- /*
- * We can fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * Before doing this on-demand faulting, ensure that the
- * fault is not any of the following:
- * 1. A fault on a PTE with a reserved bit set.
- * 2. A fault caused by a user-mode access. (Do not demand-
- * fault kernel memory due to user-mode accesses).
- * 3. A fault caused by a page-level protection violation.
- * (A demand fault would be on a non-present page which
- * would have X86_PF_PROT==0).
- */
- if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
-
/* Was the fault spurious, caused by lazy TLB invalidation? */
if (spurious_kernel_fault(hw_error_code, address))
return;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8b5f73f5e207..96274a90c5ff 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -218,6 +218,11 @@ void sync_global_pgds(unsigned long start, unsigned long end)
sync_global_pgds_l4(start, end);
}
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+ sync_global_pgds(start, end);
+}
+
/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
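On x86-64 the new hook simply reuses sync_global_pgds(); the generic side of the series defaults the mask to zero so unaffected architectures compile the sync away. Roughly, per the generic header plumbing added elsewhere in the series (shown for context, as an assumption):

    #ifndef ARCH_PAGE_TABLE_SYNC_MASK
    #define ARCH_PAGE_TABLE_SYNC_MASK 0
    #endif

    void arch_sync_kernel_mappings(unsigned long start, unsigned long end);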
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 843aa10a4cb6..da0fb17a1a36 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -448,13 +448,7 @@ static void __init pti_clone_user_shared(void)
* the sp1 and sp2 slots.
*
* This is done for all possible CPUs during boot to ensure
- * that it's propagated to all mms. If we were to add one of
- * these mappings during CPU hotplug, we would need to take
- * some measure to make sure that every mm that subsequently
- * ran on that CPU would have the relevant PGD entry in its
- * pagetables. The usual vmalloc_fault() mechanism would not
- * work for page faults taken in entry_SYSCALL_64 before RSP
- * is set up.
+ * that it's propagated to all mms.
*/
unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 66f96f21a7b6..f3fe261e5936 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -161,34 +161,6 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
-static void sync_current_stack_to_mm(struct mm_struct *mm)
-{
- unsigned long sp = current_stack_pointer;
- pgd_t *pgd = pgd_offset(mm, sp);
-
- if (pgtable_l5_enabled()) {
- if (unlikely(pgd_none(*pgd))) {
- pgd_t *pgd_ref = pgd_offset_k(sp);
-
- set_pgd(pgd, *pgd_ref);
- }
- } else {
- /*
- * "pgd" is faked. The top level entries are "p4d"s, so sync
- * the p4d. This compiles to approximately the same code as
- * the 5-level case.
- */
- p4d_t *p4d = p4d_offset(pgd, sp);
-
- if (unlikely(p4d_none(*p4d))) {
- pgd_t *pgd_ref = pgd_offset_k(sp);
- p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
-
- set_p4d(p4d, *p4d_ref);
- }
- }
-}
-
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
@@ -377,15 +349,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
cond_ibpb(tsk);
- if (IS_ENABLED(CONFIG_VMAP_STACK)) {
- /*
- * If our current stack is in vmalloc space and isn't
- * mapped in the new pgd, we'll double-fault. Forcibly
- * map it.
- */
- sync_current_stack_to_mm(next);
- }
-
/*
* Stop remote flushes for the previous mm.
* Skip kernel threads; we never send init_mm TLB flushing IPIs,