Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 27
-rw-r--r-- | arch/x86/mm/extable.c         |  4
-rw-r--r-- | arch/x86/mm/fault.c           |  6
-rw-r--r-- | arch/x86/mm/gup.c             | 65
-rw-r--r-- | arch/x86/mm/hugetlbpage.c     |  1
-rw-r--r-- | arch/x86/mm/init.c            | 41
-rw-r--r-- | arch/x86/mm/init_32.c         |  6
-rw-r--r-- | arch/x86/mm/init_64.c         | 33
-rw-r--r-- | arch/x86/mm/kasan_init_64.c   |  2
-rw-r--r-- | arch/x86/mm/kaslr.c           |  4
-rw-r--r-- | arch/x86/mm/mmap.c            |  3
-rw-r--r-- | arch/x86/mm/mpx.c             | 21
-rw-r--r-- | arch/x86/mm/numa.c            |  2
-rw-r--r-- | arch/x86/mm/pageattr.c        | 15
-rw-r--r-- | arch/x86/mm/pat.c             |  7
-rw-r--r-- | arch/x86/mm/pat_rbtree.c      | 12
-rw-r--r-- | arch/x86/mm/pgtable.c         | 31
-rw-r--r-- | arch/x86/mm/pkeys.c           |  3
18 files changed, 190 insertions, 93 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..58b5bee7ea27 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -15,8 +15,10 @@
 #include <linux/debugfs.h>
 #include <linux/mm.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/seq_file.h>
 
+#include <asm/kasan.h>
 #include <asm/pgtable.h>
 
 /*
@@ -50,6 +52,10 @@ enum address_markers_idx {
 	LOW_KERNEL_NR,
 	VMALLOC_START_NR,
 	VMEMMAP_START_NR,
+#ifdef CONFIG_KASAN
+	KASAN_SHADOW_START_NR,
+	KASAN_SHADOW_END_NR,
+#endif
 # ifdef CONFIG_X86_ESPFIX64
 	ESPFIX_START_NR,
 # endif
@@ -75,6 +81,10 @@ static struct addr_marker address_markers[] = {
 	{ 0/* PAGE_OFFSET */,	"Low Kernel Mapping" },
 	{ 0/* VMALLOC_START */,	"vmalloc() Area" },
 	{ 0/* VMEMMAP_START */,	"Vmemmap" },
+#ifdef CONFIG_KASAN
+	{ KASAN_SHADOW_START,	"KASAN shadow" },
+	{ KASAN_SHADOW_END,	"KASAN shadow end" },
+#endif
 # ifdef CONFIG_X86_ESPFIX64
 	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
 # endif
@@ -326,18 +336,31 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 
 #if PTRS_PER_PUD > 1
 
+/*
+ * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
+ * KASAN fills page tables with the same values. Since there is no
+ * point in checking page table more than once we just skip repeated
+ * entries. This saves us dozens of seconds during boot.
+ */
+static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
+{
+	return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
+}
+
 static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 							unsigned long P)
 {
 	int i;
 	pud_t *start;
 	pgprotval_t prot;
+	pud_t *prev_pud = NULL;
 
 	start = (pud_t *) pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
-		if (!pud_none(*start)) {
+		if (!pud_none(*start) &&
+		    !pud_already_checked(prev_pud, start, st->check_wx)) {
 			if (pud_large(*start) || !pud_present(*start)) {
 				prot = pud_flags(*start);
 				note_page(m, st, __pgprot(prot), 2);
@@ -348,6 +371,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 		} else
 			note_page(m, st, __pgprot(0), 2);
 
+		prev_pud = start;
 		start++;
 	}
 }
@@ -406,6 +430,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 		} else
 			note_page(m, &st, __pgprot(0), 1);
 
+		cond_resched();
 		start++;
 	}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index fcd06f7526de..35ea061010a1 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,5 +1,7 @@
 #include <linux/extable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/sched/debug.h>
+
 #include <asm/traps.h>
 #include <asm/kdebug.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9f72ca3b2669..428e31763cb9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
 #include <linux/sched.h>		/* test_thread_flag(), ...	*/
+#include <linux/sched/task_stack.h>	/* task_stack_*(), ...		*/
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
 #include <linux/extable.h>		/* search_exception_tables	*/
 #include <linux/bootmem.h>		/* max_low_pfn			*/
@@ -413,7 +414,7 @@ out:
 
 void vmalloc_sync_all(void)
 {
-	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
 }
 
 /*
@@ -679,8 +680,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		printk(KERN_CONT "paging request");
 
 	printk(KERN_CONT " at %p\n", (void *) address);
-	printk(KERN_ALERT "IP:");
-	printk_address(regs->ip);
+	printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
 
 	dump_pagetable(address);
 }
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 0d4fb3ebbbac..1f3b6ef105cd 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -106,32 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
 	struct dev_pagemap *pgmap = NULL;
-	int nr_start = *nr;
-	pte_t *ptep;
+	int nr_start = *nr, ret = 0;
+	pte_t *ptep, *ptem;
 
-	ptep = pte_offset_map(&pmd, addr);
+	/*
+	 * Keep the original mapped PTE value (ptem) around since we
+	 * might increment ptep off the end of the page when finishing
+	 * our loop iteration.
+	 */
+	ptem = ptep = pte_offset_map(&pmd, addr);
 	do {
 		pte_t pte = gup_get_pte(ptep);
 		struct page *page;
 
 		/* Similar to the PMD case, NUMA hinting must take slow path */
-		if (pte_protnone(pte)) {
-			pte_unmap(ptep);
-			return 0;
-		}
+		if (pte_protnone(pte))
+			break;
+
+		if (!pte_allows_gup(pte_val(pte), write))
+			break;
 
 		if (pte_devmap(pte)) {
 			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
 			if (unlikely(!pgmap)) {
 				undo_dev_pagemap(nr, nr_start, pages);
-				pte_unmap(ptep);
-				return 0;
+				break;
 			}
-		} else if (!pte_allows_gup(pte_val(pte), write) ||
-			   pte_special(pte)) {
-			pte_unmap(ptep);
-			return 0;
-		}
+		} else if (pte_special(pte))
+			break;
+
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 		get_page(page);
@@ -141,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		(*nr)++;
 
 	} while (ptep++, addr += PAGE_SIZE, addr != end);
-	pte_unmap(ptep - 1);
+	if (addr == end)
+		ret = 1;
+	pte_unmap(ptem);
 
-	return 1;
+	return ret;
 }
 
 static inline void get_head_page_multiple(struct page *page, int nr)
@@ -154,14 +159,12 @@ static inline void get_head_page_multiple(struct page *page, int nr)
 	SetPageReferenced(page);
 }
 
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 		unsigned long end, struct page **pages, int *nr)
 {
 	int nr_start = *nr;
-	unsigned long pfn = pmd_pfn(pmd);
 	struct dev_pagemap *pgmap = NULL;
 
-	pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
 	do {
 		struct page *page = pfn_to_page(pfn);
 
@@ -180,6 +183,24 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
 	return 1;
 }
 
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+		unsigned long end, struct page **pages, int *nr)
+{
+	unsigned long fault_pfn;
+
+	fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+		unsigned long end, struct page **pages, int *nr)
+{
+	unsigned long fault_pfn;
+
+	fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
@@ -251,9 +272,13 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
 
 	if (!pte_allows_gup(pud_val(pud), write))
 		return 0;
+
+	VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
+	if (pud_devmap(pud))
+		return __gup_device_huge_pud(pud, addr, end, pages, nr);
+
 	/* hugepages are never "special" */
 	VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
-	VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
 
 	refs = 0;
 	head = pud_page(pud);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 2ae8584b44c7..c5066a260803 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/sched/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
 #include <linux/err.h>
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 22af912d66d2..889e7619a091 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -643,21 +643,40 @@ void __init init_mem_mapping(void)
  * devmem_is_allowed() checks to see if /dev/mem access to a certain address
  * is valid. The argument is a physical page number.
  *
- *
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains BIOS code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
+ * On x86, access has to be given to the first megabyte of RAM because that
+ * area traditionally contains BIOS code and data regions used by X, dosemu,
+ * and similar apps. Since they map the entire memory range, the whole range
+ * must be allowed (for mapping), but any areas that would otherwise be
+ * disallowed are flagged as being "zero filled" instead of rejected.
+ * Access has to be given to non-kernel-ram areas as well, these contain the
+ * PCI mmio resources as well as potential bios/acpi data regions.
  */
 int devmem_is_allowed(unsigned long pagenr)
 {
-	if (pagenr < 256)
-		return 1;
-	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+	if (page_is_ram(pagenr)) {
+		/*
+		 * For disallowed memory regions in the low 1MB range,
+		 * request that the page be shown as all zeros.
+		 */
+		if (pagenr < 256)
+			return 2;
+
+		return 0;
+	}
+
+	/*
+	 * This must follow RAM test, since System RAM is considered a
+	 * restricted resource under CONFIG_STRICT_IOMEM.
+	 */
+	if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
+		/* Low 1MB bypasses iomem restrictions. */
+		if (pagenr < 256)
+			return 1;
+
 		return 0;
-	if (!page_is_ram(pagenr))
-		return 1;
-	return 0;
+	}
+
+	return 1;
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cf8059016ec8..2b4b53e6793f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -34,7 +34,7 @@
 #include <asm/asm.h>
 #include <asm/bios_ebda.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/dma.h>
 #include <asm/fixmap.h>
@@ -864,9 +864,6 @@ static noinline int do_test_wp_bit(void)
 	return flag;
 }
 
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
 int kernel_set_to_readonly __read_mostly;
 
 void set_kernel_text_rw(void)
@@ -939,7 +936,6 @@ void mark_rodata_ro(void)
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 		size >> 10);
-	rodata_test();
 
 #ifdef CONFIG_CPA_DEBUG
 	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 14b9dd71d9e8..15173d37f399 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -36,7 +36,7 @@
 
 #include <asm/processor.h>
 #include <asm/bios_ebda.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/dma.h>
@@ -89,10 +89,10 @@ static int __init nonx32_setup(char *str)
 __setup("noexec32=", nonx32_setup);
 
 /*
- * When memory was added/removed make sure all the processes MM have
+ * When memory was added make sure all the processes MM have
 * suitable PGD entries in the local PGD level page.
 */
-void sync_global_pgds(unsigned long start, unsigned long end, int removed)
+void sync_global_pgds(unsigned long start, unsigned long end)
 {
 	unsigned long address;
 
@@ -100,12 +100,7 @@ void sync_global_pgds(unsigned long start, unsigned long end, int removed)
 		const pgd_t *pgd_ref = pgd_offset_k(address);
 		struct page *page;
 
-		/*
-		 * When it is called after memory hot remove, pgd_none()
-		 * returns true. In this case (removed == 1), we must clear
-		 * the PGD entries in the local PGD level page.
-		 */
-		if (pgd_none(*pgd_ref) && !removed)
+		if (pgd_none(*pgd_ref))
 			continue;
 
 		spin_lock(&pgd_lock);
@@ -122,13 +117,8 @@ void sync_global_pgds(unsigned long start, unsigned long end, int removed)
 			BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
 
-			if (removed) {
-				if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
-					pgd_clear(pgd);
-			} else {
-				if (pgd_none(*pgd))
-					set_pgd(pgd, *pgd_ref);
-			}
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
 
 			spin_unlock(pgt_lock);
 		}
@@ -596,7 +586,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 	}
 
 	if (pgd_changed)
-		sync_global_pgds(vaddr_start, vaddr_end - 1, 0);
+		sync_global_pgds(vaddr_start, vaddr_end - 1);
 
 	__flush_tlb_all();
 
@@ -689,7 +679,7 @@ static void __meminit free_pagetable(struct page *page, int order)
 
 	if (PageReserved(page)) {
 		__ClearPageReserved(page);
-		magic = (unsigned long)page->lru.next;
+		magic = (unsigned long)page->freelist;
 		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 			while (nr_pages--)
 				put_page_bootmem(page++);
@@ -1010,9 +1000,6 @@ void __init mem_init(void)
 	mem_init_print_info(NULL);
 }
 
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
 int kernel_set_to_readonly;
 
 void set_kernel_text_rw(void)
@@ -1081,8 +1068,6 @@ void mark_rodata_ro(void)
 	all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
 	set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
 
-	rodata_test();
-
 #ifdef CONFIG_CPA_DEBUG
 	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
 	set_memory_rw(start, (end-start) >> PAGE_SHIFT);
@@ -1239,7 +1224,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	} else
 		err = vmemmap_populate_basepages(start, end, node);
 	if (!err)
-		sync_global_pgds(start, end - 1, 0);
+		sync_global_pgds(start, end - 1);
 
 	return err;
 }
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0493c17b8a51..4c90cfdc128b 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,9 +1,11 @@
+#define DISABLE_BRANCH_PROFILING
 #define pr_fmt(fmt) "kasan: " fmt
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
 #include <linux/kdebug.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/sched/task.h>
 #include <linux/vmalloc.h>
 
 #include <asm/tlbflush.h>
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 887e57182716..aed206475aa7 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
 #if defined(CONFIG_X86_ESPFIX64)
 static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
 #elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_START;
+static const unsigned long vaddr_end = EFI_VA_END;
 #else
 static const unsigned long vaddr_end = __START_KERNEL_map;
 #endif
@@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void)
 	 */
 	BUILD_BUG_ON(vaddr_start >= vaddr_end);
 	BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
-		     vaddr_end >= EFI_VA_START);
+		     vaddr_end >= EFI_VA_END);
 	BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
 		      IS_ENABLED(CONFIG_EFI)) &&
 		     vaddr_end >= __START_KERNEL_map);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index d2dc0438d654..7940166c799b 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -28,7 +28,8 @@
 #include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/limits.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
 #include <asm/elf.h>
 
 struct va_alignment __read_mostly va_align = {
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index e4f800999b32..cd44ae727df7 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -7,6 +7,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/mm_types.h>
 #include <linux/syscalls.h>
 #include <linux/sched/sysctl.h>
 
@@ -51,7 +52,7 @@ static unsigned long mpx_mmap(unsigned long len)
 
 	down_write(&mm->mmap_sem);
 	addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
-			MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
+			MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
 	up_write(&mm->mmap_sem);
 	if (populate)
 		mm_populate(addr, populate);
@@ -293,7 +294,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
 	 * We were not able to extract an address from the instruction,
 	 * probably because there was something invalid in it.
 	 */
-	if (info->si_addr == (void *)-1) {
+	if (info->si_addr == (void __user *)-1) {
 		err = -EINVAL;
 		goto err_out;
 	}
@@ -350,12 +351,12 @@ int mpx_enable_management(void)
 	 * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
 	 * expected to be relatively expensive. Storing the bounds
 	 * directory here means that we do not have to do xsave in the
-	 * unmap path; we can just use mm->bd_addr instead.
+	 * unmap path; we can just use mm->context.bd_addr instead.
 	 */
 	bd_base = mpx_get_bounds_dir();
 	down_write(&mm->mmap_sem);
-	mm->bd_addr = bd_base;
-	if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR)
+	mm->context.bd_addr = bd_base;
+	if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
 		ret = -ENXIO;
 
 	up_write(&mm->mmap_sem);
@@ -370,7 +371,7 @@ int mpx_disable_management(void)
 		return -ENXIO;
 
 	down_write(&mm->mmap_sem);
-	mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
+	mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
 	up_write(&mm->mmap_sem);
 
 	return 0;
 }
@@ -589,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
 * we might run off the end of the bounds table if we are on
 * a 64-bit kernel and try to get 8 bytes.
 */
-int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
 		long __user *bd_entry_ptr)
 {
 	u32 bd_entry_32;
@@ -796,7 +797,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
 			return -EINVAL;
 
 		len = min(vma->vm_end, end) - addr;
-		zap_page_range(vma, addr, len, NULL);
+		zap_page_range(vma, addr, len);
 		trace_mpx_unmap_zap(addr, addr+len);
 
 		vma = vma->vm_next;
@@ -893,7 +894,7 @@ static int unmap_entire_bt(struct mm_struct *mm,
 	 * avoid recursion, do_munmap() will check whether it comes
 	 * from one bounds table through VM_MPX flag.
 	 */
-	return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
+	return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
 }
 
 static int try_unmap_single_bt(struct mm_struct *mm,
@@ -947,7 +948,7 @@ static int try_unmap_single_bt(struct mm_struct *mm,
 		end = bta_end_vaddr;
 	}
 
-	bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start);
+	bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start);
 	ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
 	/*
 	 * No bounds table there, so nothing to unmap.
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 3f35b48d1d9d..12dcad7297a5 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -19,7 +19,7 @@
 
 #include "numa_internal.h"
 
-int __initdata numa_off;
+int numa_off;
 nodemask_t numa_nodes_parsed __initdata;
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e3353c97d086..28d42130243c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -20,7 +20,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #include <asm/pat.h>
@@ -214,7 +214,20 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 			    int in_flags, struct page **pages)
 {
 	unsigned int i, level;
+#ifdef CONFIG_PREEMPT
+	/*
+	 * Avoid wbinvd() because it causes latencies on all CPUs,
+	 * regardless of any CPU isolation that may be in effect.
+	 *
+	 * This should be extended for CAT enabled systems independent of
+	 * PREEMPT because wbinvd() does not respect the CAT partitions and
+	 * this is exposed to unpriviledged users through the graphics
+	 * subsystem.
+	 */
+	unsigned long do_wbinvd = 0;
+#else
 	unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
+#endif
 
 	BUG_ON(irqs_disabled());
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 83e701f160a9..efc32bc6862b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -986,20 +986,17 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 	return 0;
 }
 
-int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
-		     pfn_t pfn)
+void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
 {
 	enum page_cache_mode pcm;
 
 	if (!pat_enabled())
-		return 0;
+		return;
 
 	/* Set prot based on lookup */
 	pcm = lookup_memtype(pfn_t_to_phys(pfn));
 	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
 			 cachemode2protval(pcm));
-
-	return 0;
 }
 
 /*
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 159b52ccd600..d76485b22824 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -47,7 +47,7 @@ static u64 get_subtree_max_end(struct rb_node *node)
 {
 	u64 ret = 0;
 	if (node) {
-		struct memtype *data = container_of(node, struct memtype, rb);
+		struct memtype *data = rb_entry(node, struct memtype, rb);
 		ret = data->subtree_max_end;
 	}
 	return ret;
@@ -79,7 +79,7 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
 	struct memtype *last_lower = NULL;
 
 	while (node) {
-		struct memtype *data = container_of(node, struct memtype, rb);
+		struct memtype *data = rb_entry(node, struct memtype, rb);
 
 		if (get_subtree_max_end(node->rb_left) > start) {
 			/* Lowest overlap if any must be on left side */
@@ -121,7 +121,7 @@ static struct memtype *memtype_rb_match(struct rb_root *root,
 
 		node = rb_next(&match->rb);
 		if (node)
-			match = container_of(node, struct memtype, rb);
+			match = rb_entry(node, struct memtype, rb);
 		else
 			match = NULL;
 	}
@@ -150,7 +150,7 @@ static int memtype_rb_check_conflict(struct rb_root *root,
 
 	node = rb_next(&match->rb);
 	while (node) {
-		match = container_of(node, struct memtype, rb);
+		match = rb_entry(node, struct memtype, rb);
 
 		if (match->start >= end) /* Checked all possible matches */
 			goto success;
@@ -181,7 +181,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 	struct rb_node *parent = NULL;
 
 	while (*node) {
-		struct memtype *data = container_of(*node, struct memtype, rb);
+		struct memtype *data = rb_entry(*node, struct memtype, rb);
 
 		parent = *node;
 		if (data->subtree_max_end < newdata->end)
@@ -270,7 +270,7 @@ int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
 	}
 
 	if (node) { /* pos == i */
-		struct memtype *this = container_of(node, struct memtype, rb);
+		struct memtype *this = rb_entry(node, struct memtype, rb);
 		*out = *this;
 		return 0;
 	} else {
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3feec5af4e67..6cbdff26bb96 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -445,6 +445,26 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
 
 	return changed;
 }
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pud_t *pudp, pud_t entry, int dirty)
+{
+	int changed = !pud_same(*pudp, entry);
+
+	VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+
+	if (changed && dirty) {
+		*pudp = entry;
+		/*
+		 * We had a write-protection fault here and changed the pud
+		 * to to more permissive. No need to flush the TLB for that,
+		 * #PF is architecturally guaranteed to do that and in the
+		 * worst-case we'll generate a spurious fault.
+		 */
+	}
+
+	return changed;
+}
 #endif
 
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -474,6 +494,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 	return ret;
 }
 
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pud_t *pudp)
+{
+	int ret = 0;
+
+	if (pud_young(*pudp))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 (unsigned long *)pudp);
+
+	return ret;
+}
 #endif
 
 int ptep_clear_flush_young(struct vm_area_struct *vma,
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index f88ce0e5efd9..2dab69a706ec 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -141,8 +141,7 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
  * Called from the FPU code when creating a fresh set of FPU
  * registers. This is called from a very specific context where
  * we know the FPU regstiers are safe for use and we can use PKRU
- * directly. The fact that PKRU is only available when we are
- * using eagerfpu mode makes this possible.
+ * directly.
  */
 void copy_init_pkru_to_fpregs(void)
 {