diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/hugetlbpage.c | 101 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 60 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 30 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/kaslr.c | 16 | ||||
-rw-r--r-- | arch/x86/mm/mem_encrypt_amd.c | 77 | ||||
-rw-r--r-- | arch/x86/mm/mem_encrypt_identity.c | 11 | ||||
-rw-r--r-- | arch/x86/mm/mmap.c | 5 | ||||
-rw-r--r-- | arch/x86/mm/pat/set_memory.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 2 |
10 files changed, 151 insertions, 165 deletions
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 807a5859a3c4..58f7f2bd535d 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -19,107 +19,6 @@ #include <asm/tlbflush.h> #include <asm/elf.h> -#ifdef CONFIG_HUGETLB_PAGE -static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info = {}; - - info.length = len; - info.low_limit = get_mmap_base(1); - - /* - * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area - * in the full address space. - */ - info.high_limit = in_32bit_syscall() ? - task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW); - - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - return vm_unmapped_area(&info); -} - -static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info = {}; - - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; - info.low_limit = PAGE_SIZE; - info.high_limit = get_mmap_base(0); - - /* - * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area - * in the full address space. - */ - if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall()) - info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW; - - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - addr = vm_unmapped_area(&info); - - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - if (addr & ~PAGE_MASK) { - VM_BUG_ON(addr != -ENOMEM); - info.flags = 0; - info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = TASK_SIZE_LOW; - addr = vm_unmapped_area(&info); - } - - return addr; -} - -unsigned long -hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - - if (len & ~huge_page_mask(h)) - return -EINVAL; - - if (len > TASK_SIZE) - return -ENOMEM; - - /* No address checking. See comment at mmap_address_hint_valid() */ - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(file, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr &= huge_page_mask(h); - if (!mmap_address_hint_valid(addr, len)) - goto get_unmapped_area; - - vma = find_vma(mm, addr); - if (!vma || addr + len <= vm_start_gap(vma)) - return addr; - } - -get_unmapped_area: - if (!test_bit(MMF_TOPDOWN, &mm->flags)) - return hugetlb_get_unmapped_area_bottomup(file, addr, len, - pgoff, flags); - else - return hugetlb_get_unmapped_area_topdown(file, addr, len, - pgoff, flags); -} -#endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_X86_64 bool __init arch_hugetlb_valid_size(unsigned long size) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index eb503f53c319..c6d29f283001 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -263,28 +263,33 @@ static void __init probe_page_size_mask(void) } /* - * INVLPG may not properly flush Global entries - * on these CPUs when PCIDs are enabled. + * INVLPG may not properly flush Global entries on + * these CPUs. New microcode fixes the issue. */ static const struct x86_cpu_id invlpg_miss_ids[] = { - X86_MATCH_VFM(INTEL_ALDERLAKE, 0), - X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0), - X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0), - X86_MATCH_VFM(INTEL_RAPTORLAKE, 0), - X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0), - X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0), + X86_MATCH_VFM(INTEL_ALDERLAKE, 0x2e), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0x42c), + X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0x11), + X86_MATCH_VFM(INTEL_RAPTORLAKE, 0x118), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0x4117), + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0x2e), {} }; static void setup_pcid(void) { + const struct x86_cpu_id *invlpg_miss_match; + if (!IS_ENABLED(CONFIG_X86_64)) return; if (!boot_cpu_has(X86_FEATURE_PCID)) return; - if (x86_match_cpu(invlpg_miss_ids)) { + invlpg_miss_match = x86_match_cpu(invlpg_miss_ids); + + if (invlpg_miss_match && + boot_cpu_data.microcode < invlpg_miss_match->driver_data) { pr_info("Incomplete global flushes, disabling PCID"); setup_clear_cpu_cap(X86_FEATURE_PCID); return; @@ -1053,18 +1058,53 @@ unsigned long arch_max_swapfile_size(void) #ifdef CONFIG_EXECMEM static struct execmem_info execmem_info __ro_after_init; +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +void execmem_fill_trapping_insns(void *ptr, size_t size, bool writeable) +{ + /* fill memory with INT3 instructions */ + if (writeable) + memset(ptr, INT3_INSN_OPCODE, size); + else + text_poke_set(ptr, INT3_INSN_OPCODE, size); +} +#endif + struct execmem_info __init *execmem_arch_setup(void) { unsigned long start, offset = 0; + enum execmem_range_flags flags; + pgprot_t pgprot; if (kaslr_enabled()) offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; start = MODULES_VADDR + offset; + if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) { + pgprot = PAGE_KERNEL_ROX; + flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE; + } else { + pgprot = PAGE_KERNEL; + flags = EXECMEM_KASAN_SHADOW; + } + execmem_info = (struct execmem_info){ .ranges = { - [EXECMEM_DEFAULT] = { + [EXECMEM_MODULE_TEXT] = { + .flags = flags, + .start = start, + .end = MODULES_END, + .pgprot = pgprot, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_KPROBES ... EXECMEM_BPF] = { + .flags = EXECMEM_KASAN_SHADOW, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_MODULE_DATA] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ff253648706f..01ea7c6df303 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -961,7 +961,7 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1; int ret; - if (WARN_ON_ONCE(end > PHYSMEM_END)) + if (WARN_ON_ONCE(end > DIRECT_MAP_PHYSMEM_END)) return -ERANGE; ret = __add_pages(nid, start_pfn, nr_pages, params); @@ -985,22 +985,32 @@ int arch_add_memory(int nid, u64 start, u64 size, return add_pages(nid, start_pfn, nr_pages, params); } -static void __meminit free_pagetable(struct page *page, int order) +static void free_reserved_pages(struct page *page, unsigned long nr_pages) { - unsigned long magic; - unsigned int nr_pages = 1 << order; + while (nr_pages--) + free_reserved_page(page++); +} +static void __meminit free_pagetable(struct page *page, int order) +{ /* bootmem page has reserved flag */ if (PageReserved(page)) { - magic = page->index; - if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { + unsigned long nr_pages = 1 << order; +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE + enum bootmem_type type = bootmem_type(page); + + if (type == SECTION_INFO || type == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); - } else - while (nr_pages--) - free_reserved_page(page++); - } else + } else { + free_reserved_pages(page, nr_pages); + } +#else + free_reserved_pages(page, nr_pages); +#endif + } else { free_pages((unsigned long)page_address(page), order); + } } static void __meminit free_hugepage_table(struct page *page, diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 70b02fc61d93..8d29163568a7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -656,7 +656,8 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, paddr_next = data->next; len = data->len; - if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { + if ((phys_addr > paddr) && + (phys_addr < (paddr + sizeof(struct setup_data) + len))) { memunmap(data); return true; } @@ -718,7 +719,8 @@ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, paddr_next = data->next; len = data->len; - if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { + if ((phys_addr > paddr) && + (phys_addr < (paddr + sizeof(struct setup_data) + len))) { early_memunmap(data, sizeof(*data)); return true; } diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 230f1dee4f09..11a93542d198 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -22,7 +22,7 @@ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/memblock.h> #include <linux/pgtable.h> @@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region { } kaslr_regions[] = { { .base = &page_offset_base, - .end = &physmem_end, + .end = &direct_map_physmem_end, }, { .base = &vmalloc_base, @@ -62,8 +62,12 @@ static __initdata struct kaslr_memory_region { }, }; -/* The end of the possible address space for physical memory */ -unsigned long physmem_end __ro_after_init; +/* + * The end of the physical address space that can be mapped directly by the + * kernel. This starts out at (1<<MAX_PHYSMEM_BITS) - 1), but KASLR may reduce + * that in order to increase the available entropy for mapping other regions. + */ +unsigned long direct_map_physmem_end __ro_after_init; /* Get size in bytes used by the memory region */ static inline unsigned long get_padding(struct kaslr_memory_region *region) @@ -94,7 +98,7 @@ void __init kernel_randomize_memory(void) BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); /* Preset the end of the possible address space for physical memory */ - physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1); + direct_map_physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1); if (!kaslr_memory_enabled()) return; @@ -145,7 +149,7 @@ void __init kernel_randomize_memory(void) vaddr += get_padding(&kaslr_regions[i]); /* * KASLR trims the maximum possible size of the - * direct-map. Update the physmem_end boundary. + * direct-map. Update the direct_map_physmem_end boundary. * No rounding required as the region starts * PUD aligned and size is in units of TB. */ diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 86a476a426c2..774f9677458f 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -311,59 +311,82 @@ static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool en return 0; } -static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) +int prepare_pte_enc(struct pte_enc_desc *d) { - pgprot_t old_prot, new_prot; - unsigned long pfn, pa, size; - pte_t new_pte; + pgprot_t old_prot; - pfn = pg_level_to_pfn(level, kpte, &old_prot); - if (!pfn) - return; + d->pfn = pg_level_to_pfn(d->pte_level, d->kpte, &old_prot); + if (!d->pfn) + return 1; - new_prot = old_prot; - if (enc) - pgprot_val(new_prot) |= _PAGE_ENC; + d->new_pgprot = old_prot; + if (d->encrypt) + pgprot_val(d->new_pgprot) |= _PAGE_ENC; else - pgprot_val(new_prot) &= ~_PAGE_ENC; + pgprot_val(d->new_pgprot) &= ~_PAGE_ENC; /* If prot is same then do nothing. */ - if (pgprot_val(old_prot) == pgprot_val(new_prot)) - return; + if (pgprot_val(old_prot) == pgprot_val(d->new_pgprot)) + return 1; - pa = pfn << PAGE_SHIFT; - size = page_level_size(level); + d->pa = d->pfn << PAGE_SHIFT; + d->size = page_level_size(d->pte_level); /* - * We are going to perform in-place en-/decryption and change the - * physical page attribute from C=1 to C=0 or vice versa. Flush the - * caches to ensure that data gets accessed with the correct C-bit. + * In-place en-/decryption and physical page attribute change + * from C=1 to C=0 or vice versa will be performed. Flush the + * caches to ensure that data gets accessed with the correct + * C-bit. */ - clflush_cache_range(__va(pa), size); + if (d->va) + clflush_cache_range(d->va, d->size); + else + clflush_cache_range(__va(d->pa), d->size); + + return 0; +} + +void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) +{ + pte_t new_pte; + + /* Change the page encryption mask. */ + new_pte = pfn_pte(pfn, new_prot); + set_pte_atomic(kpte, new_pte); +} + +static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) +{ + struct pte_enc_desc d = { + .kpte = kpte, + .pte_level = level, + .encrypt = enc + }; + + if (prepare_pte_enc(&d)) + return; /* Encrypt/decrypt the contents in-place */ if (enc) { - sme_early_encrypt(pa, size); + sme_early_encrypt(d.pa, d.size); } else { - sme_early_decrypt(pa, size); + sme_early_decrypt(d.pa, d.size); /* * ON SNP, the page state in the RMP table must happen * before the page table updates. */ - early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1); + early_snp_set_memory_shared((unsigned long)__va(d.pa), d.pa, 1); } - /* Change the page encryption mask. */ - new_pte = pfn_pte(pfn, new_prot); - set_pte_atomic(kpte, new_pte); + set_pte_enc_mask(kpte, d.pfn, d.new_pgprot); /* * If page is set encrypted in the page table, then update the RMP table to * add this page as private. */ if (enc) - early_snp_set_memory_private((unsigned long)__va(pa), pa, 1); + early_snp_set_memory_private((unsigned long)__va(d.pa), d.pa, 1); } static int __init early_set_memory_enc_dec(unsigned long vaddr, @@ -467,6 +490,8 @@ void __init sme_early_init(void) x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; + x86_platform.guest.enc_kexec_begin = snp_kexec_begin; + x86_platform.guest.enc_kexec_finish = snp_kexec_finish; /* * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index ac33b2263a43..e6c7686f443a 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -495,10 +495,10 @@ void __head sme_enable(struct boot_params *bp) unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; unsigned long me_mask; - bool snp; + bool snp_en; u64 msr; - snp = snp_init(bp); + snp_en = snp_init(bp); /* Check for the SME/SEV support leaf */ eax = 0x80000000; @@ -531,8 +531,11 @@ void __head sme_enable(struct boot_params *bp) RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV); feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; - /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */ - if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED)) + /* + * Any discrepancies between the presence of a CC blob and SNP + * enablement abort the guest. + */ + if (snp_en ^ !!(msr & MSR_AMD64_SEV_SNP_ENABLED)) snp_abort(); /* Check if memory encryption is enabled */ diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index a2cabb1c81e1..b8a6ffffb451 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -163,11 +163,6 @@ unsigned long get_mmap_base(int is_legacy) return is_legacy ? mm->mmap_legacy_base : mm->mmap_base; } -const char *arch_vma_name(struct vm_area_struct *vma) -{ - return NULL; -} - /** * mmap_address_hint_valid - Validate the address hint of mmap * @addr: Address hint diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 44f7b2ea6a07..069e421c2247 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2444,6 +2444,14 @@ int set_direct_map_default_noflush(struct page *page) return __set_pages_p(page, 1); } +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) +{ + if (valid) + return __set_pages_p(page, nr); + + return __set_pages_np(page, nr); +} + #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 86593d1b787d..b0d5a644fc84 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -568,7 +568,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, * mm_cpumask. The TLB shootdown code can figure out from * cpu_tlbstate_shared.is_lazy whether or not to send an IPI. */ - if (WARN_ON_ONCE(prev != &init_mm && + if (IS_ENABLED(CONFIG_DEBUG_VM) && WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next)))) cpumask_set_cpu(cpu, mm_cpumask(next)); |