Diffstat (limited to 'arch/x86/mm'):

 arch/x86/mm/hugetlbpage.c          | 101
 arch/x86/mm/init.c                 |  60
 arch/x86/mm/init_64.c              |  30
 arch/x86/mm/ioremap.c              |   6
 arch/x86/mm/kaslr.c                |  16
 arch/x86/mm/mem_encrypt_amd.c      |  77
 arch/x86/mm/mem_encrypt_identity.c |  11
 arch/x86/mm/mmap.c                 |   5
 arch/x86/mm/pat/set_memory.c       |   8
 arch/x86/mm/tlb.c                  |   2

 10 files changed, 151 insertions(+), 165 deletions(-)
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 807a5859a3c4..58f7f2bd535d 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -19,107 +19,6 @@
#include <asm/tlbflush.h>
#include <asm/elf.h>
-#ifdef CONFIG_HUGETLB_PAGE
-static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
- unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info = {};
-
- info.length = len;
- info.low_limit = get_mmap_base(1);
-
- /*
- * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
- * in the full address space.
- */
- info.high_limit = in_32bit_syscall() ?
- task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
-
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- return vm_unmapped_area(&info);
-}
-
-static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
- unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info = {};
-
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = PAGE_SIZE;
- info.high_limit = get_mmap_base(0);
-
- /*
- * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
- * in the full address space.
- */
- if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
- info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
-
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- addr = vm_unmapped_area(&info);
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- if (addr & ~PAGE_MASK) {
- VM_BUG_ON(addr != -ENOMEM);
- info.flags = 0;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = TASK_SIZE_LOW;
- addr = vm_unmapped_area(&info);
- }
-
- return addr;
-}
-
-unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
-
- if (len & ~huge_page_mask(h))
- return -EINVAL;
-
- if (len > TASK_SIZE)
- return -ENOMEM;
-
- /* No address checking. See comment at mmap_address_hint_valid() */
- if (flags & MAP_FIXED) {
- if (prepare_hugepage_range(file, addr, len))
- return -EINVAL;
- return addr;
- }
-
- if (addr) {
- addr &= huge_page_mask(h);
- if (!mmap_address_hint_valid(addr, len))
- goto get_unmapped_area;
-
- vma = find_vma(mm, addr);
- if (!vma || addr + len <= vm_start_gap(vma))
- return addr;
- }
-
-get_unmapped_area:
- if (!test_bit(MMF_TOPDOWN, &mm->flags))
- return hugetlb_get_unmapped_area_bottomup(file, addr, len,
- pgoff, flags);
- else
- return hugetlb_get_unmapped_area_topdown(file, addr, len,
- pgoff, flags);
-}
-#endif /* CONFIG_HUGETLB_PAGE */
#ifdef CONFIG_X86_64
bool __init arch_hugetlb_valid_size(unsigned long size)
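The removed helpers computed info.align_mask as PAGE_MASK & ~huge_page_mask(h), i.e. exactly the offset bits that must be clear in a huge-page-aligned address. A minimal standalone sketch of that arithmetic (not kernel code; assumes 4 KiB base pages and a 2 MiB huge page):

#include <stdio.h>

int main(void)
{
	unsigned long page_mask = ~((1UL << 12) - 1);	/* PAGE_MASK, 4 KiB pages */
	unsigned long huge_mask = ~((1UL << 21) - 1);	/* huge_page_mask(h), 2 MiB */
	unsigned long align_mask = page_mask & ~huge_mask;

	printf("align_mask = %#lx\n", align_mask);	/* prints 0x1ff000 */
	return 0;
}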
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index eb503f53c319..c6d29f283001 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -263,28 +263,33 @@ static void __init probe_page_size_mask(void)
}
/*
- * INVLPG may not properly flush Global entries
- * on these CPUs when PCIDs are enabled.
+ * INVLPG may not properly flush Global entries on
+ * these CPUs. New microcode fixes the issue.
*/
static const struct x86_cpu_id invlpg_miss_ids[] = {
- X86_MATCH_VFM(INTEL_ALDERLAKE, 0),
- X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0),
- X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, 0x2e),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0x42c),
+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0x11),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, 0x118),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0x4117),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0x2e),
{}
};
static void setup_pcid(void)
{
+ const struct x86_cpu_id *invlpg_miss_match;
+
if (!IS_ENABLED(CONFIG_X86_64))
return;
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
- if (x86_match_cpu(invlpg_miss_ids)) {
+ invlpg_miss_match = x86_match_cpu(invlpg_miss_ids);
+
+ if (invlpg_miss_match &&
+ boot_cpu_data.microcode < invlpg_miss_match->driver_data) {
pr_info("Incomplete global flushes, disabling PCID");
setup_clear_cpu_cap(X86_FEATURE_PCID);
return;
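The driver_data slot of each x86_cpu_id entry now carries the first microcode revision that fixes the INVLPG erratum, so PCID is disabled only when the running microcode is older. A standalone sketch of that match-then-compare pattern, with made-up types and values standing in for x86_match_cpu() and the VFM table:

#include <stdio.h>

struct cpu_id { unsigned int model; unsigned long fixed_ucode; };

static const struct cpu_id invlpg_miss_ids[] = {
	{ 0x97, 0x2e },	/* stand-in for X86_MATCH_VFM(INTEL_ALDERLAKE, 0x2e) */
	{ 0, 0 }
};

static const struct cpu_id *match_cpu(unsigned int model)
{
	for (const struct cpu_id *id = invlpg_miss_ids; id->model; id++)
		if (id->model == model)
			return id;
	return NULL;
}

int main(void)
{
	unsigned long microcode = 0x2c;	/* pretend: older than the fix */
	const struct cpu_id *m = match_cpu(0x97);

	if (m && microcode < m->fixed_ucode)
		printf("Incomplete global flushes, disabling PCID\n");
	else
		printf("PCID stays enabled\n");
	return 0;
}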
@@ -1053,18 +1058,53 @@ unsigned long arch_max_swapfile_size(void)
#ifdef CONFIG_EXECMEM
static struct execmem_info execmem_info __ro_after_init;
+#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
+void execmem_fill_trapping_insns(void *ptr, size_t size, bool writeable)
+{
+ /* fill memory with INT3 instructions */
+ if (writeable)
+ memset(ptr, INT3_INSN_OPCODE, size);
+ else
+ text_poke_set(ptr, INT3_INSN_OPCODE, size);
+}
+#endif
+
struct execmem_info __init *execmem_arch_setup(void)
{
unsigned long start, offset = 0;
+ enum execmem_range_flags flags;
+ pgprot_t pgprot;
if (kaslr_enabled())
offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
start = MODULES_VADDR + offset;
+ if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) {
+ pgprot = PAGE_KERNEL_ROX;
+ flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE;
+ } else {
+ pgprot = PAGE_KERNEL;
+ flags = EXECMEM_KASAN_SHADOW;
+ }
+
execmem_info = (struct execmem_info){
.ranges = {
- [EXECMEM_DEFAULT] = {
+ [EXECMEM_MODULE_TEXT] = {
+ .flags = flags,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = pgprot,
+ .alignment = MODULE_ALIGN,
+ },
+ [EXECMEM_KPROBES ... EXECMEM_BPF] = {
+ .flags = EXECMEM_KASAN_SHADOW,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = MODULE_ALIGN,
+ },
+ [EXECMEM_MODULE_DATA] = {
.flags = EXECMEM_KASAN_SHADOW,
.start = start,
.end = MODULES_END,
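The single EXECMEM_DEFAULT range is split into per-type ranges, with module text mapped ROX when CONFIG_ARCH_HAS_EXECMEM_ROX is available. The [EXECMEM_KPROBES ... EXECMEM_BPF] designator is a GNU C range initializer that fills every enum slot in between; a standalone sketch with made-up range names:

#include <stdio.h>

enum range { MODULE_TEXT, KPROBES, FTRACE, BPF, MODULE_DATA, NR_RANGES };

int main(void)
{
	/* GNU extension: "[lo ... hi] =" initializes every slot in the range */
	int rox[NR_RANGES] = {
		[MODULE_TEXT] = 1,
		[KPROBES ... BPF] = 0,
		[MODULE_DATA] = 0,
	};

	for (int i = 0; i < NR_RANGES; i++)
		printf("range %d: rox=%d\n", i, rox[i]);
	return 0;
}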
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ff253648706f..01ea7c6df303 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -961,7 +961,7 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;
int ret;
- if (WARN_ON_ONCE(end > PHYSMEM_END))
+ if (WARN_ON_ONCE(end > DIRECT_MAP_PHYSMEM_END))
return -ERANGE;
ret = __add_pages(nid, start_pfn, nr_pages, params);
@@ -985,22 +985,32 @@ int arch_add_memory(int nid, u64 start, u64 size,
return add_pages(nid, start_pfn, nr_pages, params);
}
-static void __meminit free_pagetable(struct page *page, int order)
+static void free_reserved_pages(struct page *page, unsigned long nr_pages)
{
- unsigned long magic;
- unsigned int nr_pages = 1 << order;
+ while (nr_pages--)
+ free_reserved_page(page++);
+}
+static void __meminit free_pagetable(struct page *page, int order)
+{
/* bootmem page has reserved flag */
if (PageReserved(page)) {
- magic = page->index;
- if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+ unsigned long nr_pages = 1 << order;
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+ enum bootmem_type type = bootmem_type(page);
+
+ if (type == SECTION_INFO || type == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
- } else
- while (nr_pages--)
- free_reserved_page(page++);
- } else
+ } else {
+ free_reserved_pages(page, nr_pages);
+ }
+#else
+ free_reserved_pages(page, nr_pages);
+#endif
+ } else {
free_pages((unsigned long)page_address(page), order);
+ }
}
static void __meminit free_hugepage_table(struct page *page,
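free_pagetable() now consults bootmem_type() instead of reading the old page->index magic directly, and the reserved-page loop is factored into a helper shared by the CONFIG_HAVE_BOOTMEM_INFO_NODE and fallback paths. A standalone model of the extracted helper, with release() standing in for free_reserved_page():

#include <stdio.h>

static void release(unsigned long pfn) { printf("free pfn %lu\n", pfn); }

/* mirrors the new helper: frees a 2^order run one page at a time */
static void free_reserved_pages(unsigned long pfn, unsigned long nr_pages)
{
	while (nr_pages--)
		release(pfn++);
}

int main(void)
{
	free_reserved_pages(100, 1UL << 2);	/* order 2: frees pfns 100..103 */
	return 0;
}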
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 70b02fc61d93..8d29163568a7 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -656,7 +656,8 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
paddr_next = data->next;
len = data->len;
- if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+ if ((phys_addr > paddr) &&
+ (phys_addr < (paddr + sizeof(struct setup_data) + len))) {
memunmap(data);
return true;
}
@@ -718,7 +719,8 @@ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
paddr_next = data->next;
len = data->len;
- if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+ if ((phys_addr > paddr) &&
+ (phys_addr < (paddr + sizeof(struct setup_data) + len))) {
early_memunmap(data, sizeof(*data));
return true;
}
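data->len counts only the setup_data payload, which begins after the header, so the old upper bound missed the last sizeof(struct setup_data) bytes of each blob. A standalone illustration; the struct mirrors the 16-byte boot-protocol header and the addresses are made up:

#include <stdio.h>

/* layout mirrors the 16-byte setup_data header; the payload follows it */
struct setup_data_hdr { unsigned long long next; unsigned int type, len; };

int main(void)
{
	unsigned long paddr = 0x1000;			/* header start */
	unsigned long hdr = sizeof(struct setup_data_hdr);	/* 16 */
	unsigned long len = 0x100;			/* payload only */
	unsigned long phys_addr = paddr + hdr + len - 1; /* last payload byte */

	printf("old check hits: %d\n",
	       phys_addr > paddr && phys_addr < paddr + len);		/* 0 */
	printf("fixed check hits: %d\n",
	       phys_addr > paddr && phys_addr < paddr + hdr + len);	/* 1 */
	return 0;
}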
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 230f1dee4f09..11a93542d198 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -22,7 +22,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
#include <linux/memblock.h>
#include <linux/pgtable.h>
@@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region {
} kaslr_regions[] = {
{
.base = &page_offset_base,
- .end = &physmem_end,
+ .end = &direct_map_physmem_end,
},
{
.base = &vmalloc_base,
@@ -62,8 +62,12 @@ static __initdata struct kaslr_memory_region {
},
};
-/* The end of the possible address space for physical memory */
-unsigned long physmem_end __ro_after_init;
+/*
+ * The end of the physical address space that can be mapped directly by the
+ * kernel. This starts out at ((1 << MAX_PHYSMEM_BITS) - 1), but KASLR may reduce
+ * that in order to increase the available entropy for mapping other regions.
+ */
+unsigned long direct_map_physmem_end __ro_after_init;
/* Get size in bytes used by the memory region */
static inline unsigned long get_padding(struct kaslr_memory_region *region)
@@ -94,7 +98,7 @@ void __init kernel_randomize_memory(void)
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
/* Preset the end of the possible address space for physical memory */
- physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1);
+ direct_map_physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1);
if (!kaslr_memory_enabled())
return;
@@ -145,7 +149,7 @@ void __init kernel_randomize_memory(void)
vaddr += get_padding(&kaslr_regions[i]);
/*
* KASLR trims the maximum possible size of the
- * direct-map. Update the physmem_end boundary.
+ * direct-map. Update the direct_map_physmem_end boundary.
* No rounding required as the region starts
* PUD aligned and size is in units of TB.
*/
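The rename clarifies that the variable bounds the kernel's direct map rather than physical memory as such. Its preset depends on the paging mode; a quick standalone check of the two x86-64 cases (MAX_PHYSMEM_BITS is 46 with 4-level paging and 52 with 5-level):

#include <stdio.h>

int main(void)
{
	for (int bits = 46; bits <= 52; bits += 6)	/* 4-level, then 5-level */
		printf("MAX_PHYSMEM_BITS=%d -> direct_map_physmem_end=%#llx\n",
		       bits, (1ULL << bits) - 1);
	return 0;
}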
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 86a476a426c2..774f9677458f 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -311,59 +311,82 @@ static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool en
return 0;
}
-static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+int prepare_pte_enc(struct pte_enc_desc *d)
{
- pgprot_t old_prot, new_prot;
- unsigned long pfn, pa, size;
- pte_t new_pte;
+ pgprot_t old_prot;
- pfn = pg_level_to_pfn(level, kpte, &old_prot);
- if (!pfn)
- return;
+ d->pfn = pg_level_to_pfn(d->pte_level, d->kpte, &old_prot);
+ if (!d->pfn)
+ return 1;
- new_prot = old_prot;
- if (enc)
- pgprot_val(new_prot) |= _PAGE_ENC;
+ d->new_pgprot = old_prot;
+ if (d->encrypt)
+ pgprot_val(d->new_pgprot) |= _PAGE_ENC;
else
- pgprot_val(new_prot) &= ~_PAGE_ENC;
+ pgprot_val(d->new_pgprot) &= ~_PAGE_ENC;
/* If prot is same then do nothing. */
- if (pgprot_val(old_prot) == pgprot_val(new_prot))
- return;
+ if (pgprot_val(old_prot) == pgprot_val(d->new_pgprot))
+ return 1;
- pa = pfn << PAGE_SHIFT;
- size = page_level_size(level);
+ d->pa = d->pfn << PAGE_SHIFT;
+ d->size = page_level_size(d->pte_level);
/*
- * We are going to perform in-place en-/decryption and change the
- * physical page attribute from C=1 to C=0 or vice versa. Flush the
- * caches to ensure that data gets accessed with the correct C-bit.
+ * In-place en-/decryption and physical page attribute change
+ * from C=1 to C=0 or vice versa will be performed. Flush the
+ * caches to ensure that data gets accessed with the correct
+ * C-bit.
*/
- clflush_cache_range(__va(pa), size);
+ if (d->va)
+ clflush_cache_range(d->va, d->size);
+ else
+ clflush_cache_range(__va(d->pa), d->size);
+
+ return 0;
+}
+
+void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot)
+{
+ pte_t new_pte;
+
+ /* Change the page encryption mask. */
+ new_pte = pfn_pte(pfn, new_prot);
+ set_pte_atomic(kpte, new_pte);
+}
+
+static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+{
+ struct pte_enc_desc d = {
+ .kpte = kpte,
+ .pte_level = level,
+ .encrypt = enc
+ };
+
+ if (prepare_pte_enc(&d))
+ return;
/* Encrypt/decrypt the contents in-place */
if (enc) {
- sme_early_encrypt(pa, size);
+ sme_early_encrypt(d.pa, d.size);
} else {
- sme_early_decrypt(pa, size);
+ sme_early_decrypt(d.pa, d.size);
/*
* On SNP, the page state change in the RMP table must happen
* before the page table updates.
*/
- early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
+ early_snp_set_memory_shared((unsigned long)__va(d.pa), d.pa, 1);
}
- /* Change the page encryption mask. */
- new_pte = pfn_pte(pfn, new_prot);
- set_pte_atomic(kpte, new_pte);
+ set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
/*
* If page is set encrypted in the page table, then update the RMP table to
* add this page as private.
*/
if (enc)
- early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
+ early_snp_set_memory_private((unsigned long)__va(d.pa), d.pa, 1);
}
static int __init early_set_memory_enc_dec(unsigned long vaddr,
@@ -467,6 +490,8 @@ void __init sme_early_init(void)
x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
+ x86_platform.guest.enc_kexec_begin = snp_kexec_begin;
+ x86_platform.guest.enc_kexec_finish = snp_kexec_finish;
/*
* AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
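Splitting __set_clr_pte_enc() into prepare_pte_enc() and set_pte_enc_mask() exposes the prepare/apply halves to other callers, plausibly the SNP kexec path wired up through the new enc_kexec_begin/finish hooks above. A standalone model (not the kernel code) of the resulting control flow, with the descriptor reduced to the minimum and an illustrative C-bit position:

#include <stdbool.h>
#include <stdio.h>

#define _PAGE_ENC (1ULL << 47)	/* illustrative C-bit position */

struct pte_enc_desc {
	unsigned long long old_prot, new_prot;
	bool encrypt;
};

/* prepare: compute the new protection, report "nothing to do" as nonzero */
static int prepare_pte_enc(struct pte_enc_desc *d)
{
	d->new_prot = d->encrypt ? d->old_prot | _PAGE_ENC
				 : d->old_prot & ~_PAGE_ENC;
	return d->new_prot == d->old_prot;
}

int main(void)
{
	struct pte_enc_desc d = { .old_prot = 0x163, .encrypt = true };

	if (prepare_pte_enc(&d))
		return 0;
	/* in-place encryption + RMP update would happen here */
	printf("apply new prot %#llx\n", d.new_prot);	/* set_pte_enc_mask() */
	return 0;
}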
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index ac33b2263a43..e6c7686f443a 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -495,10 +495,10 @@ void __head sme_enable(struct boot_params *bp)
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
unsigned long me_mask;
- bool snp;
+ bool snp_en;
u64 msr;
- snp = snp_init(bp);
+ snp_en = snp_init(bp);
/* Check for the SME/SEV support leaf */
eax = 0x80000000;
@@ -531,8 +531,11 @@ void __head sme_enable(struct boot_params *bp)
RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
- /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
- if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED))
+ /*
+ * Any discrepancies between the presence of a CC blob and SNP
+ * enablement abort the guest.
+ */
+ if (snp_en ^ !!(msr & MSR_AMD64_SEV_SNP_ENABLED))
snp_abort();
/* Check if memory encryption is enabled */
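The old test aborted only when a CC blob was present without SEV-SNP enabled; the XOR also trips on the reverse inconsistency. A quick standalone truth table:

#include <stdio.h>

int main(void)
{
	for (int blob = 0; blob <= 1; blob++)
		for (int msr = 0; msr <= 1; msr++)
			printf("blob=%d msr=%d old_abort=%d new_abort=%d\n",
			       blob, msr, blob && !msr, blob ^ msr);
	return 0;
}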
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a2cabb1c81e1..b8a6ffffb451 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -163,11 +163,6 @@ unsigned long get_mmap_base(int is_legacy)
return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
}
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- return NULL;
-}
-
/**
* mmap_address_hint_valid - Validate the address hint of mmap
* @addr: Address hint
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 44f7b2ea6a07..069e421c2247 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2444,6 +2444,14 @@ int set_direct_map_default_noflush(struct page *page)
return __set_pages_p(page, 1);
}
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+{
+ if (valid)
+ return __set_pages_p(page, nr);
+
+ return __set_pages_np(page, nr);
+}
+
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
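set_direct_map_valid_noflush() batches what the single-page default/invalid helpers did one page at a time; the _noflush suffix leaves TLB maintenance to the caller. An illustrative pairing with a flush (hypothetical caller, not part of this diff; flush_tlb_kernel_range() is the existing kernel API):

/* hypothetical caller, not part of this diff */
static int hide_from_direct_map(struct page *page, unsigned long start,
				unsigned int nr)
{
	int err = set_direct_map_valid_noflush(page, nr, false);

	if (err)
		return err;
	/* _noflush: the TLB flush is the caller's responsibility */
	flush_tlb_kernel_range(start, start + nr * PAGE_SIZE);
	return 0;
}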
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 86593d1b787d..b0d5a644fc84 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -568,7 +568,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
* mm_cpumask. The TLB shootdown code can figure out from
* cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
*/
- if (WARN_ON_ONCE(prev != &init_mm &&
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && WARN_ON_ONCE(prev != &init_mm &&
!cpumask_test_cpu(cpu, mm_cpumask(next))))
cpumask_set_cpu(cpu, mm_cpumask(next));
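Wrapping the check in IS_ENABLED(CONFIG_DEBUG_VM) keeps the expression compiled and type-checked in every configuration while letting the optimizer drop the whole branch, including the corrective cpumask_set_cpu(), on non-debug builds, presumably because the inconsistency is only expected to surface during debugging. A standalone model of the short-circuit, with IS_ENABLED() reduced to identity:

#include <stdio.h>

#define CONFIG_DEBUG_VM 0	/* flip to 1 to see the check run */
#define IS_ENABLED(x) (x)	/* the kernel expands config macros instead */

static int inconsistent(void) { printf("checked\n"); return 1; }

int main(void)
{
	/* with CONFIG_DEBUG_VM 0 the whole branch folds away at compile time */
	if (IS_ENABLED(CONFIG_DEBUG_VM) && inconsistent())
		printf("fixup applied\n");
	return 0;
}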