summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig13
-rw-r--r--arch/x86/boot/Makefile2
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/boot/compressed/eboot.c278
-rw-r--r--arch/x86/boot/compressed/eboot.h30
-rw-r--r--arch/x86/boot/compressed/efi_stub_32.S87
-rw-r--r--arch/x86/boot/compressed/efi_stub_64.S5
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S65
-rw-r--r--arch/x86/boot/compressed/head_32.S64
-rw-r--r--arch/x86/boot/compressed/head_64.S97
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h10
-rw-r--r--arch/x86/include/asm/efi.h244
-rw-r--r--arch/x86/include/asm/memtype.h27
-rw-r--r--arch/x86/include/asm/mmu_context.h86
-rw-r--r--arch/x86/include/asm/mtrr.h4
-rw-r--r--arch/x86/include/asm/pat.h27
-rw-r--r--arch/x86/include/asm/pci.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_areas.h53
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h57
-rw-r--r--arch/x86/include/asm/pgtable_areas.h16
-rw-r--r--arch/x86/include/asm/pgtable_types.h143
-rw-r--r--arch/x86/include/asm/vmalloc.h6
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c2
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/cpu/topology.c2
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c2
-rw-r--r--arch/x86/kernel/ldt.c83
-rw-r--r--arch/x86/kernel/setup.c1
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/mm/Makefile8
-rw-r--r--arch/x86/mm/fault.c1
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/iomap_32.c6
-rw-r--r--arch/x86/mm/ioremap.c12
-rw-r--r--arch/x86/mm/pat/Makefile5
-rw-r--r--arch/x86/mm/pat/cpa-test.c (renamed from arch/x86/mm/pageattr-test.c)0
-rw-r--r--arch/x86/mm/pat/memtype.c (renamed from arch/x86/mm/pat.c)203
-rw-r--r--arch/x86/mm/pat/memtype.h (renamed from arch/x86/mm/pat_internal.h)12
-rw-r--r--arch/x86/mm/pat/memtype_interval.c194
-rw-r--r--arch/x86/mm/pat/set_memory.c (renamed from arch/x86/mm/pageattr.c)32
-rw-r--r--arch/x86/mm/pat_interval.c185
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/physaddr.c1
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/platform/efi/Makefile3
-rw-r--r--arch/x86/platform/efi/efi.c398
-rw-r--r--arch/x86/platform/efi/efi_32.c22
-rw-r--r--arch/x86/platform/efi/efi_64.c317
-rw-r--r--arch/x86/platform/efi/efi_stub_32.S109
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S43
-rw-r--r--arch/x86/platform/efi/efi_thunk_64.S121
-rw-r--r--arch/x86/platform/efi/quirks.c46
-rw-r--r--arch/x86/platform/uv/bios_uv.c169
-rw-r--r--arch/x86/xen/efi.c2
-rw-r--r--arch/x86/xen/mmu_pv.c2
58 files changed, 1479 insertions, 1834 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 038d33b8e962..56e3445972c3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1513,7 +1513,7 @@ config X86_CPA_STATISTICS
bool "Enable statistic for Change Page Attribute"
depends on DEBUG_FS
---help---
- Expose statistics about the Change Page Attribute mechanims, which
+ Expose statistics about the Change Page Attribute mechanism, which
helps to determine the effectiveness of preserving large and huge
page mappings when mapping protections are changed.
@@ -1992,11 +1992,12 @@ config EFI
platforms.
config EFI_STUB
- bool "EFI stub support"
- depends on EFI && !X86_USE_3DNOW
- select RELOCATABLE
- ---help---
- This kernel feature allows a bzImage to be loaded directly
+ bool "EFI stub support"
+ depends on EFI && !X86_USE_3DNOW
+ depends on $(cc-option,-mabi=ms) || X86_32
+ select RELOCATABLE
+ ---help---
+ This kernel feature allows a bzImage to be loaded directly
by EFI firmware without the use of a bootloader.
See Documentation/admin-guide/efi-stub.rst for more information.
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 95410d6ee2ff..748b6d28a91d 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -88,7 +88,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1dac210f7d44..56aa5fa0a66b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -89,7 +89,7 @@ vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
-vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
+vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o \
$(objtree)/drivers/firmware/efi/libstub/lib.a
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 72b08fde6de6..287393d725f0 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -6,6 +6,8 @@
*
* ----------------------------------------------------------------------- */
+#pragma GCC visibility push(hidden)
+
#include <linux/efi.h>
#include <linux/pci.h>
@@ -19,32 +21,18 @@
#include "eboot.h"
static efi_system_table_t *sys_table;
+extern const bool efi_is64;
-static struct efi_config *efi_early;
-
-__pure const struct efi_config *__efi_early(void)
+__pure efi_system_table_t *efi_system_table(void)
{
- return efi_early;
+ return sys_table;
}
-#define BOOT_SERVICES(bits) \
-static void setup_boot_services##bits(struct efi_config *c) \
-{ \
- efi_system_table_##bits##_t *table; \
- \
- table = (typeof(table))sys_table; \
- \
- c->runtime_services = table->runtime; \
- c->boot_services = table->boottime; \
- c->text_output = table->con_out; \
-}
-BOOT_SERVICES(32);
-BOOT_SERVICES(64);
-
-void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
+__attribute_const__ bool efi_is_64bit(void)
{
- efi_call_proto(efi_simple_text_output_protocol, output_string,
- efi_early->text_output, str);
+ if (IS_ENABLED(CONFIG_EFI_MIXED))
+ return efi_is64;
+ return IS_ENABLED(CONFIG_X86_64);
}
static efi_status_t
@@ -63,17 +51,17 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
* large romsize. The UEFI spec limits the size of option ROMs to 16
* MiB so we reject any ROMs over 16 MiB in size to catch this.
*/
- romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol,
- romimage, pci);
- romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci);
+ romimage = efi_table_attr(pci, romimage);
+ romsize = efi_table_attr(pci, romsize);
if (!romimage || !romsize || romsize > SZ_16M)
return EFI_INVALID_PARAMETER;
size = romsize + sizeof(*rom);
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
+ status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+ (void **)&rom);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'rom'\n");
+ efi_printk("Failed to allocate memory for 'rom'\n");
return status;
}
@@ -85,27 +73,24 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
rom->pcilen = pci->romsize;
*__rom = rom;
- status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
- EfiPciIoWidthUint16, PCI_VENDOR_ID, 1,
- &rom->vendor);
+ status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
+ PCI_VENDOR_ID, 1, &rom->vendor);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to read rom->vendor\n");
+ efi_printk("Failed to read rom->vendor\n");
goto free_struct;
}
- status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
- EfiPciIoWidthUint16, PCI_DEVICE_ID, 1,
- &rom->devid);
+ status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
+ PCI_DEVICE_ID, 1, &rom->devid);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to read rom->devid\n");
+ efi_printk("Failed to read rom->devid\n");
goto free_struct;
}
- status = efi_call_proto(efi_pci_io_protocol, get_location, pci,
- &rom->segment, &rom->bus, &rom->device,
- &rom->function);
+ status = efi_call_proto(pci, get_location, &rom->segment, &rom->bus,
+ &rom->device, &rom->function);
if (status != EFI_SUCCESS)
goto free_struct;
@@ -114,7 +99,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
return status;
free_struct:
- efi_call_early(free_pool, rom);
+ efi_bs_call(free_pool, rom);
return status;
}
@@ -133,27 +118,24 @@ static void setup_efi_pci(struct boot_params *params)
void **pci_handle = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
unsigned long size = 0;
- unsigned long nr_pci;
struct setup_data *data;
+ efi_handle_t h;
int i;
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- &pci_proto, NULL, &size, pci_handle);
+ status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+ &pci_proto, NULL, &size, pci_handle);
if (status == EFI_BUFFER_TOO_SMALL) {
- status = efi_call_early(allocate_pool,
- EFI_LOADER_DATA,
- size, (void **)&pci_handle);
+ status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+ (void **)&pci_handle);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n");
+ efi_printk("Failed to allocate memory for 'pci_handle'\n");
return;
}
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL, &pci_proto,
- NULL, &size, pci_handle);
+ status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+ &pci_proto, NULL, &size, pci_handle);
}
if (status != EFI_SUCCESS)
@@ -164,15 +146,12 @@ static void setup_efi_pci(struct boot_params *params)
while (data && data->next)
data = (struct setup_data *)(unsigned long)data->next;
- nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
- for (i = 0; i < nr_pci; i++) {
+ for_each_efi_handle(h, pci_handle, size, i) {
efi_pci_io_protocol_t *pci = NULL;
struct pci_setup_rom *rom;
- status = efi_call_early(handle_protocol,
- efi_is_64bit() ? ((u64 *)pci_handle)[i]
- : ((u32 *)pci_handle)[i],
- &pci_proto, (void **)&pci);
+ status = efi_bs_call(handle_protocol, h, &pci_proto,
+ (void **)&pci);
if (status != EFI_SUCCESS || !pci)
continue;
@@ -189,7 +168,7 @@ static void setup_efi_pci(struct boot_params *params)
}
free_handle:
- efi_call_early(free_pool, pci_handle);
+ efi_bs_call(free_pool, pci_handle);
}
static void retrieve_apple_device_properties(struct boot_params *boot_params)
@@ -198,34 +177,34 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
struct setup_data *data, *new;
efi_status_t status;
u32 size = 0;
- void *p;
+ apple_properties_protocol_t *p;
- status = efi_call_early(locate_protocol, &guid, NULL, &p);
+ status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&p);
if (status != EFI_SUCCESS)
return;
- if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) {
- efi_printk(sys_table, "Unsupported properties proto version\n");
+ if (efi_table_attr(p, version) != 0x10000) {
+ efi_printk("Unsupported properties proto version\n");
return;
}
- efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size);
+ efi_call_proto(p, get_all, NULL, &size);
if (!size)
return;
do {
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size + sizeof(struct setup_data), &new);
+ status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
+ size + sizeof(struct setup_data),
+ (void **)&new);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'properties'\n");
+ efi_printk("Failed to allocate memory for 'properties'\n");
return;
}
- status = efi_call_proto(apple_properties_protocol, get_all, p,
- new->data, &size);
+ status = efi_call_proto(p, get_all, new->data, &size);
if (status == EFI_BUFFER_TOO_SMALL)
- efi_call_early(free_pool, new);
+ efi_bs_call(free_pool, new);
} while (status == EFI_BUFFER_TOO_SMALL);
new->type = SETUP_APPLE_PROPERTIES;
@@ -247,7 +226,7 @@ static const efi_char16_t apple[] = L"Apple";
static void setup_quirks(struct boot_params *boot_params)
{
efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
- efi_table_attr(efi_system_table, fw_vendor, sys_table);
+ efi_table_attr(efi_system_table(), fw_vendor);
if (!memcmp(fw_vendor, apple, sizeof(apple))) {
if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
@@ -265,17 +244,16 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
u32 width, height;
void **uga_handle = NULL;
efi_uga_draw_protocol_t *uga = NULL, *first_uga;
- unsigned long nr_ugas;
+ efi_handle_t handle;
int i;
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size, (void **)&uga_handle);
+ status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+ (void **)&uga_handle);
if (status != EFI_SUCCESS)
return status;
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- uga_proto, NULL, &size, uga_handle);
+ status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+ uga_proto, NULL, &size, uga_handle);
if (status != EFI_SUCCESS)
goto free_handle;
@@ -283,24 +261,20 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
width = 0;
first_uga = NULL;
- nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
- for (i = 0; i < nr_ugas; i++) {
+ for_each_efi_handle(handle, uga_handle, size, i) {
efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
u32 w, h, depth, refresh;
void *pciio;
- unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i]
- : ((u32 *)uga_handle)[i];
- status = efi_call_early(handle_protocol, handle,
- uga_proto, (void **)&uga);
+ status = efi_bs_call(handle_protocol, handle, uga_proto,
+ (void **)&uga);
if (status != EFI_SUCCESS)
continue;
pciio = NULL;
- efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
+ efi_bs_call(handle_protocol, handle, &pciio_proto, &pciio);
- status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga,
- &w, &h, &depth, &refresh);
+ status = efi_call_proto(uga, get_mode, &w, &h, &depth, &refresh);
if (status == EFI_SUCCESS && (!first_uga || pciio)) {
width = w;
height = h;
@@ -336,7 +310,7 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
si->rsvd_pos = 24;
free_handle:
- efi_call_early(free_pool, uga_handle);
+ efi_bs_call(free_pool, uga_handle);
return status;
}
@@ -355,37 +329,38 @@ void setup_graphics(struct boot_params *boot_params)
memset(si, 0, sizeof(*si));
size = 0;
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- &graphics_proto, NULL, &size, gop_handle);
+ status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+ &graphics_proto, NULL, &size, gop_handle);
if (status == EFI_BUFFER_TOO_SMALL)
- status = efi_setup_gop(NULL, si, &graphics_proto, size);
+ status = efi_setup_gop(si, &graphics_proto, size);
if (status != EFI_SUCCESS) {
size = 0;
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- &uga_proto, NULL, &size, uga_handle);
+ status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+ &uga_proto, NULL, &size, uga_handle);
if (status == EFI_BUFFER_TOO_SMALL)
setup_uga(si, &uga_proto, size);
}
}
+void startup_32(struct boot_params *boot_params);
+
+void __noreturn efi_stub_entry(efi_handle_t handle,
+ efi_system_table_t *sys_table_arg,
+ struct boot_params *boot_params);
+
/*
* Because the x86 boot code expects to be passed a boot_params we
* need to create one ourselves (usually the bootloader would create
* one for us).
- *
- * The caller is responsible for filling out ->code32_start in the
- * returned boot_params.
*/
-struct boot_params *make_boot_params(struct efi_config *c)
+efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
+ efi_system_table_t *sys_table_arg)
{
struct boot_params *boot_params;
struct apm_bios_info *bi;
struct setup_header *hdr;
efi_loaded_image_t *image;
- void *handle;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
@@ -393,31 +368,22 @@ struct boot_params *make_boot_params(struct efi_config *c)
unsigned long ramdisk_addr;
unsigned long ramdisk_size;
- efi_early = c;
- sys_table = (efi_system_table_t *)(unsigned long)efi_early->table;
- handle = (void *)(unsigned long)efi_early->image_handle;
+ sys_table = sys_table_arg;
/* Check if we were booted by the EFI firmware */
if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- return NULL;
-
- if (efi_is_64bit())
- setup_boot_services64(efi_early);
- else
- setup_boot_services32(efi_early);
+ return EFI_INVALID_PARAMETER;
- status = efi_call_early(handle_protocol, handle,
- &proto, (void *)&image);
+ status = efi_bs_call(handle_protocol, handle, &proto, (void *)&image);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
- return NULL;
+ efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+ return status;
}
- status = efi_low_alloc(sys_table, 0x4000, 1,
- (unsigned long *)&boot_params);
+ status = efi_low_alloc(0x4000, 1, (unsigned long *)&boot_params);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate lowmem for boot params\n");
- return NULL;
+ efi_printk("Failed to allocate lowmem for boot params\n");
+ return status;
}
memset(boot_params, 0x0, 0x4000);
@@ -439,7 +405,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
- cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
+ cmdline_ptr = efi_convert_cmdline(image, &options_size);
if (!cmdline_ptr)
goto fail;
@@ -457,15 +423,15 @@ struct boot_params *make_boot_params(struct efi_config *c)
if (status != EFI_SUCCESS)
goto fail2;
- status = handle_cmdline_files(sys_table, image,
+ status = handle_cmdline_files(image,
(char *)(unsigned long)hdr->cmd_line_ptr,
"initrd=", hdr->initrd_addr_max,
&ramdisk_addr, &ramdisk_size);
if (status != EFI_SUCCESS &&
hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) {
- efi_printk(sys_table, "Trying to load files to higher address\n");
- status = handle_cmdline_files(sys_table, image,
+ efi_printk("Trying to load files to higher address\n");
+ status = handle_cmdline_files(image,
(char *)(unsigned long)hdr->cmd_line_ptr,
"initrd=", -1UL,
&ramdisk_addr, &ramdisk_size);
@@ -478,14 +444,17 @@ struct boot_params *make_boot_params(struct efi_config *c)
boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32;
boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
- return boot_params;
+ hdr->code32_start = (u32)(unsigned long)startup_32;
+
+ efi_stub_entry(handle, sys_table, boot_params);
+ /* not reached */
fail2:
- efi_free(sys_table, options_size, hdr->cmd_line_ptr);
+ efi_free(options_size, hdr->cmd_line_ptr);
fail:
- efi_free(sys_table, 0x4000, (unsigned long)boot_params);
+ efi_free(0x4000, (unsigned long)boot_params);
- return NULL;
+ return status;
}
static void add_e820ext(struct boot_params *params,
@@ -620,13 +589,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
sizeof(struct e820_entry) * nr_desc;
if (*e820ext) {
- efi_call_early(free_pool, *e820ext);
+ efi_bs_call(free_pool, *e820ext);
*e820ext = NULL;
*e820ext_size = 0;
}
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size, (void **)e820ext);
+ status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
+ (void **)e820ext);
if (status == EFI_SUCCESS)
*e820ext_size = size;
@@ -650,7 +619,7 @@ static efi_status_t allocate_e820(struct boot_params *params,
boot_map.key_ptr = NULL;
boot_map.buff_size = &buff_size;
- status = efi_get_memory_map(sys_table, &boot_map);
+ status = efi_get_memory_map(&boot_map);
if (status != EFI_SUCCESS)
return status;
@@ -672,8 +641,7 @@ struct exit_boot_struct {
struct efi_info *efi;
};
-static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
- struct efi_boot_memmap *map,
+static efi_status_t exit_boot_func(struct efi_boot_memmap *map,
void *priv)
{
const char *signature;
@@ -683,14 +651,14 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
: EFI32_LOADER_SIGNATURE;
memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
- p->efi->efi_systab = (unsigned long)sys_table_arg;
+ p->efi->efi_systab = (unsigned long)efi_system_table();
p->efi->efi_memdesc_size = *map->desc_size;
p->efi->efi_memdesc_version = *map->desc_ver;
p->efi->efi_memmap = (unsigned long)*map->map;
p->efi->efi_memmap_size = *map->map_size;
#ifdef CONFIG_X86_64
- p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
+ p->efi->efi_systab_hi = (unsigned long)efi_system_table() >> 32;
p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
#endif
@@ -722,8 +690,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
return status;
/* Might as well exit boot services now */
- status = efi_exit_boot_services(sys_table, handle, &map, &priv,
- exit_boot_func);
+ status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func);
if (status != EFI_SUCCESS)
return status;
@@ -741,33 +708,22 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
* On success we return a pointer to a boot_params structure, and NULL
* on failure.
*/
-struct boot_params *
-efi_main(struct efi_config *c, struct boot_params *boot_params)
+struct boot_params *efi_main(efi_handle_t handle,
+ efi_system_table_t *sys_table_arg,
+ struct boot_params *boot_params)
{
struct desc_ptr *gdt = NULL;
struct setup_header *hdr = &boot_params->hdr;
efi_status_t status;
struct desc_struct *desc;
- void *handle;
- efi_system_table_t *_table;
unsigned long cmdline_paddr;
- efi_early = c;
-
- _table = (efi_system_table_t *)(unsigned long)efi_early->table;
- handle = (void *)(unsigned long)efi_early->image_handle;
-
- sys_table = _table;
+ sys_table = sys_table_arg;
/* Check if we were booted by the EFI firmware */
if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
goto fail;
- if (efi_is_64bit())
- setup_boot_services64(efi_early);
- else
- setup_boot_services32(efi_early);
-
/*
* make_boot_params() may have been called before efi_main(), in which
* case this is the second time we parse the cmdline. This is ok,
@@ -782,14 +738,14 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
* otherwise we ask the BIOS.
*/
if (boot_params->secure_boot == efi_secureboot_mode_unset)
- boot_params->secure_boot = efi_get_secureboot(sys_table);
+ boot_params->secure_boot = efi_get_secureboot();
/* Ask the firmware to clear memory on unclean shutdown */
- efi_enable_reset_attack_mitigation(sys_table);
+ efi_enable_reset_attack_mitigation();
- efi_random_get_seed(sys_table);
+ efi_random_get_seed();
- efi_retrieve_tpm2_eventlog(sys_table);
+ efi_retrieve_tpm2_eventlog();
setup_graphics(boot_params);
@@ -797,18 +753,17 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
setup_quirks(boot_params);
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- sizeof(*gdt), (void **)&gdt);
+ status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt),
+ (void **)&gdt);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n");
+ efi_printk("Failed to allocate memory for 'gdt' structure\n");
goto fail;
}
gdt->size = 0x800;
- status = efi_low_alloc(sys_table, gdt->size, 8,
- (unsigned long *)&gdt->address);
+ status = efi_low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n");
+ efi_printk("Failed to allocate memory for 'gdt'\n");
goto fail;
}
@@ -818,13 +773,13 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
*/
if (hdr->pref_address != hdr->code32_start) {
unsigned long bzimage_addr = hdr->code32_start;
- status = efi_relocate_kernel(sys_table, &bzimage_addr,
+ status = efi_relocate_kernel(&bzimage_addr,
hdr->init_size, hdr->init_size,
hdr->pref_address,
hdr->kernel_alignment,
LOAD_PHYSICAL_ADDR);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "efi_relocate_kernel() failed!\n");
+ efi_printk("efi_relocate_kernel() failed!\n");
goto fail;
}
@@ -834,7 +789,7 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
status = exit_boot(boot_params, handle);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "exit_boot() failed!\n");
+ efi_printk("exit_boot() failed!\n");
goto fail;
}
@@ -927,7 +882,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
return boot_params;
fail:
- efi_printk(sys_table, "efi_main() failed!\n");
+ efi_printk("efi_main() failed!\n");
- return NULL;
+ for (;;)
+ asm("hlt");
}
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 8297387c4676..99f35343d443 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -12,22 +12,20 @@
#define DESC_TYPE_CODE_DATA (1 << 0)
-typedef struct {
- u32 get_mode;
- u32 set_mode;
- u32 blt;
-} efi_uga_draw_protocol_32_t;
+typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;
-typedef struct {
- u64 get_mode;
- u64 set_mode;
- u64 blt;
-} efi_uga_draw_protocol_64_t;
-
-typedef struct {
- void *get_mode;
- void *set_mode;
- void *blt;
-} efi_uga_draw_protocol_t;
+union efi_uga_draw_protocol {
+ struct {
+ efi_status_t (__efiapi *get_mode)(efi_uga_draw_protocol_t *,
+ u32*, u32*, u32*, u32*);
+ void *set_mode;
+ void *blt;
+ };
+ struct {
+ u32 get_mode;
+ u32 set_mode;
+ u32 blt;
+ } mixed_mode;
+};
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
deleted file mode 100644
index ed6c351d34ed..000000000000
--- a/arch/x86/boot/compressed/efi_stub_32.S
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * EFI call stub for IA32.
- *
- * This stub allows us to make EFI calls in physical mode with interrupts
- * turned off. Note that this implementation is different from the one in
- * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
- * mode at this point.
- */
-
-#include <linux/linkage.h>
-#include <asm/page_types.h>
-
-/*
- * efi_call_phys(void *, ...) is a function with variable parameters.
- * All the callers of this function assure that all the parameters are 4-bytes.
- */
-
-/*
- * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
- * So we'd better save all of them at the beginning of this function and restore
- * at the end no matter how many we use, because we can not assure EFI runtime
- * service functions will comply with gcc calling convention, too.
- */
-
-.text
-SYM_FUNC_START(efi_call_phys)
- /*
- * 0. The function can only be called in Linux kernel. So CS has been
- * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
- * the values of these registers are the same. And, the corresponding
- * GDT entries are identical. So I will do nothing about segment reg
- * and GDT, but change GDT base register in prelog and epilog.
- */
-
- /*
- * 1. Because we haven't been relocated by this point we need to
- * use relative addressing.
- */
- call 1f
-1: popl %edx
- subl $1b, %edx
-
- /*
- * 2. Now on the top of stack is the return
- * address in the caller of efi_call_phys(), then parameter 1,
- * parameter 2, ..., param n. To make things easy, we save the return
- * address of efi_call_phys in a global variable.
- */
- popl %ecx
- movl %ecx, saved_return_addr(%edx)
- /* get the function pointer into ECX*/
- popl %ecx
- movl %ecx, efi_rt_function_ptr(%edx)
-
- /*
- * 3. Call the physical function.
- */
- call *%ecx
-
- /*
- * 4. Balance the stack. And because EAX contain the return value,
- * we'd better not clobber it. We need to calculate our address
- * again because %ecx and %edx are not preserved across EFI function
- * calls.
- */
- call 1f
-1: popl %edx
- subl $1b, %edx
-
- movl efi_rt_function_ptr(%edx), %ecx
- pushl %ecx
-
- /*
- * 10. Push the saved return address onto the stack and return.
- */
- movl saved_return_addr(%edx), %ecx
- pushl %ecx
- ret
-SYM_FUNC_END(efi_call_phys)
-.previous
-
-.data
-saved_return_addr:
- .long 0
-efi_rt_function_ptr:
- .long 0
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
deleted file mode 100644
index 99494dff2113..000000000000
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#include <asm/segment.h>
-#include <asm/msr.h>
-#include <asm/processor-flags.h>
-
-#include "../../platform/efi/efi_stub_64.S"
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index 593913692d16..8fb7f6799c52 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -10,7 +10,7 @@
* needs to be able to service interrupts.
*
* On the plus side, we don't have to worry about mangling 64-bit
- * addresses into 32-bits because we're executing with an identify
+ * addresses into 32-bits because we're executing with an identity
* mapped pagetable and haven't transitioned to 64-bit virtual addresses
* yet.
*/
@@ -23,16 +23,13 @@
.code64
.text
-SYM_FUNC_START(efi64_thunk)
+SYM_FUNC_START(__efi64_thunk)
push %rbp
push %rbx
- subq $8, %rsp
- leaq efi_exit32(%rip), %rax
- movl %eax, 4(%rsp)
- leaq efi_gdt64(%rip), %rax
- movl %eax, (%rsp)
- movl %eax, 2(%rax) /* Fixup the gdt base address */
+ leaq 1f(%rip), %rbp
+ leaq efi_gdt64(%rip), %rbx
+ movl %ebx, 2(%rbx) /* Fixup the gdt base address */
movl %ds, %eax
push %rax
@@ -48,15 +45,10 @@ SYM_FUNC_START(efi64_thunk)
movl %esi, 0x0(%rsp)
movl %edx, 0x4(%rsp)
movl %ecx, 0x8(%rsp)
- movq %r8, %rsi
- movl %esi, 0xc(%rsp)
- movq %r9, %rsi
- movl %esi, 0x10(%rsp)
+ movl %r8d, 0xc(%rsp)
+ movl %r9d, 0x10(%rsp)
- sgdt save_gdt(%rip)
-
- leaq 1f(%rip), %rbx
- movq %rbx, func_rt_ptr(%rip)
+ sgdt 0x14(%rsp)
/*
* Switch to gdt with 32-bit segments. This is the firmware GDT
@@ -71,9 +63,9 @@ SYM_FUNC_START(efi64_thunk)
pushq %rax
lretq
-1: addq $32, %rsp
-
- lgdt save_gdt(%rip)
+1: lgdt 0x14(%rsp)
+ addq $32, %rsp
+ movq %rdi, %rax
pop %rbx
movl %ebx, %ss
@@ -85,26 +77,13 @@ SYM_FUNC_START(efi64_thunk)
/*
* Convert 32-bit status code into 64-bit.
*/
- test %rax, %rax
- jz 1f
- movl %eax, %ecx
- andl $0x0fffffff, %ecx
- andl $0xf0000000, %eax
- shl $32, %rax
- or %rcx, %rax
-1:
- addq $8, %rsp
+ roll $1, %eax
+ rorq $1, %rax
+
pop %rbx
pop %rbp
ret
-SYM_FUNC_END(efi64_thunk)
-
-SYM_FUNC_START_LOCAL(efi_exit32)
- movq func_rt_ptr(%rip), %rax
- push %rax
- mov %rdi, %rax
- ret
-SYM_FUNC_END(efi_exit32)
+SYM_FUNC_END(__efi64_thunk)
.code32
/*
@@ -144,9 +123,7 @@ SYM_FUNC_START_LOCAL(efi_enter32)
*/
cli
- movl 56(%esp), %eax
- movl %eax, 2(%eax)
- lgdtl (%eax)
+ lgdtl (%ebx)
movl %cr4, %eax
btsl $(X86_CR4_PAE_BIT), %eax
@@ -163,9 +140,8 @@ SYM_FUNC_START_LOCAL(efi_enter32)
xorl %eax, %eax
lldt %ax
- movl 60(%esp), %eax
pushl $__KERNEL_CS
- pushl %eax
+ pushl %ebp
/* Enable paging */
movl %cr0, %eax
@@ -181,13 +157,6 @@ SYM_DATA_START(efi32_boot_gdt)
.quad 0
SYM_DATA_END(efi32_boot_gdt)
-SYM_DATA_START_LOCAL(save_gdt)
- .word 0
- .quad 0
-SYM_DATA_END(save_gdt)
-
-SYM_DATA_LOCAL(func_rt_ptr, .quad 0)
-
SYM_DATA_START(efi_gdt64)
.word efi_gdt64_end - efi_gdt64
.long 0 /* Filled out by user */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index f2dfd6d083ef..e43ac17cb9fb 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -145,67 +145,16 @@ SYM_FUNC_START(startup_32)
SYM_FUNC_END(startup_32)
#ifdef CONFIG_EFI_STUB
-/*
- * We don't need the return address, so set up the stack so efi_main() can find
- * its arguments.
- */
-SYM_FUNC_START(efi_pe_entry)
- add $0x4, %esp
-
- call 1f
-1: popl %esi
- subl $1b, %esi
-
- popl %ecx
- movl %ecx, efi32_config(%esi) /* Handle */
- popl %ecx
- movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */
-
- /* Relocate efi_config->call() */
- leal efi32_config(%esi), %eax
- add %esi, 40(%eax)
- pushl %eax
-
- call make_boot_params
- cmpl $0, %eax
- je fail
- movl %esi, BP_code32_start(%eax)
- popl %ecx
- pushl %eax
- pushl %ecx
- jmp 2f /* Skip efi_config initialization */
-SYM_FUNC_END(efi_pe_entry)
-
SYM_FUNC_START(efi32_stub_entry)
+SYM_FUNC_START_ALIAS(efi_stub_entry)
add $0x4, %esp
- popl %ecx
- popl %edx
-
- call 1f
-1: popl %esi
- subl $1b, %esi
-
- movl %ecx, efi32_config(%esi) /* Handle */
- movl %edx, efi32_config+8(%esi) /* EFI System table pointer */
-
- /* Relocate efi_config->call() */
- leal efi32_config(%esi), %eax
- add %esi, 40(%eax)
- pushl %eax
-2:
call efi_main
- cmpl $0, %eax
movl %eax, %esi
- jne 2f
-fail:
- /* EFI init failed, so hang. */
- hlt
- jmp fail
-2:
movl BP_code32_start(%esi), %eax
leal startup_32(%eax), %eax
jmp *%eax
SYM_FUNC_END(efi32_stub_entry)
+SYM_FUNC_END_ALIAS(efi_stub_entry)
#endif
.text
@@ -262,15 +211,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
jmp *%eax
SYM_FUNC_END(.Lrelocated)
-#ifdef CONFIG_EFI_STUB
- .data
-efi32_config:
- .fill 5,8,0
- .long efi_call_phys
- .long 0
- .byte 0
-#endif
-
/*
* Stack and heap for uncompression
*/
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index ee60b81944a7..1f1f6c8139b3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -208,10 +208,12 @@ SYM_FUNC_START(startup_32)
pushl $__KERNEL_CS
leal startup_64(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
- movl efi32_config(%ebp), %ebx
- cmp $0, %ebx
+ movl efi32_boot_args(%ebp), %edi
+ cmp $0, %edi
jz 1f
- leal handover_entry(%ebp), %eax
+ leal efi64_stub_entry(%ebp), %eax
+ movl %esi, %edx
+ movl efi32_boot_args+4(%ebp), %esi
1:
#endif
pushl %eax
@@ -232,17 +234,14 @@ SYM_FUNC_START(efi32_stub_entry)
popl %edx
popl %esi
- leal (BP_scratch+4)(%esi), %esp
call 1f
1: pop %ebp
subl $1b, %ebp
- movl %ecx, efi32_config(%ebp)
- movl %edx, efi32_config+8(%ebp)
+ movl %ecx, efi32_boot_args(%ebp)
+ movl %edx, efi32_boot_args+4(%ebp)
sgdtl efi32_boot_gdt(%ebp)
-
- leal efi32_config(%ebp), %eax
- movl %eax, efi_config(%ebp)
+ movb $0, efi_is64(%ebp)
/* Disable paging */
movl %cr0, %eax
@@ -450,70 +449,17 @@ trampoline_return:
SYM_CODE_END(startup_64)
#ifdef CONFIG_EFI_STUB
-
-/* The entry point for the PE/COFF executable is efi_pe_entry. */
-SYM_FUNC_START(efi_pe_entry)
- movq %rcx, efi64_config(%rip) /* Handle */
- movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
-
- leaq efi64_config(%rip), %rax
- movq %rax, efi_config(%rip)
-
- call 1f
-1: popq %rbp
- subq $1b, %rbp
-
- /*
- * Relocate efi_config->call().
- */
- addq %rbp, efi64_config+40(%rip)
-
- movq %rax, %rdi
- call make_boot_params
- cmpq $0,%rax
- je fail
- mov %rax, %rsi
- leaq startup_32(%rip), %rax
- movl %eax, BP_code32_start(%rsi)
- jmp 2f /* Skip the relocation */
-
-handover_entry:
- call 1f
-1: popq %rbp
- subq $1b, %rbp
-
- /*
- * Relocate efi_config->call().
- */
- movq efi_config(%rip), %rax
- addq %rbp, 40(%rax)
-2:
- movq efi_config(%rip), %rdi
+ .org 0x390
+SYM_FUNC_START(efi64_stub_entry)
+SYM_FUNC_START_ALIAS(efi_stub_entry)
+ and $~0xf, %rsp /* realign the stack */
call efi_main
movq %rax,%rsi
- cmpq $0,%rax
- jne 2f
-fail:
- /* EFI init failed, so hang. */
- hlt
- jmp fail
-2:
movl BP_code32_start(%esi), %eax
leaq startup_64(%rax), %rax
jmp *%rax
-SYM_FUNC_END(efi_pe_entry)
-
- .org 0x390
-SYM_FUNC_START(efi64_stub_entry)
- movq %rdi, efi64_config(%rip) /* Handle */
- movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */
-
- leaq efi64_config(%rip), %rax
- movq %rax, efi_config(%rip)
-
- movq %rdx, %rsi
- jmp handover_entry
SYM_FUNC_END(efi64_stub_entry)
+SYM_FUNC_END_ALIAS(efi_stub_entry)
#endif
.text
@@ -682,24 +628,11 @@ SYM_DATA_START_LOCAL(gdt)
.quad 0x0000000000000000 /* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
-#ifdef CONFIG_EFI_STUB
-SYM_DATA_LOCAL(efi_config, .quad 0)
-
#ifdef CONFIG_EFI_MIXED
-SYM_DATA_START(efi32_config)
- .fill 5,8,0
- .quad efi64_thunk
- .byte 0
-SYM_DATA_END(efi32_config)
+SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0)
+SYM_DATA(efi_is64, .byte 1)
#endif
-SYM_DATA_START(efi64_config)
- .fill 5,8,0
- .quad efi_call
- .byte 1
-SYM_DATA_END(efi64_config)
-#endif /* CONFIG_EFI_STUB */
-
/*
* Stack and heap for uncompression
*/
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 804734058c77..02c0078d3787 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -6,6 +6,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>
#ifdef CONFIG_X86_64
@@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
-/* Single page reserved for the readonly IDT mapping: */
-#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE \
- (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
-
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
static inline struct entry_stack *cpu_entry_stack(int cpu)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index d028e9acdf1c..86169a24b0d8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -8,6 +8,7 @@
#include <asm/tlb.h>
#include <asm/nospec-branch.h>
#include <asm/mmu_context.h>
+#include <linux/build_bug.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -19,13 +20,16 @@
* This is the main reason why we're doing stable VA mappings for RT
* services.
*
- * This flag is used in conjunction with a chicken bit called
- * "efi=old_map" which can be used as a fallback to the old runtime
- * services mapping method in case there's some b0rkage with a
- * particular EFI implementation (haha, it is hard to hold up the
- * sarcasm here...).
+ * SGI UV1 machines are known to be incompatible with this scheme, so we
+ * provide an opt-out for these machines via a DMI quirk that sets the
+ * attribute below.
*/
-#define EFI_OLD_MEMMAP EFI_ARCH_1
+#define EFI_UV1_MEMMAP EFI_ARCH_1
+
+static inline bool efi_have_uv1_memmap(void)
+{
+ return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP);
+}
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
@@ -34,10 +38,46 @@
#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
-#ifdef CONFIG_X86_32
+/*
+ * The EFI services are called through variadic functions in many cases. These
+ * functions are implemented in assembler and support only a fixed number of
+ * arguments. The macros below allows us to check at build time that we don't
+ * try to call them with too many arguments.
+ *
+ * __efi_nargs() will return the number of arguments if it is 7 or less, and
+ * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it
+ * impossible to calculate the exact number of arguments beyond some
+ * pre-defined limit. The maximum number of arguments currently supported by
+ * any of the thunks is 7, so this is good enough for now and can be extended
+ * in the obvious way if we ever need more.
+ */
-extern asmlinkage unsigned long efi_call_phys(void *, ...);
+#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__)
+#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \
+ __efi_arg_sentinel(7), __efi_arg_sentinel(6), \
+ __efi_arg_sentinel(5), __efi_arg_sentinel(4), \
+ __efi_arg_sentinel(3), __efi_arg_sentinel(2), \
+ __efi_arg_sentinel(1), __efi_arg_sentinel(0))
+#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...) \
+ __take_second_arg(n, \
+ ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; }))
+#define __efi_arg_sentinel(n) , n
+/*
+ * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis
+ * represents more than n arguments.
+ */
+
+#define __efi_nargs_check(f, n, ...) \
+ __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n)
+#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n)
+#define __efi_nargs_check__(f, p, n) ({ \
+ BUILD_BUG_ON_MSG( \
+ (p) > (n), \
+ #f " called with too many arguments (" #p ">" #n ")"); \
+})
+
+#ifdef CONFIG_X86_32
#define arch_efi_call_virt_setup() \
({ \
kernel_fpu_begin(); \
@@ -51,13 +91,7 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...);
})
-/*
- * Wrap all the virtual calls in a way that forces the parameters on the stack.
- */
-#define arch_efi_call_virt(p, f, args...) \
-({ \
- ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \
-})
+#define arch_efi_call_virt(p, f, args...) p->f(args)
#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
@@ -65,9 +99,12 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...);
#define EFI_LOADER_SIGNATURE "EL64"
-extern asmlinkage u64 efi_call(void *fp, ...);
+extern asmlinkage u64 __efi_call(void *fp, ...);
-#define efi_call_phys(f, args...) efi_call((f), args)
+#define efi_call(...) ({ \
+ __efi_nargs_check(efi_call, 7, __VA_ARGS__); \
+ __efi_call(__VA_ARGS__); \
+})
/*
* struct efi_scratch - Scratch space used while switching to/from efi_mm
@@ -85,7 +122,7 @@ struct efi_scratch {
kernel_fpu_begin(); \
firmware_restrict_branch_speculation_start(); \
\
- if (!efi_enabled(EFI_OLD_MEMMAP)) \
+ if (!efi_have_uv1_memmap()) \
efi_switch_mm(&efi_mm); \
})
@@ -94,7 +131,7 @@ struct efi_scratch {
#define arch_efi_call_virt_teardown() \
({ \
- if (!efi_enabled(EFI_OLD_MEMMAP)) \
+ if (!efi_have_uv1_memmap()) \
efi_switch_mm(efi_scratch.prev_mm); \
\
firmware_restrict_branch_speculation_end(); \
@@ -121,8 +158,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
extern struct efi_scratch efi_scratch;
extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
-extern pgd_t * __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
extern void __init efi_print_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
@@ -140,6 +175,8 @@ extern void efi_delete_dummy_variable(void);
extern void efi_switch_mm(struct mm_struct *mm);
extern void efi_recover_from_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void);
+extern pgd_t * __init efi_uv1_memmap_phys_prolog(void);
+extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd);
struct efi_setup_data {
u64 fw_vendor;
@@ -152,93 +189,144 @@ struct efi_setup_data {
extern u64 efi_setup;
#ifdef CONFIG_EFI
+extern efi_status_t __efi64_thunk(u32, ...);
-static inline bool efi_is_native(void)
+#define efi64_thunk(...) ({ \
+ __efi_nargs_check(efi64_thunk, 6, __VA_ARGS__); \
+ __efi64_thunk(__VA_ARGS__); \
+})
+
+static inline bool efi_is_mixed(void)
{
- return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+ if (!IS_ENABLED(CONFIG_EFI_MIXED))
+ return false;
+ return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT);
}
static inline bool efi_runtime_supported(void)
{
- if (efi_is_native())
- return true;
-
- if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP))
+ if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT))
return true;
- return false;
+ return IS_ENABLED(CONFIG_EFI_MIXED);
}
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
-#ifdef CONFIG_EFI_MIXED
extern void efi_thunk_runtime_setup(void);
-extern efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map);
-#else
-static inline void efi_thunk_runtime_setup(void) {}
-static inline efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
-{
- return EFI_SUCCESS;
-}
-#endif /* CONFIG_EFI_MIXED */
-
+efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map);
/* arch specific definitions used by the stub code */
-struct efi_config {
- u64 image_handle;
- u64 table;
- u64 runtime_services;
- u64 boot_services;
- u64 text_output;
- efi_status_t (*call)(unsigned long, ...);
- bool is64;
-} __packed;
-
-__pure const struct efi_config *__efi_early(void);
+__attribute_const__ bool efi_is_64bit(void);
-static inline bool efi_is_64bit(void)
+static inline bool efi_is_native(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
- return false;
-
+ return true;
if (!IS_ENABLED(CONFIG_EFI_MIXED))
return true;
+ return efi_is_64bit();
+}
+
+#define efi_mixed_mode_cast(attr) \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(u32, __typeof__(attr)), \
+ (unsigned long)(attr), (attr))
- return __efi_early()->is64;
+#define efi_table_attr(inst, attr) \
+ (efi_is_native() \
+ ? inst->attr \
+ : (__typeof__(inst->attr)) \
+ efi_mixed_mode_cast(inst->mixed_mode.attr))
+
+/*
+ * The following macros allow translating arguments if necessary from native to
+ * mixed mode. The use case for this is to initialize the upper 32 bits of
+ * output parameters, and where the 32-bit method requires a 64-bit argument,
+ * which must be split up into two arguments to be thunked properly.
+ *
+ * As examples, the AllocatePool boot service returns the address of the
+ * allocation, but it will not set the high 32 bits of the address. To ensure
+ * that the full 64-bit address is initialized, we zero-init the address before
+ * calling the thunk.
+ *
+ * The FreePages boot service takes a 64-bit physical address even in 32-bit
+ * mode. For the thunk to work correctly, a native 64-bit call of
+ * free_pages(addr, size)
+ * must be translated to
+ * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size)
+ * so that the two 32-bit halves of addr get pushed onto the stack separately.
+ */
+
+static inline void *efi64_zero_upper(void *p)
+{
+ ((u32 *)p)[1] = 0;
+ return p;
}
-#define efi_table_attr(table, attr, instance) \
- (efi_is_64bit() ? \
- ((table##_64_t *)(unsigned long)instance)->attr : \
- ((table##_32_t *)(unsigned long)instance)->attr)
+#define __efi64_argmap_free_pages(addr, size) \
+ ((addr), 0, (size))
+
+#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \
+ ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver))
+
+#define __efi64_argmap_allocate_pool(type, size, buffer) \
+ ((type), (size), efi64_zero_upper(buffer))
-#define efi_call_proto(protocol, f, instance, ...) \
- __efi_early()->call(efi_table_attr(protocol, f, instance), \
- instance, ##__VA_ARGS__)
+#define __efi64_argmap_handle_protocol(handle, protocol, interface) \
+ ((handle), (protocol), efi64_zero_upper(interface))
-#define efi_call_early(f, ...) \
- __efi_early()->call(efi_table_attr(efi_boot_services, f, \
- __efi_early()->boot_services), __VA_ARGS__)
+#define __efi64_argmap_locate_protocol(protocol, reg, interface) \
+ ((protocol), (reg), efi64_zero_upper(interface))
-#define __efi_call_early(f, ...) \
- __efi_early()->call((unsigned long)f, __VA_ARGS__);
+/* PCI I/O */
+#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \
+ ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \
+ efi64_zero_upper(dev), efi64_zero_upper(func))
+
+/*
+ * The macros below handle the plumbing for the argument mapping. To add a
+ * mapping for a specific EFI method, simply define a macro
+ * __efi64_argmap_<method name>, following the examples above.
+ */
-#define efi_call_runtime(f, ...) \
- __efi_early()->call(efi_table_attr(efi_runtime_services, f, \
- __efi_early()->runtime_services), __VA_ARGS__)
+#define __efi64_thunk_map(inst, func, ...) \
+ efi64_thunk(inst->mixed_mode.func, \
+ __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \
+ (__VA_ARGS__)))
+
+#define __efi64_argmap(mapped, args) \
+ __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args)
+#define __efi64_argmap__0(mapped, args) __efi_eval mapped
+#define __efi64_argmap__1(mapped, args) __efi_eval args
+
+#define __efi_eat(...)
+#define __efi_eval(...) __VA_ARGS__
+
+/* The three macros below handle dispatching via the thunk if needed */
+
+#define efi_call_proto(inst, func, ...) \
+ (efi_is_native() \
+ ? inst->func(inst, ##__VA_ARGS__) \
+ : __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__))
+
+#define efi_bs_call(func, ...) \
+ (efi_is_native() \
+ ? efi_system_table()->boottime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
+ boottime), func, __VA_ARGS__))
+
+#define efi_rt_call(func, ...) \
+ (efi_is_native() \
+ ? efi_system_table()->runtime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
+ runtime), func, __VA_ARGS__))
extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr);
diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h
new file mode 100644
index 000000000000..9c2447b3555d
--- /dev/null
+++ b/arch/x86/include/asm/memtype.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_MEMTYPE_H
+#define _ASM_X86_MEMTYPE_H
+
+#include <linux/types.h>
+#include <asm/pgtable_types.h>
+
+extern bool pat_enabled(void);
+extern void pat_disable(const char *reason);
+extern void pat_init(void);
+extern void init_cache_modes(void);
+
+extern int memtype_reserve(u64 start, u64 end,
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+extern int memtype_free(u64 start, u64 end);
+
+extern int memtype_kernel_map_sync(u64 base, unsigned long size,
+ enum page_cache_mode pcm);
+
+extern int memtype_reserve_io(resource_size_t start, resource_size_t end,
+ enum page_cache_mode *pcm);
+
+extern void memtype_free_io(resource_size_t start, resource_size_t end);
+
+extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
+#endif /* _ASM_X86_MEMTYPE_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 5f33924e200f..b243234e90cb 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -69,14 +69,6 @@ struct ldt_struct {
int slot;
};
-/* This is a multiple of PAGE_SIZE. */
-#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-
-static inline void *ldt_slot_va(int slot)
-{
- return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-}
-
/*
* Used for LDT copy/destruction.
*/
@@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
-static inline void load_mm_ldt(struct mm_struct *mm)
-{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
-
- /* READ_ONCE synchronizes with smp_store_release */
- ldt = READ_ONCE(mm->context.ldt);
-
- /*
- * Any change to mm->context.ldt is followed by an IPI to all
- * CPUs with the mm active. The LDT will not be freed until
- * after the IPI is handled by all such CPUs. This means that,
- * if the ldt_struct changes before we return, the values we see
- * will be safe, and the new values will be loaded before we run
- * any user code.
- *
- * NB: don't try to convert this to use RCU without extreme care.
- * We would still need IRQs off, because we don't want to change
- * the local LDT after an IPI loaded a newer value than the one
- * that we can see.
- */
-
- if (unlikely(ldt)) {
- if (static_cpu_has(X86_FEATURE_PTI)) {
- if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
- /*
- * Whoops -- either the new LDT isn't mapped
- * (if slot == -1) or is mapped into a bogus
- * slot (if slot > 1).
- */
- clear_LDT();
- return;
- }
-
- /*
- * If page table isolation is enabled, ldt->entries
- * will not be mapped in the userspace pagetables.
- * Tell the CPU to access the LDT through the alias
- * at ldt_slot_va(ldt->slot).
- */
- set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
- } else {
- set_ldt(ldt->entries, ldt->nr_entries);
- }
- } else {
- clear_LDT();
- }
+extern void load_mm_ldt(struct mm_struct *mm);
+extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
#else
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
clear_LDT();
-#endif
}
-
static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- /*
- * Load the LDT if either the old or new mm had an LDT.
- *
- * An mm will never go from having an LDT to not having an LDT. Two
- * mms never share an LDT, so we don't gain anything by checking to
- * see whether the LDT changed. There's also no guarantee that
- * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
- * then prev->context.ldt will also be non-NULL.
- *
- * If we really cared, we could optimize the case where prev == next
- * and we're exiting lazy mode. Most of the time, if this happens,
- * we don't actually need to reload LDTR, but modify_ldt() is mostly
- * used by legacy code and emulators where we don't need this level of
- * performance.
- *
- * This uses | instead of || because it generates better code.
- */
- if (unlikely((unsigned long)prev->context.ldt |
- (unsigned long)next->context.ldt))
- load_mm_ldt(next);
-#endif
-
DEBUG_LOCKS_WARN_ON(preemptible());
}
+#endif
-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
/*
* Init a new mm. Used on mm copies, like at fork()
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index dbff1456d215..829df26fd7a3 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,7 +24,7 @@
#define _ASM_X86_MTRR_H
#include <uapi/asm/mtrr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
/*
@@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
}
static inline void mtrr_bp_init(void)
{
- pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
}
#define mtrr_ap_init() do {} while (0)
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
deleted file mode 100644
index 92015c65fa2a..000000000000
--- a/arch/x86/include/asm/pat.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PAT_H
-#define _ASM_X86_PAT_H
-
-#include <linux/types.h>
-#include <asm/pgtable_types.h>
-
-bool pat_enabled(void);
-void pat_disable(const char *reason);
-extern void pat_init(void);
-extern void init_cache_modes(void);
-
-extern int reserve_memtype(u64 start, u64 end,
- enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
-extern int free_memtype(u64 start, u64 end);
-
-extern int kernel_map_sync_memtype(u64 base, unsigned long size,
- enum page_cache_mode pcm);
-
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
- enum page_cache_mode *pcm);
-
-void io_free_memtype(resource_size_t start, resource_size_t end);
-
-bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
-
-#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 90d0731fdcb6..c1fdd43fe187 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -9,7 +9,7 @@
#include <linux/scatterlist.h>
#include <linux/numa.h>
#include <asm/io.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/x86_init.h>
struct pci_sysdata {
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h
new file mode 100644
index 000000000000..b6355416a15a
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_X86_PGTABLE_32_AREAS_H
+#define _ASM_X86_PGTABLE_32_AREAS_H
+
+#include <asm/cpu_entry_area.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8 * 1024 * 1024)
+
+#ifndef __ASSEMBLY__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
+#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
+
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+#define LDT_BASE_ADDR \
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
+
+#define PKMAP_BASE \
+ ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
+#endif
+
+#define MODULES_VADDR VMALLOC_START
+#define MODULES_END VMALLOC_END
+#define MODULES_LEN (MODULES_VADDR - MODULES_END)
+
+#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 0416d42e5bdd..5356a46b0373 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PGTABLE_32_DEFS_H
-#define _ASM_X86_PGTABLE_32_DEFS_H
+#ifndef _ASM_X86_PGTABLE_32_TYPES_H
+#define _ASM_X86_PGTABLE_32_TYPES_H
/*
* The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -20,55 +20,4 @@
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET (8 * 1024 * 1024)
-
-#ifndef __ASSEMBLY__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-/*
- * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
- * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
- * to avoid include recursion hell.
- */
-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43)
-
-/* The +1 is for the readonly IDT page: */
-#define CPU_ENTRY_AREA_BASE \
- ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
-
-#define LDT_BASE_ADDR \
- ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
-
-#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
-
-#define PKMAP_BASE \
- ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
-#endif
-
-#define MODULES_VADDR VMALLOC_START
-#define MODULES_END VMALLOC_END
-#define MODULES_LEN (MODULES_VADDR - MODULES_END)
-
-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
-
-#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
+#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
new file mode 100644
index 000000000000..d34cce1b995c
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_PGTABLE_AREAS_H
+#define _ASM_X86_PGTABLE_AREAS_H
+
+#ifdef CONFIG_X86_32
+# include <asm/pgtable_32_areas.h>
+#endif
+
+/* Single page reserved for the readonly IDT mapping: */
+#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+
+#endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b5e49e6bac63..ea7400726d7a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -110,11 +110,6 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
- _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_ACCESSED | _PAGE_DIRTY)
-
/*
* Set of bits not changed in pte_modify. The pte's
* protection key is treated like _PAGE_RW, for
@@ -136,80 +131,93 @@
*/
#ifndef __ASSEMBLY__
enum page_cache_mode {
- _PAGE_CACHE_MODE_WB = 0,
- _PAGE_CACHE_MODE_WC = 1,
+ _PAGE_CACHE_MODE_WB = 0,
+ _PAGE_CACHE_MODE_WC = 1,
_PAGE_CACHE_MODE_UC_MINUS = 2,
- _PAGE_CACHE_MODE_UC = 3,
- _PAGE_CACHE_MODE_WT = 4,
- _PAGE_CACHE_MODE_WP = 5,
- _PAGE_CACHE_MODE_NUM = 8
+ _PAGE_CACHE_MODE_UC = 3,
+ _PAGE_CACHE_MODE_WT = 4,
+ _PAGE_CACHE_MODE_WP = 5,
+
+ _PAGE_CACHE_MODE_NUM = 8
};
#endif
-#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
-#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
-#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
+#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-#define PAGE_COPY PAGE_COPY_NOEXEC
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-
-#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
-
-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP)
-
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
+#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
-#ifndef __ASSEMBLY__
+#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
+#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
-#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
+#define __PP _PAGE_PRESENT
+#define __RW _PAGE_RW
+#define _USR _PAGE_USER
+#define ___A _PAGE_ACCESSED
+#define ___D _PAGE_DIRTY
+#define ___G _PAGE_GLOBAL
+#define __NX _PAGE_NX
+
+#define _ENC _PAGE_ENC
+#define __WP _PAGE_CACHE_WP
+#define __NC _PAGE_NOCACHE
+#define _PSE _PAGE_PSE
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+#define __pg(x) __pgprot(x)
+
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
+
+#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G)
+#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+
+#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
+#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0)
+#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC)
+#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
+#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
+#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G)
+#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
+#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
+#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G)
+#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP)
+
+
+#define __PAGE_KERNEL_IO __PAGE_KERNEL
+#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
- _PAGE_DIRTY | _PAGE_ENC)
-#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
-#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
-#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
+#ifndef __ASSEMBLY__
-#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
-#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
+#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC)
+#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC)
+#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0)
+#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0)
-#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask)
+#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask)
-#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
-#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
-#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC)
+#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0)
+#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
+#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
+#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC)
+#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
+#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
+#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC)
-#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
#endif /* __ASSEMBLY__ */
@@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte)
return native_pte_val(pte) & PTE_FLAGS_MASK;
}
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
extern uint8_t __pte2cachemode_tbl[8];
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
new file mode 100644
index 000000000000..29837740b520
--- /dev/null
+++ b/arch/x86/include/asm/vmalloc.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_VMALLOC_H
+#define _ASM_X86_VMALLOC_H
+
+#include <asm/pgtable_areas.h>
+
+#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ca4a0d2cc88f..ffb869f9b2df 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -49,7 +49,7 @@
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
#include <asm/intel-family.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index aa5c064a6a22..51b9190c628b 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -15,7 +15,7 @@
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include "mtrr.h"
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 507039c20128..6a80f36b5d59 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -52,7 +52,7 @@
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include "mtrr.h"
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index adf9b71386ef..62b137c3c97a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -4,7 +4,7 @@
*/
#include <linux/cpu.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/apic.h>
#include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index ee48c3fc8a65..d3a0791bc052 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -7,7 +7,7 @@
#include <linux/cpu.h>
#include <asm/apic.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/processor.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index d2f4e706a428..f293d872602a 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -177,7 +177,7 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
* acpi_rsdp=<addr> on kernel command line to make second kernel boot
* without efi.
*/
- if (efi_enabled(EFI_OLD_MEMMAP))
+ if (efi_have_uv1_memmap())
return 0;
params->secure_boot = boot_params.secure_boot;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b2463fcb20a8..c57e1ca70fd1 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -28,6 +28,89 @@
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
+#include <asm/pgtable_areas.h>
+
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+ return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+}
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+ struct ldt_struct *ldt;
+
+ /* READ_ONCE synchronizes with smp_store_release */
+ ldt = READ_ONCE(mm->context.ldt);
+
+ /*
+ * Any change to mm->context.ldt is followed by an IPI to all
+ * CPUs with the mm active. The LDT will not be freed until
+ * after the IPI is handled by all such CPUs. This means that,
+ * if the ldt_struct changes before we return, the values we see
+ * will be safe, and the new values will be loaded before we run
+ * any user code.
+ *
+ * NB: don't try to convert this to use RCU without extreme care.
+ * We would still need IRQs off, because we don't want to change
+ * the local LDT after an IPI loaded a newer value than the one
+ * that we can see.
+ */
+
+ if (unlikely(ldt)) {
+ if (static_cpu_has(X86_FEATURE_PTI)) {
+ if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+ /*
+ * Whoops -- either the new LDT isn't mapped
+ * (if slot == -1) or is mapped into a bogus
+ * slot (if slot > 1).
+ */
+ clear_LDT();
+ return;
+ }
+
+ /*
+ * If page table isolation is enabled, ldt->entries
+ * will not be mapped in the userspace pagetables.
+ * Tell the CPU to access the LDT through the alias
+ * at ldt_slot_va(ldt->slot).
+ */
+ set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+ } else {
+ set_ldt(ldt->entries, ldt->nr_entries);
+ }
+ } else {
+ clear_LDT();
+ }
+}
+
+void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+ /*
+ * Load the LDT if either the old or new mm had an LDT.
+ *
+ * An mm will never go from having an LDT to not having an LDT. Two
+ * mms never share an LDT, so we don't gain anything by checking to
+ * see whether the LDT changed. There's also no guarantee that
+ * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+ * then prev->context.ldt will also be non-NULL.
+ *
+ * If we really cared, we could optimize the case where prev == next
+ * and we're exiting lazy mode. Most of the time, if this happens,
+ * we don't actually need to reload LDTR, but modify_ldt() is mostly
+ * used by legacy code and emulators where we don't need this level of
+ * performance.
+ *
+ * This uses | instead of || because it generates better code.
+ */
+ if (unlikely((unsigned long)prev->context.ldt |
+ (unsigned long)next->context.ldt))
+ load_mm_ldt(next);
+
+ DEBUG_LOCKS_WARN_ON(preemptible());
+}
static void refresh_ldt_segments(void)
{
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a58498364cac..2441b64d061f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -42,6 +42,7 @@
#include <asm/proto.h>
#include <asm/unwind.h>
#include <asm/vsyscall.h>
+#include <linux/vmalloc.h>
/*
* max_low_pfn_mapped: highest directly mapped pfn < 4 GB
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ce89430a7f80..23e25f3034c2 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -20,7 +20,7 @@
#include <asm/irq.h>
#include <asm/io_apic.h>
#include <asm/hpet.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
#include <asm/mach_traps.h>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6f92b40d798c..a32b847a8089 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -40,7 +40,7 @@
#include <linux/kthread.h>
#include <asm/page.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/cmpxchg.h>
#include <asm/e820/api.h>
#include <asm/io.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3b89c201ac26..345848f270e3 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -12,8 +12,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
-obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
+ pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+
+obj-y += pat/
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
@@ -23,8 +25,6 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp)
CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
-obj-$(CONFIG_X86_PAT) += pat_interval.o
-
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 304d31d8cbbc..c9c8523a3a48 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -29,6 +29,7 @@
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
#include <asm/desc.h> /* store_idt(), ... */
#include <asm/cpu_entry_area.h> /* exception stack */
+#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0a74407ef92e..8d29ae8d3eb7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -52,6 +52,7 @@
#include <asm/page_types.h>
#include <asm/cpu_entry_area.h>
#include <asm/init.h>
+#include <asm/pgtable_areas.h>
#include "mm_internal.h"
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 6748b4c2baff..f60398aeb644 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -4,7 +4,7 @@
*/
#include <asm/iomap.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <linux/export.h>
#include <linux/highmem.h>
@@ -26,7 +26,7 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
if (!is_io_mapping_possible(base, size))
return -EINVAL;
- ret = io_reserve_memtype(base, base + size, &pcm);
+ ret = memtype_reserve_io(base, base + size, &pcm);
if (ret)
return ret;
@@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(iomap_create_wc);
void iomap_free(resource_size_t base, unsigned long size)
{
- io_free_memtype(base, base + size);
+ memtype_free_io(base, base + size);
}
EXPORT_SYMBOL_GPL(iomap_free);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index b3a2936377b5..44e4beb4239f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,7 +24,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/setup.h>
#include "physaddr.h"
@@ -196,10 +196,10 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
phys_addr &= PHYSICAL_PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
- retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
+ retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
pcm, &new_pcm);
if (retval) {
- printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
+ printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
return NULL;
}
@@ -255,7 +255,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
- if (kernel_map_sync_memtype(phys_addr, size, pcm))
+ if (memtype_kernel_map_sync(phys_addr, size, pcm))
goto err_free_area;
if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
@@ -275,7 +275,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
err_free_area:
free_vm_area(area);
err_free_memtype:
- free_memtype(phys_addr, phys_addr + size);
+ memtype_free(phys_addr, phys_addr + size);
return NULL;
}
@@ -451,7 +451,7 @@ void iounmap(volatile void __iomem *addr)
return;
}
- free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
+ memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
/* Finally remove it */
o = remove_vm_area((void __force *)addr);
diff --git a/arch/x86/mm/pat/Makefile b/arch/x86/mm/pat/Makefile
new file mode 100644
index 000000000000..ea464c995161
--- /dev/null
+++ b/arch/x86/mm/pat/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := set_memory.o memtype.o
+
+obj-$(CONFIG_X86_PAT) += memtype_interval.o
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pat/cpa-test.c
index facce271e8b9..facce271e8b9 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat/memtype.c
index 2d758e19ef22..394be8611748 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -1,11 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Handle caching attributes in page tables (PAT)
+ * Page Attribute Table (PAT) support: handle memory caching attributes in page tables.
*
* Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Suresh B Siddha <suresh.b.siddha@intel.com>
*
* Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
+ *
+ * Basic principles:
+ *
+ * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and
+ * the kernel to set one of a handful of 'caching type' attributes for physical
+ * memory ranges: uncached, write-combining, write-through, write-protected,
+ * and the most commonly used and default attribute: write-back caching.
+ *
+ * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is
+ * a hardware interface to enumerate a limited number of physical memory ranges
+ * and set their caching attributes explicitly, programmed into the CPU via MSRs.
+ * Even modern CPUs have MTRRs enabled - but these are typically not touched
+ * by the kernel or by user-space (such as the X server), we rely on PAT for any
+ * additional cache attribute logic.
+ *
+ * PAT doesn't work via explicit memory ranges, but uses page table entries to add
+ * cache attribute information to the mapped memory range: there's 3 bits used,
+ * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the
+ * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT).
+ *
+ * ( There's a metric ton of finer details, such as compatibility with CPU quirks
+ * that only support 4 types of PAT entries, and interaction with MTRRs, see
+ * below for details. )
*/
#include <linux/seq_file.h>
@@ -29,44 +52,48 @@
#include <asm/mtrr.h>
#include <asm/page.h>
#include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/io.h>
-#include "pat_internal.h"
-#include "mm_internal.h"
+#include "memtype.h"
+#include "../mm_internal.h"
#undef pr_fmt
#define pr_fmt(fmt) "" fmt
-static bool __read_mostly boot_cpu_done;
+static bool __read_mostly pat_bp_initialized;
static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
-static bool __read_mostly pat_initialized;
-static bool __read_mostly init_cm_done;
+static bool __read_mostly pat_bp_enabled;
+static bool __read_mostly pat_cm_initialized;
-void pat_disable(const char *reason)
+/*
+ * PAT support is enabled by default, but can be disabled for
+ * various user-requested or hardware-forced reasons:
+ */
+void pat_disable(const char *msg_reason)
{
if (pat_disabled)
return;
- if (boot_cpu_done) {
+ if (pat_bp_initialized) {
WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
return;
}
pat_disabled = true;
- pr_info("x86/PAT: %s\n", reason);
+ pr_info("x86/PAT: %s\n", msg_reason);
}
static int __init nopat(char *str)
{
- pat_disable("PAT support disabled.");
+ pat_disable("PAT support disabled via boot option.");
return 0;
}
early_param("nopat", nopat);
bool pat_enabled(void)
{
- return pat_initialized;
+ return pat_bp_enabled;
}
EXPORT_SYMBOL_GPL(pat_enabled);
@@ -197,6 +224,8 @@ static void __init_cache_modes(u64 pat)
char pat_msg[33];
int i;
+ WARN_ON_ONCE(pat_cm_initialized);
+
pat_msg[32] = 0;
for (i = 7; i >= 0; i--) {
cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
@@ -205,28 +234,28 @@ static void __init_cache_modes(u64 pat)
}
pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
- init_cm_done = true;
+ pat_cm_initialized = true;
}
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
-static void pat_bsp_init(u64 pat)
+static void pat_bp_init(u64 pat)
{
u64 tmp_pat;
if (!boot_cpu_has(X86_FEATURE_PAT)) {
- pat_disable("PAT not supported by CPU.");
+ pat_disable("PAT not supported by the CPU.");
return;
}
rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
- pat_disable("PAT MSR is 0, disabled.");
+ pat_disable("PAT support disabled by the firmware.");
return;
}
wrmsrl(MSR_IA32_CR_PAT, pat);
- pat_initialized = true;
+ pat_bp_enabled = true;
__init_cache_modes(pat);
}
@@ -248,7 +277,7 @@ void init_cache_modes(void)
{
u64 pat = 0;
- if (init_cm_done)
+ if (pat_cm_initialized)
return;
if (boot_cpu_has(X86_FEATURE_PAT)) {
@@ -291,7 +320,7 @@ void init_cache_modes(void)
}
/**
- * pat_init - Initialize PAT MSR and PAT table
+ * pat_init - Initialize the PAT MSR and PAT table on the current CPU
*
* This function initializes PAT MSR and PAT table with an OS-defined value
* to enable additional cache attributes, WC, WT and WP.
@@ -305,6 +334,10 @@ void pat_init(void)
u64 pat;
struct cpuinfo_x86 *c = &boot_cpu_data;
+#ifndef CONFIG_X86_PAT
+ pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
+#endif
+
if (pat_disabled)
return;
@@ -364,9 +397,9 @@ void pat_init(void)
PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
}
- if (!boot_cpu_done) {
- pat_bsp_init(pat);
- boot_cpu_done = true;
+ if (!pat_bp_initialized) {
+ pat_bp_init(pat);
+ pat_bp_initialized = true;
} else {
pat_ap_init(pat);
}
@@ -542,10 +575,10 @@ static u64 sanitize_phys(u64 address)
* available type in new_type in case of no error. In case of any error
* it will return a negative return value.
*/
-int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
+int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type,
enum page_cache_mode *new_type)
{
- struct memtype *new;
+ struct memtype *entry_new;
enum page_cache_mode actual_type;
int is_range_ram;
int err = 0;
@@ -593,22 +626,22 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
return -EINVAL;
}
- new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
- if (!new)
+ entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
+ if (!entry_new)
return -ENOMEM;
- new->start = start;
- new->end = end;
- new->type = actual_type;
+ entry_new->start = start;
+ entry_new->end = end;
+ entry_new->type = actual_type;
spin_lock(&memtype_lock);
- err = memtype_check_insert(new, new_type);
+ err = memtype_check_insert(entry_new, new_type);
if (err) {
- pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+ pr_info("x86/PAT: memtype_reserve failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
- cattr_name(new->type), cattr_name(req_type));
- kfree(new);
+ cattr_name(entry_new->type), cattr_name(req_type));
+ kfree(entry_new);
spin_unlock(&memtype_lock);
return err;
@@ -616,18 +649,17 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
spin_unlock(&memtype_lock);
- dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
- start, end - 1, cattr_name(new->type), cattr_name(req_type),
+ dprintk("memtype_reserve added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
+ start, end - 1, cattr_name(entry_new->type), cattr_name(req_type),
new_type ? cattr_name(*new_type) : "-");
return err;
}
-int free_memtype(u64 start, u64 end)
+int memtype_free(u64 start, u64 end)
{
- int err = -EINVAL;
int is_range_ram;
- struct memtype *entry;
+ struct memtype *entry_old;
if (!pat_enabled())
return 0;
@@ -640,28 +672,24 @@ int free_memtype(u64 start, u64 end)
return 0;
is_range_ram = pat_pagerange_is_ram(start, end);
- if (is_range_ram == 1) {
-
- err = free_ram_pages_type(start, end);
-
- return err;
- } else if (is_range_ram < 0) {
+ if (is_range_ram == 1)
+ return free_ram_pages_type(start, end);
+ if (is_range_ram < 0)
return -EINVAL;
- }
spin_lock(&memtype_lock);
- entry = memtype_erase(start, end);
+ entry_old = memtype_erase(start, end);
spin_unlock(&memtype_lock);
- if (IS_ERR(entry)) {
+ if (IS_ERR(entry_old)) {
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
return -EINVAL;
}
- kfree(entry);
+ kfree(entry_old);
- dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
+ dprintk("memtype_free request [mem %#010Lx-%#010Lx]\n", start, end - 1);
return 0;
}
@@ -700,6 +728,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
rettype = _PAGE_CACHE_MODE_UC_MINUS;
spin_unlock(&memtype_lock);
+
return rettype;
}
@@ -723,7 +752,7 @@ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn)
EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
/**
- * io_reserve_memtype - Request a memory type mapping for a region of memory
+ * memtype_reserve_io - Request a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
* @type: A pointer to memtype, with requested type. On success, requested
@@ -732,7 +761,7 @@ EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
* On success, returns 0
* On failure, returns non-zero
*/
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
+int memtype_reserve_io(resource_size_t start, resource_size_t end,
enum page_cache_mode *type)
{
resource_size_t size = end - start;
@@ -742,47 +771,47 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end,
WARN_ON_ONCE(iomem_map_sanity_check(start, size));
- ret = reserve_memtype(start, end, req_type, &new_type);
+ ret = memtype_reserve(start, end, req_type, &new_type);
if (ret)
goto out_err;
if (!is_new_memtype_allowed(start, size, req_type, new_type))
goto out_free;
- if (kernel_map_sync_memtype(start, size, new_type) < 0)
+ if (memtype_kernel_map_sync(start, size, new_type) < 0)
goto out_free;
*type = new_type;
return 0;
out_free:
- free_memtype(start, end);
+ memtype_free(start, end);
ret = -EBUSY;
out_err:
return ret;
}
/**
- * io_free_memtype - Release a memory type mapping for a region of memory
+ * memtype_free_io - Release a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
*/
-void io_free_memtype(resource_size_t start, resource_size_t end)
+void memtype_free_io(resource_size_t start, resource_size_t end)
{
- free_memtype(start, end);
+ memtype_free(start, end);
}
int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
{
enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
- return io_reserve_memtype(start, start + size, &type);
+ return memtype_reserve_io(start, start + size, &type);
}
EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
{
- io_free_memtype(start, start + size);
+ memtype_free_io(start, start + size);
}
EXPORT_SYMBOL(arch_io_free_memtype_wc);
@@ -839,10 +868,10 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
}
/*
- * Change the memory type for the physial address range in kernel identity
+ * Change the memory type for the physical address range in kernel identity
* mapping space if that range is a part of identity map.
*/
-int kernel_map_sync_memtype(u64 base, unsigned long size,
+int memtype_kernel_map_sync(u64 base, unsigned long size,
enum page_cache_mode pcm)
{
unsigned long id_sz;
@@ -851,15 +880,14 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
return 0;
/*
- * some areas in the middle of the kernel identity range
- * are not mapped, like the PCI space.
+ * Some areas in the middle of the kernel identity range
+ * are not mapped, for example the PCI space.
*/
if (!page_is_ram(base >> PAGE_SHIFT))
return 0;
id_sz = (__pa(high_memory-1) <= base + size) ?
- __pa(high_memory) - base :
- size;
+ __pa(high_memory) - base : size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
@@ -873,7 +901,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
/*
* Internal interface to reserve a range of physical memory with prot.
- * Reserved non RAM regions only and after successful reserve_memtype,
+ * Reserved non RAM regions only and after successful memtype_reserve,
* this func also keeps identity mapping (if any) in sync with this new prot.
*/
static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
@@ -910,14 +938,14 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
return 0;
}
- ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm);
+ ret = memtype_reserve(paddr, paddr + size, want_pcm, &pcm);
if (ret)
return ret;
if (pcm != want_pcm) {
if (strict_prot ||
!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
- free_memtype(paddr, paddr + size);
+ memtype_free(paddr, paddr + size);
pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
@@ -935,8 +963,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
cachemode2protval(pcm));
}
- if (kernel_map_sync_memtype(paddr, size, pcm) < 0) {
- free_memtype(paddr, paddr + size);
+ if (memtype_kernel_map_sync(paddr, size, pcm) < 0) {
+ memtype_free(paddr, paddr + size);
return -EINVAL;
}
return 0;
@@ -952,7 +980,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
if (is_ram == 0)
- free_memtype(paddr, paddr + size);
+ memtype_free(paddr, paddr + size);
}
/*
@@ -1099,25 +1127,30 @@ EXPORT_SYMBOL_GPL(pgprot_writethrough);
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
+/*
+ * We are allocating a temporary printout-entry to be passed
+ * between seq_start()/next() and seq_show():
+ */
static struct memtype *memtype_get_idx(loff_t pos)
{
- struct memtype *print_entry;
+ struct memtype *entry_print;
int ret;
- print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
- if (!print_entry)
+ entry_print = kzalloc(sizeof(struct memtype), GFP_KERNEL);
+ if (!entry_print)
return NULL;
spin_lock(&memtype_lock);
- ret = memtype_copy_nth_element(print_entry, pos);
+ ret = memtype_copy_nth_element(entry_print, pos);
spin_unlock(&memtype_lock);
- if (!ret) {
- return print_entry;
- } else {
- kfree(print_entry);
+ /* Free it on error: */
+ if (ret) {
+ kfree(entry_print);
return NULL;
}
+
+ return entry_print;
}
static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1142,11 +1175,14 @@ static void memtype_seq_stop(struct seq_file *seq, void *v)
static int memtype_seq_show(struct seq_file *seq, void *v)
{
- struct memtype *print_entry = (struct memtype *)v;
+ struct memtype *entry_print = (struct memtype *)v;
+
+ seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n",
+ entry_print->start,
+ entry_print->end,
+ cattr_name(entry_print->type));
- seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
- print_entry->start, print_entry->end);
- kfree(print_entry);
+ kfree(entry_print);
return 0;
}
@@ -1178,7 +1214,6 @@ static int __init pat_memtype_list_init(void)
}
return 0;
}
-
late_initcall(pat_memtype_list_init);
#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat/memtype.h
index 79a06684349e..cacecdbceb55 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat/memtype.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PAT_INTERNAL_H_
-#define __PAT_INTERNAL_H_
+#ifndef __MEMTYPE_H_
+#define __MEMTYPE_H_
extern int pat_debug_enable;
@@ -29,13 +29,13 @@ static inline char *cattr_name(enum page_cache_mode pcm)
}
#ifdef CONFIG_X86_PAT
-extern int memtype_check_insert(struct memtype *new,
+extern int memtype_check_insert(struct memtype *entry_new,
enum page_cache_mode *new_type);
extern struct memtype *memtype_erase(u64 start, u64 end);
extern struct memtype *memtype_lookup(u64 addr);
-extern int memtype_copy_nth_element(struct memtype *out, loff_t pos);
+extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos);
#else
-static inline int memtype_check_insert(struct memtype *new,
+static inline int memtype_check_insert(struct memtype *entry_new,
enum page_cache_mode *new_type)
{ return 0; }
static inline struct memtype *memtype_erase(u64 start, u64 end)
@@ -46,4 +46,4 @@ static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{ return 0; }
#endif
-#endif /* __PAT_INTERNAL_H_ */
+#endif /* __MEMTYPE_H_ */
diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c
new file mode 100644
index 000000000000..a07e4882bf36
--- /dev/null
+++ b/arch/x86/mm/pat/memtype_interval.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle caching attributes in page tables (PAT)
+ *
+ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * Suresh B Siddha <suresh.b.siddha@intel.com>
+ *
+ * Interval tree used to store the PAT memory type reservations.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+
+#include <asm/pgtable.h>
+#include <asm/memtype.h>
+
+#include "memtype.h"
+
+/*
+ * The memtype tree keeps track of memory type for specific
+ * physical memory areas. Without proper tracking, conflicting memory
+ * types in different mappings can cause CPU cache corruption.
+ *
+ * The tree is an interval tree (augmented rbtree) which tree is ordered
+ * by the starting address. The tree can contain multiple entries for
+ * different regions which overlap. All the aliases have the same
+ * cache attributes of course, as enforced by the PAT logic.
+ *
+ * memtype_lock protects the rbtree.
+ */
+
+static inline u64 interval_start(struct memtype *entry)
+{
+ return entry->start;
+}
+
+static inline u64 interval_end(struct memtype *entry)
+{
+ return entry->end - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+ interval_start, interval_end,
+ static, interval)
+
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
+
+enum {
+ MEMTYPE_EXACT_MATCH = 0,
+ MEMTYPE_END_MATCH = 1
+};
+
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
+{
+ struct memtype *entry_match;
+
+ entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
+
+ while (entry_match != NULL && entry_match->start < end) {
+ if ((match_type == MEMTYPE_EXACT_MATCH) &&
+ (entry_match->start == start) && (entry_match->end == end))
+ return entry_match;
+
+ if ((match_type == MEMTYPE_END_MATCH) &&
+ (entry_match->start < start) && (entry_match->end == end))
+ return entry_match;
+
+ entry_match = interval_iter_next(entry_match, start, end-1);
+ }
+
+ return NULL; /* Returns NULL if there is no match */
+}
+
+static int memtype_check_conflict(u64 start, u64 end,
+ enum page_cache_mode reqtype,
+ enum page_cache_mode *newtype)
+{
+ struct memtype *entry_match;
+ enum page_cache_mode found_type = reqtype;
+
+ entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
+ if (entry_match == NULL)
+ goto success;
+
+ if (entry_match->type != found_type && newtype == NULL)
+ goto failure;
+
+ dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end);
+ found_type = entry_match->type;
+
+ entry_match = interval_iter_next(entry_match, start, end-1);
+ while (entry_match) {
+ if (entry_match->type != found_type)
+ goto failure;
+
+ entry_match = interval_iter_next(entry_match, start, end-1);
+ }
+success:
+ if (newtype)
+ *newtype = found_type;
+
+ return 0;
+
+failure:
+ pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid, start, end,
+ cattr_name(found_type), cattr_name(entry_match->type));
+
+ return -EBUSY;
+}
+
+int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type)
+{
+ int err = 0;
+
+ err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type);
+ if (err)
+ return err;
+
+ if (ret_type)
+ entry_new->type = *ret_type;
+
+ interval_insert(entry_new, &memtype_rbroot);
+ return 0;
+}
+
+struct memtype *memtype_erase(u64 start, u64 end)
+{
+ struct memtype *entry_old;
+
+ /*
+ * Since the memtype_rbroot tree allows overlapping ranges,
+ * memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * a whole node for the munmap case. If no such entry is found,
+ * it then checks with END_MATCH, i.e. shrink the size of a node
+ * from the end for the mremap case.
+ */
+ entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
+ if (!entry_old) {
+ entry_old = memtype_match(start, end, MEMTYPE_END_MATCH);
+ if (!entry_old)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (entry_old->start == start) {
+ /* munmap: erase this node */
+ interval_remove(entry_old, &memtype_rbroot);
+ } else {
+ /* mremap: update the end value of this node */
+ interval_remove(entry_old, &memtype_rbroot);
+ entry_old->end = start;
+ interval_insert(entry_old, &memtype_rbroot);
+
+ return NULL;
+ }
+
+ return entry_old;
+}
+
+struct memtype *memtype_lookup(u64 addr)
+{
+ return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1);
+}
+
+/*
+ * Debugging helper, copy the Nth entry of the tree into a
+ * a copy for printout. This allows us to print out the tree
+ * via debugfs, without holding the memtype_lock too long:
+ */
+#ifdef CONFIG_DEBUG_FS
+int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos)
+{
+ struct memtype *entry_match;
+ int i = 1;
+
+ entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+
+ while (entry_match && pos != i) {
+ entry_match = interval_iter_next(entry_match, 0, ULONG_MAX);
+ i++;
+ }
+
+ if (entry_match) { /* pos == i */
+ *entry_out = *entry_match;
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pat/set_memory.c
index 1b99ad05b117..62a8ebe72a52 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -24,10 +24,10 @@
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/set_memory.h>
-#include "mm_internal.h"
+#include "../mm_internal.h"
/*
* The current flushing context - we pass it instead of 5 arguments:
@@ -331,7 +331,7 @@ static void cpa_flush_all(unsigned long cache)
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
-void __cpa_flush_tlb(void *data)
+static void __cpa_flush_tlb(void *data)
{
struct cpa_data *cpa = data;
unsigned int i;
@@ -1801,7 +1801,7 @@ int set_memory_uc(unsigned long addr, int numpages)
/*
* for now UC MINUS. see comments in ioremap()
*/
- ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_MODE_UC_MINUS, NULL);
if (ret)
goto out_err;
@@ -1813,7 +1813,7 @@ int set_memory_uc(unsigned long addr, int numpages)
return 0;
out_free:
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
out_err:
return ret;
}
@@ -1839,14 +1839,14 @@ int set_memory_wc(unsigned long addr, int numpages)
{
int ret;
- ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_MODE_WC, NULL);
if (ret)
return ret;
ret = _set_memory_wc(addr, numpages);
if (ret)
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
return ret;
}
@@ -1873,7 +1873,7 @@ int set_memory_wb(unsigned long addr, int numpages)
if (ret)
return ret;
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
return 0;
}
EXPORT_SYMBOL(set_memory_wb);
@@ -2014,7 +2014,7 @@ static int _set_pages_array(struct page **pages, int numpages,
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
- if (reserve_memtype(start, end, new_type, NULL))
+ if (memtype_reserve(start, end, new_type, NULL))
goto err_out;
}
@@ -2040,7 +2040,7 @@ err_out:
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
- free_memtype(start, end);
+ memtype_free(start, end);
}
return -EINVAL;
}
@@ -2089,7 +2089,7 @@ int set_pages_array_wb(struct page **pages, int numpages)
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
- free_memtype(start, end);
+ memtype_free(start, end);
}
return 0;
@@ -2215,7 +2215,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
.pgd = pgd,
.numpages = numpages,
.mask_set = __pgprot(0),
- .mask_clr = __pgprot(0),
+ .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
.flags = 0,
};
@@ -2224,12 +2224,6 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
if (!(__supported_pte_mask & _PAGE_NX))
goto out;
- if (!(page_flags & _PAGE_NX))
- cpa.mask_clr = __pgprot(_PAGE_NX);
-
- if (!(page_flags & _PAGE_RW))
- cpa.mask_clr = __pgprot(_PAGE_RW);
-
if (!(page_flags & _PAGE_ENC))
cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
@@ -2281,5 +2275,5 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
* be exposed to the rest of the kernel. Include these directly here.
*/
#ifdef CONFIG_CPA_DEBUG
-#include "pageattr-test.c"
+#include "cpa-test.c"
#endif
diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c
deleted file mode 100644
index 6855362eaf21..000000000000
--- a/arch/x86/mm/pat_interval.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Handle caching attributes in page tables (PAT)
- *
- * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- * Suresh B Siddha <suresh.b.siddha@intel.com>
- *
- * Interval tree used to store the PAT memory type reservations.
- */
-
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/interval_tree_generic.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-
-#include <asm/pgtable.h>
-#include <asm/pat.h>
-
-#include "pat_internal.h"
-
-/*
- * The memtype tree keeps track of memory type for specific
- * physical memory areas. Without proper tracking, conflicting memory
- * types in different mappings can cause CPU cache corruption.
- *
- * The tree is an interval tree (augmented rbtree) with tree ordered
- * on starting address. Tree can contain multiple entries for
- * different regions which overlap. All the aliases have the same
- * cache attributes of course.
- *
- * memtype_lock protects the rbtree.
- */
-static inline u64 memtype_interval_start(struct memtype *memtype)
-{
- return memtype->start;
-}
-
-static inline u64 memtype_interval_end(struct memtype *memtype)
-{
- return memtype->end - 1;
-}
-INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
- memtype_interval_start, memtype_interval_end,
- static, memtype_interval)
-
-static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
-
-enum {
- MEMTYPE_EXACT_MATCH = 0,
- MEMTYPE_END_MATCH = 1
-};
-
-static struct memtype *memtype_match(u64 start, u64 end, int match_type)
-{
- struct memtype *match;
-
- match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
- while (match != NULL && match->start < end) {
- if ((match_type == MEMTYPE_EXACT_MATCH) &&
- (match->start == start) && (match->end == end))
- return match;
-
- if ((match_type == MEMTYPE_END_MATCH) &&
- (match->start < start) && (match->end == end))
- return match;
-
- match = memtype_interval_iter_next(match, start, end-1);
- }
-
- return NULL; /* Returns NULL if there is no match */
-}
-
-static int memtype_check_conflict(u64 start, u64 end,
- enum page_cache_mode reqtype,
- enum page_cache_mode *newtype)
-{
- struct memtype *match;
- enum page_cache_mode found_type = reqtype;
-
- match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
- if (match == NULL)
- goto success;
-
- if (match->type != found_type && newtype == NULL)
- goto failure;
-
- dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
- found_type = match->type;
-
- match = memtype_interval_iter_next(match, start, end-1);
- while (match) {
- if (match->type != found_type)
- goto failure;
-
- match = memtype_interval_iter_next(match, start, end-1);
- }
-success:
- if (newtype)
- *newtype = found_type;
-
- return 0;
-
-failure:
- pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid, start, end,
- cattr_name(found_type), cattr_name(match->type));
- return -EBUSY;
-}
-
-int memtype_check_insert(struct memtype *new,
- enum page_cache_mode *ret_type)
-{
- int err = 0;
-
- err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
- if (err)
- return err;
-
- if (ret_type)
- new->type = *ret_type;
-
- memtype_interval_insert(new, &memtype_rbroot);
- return 0;
-}
-
-struct memtype *memtype_erase(u64 start, u64 end)
-{
- struct memtype *data;
-
- /*
- * Since the memtype_rbroot tree allows overlapping ranges,
- * memtype_erase() checks with EXACT_MATCH first, i.e. free
- * a whole node for the munmap case. If no such entry is found,
- * it then checks with END_MATCH, i.e. shrink the size of a node
- * from the end for the mremap case.
- */
- data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
- if (!data) {
- data = memtype_match(start, end, MEMTYPE_END_MATCH);
- if (!data)
- return ERR_PTR(-EINVAL);
- }
-
- if (data->start == start) {
- /* munmap: erase this node */
- memtype_interval_remove(data, &memtype_rbroot);
- } else {
- /* mremap: update the end value of this node */
- memtype_interval_remove(data, &memtype_rbroot);
- data->end = start;
- memtype_interval_insert(data, &memtype_rbroot);
- return NULL;
- }
-
- return data;
-}
-
-struct memtype *memtype_lookup(u64 addr)
-{
- return memtype_interval_iter_first(&memtype_rbroot, addr,
- addr + PAGE_SIZE-1);
-}
-
-#if defined(CONFIG_DEBUG_FS)
-int memtype_copy_nth_element(struct memtype *out, loff_t pos)
-{
- struct memtype *match;
- int i = 1;
-
- match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
- while (match && pos != i) {
- match = memtype_interval_iter_next(match, 0, ULONG_MAX);
- i++;
- }
-
- if (match) { /* pos == i */
- *out = *match;
- return 0;
- } else {
- return 1;
- }
-}
-#endif
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 9bb7f0ab9fe6..0e6700eaa4f9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -18,6 +18,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
+#include <linux/vmalloc.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index bdc98150d4db..fc3f3d3e2ef2 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <asm/page.h>
+#include <linux/vmalloc.h>
#include "physaddr.h"
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 9df652d3d927..fa855bbaebaf 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,7 +34,7 @@
#include <linux/errno.h>
#include <linux/memblock.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/e820/api.h>
#include <asm/pci_x86.h>
#include <asm/io_apic.h>
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index fe29f3f5d384..84b09c230cbd 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
-OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
+KASAN_SANITIZE := n
+GCOV_PROFILE := n
obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 38d44f36d5ed..59f7f6d60cf6 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -54,8 +54,8 @@
#include <asm/x86_init.h>
#include <asm/uv/uv.h>
-static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
+static u64 efi_systab_phys __initdata;
static efi_config_table_type_t arch_tables[] __initdata = {
#ifdef CONFIG_X86_UV
@@ -97,32 +97,6 @@ static int __init setup_add_efi_memmap(char *arg)
}
early_param("add_efi_memmap", setup_add_efi_memmap);
-static efi_status_t __init phys_efi_set_virtual_address_map(
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
-{
- efi_status_t status;
- unsigned long flags;
- pgd_t *save_pgd;
-
- save_pgd = efi_call_phys_prolog();
- if (!save_pgd)
- return EFI_ABORTED;
-
- /* Disable interrupts around EFI calls: */
- local_irq_save(flags);
- status = efi_call_phys(efi_phys.set_virtual_address_map,
- memory_map_size, descriptor_size,
- descriptor_version, virtual_map);
- local_irq_restore(flags);
-
- efi_call_phys_epilog(save_pgd);
-
- return status;
-}
-
void __init efi_find_mirror(void)
{
efi_memory_desc_t *md;
@@ -330,10 +304,16 @@ static void __init efi_clean_memmap(void)
}
if (n_removal > 0) {
- u64 size = efi.memmap.nr_map - n_removal;
+ struct efi_memory_map_data data = {
+ .phys_map = efi.memmap.phys_map,
+ .desc_version = efi.memmap.desc_version,
+ .desc_size = efi.memmap.desc_size,
+ .size = data.desc_size * (efi.memmap.nr_map - n_removal),
+ .flags = 0,
+ };
pr_warn("Removing %d invalid memory map entries.\n", n_removal);
- efi_memmap_install(efi.memmap.phys_map, size);
+ efi_memmap_install(&data);
}
}
@@ -353,89 +333,90 @@ void __init efi_print_memmap(void)
}
}
-static int __init efi_systab_init(void *phys)
+static int __init efi_systab_init(u64 phys)
{
+ int size = efi_enabled(EFI_64BIT) ? sizeof(efi_system_table_64_t)
+ : sizeof(efi_system_table_32_t);
+ bool over4g = false;
+ void *p;
+
+ p = early_memremap_ro(phys, size);
+ if (p == NULL) {
+ pr_err("Couldn't map the system table!\n");
+ return -ENOMEM;
+ }
+
if (efi_enabled(EFI_64BIT)) {
- efi_system_table_64_t *systab64;
- struct efi_setup_data *data = NULL;
- u64 tmp = 0;
+ const efi_system_table_64_t *systab64 = p;
+
+ efi_systab.hdr = systab64->hdr;
+ efi_systab.fw_vendor = systab64->fw_vendor;
+ efi_systab.fw_revision = systab64->fw_revision;
+ efi_systab.con_in_handle = systab64->con_in_handle;
+ efi_systab.con_in = systab64->con_in;
+ efi_systab.con_out_handle = systab64->con_out_handle;
+ efi_systab.con_out = (void *)(unsigned long)systab64->con_out;
+ efi_systab.stderr_handle = systab64->stderr_handle;
+ efi_systab.stderr = systab64->stderr;
+ efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
+ efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
+ efi_systab.nr_tables = systab64->nr_tables;
+ efi_systab.tables = systab64->tables;
+
+ over4g = systab64->con_in_handle > U32_MAX ||
+ systab64->con_in > U32_MAX ||
+ systab64->con_out_handle > U32_MAX ||
+ systab64->con_out > U32_MAX ||
+ systab64->stderr_handle > U32_MAX ||
+ systab64->stderr > U32_MAX ||
+ systab64->boottime > U32_MAX;
if (efi_setup) {
- data = early_memremap(efi_setup, sizeof(*data));
- if (!data)
+ struct efi_setup_data *data;
+
+ data = early_memremap_ro(efi_setup, sizeof(*data));
+ if (!data) {
+ early_memunmap(p, size);
return -ENOMEM;
- }
- systab64 = early_memremap((unsigned long)phys,
- sizeof(*systab64));
- if (systab64 == NULL) {
- pr_err("Couldn't map the system table!\n");
- if (data)
- early_memunmap(data, sizeof(*data));
- return -ENOMEM;
- }
+ }
+
+ efi_systab.fw_vendor = (unsigned long)data->fw_vendor;
+ efi_systab.runtime = (void *)(unsigned long)data->runtime;
+ efi_systab.tables = (unsigned long)data->tables;
+
+ over4g |= data->fw_vendor > U32_MAX ||
+ data->runtime > U32_MAX ||
+ data->tables > U32_MAX;
- efi_systab.hdr = systab64->hdr;
- efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
- systab64->fw_vendor;
- tmp |= data ? data->fw_vendor : systab64->fw_vendor;
- efi_systab.fw_revision = systab64->fw_revision;
- efi_systab.con_in_handle = systab64->con_in_handle;
- tmp |= systab64->con_in_handle;
- efi_systab.con_in = systab64->con_in;
- tmp |= systab64->con_in;
- efi_systab.con_out_handle = systab64->con_out_handle;
- tmp |= systab64->con_out_handle;
- efi_systab.con_out = systab64->con_out;
- tmp |= systab64->con_out;
- efi_systab.stderr_handle = systab64->stderr_handle;
- tmp |= systab64->stderr_handle;
- efi_systab.stderr = systab64->stderr;
- tmp |= systab64->stderr;
- efi_systab.runtime = data ?
- (void *)(unsigned long)data->runtime :
- (void *)(unsigned long)systab64->runtime;
- tmp |= data ? data->runtime : systab64->runtime;
- efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
- tmp |= systab64->boottime;
- efi_systab.nr_tables = systab64->nr_tables;
- efi_systab.tables = data ? (unsigned long)data->tables :
- systab64->tables;
- tmp |= data ? data->tables : systab64->tables;
-
- early_memunmap(systab64, sizeof(*systab64));
- if (data)
early_memunmap(data, sizeof(*data));
-#ifdef CONFIG_X86_32
- if (tmp >> 32) {
- pr_err("EFI data located above 4GB, disabling EFI.\n");
- return -EINVAL;
+ } else {
+ over4g |= systab64->fw_vendor > U32_MAX ||
+ systab64->runtime > U32_MAX ||
+ systab64->tables > U32_MAX;
}
-#endif
} else {
- efi_system_table_32_t *systab32;
-
- systab32 = early_memremap((unsigned long)phys,
- sizeof(*systab32));
- if (systab32 == NULL) {
- pr_err("Couldn't map the system table!\n");
- return -ENOMEM;
- }
-
- efi_systab.hdr = systab32->hdr;
- efi_systab.fw_vendor = systab32->fw_vendor;
- efi_systab.fw_revision = systab32->fw_revision;
- efi_systab.con_in_handle = systab32->con_in_handle;
- efi_systab.con_in = systab32->con_in;
- efi_systab.con_out_handle = systab32->con_out_handle;
- efi_systab.con_out = systab32->con_out;
- efi_systab.stderr_handle = systab32->stderr_handle;
- efi_systab.stderr = systab32->stderr;
- efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
- efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
- efi_systab.nr_tables = systab32->nr_tables;
- efi_systab.tables = systab32->tables;
-
- early_memunmap(systab32, sizeof(*systab32));
+ const efi_system_table_32_t *systab32 = p;
+
+ efi_systab.hdr = systab32->hdr;
+ efi_systab.fw_vendor = systab32->fw_vendor;
+ efi_systab.fw_revision = systab32->fw_revision;
+ efi_systab.con_in_handle = systab32->con_in_handle;
+ efi_systab.con_in = systab32->con_in;
+ efi_systab.con_out_handle = systab32->con_out_handle;
+ efi_systab.con_out = (void *)(unsigned long)systab32->con_out;
+ efi_systab.stderr_handle = systab32->stderr_handle;
+ efi_systab.stderr = systab32->stderr;
+ efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
+ efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
+ efi_systab.nr_tables = systab32->nr_tables;
+ efi_systab.tables = systab32->tables;
+ }
+
+ early_memunmap(p, size);
+
+ if (IS_ENABLED(CONFIG_X86_32) && over4g) {
+ pr_err("EFI data located above 4GB, disabling EFI.\n");
+ return -EINVAL;
}
efi.systab = &efi_systab;
@@ -455,108 +436,23 @@ static int __init efi_systab_init(void *phys)
return 0;
}
-static int __init efi_runtime_init32(void)
-{
- efi_runtime_services_32_t *runtime;
-
- runtime = early_memremap((unsigned long)efi.systab->runtime,
- sizeof(efi_runtime_services_32_t));
- if (!runtime) {
- pr_err("Could not map the runtime service table!\n");
- return -ENOMEM;
- }
-
- /*
- * We will only need *early* access to the SetVirtualAddressMap
- * EFI runtime service. All other runtime services will be called
- * via the virtual mapping.
- */
- efi_phys.set_virtual_address_map =
- (efi_set_virtual_address_map_t *)
- (unsigned long)runtime->set_virtual_address_map;
- early_memunmap(runtime, sizeof(efi_runtime_services_32_t));
-
- return 0;
-}
-
-static int __init efi_runtime_init64(void)
-{
- efi_runtime_services_64_t *runtime;
-
- runtime = early_memremap((unsigned long)efi.systab->runtime,
- sizeof(efi_runtime_services_64_t));
- if (!runtime) {
- pr_err("Could not map the runtime service table!\n");
- return -ENOMEM;
- }
-
- /*
- * We will only need *early* access to the SetVirtualAddressMap
- * EFI runtime service. All other runtime services will be called
- * via the virtual mapping.
- */
- efi_phys.set_virtual_address_map =
- (efi_set_virtual_address_map_t *)
- (unsigned long)runtime->set_virtual_address_map;
- early_memunmap(runtime, sizeof(efi_runtime_services_64_t));
-
- return 0;
-}
-
-static int __init efi_runtime_init(void)
-{
- int rv;
-
- /*
- * Check out the runtime services table. We need to map
- * the runtime services table so that we can grab the physical
- * address of several of the EFI runtime functions, needed to
- * set the firmware into virtual mode.
- *
- * When EFI_PARAVIRT is in force then we could not map runtime
- * service memory region because we do not have direct access to it.
- * However, runtime services are available through proxy functions
- * (e.g. in case of Xen dom0 EFI implementation they call special
- * hypercall which executes relevant EFI functions) and that is why
- * they are always enabled.
- */
-
- if (!efi_enabled(EFI_PARAVIRT)) {
- if (efi_enabled(EFI_64BIT))
- rv = efi_runtime_init64();
- else
- rv = efi_runtime_init32();
-
- if (rv)
- return rv;
- }
-
- set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
-
- return 0;
-}
-
void __init efi_init(void)
{
efi_char16_t *c16;
char vendor[100] = "unknown";
int i = 0;
- void *tmp;
-#ifdef CONFIG_X86_32
- if (boot_params.efi_info.efi_systab_hi ||
- boot_params.efi_info.efi_memmap_hi) {
+ if (IS_ENABLED(CONFIG_X86_32) &&
+ (boot_params.efi_info.efi_systab_hi ||
+ boot_params.efi_info.efi_memmap_hi)) {
pr_info("Table located above 4GB, disabling EFI.\n");
return;
}
- efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
-#else
- efi_phys.systab = (efi_system_table_t *)
- (boot_params.efi_info.efi_systab |
- ((__u64)boot_params.efi_info.efi_systab_hi<<32));
-#endif
- if (efi_systab_init(efi_phys.systab))
+ efi_systab_phys = boot_params.efi_info.efi_systab |
+ ((__u64)boot_params.efi_info.efi_systab_hi << 32);
+
+ if (efi_systab_init(efi_systab_phys))
return;
efi.config_table = (unsigned long)efi.systab->tables;
@@ -566,14 +462,16 @@ void __init efi_init(void)
/*
* Show what we know for posterity
*/
- c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
+ c16 = early_memremap_ro(efi.systab->fw_vendor,
+ sizeof(vendor) * sizeof(efi_char16_t));
if (c16) {
- for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
- vendor[i] = *c16++;
+ for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i)
+ vendor[i] = c16[i];
vendor[i] = '\0';
- } else
+ early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
+ } else {
pr_err("Could not map the firmware vendor!\n");
- early_memunmap(tmp, 2);
+ }
pr_info("EFI v%u.%.02u by %s\n",
efi.systab->hdr.revision >> 16,
@@ -592,19 +490,21 @@ void __init efi_init(void)
if (!efi_runtime_supported())
pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
- else {
- if (efi_runtime_disabled() || efi_runtime_init()) {
- efi_memmap_unmap();
- return;
- }
+
+ if (!efi_runtime_supported() || efi_runtime_disabled()) {
+ efi_memmap_unmap();
+ return;
}
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
efi_clean_memmap();
if (efi_enabled(EFI_DBG))
efi_print_memmap();
}
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_UV)
+
void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
{
u64 addr, npages;
@@ -669,6 +569,8 @@ void __init old_map_region(efi_memory_desc_t *md)
(unsigned long long)md->phys_addr);
}
+#endif
+
/* Merge contiguous regions of the same type and attribute */
static void __init efi_merge_regions(void)
{
@@ -707,7 +609,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md)
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
- systab = (u64)(unsigned long)efi_phys.systab;
+ systab = efi_systab_phys;
if (md->phys_addr <= systab && systab < end) {
systab += md->virt_addr - md->phys_addr;
efi.systab = (efi_system_table_t *)(unsigned long)systab;
@@ -767,7 +669,7 @@ static inline void *efi_map_next_entry_reverse(void *entry)
*/
static void *efi_map_next_entry(void *entry)
{
- if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
+ if (!efi_have_uv1_memmap() && efi_enabled(EFI_64BIT)) {
/*
* Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
* config table feature requires us to map all entries
@@ -828,7 +730,7 @@ static bool should_map_region(efi_memory_desc_t *md)
* Map all of RAM so that we can access arguments in the 1:1
* mapping when making EFI runtime calls.
*/
- if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) {
+ if (efi_is_mixed()) {
if (md->type == EFI_CONVENTIONAL_MEMORY ||
md->type == EFI_LOADER_DATA ||
md->type == EFI_LOADER_CODE)
@@ -899,11 +801,11 @@ static void __init kexec_enter_virtual_mode(void)
/*
* We don't do virtual mode, since we don't do runtime services, on
- * non-native EFI. With efi=old_map, we don't do runtime services in
+ * non-native EFI. With the UV1 memmap, we don't do runtime services in
* kexec kernel because in the initial boot something else might
* have been mapped at these virtual addresses.
*/
- if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) {
+ if (efi_is_mixed() || efi_have_uv1_memmap()) {
efi_memmap_unmap();
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
@@ -958,11 +860,6 @@ static void __init kexec_enter_virtual_mode(void)
efi.runtime_version = efi_systab.hdr.revision;
efi_native_runtime_setup();
-
- efi.set_virtual_address_map = NULL;
-
- if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
- runtime_code_page_mkexec();
#endif
}
@@ -974,9 +871,9 @@ static void __init kexec_enter_virtual_mode(void)
*
* The old method which used to update that memory descriptor with the
* virtual address obtained from ioremap() is still supported when the
- * kernel is booted with efi=old_map on its command line. Same old
- * method enabled the runtime services to be called without having to
- * thunk back into physical mode for every invocation.
+ * kernel is booted on SG1 UV1 hardware. Same old method enabled the
+ * runtime services to be called without having to thunk back into
+ * physical mode for every invocation.
*
* The new method does a pagetable switch in a preemption-safe manner
* so that we're in a different address space when calling a runtime
@@ -999,16 +896,14 @@ static void __init __efi_enter_virtual_mode(void)
if (efi_alloc_page_tables()) {
pr_err("Failed to allocate EFI page tables\n");
- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- return;
+ goto err;
}
efi_merge_regions();
new_memmap = efi_map_regions(&count, &pg_shift);
if (!new_memmap) {
pr_err("Error reallocating memory, EFI runtime non-functional!\n");
- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- return;
+ goto err;
}
pa = __pa(new_memmap);
@@ -1022,8 +917,7 @@ static void __init __efi_enter_virtual_mode(void)
if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) {
pr_err("Failed to remap late EFI memory map\n");
- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- return;
+ goto err;
}
if (efi_enabled(EFI_DBG)) {
@@ -1031,34 +925,22 @@ static void __init __efi_enter_virtual_mode(void)
efi_print_memmap();
}
- BUG_ON(!efi.systab);
+ if (WARN_ON(!efi.systab))
+ goto err;
- if (efi_setup_page_tables(pa, 1 << pg_shift)) {
- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- return;
- }
+ if (efi_setup_page_tables(pa, 1 << pg_shift))
+ goto err;
efi_sync_low_kernel_mappings();
- if (efi_is_native()) {
- status = phys_efi_set_virtual_address_map(
- efi.memmap.desc_size * count,
- efi.memmap.desc_size,
- efi.memmap.desc_version,
- (efi_memory_desc_t *)pa);
- } else {
- status = efi_thunk_set_virtual_address_map(
- efi_phys.set_virtual_address_map,
- efi.memmap.desc_size * count,
- efi.memmap.desc_size,
- efi.memmap.desc_version,
- (efi_memory_desc_t *)pa);
- }
-
+ status = efi_set_virtual_address_map(efi.memmap.desc_size * count,
+ efi.memmap.desc_size,
+ efi.memmap.desc_version,
+ (efi_memory_desc_t *)pa);
if (status != EFI_SUCCESS) {
- pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
- status);
- panic("EFI call to SetVirtualAddressMap() failed!");
+ pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n",
+ status);
+ goto err;
}
efi_free_boot_services();
@@ -1071,13 +953,11 @@ static void __init __efi_enter_virtual_mode(void)
*/
efi.runtime_version = efi_systab.hdr.revision;
- if (efi_is_native())
+ if (!efi_is_mixed())
efi_native_runtime_setup();
else
efi_thunk_runtime_setup();
- efi.set_virtual_address_map = NULL;
-
/*
* Apply more restrictive page table mapping attributes now that
* SVAM() has been called and the firmware has performed all
@@ -1087,6 +967,10 @@ static void __init __efi_enter_virtual_mode(void)
/* clean DUMMY object */
efi_delete_dummy_variable();
+ return;
+
+err:
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
}
void __init efi_enter_virtual_mode(void)
@@ -1102,20 +986,6 @@ void __init efi_enter_virtual_mode(void)
efi_dump_pagetable();
}
-static int __init arch_parse_efi_cmdline(char *str)
-{
- if (!str) {
- pr_warn("need at least one option\n");
- return -EINVAL;
- }
-
- if (parse_option_str(str, "old_map"))
- set_bit(EFI_OLD_MEMMAP, &efi.flags);
-
- return 0;
-}
-early_param("efi", arch_parse_efi_cmdline);
-
bool efi_is_table_address(unsigned long phys_addr)
{
unsigned int i;
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 9959657127f4..71dddd1620f9 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -66,9 +66,17 @@ void __init efi_map_region(efi_memory_desc_t *md)
void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
-pgd_t * __init efi_call_phys_prolog(void)
+efi_status_t efi_call_svam(efi_set_virtual_address_map_t *__efiapi *,
+ u32, u32, u32, void *);
+
+efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map)
{
struct desc_ptr gdt_descr;
+ efi_status_t status;
+ unsigned long flags;
pgd_t *save_pgd;
/* Current pgd is swapper_pg_dir, we'll restore it later: */
@@ -80,14 +88,18 @@ pgd_t * __init efi_call_phys_prolog(void)
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
- return save_pgd;
-}
+ /* Disable interrupts around EFI calls: */
+ local_irq_save(flags);
+ status = efi_call_svam(&efi.systab->runtime->set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
+ local_irq_restore(flags);
-void __init efi_call_phys_epilog(pgd_t *save_pgd)
-{
load_fixmap_gdt(0);
load_cr3(save_pgd);
__flush_tlb_all();
+
+ return status;
}
void __init efi_runtime_update_mappings(void)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 08ce8177c3af..e2accfe636bd 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -57,142 +57,6 @@ static u64 efi_va = EFI_VA_START;
struct efi_scratch efi_scratch;
-static void __init early_code_mapping_set_exec(int executable)
-{
- efi_memory_desc_t *md;
-
- if (!(__supported_pte_mask & _PAGE_NX))
- return;
-
- /* Make EFI service code area executable */
- for_each_efi_memory_desc(md) {
- if (md->type == EFI_RUNTIME_SERVICES_CODE ||
- md->type == EFI_BOOT_SERVICES_CODE)
- efi_set_executable(md, executable);
- }
-}
-
-pgd_t * __init efi_call_phys_prolog(void)
-{
- unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
- pgd_t *save_pgd, *pgd_k, *pgd_efi;
- p4d_t *p4d, *p4d_k, *p4d_efi;
- pud_t *pud;
-
- int pgd;
- int n_pgds, i, j;
-
- if (!efi_enabled(EFI_OLD_MEMMAP)) {
- efi_switch_mm(&efi_mm);
- return efi_mm.pgd;
- }
-
- early_code_mapping_set_exec(1);
-
- n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
- save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
- if (!save_pgd)
- return NULL;
-
- /*
- * Build 1:1 identity mapping for efi=old_map usage. Note that
- * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
- * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical
- * address X, the pud_index(X) != pud_index(__va(X)), we can only copy
- * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping.
- * This means here we can only reuse the PMD tables of the direct mapping.
- */
- for (pgd = 0; pgd < n_pgds; pgd++) {
- addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
- vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
- pgd_efi = pgd_offset_k(addr_pgd);
- save_pgd[pgd] = *pgd_efi;
-
- p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
- if (!p4d) {
- pr_err("Failed to allocate p4d table!\n");
- goto out;
- }
-
- for (i = 0; i < PTRS_PER_P4D; i++) {
- addr_p4d = addr_pgd + i * P4D_SIZE;
- p4d_efi = p4d + p4d_index(addr_p4d);
-
- pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
- if (!pud) {
- pr_err("Failed to allocate pud table!\n");
- goto out;
- }
-
- for (j = 0; j < PTRS_PER_PUD; j++) {
- addr_pud = addr_p4d + j * PUD_SIZE;
-
- if (addr_pud > (max_pfn << PAGE_SHIFT))
- break;
-
- vaddr = (unsigned long)__va(addr_pud);
-
- pgd_k = pgd_offset_k(vaddr);
- p4d_k = p4d_offset(pgd_k, vaddr);
- pud[j] = *pud_offset(p4d_k, vaddr);
- }
- }
- pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
- }
-
- __flush_tlb_all();
- return save_pgd;
-out:
- efi_call_phys_epilog(save_pgd);
- return NULL;
-}
-
-void __init efi_call_phys_epilog(pgd_t *save_pgd)
-{
- /*
- * After the lock is released, the original page table is restored.
- */
- int pgd_idx, i;
- int nr_pgds;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
-
- if (!efi_enabled(EFI_OLD_MEMMAP)) {
- efi_switch_mm(efi_scratch.prev_mm);
- return;
- }
-
- nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
-
- for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
- pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
- set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
-
- if (!pgd_present(*pgd))
- continue;
-
- for (i = 0; i < PTRS_PER_P4D; i++) {
- p4d = p4d_offset(pgd,
- pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
-
- if (!p4d_present(*p4d))
- continue;
-
- pud = (pud_t *)p4d_page_vaddr(*p4d);
- pud_free(&init_mm, pud);
- }
-
- p4d = (p4d_t *)pgd_page_vaddr(*pgd);
- p4d_free(&init_mm, p4d);
- }
-
- kfree(save_pgd);
-
- __flush_tlb_all();
- early_code_mapping_set_exec(0);
-}
-
EXPORT_SYMBOL_GPL(efi_mm);
/*
@@ -211,7 +75,7 @@ int __init efi_alloc_page_tables(void)
pud_t *pud;
gfp_t gfp_mask;
- if (efi_enabled(EFI_OLD_MEMMAP))
+ if (efi_have_uv1_memmap())
return 0;
gfp_mask = GFP_KERNEL | __GFP_ZERO;
@@ -252,7 +116,7 @@ void efi_sync_low_kernel_mappings(void)
pud_t *pud_k, *pud_efi;
pgd_t *efi_pgd = efi_mm.pgd;
- if (efi_enabled(EFI_OLD_MEMMAP))
+ if (efi_have_uv1_memmap())
return;
/*
@@ -346,7 +210,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
unsigned npages;
pgd_t *pgd = efi_mm.pgd;
- if (efi_enabled(EFI_OLD_MEMMAP))
+ if (efi_have_uv1_memmap())
return 0;
/*
@@ -373,10 +237,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
* as trim_bios_range() will reserve the first page and isolate it away
* from memory allocators anyway.
*/
- pf = _PAGE_RW;
- if (sev_active())
- pf |= _PAGE_ENC;
-
if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) {
pr_err("Failed to create 1:1 mapping for the first page!\n");
return 1;
@@ -388,21 +248,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
* text and allocate a new stack because we can't rely on the
* stack pointer being < 4GB.
*/
- if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native())
+ if (!efi_is_mixed())
return 0;
page = alloc_page(GFP_KERNEL|__GFP_DMA32);
- if (!page)
- panic("Unable to allocate EFI runtime stack < 4GB\n");
+ if (!page) {
+ pr_err("Unable to allocate EFI runtime stack < 4GB\n");
+ return 1;
+ }
- efi_scratch.phys_stack = virt_to_phys(page_address(page));
- efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */
+ efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */
- npages = (_etext - _text) >> PAGE_SHIFT;
+ npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT;
text = __pa(_text);
pfn = text >> PAGE_SHIFT;
- pf = _PAGE_RW | _PAGE_ENC;
+ pf = _PAGE_ENC;
if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {
pr_err("Failed to map kernel text 1:1\n");
return 1;
@@ -417,6 +278,22 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va)
unsigned long pfn;
pgd_t *pgd = efi_mm.pgd;
+ /*
+ * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF
+ * executable images in memory that consist of both R-X and
+ * RW- sections, so we cannot apply read-only or non-exec
+ * permissions just yet. However, modern EFI systems provide
+ * a memory attributes table that describes those sections
+ * with the appropriate restricted permissions, which are
+ * applied in efi_runtime_update_mappings() below. All other
+ * regions can be mapped non-executable at this point, with
+ * the exception of boot services code regions, but those will
+ * be unmapped again entirely in efi_free_boot_services().
+ */
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_RUNTIME_SERVICES_CODE)
+ flags |= _PAGE_NX;
+
if (!(md->attribute & EFI_MEMORY_WB))
flags |= _PAGE_PCD;
@@ -434,7 +311,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
unsigned long size = md->num_pages << PAGE_SHIFT;
u64 pa = md->phys_addr;
- if (efi_enabled(EFI_OLD_MEMMAP))
+ if (efi_have_uv1_memmap())
return old_map_region(md);
/*
@@ -449,7 +326,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
* booting in EFI mixed mode, because even though we may be
* running a 64-bit kernel, the firmware may only be 32-bit.
*/
- if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) {
+ if (efi_is_mixed()) {
md->virt_addr = md->phys_addr;
return;
}
@@ -491,26 +368,6 @@ void __init efi_map_region_fixed(efi_memory_desc_t *md)
__map_region(md, md->virt_addr);
}
-void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
- u32 type, u64 attribute)
-{
- unsigned long last_map_pfn;
-
- if (type == EFI_MEMORY_MAPPED_IO)
- return ioremap(phys_addr, size);
-
- last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
- if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
- unsigned long top = last_map_pfn << PAGE_SHIFT;
- efi_ioremap(top, size - (top - phys_addr), type, attribute);
- }
-
- if (!(attribute & EFI_MEMORY_WB))
- efi_memory_uc((u64)(unsigned long)__va(phys_addr), size);
-
- return (void __iomem *)__va(phys_addr);
-}
-
void __init parse_efi_setup(u64 phys_addr, u32 data_len)
{
efi_setup = phys_addr + sizeof(struct setup_data);
@@ -559,7 +416,7 @@ void __init efi_runtime_update_mappings(void)
{
efi_memory_desc_t *md;
- if (efi_enabled(EFI_OLD_MEMMAP)) {
+ if (efi_have_uv1_memmap()) {
if (__supported_pte_mask & _PAGE_NX)
runtime_code_page_mkexec();
return;
@@ -613,7 +470,7 @@ void __init efi_runtime_update_mappings(void)
void __init efi_dump_pagetable(void)
{
#ifdef CONFIG_EFI_PGT_DUMP
- if (efi_enabled(EFI_OLD_MEMMAP))
+ if (efi_have_uv1_memmap())
ptdump_walk_pgd_level(NULL, swapper_pg_dir);
else
ptdump_walk_pgd_level(NULL, efi_mm.pgd);
@@ -634,63 +491,74 @@ void efi_switch_mm(struct mm_struct *mm)
switch_mm(efi_scratch.prev_mm, mm, NULL);
}
-#ifdef CONFIG_EFI_MIXED
-extern efi_status_t efi64_thunk(u32, ...);
-
static DEFINE_SPINLOCK(efi_runtime_lock);
-#define runtime_service32(func) \
-({ \
- u32 table = (u32)(unsigned long)efi.systab; \
- u32 *rt, *___f; \
- \
- rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \
- ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \
- *___f; \
+/*
+ * DS and ES contain user values. We need to save them.
+ * The 32-bit EFI code needs a valid DS, ES, and SS. There's no
+ * need to save the old SS: __KERNEL_DS is always acceptable.
+ */
+#define __efi_thunk(func, ...) \
+({ \
+ efi_runtime_services_32_t *__rt; \
+ unsigned short __ds, __es; \
+ efi_status_t ____s; \
+ \
+ __rt = (void *)(unsigned long)efi.systab->mixed_mode.runtime; \
+ \
+ savesegment(ds, __ds); \
+ savesegment(es, __es); \
+ \
+ loadsegment(ss, __KERNEL_DS); \
+ loadsegment(ds, __KERNEL_DS); \
+ loadsegment(es, __KERNEL_DS); \
+ \
+ ____s = efi64_thunk(__rt->func, __VA_ARGS__); \
+ \
+ loadsegment(ds, __ds); \
+ loadsegment(es, __es); \
+ \
+ ____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \
+ ____s; \
})
/*
* Switch to the EFI page tables early so that we can access the 1:1
* runtime services mappings which are not mapped in any other page
- * tables. This function must be called before runtime_service32().
+ * tables.
*
* Also, disable interrupts because the IDT points to 64-bit handlers,
* which aren't going to function correctly when we switch to 32-bit.
*/
-#define efi_thunk(f, ...) \
+#define efi_thunk(func...) \
({ \
efi_status_t __s; \
- u32 __func; \
\
arch_efi_call_virt_setup(); \
\
- __func = runtime_service32(f); \
- __s = efi64_thunk(__func, __VA_ARGS__); \
+ __s = __efi_thunk(func); \
\
arch_efi_call_virt_teardown(); \
\
__s; \
})
-efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
+static efi_status_t __init __no_sanitize_address
+efi_thunk_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map)
{
efi_status_t status;
unsigned long flags;
- u32 func;
efi_sync_low_kernel_mappings();
local_irq_save(flags);
efi_switch_mm(&efi_mm);
- func = (u32)(unsigned long)phys_set_virtual_address_map;
- status = efi64_thunk(func, memory_map_size, descriptor_size,
- descriptor_version, virtual_map);
+ status = __efi_thunk(set_virtual_address_map, memory_map_size,
+ descriptor_size, descriptor_version, virtual_map);
efi_switch_mm(efi_scratch.prev_mm);
local_irq_restore(flags);
@@ -993,8 +861,11 @@ efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules,
return EFI_UNSUPPORTED;
}
-void efi_thunk_runtime_setup(void)
+void __init efi_thunk_runtime_setup(void)
{
+ if (!IS_ENABLED(CONFIG_EFI_MIXED))
+ return;
+
efi.get_time = efi_thunk_get_time;
efi.set_time = efi_thunk_set_time;
efi.get_wakeup_time = efi_thunk_get_wakeup_time;
@@ -1010,4 +881,46 @@ void efi_thunk_runtime_setup(void)
efi.update_capsule = efi_thunk_update_capsule;
efi.query_capsule_caps = efi_thunk_query_capsule_caps;
}
-#endif /* CONFIG_EFI_MIXED */
+
+efi_status_t __init __no_sanitize_address
+efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map)
+{
+ efi_status_t status;
+ unsigned long flags;
+ pgd_t *save_pgd = NULL;
+
+ if (efi_is_mixed())
+ return efi_thunk_set_virtual_address_map(memory_map_size,
+ descriptor_size,
+ descriptor_version,
+ virtual_map);
+
+ if (efi_have_uv1_memmap()) {
+ save_pgd = efi_uv1_memmap_phys_prolog();
+ if (!save_pgd)
+ return EFI_ABORTED;
+ } else {
+ efi_switch_mm(&efi_mm);
+ }
+
+ kernel_fpu_begin();
+
+ /* Disable interrupts around EFI calls: */
+ local_irq_save(flags);
+ status = efi_call(efi.systab->runtime->set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
+ local_irq_restore(flags);
+
+ kernel_fpu_end();
+
+ if (save_pgd)
+ efi_uv1_memmap_phys_epilog(save_pgd);
+ else
+ efi_switch_mm(efi_scratch.prev_mm);
+
+ return status;
+}
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index eed8b5b441f8..75c46e7a809f 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -7,118 +7,43 @@
*/
#include <linux/linkage.h>
+#include <linux/init.h>
#include <asm/page_types.h>
-/*
- * efi_call_phys(void *, ...) is a function with variable parameters.
- * All the callers of this function assure that all the parameters are 4-bytes.
- */
-
-/*
- * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
- * So we'd better save all of them at the beginning of this function and restore
- * at the end no matter how many we use, because we can not assure EFI runtime
- * service functions will comply with gcc calling convention, too.
- */
+ __INIT
+SYM_FUNC_START(efi_call_svam)
+ push 8(%esp)
+ push 8(%esp)
+ push %ecx
+ push %edx
-.text
-SYM_FUNC_START(efi_call_phys)
/*
- * 0. The function can only be called in Linux kernel. So CS has been
- * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
- * the values of these registers are the same. And, the corresponding
- * GDT entries are identical. So I will do nothing about segment reg
- * and GDT, but change GDT base register in prolog and epilog.
- */
-
- /*
- * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
- * But to make it smoothly switch from virtual mode to flat mode.
- * The mapping of lower virtual memory has been created in prolog and
- * epilog.
+ * Switch to the flat mapped alias of this routine, by jumping to the
+ * address of label '1' after subtracting PAGE_OFFSET from it.
*/
movl $1f, %edx
subl $__PAGE_OFFSET, %edx
jmp *%edx
1:
- /*
- * 2. Now on the top of stack is the return
- * address in the caller of efi_call_phys(), then parameter 1,
- * parameter 2, ..., param n. To make things easy, we save the return
- * address of efi_call_phys in a global variable.
- */
- popl %edx
- movl %edx, saved_return_addr
- /* get the function pointer into ECX*/
- popl %ecx
- movl %ecx, efi_rt_function_ptr
- movl $2f, %edx
- subl $__PAGE_OFFSET, %edx
- pushl %edx
-
- /*
- * 3. Clear PG bit in %CR0.
- */
+ /* disable paging */
movl %cr0, %edx
andl $0x7fffffff, %edx
movl %edx, %cr0
- jmp 1f
-1:
- /*
- * 4. Adjust stack pointer.
- */
+ /* convert the stack pointer to a flat mapped address */
subl $__PAGE_OFFSET, %esp
- /*
- * 5. Call the physical function.
- */
- jmp *%ecx
+ /* call the EFI routine */
+ call *(%eax)
-2:
- /*
- * 6. After EFI runtime service returns, control will return to
- * following instruction. We'd better readjust stack pointer first.
- */
- addl $__PAGE_OFFSET, %esp
+ /* convert ESP back to a kernel VA, and pop the outgoing args */
+ addl $__PAGE_OFFSET + 16, %esp
- /*
- * 7. Restore PG bit
- */
+ /* re-enable paging */
movl %cr0, %edx
orl $0x80000000, %edx
movl %edx, %cr0
- jmp 1f
-1:
- /*
- * 8. Now restore the virtual mode from flat mode by
- * adding EIP with PAGE_OFFSET.
- */
- movl $1f, %edx
- jmp *%edx
-1:
-
- /*
- * 9. Balance the stack. And because EAX contain the return value,
- * we'd better not clobber it.
- */
- leal efi_rt_function_ptr, %edx
- movl (%edx), %ecx
- pushl %ecx
- /*
- * 10. Push the saved return address onto the stack and return.
- */
- leal saved_return_addr, %edx
- movl (%edx), %ecx
- pushl %ecx
ret
-SYM_FUNC_END(efi_call_phys)
-.previous
-
-.data
-saved_return_addr:
- .long 0
-efi_rt_function_ptr:
- .long 0
+SYM_FUNC_END(efi_call_svam)
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index b1d2313fe3bf..15da118f04f0 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -8,41 +8,12 @@
*/
#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/msr.h>
-#include <asm/processor-flags.h>
-#include <asm/page_types.h>
+#include <asm/nospec-branch.h>
-#define SAVE_XMM \
- mov %rsp, %rax; \
- subq $0x70, %rsp; \
- and $~0xf, %rsp; \
- mov %rax, (%rsp); \
- mov %cr0, %rax; \
- clts; \
- mov %rax, 0x8(%rsp); \
- movaps %xmm0, 0x60(%rsp); \
- movaps %xmm1, 0x50(%rsp); \
- movaps %xmm2, 0x40(%rsp); \
- movaps %xmm3, 0x30(%rsp); \
- movaps %xmm4, 0x20(%rsp); \
- movaps %xmm5, 0x10(%rsp)
-
-#define RESTORE_XMM \
- movaps 0x60(%rsp), %xmm0; \
- movaps 0x50(%rsp), %xmm1; \
- movaps 0x40(%rsp), %xmm2; \
- movaps 0x30(%rsp), %xmm3; \
- movaps 0x20(%rsp), %xmm4; \
- movaps 0x10(%rsp), %xmm5; \
- mov 0x8(%rsp), %rsi; \
- mov %rsi, %cr0; \
- mov (%rsp), %rsp
-
-SYM_FUNC_START(efi_call)
+SYM_FUNC_START(__efi_call)
pushq %rbp
movq %rsp, %rbp
- SAVE_XMM
+ and $~0xf, %rsp
mov 16(%rbp), %rax
subq $48, %rsp
mov %r9, 32(%rsp)
@@ -50,9 +21,7 @@ SYM_FUNC_START(efi_call)
mov %r8, %r9
mov %rcx, %r8
mov %rsi, %rcx
- call *%rdi
- addq $48, %rsp
- RESTORE_XMM
- popq %rbp
+ CALL_NOSPEC %rdi
+ leave
ret
-SYM_FUNC_END(efi_call)
+SYM_FUNC_END(__efi_call)
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 3189f1394701..26f0da238c1c 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -25,15 +25,16 @@
.text
.code64
-SYM_FUNC_START(efi64_thunk)
+SYM_CODE_START(__efi64_thunk)
push %rbp
push %rbx
/*
* Switch to 1:1 mapped 32-bit stack pointer.
*/
- movq %rsp, efi_saved_sp(%rip)
+ movq %rsp, %rax
movq efi_scratch(%rip), %rsp
+ push %rax
/*
* Calculate the physical address of the kernel text.
@@ -41,113 +42,31 @@ SYM_FUNC_START(efi64_thunk)
movq $__START_KERNEL_map, %rax
subq phys_base(%rip), %rax
- /*
- * Push some physical addresses onto the stack. This is easier
- * to do now in a code64 section while the assembler can address
- * 64-bit values. Note that all the addresses on the stack are
- * 32-bit.
- */
- subq $16, %rsp
- leaq efi_exit32(%rip), %rbx
- subq %rax, %rbx
- movl %ebx, 8(%rsp)
-
- leaq __efi64_thunk(%rip), %rbx
+ leaq 1f(%rip), %rbp
+ leaq 2f(%rip), %rbx
+ subq %rax, %rbp
subq %rax, %rbx
- call *%rbx
-
- movq efi_saved_sp(%rip), %rsp
- pop %rbx
- pop %rbp
- retq
-SYM_FUNC_END(efi64_thunk)
-/*
- * We run this function from the 1:1 mapping.
- *
- * This function must be invoked with a 1:1 mapped stack.
- */
-SYM_FUNC_START_LOCAL(__efi64_thunk)
- movl %ds, %eax
- push %rax
- movl %es, %eax
- push %rax
- movl %ss, %eax
- push %rax
-
- subq $32, %rsp
- movl %esi, 0x0(%rsp)
- movl %edx, 0x4(%rsp)
- movl %ecx, 0x8(%rsp)
- movq %r8, %rsi
- movl %esi, 0xc(%rsp)
- movq %r9, %rsi
- movl %esi, 0x10(%rsp)
-
- leaq 1f(%rip), %rbx
- movq %rbx, func_rt_ptr(%rip)
+ subq $28, %rsp
+ movl %ebx, 0x0(%rsp) /* return address */
+ movl %esi, 0x4(%rsp)
+ movl %edx, 0x8(%rsp)
+ movl %ecx, 0xc(%rsp)
+ movl %r8d, 0x10(%rsp)
+ movl %r9d, 0x14(%rsp)
/* Switch to 32-bit descriptor */
pushq $__KERNEL32_CS
- leaq efi_enter32(%rip), %rax
- pushq %rax
+ pushq %rdi /* EFI runtime service address */
lretq
-1: addq $32, %rsp
-
+1: movq 24(%rsp), %rsp
pop %rbx
- movl %ebx, %ss
- pop %rbx
- movl %ebx, %es
- pop %rbx
- movl %ebx, %ds
-
- /*
- * Convert 32-bit status code into 64-bit.
- */
- test %rax, %rax
- jz 1f
- movl %eax, %ecx
- andl $0x0fffffff, %ecx
- andl $0xf0000000, %eax
- shl $32, %rax
- or %rcx, %rax
-1:
- ret
-SYM_FUNC_END(__efi64_thunk)
-
-SYM_FUNC_START_LOCAL(efi_exit32)
- movq func_rt_ptr(%rip), %rax
- push %rax
- mov %rdi, %rax
- ret
-SYM_FUNC_END(efi_exit32)
+ pop %rbp
+ retq
.code32
-/*
- * EFI service pointer must be in %edi.
- *
- * The stack should represent the 32-bit calling convention.
- */
-SYM_FUNC_START_LOCAL(efi_enter32)
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
-
- call *%edi
-
- /* We must preserve return value */
- movl %eax, %edi
-
- movl 72(%esp), %eax
- pushl $__KERNEL_CS
- pushl %eax
-
+2: pushl $__KERNEL_CS
+ pushl %ebp
lret
-SYM_FUNC_END(efi_enter32)
-
- .data
- .balign 8
-func_rt_ptr: .quad 0
-efi_saved_sp: .quad 0
+SYM_CODE_END(__efi64_thunk)
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index ab91218d27bd..88d32c06cffa 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -244,7 +244,7 @@ EXPORT_SYMBOL_GPL(efi_query_variable_store);
*/
void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
{
- phys_addr_t new_phys, new_size;
+ struct efi_memory_map_data data = { 0 };
struct efi_mem_range mr;
efi_memory_desc_t md;
int num_entries;
@@ -272,24 +272,21 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
num_entries = efi_memmap_split_count(&md, &mr.range);
num_entries += efi.memmap.nr_map;
- new_size = efi.memmap.desc_size * num_entries;
-
- new_phys = efi_memmap_alloc(num_entries);
- if (!new_phys) {
+ if (efi_memmap_alloc(num_entries, &data) != 0) {
pr_err("Could not allocate boot services memmap\n");
return;
}
- new = early_memremap(new_phys, new_size);
+ new = early_memremap(data.phys_map, data.size);
if (!new) {
pr_err("Failed to map new boot services memmap\n");
return;
}
efi_memmap_insert(&efi.memmap, new, &mr);
- early_memunmap(new, new_size);
+ early_memunmap(new, data.size);
- efi_memmap_install(new_phys, num_entries);
+ efi_memmap_install(&data);
e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
e820__update_table(e820_table);
}
@@ -385,10 +382,10 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
/*
* To Do: Remove this check after adding functionality to unmap EFI boot
- * services code/data regions from direct mapping area because
- * "efi=old_map" maps EFI regions in swapper_pg_dir.
+ * services code/data regions from direct mapping area because the UV1
+ * memory map maps EFI regions in swapper_pg_dir.
*/
- if (efi_enabled(EFI_OLD_MEMMAP))
+ if (efi_have_uv1_memmap())
return;
/*
@@ -396,7 +393,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
* EFI runtime calls, hence don't unmap EFI boot services code/data
* regions.
*/
- if (!efi_is_native())
+ if (efi_is_mixed())
return;
if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages))
@@ -408,7 +405,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
void __init efi_free_boot_services(void)
{
- phys_addr_t new_phys, new_size;
+ struct efi_memory_map_data data = { 0 };
efi_memory_desc_t *md;
int num_entries = 0;
void *new, *new_md;
@@ -463,14 +460,12 @@ void __init efi_free_boot_services(void)
if (!num_entries)
return;
- new_size = efi.memmap.desc_size * num_entries;
- new_phys = efi_memmap_alloc(num_entries);
- if (!new_phys) {
+ if (efi_memmap_alloc(num_entries, &data) != 0) {
pr_err("Failed to allocate new EFI memmap\n");
return;
}
- new = memremap(new_phys, new_size, MEMREMAP_WB);
+ new = memremap(data.phys_map, data.size, MEMREMAP_WB);
if (!new) {
pr_err("Failed to map new EFI memmap\n");
return;
@@ -494,7 +489,7 @@ void __init efi_free_boot_services(void)
memunmap(new);
- if (efi_memmap_install(new_phys, num_entries)) {
+ if (efi_memmap_install(&data) != 0) {
pr_err("Could not install new EFI memmap\n");
return;
}
@@ -559,7 +554,7 @@ out:
return ret;
}
-static const struct dmi_system_id sgi_uv1_dmi[] = {
+static const struct dmi_system_id sgi_uv1_dmi[] __initconst = {
{ NULL, "SGI UV1",
{ DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"),
DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
@@ -582,8 +577,15 @@ void __init efi_apply_memmap_quirks(void)
}
/* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */
- if (dmi_check_system(sgi_uv1_dmi))
- set_bit(EFI_OLD_MEMMAP, &efi.flags);
+ if (dmi_check_system(sgi_uv1_dmi)) {
+ if (IS_ENABLED(CONFIG_X86_UV)) {
+ set_bit(EFI_UV1_MEMMAP, &efi.flags);
+ } else {
+ pr_warn("EFI runtime disabled, needs CONFIG_X86_UV=y on UV1\n");
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ efi_memmap_unmap();
+ }
+ }
}
/*
@@ -721,7 +723,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
/*
* Make sure that an efi runtime service caused the page fault.
* "efi_mm" cannot be used to check if the page fault had occurred
- * in the firmware context because efi=old_map doesn't use efi_pgd.
+ * in the firmware context because the UV1 memmap doesn't use efi_pgd.
*/
if (efi_rts_work.efi_rts_id == EFI_NONE)
return;
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index ece9cb9c1189..607f58147311 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -31,13 +31,16 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
return BIOS_STATUS_UNIMPLEMENTED;
/*
- * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
+ * If EFI_UV1_MEMMAP is set, we need to fall back to using our old EFI
* callback method, which uses efi_call() directly, with the kernel page tables:
*/
- if (unlikely(efi_enabled(EFI_OLD_MEMMAP)))
+ if (unlikely(efi_enabled(EFI_UV1_MEMMAP))) {
+ kernel_fpu_begin();
ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
- else
+ kernel_fpu_end();
+ } else {
ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+ }
return ret;
}
@@ -214,3 +217,163 @@ int uv_bios_init(void)
pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
return 0;
}
+
+static void __init early_code_mapping_set_exec(int executable)
+{
+ efi_memory_desc_t *md;
+
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return;
+
+ /* Make EFI service code area executable */
+ for_each_efi_memory_desc(md) {
+ if (md->type == EFI_RUNTIME_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_CODE)
+ efi_set_executable(md, executable);
+ }
+}
+
+void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd)
+{
+ /*
+ * After the lock is released, the original page table is restored.
+ */
+ int pgd_idx, i;
+ int nr_pgds;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+ for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+ pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
+ set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
+ if (!pgd_present(*pgd))
+ continue;
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ p4d = p4d_offset(pgd,
+ pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+ if (!p4d_present(*p4d))
+ continue;
+
+ pud = (pud_t *)p4d_page_vaddr(*p4d);
+ pud_free(&init_mm, pud);
+ }
+
+ p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+ p4d_free(&init_mm, p4d);
+ }
+
+ kfree(save_pgd);
+
+ __flush_tlb_all();
+ early_code_mapping_set_exec(0);
+}
+
+pgd_t * __init efi_uv1_memmap_phys_prolog(void)
+{
+ unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
+ pgd_t *save_pgd, *pgd_k, *pgd_efi;
+ p4d_t *p4d, *p4d_k, *p4d_efi;
+ pud_t *pud;
+
+ int pgd;
+ int n_pgds, i, j;
+
+ early_code_mapping_set_exec(1);
+
+ n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
+ save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
+ if (!save_pgd)
+ return NULL;
+
+ /*
+ * Build 1:1 identity mapping for UV1 memmap usage. Note that
+ * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
+ * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical
+ * address X, the pud_index(X) != pud_index(__va(X)), we can only copy
+ * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping.
+ * This means here we can only reuse the PMD tables of the direct mapping.
+ */
+ for (pgd = 0; pgd < n_pgds; pgd++) {
+ addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
+ vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
+ pgd_efi = pgd_offset_k(addr_pgd);
+ save_pgd[pgd] = *pgd_efi;
+
+ p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
+ if (!p4d) {
+ pr_err("Failed to allocate p4d table!\n");
+ goto out;
+ }
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ addr_p4d = addr_pgd + i * P4D_SIZE;
+ p4d_efi = p4d + p4d_index(addr_p4d);
+
+ pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
+ if (!pud) {
+ pr_err("Failed to allocate pud table!\n");
+ goto out;
+ }
+
+ for (j = 0; j < PTRS_PER_PUD; j++) {
+ addr_pud = addr_p4d + j * PUD_SIZE;
+
+ if (addr_pud > (max_pfn << PAGE_SHIFT))
+ break;
+
+ vaddr = (unsigned long)__va(addr_pud);
+
+ pgd_k = pgd_offset_k(vaddr);
+ p4d_k = p4d_offset(pgd_k, vaddr);
+ pud[j] = *pud_offset(p4d_k, vaddr);
+ }
+ }
+ pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
+ }
+
+ __flush_tlb_all();
+ return save_pgd;
+out:
+ efi_uv1_memmap_phys_epilog(save_pgd);
+ return NULL;
+}
+
+void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
+ u32 type, u64 attribute)
+{
+ unsigned long last_map_pfn;
+
+ if (type == EFI_MEMORY_MAPPED_IO)
+ return ioremap(phys_addr, size);
+
+ last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+ if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
+ unsigned long top = last_map_pfn << PAGE_SHIFT;
+ efi_ioremap(top, size - (top - phys_addr), type, attribute);
+ }
+
+ if (!(attribute & EFI_MEMORY_WB))
+ efi_memory_uc((u64)(unsigned long)__va(phys_addr), size);
+
+ return (void __iomem *)__va(phys_addr);
+}
+
+static int __init arch_parse_efi_cmdline(char *str)
+{
+ if (!str) {
+ pr_warn("need at least one option\n");
+ return -EINVAL;
+ }
+
+ if (!efi_is_mixed() && parse_option_str(str, "old_map"))
+ set_bit(EFI_UV1_MEMMAP, &efi.flags);
+
+ return 0;
+}
+early_param("efi", arch_parse_efi_cmdline);
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a04551ee5568..1abe455d926a 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -31,7 +31,7 @@ static efi_system_table_t efi_systab_xen __initdata = {
.con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
.con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
.con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
- .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .con_out = NULL, /* Not used under Xen. */
.stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
.stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
.runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR,
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c8dbee62ec2a..bbba8b17829a 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -67,7 +67,7 @@
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/smp.h>
#include <asm/tlb.h>