From 0432523f4807a83902857347bd73eb817ef0a742 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 22 Sep 2021 12:16:35 +0200 Subject: xen/privcmd: replace kcalloc() by kvcalloc() when allocating empty pages Osstest has been suffering test failures for a little while from order-4 allocation failures, resulting from alloc_empty_pages() calling kcalloc(). As there's no need for physically contiguous space here, switch to kvcalloc(). Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/6d698901-98a4-05be-c421-bcd0713f5335@suse.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 720a7b7abd46..a3f2031aa3d9 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -420,7 +420,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) int rc; struct page **pages; - pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); + pages = kvcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); if (pages == NULL) return -ENOMEM; @@ -428,7 +428,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) if (rc != 0) { pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, numpgs, rc); - kfree(pages); + kvfree(pages); return -ENOMEM; } BUG_ON(vma->vm_private_data != NULL); @@ -912,7 +912,7 @@ static void privcmd_close(struct vm_area_struct *vma) else pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", numpgs, rc); - kfree(pages); + kvfree(pages); } static vm_fault_t privcmd_fault(struct vm_fault *vmf) -- cgit v1.2.3 From e11423d6721dd63b23fb41ade5e8d0b448b17780 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 22 Sep 2021 12:17:48 +0200 Subject: xen/privcmd: fix error handling in mmap-resource processing xen_pfn_t is the same size as int only on 32-bit builds (and not even on Arm32). 
Hence pfns[] can't be used directly to read individual error values returned from xen_remap_domain_mfn_array(); every other error indicator would be skipped/ignored on 64-bit. Fixes: 3ad0876554ca ("xen/privcmd: add IOCTL_PRIVCMD_MMAP_RESOURCE") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/aa6d6a67-6889-338a-a910-51e889f792d5@suse.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index a3f2031aa3d9..5af2a295e32f 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -803,11 +803,12 @@ static long privcmd_ioctl_mmap_resource(struct file *file, unsigned int domid = (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? DOMID_SELF : kdata.dom; - int num; + int num, *errs = (int *)pfns; + BUILD_BUG_ON(sizeof(*errs) > sizeof(*pfns)); num = xen_remap_domain_mfn_array(vma, kdata.addr & PAGE_MASK, - pfns, kdata.num, (int *)pfns, + pfns, kdata.num, errs, vma->vm_page_prot, domid, vma->vm_private_data); @@ -817,7 +818,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, unsigned int i; for (i = 0; i < num; i++) { - rc = pfns[i]; + rc = errs[i]; if (rc < 0) break; } -- cgit v1.2.3 From 97315723c463679a9ecf803d6479fca24c3efda0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 22 Sep 2021 12:18:25 +0200 Subject: xen/privcmd: drop "pages" parameter from xen_remap_pfn() The function doesn't use it and all of its callers say in a comment that their respective arguments are to be non-NULL only in auto-translated mode. Since xen_remap_domain_mfn_array() isn't supposed to be used by non-PV, drop the parameter there as well. It was bogusly passed as non-NULL (PRIV_VMA_LOCKED) by its only caller anyway. 
For xen_remap_domain_gfn_range(), otoh, it's not clear at all why this wouldn't want / might not need to gain auto-translated support down the road, so the parameter is retained there despite now remaining unused (and the only caller passing NULL); correct a respective comment as well. Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/036ad8a2-46f9-ac3d-6219-bdc93ab9e10b@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/mmu_pv.c | 2 +- drivers/xen/privcmd.c | 5 ++--- include/xen/xen-ops.h | 15 ++++++--------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 8d751939c6f3..3359c23573c5 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2398,7 +2398,7 @@ static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned int domid, bool no_translate, struct page **pages) + unsigned int domid, bool no_translate) { int err = 0; struct remap_data rmd; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 5af2a295e32f..3369734108af 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -257,7 +257,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata) LIST_HEAD(pagelist); struct mmap_gfn_state state; - /* We only support privcmd_ioctl_mmap_batch for auto translated. */ + /* We only support privcmd_ioctl_mmap_batch for non-auto-translated. 
*/ if (xen_feature(XENFEAT_auto_translated_physmap)) return -ENOSYS; @@ -810,8 +810,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, kdata.addr & PAGE_MASK, pfns, kdata.num, errs, vma->vm_page_prot, - domid, - vma->vm_private_data); + domid); if (num < 0) rc = num; else if (num != kdata.num) { diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index db28e79b77ee..a3584a357f35 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -52,12 +52,12 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); #if defined(CONFIG_XEN_PV) int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned int domid, bool no_translate, struct page **pages); + unsigned int domid, bool no_translate); #else static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, unsigned int domid, - bool no_translate, struct page **pages) + bool no_translate) { BUG(); return 0; @@ -134,7 +134,7 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, */ BUG_ON(err_ptr == NULL); return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, - false, pages); + false); } /* @@ -146,7 +146,6 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, * @err_ptr: Returns per-MFN error status. * @prot: page protection mask * @domid: Domain owning the pages - * @pages: Array of pages if this domain has an auto-translated physmap * * @mfn and @err_ptr may point to the same buffer, the MFNs will be * overwritten by the error codes after they are mapped. 
@@ -157,14 +156,13 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *mfn, int nr, int *err_ptr, - pgprot_t prot, unsigned int domid, - struct page **pages) + pgprot_t prot, unsigned int domid) { if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, - true, pages); + true); } /* xen_remap_domain_gfn_range() - map a range of foreign frames @@ -188,8 +186,7 @@ static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; - return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, - pages); + return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false); } int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, -- cgit v1.2.3 From 9172b5c4a778da1f855b2e3780b1afabb3cfd523 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:16:15 +0200 Subject: xen/x86: prevent PVH type from getting clobbered Like xen_start_flags, xen_domain_type gets set before .bss gets cleared. Hence this variable also needs to be prevented from getting put in .bss, which is possible because XEN_NATIVE is an enumerator evaluating to zero. Any use prior to init_hvm_pv_info() setting the variable again would lead to wrong decisions; one such case is xenboot_console_setup() when called as a result of "earlyprintk=xen". Use __ro_after_init as more applicable than either __section(".data") or __read_mostly. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/d301677b-6f22-5ae6-bd36-458e1f323d0b@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c79bd0af2e8c..f252faf5028f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -52,9 +52,6 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); DEFINE_PER_CPU(uint32_t, xen_vcpu_id); EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); -enum xen_domain_type xen_domain_type = XEN_NATIVE; -EXPORT_SYMBOL_GPL(xen_domain_type); - unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; EXPORT_SYMBOL(machine_to_phys_mapping); unsigned long machine_to_phys_nr; @@ -69,9 +66,11 @@ __read_mostly int xen_have_vector_callback; EXPORT_SYMBOL_GPL(xen_have_vector_callback); /* - * NB: needs to live in .data because it's used by xen_prepare_pvh which runs - * before clearing the bss. + * NB: These need to live in .data or alike because they're used by + * xen_prepare_pvh() which runs before clearing the bss. */ +enum xen_domain_type __ro_after_init xen_domain_type = XEN_NATIVE; +EXPORT_SYMBOL_GPL(xen_domain_type); uint32_t xen_start_flags __section(".data") = 0; EXPORT_SYMBOL(xen_start_flags); -- cgit v1.2.3 From cae7d81a3730dfe08623f8c1083230c8d0987639 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:16:47 +0200 Subject: xen/x86: allow PVH Dom0 without XEN_PV=y Decouple XEN_DOM0 from XEN_PV, converting some existing uses of XEN_DOM0 to a new XEN_PV_DOM0. (I'm not convinced all are really / should really be PV-specific, but for starters I've tried to be conservative.) For PVH Dom0 the hypervisor populates MADT with only x2APIC entries, so without x2APIC support enabled in the kernel things aren't going to work very well. (As opposed, DomU-s would only ever see LAPIC entries in MADT as of now.) 
Note that this then requires PVH Dom0 to be 64-bit, as X86_X2APIC depends on X86_64. In the course of this xen_running_on_version_or_later() needs to be available more broadly. Move it from a PV-specific to a generic file, considering that what it does isn't really PV-specific at all anyway. Note that xen/interface/version.h cannot be included on its own; in enlighten.c, which uses SCHEDOP_* anyway, include xen/interface/sched.h first to resolve the apparently sole missing type (xen_ulong_t). Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/983bb72f-53df-b6af-14bd-5e088bd06a08@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/pci.h | 11 +++++++---- arch/x86/pci/xen.c | 15 +++++++++------ arch/x86/xen/Kconfig | 19 ++++++++++++------- arch/x86/xen/Makefile | 2 +- arch/x86/xen/enlighten.c | 17 +++++++++++++++++ arch/x86/xen/enlighten_pv.c | 16 ---------------- arch/x86/xen/xen-ops.h | 2 +- drivers/xen/Kconfig | 4 ++-- 8 files changed, 49 insertions(+), 37 deletions(-) diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 3506d8c598c1..4557f7cb0fa6 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -14,16 +14,19 @@ static inline int pci_xen_hvm_init(void) return -1; } #endif -#if defined(CONFIG_XEN_DOM0) +#ifdef CONFIG_XEN_PV_DOM0 int __init pci_xen_initial_domain(void); -int xen_find_device_domain_owner(struct pci_dev *dev); -int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); -int xen_unregister_device_domain_owner(struct pci_dev *dev); #else static inline int __init pci_xen_initial_domain(void) { return -1; } +#endif +#ifdef CONFIG_XEN_DOM0 +int xen_find_device_domain_owner(struct pci_dev *dev); +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); +int xen_unregister_device_domain_owner(struct pci_dev *dev); +#else static inline int xen_find_device_domain_owner(struct pci_dev *dev) { return -1; diff --git 
a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 3d41a09c2c14..5debe4ac6f81 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -113,7 +113,7 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, false /* no mapping of GSI to PIRQ */); } -#ifdef CONFIG_XEN_DOM0 +#ifdef CONFIG_XEN_PV_DOM0 static int xen_register_gsi(u32 gsi, int triggering, int polarity) { int rc, irq; @@ -261,7 +261,7 @@ error: return irq; } -#ifdef CONFIG_XEN_DOM0 +#ifdef CONFIG_XEN_PV_DOM0 static bool __read_mostly pci_seg_supported = true; static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) @@ -375,10 +375,10 @@ static void xen_initdom_restore_msi_irqs(struct pci_dev *dev) WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret); } } -#else /* CONFIG_XEN_DOM0 */ +#else /* CONFIG_XEN_PV_DOM0 */ #define xen_initdom_setup_msi_irqs NULL #define xen_initdom_restore_msi_irqs NULL -#endif /* !CONFIG_XEN_DOM0 */ +#endif /* !CONFIG_XEN_PV_DOM0 */ static void xen_teardown_msi_irqs(struct pci_dev *dev) { @@ -555,7 +555,7 @@ int __init pci_xen_hvm_init(void) return 0; } -#ifdef CONFIG_XEN_DOM0 +#ifdef CONFIG_XEN_PV_DOM0 int __init pci_xen_initial_domain(void) { int irq; @@ -583,6 +583,9 @@ int __init pci_xen_initial_domain(void) } return 0; } +#endif + +#ifdef CONFIG_XEN_DOM0 struct xen_device_domain_owner { domid_t domain; @@ -656,4 +659,4 @@ int xen_unregister_device_domain_owner(struct pci_dev *dev) return 0; } EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner); -#endif +#endif /* CONFIG_XEN_DOM0 */ diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index afc1da68b06d..6bcd3d8ca6ac 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -43,13 +43,9 @@ config XEN_PV_SMP def_bool y depends on XEN_PV && SMP -config XEN_DOM0 - bool "Xen PV Dom0 support" - default y - depends on XEN_PV && PCI_XEN && SWIOTLB_XEN - depends on X86_IO_APIC && ACPI && PCI - help - Support running as a Xen PV Dom0 guest. 
+config XEN_PV_DOM0 + def_bool y + depends on XEN_PV && XEN_DOM0 config XEN_PVHVM def_bool y @@ -86,3 +82,12 @@ config XEN_PVH def_bool n help Support for running as a Xen PVH guest. + +config XEN_DOM0 + bool "Xen Dom0 support" + default XEN_PV + depends on (XEN_PV && SWIOTLB_XEN) || (XEN_PVH && X86_64) + depends on X86_IO_APIC && ACPI && PCI + select X86_X2APIC if XEN_PVH && X86_64 + help + Support running as a Xen Dom0 guest. diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 40b5779fce21..4953260e281c 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -45,7 +45,7 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += vga.o +obj-$(CONFIG_XEN_PV_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f252faf5028f..501466038075 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -10,6 +10,8 @@ #include #include +#include +#include #include #include @@ -257,6 +259,21 @@ int xen_vcpu_setup(int cpu) return ((per_cpu(xen_vcpu, cpu) == NULL) ? 
-ENODEV : 0); } +/* Check if running on Xen version (major, minor) or later */ +bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) +{ + unsigned int version; + + if (!xen_domain()) + return false; + + version = HYPERVISOR_xen_version(XENVER_version, NULL); + if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || + ((version >> 16) > major)) + return true; + return false; +} + void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 6e0d0754f94f..e46953249e39 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -142,22 +142,6 @@ static void __init xen_pv_guest_late_init(void) #endif } -/* Check if running on Xen version (major, minor) or later */ -bool -xen_running_on_version_or_later(unsigned int major, unsigned int minor) -{ - unsigned int version; - - if (!xen_domain()) - return false; - - version = HYPERVISOR_xen_version(XENVER_version, NULL); - if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || - ((version >> 16) > major)) - return true; - return false; -} - static __read_mostly unsigned int cpuid_leaf5_ecx_val; static __read_mostly unsigned int cpuid_leaf5_edx_val; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 8d7ec49a35fb..6d9b2b3e0c1c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -109,7 +109,7 @@ static inline void xen_uninit_lock_cpu(int cpu) struct dom0_vga_console_info; -#ifdef CONFIG_XEN_DOM0 +#ifdef CONFIG_XEN_PV_DOM0 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 22f5aff0c136..1b2c3aca6887 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -241,7 +241,7 @@ config XEN_PRIVCMD config XEN_ACPI_PROCESSOR tristate "Xen ACPI processor" - depends on XEN 
&& XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ + depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ default m help This ACPI processor uploads Power Management information to the Xen @@ -259,7 +259,7 @@ config XEN_ACPI_PROCESSOR config XEN_MCE_LOG bool "Xen platform mcelog" - depends on XEN_DOM0 && X86_MCE + depends on XEN_PV_DOM0 && X86_MCE help Allow kernel fetching MCE error from Xen platform and converting it into Linux mcelog format for mcelog tools -- cgit v1.2.3 From adf330a7cd64a8bb959dc48a9c282285c1d5b4d5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:17:41 +0200 Subject: xen/x86: make "earlyprintk=xen" work better for PVH Dom0 The xen_hvm_early_write() path better wouldn't be taken in this case; while port 0xE9 can be used, the hypercall path is quite a bit more efficient. Put that first, as it may also work for DomU-s (see also xen_raw_console_write()). While there also bail from the function when the first domU_write_console() failed - later ones aren't going to succeed. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/4fd89dcb-cfc5-c740-2e94-bb271e432d3e@suse.com Signed-off-by: Juergen Gross --- drivers/tty/hvc/hvc_xen.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 92c9a476defc..ee7ea8d762fa 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -621,17 +621,16 @@ static void xenboot_write_console(struct console *console, const char *string, unsigned int linelen, off = 0; const char *pos; + if (dom0_write_console(0, string, len) >= 0) + return; + if (!xen_pv_domain()) { xen_hvm_early_write(0, string, len); return; } - dom0_write_console(0, string, len); - - if (xen_initial_domain()) + if (domU_write_console(0, "(early) ", 8) < 0) return; - - domU_write_console(0, "(early) ", 8); while (off < len && NULL != (pos = strchr(string+off, '\n'))) { linelen = pos-string+off; if (off + linelen > len) -- cgit v1.2.3 From 8e24d9bfc44d3bd884669ef8b344112fe41c9826 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:18:12 +0200 Subject: xen/x86: allow "earlyprintk=xen" to work for PV Dom0 With preferred consoles "tty" and "hvc" announced as preferred, registering "xenboot" early won't result in use of the console: It also needs to be registered as preferred. Generalize this from being DomU-only so far. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/d4a34540-a476-df2c-bca6-732d0d58c5f0@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index e46953249e39..b1c0e06dcaa8 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1348,7 +1348,6 @@ asmlinkage __visible void __init xen_start_kernel(void) boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN; if (!xen_initial_domain()) { - add_preferred_console("xenboot", 0, NULL); if (pci_xen) x86_init.pci.arch_init = pci_xen_init; x86_platform.set_legacy_features = @@ -1393,6 +1392,7 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif } + add_preferred_console("xenboot", 0, NULL); if (!boot_params.screen_info.orig_video_isVGA) add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); -- cgit v1.2.3 From 42bc9716bc1df21b55b303fe243f8575b3af24f9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:18:45 +0200 Subject: xen/x86: make "earlyprintk=xen" work for HVM/PVH DomU xenboot_write_console() is dealing with these quite fine so I don't see why xenboot_console_setup() would return -ENODEV in this case. Adjust documentation accordingly. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/3d212583-700e-8b2d-727a-845ef33ac265@suse.com Signed-off-by: Juergen Gross --- Documentation/admin-guide/kernel-parameters.txt | 2 +- drivers/tty/hvc/hvc_xen.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdb22006f713..259d03fc38d1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1255,7 +1255,7 @@ The VGA and EFI output is eventually overwritten by the real console. - The xen output can only be used by Xen PV guests. + The xen option can only be used in Xen domains. The sclp output can only be used on s390. diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index ee7ea8d762fa..f1c99b4b89b2 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -607,10 +607,8 @@ static int __init xenboot_console_setup(struct console *console, char *string) { static struct xencons_info xenboot; - if (xen_initial_domain()) + if (xen_initial_domain() || !xen_pv_domain()) return 0; - if (!xen_pv_domain()) - return -ENODEV; return xencons_info_pv_init(&xenboot, 0); } -- cgit v1.2.3 From 4d1ab432acc9391a5ae13c629dbb5882c29fd1b0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:19:16 +0200 Subject: xen/x86: generalize preferred console model from PV to PVH Dom0 Without announcing hvc0 as preferred it won't get used as long as tty0 gets registered earlier. This is particularly problematic with there not being any screen output for PVH Dom0 when the screen is in graphics mode, as the necessary information doesn't get conveyed yet from the hypervisor. Follow PV's model, but be conservative and do this for Dom0 only for now. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/582328b6-c86c-37f3-d802-5539b7a86736@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 12 ++++++++++++ arch/x86/xen/enlighten_pv.c | 8 +------- arch/x86/xen/enlighten_pvh.c | 3 +++ arch/x86/xen/xen-ops.h | 2 ++ 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 501466038075..c5b1fd606c6f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -3,6 +3,7 @@ #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG #include #endif +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include "xen-ops.h" #include "smp.h" @@ -274,6 +276,16 @@ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) return false; } +void __init xen_add_preferred_consoles(void) +{ + add_preferred_console("xenboot", 0, NULL); + if (!boot_params.screen_info.orig_video_isVGA) + add_preferred_console("tty", 0, NULL); + add_preferred_console("hvc", 0, NULL); + if (boot_params.screen_info.orig_video_isVGA) + add_preferred_console("tty", 0, NULL); +} + void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index b1c0e06dcaa8..fb3095ba3bf1 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -1392,12 +1391,7 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif } - add_preferred_console("xenboot", 0, NULL); - if (!boot_params.screen_info.orig_video_isVGA) - add_preferred_console("tty", 0, NULL); - add_preferred_console("hvc", 0, NULL); - if (boot_params.screen_info.orig_video_isVGA) - add_preferred_console("tty", 0, NULL); + xen_add_preferred_consoles(); #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. 
*/ diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 0d5e34b9e6f9..9029c5ab60e2 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -36,6 +36,9 @@ void __init xen_pvh_init(struct boot_params *boot_params) pfn = __pa(hypercall_page); wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + if (xen_initial_domain()) + x86_init.oem.arch_setup = xen_add_preferred_consoles; + xen_efi_init(boot_params); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 6d9b2b3e0c1c..524d1243a0ce 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -118,6 +118,8 @@ static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, } #endif +void xen_add_preferred_consoles(void); + void __init xen_init_apic(void); #ifdef CONFIG_XEN_EFI -- cgit v1.2.3 From 079c4baa2aad05e8007faa24b2411c1457f60d74 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:19:48 +0200 Subject: xen/x86: hook up xen_banner() also for PVH This was effectively lost while dropping PVHv1 code. Move the function and arrange for it to be called the same way as done in PV mode. Clearly this then needs re-introducing the XENFEAT_mmu_pt_update_preserve_ad check that was recently removed, as that's a PV-only feature. Since the string pointed at by pv_info.name describes the mode, drop "paravirtualized" from the log message while moving the code. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/de03054d-a20d-2114-bb86-eec28e17b3b8@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 14 ++++++++++++++ arch/x86/xen/enlighten_pv.c | 11 ----------- arch/x86/xen/enlighten_pvh.c | 1 + arch/x86/xen/xen-ops.h | 1 + 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c5b1fd606c6f..57efb484e7df 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -261,6 +261,20 @@ int xen_vcpu_setup(int cpu) return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0); } +void __init xen_banner(void) +{ + unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); + struct xen_extraversion extra; + + HYPERVISOR_xen_version(XENVER_extraversion, &extra); + + pr_info("Booting kernel on %s\n", pv_info.name); + pr_info("Xen version: %u.%u%s%s\n", + version >> 16, version & 0xffff, extra.extraversion, + xen_feature(XENFEAT_mmu_pt_update_preserve_ad) + ? 
" (preserve-AD)" : ""); +} + /* Check if running on Xen version (major, minor) or later */ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) { diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index fb3095ba3bf1..a7b7d674f500 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -108,17 +108,6 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); -static void __init xen_banner(void) -{ - unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); - struct xen_extraversion extra; - HYPERVISOR_xen_version(XENVER_extraversion, &extra); - - pr_info("Booting paravirtualized kernel on %s\n", pv_info.name); - pr_info("Xen version: %d.%d%s (preserve-AD)\n", - version >> 16, version & 0xffff, extra.extraversion); -} - static void __init xen_pv_init_platform(void) { populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 9029c5ab60e2..b20bd5439837 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -38,6 +38,7 @@ void __init xen_pvh_init(struct boot_params *boot_params) if (xen_initial_domain()) x86_init.oem.arch_setup = xen_add_preferred_consoles; + x86_init.oem.banner = xen_banner; xen_efi_init(boot_params); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 524d1243a0ce..8bc8b72a205d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -51,6 +51,7 @@ void __init xen_remap_memory(void); phys_addr_t __init xen_find_free_area(phys_addr_t size); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); +void xen_banner(void); void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); -- cgit v1.2.3 From 59f7e5374175ce5d776efeb12a1e61cd6b1f82fb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:20:42 +0200 Subject: x86/PVH: adjust function/data placement Two of the 
variables can live in .init.data, allowing the open-coded placing in .data to go away. Another "variable" is used to communicate a size value only to very early assembly code, which hence can be both const and live in .init.*. Additionally two functions were lacking __init annotations. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/3b0bb22e-43f4-e459-c5cb-169f996b5669@suse.com Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/enlighten.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 9ac7457f52a3..ed0442e35434 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -16,15 +16,15 @@ /* * PVH variables. * - * pvh_bootparams and pvh_start_info need to live in the data segment since + * pvh_bootparams and pvh_start_info need to live in a data segment since * they are used after startup_{32|64}, which clear .bss, are invoked. 
*/ -struct boot_params pvh_bootparams __section(".data"); -struct hvm_start_info pvh_start_info __section(".data"); +struct boot_params __initdata pvh_bootparams; +struct hvm_start_info __initdata pvh_start_info; -unsigned int pvh_start_info_sz = sizeof(pvh_start_info); +const unsigned int __initconst pvh_start_info_sz = sizeof(pvh_start_info); -static u64 pvh_get_root_pointer(void) +static u64 __init pvh_get_root_pointer(void) { return pvh_start_info.rsdp_paddr; } @@ -107,7 +107,7 @@ void __init __weak xen_pvh_init(struct boot_params *boot_params) BUG(); } -static void hypervisor_specific_init(bool xen_guest) +static void __init hypervisor_specific_init(bool xen_guest) { if (xen_guest) xen_pvh_init(&pvh_bootparams); -- cgit v1.2.3 From 9c11112c0ec7ec322cd495320c3ab9fa8bdc1bbc Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:21:13 +0200 Subject: xen/x86: adjust data placement Both xen_pvh and xen_start_flags get written just once early during init. Using the respective annotation then allows the open-coded placing in .data to go away. Additionally the former, like the latter, wants exporting, or else xen_pvh_domain() can't be used from modules. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/8155ed26-5a1d-c06f-42d8-596d26e75849@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_pvh.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 57efb484e7df..95d970359e17 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); */ enum xen_domain_type __ro_after_init xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); -uint32_t xen_start_flags __section(".data") = 0; +uint32_t __ro_after_init xen_start_flags; EXPORT_SYMBOL(xen_start_flags); /* diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index b20bd5439837..bcae606bbc5c 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include @@ -18,10 +19,11 @@ /* * PVH variables. * - * The variable xen_pvh needs to live in the data segment since it is used + * The variable xen_pvh needs to live in a data segment since it is used * after startup_{32|64} is invoked, which will clear the .bss segment. */ -bool xen_pvh __section(".data") = 0; +bool __ro_after_init xen_pvh; +EXPORT_SYMBOL_GPL(xen_pvh); void __init xen_pvh_init(struct boot_params *boot_params) { -- cgit v1.2.3 From 319933a80fd4f07122466a77f93e5019d71be74c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 5 Oct 2021 15:34:33 +0200 Subject: xen/balloon: fix cancelled balloon action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case a ballooning action is cancelled the new kernel thread handling the ballooning might end up in a busy loop. Fix that by handling the cancelled action gracefully. While at it introduce a short wait for the BP_WAIT case. 
Cc: stable@vger.kernel.org Fixes: 8480ed9c2bbd56 ("xen/balloon: use a kernel thread instead a workqueue") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Tested-by: Jason Andryuk Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20211005133433.32008-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/balloon.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 43ebfe36ac27..3a50f097ed3e 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -491,12 +491,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } /* - * Stop waiting if either state is not BP_EAGAIN and ballooning action is - * needed, or if the credit has changed while state is BP_EAGAIN. + * Stop waiting if either state is BP_DONE and ballooning action is + * needed, or if the credit has changed while state is not BP_DONE. */ static bool balloon_thread_cond(enum bp_state state, long credit) { - if (state != BP_EAGAIN) + if (state == BP_DONE) credit = 0; return current_credit() != credit || kthread_should_stop(); @@ -516,10 +516,19 @@ static int balloon_thread(void *unused) set_freezable(); for (;;) { - if (state == BP_EAGAIN) - timeout = balloon_stats.schedule_delay * HZ; - else + switch (state) { + case BP_DONE: + case BP_ECANCELED: timeout = 3600 * HZ; + break; + case BP_EAGAIN: + timeout = balloon_stats.schedule_delay * HZ; + break; + case BP_WAIT: + timeout = HZ; + break; + } + credit = current_credit(); wait_event_freezable_timeout(balloon_thread_wq, -- cgit v1.2.3