diff options
author | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2013-06-10 12:46:22 -0400 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2013-06-10 12:46:22 -0400 |
commit | 6c63f3caff1731fdded6ba500201b01abab76d98 (patch) | |
tree | c1f1ff67ac59644ceef4472100c0428cd42e6ad6 /arch/x86 | |
parent | 08f69f2aa0f6049ad620e0aa70730374a7884b93 (diff) | |
parent | 0e0764358c7833a8a2a721c8db6c30c3e7cd3df7 (diff) |
Merge branch 'stable/pvh.v8' into linux-next
* stable/pvh.v8:
x86/xen: remove depends on CONFIG_EXPERIMENTAL
xen/pvh: Use ballooning to allocate grant table pages [v2]
xen/pvh: remove code to map iomem from guest
xen/pvh: specify xen features strings cleanly for PVH
xen: implement updated XENMEM_add_to_physmap_range ABI
x86/xen: Use __pa_symbol instead of __pa on C visible symbols
xen/privcmd: fix condition in privcmd_close()
xen/x86: remove duplicated include from enlighten.c
xen: x86 pvh: use XENMEM_add_to_physmap_range for foreign gmfn mappings
xen/pvh: balloon and grant changes.
xen/pvh: bootup and setup (E820) related changes.
xen/pvh: Implement MMU changes for PVH.
xen/pvh: Extend vcpu_guest_context, p2m, event, and XenBus.
xen/pvh: Support ParaVirtualized Hardware extensions.
Conflicts:
arch/x86/xen/mmu.c
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/xen/interface.h | 11 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/page.h | 3 | ||||
-rw-r--r-- | arch/x86/xen/Kconfig | 8 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 76 | ||||
-rw-r--r-- | arch/x86/xen/irq.c | 5 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 158 | ||||
-rw-r--r-- | arch/x86/xen/mmu.h | 2 | ||||
-rw-r--r-- | arch/x86/xen/p2m.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 59 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 39 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 10 |
11 files changed, 315 insertions, 58 deletions
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index fd9cb7695b5f..20e738a90763 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -145,7 +145,16 @@ struct vcpu_guest_context { struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ - unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + union { + struct { + /* PV: GDT (machine frames, # ents).*/ + unsigned long gdt_frames[16], gdt_ents; + } pv; + struct { + /* PVH: GDTR addr and size */ + unsigned long gdtaddr, gdtsz; + } pvh; + } u; unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 6aef9fbc09b7..3143a8a300ed 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -159,6 +159,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) static inline unsigned long mfn_to_local_pfn(unsigned long mfn) { unsigned long pfn = mfn_to_pfn(mfn); + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; if (get_phys_to_machine(pfn) != mfn) return -1; /* force !pfn_valid() */ return pfn; diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1a3c76505649..47316b9134bf 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -51,3 +51,11 @@ config XEN_DEBUG_FS Enable statistics output and various tuning options in debugfs. Enabling this option may incur a significant performance overhead. +config XEN_X86_PVH + bool "Support for running as a PVH guest" + depends on X86_64 && XEN + default n + help + This option enables support for running as a PVH guest (PV guest + using hardware extensions) under a suitably capable hypervisor. + If unsure, say N. diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a492be2635ac..591cf3cefc29 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -129,6 +129,9 @@ RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); __read_mostly int xen_have_vector_callback; EXPORT_SYMBOL_GPL(xen_have_vector_callback); +#define xen_pvh_domain() (xen_pv_domain() && \ + xen_feature(XENFEAT_auto_translated_physmap) && \ + xen_have_vector_callback) /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. @@ -262,8 +265,9 @@ static void __init xen_banner(void) struct xen_extraversion extra; HYPERVISOR_xen_version(XENVER_extraversion, &extra); - printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - pv_info.name); + pr_info("Booting paravirtualized kernel %son %s\n", + xen_feature(XENFEAT_auto_translated_physmap) ? + "with PVH extensions " : "", pv_info.name); printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); @@ -331,12 +335,15 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, break; } - asm(XEN_EMULATE_PREFIX "cpuid" - : "=a" (*ax), - "=b" (*bx), - "=c" (*cx), - "=d" (*dx) - : "0" (*ax), "2" (*cx)); + if (xen_pvh_domain()) + native_cpuid(ax, bx, cx, dx); + else + asm(XEN_EMULATE_PREFIX "cpuid" + : "=a" (*ax), + "=b" (*bx), + "=c" (*cx), + "=d" (*dx) + : "0" (*ax), "2" (*cx)); *bx &= maskebx; *cx &= maskecx; @@ -1122,6 +1129,10 @@ void xen_setup_shared_info(void) HYPERVISOR_shared_info = (struct shared_info *)__va(xen_start_info->shared_info); + /* PVH TBD/FIXME: vcpu info placement in phase 2 */ + if (xen_pvh_domain()) + return; + #ifndef CONFIG_SMP /* In UP this is as good a place as any to set up shared info */ xen_setup_vcpu_info_placement(); @@ -1407,6 +1418,11 @@ static void __init xen_boot_params_init_edd(void) */ static void __init xen_setup_stackprotector(void) { + /* PVH TBD/FIXME: investigate setup_stack_canary_segment */ + if (xen_feature(XENFEAT_auto_translated_physmap)) { + switch_to_new_gdt(0); + return; + } pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; pv_cpu_ops.load_gdt = xen_load_gdt_boot; @@ -1417,6 +1433,19 @@ static void __init xen_setup_stackprotector(void) pv_cpu_ops.load_gdt = xen_load_gdt; } +static void __init xen_pvh_early_guest_init(void) +{ + if (xen_feature(XENFEAT_hvm_callback_vector)) + xen_have_vector_callback = 1; + +#ifdef CONFIG_X86_32 + if (xen_feature(XENFEAT_auto_translated_physmap)) { + xen_raw_printk("ERROR: 32bit PVH guests are not supported\n"); + BUG(); + } +#endif +} + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -1428,13 +1457,18 @@ asmlinkage void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; + xen_setup_features(); + xen_pvh_early_guest_init(); xen_setup_machphys_mapping(); /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; - pv_cpu_ops = xen_cpu_ops; pv_apic_ops = xen_apic_ops; + if (xen_pvh_domain()) + pv_cpu_ops.cpuid = xen_cpuid; + else + pv_cpu_ops = xen_cpu_ops; x86_init.resources.memory_setup = xen_memory_setup; x86_init.oem.arch_setup = xen_arch_setup; @@ -1466,8 +1500,6 @@ asmlinkage void __init xen_start_kernel(void) /* Work out if we support NX */ x86_configure_nx(); - xen_setup_features(); - /* Get mfn list */ if (!xen_feature(XENFEAT_auto_translated_physmap)) xen_build_dynamic_phys_to_machine(); @@ -1545,14 +1577,18 @@ asmlinkage void __init xen_start_kernel(void) /* set the limit of our address space */ xen_reserve_top(); - /* We used to do this in xen_arch_setup, but that is too late on AMD - * were early_cpu_init (run before ->arch_setup()) calls early_amd_init - * which pokes 0xcf8 port. - */ - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - xen_raw_printk("physdev_op failed %d\n", rc); + /* PVH: runs at default kernel iopl of 0 */ + if (!xen_pvh_domain()) { + /* + * We used to do this in xen_arch_setup, but that is too late + * on AMD were early_cpu_init (run before ->arch_setup()) calls + * early_amd_init which pokes 0xcf8 port. + */ + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + xen_raw_printk("physdev_op failed %d\n", rc); + } #ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ @@ -1622,6 +1658,8 @@ asmlinkage void __init xen_start_kernel(void) } void __ref xen_hvm_init_shared_info(void) +/* Use a pfn in RAM, may move to MMIO before kexec. + * This function also called for PVH dom0 */ { int cpu; struct xen_add_to_physmap xatp; diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 01a4dc015ae1..fcbe56acd88c 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -5,6 +5,7 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> #include <xen/interface/vcpu.h> +#include <xen/features.h> #include <xen/events.h> #include <asm/xen/hypercall.h> @@ -129,6 +130,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { void __init xen_init_irq_ops(void) { - pv_irq_ops = xen_irq_ops; + /* For PVH we use default pv_irq_ops settings */ + if (!xen_feature(XENFEAT_hvm_callback_vector)) + pv_irq_ops = xen_irq_ops; x86_init.irqs.intr_init = xen_init_IRQ; } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index fdc3ba28ca38..33c113aff6f8 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -74,6 +74,7 @@ #include <xen/interface/version.h> #include <xen/interface/memory.h> #include <xen/hvc-console.h> +#include <xen/balloon.h> #include "multicalls.h" #include "mmu.h" @@ -1207,6 +1208,8 @@ static void __init xen_pagetable_init(void) #endif paging_init(); xen_setup_shared_info(); + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; #ifdef CONFIG_X86_64 if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long new_mfn_list; @@ -1491,7 +1494,8 @@ static int xen_pgd_alloc(struct mm_struct *mm) if (user_pgd != NULL) { user_pgd[pgd_index(VSYSCALL_START)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + __pgd(__pa_symbol(level3_user_vsyscall) | + _PAGE_TABLE); ret = 0; } @@ -1556,6 +1560,10 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { struct mmuext_op op; + + if (xen_feature(XENFEAT_writable_page_tables)) + return; + op.cmd = cmd; op.arg1.mfn = pfn_to_mfn(pfn); if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) @@ -1753,6 +1761,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); + /* recall for PVH, page tables are native. */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) BUG(); } @@ -1834,6 +1846,9 @@ static void convert_pfn_mfn(void *v) pte_t *pte = v; int i; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + /* All levels are converted the same way, so just treat them as ptes. */ for (i = 0; i < PTRS_PER_PTE; i++) @@ -1853,6 +1868,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, (*pt_end)--; } } + /* * Set up the initial kernel pagetable. * @@ -1863,6 +1879,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, * but that's enough to get __va working. We need to fill in the rest * of the physical mapping once some sort of allocator has been set * up. + * NOTE: for PVH, the page tables are native. */ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { @@ -1940,10 +1957,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) * structure to attach it to, so make sure we just set kernel * pgd. */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(init_level4_pgt)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - + if (xen_feature(XENFEAT_writable_page_tables)) { + native_write_cr3(__pa_symbol(init_level4_pgt)); + } else { + xen_mc_batch(); + __xen_write_cr3(true, __pa_symbol(init_level4_pgt)); + xen_mc_issue(PARAVIRT_LAZY_CPU); + } /* We can't that easily rip out L3 and L2, as the Xen pagetables are * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for * the initial domain. For guests using the toolstack, they are in: @@ -1964,10 +1984,10 @@ static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); static void __init xen_write_cr3_init(unsigned long cr3) { - unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); + unsigned long pfn = PFN_DOWN(__pa_symbol(swapper_pg_dir)); - BUG_ON(read_cr3() != __pa(initial_page_table)); - BUG_ON(cr3 != __pa(swapper_pg_dir)); + BUG_ON(read_cr3() != __pa_symbol(initial_page_table)); + BUG_ON(cr3 != __pa_symbol(swapper_pg_dir)); /* * We are switching to swapper_pg_dir for the first time (from @@ -1991,7 +2011,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(initial_page_table))); + PFN_DOWN(__pa_symbol(initial_page_table))); set_page_prot(initial_page_table, PAGE_KERNEL); set_page_prot(initial_kernel_pmd, PAGE_KERNEL); @@ -2016,7 +2036,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) copy_page(initial_page_table, pgd); initial_page_table[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); + __pgd(__pa_symbol(initial_kernel_pmd) | _PAGE_PRESENT); set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); set_page_prot(initial_page_table, PAGE_KERNEL_RO); @@ -2025,8 +2045,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, - PFN_DOWN(__pa(initial_page_table))); - xen_write_cr3(__pa(initial_page_table)); + PFN_DOWN(__pa_symbol(initial_page_table))); + xen_write_cr3(__pa_symbol(initial_page_table)); memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); @@ -2207,6 +2227,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { x86_init.paging.pagetable_init = xen_pagetable_init; + + if (xen_feature(XENFEAT_auto_translated_physmap)) { + pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; + return; + } pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); @@ -2482,6 +2507,97 @@ void __init xen_hvm_init_mmu_ops(void) } #endif +/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This for the user space + * creating new guest on PVH dom0 and needs to map domU pages. + */ +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn, + unsigned int domid) +{ + int rc; + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .foreign_domid = domid, + .size = 1, + .space = XENMAPSPACE_gmfn_foreign, + }; + xen_ulong_t idx = fgmfn; + xen_pfn_t gpfn = lpfn; + int err = 0; + + set_xen_guest_handle(xatp.idxs, &idx); + set_xen_guest_handle(xatp.gpfns, &gpfn); + set_xen_guest_handle(xatp.errs, &err); + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + if (rc || err) + pr_warn("d0: Failed to map pfn (0x%lx) to mfn (0x%lx) rc:%d:%d\n", + lpfn, fgmfn, rc, err); + return rc; +} + +static int pvh_rem_xen_p2m(unsigned long spfn, int count) +{ + struct xen_remove_from_physmap xrp; + int i, rc; + + for (i = 0; i < count; i++) { + xrp.domid = DOMID_SELF; + xrp.gpfn = spfn+i; + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); + if (rc) { + pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n", + spfn+i, rc, i); + return 1; + } + } + return 0; +} + +struct pvh_remap_data { + unsigned long fgmfn; /* foreign domain's gmfn */ + pgprot_t prot; + domid_t domid; + int index; + struct page **pages; +}; + +static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, + void *data) +{ + int rc; + struct pvh_remap_data *remap = data; + unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); + pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); + + rc = pvh_add_to_xen_p2m(pfn, remap->fgmfn, remap->domid); + if (rc) + return rc; + native_set_pte(ptep, pteval); + + return 0; +} + +static int pvh_remap_gmfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long mfn, int nr, + pgprot_t prot, unsigned domid, + struct page **pages) +{ + int err; + struct pvh_remap_data pvhdata; + + BUG_ON(!pages); + + pvhdata.fgmfn = mfn; + pvhdata.prot = prot; + pvhdata.domid = domid; + pvhdata.index = 0; + pvhdata.pages = pages; + err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, + pvh_map_pte_fn, &pvhdata); + flush_tlb_all(); + return err; +} + #define REMAP_BATCH_SIZE 16 struct remap_data { @@ -2523,6 +2639,10 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); + if (xen_feature(XENFEAT_auto_translated_physmap)) { + /* We need to update the local page tables and the xen HAP */ + return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid, pages); + } rmd.mfn = mfn; rmd.prot = prot; @@ -2560,6 +2680,18 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) return 0; - return -EINVAL; + while (numpgs--) { + + /* the mmu has already cleaned up the process mmu resources at + * this point (lookup_address will return NULL). */ + unsigned long pfn = page_to_pfn(pages[numpgs]); + + pvh_rem_xen_p2m(pfn, 1); + } + /* We don't need to flush tlbs because as part of pvh_rem_xen_p2m(), + * the hypervisor will do tlb flushes after removing the p2m entries + * from the EPT/NPT */ + + return 0; } EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 73809bb951b4..6d0bb56a6e82 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void); extern void xen_init_mmu_ops(void); extern void xen_hvm_init_mmu_ops(void); +extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn, + int nr_mfns, int add_mapping); #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa5927e..ea553c864ff3 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -798,7 +798,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { unsigned topidx, mididx, idx; - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + if (xen_feature(XENFEAT_auto_translated_physmap)) { BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); return true; } diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 94eac5c85cdc..c4874c05774b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -27,6 +27,7 @@ #include <xen/interface/memory.h> #include <xen/interface/physdev.h> #include <xen/features.h> +#include "mmu.h" #include "xen-ops.h" #include "vdso.h" @@ -78,6 +79,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size) memblock_reserve(start, size); + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + xen_max_p2m_pfn = PFN_DOWN(start + size); for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); @@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, .domid = DOMID_SELF }; unsigned long len = 0; + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap); unsigned long pfn; int ret; @@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, continue; frame = mfn; } else { - if (mfn != INVALID_P2M_ENTRY) + if (!xlated_phys && mfn != INVALID_P2M_ENTRY) continue; frame = pfn; } @@ -230,6 +235,22 @@ static void __init xen_set_identity_and_release_chunk( *identity += set_phys_range_identity(start_pfn, end_pfn); } + +/* + * PVH: xen has already mapped the IO space in the EPT/NPT for us, so we + * just need to adjust the released and identity count. + */ +static void __init xen_pvh_adjust_stats(unsigned long start_pfn, + unsigned long end_pfn, unsigned long *released, + unsigned long *identity, unsigned long max_pfn) +{ + if (start_pfn <= max_pfn) { + unsigned long end = min(max_pfn_mapped, end_pfn); + *released += end - start_pfn; + } + *identity += end_pfn - start_pfn; +} + static unsigned long __init xen_set_identity_and_release( const struct e820entry *list, size_t map_size, unsigned long nr_pages) { @@ -238,6 +259,7 @@ static unsigned long __init xen_set_identity_and_release( unsigned long identity = 0; const struct e820entry *entry; int i; + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap); /* * Combine non-RAM regions and gaps until a RAM region (or the @@ -259,11 +281,17 @@ static unsigned long __init xen_set_identity_and_release( if (entry->type == E820_RAM) end_pfn = PFN_UP(entry->addr); - if (start_pfn < end_pfn) - xen_set_identity_and_release_chunk( - start_pfn, end_pfn, nr_pages, - &released, &identity); - + if (start_pfn < end_pfn) { + if (xlated_phys) { + xen_pvh_adjust_stats(start_pfn, + end_pfn, &released, &identity, + nr_pages); + } else { + xen_set_identity_and_release_chunk( + start_pfn, end_pfn, nr_pages, + &released, &identity); + } + } start = end; } } @@ -526,16 +554,14 @@ void __cpuinit xen_enable_syscall(void) #endif /* CONFIG_X86_64 */ } -void __init xen_arch_setup(void) +/* Non auto translated PV domain, ie, it's not PVH. */ +static __init void xen_pvmmu_arch_setup(void) { - xen_panic_handler_init(); - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); - if (!xen_feature(XENFEAT_auto_translated_physmap)) - HYPERVISOR_vm_assist(VMASST_CMD_enable, - VMASST_TYPE_pae_extended_cr3); + HYPERVISOR_vm_assist(VMASST_CMD_enable, + VMASST_TYPE_pae_extended_cr3); if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) @@ -543,6 +569,15 @@ void __init xen_arch_setup(void) xen_enable_sysenter(); xen_enable_syscall(); +} + +/* This function not called for HVM domain */ +void __init xen_arch_setup(void) +{ + xen_panic_handler_init(); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) + xen_pvmmu_arch_setup(); #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c1367b29c3b1..3d8849963d51 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -73,9 +73,11 @@ static void __cpuinit cpu_bringup(void) touch_softlockup_watchdog(); preempt_disable(); - xen_enable_sysenter(); - xen_enable_syscall(); - + /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ + if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { + xen_enable_sysenter(); + xen_enable_syscall(); + } cpu = smp_processor_id(); smp_store_cpu_info(cpu); cpu_data(cpu).x86_max_cores = 1; @@ -273,10 +275,11 @@ static void __init xen_smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != 0); native_smp_prepare_boot_cpu(); - /* We've switched to the "real" per-cpu gdt, so make sure the - old memory can be recycled */ - make_lowmem_page_readwrite(xen_initial_gdt); - + if (!xen_feature(XENFEAT_writable_page_tables)) { + /* We've switched to the "real" per-cpu gdt, so make sure the + * old memory can be recycled */ + make_lowmem_page_readwrite(xen_initial_gdt); + } xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } @@ -354,7 +357,23 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); - { + if (xen_feature(XENFEAT_auto_translated_physmap) && + xen_feature(XENFEAT_supervisor_mode_kernel)) { + /* Note: PVH is not supported on x86_32. */ +#ifdef CONFIG_X86_64 + ctxt->user_regs.ds = __KERNEL_DS; + ctxt->user_regs.es = 0; + ctxt->user_regs.gs = 0; + + /* GUEST_GDTR_BASE and */ + ctxt->u.pvh.gdtaddr = (unsigned long)gdt; + /* GUEST_GDTR_LIMIT in the VMCS. */ + ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1); + + ctxt->gs_base_user = (unsigned long) + per_cpu(irq_stack_union.gs_base, cpu); +#endif + } else { ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ ctxt->user_regs.ds = __USER_DS; ctxt->user_regs.es = __USER_DS; @@ -369,8 +388,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) make_lowmem_page_readonly(gdt); make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); - ctxt->gdt_frames[0] = gdt_mfn; - ctxt->gdt_ents = GDT_ENTRIES; + ctxt->u.pv.gdt_frames[0] = gdt_mfn; + ctxt->u.pv.gdt_ents = GDT_ENTRIES; ctxt->kernel_ss = __KERNEL_DS; ctxt->kernel_sp = idle->thread.sp0; diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7faed5869e5b..45226cb9493a 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -13,6 +13,14 @@ #include <xen/interface/elfnote.h> #include <asm/xen/interface.h> +#ifdef CONFIG_XEN_X86_PVH + +#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector" + +#else +#define PVH_FEATURES_STR "" +#endif + __INIT ENTRY(startup_xen) cld @@ -95,7 +103,7 @@ NEXT_HYPERCALL(arch_6) #endif ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR); ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, |