commit 6c63f3caff1731fdded6ba500201b01abab76d98
tree   c1f1ff67ac59644ceef4472100c0428cd42e6ad6
parent 08f69f2aa0f6049ad620e0aa70730374a7884b93
parent 0e0764358c7833a8a2a721c8db6c30c3e7cd3df7
author    Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>  2013-06-10 12:46:22 -0400
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>  2013-06-10 12:46:22 -0400
Merge branch 'stable/pvh.v8' into linux-next

* stable/pvh.v8:
  x86/xen: remove depends on CONFIG_EXPERIMENTAL
  xen/pvh: Use ballooning to allocate grant table pages [v2]
  xen/pvh: remove code to map iomem from guest
  xen/pvh: specify xen features strings cleanly for PVH
  xen: implement updated XENMEM_add_to_physmap_range ABI
  x86/xen: Use __pa_symbol instead of __pa on C visible symbols
  xen/privcmd: fix condition in privcmd_close()
  xen/x86: remove duplicated include from enlighten.c
  xen: x86 pvh: use XENMEM_add_to_physmap_range for foreign gmfn mappings
  xen/pvh: balloon and grant changes.
  xen/pvh: bootup and setup (E820) related changes.
  xen/pvh: Implement MMU changes for PVH.
  xen/pvh: Extend vcpu_guest_context, p2m, event, and XenBus.
  xen/pvh: Support ParaVirtualized Hardware extensions.

Conflicts:
	arch/x86/xen/mmu.c
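For orientation before the diff: most of the new code keys off Xen feature flags rather than a single "is PVH" bit. A minimal sketch (not part of the patch; is_pvh_guest() is a hypothetical name) of the predicate the series introduces as the xen_pvh_domain() macro in enlighten.c below:

	/* Sketch: a PVH guest is a PV guest that is auto-translated and
	 * receives event notifications via the HVM callback vector. */
	static inline bool is_pvh_guest(void)
	{
		return xen_pv_domain() &&
		       xen_feature(XENFEAT_auto_translated_physmap) &&
		       xen_have_vector_callback;
	}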
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/xen/interface.h   11
-rw-r--r--  arch/x86/include/asm/xen/page.h         3
-rw-r--r--  arch/x86/xen/Kconfig                    8
-rw-r--r--  arch/x86/xen/enlighten.c               76
-rw-r--r--  arch/x86/xen/irq.c                      5
-rw-r--r--  arch/x86/xen/mmu.c                    158
-rw-r--r--  arch/x86/xen/mmu.h                      2
-rw-r--r--  arch/x86/xen/p2m.c                      2
-rw-r--r--  arch/x86/xen/setup.c                   59
-rw-r--r--  arch/x86/xen/smp.c                     39
-rw-r--r--  arch/x86/xen/xen-head.S                10
11 files changed, 315 insertions, 58 deletions
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index fd9cb7695b5f..20e738a90763 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -145,7 +145,16 @@ struct vcpu_guest_context {
struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
- unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+ union {
+ struct {
+ /* PV: GDT (machine frames, # ents).*/
+ unsigned long gdt_frames[16], gdt_ents;
+ } pv;
+ struct {
+ /* PVH: GDTR addr and size */
+ unsigned long gdtaddr, gdtsz;
+ } pvh;
+ } u;
unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
/* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
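The new union changes how a vcpu's GDT is described to the hypervisor. A hedged sketch of the two fill patterns (the real user is the smp.c hunk below; `pvh` stands in for whatever guest-type check the caller uses):

	struct vcpu_guest_context ctxt = { };

	if (pvh) {
		/* PVH: pass a GDTR base/size pair directly. */
		ctxt.u.pvh.gdtaddr = (unsigned long)gdt;
		ctxt.u.pvh.gdtsz   = (unsigned long)(GDT_SIZE - 1);
	} else {
		/* PV: describe the GDT as machine frames plus entry count. */
		ctxt.u.pv.gdt_frames[0] = gdt_mfn;
		ctxt.u.pv.gdt_ents      = GDT_ENTRIES;
	}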
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 6aef9fbc09b7..3143a8a300ed 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -159,6 +159,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
{
unsigned long pfn = mfn_to_pfn(mfn);
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return mfn;
if (get_phys_to_machine(pfn) != mfn)
return -1; /* force !pfn_valid() */
return pfn;
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1a3c76505649..47316b9134bf 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -51,3 +51,11 @@ config XEN_DEBUG_FS
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+config XEN_X86_PVH
+ bool "Support for running as a PVH guest"
+ depends on X86_64 && XEN
+ default n
+ help
+ This option enables support for running as a PVH guest (PV guest
+ using hardware extensions) under a suitably capable hypervisor.
+ If unsure, say N.
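A config fragment that exercises the new option (sketch; per the depends line, XEN_X86_PVH is only offered on 64-bit Xen builds):

	CONFIG_X86_64=y
	CONFIG_XEN=y
	CONFIG_XEN_X86_PVH=y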
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a492be2635ac..591cf3cefc29 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -129,6 +129,9 @@ RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
__read_mostly int xen_have_vector_callback;
EXPORT_SYMBOL_GPL(xen_have_vector_callback);
+#define xen_pvh_domain() (xen_pv_domain() && \
+ xen_feature(XENFEAT_auto_translated_physmap) && \
+ xen_have_vector_callback)
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
@@ -262,8 +265,9 @@ static void __init xen_banner(void)
struct xen_extraversion extra;
HYPERVISOR_xen_version(XENVER_extraversion, &extra);
- printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
- pv_info.name);
+ pr_info("Booting paravirtualized kernel %son %s\n",
+ xen_feature(XENFEAT_auto_translated_physmap) ?
+ "with PVH extensions " : "", pv_info.name);
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
@@ -331,12 +335,15 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
break;
}
- asm(XEN_EMULATE_PREFIX "cpuid"
- : "=a" (*ax),
- "=b" (*bx),
- "=c" (*cx),
- "=d" (*dx)
- : "0" (*ax), "2" (*cx));
+ if (xen_pvh_domain())
+ native_cpuid(ax, bx, cx, dx);
+ else
+ asm(XEN_EMULATE_PREFIX "cpuid"
+ : "=a" (*ax),
+ "=b" (*bx),
+ "=c" (*cx),
+ "=d" (*dx)
+ : "0" (*ax), "2" (*cx));
*bx &= maskebx;
*cx &= maskecx;
@@ -1122,6 +1129,10 @@ void xen_setup_shared_info(void)
HYPERVISOR_shared_info =
(struct shared_info *)__va(xen_start_info->shared_info);
+ /* PVH TBD/FIXME: vcpu info placement in phase 2 */
+ if (xen_pvh_domain())
+ return;
+
#ifndef CONFIG_SMP
/* In UP this is as good a place as any to set up shared info */
xen_setup_vcpu_info_placement();
@@ -1407,6 +1418,11 @@ static void __init xen_boot_params_init_edd(void)
*/
static void __init xen_setup_stackprotector(void)
{
+ /* PVH TBD/FIXME: investigate setup_stack_canary_segment */
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ switch_to_new_gdt(0);
+ return;
+ }
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot;
@@ -1417,6 +1433,19 @@ static void __init xen_setup_stackprotector(void)
pv_cpu_ops.load_gdt = xen_load_gdt;
}
+static void __init xen_pvh_early_guest_init(void)
+{
+ if (xen_feature(XENFEAT_hvm_callback_vector))
+ xen_have_vector_callback = 1;
+
+#ifdef CONFIG_X86_32
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ xen_raw_printk("ERROR: 32bit PVH guests are not supported\n");
+ BUG();
+ }
+#endif
+}
+
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
@@ -1428,13 +1457,18 @@ asmlinkage void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
+ xen_setup_features();
+ xen_pvh_early_guest_init();
xen_setup_machphys_mapping();
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
- pv_cpu_ops = xen_cpu_ops;
pv_apic_ops = xen_apic_ops;
+ if (xen_pvh_domain())
+ pv_cpu_ops.cpuid = xen_cpuid;
+ else
+ pv_cpu_ops = xen_cpu_ops;
x86_init.resources.memory_setup = xen_memory_setup;
x86_init.oem.arch_setup = xen_arch_setup;
@@ -1466,8 +1500,6 @@ asmlinkage void __init xen_start_kernel(void)
/* Work out if we support NX */
x86_configure_nx();
- xen_setup_features();
-
/* Get mfn list */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_build_dynamic_phys_to_machine();
@@ -1545,14 +1577,18 @@ asmlinkage void __init xen_start_kernel(void)
/* set the limit of our address space */
xen_reserve_top();
- /* We used to do this in xen_arch_setup, but that is too late on AMD
- * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
- * which pokes 0xcf8 port.
- */
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- xen_raw_printk("physdev_op failed %d\n", rc);
+ /* PVH: runs at default kernel iopl of 0 */
+ if (!xen_pvh_domain()) {
+ /*
+ * We used to do this in xen_arch_setup, but that is too late
+ * on AMD where early_cpu_init (run before ->arch_setup()) calls
+ * early_amd_init which pokes 0xcf8 port.
+ */
+ set_iopl.iopl = 1;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ if (rc != 0)
+ xen_raw_printk("physdev_op failed %d\n", rc);
+ }
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
@@ -1622,6 +1658,8 @@ asmlinkage void __init xen_start_kernel(void)
}
+/* Use a pfn in RAM; it may move to MMIO before kexec.
+ * This function is also called for PVH dom0. */
void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
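One ordering detail in the enlighten.c hunks is easy to miss: xen_setup_features() moves ahead of pv-ops installation so that xen_pvh_early_guest_init() and every later xen_feature()/xen_pvh_domain() test see valid flags. A condensed sketch of the resulting boot sequence (elisions marked; all names come from the hunks above):

	asmlinkage void __init xen_start_kernel(void)
	{
		xen_domain_type = XEN_PV_DOMAIN;

		xen_setup_features();		/* before any xen_feature() test */
		xen_pvh_early_guest_init();	/* may set xen_have_vector_callback */
		...
		if (xen_pvh_domain())
			pv_cpu_ops.cpuid = xen_cpuid;	/* otherwise keep native cpu ops */
		else
			pv_cpu_ops = xen_cpu_ops;
		...
	}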
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 01a4dc015ae1..fcbe56acd88c 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
+#include <xen/features.h>
#include <xen/events.h>
#include <asm/xen/hypercall.h>
@@ -129,6 +130,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
void __init xen_init_irq_ops(void)
{
- pv_irq_ops = xen_irq_ops;
+ /* For PVH we use default pv_irq_ops settings */
+ if (!xen_feature(XENFEAT_hvm_callback_vector))
+ pv_irq_ops = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fdc3ba28ca38..33c113aff6f8 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -74,6 +74,7 @@
#include <xen/interface/version.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
+#include <xen/balloon.h>
#include "multicalls.h"
#include "mmu.h"
@@ -1207,6 +1208,8 @@ static void __init xen_pagetable_init(void)
#endif
paging_init();
xen_setup_shared_info();
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
#ifdef CONFIG_X86_64
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long new_mfn_list;
@@ -1491,7 +1494,8 @@ static int xen_pgd_alloc(struct mm_struct *mm)
if (user_pgd != NULL) {
user_pgd[pgd_index(VSYSCALL_START)] =
- __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+ __pgd(__pa_symbol(level3_user_vsyscall) |
+ _PAGE_TABLE);
ret = 0;
}
@@ -1556,6 +1560,10 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
struct mmuext_op op;
+
+ if (xen_feature(XENFEAT_writable_page_tables))
+ return;
+
op.cmd = cmd;
op.arg1.mfn = pfn_to_mfn(pfn);
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
@@ -1753,6 +1761,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
+ /* Recall that for PVH, page tables are native. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
BUG();
}
@@ -1834,6 +1846,9 @@ static void convert_pfn_mfn(void *v)
pte_t *pte = v;
int i;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
/* All levels are converted the same way, so just treat them
as ptes. */
for (i = 0; i < PTRS_PER_PTE; i++)
@@ -1853,6 +1868,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
(*pt_end)--;
}
}
+
/*
* Set up the initial kernel pagetable.
*
@@ -1863,6 +1879,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
* but that's enough to get __va working. We need to fill in the rest
* of the physical mapping once some sort of allocator has been set
* up.
+ * NOTE: for PVH, the page tables are native.
*/
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
@@ -1940,10 +1957,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
* structure to attach it to, so make sure we just set kernel
* pgd.
*/
- xen_mc_batch();
- __xen_write_cr3(true, __pa(init_level4_pgt));
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-
+ if (xen_feature(XENFEAT_writable_page_tables)) {
+ native_write_cr3(__pa_symbol(init_level4_pgt));
+ } else {
+ xen_mc_batch();
+ __xen_write_cr3(true, __pa_symbol(init_level4_pgt));
+ xen_mc_issue(PARAVIRT_LAZY_CPU);
+ }
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
* set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
* the initial domain. For guests using the toolstack, they are in:
@@ -1964,10 +1984,10 @@ static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
static void __init xen_write_cr3_init(unsigned long cr3)
{
- unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
+ unsigned long pfn = PFN_DOWN(__pa_symbol(swapper_pg_dir));
- BUG_ON(read_cr3() != __pa(initial_page_table));
- BUG_ON(cr3 != __pa(swapper_pg_dir));
+ BUG_ON(read_cr3() != __pa_symbol(initial_page_table));
+ BUG_ON(cr3 != __pa_symbol(swapper_pg_dir));
/*
* We are switching to swapper_pg_dir for the first time (from
@@ -1991,7 +2011,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
+ PFN_DOWN(__pa_symbol(initial_page_table)));
set_page_prot(initial_page_table, PAGE_KERNEL);
set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
@@ -2016,7 +2036,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
copy_page(initial_page_table, pgd);
initial_page_table[KERNEL_PGD_BOUNDARY] =
- __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
+ __pgd(__pa_symbol(initial_kernel_pmd) | _PAGE_PRESENT);
set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
set_page_prot(initial_page_table, PAGE_KERNEL_RO);
@@ -2025,8 +2045,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
- xen_write_cr3(__pa(initial_page_table));
+ PFN_DOWN(__pa_symbol(initial_page_table)));
+ xen_write_cr3(__pa_symbol(initial_page_table));
memblock_reserve(__pa(xen_start_info->pt_base),
xen_start_info->nr_pt_frames * PAGE_SIZE);
@@ -2207,6 +2227,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
void __init xen_init_mmu_ops(void)
{
x86_init.paging.pagetable_init = xen_pagetable_init;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+ return;
+ }
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2482,6 +2507,97 @@ void __init xen_hvm_init_mmu_ops(void)
}
#endif
+/* Map a foreign gmfn, fgmfn, to a local pfn, lpfn. This is for user space
+ * on PVH dom0 that is creating a new guest and needs to map domU pages.
+ */
+static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
+ unsigned int domid)
+{
+ int rc;
+ struct xen_add_to_physmap_range xatp = {
+ .domid = DOMID_SELF,
+ .foreign_domid = domid,
+ .size = 1,
+ .space = XENMAPSPACE_gmfn_foreign,
+ };
+ xen_ulong_t idx = fgmfn;
+ xen_pfn_t gpfn = lpfn;
+ int err = 0;
+
+ set_xen_guest_handle(xatp.idxs, &idx);
+ set_xen_guest_handle(xatp.gpfns, &gpfn);
+ set_xen_guest_handle(xatp.errs, &err);
+
+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
+ if (rc || err)
+ pr_warn("d0: Failed to map pfn (0x%lx) to mfn (0x%lx) rc:%d:%d\n",
+ lpfn, fgmfn, rc, err);
+ return rc;
+}
+
+static int pvh_rem_xen_p2m(unsigned long spfn, int count)
+{
+ struct xen_remove_from_physmap xrp;
+ int i, rc;
+
+ for (i = 0; i < count; i++) {
+ xrp.domid = DOMID_SELF;
+ xrp.gpfn = spfn+i;
+ rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+ if (rc) {
+ pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
+ spfn+i, rc, i);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+struct pvh_remap_data {
+ unsigned long fgmfn; /* foreign domain's gmfn */
+ pgprot_t prot;
+ domid_t domid;
+ int index;
+ struct page **pages;
+};
+
+static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+ void *data)
+{
+ int rc;
+ struct pvh_remap_data *remap = data;
+ unsigned long pfn = page_to_pfn(remap->pages[remap->index++]);
+ pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot));
+
+ rc = pvh_add_to_xen_p2m(pfn, remap->fgmfn, remap->domid);
+ if (rc)
+ return rc;
+ native_set_pte(ptep, pteval);
+
+ return 0;
+}
+
+static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
+{
+ int err;
+ struct pvh_remap_data pvhdata;
+
+ BUG_ON(!pages);
+
+ pvhdata.fgmfn = mfn;
+ pvhdata.prot = prot;
+ pvhdata.domid = domid;
+ pvhdata.index = 0;
+ pvhdata.pages = pages;
+ err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+ pvh_map_pte_fn, &pvhdata);
+ flush_tlb_all();
+ return err;
+}
+
#define REMAP_BATCH_SIZE 16
struct remap_data {
@@ -2523,6 +2639,10 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* We need to update the local page tables and the xen HAP */
+ return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid, pages);
+ }
rmd.mfn = mfn;
rmd.prot = prot;
@@ -2560,6 +2680,18 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
return 0;
- return -EINVAL;
+ while (numpgs--) {
+
+ /* the mmu has already cleaned up the process mmu resources at
+ * this point (lookup_address will return NULL). */
+ unsigned long pfn = page_to_pfn(pages[numpgs]);
+
+ pvh_rem_xen_p2m(pfn, 1);
+ }
+ /* We don't need to flush TLBs because, as part of pvh_rem_xen_p2m(),
+ * the hypervisor will flush the TLB after removing the p2m entries
+ * from the EPT/NPT. */
+
+ return 0;
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
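Stated as a flow sketch (not extra code in the patch; the in-tree caller of xen_remap_domain_mfn_range() is the privcmd driver): for each foreign gmfn the map path updates Xen's p2m first and only then the local page tables, while the unmap path removes the p2m entries and relies on the hypervisor's EPT/NPT flush:

	/* Per-page map step, as in pvh_map_pte_fn() above: */
	err = pvh_add_to_xen_p2m(pfn, fgmfn, domid);	/* hypervisor p2m */
	if (!err)
		native_set_pte(ptep,
			       pte_mkspecial(pfn_pte(pfn, prot)));	/* local pte */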
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb951b4..6d0bb56a6e82 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);
extern void xen_init_mmu_ops(void);
extern void xen_hvm_init_mmu_ops(void);
+extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+ int nr_mfns, int add_mapping);
#endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa5927e..ea553c864ff3 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -798,7 +798,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
return true;
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 94eac5c85cdc..c4874c05774b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,6 +27,7 @@
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
+#include "mmu.h"
#include "xen-ops.h"
#include "vdso.h"
@@ -78,6 +79,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
memblock_reserve(start, size);
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
xen_max_p2m_pfn = PFN_DOWN(start + size);
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
@@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
.domid = DOMID_SELF
};
unsigned long len = 0;
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
unsigned long pfn;
int ret;
@@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
continue;
frame = mfn;
} else {
- if (mfn != INVALID_P2M_ENTRY)
+ if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
continue;
frame = pfn;
}
@@ -230,6 +235,22 @@ static void __init xen_set_identity_and_release_chunk(
*identity += set_phys_range_identity(start_pfn, end_pfn);
}
+
+/*
+ * PVH: xen has already mapped the IO space in the EPT/NPT for us, so we
+ * just need to adjust the released and identity count.
+ */
+static void __init xen_pvh_adjust_stats(unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long *released,
+ unsigned long *identity, unsigned long max_pfn)
+{
+ if (start_pfn <= max_pfn) {
+ unsigned long end = min(max_pfn_mapped, end_pfn);
+ *released += end - start_pfn;
+ }
+ *identity += end_pfn - start_pfn;
+}
+
static unsigned long __init xen_set_identity_and_release(
const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
@@ -238,6 +259,7 @@ static unsigned long __init xen_set_identity_and_release(
unsigned long identity = 0;
const struct e820entry *entry;
int i;
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
/*
* Combine non-RAM regions and gaps until a RAM region (or the
@@ -259,11 +281,17 @@ static unsigned long __init xen_set_identity_and_release(
if (entry->type == E820_RAM)
end_pfn = PFN_UP(entry->addr);
- if (start_pfn < end_pfn)
- xen_set_identity_and_release_chunk(
- start_pfn, end_pfn, nr_pages,
- &released, &identity);
-
+ if (start_pfn < end_pfn) {
+ if (xlated_phys) {
+ xen_pvh_adjust_stats(start_pfn,
+ end_pfn, &released, &identity,
+ nr_pages);
+ } else {
+ xen_set_identity_and_release_chunk(
+ start_pfn, end_pfn, nr_pages,
+ &released, &identity);
+ }
+ }
start = end;
}
}
@@ -526,16 +554,14 @@ void __cpuinit xen_enable_syscall(void)
#endif /* CONFIG_X86_64 */
}
-void __init xen_arch_setup(void)
+/* Non-auto-translated PV domain, i.e., not PVH. */
+static __init void xen_pvmmu_arch_setup(void)
{
- xen_panic_handler_init();
-
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_pae_extended_cr3);
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_pae_extended_cr3);
if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
@@ -543,6 +569,15 @@ void __init xen_arch_setup(void)
xen_enable_sysenter();
xen_enable_syscall();
+}
+
+/* This function is not called for HVM domains. */
+void __init xen_arch_setup(void)
+{
+ xen_panic_handler_init();
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ xen_pvmmu_arch_setup();
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
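After the refactor, xen_arch_setup() has this shape (sketch of the hunk's result; PVH skips the PV-MMU vm-assists and callback registration entirely):

	void __init xen_arch_setup(void)
	{
		xen_panic_handler_init();
		if (!xen_feature(XENFEAT_auto_translated_physmap))
			xen_pvmmu_arch_setup();	/* vm_assists + event/failsafe callbacks */
		/* ACPI handling etc. continue unconditionally */
	}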
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c1367b29c3b1..3d8849963d51 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -73,9 +73,11 @@ static void __cpuinit cpu_bringup(void)
touch_softlockup_watchdog();
preempt_disable();
- xen_enable_sysenter();
- xen_enable_syscall();
-
+ /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
+ if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
+ xen_enable_sysenter();
+ xen_enable_syscall();
+ }
cpu = smp_processor_id();
smp_store_cpu_info(cpu);
cpu_data(cpu).x86_max_cores = 1;
@@ -273,10 +275,11 @@ static void __init xen_smp_prepare_boot_cpu(void)
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();
- /* We've switched to the "real" per-cpu gdt, so make sure the
- old memory can be recycled */
- make_lowmem_page_readwrite(xen_initial_gdt);
-
+ if (!xen_feature(XENFEAT_writable_page_tables)) {
+ /* We've switched to the "real" per-cpu gdt, so make sure the
+ * old memory can be recycled */
+ make_lowmem_page_readwrite(xen_initial_gdt);
+ }
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
@@ -354,7 +357,23 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
- {
+ if (xen_feature(XENFEAT_auto_translated_physmap) &&
+ xen_feature(XENFEAT_supervisor_mode_kernel)) {
+ /* Note: PVH is not supported on x86_32. */
+#ifdef CONFIG_X86_64
+ ctxt->user_regs.ds = __KERNEL_DS;
+ ctxt->user_regs.es = 0;
+ ctxt->user_regs.gs = 0;
+
+ /* GUEST_GDTR_BASE and */
+ ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
+ /* GUEST_GDTR_LIMIT in the VMCS. */
+ ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);
+
+ ctxt->gs_base_user = (unsigned long)
+ per_cpu(irq_stack_union.gs_base, cpu);
+#endif
+ } else {
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
@@ -369,8 +388,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
make_lowmem_page_readonly(gdt);
make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
- ctxt->gdt_frames[0] = gdt_mfn;
- ctxt->gdt_ents = GDT_ENTRIES;
+ ctxt->u.pv.gdt_frames[0] = gdt_mfn;
+ ctxt->u.pv.gdt_ents = GDT_ENTRIES;
ctxt->kernel_ss = __KERNEL_DS;
ctxt->kernel_sp = idle->thread.sp0;
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7faed5869e5b..45226cb9493a 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -13,6 +13,14 @@
#include <xen/interface/elfnote.h>
#include <asm/xen/interface.h>
+#ifdef CONFIG_XEN_X86_PVH
+
+#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector"
+
+#else
+#define PVH_FEATURES_STR ""
+#endif
+
__INIT
ENTRY(startup_xen)
cld
@@ -95,7 +103,7 @@ NEXT_HYPERCALL(arch_6)
#endif
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
+ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR);
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
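With CONFIG_XEN_X86_PVH=y the .ascii/.asciz pair concatenates, so the features note the hypervisor parses becomes:

	!writable_page_tables|pae_pgdir_above_4gb|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector

Without the option, PVH_FEATURES_STR is empty and the note is unchanged.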