From e08b96371625aaa84cb03f51acc4c8e0be27403a Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 4 Jan 2012 10:25:20 +0100 Subject: KVM: s390: add parameter for KVM_CREATE_VM This patch introduces a new config option for user controlled kernel virtual machines. It introduces a parameter to KVM_CREATE_VM that allows to set bits that alter the capabilities of the newly created virtual machine. The parameter is passed to kvm_arch_init_vm for all architectures. The only valid modifier bit for now is KVM_VM_S390_UCONTROL. This requires CAP_SYS_ADMIN privileges and creates a user controlled virtual machine on s390 architectures. Signed-off-by: Carsten Otte Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/powerpc/kvm/powerpc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 607fbdf24b84..83f244569874 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -171,8 +171,11 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = kvmppc_core_check_processor_compat(); } -int kvm_arch_init_vm(struct kvm *kvm) +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { + if (type) + return -EINVAL; + return kvmppc_core_init_vm(kvm); } -- cgit v1.2.3 From 5b1c1493afe8d69909f9df3221bb2fffdf479f4a Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 4 Jan 2012 10:25:23 +0100 Subject: KVM: s390: ucontrol: export SIE control block to user This patch exports the s390 SIE hardware control block to userspace via the mapping of the vcpu file descriptor. In order to do so, a new arch callback named kvm_arch_vcpu_fault is introduced for all architectures. It allows to map architecture specific pages. Signed-off-by: Carsten Otte Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 5 +++++ arch/ia64/kvm/kvm-ia64.c | 5 +++++ arch/powerpc/kvm/powerpc.c | 5 +++++ arch/s390/kvm/kvm-s390.c | 13 +++++++++++++ arch/x86/kvm/x86.c | 5 +++++ include/linux/kvm.h | 2 ++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 2 +- 8 files changed, 37 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6e53ff51422f..5ebf47d99e56 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -218,6 +218,11 @@ allocation of vcpu ids. For example, if userspace wants single-threaded guest vcpus, it should make all vcpu ids be a multiple of the number of vcpus per vcore. +For virtual cpus that have been created with S390 user controlled virtual +machines, the resulting vcpu fd can be memory mapped at page offset +KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual +cpu's hardware control block. + 4.8 KVM_GET_DIRTY_LOG (vm ioctl) Capability: basic diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index df6b14194051..8ca7261e7b3d 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1566,6 +1566,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 83f244569874..a5671616af86 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -659,6 +659,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) { u32 inst_lis = 0x3c000000; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index af05328aca25..d6bc65aeb950 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -763,6 +763,19 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) + && (kvm_is_ucontrol(vcpu->kvm))) { + vmf->page = virt_to_page(vcpu->arch.sie_block); + get_page(vmf->page); + return 0; + } +#endif + return VM_FAULT_SIGBUS; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 06925b4bcc27..a3ce196d21fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2814,6 +2814,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) { int ret; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 7f686f6708b0..8f888df206a2 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -440,6 +440,8 @@ struct kvm_ppc_pvinfo { /* machine type bits, to be used as argument to KVM_CREATE_VM */ #define KVM_VM_S390_UCONTROL 1 +#define KVM_S390_SIE_PAGE_OFFSET 1 + /* * ioctls for /dev/kvm fds: */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 82375e145e64..d4d4d7092110 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -450,6 +450,7 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_dev_ioctl_check_extension(long ext); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 32e3b048a6cf..64be836f3348 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1657,7 +1657,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif else - return VM_FAULT_SIGBUS; + return kvm_arch_vcpu_fault(vcpu, vmf); get_page(page); vmf->page = page; return 0; -- cgit v1.2.3 From dc83b8bc0256ee682506ed83853a98eaba529c6f Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 18 Aug 2011 15:25:21 -0500 Subject: KVM: PPC: e500: MMU API This implements a shared-memory API for giving host userspace access to the guest's TLB. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 74 +++++++ arch/powerpc/include/asm/kvm.h | 35 ++++ arch/powerpc/include/asm/kvm_e500.h | 24 +-- arch/powerpc/include/asm/kvm_ppc.h | 5 + arch/powerpc/kvm/e500.c | 5 +- arch/powerpc/kvm/e500_emulate.c | 12 +- arch/powerpc/kvm/e500_tlb.c | 393 ++++++++++++++++++++++++------------ arch/powerpc/kvm/e500_tlb.h | 38 ++-- arch/powerpc/kvm/powerpc.c | 28 +++ include/linux/kvm.h | 18 ++ 10 files changed, 469 insertions(+), 163 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7ca696227d3a..bcd45d5afcab 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1409,6 +1409,38 @@ The following flags are defined: If datamatch flag is set, the event will be signaled only if the written value to the registered address is equal to datamatch in struct kvm_ioeventfd. +4.59 KVM_DIRTY_TLB + +Capability: KVM_CAP_SW_TLB +Architectures: ppc +Type: vcpu ioctl +Parameters: struct kvm_dirty_tlb (in) +Returns: 0 on success, -1 on error + +struct kvm_dirty_tlb { + __u64 bitmap; + __u32 num_dirty; +}; + +This must be called whenever userspace has changed an entry in the shared +TLB, prior to calling KVM_RUN on the associated vcpu. + +The "bitmap" field is the userspace address of an array. This array +consists of a number of bits, equal to the total number of TLB entries as +determined by the last successful call to KVM_CONFIG_TLB, rounded up to the +nearest multiple of 64. + +Each bit corresponds to one TLB entry, ordered the same as in the shared TLB +array. + +The array is little-endian: the bit 0 is the least significant bit of the +first byte, bit 8 is the least significant bit of the second byte, etc. +This avoids any complications with differing word sizes. + +The "num_dirty" field is a performance hint for KVM to determine whether it +should skip processing the bitmap and just invalidate everything. It must +be set to the number of set bits in the bitmap. + 4.62 KVM_CREATE_SPAPR_TCE Capability: KVM_CAP_SPAPR_TCE @@ -1842,3 +1874,45 @@ HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the HTAB invisible to the guest. When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur. + +6.3 KVM_CAP_SW_TLB + +Architectures: ppc +Parameters: args[0] is the address of a struct kvm_config_tlb +Returns: 0 on success; -1 on error + +struct kvm_config_tlb { + __u64 params; + __u64 array; + __u32 mmu_type; + __u32 array_len; +}; + +Configures the virtual CPU's TLB array, establishing a shared memory area +between userspace and KVM. The "params" and "array" fields are userspace +addresses of mmu-type-specific data structures. The "array_len" field is an +safety mechanism, and should be set to the size in bytes of the memory that +userspace has reserved for the array. It must be at least the size dictated +by "mmu_type" and "params". + +While KVM_RUN is active, the shared region is under control of KVM. Its +contents are undefined, and any modification by userspace results in +boundedly undefined behavior. + +On return from KVM_RUN, the shared region will reflect the current state of +the guest's TLB. If userspace makes any changes, it must call KVM_DIRTY_TLB +to tell KVM which entries have been changed, prior to calling KVM_RUN again +on this vcpu. + +For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: + - The "params" field is of type "struct kvm_book3e_206_tlb_params". + - The "array" field points to an array of type "struct + kvm_book3e_206_tlb_entry". + - The array consists of all entries in the first TLB, followed by all + entries in the second TLB. + - Within a TLB, entries are ordered first by increasing set number. Within a + set, entries are ordered by way (increasing ESEL). + - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1) + where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value. + - The tsize field of mas1 shall be set to 4K on TLB0, even though the + hardware ignores this value for TLB0. diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 7d9d4de057ef..663c57f87165 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -296,4 +296,39 @@ struct kvm_allocate_rma { __u64 rma_size; }; +struct kvm_book3e_206_tlb_entry { + __u32 mas8; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; +}; + +struct kvm_book3e_206_tlb_params { + /* + * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: + * + * - The number of ways of TLB0 must be a power of two between 2 and + * 16. + * - TLB1 must be fully associative. + * - The size of TLB0 must be a multiple of the number of ways, and + * the number of sets must be a power of two. + * - The size of TLB1 may not exceed 64 entries. + * - TLB0 supports 4 KiB pages. + * - The page sizes supported by TLB1 are as indicated by + * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1) + * as returned by KVM_GET_SREGS. + * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[] + * and tlb_ways[] must be zero. + * + * tlb_ways[n] = tlb_sizes[n] means the array is fully associative. + * + * KVM will adjust TLBnCFG based on the sizes configured here, + * though arrays greater than 2048 entries will have TLBnCFG[NENTRY] + * set to zero. + */ + __u32 tlb_sizes[4]; + __u32 tlb_ways[4]; + __u32 reserved[8]; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index a5197d816ec7..bc17441535f2 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -22,13 +22,6 @@ #define E500_PID_NUM 3 #define E500_TLB_NUM 2 -struct tlbe{ - u32 mas1; - u32 mas2; - u32 mas3; - u32 mas7; -}; - #define E500_TLB_VALID 1 #define E500_TLB_DIRTY 2 @@ -48,13 +41,17 @@ struct kvmppc_e500_tlb_params { }; struct kvmppc_vcpu_e500 { - /* Unmodified copy of the guest's TLB. */ - struct tlbe *gtlb_arch[E500_TLB_NUM]; + /* Unmodified copy of the guest's TLB -- shared with host userspace. */ + struct kvm_book3e_206_tlb_entry *gtlb_arch; + + /* Starting entry number in gtlb_arch[] */ + int gtlb_offset[E500_TLB_NUM]; /* KVM internal information associated with each guest TLB entry */ struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; - unsigned int gtlb_size[E500_TLB_NUM]; + struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM]; + unsigned int gtlb_nv[E500_TLB_NUM]; /* @@ -68,7 +65,6 @@ struct kvmppc_vcpu_e500 { * and back, and our host TLB entries got evicted). */ struct tlbe_ref *tlb_refs[E500_TLB_NUM]; - unsigned int host_tlb1_nv; u32 host_pid[E500_PID_NUM]; @@ -78,11 +74,10 @@ struct kvmppc_vcpu_e500 { u32 mas0; u32 mas1; u32 mas2; - u32 mas3; + u64 mas7_3; u32 mas4; u32 mas5; u32 mas6; - u32 mas7; /* vcpu id table */ struct vcpu_id_table *idt; @@ -95,6 +90,9 @@ struct kvmppc_vcpu_e500 { u32 tlb1cfg; u64 mcar; + struct page **shared_tlb_pages; + int num_shared_tlb_pages; + struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 46efd1a265c9..a284f209e2df 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -193,4 +193,9 @@ static inline void kvm_rma_init(void) {} #endif +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, + struct kvm_config_tlb *cfg); +int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, + struct kvm_dirty_tlb *cfg); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 8c0d45a6faf7..f17d7e732a1e 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -121,7 +121,7 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->u.e.mas0 = vcpu_e500->mas0; sregs->u.e.mas1 = vcpu_e500->mas1; sregs->u.e.mas2 = vcpu_e500->mas2; - sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas7_3 = vcpu_e500->mas7_3; sregs->u.e.mas4 = vcpu_e500->mas4; sregs->u.e.mas6 = vcpu_e500->mas6; @@ -154,8 +154,7 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu_e500->mas0 = sregs->u.e.mas0; vcpu_e500->mas1 = sregs->u.e.mas1; vcpu_e500->mas2 = sregs->u.e.mas2; - vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; - vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas7_3 = sregs->u.e.mas7_3; vcpu_e500->mas4 = sregs->u.e.mas4; vcpu_e500->mas6 = sregs->u.e.mas6; } diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index d48ae396f41e..e0d36099c756 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -95,13 +95,17 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_MAS2: vcpu_e500->mas2 = spr_val; break; case SPRN_MAS3: - vcpu_e500->mas3 = spr_val; break; + vcpu_e500->mas7_3 &= ~(u64)0xffffffff; + vcpu_e500->mas7_3 |= spr_val; + break; case SPRN_MAS4: vcpu_e500->mas4 = spr_val; break; case SPRN_MAS6: vcpu_e500->mas6 = spr_val; break; case SPRN_MAS7: - vcpu_e500->mas7 = spr_val; break; + vcpu_e500->mas7_3 &= (u64)0xffffffff; + vcpu_e500->mas7_3 |= (u64)spr_val << 32; + break; case SPRN_L1CSR0: vcpu_e500->l1csr0 = spr_val; vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); @@ -158,13 +162,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_MAS2: kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break; case SPRN_MAS3: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break; + kvmppc_set_gpr(vcpu, rt, (u32)vcpu_e500->mas7_3); break; case SPRN_MAS4: kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break; case SPRN_MAS6: kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break; case SPRN_MAS7: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break; + kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7_3 >> 32); break; case SPRN_TLB0CFG: kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 59221bb1e00e..f19ae2f61521 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -19,6 +19,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -66,6 +71,13 @@ static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; +static struct kvm_book3e_206_tlb_entry *get_entry( + struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry) +{ + int offset = vcpu_e500->gtlb_offset[tlbsel]; + return &vcpu_e500->gtlb_arch[offset + entry]; +} + /* * Allocate a free shadow id and setup a valid sid mapping in given entry. * A mapping is only valid when vcpu_id_table and pcpu_id_table are match. @@ -217,34 +229,13 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) preempt_enable(); } -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe *tlbe; - int i, tlbsel; - - printk("| %8s | %8s | %8s | %8s | %8s |\n", - "nr", "mas1", "mas2", "mas3", "mas7"); - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - printk("Guest TLB%d:\n", tlbsel); - for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) { - tlbe = &vcpu_e500->gtlb_arch[tlbsel][i]; - if (tlbe->mas1 & MAS1_VALID) - printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n", - tlbsel, i, tlbe->mas1, tlbe->mas2, - tlbe->mas3, tlbe->mas7); - } - } -} - static inline unsigned int gtlb0_get_next_victim( struct kvmppc_vcpu_e500 *vcpu_e500) { unsigned int victim; victim = vcpu_e500->gtlb_nv[0]++; - if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) + if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways)) vcpu_e500->gtlb_nv[0] = 0; return victim; @@ -256,9 +247,9 @@ static inline unsigned int tlb1_max_shadow_size(void) return host_tlb_params[1].entries - tlbcam_index - 1; } -static inline int tlbe_is_writable(struct tlbe *tlbe) +static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) { - return tlbe->mas3 & (MAS3_SW|MAS3_UW); + return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); } static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) @@ -289,39 +280,41 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) /* * writing shadow tlb entry to host TLB */ -static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0) +static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, + uint32_t mas0) { unsigned long flags; local_irq_save(flags); mtspr(SPRN_MAS0, mas0); mtspr(SPRN_MAS1, stlbe->mas1); - mtspr(SPRN_MAS2, stlbe->mas2); - mtspr(SPRN_MAS3, stlbe->mas3); - mtspr(SPRN_MAS7, stlbe->mas7); + mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); + mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); + mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); asm volatile("isync; tlbwe" : : : "memory"); local_irq_restore(flags); } /* esel is index into set, not whole array */ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel, struct tlbe *stlbe) + int tlbsel, int esel, struct kvm_book3e_206_tlb_entry *stlbe) { if (tlbsel == 0) { - __write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(esel)); + int way = esel & (vcpu_e500->gtlb_params[0].ways - 1); + __write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(way)); } else { __write_host_tlbe(stlbe, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel))); } trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, - stlbe->mas3, stlbe->mas7); + (u32)stlbe->mas7_3, (u32)(stlbe->mas7_3 >> 32)); } void kvmppc_map_magic(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe magic; + struct kvm_book3e_206_tlb_entry magic; ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; unsigned int stid; pfn_t pfn; @@ -335,9 +328,8 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | MAS1_TSIZE(BOOK3E_PAGESZ_4K); magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; - magic.mas3 = (pfn << PAGE_SHIFT) | - MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; - magic.mas7 = pfn >> (32 - PAGE_SHIFT); + magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | + MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); preempt_enable(); @@ -358,7 +350,8 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) { - struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); struct vcpu_id_table *idt = vcpu_e500->idt; unsigned int pr, tid, ts, pid; u32 val, eaddr; @@ -424,9 +417,8 @@ static int tlb0_set_base(gva_t addr, int sets, int ways) static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr) { - int sets = KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM; - - return tlb0_set_base(addr, sets, KVM_E500_TLB0_WAY_NUM); + return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets, + vcpu_e500->gtlb_params[0].ways); } static int htlb0_set_base(gva_t addr) @@ -440,10 +432,10 @@ static unsigned int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel) unsigned int esel = get_tlb_esel_bit(vcpu_e500); if (tlbsel == 0) { - esel &= KVM_E500_TLB0_WAY_NUM_MASK; + esel &= vcpu_e500->gtlb_params[0].ways - 1; esel += gtlb0_set_base(vcpu_e500, vcpu_e500->mas2); } else { - esel &= vcpu_e500->gtlb_size[tlbsel] - 1; + esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1; } return esel; @@ -453,19 +445,22 @@ static unsigned int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel) static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t eaddr, int tlbsel, unsigned int pid, int as) { - int size = vcpu_e500->gtlb_size[tlbsel]; - unsigned int set_base; + int size = vcpu_e500->gtlb_params[tlbsel].entries; + unsigned int set_base, offset; int i; if (tlbsel == 0) { set_base = gtlb0_set_base(vcpu_e500, eaddr); - size = KVM_E500_TLB0_WAY_NUM; + size = vcpu_e500->gtlb_params[0].ways; } else { set_base = 0; } + offset = vcpu_e500->gtlb_offset[tlbsel]; + for (i = 0; i < size; i++) { - struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i]; + struct kvm_book3e_206_tlb_entry *tlbe = + &vcpu_e500->gtlb_arch[offset + set_base + i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -491,7 +486,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, } static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, - struct tlbe *gtlbe, + struct kvm_book3e_206_tlb_entry *gtlbe, pfn_t pfn) { ref->pfn = pfn; @@ -518,7 +513,7 @@ static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) int tlbsel = 0; int i; - for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) { + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][i].ref; kvmppc_e500_ref_release(ref); @@ -530,6 +525,8 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) int stlbsel = 1; int i; + kvmppc_e500_id_table_reset_all(vcpu_e500); + for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { struct tlbe_ref *ref = &vcpu_e500->tlb_refs[stlbsel][i]; @@ -559,18 +556,18 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, | MAS1_TSIZE(tsized); vcpu_e500->mas2 = (eaddr & MAS2_EPN) | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK); - vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1) | (get_cur_pid(vcpu) << 16) | (as ? MAS6_SAS : 0); - vcpu_e500->mas7 = 0; } /* TID must be supplied by the caller */ -static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - struct tlbe *gtlbe, int tsize, - struct tlbe_ref *ref, - u64 gvaddr, struct tlbe *stlbe) +static inline void kvmppc_e500_setup_stlbe( + struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe, + int tsize, struct tlbe_ref *ref, u64 gvaddr, + struct kvm_book3e_206_tlb_entry *stlbe) { pfn_t pfn = ref->pfn; @@ -581,16 +578,16 @@ static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN) - | e500_shadow_mas3_attrib(gtlbe->mas3, + stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) + | e500_shadow_mas3_attrib(gtlbe->mas7_3, vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN; } /* sesel is an index into the entire array, not just the set */ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int sesel, - struct tlbe *stlbe, struct tlbe_ref *ref) + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe, + struct tlbe_ref *ref) { struct kvm_memory_slot *slot; unsigned long pfn, hva; @@ -700,15 +697,16 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* XXX only map the one-one case, for now use TLB0 */ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, - int esel, struct tlbe *stlbe) + int esel, + struct kvm_book3e_206_tlb_entry *stlbe) { - struct tlbe *gtlbe; + struct kvm_book3e_206_tlb_entry *gtlbe; struct tlbe_ref *ref; int sesel = esel & (host_tlb_params[0].ways - 1); int sesel_base; gva_t ea; - gtlbe = &vcpu_e500->gtlb_arch[0][esel]; + gtlbe = get_entry(vcpu_e500, 0, esel); ref = &vcpu_e500->gtlb_priv[0][esel].ref; ea = get_tlb_eaddr(gtlbe); @@ -725,7 +723,8 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, * the shadow TLB. */ /* XXX for both one-one and one-to-many , for now use TLB1 */ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe) + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe) { struct tlbe_ref *ref; unsigned int victim; @@ -754,7 +753,8 @@ static inline int kvmppc_e500_gtlbe_invalidate( struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) { - struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); if (unlikely(get_tlb_iprot(gtlbe))) return -1; @@ -769,10 +769,10 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) int esel; if (value & MMUCSR0_TLB0FI) - for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); if (value & MMUCSR0_TLB1FI) - for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); /* Invalidate all vcpu id mappings */ @@ -797,7 +797,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) if (ia) { /* invalidate all entries */ - for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; + esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } else { ea &= 0xfffff000; @@ -817,18 +818,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int tlbsel, esel; - struct tlbe *gtlbe; + struct kvm_book3e_206_tlb_entry *gtlbe; tlbsel = get_tlb_tlbsel(vcpu_e500); esel = get_tlb_esel(vcpu_e500, tlbsel); - gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + gtlbe = get_entry(vcpu_e500, tlbsel, esel); vcpu_e500->mas0 &= ~MAS0_NV(~0); vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu_e500->mas1 = gtlbe->mas1; vcpu_e500->mas2 = gtlbe->mas2; - vcpu_e500->mas3 = gtlbe->mas3; - vcpu_e500->mas7 = gtlbe->mas7; + vcpu_e500->mas7_3 = gtlbe->mas7_3; return EMULATE_DONE; } @@ -839,7 +839,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) int as = !!get_cur_sas(vcpu_e500); unsigned int pid = get_cur_spid(vcpu_e500); int esel, tlbsel; - struct tlbe *gtlbe = NULL; + struct kvm_book3e_206_tlb_entry *gtlbe = NULL; gva_t ea; ea = kvmppc_get_gpr(vcpu, rb); @@ -847,7 +847,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) for (tlbsel = 0; tlbsel < 2; tlbsel++) { esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); if (esel >= 0) { - gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + gtlbe = get_entry(vcpu_e500, tlbsel, esel); break; } } @@ -857,8 +857,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu_e500->mas1 = gtlbe->mas1; vcpu_e500->mas2 = gtlbe->mas2; - vcpu_e500->mas3 = gtlbe->mas3; - vcpu_e500->mas7 = gtlbe->mas7; + vcpu_e500->mas7_3 = gtlbe->mas7_3; } else { int victim; @@ -873,8 +872,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); vcpu_e500->mas2 &= MAS2_EPN; vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK; - vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; - vcpu_e500->mas7 = 0; + vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; } kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); @@ -883,8 +881,8 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) /* sesel is index into the set, not the whole array */ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - struct tlbe *gtlbe, - struct tlbe *stlbe, + struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, int stlbsel, int sesel) { int stid; @@ -902,28 +900,27 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe *gtlbe; + struct kvm_book3e_206_tlb_entry *gtlbe; int tlbsel, esel; tlbsel = get_tlb_tlbsel(vcpu_e500); esel = get_tlb_esel(vcpu_e500, tlbsel); - gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + gtlbe = get_entry(vcpu_e500, tlbsel, esel); if (get_tlb_v(gtlbe)) inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); gtlbe->mas1 = vcpu_e500->mas1; gtlbe->mas2 = vcpu_e500->mas2; - gtlbe->mas3 = vcpu_e500->mas3; - gtlbe->mas7 = vcpu_e500->mas7; + gtlbe->mas7_3 = vcpu_e500->mas7_3; trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2, - gtlbe->mas3, gtlbe->mas7); + (u32)gtlbe->mas7_3, (u32)(gtlbe->mas7_3 >> 32)); /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { - struct tlbe stlbe; + struct kvm_book3e_206_tlb_entry stlbe; int stlbsel, sesel; u64 eaddr; u64 raddr; @@ -996,9 +993,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, gva_t eaddr) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe *gtlbe = - &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)]; - u64 pgmask = get_tlb_bytes(gtlbe) - 1; + struct kvm_book3e_206_tlb_entry *gtlbe; + u64 pgmask; + + gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index)); + pgmask = get_tlb_bytes(gtlbe) - 1; return get_tlb_raddr(gtlbe) | (eaddr & pgmask); } @@ -1012,12 +1011,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); struct tlbe_priv *priv; - struct tlbe *gtlbe, stlbe; + struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; int tlbsel = tlbsel_of(index); int esel = esel_of(index); int stlbsel, sesel; - gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + gtlbe = get_entry(vcpu_e500, tlbsel, esel); switch (tlbsel) { case 0: @@ -1073,25 +1072,174 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { - struct tlbe *tlbe; + struct kvm_book3e_206_tlb_entry *tlbe; /* Insert large initial mapping for guest. */ - tlbe = &vcpu_e500->gtlb_arch[1][0]; + tlbe = get_entry(vcpu_e500, 1, 0); tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); tlbe->mas2 = 0; - tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; - tlbe->mas7 = 0; + tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK; /* 4K map for serial output. Used by kernel wrapper. */ - tlbe = &vcpu_e500->gtlb_arch[1][1]; + tlbe = get_entry(vcpu_e500, 1, 1); tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; - tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; - tlbe->mas7 = 0; + tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; +} + +static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int i; + + clear_tlb_refs(vcpu_e500); + kfree(vcpu_e500->gtlb_priv[0]); + kfree(vcpu_e500->gtlb_priv[1]); + + if (vcpu_e500->shared_tlb_pages) { + vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch, + PAGE_SIZE))); + + for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) { + set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]); + put_page(vcpu_e500->shared_tlb_pages[i]); + } + + vcpu_e500->num_shared_tlb_pages = 0; + vcpu_e500->shared_tlb_pages = NULL; + } else { + kfree(vcpu_e500->gtlb_arch); + } + + vcpu_e500->gtlb_arch = NULL; +} + +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, + struct kvm_config_tlb *cfg) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_params params; + char *virt; + struct page **pages; + struct tlbe_priv *privs[2] = {}; + size_t array_len; + u32 sets; + int num_pages, ret, i; + + if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV) + return -EINVAL; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)cfg->params, + sizeof(params))) + return -EFAULT; + + if (params.tlb_sizes[1] > 64) + return -EINVAL; + if (params.tlb_ways[1] != params.tlb_sizes[1]) + return -EINVAL; + if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0) + return -EINVAL; + if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0) + return -EINVAL; + + if (!is_power_of_2(params.tlb_ways[0])) + return -EINVAL; + + sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]); + if (!is_power_of_2(sets)) + return -EINVAL; + + array_len = params.tlb_sizes[0] + params.tlb_sizes[1]; + array_len *= sizeof(struct kvm_book3e_206_tlb_entry); + + if (cfg->array_len < array_len) + return -EINVAL; + + num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - + cfg->array / PAGE_SIZE; + pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); + if (!pages) + return -ENOMEM; + + ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); + if (ret < 0) + goto err_pages; + + if (ret != num_pages) { + num_pages = ret; + ret = -EFAULT; + goto err_put_page; + } + + virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); + if (!virt) + goto err_put_page; + + privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], + GFP_KERNEL); + privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], + GFP_KERNEL); + + if (!privs[0] || !privs[1]) + goto err_put_page; + + free_gtlb(vcpu_e500); + + vcpu_e500->gtlb_priv[0] = privs[0]; + vcpu_e500->gtlb_priv[1] = privs[1]; + + vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) + (virt + (cfg->array & (PAGE_SIZE - 1))); + + vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0]; + vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1]; + + vcpu_e500->gtlb_offset[0] = 0; + vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; + + vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; + if (params.tlb_sizes[0] <= 2048) + vcpu_e500->tlb0cfg |= params.tlb_sizes[0]; + + vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; + vcpu_e500->tlb1cfg |= params.tlb_sizes[1]; + + vcpu_e500->shared_tlb_pages = pages; + vcpu_e500->num_shared_tlb_pages = num_pages; + + vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0]; + vcpu_e500->gtlb_params[0].sets = sets; + + vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; + vcpu_e500->gtlb_params[1].sets = 1; + + return 0; + +err_put_page: + kfree(privs[0]); + kfree(privs[1]); + + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + +err_pages: + kfree(pages); + return ret; +} + +int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, + struct kvm_dirty_tlb *dirty) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + clear_tlb_refs(vcpu_e500); + return 0; } int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { + int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); + int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; + host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; @@ -1124,17 +1272,22 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) host_tlb_params[0].entries / host_tlb_params[0].ways; host_tlb_params[1].sets = 1; - vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE; - vcpu_e500->gtlb_arch[0] = - kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); - if (vcpu_e500->gtlb_arch[0] == NULL) - goto err; + vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; + vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; - vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE; - vcpu_e500->gtlb_arch[1] = - kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); - if (vcpu_e500->gtlb_arch[1] == NULL) - goto err; + vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM; + vcpu_e500->gtlb_params[0].sets = + KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM; + + vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; + vcpu_e500->gtlb_params[1].sets = 1; + + vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); + if (!vcpu_e500->gtlb_arch) + return -ENOMEM; + + vcpu_e500->gtlb_offset[0] = 0; + vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; vcpu_e500->tlb_refs[0] = kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, @@ -1148,15 +1301,15 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->tlb_refs[1]) goto err; - vcpu_e500->gtlb_priv[0] = - kzalloc(sizeof(struct tlbe_ref) * vcpu_e500->gtlb_size[0], - GFP_KERNEL); + vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * + vcpu_e500->gtlb_params[0].entries, + GFP_KERNEL); if (!vcpu_e500->gtlb_priv[0]) goto err; - vcpu_e500->gtlb_priv[1] = - kzalloc(sizeof(struct tlbe_ref) * vcpu_e500->gtlb_size[1], - GFP_KERNEL); + vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * + vcpu_e500->gtlb_params[1].entries, + GFP_KERNEL); if (!vcpu_e500->gtlb_priv[1]) goto err; @@ -1165,32 +1318,24 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) /* Init TLB configuration register */ vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; - vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0]; + vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries; vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; - vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1]; + vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_params[1].entries; return 0; err: + free_gtlb(vcpu_e500); kfree(vcpu_e500->tlb_refs[0]); kfree(vcpu_e500->tlb_refs[1]); - kfree(vcpu_e500->gtlb_priv[0]); - kfree(vcpu_e500->gtlb_priv[1]); - kfree(vcpu_e500->gtlb_arch[0]); - kfree(vcpu_e500->gtlb_arch[1]); return -1; } void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { - clear_tlb_refs(vcpu_e500); - + free_gtlb(vcpu_e500); kvmppc_e500_id_table_free(vcpu_e500); kfree(vcpu_e500->tlb_refs[0]); kfree(vcpu_e500->tlb_refs[1]); - kfree(vcpu_e500->gtlb_priv[0]); - kfree(vcpu_e500->gtlb_priv[1]); - kfree(vcpu_e500->gtlb_arch[1]); - kfree(vcpu_e500->gtlb_arch[0]); } diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index b587f691459a..2c296407e759 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -20,13 +20,9 @@ #include #include -#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */ -#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT) -#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1) - -#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */ -#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT) -#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1) +/* This geometry is the legacy default -- can be overridden by userspace */ +#define KVM_E500_TLB0_WAY_SIZE 128 +#define KVM_E500_TLB0_WAY_NUM 2 #define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) #define KVM_E500_TLB1_SIZE 16 @@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); /* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 7) & 0x1f; } -static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) { return tlbe->mas2 & 0xfffff000; } -static inline u64 get_tlb_bytes(const struct tlbe *tlbe) +static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) { unsigned int pgsize = get_tlb_size(tlbe); return 1ULL << 10 << pgsize; } -static inline gva_t get_tlb_end(const struct tlbe *tlbe) +static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe) { u64 bytes = get_tlb_bytes(tlbe); return get_tlb_eaddr(tlbe) + bytes - 1; } -static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe) { - u64 rpn = tlbe->mas7; - return (rpn << 32) | (tlbe->mas3 & 0xfffff000); + return tlbe->mas7_3 & ~0xfffULL; } -static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 16) & 0xff; } -static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 12) & 0x1; } -static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 31) & 0x1; } -static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 30) & 0x1; } @@ -156,7 +156,7 @@ static inline unsigned int get_tlb_esel_bit( } static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct tlbe *tlbe) + const struct kvm_book3e_206_tlb_entry *tlbe) { gpa_t gpa; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a5671616af86..3cf6fba513ac 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -222,6 +222,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: +#ifdef CONFIG_KVM_E500 + case KVM_CAP_SW_TLB: +#endif r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -602,6 +605,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; +#ifdef CONFIG_KVM_E500 + case KVM_CAP_SW_TLB: { + struct kvm_config_tlb cfg; + void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; + + r = -EFAULT; + if (copy_from_user(&cfg, user_ptr, sizeof(cfg))) + break; + + r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg); + break; + } +#endif default: r = -EINVAL; break; @@ -651,6 +667,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } + +#ifdef CONFIG_KVM_E500 + case KVM_DIRTY_TLB: { + struct kvm_dirty_tlb dirty; + r = -EFAULT; + if (copy_from_user(&dirty, argp, sizeof(dirty))) + goto out; + r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); + break; + } +#endif + default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 245bcb3a0fcd..fa029ced4bd5 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -581,6 +581,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_RMA 65 #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ #define KVM_CAP_PPC_PAPR 68 +#define KVM_CAP_SW_TLB 69 #define KVM_CAP_S390_GMAP 71 #define KVM_CAP_TSC_DEADLINE_TIMER 72 #define KVM_CAP_S390_UCONTROL 73 @@ -664,6 +665,21 @@ struct kvm_clock_data { __u32 pad[9]; }; +#define KVM_MMU_FSL_BOOKE_NOHV 0 +#define KVM_MMU_FSL_BOOKE_HV 1 + +struct kvm_config_tlb { + __u64 params; + __u64 array; + __u32 mmu_type; + __u32 array_len; +}; + +struct kvm_dirty_tlb { + __u64 bitmap; + __u32 num_dirty; +}; + /* * ioctls for VM fds */ @@ -801,6 +817,8 @@ struct kvm_s390_ucas_mapping { #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_SW_TLB */ +#define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 7e28e60ef974d0eeb43112ef264d8c130f7b7bf4 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 8 Nov 2011 18:23:20 -0600 Subject: KVM: PPC: Rename deliver_interrupts to prepare_to_enter This function also updates paravirt int_pending, so rename it to be more obvious that this is a collection of checks run prior to (re)entering a guest. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 6 +++--- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a284f209e2df..c089927f64cc 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -94,7 +94,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); -extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); +extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index e41ac6f7dcf1..73fc9f046107 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) return true; } -void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned long old_pending = vcpu->arch.pending_exceptions; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 336983da9e72..536adee59c07 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -482,7 +482,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) if (now > vcpu->arch.dec_expires) { /* decrementer has already gone negative */ kvmppc_core_queue_dec(vcpu); - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); return; } dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC @@ -797,7 +797,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) list_for_each_entry_safe(v, vn, &vc->runnable_threads, arch.run_list) { - kvmppc_core_deliver_interrupts(v); + kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); v->stat.signal_exits++; @@ -857,7 +857,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { r = kvmppc_pseries_do_hcall(vcpu); - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); } } while (r == RESUME_GUEST); return r; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e2cfb9e1e20e..f3628581fb7c 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -764,7 +764,7 @@ program_interrupt: /* In case an interrupt came in that was triggered * from userspace (like DEC), we need to check what * to inject now! */ - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); } } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9c785896e867..e082e348c882 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -289,7 +289,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, } /* Check pending exceptions and deliver one, if possible. */ -void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned long old_pending = vcpu->arch.pending_exceptions; @@ -611,7 +611,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_disable(); - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3cf6fba513ac..6186ec0d939b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -559,7 +559,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->arch.hcall_needed = 0; } - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_core_prepare_to_enter(vcpu); r = kvmppc_vcpu_run(run, vcpu); -- cgit v1.2.3 From 25051b5a5aff0bb71435421b4b80279b789fa0dc Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 8 Nov 2011 18:23:23 -0600 Subject: KVM: PPC: Move prepare_to_enter call site into subarch code This function should be called with interrupts disabled, to avoid a race where an exception is delivered after we check, but the resched kick is received before we disable interrupts (and thus doesn't actually trigger the exit code that would recheck exceptions). booke already does this properly in the lightweight exit case, but not on initial entry. For now, move the call of prepare_to_enter into subarch-specific code so that booke can do the right thing here. Ideally book3s would do the same thing, but I'm having a hard time seeing where it does any interrupt disabling of this sort (plus it has several additional call sites), so I'm deferring the book3s fix to someone more familiar with that code. book3s behavior should be unchanged by this patch. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_hv.c | 2 ++ arch/powerpc/kvm/book3s_pr.c | 2 ++ arch/powerpc/kvm/booke.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 2 -- 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 536adee59c07..b1e3b9c1326a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -836,6 +836,8 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) return -EINVAL; } + kvmppc_core_prepare_to_enter(vcpu); + /* No need to go into the guest when all we'll do is come back out */ if (signal_pending(current)) { run->exit_reason = KVM_EXIT_INTR; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index f3628581fb7c..203a7b7b58b9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -929,6 +929,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return -EINVAL; } + kvmppc_core_prepare_to_enter(vcpu); + /* No need to go into the guest when all we do is going out */ if (signal_pending(current)) { kvm_run->exit_reason = KVM_EXIT_INTR; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index e082e348c882..feaefc433276 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -295,6 +295,8 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) unsigned long old_pending = vcpu->arch.pending_exceptions; unsigned int priority; + WARN_ON_ONCE(!irqs_disabled()); + priority = __ffs(*pending); while (priority <= BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) @@ -323,6 +325,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) local_irq_disable(); + kvmppc_core_prepare_to_enter(vcpu); + if (signal_pending(current)) { kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6186ec0d939b..7411bdd8ff6f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -559,8 +559,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->arch.hcall_needed = 0; } - kvmppc_core_prepare_to_enter(vcpu); - r = kvmppc_vcpu_run(run, vcpu); if (vcpu->sigset_active) -- cgit v1.2.3 From b59049720dd95021dfe0d9f4e1fa9458a67cfe29 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 8 Nov 2011 18:23:30 -0600 Subject: KVM: PPC: Paravirtualize SPRG4-7, ESR, PIR, MASn This allows additional registers to be accessed by the guest in PR-mode KVM without trapping. SPRG4-7 are readable from userspace. On booke, KVM will sync these registers when it enters the guest, so that accesses from guest userspace will work. The guest kernel, OTOH, must consistently use either the real registers or the shared area between exits. This also applies to the already-paravirted SPRG3. On non-booke, it's not clear to what extent SPRG4-7 are supported (they're not architected for book3s, but exist on at least some classic chips). They are copied in the get/set regs ioctls, but I do not see any non-booke emulation. I also do not see any syncing with real registers (in PR-mode) including the user-readable SPRG3. This patch should not make that situation any worse. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_e500.h | 8 -- arch/powerpc/include/asm/kvm_host.h | 6 -- arch/powerpc/include/asm/kvm_para.h | 31 +++++- arch/powerpc/kernel/asm-offsets.c | 15 ++- arch/powerpc/kernel/kvm.c | 204 ++++++++++++++++++++++++++++++------ arch/powerpc/kvm/book3s.c | 16 +-- arch/powerpc/kvm/booke.c | 23 ++-- arch/powerpc/kvm/booke_emulate.c | 12 +-- arch/powerpc/kvm/booke_interrupts.S | 18 ++-- arch/powerpc/kvm/e500.c | 24 ++--- arch/powerpc/kvm/e500_emulate.c | 38 ++++--- arch/powerpc/kvm/e500_tlb.c | 83 ++++++++------- arch/powerpc/kvm/e500_tlb.h | 25 ++--- arch/powerpc/kvm/emulate.c | 3 +- arch/powerpc/kvm/powerpc.c | 2 +- 15 files changed, 339 insertions(+), 169 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index bc17441535f2..8cd50a514271 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -71,14 +71,6 @@ struct kvmppc_vcpu_e500 { u32 pid[E500_PID_NUM]; u32 svr; - u32 mas0; - u32 mas1; - u32 mas2; - u64 mas7_3; - u32 mas4; - u32 mas5; - u32 mas6; - /* vcpu id table */ struct vcpu_id_table *idt; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bf8af5d5d5dc..bfd0c9912da5 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -318,10 +318,6 @@ struct kvm_vcpu_arch { u32 vrsave; /* also USPRG0 */ u32 mmucr; ulong shadow_msr; - ulong sprg4; - ulong sprg5; - ulong sprg6; - ulong sprg7; ulong csrr0; ulong csrr1; ulong dsrr0; @@ -329,7 +325,6 @@ struct kvm_vcpu_arch { ulong mcsrr0; ulong mcsrr1; ulong mcsr; - ulong esr; u32 dec; u32 decar; u32 tbl; @@ -338,7 +333,6 @@ struct kvm_vcpu_arch { u32 tsr; u32 ivor[64]; ulong ivpr; - u32 pir; u32 pvr; u32 shadow_pid; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 50533f9adf40..ece70fb36513 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -33,11 +33,35 @@ struct kvm_vcpu_arch_shared { __u64 sprg3; __u64 srr0; __u64 srr1; - __u64 dar; + __u64 dar; /* dear on BookE */ __u64 msr; __u32 dsisr; __u32 int_pending; /* Tells the guest if we have an interrupt */ __u32 sr[16]; + __u32 mas0; + __u32 mas1; + __u64 mas7_3; + __u64 mas2; + __u32 mas4; + __u32 mas6; + __u32 esr; + __u32 pir; + + /* + * SPRG4-7 are user-readable, so we can only keep these consistent + * between the shared area and the real registers when there's an + * intervening exit to KVM. This also applies to SPRG3 on some + * chips. + * + * This suffices for access by guest userspace, since in PR-mode + * KVM, an exit must occur when changing the guest's MSR[PR]. + * If the guest kernel writes to SPRG3-7 via the shared area, it + * must also use the shared area for reading while in kernel space. + */ + __u64 sprg4; + __u64 sprg5; + __u64 sprg6; + __u64 sprg7; }; #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ @@ -47,7 +71,10 @@ struct kvm_vcpu_arch_shared { #define KVM_FEATURE_MAGIC_PAGE 1 -#define KVM_MAGIC_FEAT_SR (1 << 0) +#define KVM_MAGIC_FEAT_SR (1 << 0) + +/* MASn, ESR, PIR, and high SPRGs */ +#define KVM_MAGIC_FEAT_MAS0_TO_SPRG7 (1 << 1) #ifdef __KERNEL__ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 04caee7d9bc1..e7bfcf81b746 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -426,16 +426,23 @@ int main(void) DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); #endif - DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); - DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); - DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); - DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); + DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); + DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); + DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); + DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1)); DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); + DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0)); + DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1)); + DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2)); + DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3)); + DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); + DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); + /* book3s */ #ifdef CONFIG_KVM_BOOK3S_64_HV DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 06b15ee997f7..04d4b5aa6dca 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -49,23 +49,17 @@ #define KVM_RT_30 0x03c00000 #define KVM_MASK_RB 0x0000f800 #define KVM_INST_MFMSR 0x7c0000a6 -#define KVM_INST_MFSPR_SPRG0 0x7c1042a6 -#define KVM_INST_MFSPR_SPRG1 0x7c1142a6 -#define KVM_INST_MFSPR_SPRG2 0x7c1242a6 -#define KVM_INST_MFSPR_SPRG3 0x7c1342a6 -#define KVM_INST_MFSPR_SRR0 0x7c1a02a6 -#define KVM_INST_MFSPR_SRR1 0x7c1b02a6 -#define KVM_INST_MFSPR_DAR 0x7c1302a6 -#define KVM_INST_MFSPR_DSISR 0x7c1202a6 - -#define KVM_INST_MTSPR_SPRG0 0x7c1043a6 -#define KVM_INST_MTSPR_SPRG1 0x7c1143a6 -#define KVM_INST_MTSPR_SPRG2 0x7c1243a6 -#define KVM_INST_MTSPR_SPRG3 0x7c1343a6 -#define KVM_INST_MTSPR_SRR0 0x7c1a03a6 -#define KVM_INST_MTSPR_SRR1 0x7c1b03a6 -#define KVM_INST_MTSPR_DAR 0x7c1303a6 -#define KVM_INST_MTSPR_DSISR 0x7c1203a6 + +#define SPR_FROM 0 +#define SPR_TO 0x100 + +#define KVM_INST_SPR(sprn, moveto) (0x7c0002a6 | \ + (((sprn) & 0x1f) << 16) | \ + (((sprn) & 0x3e0) << 6) | \ + (moveto)) + +#define KVM_INST_MFSPR(sprn) KVM_INST_SPR(sprn, SPR_FROM) +#define KVM_INST_MTSPR(sprn) KVM_INST_SPR(sprn, SPR_TO) #define KVM_INST_TLBSYNC 0x7c00046c #define KVM_INST_MTMSRD_L0 0x7c000164 @@ -440,56 +434,191 @@ static void kvm_check_ins(u32 *inst, u32 features) case KVM_INST_MFMSR: kvm_patch_ins_ld(inst, magic_var(msr), inst_rt); break; - case KVM_INST_MFSPR_SPRG0: + case KVM_INST_MFSPR(SPRN_SPRG0): kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt); break; - case KVM_INST_MFSPR_SPRG1: + case KVM_INST_MFSPR(SPRN_SPRG1): kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt); break; - case KVM_INST_MFSPR_SPRG2: + case KVM_INST_MFSPR(SPRN_SPRG2): kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt); break; - case KVM_INST_MFSPR_SPRG3: + case KVM_INST_MFSPR(SPRN_SPRG3): kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt); break; - case KVM_INST_MFSPR_SRR0: + case KVM_INST_MFSPR(SPRN_SRR0): kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt); break; - case KVM_INST_MFSPR_SRR1: + case KVM_INST_MFSPR(SPRN_SRR1): kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt); break; - case KVM_INST_MFSPR_DAR: +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_DEAR): +#else + case KVM_INST_MFSPR(SPRN_DAR): +#endif kvm_patch_ins_ld(inst, magic_var(dar), inst_rt); break; - case KVM_INST_MFSPR_DSISR: + case KVM_INST_MFSPR(SPRN_DSISR): kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); break; +#ifdef CONFIG_PPC_BOOK3E_MMU + case KVM_INST_MFSPR(SPRN_MAS0): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS1): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas1), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS2): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(mas2), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS3): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas7_3) + 4, inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS4): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas4), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS6): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas6), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_MAS7): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt); + break; +#endif /* CONFIG_PPC_BOOK3E_MMU */ + + case KVM_INST_MFSPR(SPRN_SPRG4): +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_SPRG4R): +#endif + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(sprg4), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG5): +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_SPRG5R): +#endif + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(sprg5), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG6): +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_SPRG6R): +#endif + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(sprg6), inst_rt); + break; + case KVM_INST_MFSPR(SPRN_SPRG7): +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_SPRG7R): +#endif + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_ld(inst, magic_var(sprg7), inst_rt); + break; + +#ifdef CONFIG_BOOKE + case KVM_INST_MFSPR(SPRN_ESR): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(esr), inst_rt); + break; +#endif + + case KVM_INST_MFSPR(SPRN_PIR): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_lwz(inst, magic_var(pir), inst_rt); + break; + + /* Stores */ - case KVM_INST_MTSPR_SPRG0: + case KVM_INST_MTSPR(SPRN_SPRG0): kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt); break; - case KVM_INST_MTSPR_SPRG1: + case KVM_INST_MTSPR(SPRN_SPRG1): kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt); break; - case KVM_INST_MTSPR_SPRG2: + case KVM_INST_MTSPR(SPRN_SPRG2): kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt); break; - case KVM_INST_MTSPR_SPRG3: + case KVM_INST_MTSPR(SPRN_SPRG3): kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt); break; - case KVM_INST_MTSPR_SRR0: + case KVM_INST_MTSPR(SPRN_SRR0): kvm_patch_ins_std(inst, magic_var(srr0), inst_rt); break; - case KVM_INST_MTSPR_SRR1: + case KVM_INST_MTSPR(SPRN_SRR1): kvm_patch_ins_std(inst, magic_var(srr1), inst_rt); break; - case KVM_INST_MTSPR_DAR: +#ifdef CONFIG_BOOKE + case KVM_INST_MTSPR(SPRN_DEAR): +#else + case KVM_INST_MTSPR(SPRN_DAR): +#endif kvm_patch_ins_std(inst, magic_var(dar), inst_rt); break; - case KVM_INST_MTSPR_DSISR: + case KVM_INST_MTSPR(SPRN_DSISR): kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); break; +#ifdef CONFIG_PPC_BOOK3E_MMU + case KVM_INST_MTSPR(SPRN_MAS0): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS1): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas1), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS2): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(mas2), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS3): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas7_3) + 4, inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS4): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas4), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS6): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas6), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_MAS7): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt); + break; +#endif /* CONFIG_PPC_BOOK3E_MMU */ + + case KVM_INST_MTSPR(SPRN_SPRG4): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(sprg4), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG5): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(sprg5), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG6): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(sprg6), inst_rt); + break; + case KVM_INST_MTSPR(SPRN_SPRG7): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_std(inst, magic_var(sprg7), inst_rt); + break; + +#ifdef CONFIG_BOOKE + case KVM_INST_MTSPR(SPRN_ESR): + if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) + kvm_patch_ins_stw(inst, magic_var(esr), inst_rt); + break; +#endif /* Nops */ case KVM_INST_TLBSYNC: @@ -556,9 +685,18 @@ static void kvm_use_magic_page(void) start = (void*)_stext; end = (void*)_etext; + /* + * Being interrupted in the middle of patching would + * be bad for SPRG4-7, which KVM can't keep in sync + * with emulated accesses because reads don't trap. + */ + local_irq_disable(); + for (p = start; p < end; p++) kvm_check_ins(p, features); + local_irq_enable(); + printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", kvm_patching_worked ? "worked" : "failed"); } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 73fc9f046107..5398744cd773 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -423,10 +423,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->sprg1 = vcpu->arch.shared->sprg1; regs->sprg2 = vcpu->arch.shared->sprg2; regs->sprg3 = vcpu->arch.shared->sprg3; - regs->sprg4 = vcpu->arch.sprg4; - regs->sprg5 = vcpu->arch.sprg5; - regs->sprg6 = vcpu->arch.sprg6; - regs->sprg7 = vcpu->arch.sprg7; + regs->sprg4 = vcpu->arch.shared->sprg4; + regs->sprg5 = vcpu->arch.shared->sprg5; + regs->sprg6 = vcpu->arch.shared->sprg6; + regs->sprg7 = vcpu->arch.shared->sprg7; for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); @@ -450,10 +450,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; vcpu->arch.shared->sprg3 = regs->sprg3; - vcpu->arch.sprg4 = regs->sprg4; - vcpu->arch.sprg5 = regs->sprg5; - vcpu->arch.sprg6 = regs->sprg6; - vcpu->arch.sprg7 = regs->sprg7; + vcpu->arch.shared->sprg4 = regs->sprg4; + vcpu->arch.shared->sprg5 = regs->sprg5; + vcpu->arch.shared->sprg6 = regs->sprg6; + vcpu->arch.shared->sprg7 = regs->sprg7; for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8dfc59a8a715..50803dd0b8f2 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -270,7 +270,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - vcpu->arch.esr = vcpu->arch.queued_esr; + vcpu->arch.shared->esr = vcpu->arch.queued_esr; if (update_dear == true) vcpu->arch.shared->dar = vcpu->arch.queued_dear; kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); @@ -644,6 +644,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.pc = 0; vcpu->arch.shared->msr = 0; vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; + vcpu->arch.shared->pir = vcpu->vcpu_id; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ vcpu->arch.shadow_pid = 1; @@ -678,10 +679,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->sprg1 = vcpu->arch.shared->sprg1; regs->sprg2 = vcpu->arch.shared->sprg2; regs->sprg3 = vcpu->arch.shared->sprg3; - regs->sprg4 = vcpu->arch.sprg4; - regs->sprg5 = vcpu->arch.sprg5; - regs->sprg6 = vcpu->arch.sprg6; - regs->sprg7 = vcpu->arch.sprg7; + regs->sprg4 = vcpu->arch.shared->sprg4; + regs->sprg5 = vcpu->arch.shared->sprg5; + regs->sprg6 = vcpu->arch.shared->sprg6; + regs->sprg7 = vcpu->arch.shared->sprg7; for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); @@ -706,10 +707,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; vcpu->arch.shared->sprg3 = regs->sprg3; - vcpu->arch.sprg4 = regs->sprg4; - vcpu->arch.sprg5 = regs->sprg5; - vcpu->arch.sprg6 = regs->sprg6; - vcpu->arch.sprg7 = regs->sprg7; + vcpu->arch.shared->sprg4 = regs->sprg4; + vcpu->arch.shared->sprg5 = regs->sprg5; + vcpu->arch.shared->sprg6 = regs->sprg6; + vcpu->arch.shared->sprg7 = regs->sprg7; for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); @@ -727,7 +728,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = vcpu->arch.esr; + sregs->u.e.esr = vcpu->arch.shared->esr; sregs->u.e.dear = vcpu->arch.shared->dar; sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; @@ -745,7 +746,7 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - vcpu->arch.esr = sregs->u.e.esr; + vcpu->arch.shared->esr = sregs->u.e.esr; vcpu->arch.shared->dar = sregs->u.e.dear; vcpu->arch.vrsave = sregs->u.e.vrsave; vcpu->arch.tcr = sregs->u.e.tcr; diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 1260f5f24c0c..bae9288ac1e1 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -107,7 +107,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_DEAR: vcpu->arch.shared->dar = spr_val; break; case SPRN_ESR: - vcpu->arch.esr = spr_val; break; + vcpu->arch.shared->esr = spr_val; break; case SPRN_DBCR0: vcpu->arch.dbcr0 = spr_val; break; case SPRN_DBCR1: @@ -125,13 +125,13 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) * loaded into the real SPRGs when resuming the * guest. */ case SPRN_SPRG4: - vcpu->arch.sprg4 = spr_val; break; + vcpu->arch.shared->sprg4 = spr_val; break; case SPRN_SPRG5: - vcpu->arch.sprg5 = spr_val; break; + vcpu->arch.shared->sprg5 = spr_val; break; case SPRN_SPRG6: - vcpu->arch.sprg6 = spr_val; break; + vcpu->arch.shared->sprg6 = spr_val; break; case SPRN_SPRG7: - vcpu->arch.sprg7 = spr_val; break; + vcpu->arch.shared->sprg7 = spr_val; break; case SPRN_IVPR: vcpu->arch.ivpr = spr_val; @@ -202,7 +202,7 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_DEAR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; case SPRN_ESR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break; case SPRN_DBCR0: kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; case SPRN_DBCR1: diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 42f2fb1f66e9..10d8ef602e5c 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -402,19 +402,25 @@ lightweight_exit: /* Save vcpu pointer for the exception handlers. */ mtspr SPRN_SPRG_WVCPU, r4 + lwz r5, VCPU_SHARED(r4) + /* Can't switch the stack pointer until after IVPR is switched, * because host interrupt handlers would get confused. */ lwz r1, VCPU_GPR(r1)(r4) - /* Host interrupt handlers may have clobbered these guest-readable - * SPRGs, so we need to reload them here with the guest's values. */ - lwz r3, VCPU_SPRG4(r4) + /* + * Host interrupt handlers may have clobbered these + * guest-readable SPRGs, or the guest kernel may have + * written directly to the shared area, so we + * need to reload them here with the guest's values. + */ + lwz r3, VCPU_SHARED_SPRG4(r5) mtspr SPRN_SPRG4W, r3 - lwz r3, VCPU_SPRG5(r4) + lwz r3, VCPU_SHARED_SPRG5(r5) mtspr SPRN_SPRG5W, r3 - lwz r3, VCPU_SPRG6(r4) + lwz r3, VCPU_SHARED_SPRG6(r5) mtspr SPRN_SPRG6W, r3 - lwz r3, VCPU_SPRG7(r4) + lwz r3, VCPU_SHARED_SPRG7(r5) mtspr SPRN_SPRG7W, r3 #ifdef CONFIG_KVM_EXIT_TIMING diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index ac3c4bf21677..709d82f956e3 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -115,12 +115,12 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; - sregs->u.e.mas0 = vcpu_e500->mas0; - sregs->u.e.mas1 = vcpu_e500->mas1; - sregs->u.e.mas2 = vcpu_e500->mas2; - sregs->u.e.mas7_3 = vcpu_e500->mas7_3; - sregs->u.e.mas4 = vcpu_e500->mas4; - sregs->u.e.mas6 = vcpu_e500->mas6; + sregs->u.e.mas0 = vcpu->arch.shared->mas0; + sregs->u.e.mas1 = vcpu->arch.shared->mas1; + sregs->u.e.mas2 = vcpu->arch.shared->mas2; + sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; + sregs->u.e.mas4 = vcpu->arch.shared->mas4; + sregs->u.e.mas6 = vcpu->arch.shared->mas6; sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; @@ -148,12 +148,12 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) } if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { - vcpu_e500->mas0 = sregs->u.e.mas0; - vcpu_e500->mas1 = sregs->u.e.mas1; - vcpu_e500->mas2 = sregs->u.e.mas2; - vcpu_e500->mas7_3 = sregs->u.e.mas7_3; - vcpu_e500->mas4 = sregs->u.e.mas4; - vcpu_e500->mas6 = sregs->u.e.mas6; + vcpu->arch.shared->mas0 = sregs->u.e.mas0; + vcpu->arch.shared->mas1 = sregs->u.e.mas1; + vcpu->arch.shared->mas2 = sregs->u.e.mas2; + vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; + vcpu->arch.shared->mas4 = sregs->u.e.mas4; + vcpu->arch.shared->mas6 = sregs->u.e.mas6; } if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e0d36099c756..6d0b2bd54fb0 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -89,22 +89,22 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) return EMULATE_FAIL; vcpu_e500->pid[2] = spr_val; break; case SPRN_MAS0: - vcpu_e500->mas0 = spr_val; break; + vcpu->arch.shared->mas0 = spr_val; break; case SPRN_MAS1: - vcpu_e500->mas1 = spr_val; break; + vcpu->arch.shared->mas1 = spr_val; break; case SPRN_MAS2: - vcpu_e500->mas2 = spr_val; break; + vcpu->arch.shared->mas2 = spr_val; break; case SPRN_MAS3: - vcpu_e500->mas7_3 &= ~(u64)0xffffffff; - vcpu_e500->mas7_3 |= spr_val; + vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff; + vcpu->arch.shared->mas7_3 |= spr_val; break; case SPRN_MAS4: - vcpu_e500->mas4 = spr_val; break; + vcpu->arch.shared->mas4 = spr_val; break; case SPRN_MAS6: - vcpu_e500->mas6 = spr_val; break; + vcpu->arch.shared->mas6 = spr_val; break; case SPRN_MAS7: - vcpu_e500->mas7_3 &= (u64)0xffffffff; - vcpu_e500->mas7_3 |= (u64)spr_val << 32; + vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; + vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32; break; case SPRN_L1CSR0: vcpu_e500->l1csr0 = spr_val; @@ -147,6 +147,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; + unsigned long val; switch (sprn) { case SPRN_PID: @@ -156,20 +157,23 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_PID2: kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; case SPRN_MAS0: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break; case SPRN_MAS1: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break; case SPRN_MAS2: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break; case SPRN_MAS3: - kvmppc_set_gpr(vcpu, rt, (u32)vcpu_e500->mas7_3); break; + val = (u32)vcpu->arch.shared->mas7_3; + kvmppc_set_gpr(vcpu, rt, val); + break; case SPRN_MAS4: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break; case SPRN_MAS6: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break; case SPRN_MAS7: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7_3 >> 32); break; - + val = vcpu->arch.shared->mas7_3 >> 32; + kvmppc_set_gpr(vcpu, rt, val); + break; case SPRN_TLB0CFG: kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; case SPRN_TLB1CFG: diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 6fefb9144f23..9cd124a11acd 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -428,13 +428,14 @@ static int htlb0_set_base(gva_t addr) host_tlb_params[0].ways); } -static unsigned int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel) +static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel) { - unsigned int esel = get_tlb_esel_bit(vcpu_e500); + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel = get_tlb_esel_bit(vcpu); if (tlbsel == 0) { esel &= vcpu_e500->gtlb_params[0].ways - 1; - esel += gtlb0_set_base(vcpu_e500, vcpu_e500->mas2); + esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2); } else { esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1; } @@ -545,20 +546,20 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, int tlbsel; /* since we only have two TLBs, only lower bit is used. */ - tlbsel = (vcpu_e500->mas4 >> 28) & 0x1; + tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; - pidsel = (vcpu_e500->mas4 >> 16) & 0xf; - tsized = (vcpu_e500->mas4 >> 7) & 0x1f; + pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf; + tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; - vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); - vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) + vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) | MAS1_TID(vcpu_e500->pid[pidsel]) | MAS1_TSIZE(tsized); - vcpu_e500->mas2 = (eaddr & MAS2_EPN) - | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK); - vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; - vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1) + vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) + | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); + vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1) | (get_cur_pid(vcpu) << 16) | (as ? MAS6_SAS : 0); } @@ -844,15 +845,15 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) int tlbsel, esel; struct kvm_book3e_206_tlb_entry *gtlbe; - tlbsel = get_tlb_tlbsel(vcpu_e500); - esel = get_tlb_esel(vcpu_e500, tlbsel); + tlbsel = get_tlb_tlbsel(vcpu); + esel = get_tlb_esel(vcpu, tlbsel); gtlbe = get_entry(vcpu_e500, tlbsel, esel); - vcpu_e500->mas0 &= ~MAS0_NV(~0); - vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); - vcpu_e500->mas1 = gtlbe->mas1; - vcpu_e500->mas2 = gtlbe->mas2; - vcpu_e500->mas7_3 = gtlbe->mas7_3; + vcpu->arch.shared->mas0 &= ~MAS0_NV(~0); + vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = gtlbe->mas1; + vcpu->arch.shared->mas2 = gtlbe->mas2; + vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; return EMULATE_DONE; } @@ -860,8 +861,8 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int as = !!get_cur_sas(vcpu_e500); - unsigned int pid = get_cur_spid(vcpu_e500); + int as = !!get_cur_sas(vcpu); + unsigned int pid = get_cur_spid(vcpu); int esel, tlbsel; struct kvm_book3e_206_tlb_entry *gtlbe = NULL; gva_t ea; @@ -879,26 +880,30 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) if (gtlbe) { esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1; - vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); - vcpu_e500->mas1 = gtlbe->mas1; - vcpu_e500->mas2 = gtlbe->mas2; - vcpu_e500->mas7_3 = gtlbe->mas7_3; + vcpu->arch.shared->mas1 = gtlbe->mas1; + vcpu->arch.shared->mas2 = gtlbe->mas2; + vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; } else { int victim; /* since we only have two TLBs, only lower bit is used. */ - tlbsel = vcpu_e500->mas4 >> 28 & 0x1; + tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1; victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; - vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) + | MAS0_ESEL(victim) | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); - vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0) - | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0)) - | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); - vcpu_e500->mas2 &= MAS2_EPN; - vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK; - vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu->arch.shared->mas1 = + (vcpu->arch.shared->mas6 & MAS6_SPID0) + | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) + | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); + vcpu->arch.shared->mas2 &= MAS2_EPN; + vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & + MAS2_ATTRIB_MASK; + vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | + MAS3_U2 | MAS3_U3; } kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); @@ -929,19 +934,19 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) struct kvm_book3e_206_tlb_entry *gtlbe; int tlbsel, esel; - tlbsel = get_tlb_tlbsel(vcpu_e500); - esel = get_tlb_esel(vcpu_e500, tlbsel); + tlbsel = get_tlb_tlbsel(vcpu); + esel = get_tlb_esel(vcpu, tlbsel); gtlbe = get_entry(vcpu_e500, tlbsel, esel); if (get_tlb_v(gtlbe)) inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); - gtlbe->mas1 = vcpu_e500->mas1; - gtlbe->mas2 = vcpu_e500->mas2; - gtlbe->mas7_3 = vcpu_e500->mas7_3; + gtlbe->mas1 = vcpu->arch.shared->mas1; + gtlbe->mas2 = vcpu->arch.shared->mas2; + gtlbe->mas7_3 = vcpu->arch.shared->mas7_3; - trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2, + trace_kvm_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, gtlbe->mas2, (u32)gtlbe->mas7_3, (u32)(gtlbe->mas7_3 >> 32)); /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index 2c296407e759..5c6d2d7bf058 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -121,38 +121,33 @@ static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu) return !!(vcpu->arch.shared->msr & MSR_PR); } -static inline unsigned int get_cur_spid( - const struct kvmppc_vcpu_e500 *vcpu_e500) +static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu) { - return (vcpu_e500->mas6 >> 16) & 0xff; + return (vcpu->arch.shared->mas6 >> 16) & 0xff; } -static inline unsigned int get_cur_sas( - const struct kvmppc_vcpu_e500 *vcpu_e500) +static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu) { - return vcpu_e500->mas6 & 0x1; + return vcpu->arch.shared->mas6 & 0x1; } -static inline unsigned int get_tlb_tlbsel( - const struct kvmppc_vcpu_e500 *vcpu_e500) +static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu) { /* * Manual says that tlbsel has 2 bits wide. * Since we only have two TLBs, only lower bit is used. */ - return (vcpu_e500->mas0 >> 28) & 0x1; + return (vcpu->arch.shared->mas0 >> 28) & 0x1; } -static inline unsigned int get_tlb_nv_bit( - const struct kvmppc_vcpu_e500 *vcpu_e500) +static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu) { - return vcpu_e500->mas0 & 0xfff; + return vcpu->arch.shared->mas0 & 0xfff; } -static inline unsigned int get_tlb_esel_bit( - const struct kvmppc_vcpu_e500 *vcpu_e500) +static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu) { - return (vcpu_e500->mas0 >> 16) & 0xfff; + return (vcpu->arch.shared->mas0 >> 16) & 0xfff; } static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index b6df56dd93ba..bda052e2264b 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -162,7 +162,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case OP_TRAP_64: kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); #else - kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR); + kvmppc_core_queue_program(vcpu, + vcpu->arch.shared->esr | ESR_PTR); #endif advance = 0; break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7411bdd8ff6f..d02e4c84e213 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -66,7 +66,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) vcpu->arch.magic_page_pa = param1; vcpu->arch.magic_page_ea = param2; - r2 = KVM_MAGIC_FEAT_SR; + r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; r = HC_EV_SUCCESS; break; -- cgit v1.2.3 From dfd4d47e9a71c5a35eb67a44cd311efbe1846b7e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 17 Nov 2011 12:39:59 +0000 Subject: KVM: PPC: booke: Improve timer register emulation Decrementers are now properly driven by TCR/TSR, and the guest has full read/write access to these registers. The decrementer keeps ticking (and setting the TSR bit) regardless of whether the interrupts are enabled with TCR. The decrementer stops at zero, rather than going negative. Decrementers (and FITs, once implemented) are delivered as level-triggered interrupts -- dequeued when the TSR bit is cleared, not on delivery. Signed-off-by: Liu Yu [scottwood@freescale.com: significant changes] Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s.c | 8 +++++ arch/powerpc/kvm/booke.c | 67 ++++++++++++++++++++++++++++--------- arch/powerpc/kvm/booke.h | 4 +++ arch/powerpc/kvm/booke_emulate.c | 11 ++++-- arch/powerpc/kvm/emulate.c | 59 ++++++++++++++++---------------- arch/powerpc/kvm/powerpc.c | 33 +++++++----------- 8 files changed, 115 insertions(+), 70 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bfd0c9912da5..66c75cddaec6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -330,7 +330,7 @@ struct kvm_vcpu_arch { u32 tbl; u32 tbu; u32 tcr; - u32 tsr; + ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; ulong ivpr; u32 pvr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c089927f64cc..5192c2e70583 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); +extern void kvmppc_decrementer_func(unsigned long data); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); /* Core-specific hooks */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5398744cd773..6bf7e0582c5a 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -515,3 +515,11 @@ out: mutex_unlock(&kvm->slots_lock); return r; } + +void kvmppc_decrementer_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvmppc_core_queue_dec(vcpu); + kvm_vcpu_kick(vcpu); +} diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 50803dd0b8f2..9e41f45d07ed 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -252,9 +252,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, allowed = vcpu->arch.shared->msr & MSR_ME; msr_mask = 0; break; - case BOOKE_IRQPRIO_EXTERNAL: case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: + keep_irq = true; + /* fall through */ + case BOOKE_IRQPRIO_EXTERNAL: allowed = vcpu->arch.shared->msr & MSR_EE; allowed = allowed && !crit; msr_mask = MSR_CE|MSR_ME|MSR_DE; @@ -282,11 +284,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, return allowed; } +static void update_timer_ints(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) + kvmppc_core_queue_dec(vcpu); + else + kvmppc_core_dequeue_dec(vcpu); +} + static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned int priority; + if (vcpu->requests) { + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) { + smp_mb(); + update_timer_ints(vcpu); + } + } + priority = __ffs(*pending); while (priority <= BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) @@ -749,25 +766,16 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.shared->esr = sregs->u.e.esr; vcpu->arch.shared->dar = sregs->u.e.dear; vcpu->arch.vrsave = sregs->u.e.vrsave; - vcpu->arch.tcr = sregs->u.e.tcr; + kvmppc_set_tcr(vcpu, sregs->u.e.tcr); - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) { vcpu->arch.dec = sregs->u.e.dec; - - kvmppc_emulate_dec(vcpu); + kvmppc_emulate_dec(vcpu); + } if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { - /* - * FIXME: existing KVM timer handling is incomplete. - * TSR cannot be read by the guest, and its value in - * vcpu->arch is always zero. For now, just handle - * the case where the caller is trying to inject a - * decrementer interrupt. - */ - - if ((sregs->u.e.tsr & TSR_DIS) && - (vcpu->arch.tcr & TCR_DIE)) - kvmppc_core_queue_dec(vcpu); + vcpu->arch.tsr = sregs->u.e.tsr; + update_timer_ints(vcpu); } return 0; @@ -923,6 +931,33 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) { } +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) +{ + vcpu->arch.tcr = new_tcr; + update_timer_ints(vcpu); +} + +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) +{ + set_bits(tsr_bits, &vcpu->arch.tsr); + smp_wmb(); + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_vcpu_kick(vcpu); +} + +void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) +{ + clear_bits(tsr_bits, &vcpu->arch.tsr); + update_timer_ints(vcpu); +} + +void kvmppc_decrementer_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvmppc_set_tsr_bits(vcpu, TSR_DIS); +} + int __init kvmppc_booke_init(void) { unsigned long ivor[16]; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 8e1fe33d64e5..2fe202705a3f 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -55,6 +55,10 @@ extern unsigned long kvmppc_booke_handlers; void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); +void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); + int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index bae9288ac1e1..3e652da36534 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -13,6 +13,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2008 + * Copyright 2011 Freescale Semiconductor, Inc. * * Authors: Hollis Blanchard */ @@ -115,10 +116,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_DBSR: vcpu->arch.dbsr &= ~spr_val; break; case SPRN_TSR: - vcpu->arch.tsr &= ~spr_val; break; + kvmppc_clr_tsr_bits(vcpu, spr_val); + break; case SPRN_TCR: - vcpu->arch.tcr = spr_val; - kvmppc_emulate_dec(vcpu); + kvmppc_set_tcr(vcpu, spr_val); break; /* Note: SPRG4-7 are user-readable. These values are @@ -209,6 +210,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; case SPRN_DBSR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; + case SPRN_TSR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break; + case SPRN_TCR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break; case SPRN_IVOR0: kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index bda052e2264b..968f40101883 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -13,6 +13,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. * * Authors: Hollis Blanchard */ @@ -69,57 +70,55 @@ #define OP_STH 44 #define OP_STHU 45 -#ifdef CONFIG_PPC_BOOK3S -static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) -{ - return 1; -} -#else -static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) -{ - /* On BOOKE, DEC = 0 is as good as decrementer not enabled */ - return (vcpu->arch.tcr & TCR_DIE) && vcpu->arch.dec; -} -#endif - void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { unsigned long dec_nsec; unsigned long long dec_time; pr_debug("mtDEC: %x\n", vcpu->arch.dec); + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + #ifdef CONFIG_PPC_BOOK3S /* mtdec lowers the interrupt line when positive. */ kvmppc_core_dequeue_dec(vcpu); /* POWER4+ triggers a dec interrupt if the value is < 0 */ if (vcpu->arch.dec & 0x80000000) { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); kvmppc_core_queue_dec(vcpu); return; } #endif - if (kvmppc_dec_enabled(vcpu)) { - /* The decrementer ticks at the same rate as the timebase, so - * that's how we convert the guest DEC value to the number of - * host ticks. */ - - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - dec_time = vcpu->arch.dec; - dec_time *= 1000; - do_div(dec_time, tb_ticks_per_usec); - dec_nsec = do_div(dec_time, NSEC_PER_SEC); - hrtimer_start(&vcpu->arch.dec_timer, - ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); - vcpu->arch.dec_jiffies = get_tb(); - } else { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - } + +#ifdef CONFIG_BOOKE + /* On BOOKE, DEC = 0 is as good as decrementer not enabled */ + if (vcpu->arch.dec == 0) + return; +#endif + + /* + * The decrementer ticks at the same rate as the timebase, so + * that's how we convert the guest DEC value to the number of + * host ticks. + */ + + dec_time = vcpu->arch.dec; + dec_time *= 1000; + do_div(dec_time, tb_ticks_per_usec); + dec_nsec = do_div(dec_time, NSEC_PER_SEC); + hrtimer_start(&vcpu->arch.dec_timer, + ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); + vcpu->arch.dec_jiffies = get_tb(); } u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) { u64 jd = tb - vcpu->arch.dec_jiffies; + +#ifdef CONFIG_BOOKE + if (vcpu->arch.dec < jd) + return 0; +#endif + return vcpu->arch.dec - jd; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d02e4c84e213..fd8d3b16eaf3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -39,7 +39,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { return !(v->arch.shared->msr & MSR_WE) || - !!(v->arch.pending_exceptions); + !!(v->arch.pending_exceptions) || + v->requests; } int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) @@ -311,18 +312,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -static void kvmppc_decrementer_func(unsigned long data) -{ - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - - kvmppc_core_queue_dec(vcpu); - - if (waitqueue_active(vcpu->arch.wqp)) { - wake_up_interruptible(vcpu->arch.wqp); - vcpu->stat.halt_wakeup++; - } -} - /* * low level hrtimer wake routine. Because this runs in hardirq context * we schedule a tasklet to do the real work. @@ -567,6 +556,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return r; } +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(vcpu->arch.wqp); + vcpu->stat.halt_wakeup++; + } else if (vcpu->cpu != -1) { + smp_send_reschedule(vcpu->cpu); + } +} + int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { @@ -575,13 +574,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) } kvmppc_core_queue_external(vcpu, irq); - - if (waitqueue_active(vcpu->arch.wqp)) { - wake_up_interruptible(vcpu->arch.wqp); - vcpu->stat.halt_wakeup++; - } else if (vcpu->cpu != -1) { - smp_send_reschedule(vcpu->cpu); - } + kvm_vcpu_kick(vcpu); return 0; } -- cgit v1.2.3 From ae21216bece0a623d09980c120b9c98790a860b9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 9 Dec 2011 15:20:46 +0100 Subject: KVM: PPC: align vcpu_kick with x86 Our vcpu kick implementation differs a bit from x86 which resulted in us not disabling preemption during the kick. Get it a bit closer to what x86 does. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/powerpc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index fd8d3b16eaf3..e1ef4d6d972a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -558,12 +558,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { + int me; + int cpu = vcpu->cpu; + + me = get_cpu(); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(vcpu->arch.wqp); vcpu->stat.halt_wakeup++; - } else if (vcpu->cpu != -1) { + } else if (cpu != me && cpu != -1) { smp_send_reschedule(vcpu->cpu); } + put_cpu(); } int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) -- cgit v1.2.3 From 4e72dbe13528394a413889d73e5025dbdf6cab70 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Dec 2011 12:24:48 +0000 Subject: KVM: PPC: Make wakeups work again for Book3S HV guests When commit f43fdc15fa ("KVM: PPC: booke: Improve timer register emulation") factored out some code in arch/powerpc/kvm/powerpc.c into a new helper function, kvm_vcpu_kick(), an error crept in which causes Book3s HV guest vcpus to stall. This fixes it. On POWER7 machines, guest vcpus are grouped together into virtual CPU cores that share a single waitqueue, so it's important to use vcpu->arch.wqp rather than &vcpu->wq. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/powerpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e1ef4d6d972a..4f85ac32258a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -562,7 +562,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) int cpu = vcpu->cpu; me = get_cpu(); - if (waitqueue_active(&vcpu->wq)) { + if (waitqueue_active(vcpu->arch.wqp)) { wake_up_interruptible(vcpu->arch.wqp); vcpu->stat.halt_wakeup++; } else if (cpu != me && cpu != -1) { -- cgit v1.2.3 From 342d3db763f2621ed4546ebf8f6c61cb29d7fbdb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Dec 2011 12:38:05 +0000 Subject: KVM: PPC: Implement MMU notifiers for Book3S HV guests This adds the infrastructure to enable us to page out pages underneath a Book3S HV guest, on processors that support virtualized partition memory, that is, POWER7. Instead of pinning all the guest's pages, we now look in the host userspace Linux page tables to find the mapping for a given guest page. Then, if the userspace Linux PTE gets invalidated, kvm_unmap_hva() gets called for that address, and we replace all the guest HPTEs that refer to that page with absent HPTEs, i.e. ones with the valid bit clear and the HPTE_V_ABSENT bit set, which will cause an HDSI when the guest tries to access them. Finally, the page fault handler is extended to reinstantiate the guest HPTE when the guest tries to access a page which has been paged out. Since we can't intercept the guest DSI and ISI interrupts on PPC970, we still have to pin all the guest pages on PPC970. We have a new flag, kvm->arch.using_mmu_notifiers, that indicates whether we can page guest pages out. If it is not set, the MMU notifier callbacks do nothing and everything operates as before. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s.h | 4 + arch/powerpc/include/asm/kvm_book3s_64.h | 31 ++++ arch/powerpc/include/asm/kvm_host.h | 16 ++ arch/powerpc/include/asm/reg.h | 3 + arch/powerpc/kvm/Kconfig | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 290 ++++++++++++++++++++++++++++--- arch/powerpc/kvm/book3s_hv.c | 25 +-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 140 +++++++++++---- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 49 ++++++ arch/powerpc/kvm/powerpc.c | 3 + arch/powerpc/mm/hugetlbpage.c | 2 + 11 files changed, 499 insertions(+), 65 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 3a9e51f43397..9240cebf8bad 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -143,6 +143,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); +extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, + unsigned long *rmap, long pte_index, int realmode); +extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, + unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 79dc37fb86b5..c21e46da4a3b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -136,6 +136,37 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type; } +/* + * Lock and read a linux PTE. If it's present and writable, atomically + * set dirty and referenced bits and return the PTE, otherwise return 0. + */ +static inline pte_t kvmppc_read_update_linux_pte(pte_t *p) +{ + pte_t pte, tmp; + + /* wait until _PAGE_BUSY is clear then set it atomically */ + __asm__ __volatile__ ( + "1: ldarx %0,0,%3\n" + " andi. %1,%0,%4\n" + " bne- 1b\n" + " ori %1,%0,%4\n" + " stdcx. %1,0,%3\n" + " bne- 1b" + : "=&r" (pte), "=&r" (tmp), "=m" (*p) + : "r" (p), "i" (_PAGE_BUSY) + : "cc"); + + if (pte_present(pte)) { + pte = pte_mkyoung(pte); + if (pte_write(pte)) + pte = pte_mkdirty(pte); + } + + *p = pte; /* clears _PAGE_BUSY */ + + return pte; +} + /* Return HPTE cache control bits corresponding to Linux pte bits */ static inline unsigned long hpte_cache_bits(unsigned long pte_val) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 937cacaaf236..968f3aa61cd1 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -32,6 +32,7 @@ #include #include #include +#include #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -44,6 +45,19 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +#ifdef CONFIG_KVM_BOOK3S_64_HV +#include + +#define KVM_ARCH_WANT_MMU_NOTIFIER + +struct kvm; +extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); +extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + +#endif + /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 #define KVM_NR_PAGE_SIZES 1 @@ -212,6 +226,7 @@ struct kvm_arch { struct kvmppc_rma_info *rma; unsigned long vrma_slb_v; int rma_setup_done; + int using_mmu_notifiers; struct list_head spapr_tce_tables; spinlock_t slot_phys_lock; unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; @@ -460,6 +475,7 @@ struct kvm_vcpu_arch { struct list_head run_list; struct task_struct *run_task; struct kvm_run *kvm_run; + pgd_t *pgdir; #endif }; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 16efb3151c20..35c9309bf038 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -495,6 +495,9 @@ #define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */ #define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ #define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ +#define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */ +#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ +#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ #define SRR1_WAKESYSERR 0x00300000 /* System error */ #define SRR1_WAKEEE 0x00200000 /* External interrupt */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 78133deb4b64..8f64709ae331 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -69,6 +69,7 @@ config KVM_BOOK3S_64 config KVM_BOOK3S_64_HV bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 + select MMU_NOTIFIER ---help--- Support running unmodified book3s_64 guest kernels in virtual machines on POWER7 and PPC970 processors that have diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d31519b8637..83761dd8a924 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -281,8 +281,9 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, } /* - * We come here on a H_ENTER call from the guest when - * we don't have the requested page pinned already. + * We come here on a H_ENTER call from the guest when we are not + * using mmu notifiers and we don't have the requested page pinned + * already. */ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel) @@ -292,6 +293,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, struct kvm_memory_slot *memslot; long ret; + if (kvm->arch.using_mmu_notifiers) + goto do_insert; + psize = hpte_page_size(pteh, ptel); if (!psize) return H_PARAMETER; @@ -309,9 +313,12 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, return H_PARAMETER; } - preempt_disable(); + do_insert: + /* Protect linux PTE lookup from page table destruction */ + rcu_read_lock_sched(); /* this disables preemption too */ + vcpu->arch.pgdir = current->mm->pgd; ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); - preempt_enable(); + rcu_read_unlock_sched(); if (ret == H_TOO_HARD) { /* this can't happen */ pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); @@ -487,12 +494,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long *hptep, hpte[3]; - unsigned long psize; - unsigned long gfn; + unsigned long *hptep, hpte[3], r; + unsigned long mmu_seq, psize, pte_size; + unsigned long gfn, hva, pfn; struct kvm_memory_slot *memslot; + unsigned long *rmap; struct revmap_entry *rev; - long index; + struct page *page, *pages[1]; + long index, ret, npages; + unsigned long is_io; + struct vm_area_struct *vma; /* * Real-mode code has already searched the HPT and found the @@ -510,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, cpu_relax(); hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; hpte[1] = hptep[1]; - hpte[2] = rev->guest_rpte; + hpte[2] = r = rev->guest_rpte; asm volatile("lwsync" : : : "memory"); hptep[0] = hpte[0]; preempt_enable(); @@ -520,8 +531,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return RESUME_GUEST; /* Translate the logical address and get the page */ - psize = hpte_page_size(hpte[0], hpte[1]); - gfn = hpte_rpn(hpte[2], psize); + psize = hpte_page_size(hpte[0], r); + gfn = hpte_rpn(r, psize); memslot = gfn_to_memslot(kvm, gfn); /* No memslot means it's an emulated MMIO region */ @@ -531,8 +542,228 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, dsisr & DSISR_ISSTORE); } - /* should never get here otherwise */ - return -EFAULT; + if (!kvm->arch.using_mmu_notifiers) + return -EFAULT; /* should never get here */ + + /* used to check for invalidations in progress */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + + is_io = 0; + pfn = 0; + page = NULL; + pte_size = PAGE_SIZE; + hva = gfn_to_hva_memslot(memslot, gfn); + npages = get_user_pages_fast(hva, 1, 1, pages); + if (npages < 1) { + /* Check if it's an I/O mapping */ + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, hva); + if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && + (vma->vm_flags & VM_PFNMAP)) { + pfn = vma->vm_pgoff + + ((hva - vma->vm_start) >> PAGE_SHIFT); + pte_size = psize; + is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); + } + up_read(¤t->mm->mmap_sem); + if (!pfn) + return -EFAULT; + } else { + page = pages[0]; + if (PageHuge(page)) { + page = compound_head(page); + pte_size <<= compound_order(page); + } + pfn = page_to_pfn(page); + } + + ret = -EFAULT; + if (psize > pte_size) + goto out_put; + + /* Check WIMG vs. the actual page we're accessing */ + if (!hpte_cache_flags_ok(r, is_io)) { + if (is_io) + return -EFAULT; + /* + * Allow guest to map emulated device memory as + * uncacheable, but actually make it cacheable. + */ + r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M; + } + + /* Set the HPTE to point to pfn */ + r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); + ret = RESUME_GUEST; + preempt_disable(); + while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) + cpu_relax(); + if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || + rev->guest_rpte != hpte[2]) + /* HPTE has been changed under us; let the guest retry */ + goto out_unlock; + hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; + + rmap = &memslot->rmap[gfn - memslot->base_gfn]; + lock_rmap(rmap); + + /* Check if we might have been invalidated; let the guest retry if so */ + ret = RESUME_GUEST; + if (mmu_notifier_retry(vcpu, mmu_seq)) { + unlock_rmap(rmap); + goto out_unlock; + } + kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); + + hptep[1] = r; + eieio(); + hptep[0] = hpte[0]; + asm volatile("ptesync" : : : "memory"); + preempt_enable(); + if (page) + SetPageDirty(page); + + out_put: + if (page) + put_page(page); + return ret; + + out_unlock: + hptep[0] &= ~HPTE_V_HVLOCK; + preempt_enable(); + goto out_put; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn)) +{ + int ret; + int retval = 0; + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) { + unsigned long start = memslot->userspace_addr; + unsigned long end; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + + ret = handler(kvm, &memslot->rmap[gfn_offset], + memslot->base_gfn + gfn_offset); + retval |= ret; + } + } + + return retval; +} + +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn) +{ + struct revmap_entry *rev = kvm->arch.revmap; + unsigned long h, i, j; + unsigned long *hptep; + unsigned long ptel, psize; + + for (;;) { + while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp)) + cpu_relax(); + if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { + __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp); + break; + } + + /* + * To avoid an ABBA deadlock with the HPTE lock bit, + * we have to unlock the rmap chain before locking the HPTE. + * Thus we remove the first entry, unlock the rmap chain, + * lock the HPTE and then check that it is for the + * page we're unmapping before changing it to non-present. + */ + i = *rmapp & KVMPPC_RMAP_INDEX; + j = rev[i].forw; + if (j == i) { + /* chain is now empty */ + j = 0; + } else { + /* remove i from chain */ + h = rev[i].back; + rev[h].forw = j; + rev[j].back = h; + rev[i].forw = rev[i].back = i; + j |= KVMPPC_RMAP_PRESENT; + } + smp_wmb(); + *rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT); + + /* Now lock, check and modify the HPTE */ + hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) + cpu_relax(); + ptel = rev[i].guest_rpte; + psize = hpte_page_size(hptep[0], ptel); + if ((hptep[0] & HPTE_V_VALID) && + hpte_rpn(ptel, psize) == gfn) { + kvmppc_invalidate_hpte(kvm, hptep, i); + hptep[0] |= HPTE_V_ABSENT; + } + hptep[0] &= ~HPTE_V_HVLOCK; + } + return 0; +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + if (kvm->arch.using_mmu_notifiers) + kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + return 0; +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn) +{ + if (!kvm->arch.using_mmu_notifiers) + return 0; + if (!(*rmapp & KVMPPC_RMAP_REFERENCED)) + return 0; + kvm_unmap_rmapp(kvm, rmapp, gfn); + while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp)) + cpu_relax(); + __clear_bit(KVMPPC_RMAP_REF_BIT, rmapp); + __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp); + return 1; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + if (!kvm->arch.using_mmu_notifiers) + return 0; + return kvm_handle_hva(kvm, hva, kvm_age_rmapp); +} + +static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn) +{ + return !!(*rmapp & KVMPPC_RMAP_REFERENCED); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + if (!kvm->arch.using_mmu_notifiers) + return 0; + return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + if (!kvm->arch.using_mmu_notifiers) + return; + kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); } void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, @@ -540,31 +771,42 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, { struct kvm_memory_slot *memslot; unsigned long gfn = gpa >> PAGE_SHIFT; - struct page *page; - unsigned long psize, offset; + struct page *page, *pages[1]; + int npages; + unsigned long hva, psize, offset; unsigned long pa; unsigned long *physp; memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return NULL; - physp = kvm->arch.slot_phys[memslot->id]; - if (!physp) - return NULL; - physp += gfn - memslot->base_gfn; - pa = *physp; - if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0) + if (!kvm->arch.using_mmu_notifiers) { + physp = kvm->arch.slot_phys[memslot->id]; + if (!physp) return NULL; + physp += gfn - memslot->base_gfn; pa = *physp; + if (!pa) { + if (kvmppc_get_guest_page(kvm, gfn, memslot, + PAGE_SIZE) < 0) + return NULL; + pa = *physp; + } + page = pfn_to_page(pa >> PAGE_SHIFT); + } else { + hva = gfn_to_hva_memslot(memslot, gfn); + npages = get_user_pages_fast(hva, 1, 1, pages); + if (npages < 1) + return NULL; + page = pages[0]; } - page = pfn_to_page(pa >> PAGE_SHIFT); psize = PAGE_SIZE; if (PageHuge(page)) { page = compound_head(page); psize <<= compound_order(page); } - get_page(page); + if (!kvm->arch.using_mmu_notifiers) + get_page(page); offset = gpa & (psize - 1); if (nb_ret) *nb_ret = psize - offset; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 45aabb9a527f..86c4191cb75b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -326,19 +326,19 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } /* - * We get this if the guest accesses a page which it thinks - * it has mapped but which is not actually present, because - * it is for an emulated I/O device. - * Any other HDSI interrupt has been handled already. + * We get these next two if the guest accesses a page which it thinks + * it has mapped but which is not actually present, either because + * it is for an emulated I/O device or because the corresonding + * host page has been paged out. Any other HDSI/HISI interrupts + * have been handled already. */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: r = kvmppc_book3s_hv_page_fault(run, vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: - kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, - vcpu->arch.shregs.msr & 0x58000000); - r = RESUME_GUEST; + r = kvmppc_book3s_hv_page_fault(run, vcpu, + kvmppc_get_pc(vcpu), 0); break; /* * This occurs if the guest executes an illegal instruction. @@ -867,6 +867,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_altivec_to_thread(current); flush_vsx_to_thread(current); vcpu->arch.wqp = &vcpu->arch.vcore->wq; + vcpu->arch.pgdir = current->mm->pgd; do { r = kvmppc_run_vcpu(run, vcpu); @@ -1090,9 +1091,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, unsigned long *phys; /* Allocate a slot_phys array */ - npages = mem->memory_size >> PAGE_SHIFT; phys = kvm->arch.slot_phys[mem->slot]; - if (!phys) { + if (!kvm->arch.using_mmu_notifiers && !phys) { + npages = mem->memory_size >> PAGE_SHIFT; phys = vzalloc(npages * sizeof(unsigned long)); if (!phys) return -ENOMEM; @@ -1298,6 +1299,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) } kvm->arch.lpcr = lpcr; + kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); spin_lock_init(&kvm->arch.slot_phys_lock); return 0; } @@ -1306,8 +1308,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) { unsigned long i; - for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) - unpin_slot(kvm, i); + if (!kvm->arch.using_mmu_notifiers) + for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) + unpin_slot(kvm, i); if (kvm->arch.rma) { kvm_release_rma(kvm->arch.rma); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index a5176dc37e7e..81d16ed9767d 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -58,7 +58,7 @@ static void *real_vmalloc_addr(void *x) * Add this HPTE into the chain for the real page. * Must be called with the chain locked; it unlocks the chain. */ -static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, +void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode) { struct revmap_entry *head, *tail; @@ -83,6 +83,7 @@ static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, smp_wmb(); *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */ } +EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); /* Remove this HPTE from the chain for a real page */ static void remove_revmap_chain(struct kvm *kvm, long pte_index, @@ -118,12 +119,33 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, unlock_rmap(rmap); } +static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva, + unsigned long *pte_sizep) +{ + pte_t *ptep; + unsigned long ps = *pte_sizep; + unsigned int shift; + + ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift); + if (!ptep) + return __pte(0); + if (shift) + *pte_sizep = 1ul << shift; + else + *pte_sizep = PAGE_SIZE; + if (ps > *pte_sizep) + return __pte(0); + if (!pte_present(*ptep)) + return __pte(0); + return kvmppc_read_update_linux_pte(ptep); +} + long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel) { struct kvm *kvm = vcpu->kvm; unsigned long i, pa, gpa, gfn, psize; - unsigned long slot_fn; + unsigned long slot_fn, hva; unsigned long *hpte; struct revmap_entry *rev; unsigned long g_ptel = ptel; @@ -131,6 +153,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long *physp, pte_size; unsigned long is_io; unsigned long *rmap; + pte_t pte; + unsigned long mmu_seq; bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; psize = hpte_page_size(pteh, ptel); @@ -138,11 +162,16 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, return H_PARAMETER; pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); + /* used later to detect if we might have been invalidated */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + /* Find the memslot (if any) for this address */ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); gfn = gpa >> PAGE_SHIFT; memslot = builtin_gfn_to_memslot(kvm, gfn); pa = 0; + is_io = ~0ul; rmap = NULL; if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { /* PPC970 can't do emulated MMIO */ @@ -160,19 +189,31 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, slot_fn = gfn - memslot->base_gfn; rmap = &memslot->rmap[slot_fn]; - physp = kvm->arch.slot_phys[memslot->id]; - if (!physp) - return H_PARAMETER; - physp += slot_fn; - if (realmode) - physp = real_vmalloc_addr(physp); - pa = *physp; - if (!pa) - return H_TOO_HARD; - is_io = pa & (HPTE_R_I | HPTE_R_W); - pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); - pa &= PAGE_MASK; - + if (!kvm->arch.using_mmu_notifiers) { + physp = kvm->arch.slot_phys[memslot->id]; + if (!physp) + return H_PARAMETER; + physp += slot_fn; + if (realmode) + physp = real_vmalloc_addr(physp); + pa = *physp; + if (!pa) + return H_TOO_HARD; + is_io = pa & (HPTE_R_I | HPTE_R_W); + pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); + pa &= PAGE_MASK; + } else { + /* Translate to host virtual address */ + hva = gfn_to_hva_memslot(memslot, gfn); + + /* Look up the Linux PTE for the backing page */ + pte_size = psize; + pte = lookup_linux_pte(vcpu, hva, &pte_size); + if (pte_present(pte)) { + is_io = hpte_cache_bits(pte_val(pte)); + pa = pte_pfn(pte) << PAGE_SHIFT; + } + } if (pte_size < psize) return H_PARAMETER; if (pa && pte_size > psize) @@ -180,10 +221,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, ptel &= ~(HPTE_R_PP0 - psize); ptel |= pa; - pteh |= HPTE_V_VALID; + + if (pa) + pteh |= HPTE_V_VALID; + else + pteh |= HPTE_V_ABSENT; /* Check WIMG */ - if (!hpte_cache_flags_ok(ptel, is_io)) { + if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) { if (is_io) return H_PARAMETER; /* @@ -194,6 +239,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, ptel |= HPTE_R_M; } + /* Find and lock the HPTEG slot to use */ do_insert: if (pte_index >= HPT_NPTE) return H_PARAMETER; @@ -253,7 +299,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, if (realmode) rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); - kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, realmode); + /* Check for pending invalidations under the rmap chain lock */ + if (kvm->arch.using_mmu_notifiers && + mmu_notifier_retry(vcpu, mmu_seq)) { + /* inval in progress, write a non-present HPTE */ + pteh |= HPTE_V_ABSENT; + pteh &= ~HPTE_V_VALID; + unlock_rmap(rmap); + } else { + kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, + realmode); + } } hpte[1] = ptel; @@ -516,6 +572,23 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, return H_SUCCESS; } +void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, + unsigned long pte_index) +{ + unsigned long rb; + + hptep[0] &= ~HPTE_V_VALID; + rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" + : : "r" (rb), "r" (kvm->arch.lpid)); + asm volatile("ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; +} +EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); + static int slb_base_page_shift[4] = { 24, /* 16M */ 16, /* 64k */ @@ -605,15 +678,15 @@ EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte); /* * Called in real mode to check whether an HPTE not found fault - * is due to accessing an emulated MMIO page. + * is due to accessing a paged-out page or an emulated MMIO page. * Returns a possibly modified status (DSISR) value if not * (i.e. pass the interrupt to the guest), * -1 to pass the fault up to host kernel mode code, -2 to do that - * and also load the instruction word, + * and also load the instruction word (for MMIO emulation), * or 0 if we should make the guest retry the access. */ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, - unsigned long slb_v, unsigned int status) + unsigned long slb_v, unsigned int status, bool data) { struct kvm *kvm = vcpu->kvm; long int index; @@ -624,6 +697,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, unsigned long pp, key; valid = HPTE_V_VALID | HPTE_V_ABSENT; + index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); if (index < 0) return status; /* there really was no HPTE */ @@ -645,22 +719,28 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, /* Check access permissions to the page */ pp = gr & (HPTE_R_PP0 | HPTE_R_PP); key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; - if (status & DSISR_ISSTORE) { + status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ + if (!data) { + if (gr & (HPTE_R_N | HPTE_R_G)) + return status | SRR1_ISI_N_OR_G; + if (!hpte_read_permission(pp, slb_v & key)) + return status | SRR1_ISI_PROT; + } else if (status & DSISR_ISSTORE) { /* check write permission */ if (!hpte_write_permission(pp, slb_v & key)) - goto protfault; + return status | DSISR_PROTFAULT; } else { if (!hpte_read_permission(pp, slb_v & key)) - goto protfault; + return status | DSISR_PROTFAULT; } /* Check storage key, if applicable */ - if (vcpu->arch.shregs.msr & MSR_DR) { + if (data && (vcpu->arch.shregs.msr & MSR_DR)) { unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr); if (status & DSISR_ISSTORE) perm >>= 1; if (perm & 1) - return (status & ~DSISR_NOHPTE) | DSISR_KEYFAULT; + return status | DSISR_KEYFAULT; } /* Save HPTE info for virtual-mode handler */ @@ -669,11 +749,11 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, vcpu->arch.pgfault_hpte[0] = v; vcpu->arch.pgfault_hpte[1] = r; - if (vcpu->arch.shregs.msr & MSR_IR) + /* Check the storage key to see if it is possibly emulated MMIO */ + if (data && (vcpu->arch.shregs.msr & MSR_IR) && + (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) return -2; /* MMIO emulation - load instr word */ return -1; /* send fault up to host kernel mode */ - - protfault: - return (status & ~DSISR_NOHPTE) | DSISR_PROTFAULT; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index d07b64d5f37e..7d4990665d00 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -621,6 +621,8 @@ BEGIN_FTR_SECTION /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE beq kvmppc_hdsi + cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE + beq kvmppc_hisi END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* See if this is a leftover HDEC interrupt */ @@ -1125,6 +1127,7 @@ kvmppc_hdsi: /* Search the hash table. */ mr r3, r9 /* vcpu pointer */ + li r7, 1 /* data fault */ bl .kvmppc_hpte_hv_fault ld r9, HSTATE_KVM_VCPU(r13) ld r10, VCPU_PC(r9) @@ -1181,6 +1184,52 @@ kvmppc_hdsi: stb r0, HSTATE_IN_GUEST(r13) b nohpte_cont +/* + * Similarly for an HISI, reflect it to the guest as an ISI unless + * it is an HPTE not found fault for a page that we have paged out. + */ +kvmppc_hisi: + andis. r0, r11, SRR1_ISI_NOPT@h + beq 1f + andi. r0, r11, MSR_IR /* instruction relocation enabled? */ + beq 3f + clrrdi r0, r10, 28 + PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */ + bne 1f /* if no SLB entry found */ +4: + /* Search the hash table. */ + mr r3, r9 /* vcpu pointer */ + mr r4, r10 + mr r6, r11 + li r7, 0 /* instruction fault */ + bl .kvmppc_hpte_hv_fault + ld r9, HSTATE_KVM_VCPU(r13) + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + li r12, BOOK3S_INTERRUPT_H_INST_STORAGE + cmpdi r3, 0 /* retry the instruction */ + beq 6f + cmpdi r3, -1 /* handle in kernel mode */ + beq nohpte_cont + + /* Synthesize an ISI for the guest */ + mr r11, r3 +1: mtspr SPRN_SRR0, r10 + mtspr SPRN_SRR1, r11 + li r10, BOOK3S_INTERRUPT_INST_STORAGE + li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11, r11, 63 +6: ld r7, VCPU_CTR(r9) + lwz r8, VCPU_XER(r9) + mtctr r7 + mtxer r8 + mr r4, r9 + b fast_guest_return + +3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ + ld r5, KVM_VRMA_SLB_V(r6) + b 4b + /* * Try to handle an hcall in real mode. * Returns to the guest if we handle it, or continues on up to diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4f85ac32258a..06e955b5837e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -245,6 +245,9 @@ int kvm_dev_ioctl_check_extension(long ext) if (cpu_has_feature(CPU_FTR_ARCH_201)) r = 2; break; + case KVM_CAP_SYNC_MMU: + r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; + break; #endif default: r = 0; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a8b3cc7d90fe..f348c3d90404 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -103,6 +104,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift *shift = hugepd_shift(*hpdp); return hugepte_offset(hpdp, ea, pdshift); } +EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { -- cgit v1.2.3 From 03cdab5340b423ec88fc18eb158a62a8a7b94d7f Mon Sep 17 00:00:00 2001 From: Matt Evans Date: Tue, 6 Dec 2011 21:19:42 +0000 Subject: KVM: PPC: Fix vcpu_create dereference before validity check. Fix usage of vcpu struct before check that it's actually valid. Signed-off-by: Matt Evans Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/powerpc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 06e955b5837e..25171037a36c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -289,9 +289,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; vcpu = kvmppc_core_vcpu_create(kvm, id); - vcpu->arch.wqp = &vcpu->wq; - if (!IS_ERR(vcpu)) + if (!IS_ERR(vcpu)) { + vcpu->arch.wqp = &vcpu->wq; kvmppc_create_vcpu_debugfs(vcpu, id); + } return vcpu; } -- cgit v1.2.3 From b5434032fcfd7490c6453feb397fb781762b6f09 Mon Sep 17 00:00:00 2001 From: Matt Evans Date: Wed, 7 Dec 2011 16:55:57 +0000 Subject: KVM: PPC: Add KVM_CAP_NR_VCPUS and KVM_CAP_MAX_VCPUS PPC KVM lacks these two capabilities, and as such a userland system must assume a max of 4 VCPUs (following api.txt). With these, a userland can determine a more realistic limit. Signed-off-by: Matt Evans Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/powerpc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 25171037a36c..f4380cb264e0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -249,6 +249,22 @@ int kvm_dev_ioctl_check_extension(long ext) r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; break; #endif + case KVM_CAP_NR_VCPUS: + /* + * Recommending a number of CPUs is somewhat arbitrary; we + * return the number of present CPUs for -HV (since a host + * will have secondary threads "offline"), and for other KVM + * implementations just count online CPUs. + */ +#ifdef CONFIG_KVM_BOOK3S_64_HV + r = num_present_cpus(); +#else + r = num_online_cpus(); +#endif + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; default: r = 0; break; -- cgit v1.2.3 From e24ed81fedd551e80378be62fa0b0532480ea7d4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 14 Sep 2011 10:02:41 +0200 Subject: KVM: PPC: Add generic single register ioctls Right now we transfer a static struct every time we want to get or set registers. Unfortunately, over time we realize that there are more of these than we thought of before and the extensibility and flexibility of transferring a full struct every time is limited. So this is a new approach to the problem. With these new ioctls, we can get and set a single register that is identified by an ID. This allows for very precise and limited transmittal of data. When we later realize that it's a better idea to shove over multiple registers at once, we can reuse most of the infrastructure and simply implement a GET_MANY_REGS / SET_MANY_REGS interface. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 46 ++++++++++++++++++++++++++++++++++++--- arch/powerpc/kvm/powerpc.c | 41 ++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 35 +++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index bcd45d5afcab..e0c75dcb2ca3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1533,7 +1533,7 @@ following algorithm: Some guests configure the LINT1 NMI input to cause a panic, aiding in debugging. -4.64 KVM_S390_UCAS_MAP +4.65 KVM_S390_UCAS_MAP Capability: KVM_CAP_S390_UCONTROL Architectures: s390 @@ -1552,7 +1552,7 @@ This ioctl maps the memory at "user_addr" with the length "length" to the vcpu's address space starting at "vcpu_addr". All parameters need to be alligned by 1 megabyte. -4.65 KVM_S390_UCAS_UNMAP +4.66 KVM_S390_UCAS_UNMAP Capability: KVM_CAP_S390_UCONTROL Architectures: s390 @@ -1571,7 +1571,7 @@ This ioctl unmaps the memory in the vcpu's address space starting at "vcpu_addr" with the length "length". The field "user_addr" is ignored. All parameters need to be alligned by 1 megabyte. -4.66 KVM_S390_VCPU_FAULT +4.67 KVM_S390_VCPU_FAULT Capability: KVM_CAP_S390_UCONTROL Architectures: s390 @@ -1587,6 +1587,46 @@ table upfront. This is useful to handle validity intercepts for user controlled virtual machines to fault in the virtual cpu's lowcore pages prior to calling the KVM_RUN ioctl. +4.68 KVM_SET_ONE_REG + +Capability: KVM_CAP_ONE_REG +Architectures: all +Type: vcpu ioctl +Parameters: struct kvm_one_reg (in) +Returns: 0 on success, negative value on failure + +struct kvm_one_reg { + __u64 id; + __u64 addr; +}; + +Using this ioctl, a single vcpu register can be set to a specific value +defined by user space with the passed in struct kvm_one_reg, where id +refers to the register identifier as described below and addr is a pointer +to a variable with the respective size. There can be architecture agnostic +and architecture specific registers. Each have their own range of operation +and their own constants and width. To keep track of the implemented +registers, find a list below: + + Arch | Register | Width (bits) + | | + +4.69 KVM_GET_ONE_REG + +Capability: KVM_CAP_ONE_REG +Architectures: all +Type: vcpu ioctl +Parameters: struct kvm_one_reg (in and out) +Returns: 0 on success, negative value on failure + +This ioctl allows to receive the value of a single register implemented +in a vcpu. The register to read is indicated by the "id" field of the +kvm_one_reg struct passed in. On success, the register value can be found +at the memory location pointed to by "addr". + +The list of registers accessible using this interface is identical to the +list in 4.64. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f4380cb264e0..089c61bf0e16 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -217,6 +217,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ONE_REG: r = 1; break; #ifndef CONFIG_KVM_BOOK3S_64_HV @@ -645,6 +646,32 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } +static int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + default: + break; + } + + return r; +} + +static int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + default: + break; + } + + return r; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -684,6 +711,20 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: + { + struct kvm_one_reg reg; + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + goto out; + if (ioctl == KVM_SET_ONE_REG) + r = kvm_vcpu_ioctl_set_one_reg(vcpu, ®); + else + r = kvm_vcpu_ioctl_get_one_reg(vcpu, ®); + break; + } + #ifdef CONFIG_KVM_E500 case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index fa029ced4bd5..4f7a9fb8ab06 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -582,6 +582,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ #define KVM_CAP_PPC_PAPR 68 #define KVM_CAP_SW_TLB 69 +#define KVM_CAP_ONE_REG 70 #define KVM_CAP_S390_GMAP 71 #define KVM_CAP_TSC_DEADLINE_TIMER 72 #define KVM_CAP_S390_UCONTROL 73 @@ -680,6 +681,37 @@ struct kvm_dirty_tlb { __u32 num_dirty; }; +/* Available with KVM_CAP_ONE_REG */ + +#define KVM_REG_ARCH_MASK 0xff00000000000000ULL +#define KVM_REG_GENERIC 0x0000000000000000ULL + +/* + * Architecture specific registers are to be defined in arch headers and + * ORed with the arch identifier. + */ +#define KVM_REG_PPC 0x1000000000000000ULL +#define KVM_REG_X86 0x2000000000000000ULL +#define KVM_REG_IA64 0x3000000000000000ULL +#define KVM_REG_ARM 0x4000000000000000ULL +#define KVM_REG_S390 0x5000000000000000ULL + +#define KVM_REG_SIZE_SHIFT 52 +#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL +#define KVM_REG_SIZE_U8 0x0000000000000000ULL +#define KVM_REG_SIZE_U16 0x0010000000000000ULL +#define KVM_REG_SIZE_U32 0x0020000000000000ULL +#define KVM_REG_SIZE_U64 0x0030000000000000ULL +#define KVM_REG_SIZE_U128 0x0040000000000000ULL +#define KVM_REG_SIZE_U256 0x0050000000000000ULL +#define KVM_REG_SIZE_U512 0x0060000000000000ULL +#define KVM_REG_SIZE_U1024 0x0070000000000000ULL + +struct kvm_one_reg { + __u64 id; + __u64 addr; +}; + /* * ioctls for VM fds */ @@ -819,6 +851,9 @@ struct kvm_s390_ucas_mapping { #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* Available with KVM_CAP_SW_TLB */ #define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) +/* Available with KVM_CAP_ONE_REG */ +#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) +#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 1022fc3d3bfaca09d5d6bfcc93a168de16840814 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 14 Sep 2011 21:45:23 +0200 Subject: KVM: PPC: Add support for explicit HIOR setting Until now, we always set HIOR based on the PVR, but this is just wrong. Instead, we should be setting HIOR explicitly, so user space can decide what the initial HIOR value is - just like on real hardware. We keep the old PVR based way around for backwards compatibility, but once user space uses the SET_ONE_REG based method, we drop the PVR logic. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm.h | 2 ++ arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/kvm/book3s_pr.c | 6 ++++-- arch/powerpc/kvm/powerpc.c | 13 +++++++++++++ include/linux/kvm.h | 1 + 6 files changed, 23 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e0c75dcb2ca3..59a38264a0ed 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1610,6 +1610,7 @@ registers, find a list below: Arch | Register | Width (bits) | | + PPC | KVM_REG_PPC_HIOR | 64 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 663c57f87165..f41adcda1468 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -331,4 +331,6 @@ struct kvm_book3e_206_tlb_params { __u32 reserved[8]; }; +#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 3c3edee672aa..aa795ccef294 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s { #endif int context_id[SID_CONTEXTS]; + bool hior_explicit; /* HIOR is set by ioctl, not PVR */ + struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c193625d5289..00efda6dc0e2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -157,14 +157,16 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) #ifdef CONFIG_PPC_BOOK3S_64 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { kvmppc_mmu_book3s_64_init(vcpu); - to_book3s(vcpu)->hior = 0xfff00000; + if (!to_book3s(vcpu)->hior_explicit) + to_book3s(vcpu)->hior = 0xfff00000; to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; vcpu->arch.cpu_type = KVM_CPU_3S_64; } else #endif { kvmppc_mmu_book3s_32_init(vcpu); - to_book3s(vcpu)->hior = 0; + if (!to_book3s(vcpu)->hior_explicit) + to_book3s(vcpu)->hior = 0; to_book3s(vcpu)->msr_mask = 0xffffffffULL; vcpu->arch.cpu_type = KVM_CPU_3S_32; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 089c61bf0e16..59852091b389 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -212,6 +212,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_BOOKE_SREGS: #else case KVM_CAP_PPC_SEGSTATE: + case KVM_CAP_PPC_HIOR: case KVM_CAP_PPC_PAPR: #endif case KVM_CAP_PPC_UNSET_IRQ: @@ -652,6 +653,11 @@ static int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, int r = -EINVAL; switch (reg->id) { +#ifdef CONFIG_PPC_BOOK3S + case KVM_REG_PPC_HIOR: + r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + break; +#endif default: break; } @@ -665,6 +671,13 @@ static int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, int r = -EINVAL; switch (reg->id) { +#ifdef CONFIG_PPC_BOOK3S + case KVM_ONE_REG_PPC_HIOR: + r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + if (!r) + to_book3s(vcpu)->hior_explicit = true; + break; +#endif default: break; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4f7a9fb8ab06..acbe42939089 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -580,6 +580,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_SMT 64 #define KVM_CAP_PPC_RMA 65 #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ +#define KVM_CAP_PPC_HIOR 67 #define KVM_CAP_PPC_PAPR 68 #define KVM_CAP_SW_TLB 69 #define KVM_CAP_ONE_REG 70 -- cgit v1.2.3 From 31f3438eca2fc90dc892e0e9963ba4b93a2c8383 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Dec 2011 12:26:50 +0000 Subject: KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code This moves the get/set_one_reg implementation down from powerpc.c into booke.c, book3s_pr.c and book3s_hv.c. This avoids #ifdefs in C code, but more importantly, it fixes a bug on Book3s HV where we were accessing beyond the end of the kvm_vcpu struct (via the to_book3s() macro) and corrupting memory, causing random crashes and file corruption. On Book3s HV we only accept setting the HIOR to zero, since the guest runs in supervisor mode and its vectors are never offset from zero. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf [agraf update to apply on top of changed ONE_REG patches] Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 3 +++ arch/powerpc/kvm/book3s_hv.c | 36 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 32 ++++++++++++++++++++++++++++++++ arch/powerpc/kvm/booke.c | 10 ++++++++++ arch/powerpc/kvm/powerpc.c | 38 -------------------------------------- 5 files changed, 81 insertions(+), 38 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb70414db90c..a61b5b5047d6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -176,6 +176,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); + void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); #ifdef CONFIG_KVM_BOOK3S_64_HV diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index fdc804c83938..3580db8a2326 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -398,6 +398,42 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = put_user(0, (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + { + u64 hior; + /* Only allow this to be set to zero */ + r = get_user(hior, (u64 __user *)reg->addr); + if (!r && (hior != 0)) + r = -EINVAL; + break; + } + default: + break; + } + + return r; +} + int kvmppc_core_check_processor_compat(void) { if (cpu_has_feature(CPU_FTR_HVMODE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 00efda6dc0e2..ee222ec7c95c 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -874,6 +874,38 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + if (!r) + to_book3s(vcpu)->hior_explicit = true; + break; + default: + break; + } + + return r; +} + int kvmppc_core_check_processor_compat(void) { return 0; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9e41f45d07ed..ee9e1ee9c858 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -887,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return kvmppc_core_set_sregs(vcpu, sregs); } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + return -EINVAL; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + return -EINVAL; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -ENOTSUPP; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 59852091b389..e23270779ff5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -647,44 +647,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } -static int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, - struct kvm_one_reg *reg) -{ - int r = -EINVAL; - - switch (reg->id) { -#ifdef CONFIG_PPC_BOOK3S - case KVM_REG_PPC_HIOR: - r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); - break; -#endif - default: - break; - } - - return r; -} - -static int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, - struct kvm_one_reg *reg) -{ - int r = -EINVAL; - - switch (reg->id) { -#ifdef CONFIG_PPC_BOOK3S - case KVM_ONE_REG_PPC_HIOR: - r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); - if (!r) - to_book3s(vcpu)->hior_explicit = true; - break; -#endif - default: - break; - } - - return r; -} - int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { -- cgit v1.2.3 From b3c5d3c2a49602c370de6d02fdb923bc48cd1abc Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 7 Jan 2012 02:07:38 +0100 Subject: KVM: PPC: Rename MMIO register identifiers We need the KVM_REG namespace for generic register settings now, so let's rename the existing users to something different, enabling us to reuse the namespace for more visible interfaces. While at it, also move these private constants to a private header. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm.h | 7 ------- arch/powerpc/include/asm/kvm_host.h | 8 ++++++++ arch/powerpc/kvm/book3s_paired_singles.c | 9 ++++++--- arch/powerpc/kvm/powerpc.c | 18 +++++++++--------- 4 files changed, 23 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index f41adcda1468..b921c3f48928 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -269,13 +269,6 @@ struct kvm_guest_debug_arch { struct kvm_sync_regs { }; -#define KVM_REG_MASK 0x001f -#define KVM_REG_EXT_MASK 0xffe0 -#define KVM_REG_GPR 0x0000 -#define KVM_REG_FPR 0x0020 -#define KVM_REG_QPR 0x0040 -#define KVM_REG_FQPR 0x0060 - #define KVM_INTERRUPT_SET -1U #define KVM_INTERRUPT_UNSET -2U #define KVM_INTERRUPT_SET_LEVEL -3U diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1cb6e522485b..af438b1e8a3c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -485,4 +485,12 @@ struct kvm_vcpu_arch { #define KVMPPC_VCPU_BUSY_IN_HOST 1 #define KVMPPC_VCPU_RUNNABLE 2 +/* Values for vcpu->arch.io_gpr */ +#define KVM_MMIO_REG_MASK 0x001f +#define KVM_MMIO_REG_EXT_MASK 0xffe0 +#define KVM_MMIO_REG_GPR 0x0000 +#define KVM_MMIO_REG_FPR 0x0020 +#define KVM_MMIO_REG_QPR 0x0040 +#define KVM_MMIO_REG_FQPR 0x0060 + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 7b0ee96c1bed..e70ef2d86431 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -196,7 +196,8 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_inject_pf(vcpu, addr, false); goto done_load; } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1); + emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, + len, 1); goto done_load; } @@ -286,11 +287,13 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_inject_pf(vcpu, addr, false); goto done_load; } else if ((r == EMULATE_DO_MMIO) && w) { - emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1); + emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, + 4, 1); vcpu->arch.qpr[rs] = tmp[1]; goto done_load; } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1); + emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs, + 8, 1); goto done_load; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e23270779ff5..0e21d155eea7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -448,20 +448,20 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); - switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) { - case KVM_REG_GPR: + switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) { + case KVM_MMIO_REG_GPR: kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); break; - case KVM_REG_FPR: - vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; + case KVM_MMIO_REG_FPR: + vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; #ifdef CONFIG_PPC_BOOK3S - case KVM_REG_QPR: - vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; + case KVM_MMIO_REG_QPR: + vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; - case KVM_REG_FQPR: - vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; - vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; + case KVM_MMIO_REG_FQPR: + vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; #endif default: -- cgit v1.2.3 From db3fe4eb45f3555d91a7124e18cf3a2f2a30eb90 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 8 Feb 2012 13:02:18 +0900 Subject: KVM: Introduce kvm_memory_slot::arch and move lpage_info into it Some members of kvm_memory_slot are not used by every architecture. This patch is the first step to make this difference clear by introducing kvm_memory_slot::arch; lpage_info is moved into it. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 3 ++ arch/ia64/kvm/kvm-ia64.c | 10 ++++++ arch/powerpc/include/asm/kvm_host.h | 3 ++ arch/powerpc/kvm/powerpc.c | 10 ++++++ arch/s390/include/asm/kvm_host.h | 3 ++ arch/s390/kvm/kvm-s390.c | 10 ++++++ arch/x86/include/asm/kvm_host.h | 9 +++++ arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/x86.c | 59 +++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 11 +++--- virt/kvm/kvm_main.c | 70 +++++-------------------------------- 11 files changed, 122 insertions(+), 68 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 2689ee54a1c9..e35b3a84a40b 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -459,6 +459,9 @@ struct kvm_sal_data { unsigned long boot_gp; }; +struct kvm_arch_memory_slot { +}; + struct kvm_arch { spinlock_t dirty_log_lock; diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 8ca7261e7b3d..d8ddbba6fe7d 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1571,6 +1571,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1843d5d2a3be..52eb9c1f4fe0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -213,6 +213,9 @@ struct revmap_entry { #define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ #define KVMPPC_GOT_PAGE 0x80 +struct kvm_arch_memory_slot { +}; + struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0e21d155eea7..00d7e345b3fe 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -281,6 +281,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index e6304268ea28..7343872890a2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -245,6 +245,9 @@ struct kvm_vm_stat { u32 remote_tlb_flush; }; +struct kvm_arch_memory_slot { +}; + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index cf3c0a91d046..17ad69d596fd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -814,6 +814,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c24125cd0c63..74c9edf2bb18 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -483,6 +483,15 @@ struct kvm_vcpu_arch { } osvw; }; +struct kvm_lpage_info { + unsigned long rmap_pde; + int write_count; +}; + +struct kvm_arch_memory_slot { + struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 37e7f100a0e0..ff053ca32303 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -689,7 +689,7 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, unsigned long idx; idx = gfn_to_index(gfn, slot->base_gfn, level); - return &slot->lpage_info[level - 2][idx]; + return &slot->arch.lpage_info[level - 2][idx]; } static void account_shadowed(struct kvm *kvm, gfn_t gfn) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3df0b7a140b0..ca74c1dadf3a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6239,6 +6239,65 @@ void kvm_arch_destroy_vm(struct kvm *kvm) put_page(kvm->arch.ept_identity_pagetable); } +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + int i; + + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { + vfree(free->arch.lpage_info[i]); + free->arch.lpage_info[i] = NULL; + } + } +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + int i; + + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + unsigned long ugfn; + int lpages; + int level = i + 2; + + lpages = gfn_to_index(slot->base_gfn + npages - 1, + slot->base_gfn, level) + 1; + + slot->arch.lpage_info[i] = + vzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); + if (!slot->arch.lpage_info[i]) + goto out_free; + + if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) + slot->arch.lpage_info[i][0].write_count = 1; + if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) + slot->arch.lpage_info[i][lpages - 1].write_count = 1; + ugfn = slot->userspace_addr >> PAGE_SHIFT; + /* + * If the gfn and userspace address are not aligned wrt each + * other, or if explicitly asked to, disable large page + * support for this slot + */ + if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || + !kvm_largepages_enabled()) { + unsigned long j; + + for (j = 0; j < lpages; ++j) + slot->arch.lpage_info[i][j].write_count = 1; + } + } + + return 0; + +out_free: + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + vfree(slot->arch.lpage_info[i]); + slot->arch.lpage_info[i] = NULL; + } + return -ENOMEM; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7a08496b974a..355e44555c39 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -171,11 +171,6 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) */ #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) -struct kvm_lpage_info { - unsigned long rmap_pde; - int write_count; -}; - struct kvm_memory_slot { gfn_t base_gfn; unsigned long npages; @@ -184,7 +179,7 @@ struct kvm_memory_slot { unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_head; unsigned long nr_dirty_pages; - struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; + struct kvm_arch_memory_slot arch; unsigned long userspace_addr; int user_alloc; int id; @@ -376,6 +371,9 @@ int kvm_set_memory_region(struct kvm *kvm, int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc); +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont); +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, @@ -385,6 +383,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); void kvm_arch_flush_shadow(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a30447c5eb4a..8340e0e62034 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -535,21 +535,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) static void kvm_free_physmem_slot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - int i; - if (!dont || free->rmap != dont->rmap) vfree(free->rmap); if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); - - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { - vfree(free->lpage_info[i]); - free->lpage_info[i] = NULL; - } - } + kvm_arch_free_memslot(free, dont); free->npages = 0; free->rmap = NULL; @@ -685,53 +677,6 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) slots->generation++; } -#ifndef CONFIG_S390 -static int create_lpage_info(struct kvm_memory_slot *slot, unsigned long npages) -{ - int i; - - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - unsigned long ugfn; - int lpages; - int level = i + 2; - - lpages = gfn_to_index(slot->base_gfn + npages - 1, - slot->base_gfn, level) + 1; - - slot->lpage_info[i] = vzalloc(lpages * sizeof(*slot->lpage_info[i])); - if (!slot->lpage_info[i]) - goto out_free; - - if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->lpage_info[i][0].write_count = 1; - if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->lpage_info[i][lpages - 1].write_count = 1; - ugfn = slot->userspace_addr >> PAGE_SHIFT; - /* - * If the gfn and userspace address are not aligned wrt each - * other, or if explicitly asked to, disable large page - * support for this slot - */ - if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || - !largepages_enabled) { - unsigned long j; - - for (j = 0; j < lpages; ++j) - slot->lpage_info[i][j].write_count = 1; - } - } - - return 0; - -out_free: - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - vfree(slot->lpage_info[i]); - slot->lpage_info[i] = NULL; - } - return -ENOMEM; -} -#endif /* not defined CONFIG_S390 */ - /* * Allocate some memory and give it an address in the guest physical address * space. @@ -819,10 +764,9 @@ int __kvm_set_memory_region(struct kvm *kvm, new.rmap = vzalloc(npages * sizeof(*new.rmap)); if (!new.rmap) goto out_free; - - if (create_lpage_info(&new, npages)) - goto out_free; #endif /* not defined CONFIG_S390 */ + if (kvm_arch_create_memslot(&new, npages)) + goto out_free; } /* Allocate page dirty bitmap if needed */ @@ -880,8 +824,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (!npages) { new.rmap = NULL; new.dirty_bitmap = NULL; - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) - new.lpage_info[i] = NULL; + memset(&new.arch, 0, sizeof(new.arch)); } update_memslots(slots, &new); @@ -968,6 +911,11 @@ out: return r; } +bool kvm_largepages_enabled(void) +{ + return largepages_enabled; +} + void kvm_disable_largepages(void) { largepages_enabled = false; -- cgit v1.2.3