From e48d53a91f6e90873e21a5ca5e8c0d7a9f8936a4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Apr 2018 12:27:28 +0100 Subject: arm64: KVM: Add support for Stage-2 control of memory types and cacheability Up to ARMv8.3, the combinaison of Stage-1 and Stage-2 attributes results in the strongest attribute of the two stages. This means that the hypervisor has to perform quite a lot of cache maintenance just in case the guest has some non-cacheable mappings around. ARMv8.4 solves this problem by offering a different mode (FWB) where Stage-2 has total control over the memory attribute (this is limited to systems where both I/O and instruction fetches are coherent with the dcache). This is achieved by having a different set of memory attributes in the page tables, and a new bit set in HCR_EL2. On such a system, we can then safely sidestep any form of dcache management. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 1d90d79706bd..ea7314296ad1 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -196,6 +196,10 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr * This is why right after unmapping a page/section and invalidating * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure * the IO subsystem will never hit in the cache. + * + * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as + * we then fully enforce cacheability of RAM, no matter what the guest + * does. */ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) -- cgit v1.2.3 From 88dc25e8ea7c968bbf76d033431e2d7e1418bcd7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 25 May 2018 12:23:11 +0100 Subject: KVM: arm/arm64: Consolidate page-table accessors The arm and arm64 KVM page tables accessors are pointlessly different between the two architectures, and likely both wrong one way or another: arm64 lacks a dsb(), and arm doesn't use WRITE_ONCE. Let's unify them. Acked-by: Mark Rutland Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 12 ------------ arch/arm64/include/asm/kvm_mmu.h | 3 --- virt/kvm/arm/mmu.c | 12 ++++++++++++ 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 8553d68b7c8a..b2feaea1434c 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -75,18 +75,6 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); -static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) -{ - *pmd = new_pmd; - dsb(ishst); -} - -static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) -{ - *pte = new_pte; - dsb(ishst); -} - static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { pte_val(pte) |= L_PTE_S2_RDWR; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index bac9f016736b..ea000fb47ec0 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -169,9 +169,6 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); -#define kvm_set_pte(ptep, pte) set_pte(ptep, pte) -#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) - static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { pte_val(pte) |= PTE_S2_RDWR; diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ea7314296ad1..a6bdbed1903a 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -177,6 +177,18 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr put_page(virt_to_page(pmd)); } +static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte) +{ + WRITE_ONCE(*ptep, new_pte); + dsb(ishst); +} + +static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd) +{ + WRITE_ONCE(*pmdp, new_pmd); + dsb(ishst); +} + /* * Unmapping vs dcache management: * -- cgit v1.2.3 From 0db9dd8a0fbd5c861737bf2a8a2852e56dbd7ceb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Jun 2018 15:51:05 +0100 Subject: KVM: arm/arm64: Stop using the kernel's {pmd,pud,pgd}_populate helpers The {pmd,pud,pgd}_populate accessors usage have always been a bit weird in KVM. We don't have a struct mm to pass (and neither does the kernel most of the time, but still...), and the 32bit code has all kind of cache maintenance that doesn't make sense on ARMv7+ when MP extensions are mandatory (which is the case when the VEs are present). Let's bite the bullet and provide our own implementations. The only bit of architectural code left has to do with building the table entry itself (arm64 having up to 52bit PA, arm lacking PUD level). Acked-by: Mark Rutland Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 4 ++++ arch/arm64/include/asm/kvm_mmu.h | 7 +++++++ virt/kvm/arm/mmu.c | 25 +++++++++++++++++++++---- 3 files changed, 32 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index b2feaea1434c..265ea9cf7df7 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -75,6 +75,10 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); +#define kvm_mk_pmd(ptep) __pmd(__pa(ptep) | PMD_TYPE_TABLE) +#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE) +#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; }) + static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { pte_val(pte) |= L_PTE_S2_RDWR; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ea000fb47ec0..d6fff7de5539 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -169,6 +169,13 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); +#define kvm_mk_pmd(ptep) \ + __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE) +#define kvm_mk_pud(pmdp) \ + __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE) +#define kvm_mk_pgd(pudp) \ + __pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE) + static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { pte_val(pte) |= PTE_S2_RDWR; diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index a6bdbed1903a..eade30caaa3c 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -189,6 +189,23 @@ static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd) dsb(ishst); } +static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep) +{ + kvm_set_pmd(pmdp, kvm_mk_pmd(ptep)); +} + +static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp) +{ + WRITE_ONCE(*pudp, kvm_mk_pud(pmdp)); + dsb(ishst); +} + +static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp) +{ + WRITE_ONCE(*pgdp, kvm_mk_pgd(pudp)); + dsb(ishst); +} + /* * Unmapping vs dcache management: * @@ -617,7 +634,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, kvm_err("Cannot allocate Hyp pte\n"); return -ENOMEM; } - pmd_populate_kernel(NULL, pmd, pte); + kvm_pmd_populate(pmd, pte); get_page(virt_to_page(pmd)); kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); } @@ -650,7 +667,7 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start, kvm_err("Cannot allocate Hyp pmd\n"); return -ENOMEM; } - pud_populate(NULL, pud, pmd); + kvm_pud_populate(pud, pmd); get_page(virt_to_page(pud)); kvm_flush_dcache_to_poc(pud, sizeof(*pud)); } @@ -687,7 +704,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, err = -ENOMEM; goto out; } - pgd_populate(NULL, pgd, pud); + kvm_pgd_populate(pgd, pud); get_page(virt_to_page(pgd)); kvm_flush_dcache_to_poc(pgd, sizeof(*pgd)); } @@ -1106,7 +1123,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ pte = mmu_memory_cache_alloc(cache); - pmd_populate_kernel(NULL, pmd, pte); + kvm_pmd_populate(pmd, pte); get_page(virt_to_page(pmd)); } -- cgit v1.2.3 From 0a72a5ab9feb8ba6101b00bbcac14cdbae1d1df7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 May 2018 12:09:42 +0100 Subject: KVM: arm/arm64: Remove unnecessary CMOs when creating HYP page tables There is no need to perform cache maintenance operations when creating the HYP page tables if we have the multiprocessing extensions. ARMv7 mandates them with the virtualization support, and ARMv8 just mandates them unconditionally. Let's remove these operations. Acked-by: Mark Rutland Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index eade30caaa3c..97d27cd9c654 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -609,7 +609,6 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, pte = pte_offset_kernel(pmd, addr); kvm_set_pte(pte, pfn_pte(pfn, prot)); get_page(virt_to_page(pte)); - kvm_flush_dcache_to_poc(pte, sizeof(*pte)); pfn++; } while (addr += PAGE_SIZE, addr != end); } @@ -636,7 +635,6 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, } kvm_pmd_populate(pmd, pte); get_page(virt_to_page(pmd)); - kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); } next = pmd_addr_end(addr, end); @@ -669,7 +667,6 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start, } kvm_pud_populate(pud, pmd); get_page(virt_to_page(pud)); - kvm_flush_dcache_to_poc(pud, sizeof(*pud)); } next = pud_addr_end(addr, end); @@ -706,7 +703,6 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, } kvm_pgd_populate(pgd, pud); get_page(virt_to_page(pgd)); - kvm_flush_dcache_to_poc(pgd, sizeof(*pgd)); } next = pgd_addr_end(addr, end); -- cgit v1.2.3 From de73708915adc1b3f05e617a86da6b2d68fae141 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Jun 2018 10:43:59 +0100 Subject: KVM: arm/arm64: Enable adaptative WFE trapping Trapping blocking WFE is extremely beneficial in situations where the system is oversubscribed, as it allows another thread to run while being blocked. In a non-oversubscribed environment, this is the complete opposite, and trapping WFE is just unnecessary overhead. Let's only enable WFE trapping if the CPU has more than a single task to run (that is, more than just the vcpu thread). Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_emulate.h | 10 ++++++++++ arch/arm64/include/asm/kvm_emulate.h | 10 ++++++++++ virt/kvm/arm/arm.c | 6 ++++++ 3 files changed, 26 insertions(+) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 6493bd479ddc..b50fe8380868 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -107,6 +107,16 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr; } +static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr &= ~HCR_TWE; +} + +static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr |= HCR_TWE; +} + static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index dd98fdf33d99..bfefdd9a72eb 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -83,6 +83,16 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr_el2; } +static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 &= ~HCR_TWE; +} + +static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 |= HCR_TWE; +} + static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) { vcpu->arch.vsesr_el2 = vsesr; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 04e554cae3a2..8e66b89a3db2 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -380,6 +381,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_timer_vcpu_load(vcpu); kvm_vcpu_load_sysregs(vcpu); kvm_arch_vcpu_load_fp(vcpu); + + if (single_task_running()) + vcpu_clear_wfe_traps(vcpu); + else + vcpu_set_wfe_traps(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 1d47191de7e15900f8fbfe7cccd7c6e1c2d7c31a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 3 Jul 2018 22:54:14 +0200 Subject: KVM: arm/arm64: Fix vgic init race The vgic_init function can race with kvm_arch_vcpu_create() which does not hold kvm_lock() and we therefore have no synchronization primitives to ensure we're doing the right thing. As the user is trying to initialize or run the VM while at the same time creating more VCPUs, we just have to refuse to initialize the VGIC in this case rather than silently failing with a broken VCPU. Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-init.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 2673efce65f3..b71417913741 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -271,6 +271,10 @@ int vgic_init(struct kvm *kvm) if (vgic_initialized(kvm)) return 0; + /* Are we also in the middle of creating a VCPU? */ + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) + return -EBUSY; + /* freeze the number of spis */ if (!dist->nr_spis) dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; -- cgit v1.2.3 From 2326aceebc516fe5e7c5b8456f22ef05e2961264 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 29 Jun 2018 11:46:18 -0700 Subject: KVM: arm64: vgic-its: Remove VLA usage In the quest to remove all stack VLA usage from the kernel[1], this switches to using a maximum size and adds sanity checks. Additionally cleans up some of the int-vs-u32 usage and adds additional bounds checking. As it currently stands, this will always be 8 bytes until the ABI changes. [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Cc: Christoffer Dall Cc: Eric Auger Cc: Andre Przywara Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Kees Cook [maz: dropped WARN_ONs] Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 4ed79c939fb4..1d88010f6b61 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -168,8 +168,14 @@ struct vgic_its_abi { int (*commit)(struct vgic_its *its); }; +#define ABI_0_ESZ 8 +#define ESZ_MAX ABI_0_ESZ + static const struct vgic_its_abi its_table_abi_versions[] = { - [0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8, + [0] = { + .cte_esz = ABI_0_ESZ, + .dte_esz = ABI_0_ESZ, + .ite_esz = ABI_0_ESZ, .save_tables = vgic_its_save_tables_v0, .restore_tables = vgic_its_restore_tables_v0, .commit = vgic_its_commit_v0, @@ -183,7 +189,7 @@ inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its) return &its_table_abi_versions[its->abi_rev]; } -int vgic_its_set_abi(struct vgic_its *its, int rev) +static int vgic_its_set_abi(struct vgic_its *its, u32 rev) { const struct vgic_its_abi *abi; @@ -1881,14 +1887,14 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, * Return: < 0 on error, 0 if last element was identified, 1 otherwise * (the last element may not be found on second level tables) */ -static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz, +static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, int start_id, entry_fn_t fn, void *opaque) { struct kvm *kvm = its->dev->kvm; unsigned long len = size; int id = start_id; gpa_t gpa = base; - char entry[esz]; + char entry[ESZ_MAX]; int ret; memset(entry, 0, esz); -- cgit v1.2.3 From e294cb3a6d1a8967a83b5f083d4f463e2dc28b61 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Mar 2018 15:18:26 +0000 Subject: KVM: arm/arm64: vgic-debug: Show LPI status The vgic debugfs file only knows about SGI/PPI/SPI interrupts, and completely ignores LPIs. Let's fix that. Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-debug.c | 42 ++++++++++++++++++++++++++++++++---------- virt/kvm/arm/vgic/vgic-its.c | 12 ++++++------ virt/kvm/arm/vgic/vgic.h | 1 + 3 files changed, 39 insertions(+), 16 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index c589d4c2b478..9279e35fefb1 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -36,9 +36,12 @@ struct vgic_state_iter { int nr_cpus; int nr_spis; + int nr_lpis; int dist_id; int vcpu_id; int intid; + int lpi_idx; + u32 *lpi_array; }; static void iter_next(struct vgic_state_iter *iter) @@ -52,6 +55,12 @@ static void iter_next(struct vgic_state_iter *iter) if (iter->intid == VGIC_NR_PRIVATE_IRQS && ++iter->vcpu_id < iter->nr_cpus) iter->intid = 0; + + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) { + if (iter->lpi_idx < iter->nr_lpis) + iter->intid = iter->lpi_array[iter->lpi_idx]; + iter->lpi_idx++; + } } static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, @@ -63,6 +72,11 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, iter->nr_cpus = nr_cpus; iter->nr_spis = kvm->arch.vgic.nr_spis; + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array); + if (iter->nr_lpis < 0) + iter->nr_lpis = 0; + } /* Fast forward to the right position if needed */ while (pos--) @@ -73,7 +87,8 @@ static bool end_of_vgic(struct vgic_state_iter *iter) { return iter->dist_id > 0 && iter->vcpu_id == iter->nr_cpus && - (iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis; + iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && + iter->lpi_idx > iter->nr_lpis; } static void *vgic_debug_start(struct seq_file *s, loff_t *pos) @@ -130,6 +145,7 @@ static void vgic_debug_stop(struct seq_file *s, void *v) mutex_lock(&kvm->lock); iter = kvm->arch.vgic.iter; + kfree(iter->lpi_array); kfree(iter); kvm->arch.vgic.iter = NULL; mutex_unlock(&kvm->lock); @@ -137,12 +153,14 @@ static void vgic_debug_stop(struct seq_file *s, void *v) static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) { + bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; + seq_printf(s, "Distributor\n"); seq_printf(s, "===========\n"); - seq_printf(s, "vgic_model:\t%s\n", - (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ? - "GICv3" : "GICv2"); + seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); + if (v3) + seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count); seq_printf(s, "enabled:\t%d\n", dist->enabled); seq_printf(s, "\n"); @@ -174,8 +192,10 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, type = "SGI"; else if (irq->intid < VGIC_NR_PRIVATE_IRQS) type = "PPI"; - else + else if (irq->intid < VGIC_MAX_SPI) type = "SPI"; + else + type = "LPI"; if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS) print_header(s, irq, vcpu); @@ -202,7 +222,6 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, irq->source, irq->priority, (irq->vcpu) ? irq->vcpu->vcpu_id : -1); - } static int vgic_debug_show(struct seq_file *s, void *v) @@ -221,17 +240,20 @@ static int vgic_debug_show(struct seq_file *s, void *v) if (!kvm->arch.vgic.initialized) return 0; - if (iter->vcpu_id < iter->nr_cpus) { + if (iter->vcpu_id < iter->nr_cpus) vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); - irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid]; - } else { - irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS]; + + irq = vgic_get_irq(kvm, vcpu, iter->intid); + if (!irq) { + seq_printf(s, " LPI %4d freed\n", iter->intid); + return 0; } spin_lock_irqsave(&irq->irq_lock, flags); print_irq_state(s, irq, vcpu); spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(kvm, irq); return 0; } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 1d88010f6b61..cee2c3c5519c 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -318,9 +318,9 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, * enumerate those LPIs without holding any lock. * Returns their number and puts the kmalloc'ed array into intid_ptr. */ -static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) +int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) { - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq; unsigned long flags; u32 *intids; @@ -343,7 +343,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) if (i == irq_count) break; /* We don't need to "get" the IRQ, as we hold the list lock. */ - if (irq->target_vcpu != vcpu) + if (vcpu && irq->target_vcpu != vcpu) continue; intids[i++] = irq->intid; } @@ -435,7 +435,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) unsigned long flags; u8 pendmask; - nr_irqs = vgic_copy_lpi_list(vcpu, &intids); + nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids); if (nr_irqs < 0) return nr_irqs; @@ -1160,7 +1160,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, vcpu = kvm_get_vcpu(kvm, collection->target_addr); - irq_count = vgic_copy_lpi_list(vcpu, &intids); + irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids); if (irq_count < 0) return irq_count; @@ -1208,7 +1208,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, vcpu1 = kvm_get_vcpu(kvm, target1_addr); vcpu2 = kvm_get_vcpu(kvm, target2_addr); - irq_count = vgic_copy_lpi_list(vcpu1, &intids); + irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids); if (irq_count < 0) return irq_count; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index ead00b2072b2..ed9d9d9e2fc5 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -305,6 +305,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size) (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE); } +int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr); int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq **irq); struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); -- cgit v1.2.3 From a2dca217dae29c4ff6420e8c78d56b3f61ae0797 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:18 +0200 Subject: KVM: arm/arm64: vgic: Define GICD_IIDR fields for GICv2 and GIv3 Instead of hardcoding the shifts and masks in the GICD_IIDR register emulation, let's add the definition of these fields to the GIC header files and use them. This will make things more obvious when we're going to bump the revision in the IIDR when we'll make guest-visible changes to the implementation. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 10 ++++++++++ include/linux/irqchip/arm-gic.h | 10 ++++++++++ virt/kvm/arm/vgic/vgic-mmio-v2.c | 3 ++- virt/kvm/arm/vgic/vgic-mmio-v3.c | 3 ++- 4 files changed, 24 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index cbb872c1b607..b22f9dfa61af 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -61,6 +61,16 @@ #define GICD_CTLR_ENABLE_G1A (1U << 1) #define GICD_CTLR_ENABLE_G1 (1U << 0) +#define GICD_IIDR_IMPLEMENTER_SHIFT 0 +#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +#define GICD_IIDR_REVISION_SHIFT 12 +#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) +#define GICD_IIDR_VARIANT_SHIFT 16 +#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) +#define GICD_IIDR_PRODUCT_ID_SHIFT 24 +#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) + + /* * In systems with a single security state (what we emulate in KVM) * the meaning of the interrupt group enable bits is slightly different diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 68d8b1f73682..484f5bfa9f3d 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -71,6 +71,16 @@ (GICD_INT_DEF_PRI << 8) |\ GICD_INT_DEF_PRI) +#define GICD_IIDR_IMPLEMENTER_SHIFT 0 +#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +#define GICD_IIDR_REVISION_SHIFT 12 +#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) +#define GICD_IIDR_VARIANT_SHIFT 16 +#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) +#define GICD_IIDR_PRODUCT_ID_SHIFT 24 +#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) + + #define GICH_HCR 0x0 #define GICH_VTR 0x4 #define GICH_VMCR 0x8 diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index ffc587bf4742..af44e569373a 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -37,7 +37,8 @@ static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; break; case GIC_DIST_IIDR: - value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); + value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); break; default: return 0; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 287784095b5b..c03f42409b98 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -81,7 +81,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, } break; case GICD_IIDR: - value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); + value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); break; default: return 0; -- cgit v1.2.3 From aa075b0f30b53e397fd4d4162ebf4a3a236b9206 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:19 +0200 Subject: KVM: arm/arm64: vgic: Keep track of implementation revision As we are about to tweak implementation aspects of the VGIC emulation, while still preserving some level of backwards compatibility support, add a field to keep track of the implementation revision field which is reported to the VM and to userspace. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 3 +++ virt/kvm/arm/vgic/vgic-init.c | 1 + virt/kvm/arm/vgic/vgic-mmio-v2.c | 6 ++++-- virt/kvm/arm/vgic/vgic-mmio-v3.c | 6 ++++-- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index cfdd2484cc42..7e64c463ab4d 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -217,6 +217,9 @@ struct vgic_dist { /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */ u32 vgic_model; + /* Implementation revision as reported in the GICD_IIDR */ + u32 implementation_rev; + /* Do injected MSIs require an additional device ID? */ bool msis_require_devid; diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index b71417913741..8b6fc45c42fe 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -298,6 +298,7 @@ int vgic_init(struct kvm *kvm) vgic_debug_init(kvm); + dist->implementation_rev = 0; dist->initialized = true; out: diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index af44e569373a..f0c5351805b6 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -25,19 +25,21 @@ static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; u32 value; switch (addr & 0x0c) { case GIC_DIST_CTRL: - value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0; + value = vgic->enabled ? GICD_ENABLE : 0; break; case GIC_DIST_CTR: - value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; value = (value >> 5) - 1; value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; break; case GIC_DIST_IIDR: value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); break; default: diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index c03f42409b98..ebe10a015bfb 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -62,16 +62,17 @@ bool vgic_supports_direct_msis(struct kvm *kvm) static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; u32 value = 0; switch (addr & 0x0c) { case GICD_CTLR: - if (vcpu->kvm->arch.vgic.enabled) + if (vgic->enabled) value |= GICD_CTLR_ENABLE_SS_G1; value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; break; case GICD_TYPER: - value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; value = (value >> 5) - 1; if (vgic_has_its(vcpu->kvm)) { value |= (INTERRUPT_ID_BITS_ITS - 1) << 19; @@ -82,6 +83,7 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, break; case GICD_IIDR: value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); break; default: -- cgit v1.2.3 From dd6251e463d3d8ea55ac2c5944e24bd6ed8f423b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:20 +0200 Subject: KVM: arm/arm64: vgic: GICv2 IGROUPR should read as zero We currently don't support grouping in the emulated VGIC, which is a known defect on KVM (not hurting any currently used guests as far as we're aware). This is currently handled by treating all interrupts as group 0 interrupts for an emulated GICv2 and always signaling interrupts as group 0 to the virtual CPU interface. However, when reading which group interrupts belongs to in the guest from the emulated VGIC, the VGIC currently reports group 1 instead of group 0, which is misleading. Fix this temporarily before introducing full group support by changing the hander to _raz instead of _rao. Fixes: fb848db39661a "KVM: arm/arm64: vgic-new: Add GICv2 MMIO handling framework" Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-init.c | 2 +- virt/kvm/arm/vgic/vgic-mmio-v2.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 8b6fc45c42fe..230c9221fe70 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -298,7 +298,7 @@ int vgic_init(struct kvm *kvm) vgic_debug_init(kvm); - dist->implementation_rev = 0; + dist->implementation_rev = 1; dist->initialized = true; out: diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index f0c5351805b6..db646f140e7d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -22,6 +22,12 @@ #include "vgic.h" #include "vgic-mmio.h" +/* + * The Revision field in the IIDR have the following meanings: + * + * Revision 1: Report GICv2 interrupts as group 0 instead of group 1 + */ + static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -365,7 +371,7 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, - vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, -- cgit v1.2.3 From 8df3c8f33f46adbaa811c0d57fb1d7eb421b88a9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:21 +0200 Subject: KVM: arm/arm64: vgic: Add group field to struct irq In preparation for proper group 0 and group 1 support in the vgic, we add a field in the struct irq to store the group of all interrupts. We initialize the group to group 0 when emulating GICv2 and to group 1 when emulating GICv3, just like we treat them today. LPIs are always group 1. We also continue to ignore writes from the guest, preserving existing functionality, for now. Finally, we also add this field to the vgic debug logic to show the group for all interrupts. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic/vgic-debug.c | 8 +++++--- virt/kvm/arm/vgic/vgic-init.c | 19 +++++++++++++++++-- virt/kvm/arm/vgic/vgic-its.c | 1 + 4 files changed, 24 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7e64c463ab4d..c661d0ee6628 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -133,6 +133,7 @@ struct vgic_irq { u8 source; /* GICv2 SGIs only */ u8 active_source; /* GICv2 SGIs only */ u8 priority; + u8 group; /* 0 == group 0, 1 == group 1 */ enum vgic_irq_config config; /* Level or edge */ /* diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index 9279e35fefb1..07aa900bac56 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -166,6 +166,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) seq_printf(s, "P=pending_latch, L=line_level, A=active\n"); seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n"); + seq_printf(s, "G=group\n"); } static void print_header(struct seq_file *s, struct vgic_irq *irq, @@ -180,8 +181,8 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq, } seq_printf(s, "\n"); - seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id); - seq_printf(s, "---------------------------------------------------------------\n"); + seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHCG HWID TARGET SRC PRI VCPU_ID\n", hdr, id); + seq_printf(s, "----------------------------------------------------------------\n"); } static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, @@ -202,7 +203,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, seq_printf(s, " %s %4d " " %2d " - "%d%d%d%d%d%d " + "%d%d%d%d%d%d%d " "%8d " "%8x " " %2x " @@ -217,6 +218,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, irq->enabled, irq->hw, irq->config == VGIC_CONFIG_LEVEL, + irq->group, irq->hwintid, irq->mpidr, irq->source, diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 230c9221fe70..a7c19cda5835 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -175,10 +175,13 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) irq->vcpu = NULL; irq->target_vcpu = vcpu0; kref_init(&irq->refcount); - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { irq->targets = 0; - else + irq->group = 0; + } else { irq->mpidr = 0; + irq->group = 1; + } } return 0; } @@ -227,6 +230,18 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) /* PPIs */ irq->config = VGIC_CONFIG_LEVEL; } + + /* + * GICv3 can only be created via the KVM_DEVICE_CREATE API and + * so we always know the emulation type at this point as it's + * either explicitly configured as GICv3, or explicitly + * configured as GICv2, or not configured yet which also + * implies GICv2. + */ + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + irq->group = 1; + else + irq->group = 0; } if (!irqchip_in_kernel(vcpu->kvm)) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index cee2c3c5519c..12502251727e 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -71,6 +71,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, kref_init(&irq->refcount); irq->intid = intid; irq->target_vcpu = vcpu; + irq->group = 1; spin_lock_irqsave(&dist->lpi_list_lock, flags); -- cgit v1.2.3 From 87322099052b6a534cecd3d303fc097d4560b7d0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:22 +0200 Subject: KVM: arm/arm64: vgic: Signal IRQs using their configured group Now when we have a group configuration on the struct IRQ, use this state when populating the LR and signaling interrupts as either group 0 or group 1 to the VM. Depending on the model of the emulated GIC, and the guest's configuration of the VMCR, interrupts may be signaled as IRQs or FIQs to the VM. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic.h | 1 + virt/kvm/arm/vgic/vgic-v2.c | 3 +++ virt/kvm/arm/vgic/vgic-v3.c | 6 +----- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 484f5bfa9f3d..6c4aaf04046c 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -104,6 +104,7 @@ #define GICH_LR_PENDING_BIT (1 << 28) #define GICH_LR_ACTIVE_BIT (1 << 29) #define GICH_LR_EOI (1 << 19) +#define GICH_LR_GROUP1 (1 << 30) #define GICH_LR_HW (1 << 31) #define GICH_VMCR_ENABLE_GRP0_SHIFT 0 diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index a5f2e44f1c33..df5e6a6e3186 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -159,6 +159,9 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } + if (irq->group) + val |= GICH_LR_GROUP1; + if (irq->hw) { val |= GICH_LR_HW; val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index cdce653e3c47..530b8491c892 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -197,11 +197,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) irq->line_level = false; - /* - * We currently only support Group1 interrupts, which is a - * known defect. This needs to be addressed at some point. - */ - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) + if (irq->group) val |= ICH_LR_GROUP; val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; -- cgit v1.2.3 From c6e0917b67fc006f3785b79bfdacdf9eef2f4816 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:23 +0200 Subject: KVM: arm/arm64: vgic: Permit uaccess writes to return errors Currently we do not allow any vgic mmio write operations to fail, which makes sense from mmio traps from the guest. However, we should be able to report failures to userspace, if userspace writes incompatible values to read-only registers. Rework the internal interface to allow errors to be returned on the write side for userspace writes. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v3.c | 12 +++++++----- virt/kvm/arm/vgic/vgic-mmio.c | 18 +++++++++++++----- virt/kvm/arm/vgic/vgic-mmio.h | 19 +++++++++++-------- 3 files changed, 31 insertions(+), 18 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index ebe10a015bfb..ef57a1aa0b14 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -249,9 +249,9 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, return value; } -static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) +static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; @@ -276,6 +276,8 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, vgic_put_irq(vcpu->kvm, irq); } + + return 0; } /* We want to avoid outer shareable. */ @@ -468,7 +470,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, vgic_mmio_read_pending, vgic_mmio_write_cpending, - vgic_mmio_read_raz, vgic_mmio_write_wi, 1, + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, vgic_mmio_read_active, vgic_mmio_write_sactive, @@ -541,7 +543,7 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0, vgic_mmio_read_pending, vgic_mmio_write_cpending, - vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0, vgic_mmio_read_active, vgic_mmio_write_sactive, diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index ff9655cfeb2f..e1e79989d473 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -40,6 +40,13 @@ void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, /* Ignore */ } +int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + /* Ignore */ + return 0; +} + /* * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value * of the enabled bit, so there is only one function for both here. @@ -363,11 +370,12 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, mutex_unlock(&vcpu->kvm->lock); } -void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, +int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { __vgic_mmio_write_cactive(vcpu, addr, len, val); + return 0; } static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, @@ -399,11 +407,12 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, mutex_unlock(&vcpu->kvm->lock); } -void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, +int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { __vgic_mmio_write_sactive(vcpu, addr, len, val); + return 0; } unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, @@ -735,10 +744,9 @@ static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; if (region->uaccess_write) - region->uaccess_write(r_vcpu, addr, sizeof(u32), *val); - else - region->write(r_vcpu, addr, sizeof(u32), *val); + return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val); + region->write(r_vcpu, addr, sizeof(u32), *val); return 0; } diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 5693f6df45ec..9c3d6d358014 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -37,8 +37,8 @@ struct vgic_register_region { unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); union { - void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, - unsigned int len, unsigned long val); + int (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len, unsigned long val); @@ -134,6 +134,9 @@ unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); @@ -167,13 +170,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); -void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); +int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); -void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); +int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); -- cgit v1.2.3 From b489edc36169938e955c748eb83eb707bd53436e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:24 +0200 Subject: KVM: arm/arm64: vgic: Return error on incompatible uaccess GICD_IIDR writes If userspace attempts to write a GICD_IIDR that does not match the kernel version, return an error to userspace. The intention is to allow implementation changes inside KVM while avoiding silently breaking migration resulting in guests not running without any clear indication of what went wrong. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 21 ++++++++++++++++++--- virt/kvm/arm/vgic/vgic-mmio-v3.c | 21 ++++++++++++++++++--- 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index db646f140e7d..4f0f2c4165ec 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -75,6 +75,20 @@ static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu, } } +static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + switch (addr & 0x0c) { + case GIC_DIST_IIDR: + if (val != vgic_mmio_read_v2_misc(vcpu, addr, len)) + return -EINVAL; + } + + vgic_mmio_write_v2_misc(vcpu, addr, len, val); + return 0; +} + static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, gpa_t addr, unsigned int len, unsigned long val) @@ -367,9 +381,10 @@ static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, } static const struct vgic_register_region vgic_v2_dist_registers[] = { - REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, - vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, - VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_DIST_CTRL, + vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, + NULL, vgic_mmio_uaccess_write_v2_misc, + 12, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1, VGIC_ACCESS_32bit), diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index ef57a1aa0b14..abdb0ec77b38 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -113,6 +113,20 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, } } +static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + switch (addr & 0x0c) { + case GICD_IIDR: + if (val != vgic_mmio_read_v3_misc(vcpu, addr, len)) + return -EINVAL; + } + + vgic_mmio_write_v3_misc(vcpu, addr, len, val); + return 0; +} + static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -449,9 +463,10 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, } static const struct vgic_register_region vgic_v3_dist_registers[] = { - REGISTER_DESC_WITH_LENGTH(GICD_CTLR, - vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16, - VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR, + vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, + NULL, vgic_mmio_uaccess_write_v3_misc, + 16, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICD_STATUSR, vgic_mmio_read_rao, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), -- cgit v1.2.3 From d53c2c29ae0dfac1e062939afeaeaa1a45cc47a3 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:25 +0200 Subject: KVM: arm/arm64: vgic: Allow configuration of interrupt groups Implement the required MMIO accessors for GICv2 and GICv3 for the IGROUPR distributor and redistributor registers. This can allow guests to change behavior compared to running on previous versions of KVM, but only to align with the architecture and hardware implementations. This also allows userspace to configure the interrupts groups for GICv3. We don't allow userspace to write the groups on GICv2 just yet, because that would result in GICv2 guests not receiving interrupts after migrating from an older kernel that exposes GICv2 interrupts as group 1. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-init.c | 2 +- virt/kvm/arm/vgic/vgic-mmio-v2.c | 13 ++++++++++++- virt/kvm/arm/vgic/vgic-mmio-v3.c | 11 +++++++++-- virt/kvm/arm/vgic/vgic-mmio.c | 38 ++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic-mmio.h | 6 ++++++ 5 files changed, 66 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index a7c19cda5835..c0c0b88af1d5 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -313,7 +313,7 @@ int vgic_init(struct kvm *kvm) vgic_debug_init(kvm); - dist->implementation_rev = 1; + dist->implementation_rev = 2; dist->initialized = true; out: diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 4f0f2c4165ec..ee164f831401 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -26,6 +26,8 @@ * The Revision field in the IIDR have the following meanings: * * Revision 1: Report GICv2 interrupts as group 0 instead of group 1 + * Revision 2: Interrupt groups are guest-configurable and signaled using + * their configured groups. */ static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, @@ -89,6 +91,14 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, return 0; } +static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + /* Ignore writes from userspace */ + return 0; +} + static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, gpa_t addr, unsigned int len, unsigned long val) @@ -386,7 +396,8 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { NULL, vgic_mmio_uaccess_write_v2_misc, 12, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, - vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1, + vgic_mmio_read_group, vgic_mmio_write_group, + NULL, vgic_mmio_uaccess_write_v2_group, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index abdb0ec77b38..88e78b582139 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -59,6 +59,13 @@ bool vgic_supports_direct_msis(struct kvm *kvm) return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); } +/* + * The Revision field in the IIDR have the following meanings: + * + * Revision 2: Interrupt groups are guest-configurable and signaled using + * their configured groups. + */ + static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -471,7 +478,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { vgic_mmio_read_rao, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, - vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1, + vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, @@ -544,7 +551,7 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { static const struct vgic_register_region vgic_v3_sgibase_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0, - vgic_mmio_read_rao, vgic_mmio_write_wi, 4, + vgic_mmio_read_group, vgic_mmio_write_group, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0, vgic_mmio_read_enable, vgic_mmio_write_senable, 4, diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index e1e79989d473..f56ff1cf52ec 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -47,6 +47,44 @@ int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, return 0; } +unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->group) + value |= BIT(i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + spin_lock_irqsave(&irq->irq_lock, flags); + irq->group = !!(val & BIT(i)); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + /* * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value * of the enabled bit, so there is only one function for both here. diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 9c3d6d358014..a07f90acdaec 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -137,6 +137,12 @@ void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + +void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); -- cgit v1.2.3 From 32f8777ed92d73e504840a955a9dc92617826b69 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Jul 2018 15:06:26 +0200 Subject: KVM: arm/arm64: vgic: Let userspace opt-in to writable v2 IGROUPR Simply letting IGROUPR be writable from userspace would break migration from old kernels to newer kernels, because old kernels incorrectly report interrupt groups as group 1. This would not be a big problem if userspace wrote GICD_IIDR as read from the kernel, because we could detect the incompatibility and return an error to userspace. Unfortunately, this is not the case with current userspace implementations and simply letting IGROUPR be writable from userspace for an emulated GICv2 silently breaks migration and causes the destination VM to no longer run after migration. We now encourage userspace to write the read and expected value of GICD_IIDR as the first part of a GIC register restore, and if we observe a write to GICD_IIDR we know that userspace has been updated and has had a chance to cope with older kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting interrupts as group 1, and therefore we now allow groups to be user writable. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 3 +++ virt/kvm/arm/vgic/vgic-mmio-v2.c | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index c661d0ee6628..c134790be32c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -221,6 +221,9 @@ struct vgic_dist { /* Implementation revision as reported in the GICD_IIDR */ u32 implementation_rev; + /* Userspace can write to GICv2 IGROUPR */ + bool v2_groups_user_writable; + /* Do injected MSIs require an additional device ID? */ bool msis_require_devid; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index ee164f831401..26654f4140ed 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -85,6 +85,18 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, case GIC_DIST_IIDR: if (val != vgic_mmio_read_v2_misc(vcpu, addr, len)) return -EINVAL; + + /* + * If we observe a write to GICD_IIDR we know that userspace + * has been updated and has had a chance to cope with older + * kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting + * interrupts as group 1, and therefore we now allow groups to + * be user writable. Doing this by default would break + * migration from old kernels to new kernels with legacy + * userspace. + */ + vcpu->kvm->arch.vgic.v2_groups_user_writable = true; + return 0; } vgic_mmio_write_v2_misc(vcpu, addr, len, val); @@ -95,7 +107,9 @@ static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - /* Ignore writes from userspace */ + if (vcpu->kvm->arch.vgic.v2_groups_user_writable) + vgic_mmio_write_group(vcpu, addr, len, val); + return 0; } -- cgit v1.2.3 From b7b27facc7b50a5fce0afaa3df56157136ce181a Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Thu, 19 Jul 2018 16:24:22 +0100 Subject: arm/arm64: KVM: Add KVM_GET/SET_VCPU_EVENTS For the migrating VMs, user space may need to know the exception state. For example, in the machine A, KVM make an SError pending, when migrate to B, KVM also needs to pend an SError. This new IOCTL exports user-invisible states related to SError. Together with appropriate user space changes, user space can get/set the SError exception state to do migrate/snapshot/suspend. Signed-off-by: Dongjiu Geng Reviewed-by: James Morse [expanded documentation wording] Signed-off-by: James Morse Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 54 +++++++++++++++++++++++++++++++++--- arch/arm64/include/asm/kvm_emulate.h | 5 ++++ arch/arm64/include/asm/kvm_host.h | 7 +++++ arch/arm64/include/uapi/asm/kvm.h | 13 +++++++++ arch/arm64/kvm/guest.c | 46 ++++++++++++++++++++++++++++++ arch/arm64/kvm/inject_fault.c | 6 ++-- arch/arm64/kvm/reset.c | 1 + virt/kvm/arm/arm.c | 21 ++++++++++++++ 8 files changed, 146 insertions(+), 7 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d10944e619d3..284d36e72f28 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -835,11 +835,13 @@ struct kvm_clock_data { Capability: KVM_CAP_VCPU_EVENTS Extended by: KVM_CAP_INTR_SHADOW -Architectures: x86 -Type: vm ioctl +Architectures: x86, arm64 +Type: vcpu ioctl Parameters: struct kvm_vcpu_event (out) Returns: 0 on success, -1 on error +X86: + Gets currently pending exceptions, interrupts, and NMIs as well as related states of the vcpu. @@ -881,15 +883,52 @@ Only two fields are defined in the flags field: - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that smi contains a valid state. +ARM64: + +If the guest accesses a device that is being emulated by the host kernel in +such a way that a real device would generate a physical SError, KVM may make +a virtual SError pending for that VCPU. This system error interrupt remains +pending until the guest takes the exception by unmasking PSTATE.A. + +Running the VCPU may cause it to take a pending SError, or make an access that +causes an SError to become pending. The event's description is only valid while +the VPCU is not running. + +This API provides a way to read and write the pending 'event' state that is not +visible to the guest. To save, restore or migrate a VCPU the struct representing +the state can be read then written using this GET/SET API, along with the other +guest-visible registers. It is not possible to 'cancel' an SError that has been +made pending. + +A device being emulated in user-space may also wish to generate an SError. To do +this the events structure can be populated by user-space. The current state +should be read first, to ensure no existing SError is pending. If an existing +SError is pending, the architecture's 'Multiple SError interrupts' rules should +be followed. (2.5.3 of DDI0587.a "ARM Reliability, Availability, and +Serviceability (RAS) Specification"). + +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + 4.32 KVM_SET_VCPU_EVENTS Capability: KVM_CAP_VCPU_EVENTS Extended by: KVM_CAP_INTR_SHADOW -Architectures: x86 -Type: vm ioctl +Architectures: x86, arm64 +Type: vcpu ioctl Parameters: struct kvm_vcpu_event (in) Returns: 0 on success, -1 on error +X86: + Set pending exceptions, interrupts, and NMIs as well as related states of the vcpu. @@ -910,6 +949,13 @@ shall be written into the VCPU. KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. +ARM64: + +Set the pending SError exception state for this VCPU. It is not possible to +'cancel' an Serror that has been made pending. + +See KVM_GET_VCPU_EVENTS for the data structure. + 4.33 KVM_GET_DEBUGREGS diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index bfefdd9a72eb..3f8ab8da9a2b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -93,6 +93,11 @@ static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TWE; } +static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vsesr_el2; +} + static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) { vcpu->arch.vsesr_el2 = vsesr; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 268619ce0154..bc244cc6e451 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -350,6 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); + +int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); @@ -378,6 +383,8 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); + struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 4e76630dd655..97c3478ee6e7 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -154,6 +155,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 56a0260ceb11..dd05be96d981 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -289,6 +289,52 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } +int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + memset(events, 0, sizeof(*events)); + + events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE); + events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); + + if (events->exception.serror_pending && events->exception.serror_has_esr) + events->exception.serror_esr = vcpu_get_vsesr(vcpu); + + return 0; +} + +int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + int i; + bool serror_pending = events->exception.serror_pending; + bool has_esr = events->exception.serror_has_esr; + + /* check whether the reserved field is zero */ + for (i = 0; i < ARRAY_SIZE(events->reserved); i++) + if (events->reserved[i]) + return -EINVAL; + + /* check whether the pad field is zero */ + for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++) + if (events->exception.pad[i]) + return -EINVAL; + + if (serror_pending && has_esr) { + if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) + return -EINVAL; + + if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK)) + kvm_set_sei_esr(vcpu, events->exception.serror_esr); + else + return -EINVAL; + } else if (serror_pending) { + kvm_inject_vabt(vcpu); + } + + return 0; +} + int __attribute_const__ kvm_target_cpu(void) { unsigned long implementor = read_cpuid_implementor(); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index d8e71659ba7e..a55e91dfcf8f 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -164,9 +164,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) inject_undef64(vcpu); } -static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) +void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr) { - vcpu_set_vsesr(vcpu, esr); + vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK); *vcpu_hcr(vcpu) |= HCR_VSE; } @@ -184,5 +184,5 @@ static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) */ void kvm_inject_vabt(struct kvm_vcpu *vcpu) { - pend_guest_serror(vcpu, ESR_ELx_ISV); + kvm_set_sei_esr(vcpu, ESR_ELx_ISV); } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index a74311beda35..a3db01a28062 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -79,6 +79,7 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: + case KVM_CAP_VCPU_EVENTS: r = 1; break; default: diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 8e66b89a3db2..1c72247aeb1d 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1130,6 +1130,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_arm_vcpu_has_attr(vcpu, &attr); break; } +#ifdef __KVM_HAVE_VCPU_EVENTS + case KVM_GET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + if (kvm_arm_vcpu_get_events(vcpu, &events)) + return -EINVAL; + + if (copy_to_user(argp, &events, sizeof(events))) + return -EFAULT; + + return 0; + } + case KVM_SET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + if (copy_from_user(&events, argp, sizeof(events))) + return -EFAULT; + + return kvm_arm_vcpu_set_events(vcpu, &events); + } +#endif default: r = -EINVAL; } -- cgit v1.2.3 From 539aee0edb9fdc8f465e3843c261acc88c47d8ee Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 19 Jul 2018 16:24:24 +0100 Subject: KVM: arm64: Share the parts of get/set events useful to 32bit The get/set events helpers to do some work to check reserved and padding fields are zero. This is useful on 32bit too. Move this code into virt/kvm/arm/arm.c, and give the arch code some underscores. This is temporarily hidden behind __KVM_HAVE_VCPU_EVENTS until 32bit is wired up. Signed-off-by: James Morse Reviewed-by: Dongjiu Geng Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 8 ++++---- arch/arm64/kvm/guest.c | 21 ++++----------------- virt/kvm/arm/arm.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 21 deletions(-) (limited to 'virt') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bc244cc6e451..f26055f2306e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -350,11 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events); +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); -int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events); +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dd05be96d981..725c7545e91a 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -289,11 +289,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } -int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) { - memset(events, 0, sizeof(*events)); - events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE); events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); @@ -303,23 +301,12 @@ int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, return 0; } -int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) { - int i; bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; - /* check whether the reserved field is zero */ - for (i = 0; i < ARRAY_SIZE(events->reserved); i++) - if (events->reserved[i]) - return -EINVAL; - - /* check whether the pad field is zero */ - for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++) - if (events->exception.pad[i]) - return -EINVAL; - if (serror_pending && has_esr) { if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) return -EINVAL; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 1c72247aeb1d..14f8fad1c7ae 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1050,6 +1050,34 @@ static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu, return ret; } +#ifdef __KVM_HAVE_VCPU_EVENTS /* temporary: until 32bit is wired up */ +static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + memset(events, 0, sizeof(*events)); + + return __kvm_arm_vcpu_get_events(vcpu, events); +} + +static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + int i; + + /* check whether the reserved field is zero */ + for (i = 0; i < ARRAY_SIZE(events->reserved); i++) + if (events->reserved[i]) + return -EINVAL; + + /* check whether the pad field is zero */ + for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++) + if (events->exception.pad[i]) + return -EINVAL; + + return __kvm_arm_vcpu_set_events(vcpu, events); +} +#endif /* __KVM_HAVE_VCPU_EVENTS */ + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit v1.2.3 From b0960b9569db6b2fe8a75967d47b9dcdeb44016b Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 19 Jul 2018 16:24:25 +0100 Subject: KVM: arm: Add 32bit get/set events support arm64's new use of KVMs get_events/set_events API calls isn't just or RAS, it allows an SError that has been made pending by KVM as part of its device emulation to be migrated. Wire this up for 32bit too. We only need to read/write the HCR_VA bit, and check that no esr has been provided, as we don't yet support VDFSR. Signed-off-by: James Morse Reviewed-by: Dongjiu Geng Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 8 ++++---- arch/arm/include/asm/kvm_host.h | 5 +++++ arch/arm/include/uapi/asm/kvm.h | 13 +++++++++++++ arch/arm/kvm/guest.c | 23 +++++++++++++++++++++++ virt/kvm/arm/arm.c | 4 ---- 5 files changed, 45 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index dbbb95d5798a..4d93fd65bbbd 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -835,7 +835,7 @@ struct kvm_clock_data { Capability: KVM_CAP_VCPU_EVENTS Extended by: KVM_CAP_INTR_SHADOW -Architectures: x86, arm64 +Architectures: x86, arm, arm64 Type: vcpu ioctl Parameters: struct kvm_vcpu_event (out) Returns: 0 on success, -1 on error @@ -883,7 +883,7 @@ Only two fields are defined in the flags field: - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that smi contains a valid state. -ARM64: +ARM/ARM64: If the guest accesses a device that is being emulated by the host kernel in such a way that a real device would generate a physical SError, KVM may make @@ -934,7 +934,7 @@ struct kvm_vcpu_events { Capability: KVM_CAP_VCPU_EVENTS Extended by: KVM_CAP_INTR_SHADOW -Architectures: x86, arm64 +Architectures: x86, arm, arm64 Type: vcpu ioctl Parameters: struct kvm_vcpu_event (in) Returns: 0 on success, -1 on error @@ -961,7 +961,7 @@ shall be written into the VCPU. KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. -ARM64: +ARM/ARM64: Set the pending SError exception state for this VCPU. It is not possible to 'cancel' an Serror that has been made pending. diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 1f1fe4109b02..79906cecb091 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -216,6 +216,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); unsigned long kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 16e006f708ca..4602464ebdfb 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -125,6 +126,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index a18f33edc471..2b8de885b2bf 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -261,6 +261,29 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } + +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA); + + return 0; +} + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + bool serror_pending = events->exception.serror_pending; + bool has_esr = events->exception.serror_has_esr; + + if (serror_pending && has_esr) + return -EINVAL; + else if (serror_pending) + kvm_inject_vabt(vcpu); + + return 0; +} + int __attribute_const__ kvm_target_cpu(void) { switch (read_cpuid_part()) { diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 14f8fad1c7ae..ac658bd63196 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1050,7 +1050,6 @@ static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu, return ret; } -#ifdef __KVM_HAVE_VCPU_EVENTS /* temporary: until 32bit is wired up */ static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { @@ -1076,7 +1075,6 @@ static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, return __kvm_arm_vcpu_set_events(vcpu, events); } -#endif /* __KVM_HAVE_VCPU_EVENTS */ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -1158,7 +1156,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_arm_vcpu_has_attr(vcpu, &attr); break; } -#ifdef __KVM_HAVE_VCPU_EVENTS case KVM_GET_VCPU_EVENTS: { struct kvm_vcpu_events events; @@ -1178,7 +1175,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return kvm_arm_vcpu_set_events(vcpu, &events); } -#endif default: r = -EINVAL; } -- cgit v1.2.3 From 6b8b9a48545e08345b8ff77c9fd51b1aebdbefb3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 10 Jul 2018 19:01:23 +0100 Subject: KVM: arm/arm64: vgic: Fix possible spectre-v1 write in vgic_mmio_write_apr() It's possible for userspace to control n. Sanitize n when using it as an array index, to inhibit the potential spectre-v1 write gadget. Note that while it appears that n must be bound to the interval [0,3] due to the way it is extracted from addr, we cannot guarantee that compiler transformations (and/or future refactoring) will ensure this is the case, and given this is a slow path it's better to always perform the masking. Found by smatch. Signed-off-by: Mark Rutland Cc: Christoffer Dall Cc: Marc Zyngier Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 26654f4140ed..738b65d2d0e7 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -399,6 +399,9 @@ static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, if (n > vgic_v3_max_apr_idx(vcpu)) return; + + n = array_index_nospec(n, 4); + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ vgicv3->vgic_ap1r[n] = val; } -- cgit v1.2.3 From 7afc4ddbf299a13aaf28406783d141a34c6b4f5a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 25 Jul 2018 10:21:27 +0100 Subject: KVM: arm/arm64: Fix potential loss of ptimer interrupts kvm_timer_update_state() is called when changing the phys timer configuration registers, either via vcpu reset, as a result of a trap from the guest, or when userspace programs the registers. phys_timer_emulate() is in turn called by kvm_timer_update_state() to either cancel an existing software timer, or program a new software timer, to emulate the behavior of a real phys timer, based on the change in configuration registers. Unfortunately, the interaction between these two functions left a small race; if the conceptual emulated phys timer should actually fire, but the soft timer hasn't executed its callback yet, we cancel the timer in phys_timer_emulate without injecting an irq. This only happens if the check in kvm_timer_update_state is called before the timer should fire, which is relatively unlikely, but possible. The solution is to update the state of the phys timer after calling phys_timer_emulate, which will pick up the pending timer state and update the interrupt value. Note that this leaves the opportunity of raising the interrupt twice, once in the just-programmed soft timer, and once in kvm_timer_update_state. Since this always happens synchronously with the VCPU execution, there is no harm in this, and the guest ever only sees a single timer interrupt. Cc: Stable # 4.15+ Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index bd3d57f40f1b..18ff6203079d 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -295,9 +295,9 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu) struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* - * If the timer can fire now we have just raised the IRQ line and we - * don't need to have a soft timer scheduled for the future. If the - * timer cannot fire at all, then we also don't need a soft timer. + * If the timer can fire now, we don't need to have a soft timer + * scheduled for the future. If the timer cannot fire at all, + * then we also don't need a soft timer. */ if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { soft_timer_cancel(&timer->phys_timer, NULL); @@ -332,10 +332,10 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) level = kvm_timer_should_fire(vtimer); kvm_timer_update_irq(vcpu, level, vtimer); + phys_timer_emulate(vcpu); + if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); - - phys_timer_emulate(vcpu); } static void vtimer_save_state(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 245715cbe83ca934af5d20e078fd85175c62995e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 25 Jul 2018 10:21:28 +0100 Subject: KVM: arm/arm64: Fix lost IRQs from emulated physcial timer when blocked When the VCPU is blocked (for example from WFI) we don't inject the physical timer interrupt if it should fire while the CPU is blocked, but instead we just wake up the VCPU and expect kvm_timer_vcpu_load to take care of injecting the interrupt. Unfortunately, kvm_timer_vcpu_load() doesn't actually do that, it only has support to schedule a soft timer if the emulated phys timer is expected to fire in the future. Follow the same pattern as kvm_timer_update_state() and update the irq state after potentially scheduling a soft timer. Reported-by: Andre Przywara Cc: Stable # 4.15+ Fixes: bbdd52cfcba29 ("KVM: arm/arm64: Avoid phys timer emulation in vcpu entry/exit") Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 18ff6203079d..17cecc96f735 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -487,6 +487,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); if (unlikely(!timer->enabled)) return; @@ -502,6 +503,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) /* Set the background timer for the physical timer emulation. */ phys_timer_emulate(vcpu); + + /* If the timer fired while we weren't running, inject it now */ + if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) + kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); } bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 6249f2a479268702f7259aeee3671db2be3b922c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Aug 2018 12:51:19 +0100 Subject: KVM: arm/arm64: vgic-v3: Add core support for Group0 SGIs Although vgic-v3 now supports Group0 interrupts, it still doesn't deal with Group0 SGIs. As usually with the GIC, nothing is simple: - ICC_SGI1R can signal SGIs of both groups, since GICD_CTLR.DS==1 with KVM (as per 8.1.10, Non-secure EL1 access) - ICC_SGI0R can only generate Group0 SGIs - ICC_ASGI1R sees its scope refocussed to generate only Group0 SGIs (as per the note at the bottom of Table 8-14) We only support Group1 SGIs so far, so no material change. Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/coproc.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- include/kvm/arm_vgic.h | 2 +- virt/kvm/arm/vgic/vgic-mmio-v3.c | 19 +++++++++++++++---- 4 files changed, 18 insertions(+), 7 deletions(-) (limited to 'virt') diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 3a02e76699a6..b17c52608a19 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -253,7 +253,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; reg |= *vcpu_reg(vcpu, p->Rt1) ; - vgic_v3_dispatch_sgi(vcpu, reg); + vgic_v3_dispatch_sgi(vcpu, reg, true); return true; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e04aacb2a24c..aba6755c816d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -255,7 +255,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, if (!p->is_write) return read_from_write_only(vcpu, p, r); - vgic_v3_dispatch_sgi(vcpu, p->regval); + vgic_v3_dispatch_sgi(vcpu, p->regval, true); return true; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index c134790be32c..4f31f96bbfab 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -373,7 +373,7 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid); -void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1); /** * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 88e78b582139..a2a175b08b17 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -900,7 +900,8 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) /** * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs * @vcpu: The VCPU requesting a SGI - * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU + * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU + * @allow_group1: Does the sysreg access allow generation of G1 SGIs * * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. * This will trap in sys_regs.c and call this function. @@ -910,7 +911,7 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) * check for matching ones. If this bit is set, we signal all, but not the * calling VCPU. */ -void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) { struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *c_vcpu; @@ -959,9 +960,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); spin_lock_irqsave(&irq->irq_lock, flags); - irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + /* + * An access targetting Group0 SGIs can only generate + * those, while an access targetting Group1 SGIs can + * generate interrupts of either group. + */ + if (!irq->group || allow_group1) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + spin_unlock_irqrestore(&irq->irq_lock, flags); + } + vgic_put_irq(vcpu->kvm, irq); } } -- cgit v1.2.3 From dc961e5395dde279b77e2cdbdcc99560b89bdfd9 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 3 Aug 2018 21:57:03 +0800 Subject: KVM: arm/arm64: vgic: Move DEBUG_SPINLOCK_BUG_ON to vgic.h DEBUG_SPINLOCK_BUG_ON can be used with both vgic-v2 and vgic-v3, so let's move it to vgic.h Signed-off-by: Jia He [maz: commit message tidy-up] Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic.c | 6 ------ virt/kvm/arm/vgic/vgic.h | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 33c8325c8f35..c22cea678a66 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -28,12 +28,6 @@ #define CREATE_TRACE_POINTS #include "trace.h" -#ifdef CONFIG_DEBUG_SPINLOCK -#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) -#else -#define DEBUG_SPINLOCK_BUG_ON(p) -#endif - struct vgic_global kvm_vgic_global_state __ro_after_init = { .gicv3_cpuif = STATIC_KEY_FALSE_INIT, }; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index ed9d9d9e2fc5..a90024718ca4 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -103,6 +103,12 @@ #define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52) #define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52 +#ifdef CONFIG_DEBUG_SPINLOCK +#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) +#else +#define DEBUG_SPINLOCK_BUG_ON(p) +#endif + /* Requires the irq_lock to be held by the caller. */ static inline bool irq_is_pending(struct vgic_irq *irq) { -- cgit v1.2.3 From d0823cb346bc6f685f230bdbec51910a329e3fe3 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 3 Aug 2018 21:57:04 +0800 Subject: KVM: arm/arm64: vgic: Do not use spin_lock_irqsave/restore with irq disabled kvm_vgic_sync_hwstate is only called with IRQ being disabled. There is thus no need to call spin_lock_irqsave/restore in vgic_fold_lr_state and vgic_prune_ap_list. This patch replace them with the non irq-safe version. Signed-off-by: Jia He Acked-by: Christoffer Dall [maz: commit message tidy-up] Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-v2.c | 7 ++++--- virt/kvm/arm/vgic/vgic-v3.c | 7 ++++--- virt/kvm/arm/vgic/vgic.c | 13 +++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index df5e6a6e3186..69b892abd7dc 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -62,7 +62,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; int lr; - unsigned long flags; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); cpuif->vgic_hcr &= ~GICH_HCR_UIE; @@ -83,7 +84,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) irq = vgic_get_irq(vcpu->kvm, vcpu, intid); - spin_lock_irqsave(&irq->irq_lock, flags); + spin_lock(&irq->irq_lock); /* Always preserve the active bit */ irq->active = !!(val & GICH_LR_ACTIVE_BIT); @@ -126,7 +127,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) vgic_irq_set_phys_active(irq, false); } - spin_unlock_irqrestore(&irq->irq_lock, flags); + spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 530b8491c892..9c0dd234ebe8 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -46,7 +46,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; u32 model = vcpu->kvm->arch.vgic.vgic_model; int lr; - unsigned long flags; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); cpuif->vgic_hcr &= ~ICH_HCR_UIE; @@ -75,7 +76,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) if (!irq) /* An LPI could have been unmapped. */ continue; - spin_lock_irqsave(&irq->irq_lock, flags); + spin_lock(&irq->irq_lock); /* Always preserve the active bit */ irq->active = !!(val & ICH_LR_ACTIVE_BIT); @@ -118,7 +119,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) vgic_irq_set_phys_active(irq, false); } - spin_unlock_irqrestore(&irq->irq_lock, flags); + spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index c22cea678a66..7cfdfbc910e0 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -593,10 +593,11 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq, *tmp; - unsigned long flags; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); retry: - spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + spin_lock(&vgic_cpu->ap_list_lock); list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; @@ -637,7 +638,7 @@ retry: /* This interrupt looks like it has to be migrated. */ spin_unlock(&irq->irq_lock); - spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); + spin_unlock(&vgic_cpu->ap_list_lock); /* * Ensure locking order by always locking the smallest @@ -651,7 +652,7 @@ retry: vcpuB = vcpu; } - spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags); + spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, SINGLE_DEPTH_NESTING); spin_lock(&irq->irq_lock); @@ -676,7 +677,7 @@ retry: spin_unlock(&irq->irq_lock); spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); - spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags); + spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); if (target_vcpu_needs_kick) { kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu); @@ -686,7 +687,7 @@ retry: goto retry; } - spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); + spin_unlock(&vgic_cpu->ap_list_lock); } static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 86658b819cd0a9aa584cd84453ed268a6f013770 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 13 Aug 2018 11:43:50 +0100 Subject: KVM: arm/arm64: Skip updating PMD entry if no change Contention on updating a PMD entry by a large number of vcpus can lead to duplicate work when handling stage 2 page faults. As the page table update follows the break-before-make requirement of the architecture, it can lead to repeated refaults due to clearing the entry and flushing the tlbs. This problem is more likely when - * there are large number of vcpus * the mapping is large block mapping such as when using PMD hugepages (512MB) with 64k pages. Fix this by skipping the page table update if there is no change in the entry being updated. Cc: stable@vger.kernel.org Fixes: ad361f093c1e ("KVM: ARM: Support hugetlbfs backed huge pages") Reviewed-by: Suzuki Poulose Acked-by: Christoffer Dall Signed-off-by: Punit Agrawal Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 97d27cd9c654..13dfe36501aa 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1044,19 +1044,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache pmd = stage2_get_pmd(kvm, cache, addr); VM_BUG_ON(!pmd); - /* - * Mapping in huge pages should only happen through a fault. If a - * page is merged into a transparent huge page, the individual - * subpages of that huge page should be unmapped through MMU - * notifiers before we get here. - * - * Merging of CompoundPages is not supported; they should become - * splitting first, unmapped, merged, and mapped back in on-demand. - */ - VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); - old_pmd = *pmd; if (pmd_present(old_pmd)) { + /* + * Multiple vcpus faulting on the same PMD entry, can + * lead to them sequentially updating the PMD with the + * same value. Following the break-before-make + * (pmd_clear() followed by tlb_flush()) process can + * hinder forward progress due to refaults generated + * on missing translations. + * + * Skip updating the page table if the entry is + * unchanged. + */ + if (pmd_val(old_pmd) == pmd_val(*new_pmd)) + return 0; + + /* + * Mapping in huge pages should only happen through a + * fault. If a page is merged into a transparent huge + * page, the individual subpages of that huge page + * should be unmapped through MMU notifiers before we + * get here. + * + * Merging of CompoundPages is not supported; they + * should become splitting first, unmapped, merged, + * and mapped back in on-demand. + */ + VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); + pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { -- cgit v1.2.3 From 976d34e2dab10ece5ea8fe7090b7692913f89084 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 13 Aug 2018 11:43:51 +0100 Subject: KVM: arm/arm64: Skip updating PTE entry if no change When there is contention on faulting in a particular page table entry at stage 2, the break-before-make requirement of the architecture can lead to additional refaulting due to TLB invalidation. Avoid this by skipping a page table update if the new value of the PTE matches the previous value. Cc: stable@vger.kernel.org Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup") Reviewed-by: Suzuki Poulose Acked-by: Christoffer Dall Signed-off-by: Punit Agrawal Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 13dfe36501aa..91aaf73b00df 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1147,6 +1147,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, /* Create 2nd stage page table mapping - Level 3 */ old_pte = *pte; if (pte_present(old_pte)) { + /* Skip page table update if there is no change */ + if (pte_val(old_pte) == pte_val(*new_pte)) + return 0; + kvm_set_pte(pte, __pte(0)); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { -- cgit v1.2.3