From 53692908b0f594285aba18ab848318262332ed25 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 18 Apr 2018 10:39:04 +0100 Subject: KVM: arm/arm64: vgic: Fix source vcpu issues for GICv2 SGI Now that we make sure we don't inject multiple instances of the same GICv2 SGI at the same time, we've made another bug more obvious: If we exit with an active SGI, we completely lose track of which vcpu it came from. On the next entry, we restore it with 0 as a source, and if that wasn't the right one, too bad. While this doesn't seem to trouble GIC-400, the architectural model gets offended and doesn't deactivate the interrupt on EOI. Another connected issue is that we will happilly make pending an interrupt from another vcpu, overriding the above zero with something that is just as inconsistent. Don't do that. The final issue is that we signal a maintenance interrupt when no pending interrupts are present in the LR. Assuming we've fixed the two issues above, we end-up in a situation where we keep exiting as soon as we've reached the active state, and not be able to inject the following pending. The fix comes in 3 parts: - GICv2 SGIs have their source vcpu saved if they are active on exit, and restored on entry - Multi-SGIs cannot go via the Pending+Active state, as this would corrupt the source field - Multi-SGIs are converted to using MI on EOI instead of NPIE Fixes: 16ca6a607d84bef0 ("KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid") Reported-by: Mark Rutland Tested-by: Mark Rutland Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic/vgic-mmio.c | 10 +++++++-- virt/kvm/arm/vgic/vgic-v2.c | 38 +++++++++++++++++++-------------- virt/kvm/arm/vgic/vgic-v3.c | 49 +++++++++++++++++++++++++------------------ virt/kvm/arm/vgic/vgic.c | 30 +++++++------------------- virt/kvm/arm/vgic/vgic.h | 14 +++++++++++++ 6 files changed, 81 insertions(+), 61 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 24f03941ada8..e7efe12a81bd 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -131,6 +131,7 @@ struct vgic_irq { u32 mpidr; /* GICv3 target VCPU */ }; u8 source; /* GICv2 SGIs only */ + u8 active_source; /* GICv2 SGIs only */ u8 priority; enum vgic_irq_config config; /* Level or edge */ diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index dbe99d635c80..ff9655cfeb2f 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -289,10 +289,16 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); - if (irq->hw) + if (irq->hw) { vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu); - else + } else { + u32 model = vcpu->kvm->arch.vgic.vgic_model; + irq->active = active; + if (model == KVM_DEV_TYPE_ARM_VGIC_V2 && + active && vgic_irq_is_sgi(irq->intid)) + irq->active_source = requester_vcpu->vcpu_id; + } if (irq->active) vgic_queue_irq_unlock(vcpu->kvm, irq, flags); diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 45aa433f018f..a5f2e44f1c33 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -37,13 +37,6 @@ void vgic_v2_init_lrs(void) vgic_v2_write_lr(i, 0); } -void vgic_v2_set_npie(struct kvm_vcpu *vcpu) -{ - struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; - - cpuif->vgic_hcr |= GICH_HCR_NPIE; -} - void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; @@ -71,13 +64,18 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) int lr; unsigned long flags; - cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE); + cpuif->vgic_hcr &= ~GICH_HCR_UIE; for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u32 val = cpuif->vgic_lr[lr]; - u32 intid = val & GICH_LR_VIRTUALID; + u32 cpuid, intid = val & GICH_LR_VIRTUALID; struct vgic_irq *irq; + /* Extract the source vCPU id from the LR */ + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + cpuid &= 7; + /* Notify fds when the guest EOI'ed a level-triggered SPI */ if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) kvm_notify_acked_irq(vcpu->kvm, 0, @@ -90,17 +88,16 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) /* Always preserve the active bit */ irq->active = !!(val & GICH_LR_ACTIVE_BIT); + if (irq->active && vgic_irq_is_sgi(intid)) + irq->active_source = cpuid; + /* Edge is the only case where we preserve the pending bit */ if (irq->config == VGIC_CONFIG_EDGE && (val & GICH_LR_PENDING_BIT)) { irq->pending_latch = true; - if (vgic_irq_is_sgi(intid)) { - u32 cpuid = val & GICH_LR_PHYSID_CPUID; - - cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_sgi(intid)) irq->source |= (1 << cpuid); - } } /* @@ -152,8 +149,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) u32 val = irq->intid; bool allow_pending = true; - if (irq->active) + if (irq->active) { val |= GICH_LR_ACTIVE_BIT; + if (vgic_irq_is_sgi(irq->intid)) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= GICH_LR_EOI; + } + } if (irq->hw) { val |= GICH_LR_HW; @@ -190,8 +194,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) BUG_ON(!src); val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); - if (irq->source) + if (irq->source) { irq->pending_latch = true; + val |= GICH_LR_EOI; + } } } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 8195f52ae6f0..c7423f3768e5 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -27,13 +27,6 @@ static bool group1_trap; static bool common_trap; static bool gicv4_enable; -void vgic_v3_set_npie(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - - cpuif->vgic_hcr |= ICH_HCR_NPIE; -} - void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; @@ -55,17 +48,23 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) int lr; unsigned long flags; - cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE); + cpuif->vgic_hcr &= ~ICH_HCR_UIE; for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; - u32 intid; + u32 intid, cpuid; struct vgic_irq *irq; + bool is_v2_sgi = false; - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V3) { intid = val & ICH_LR_VIRTUAL_ID_MASK; - else + } else { intid = val & GICH_LR_VIRTUALID; + is_v2_sgi = vgic_irq_is_sgi(intid); + } /* Notify fds when the guest EOI'ed a level-triggered IRQ */ if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) @@ -81,18 +80,16 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* Always preserve the active bit */ irq->active = !!(val & ICH_LR_ACTIVE_BIT); + if (irq->active && is_v2_sgi) + irq->active_source = cpuid; + /* Edge is the only case where we preserve the pending bit */ if (irq->config == VGIC_CONFIG_EDGE && (val & ICH_LR_PENDING_BIT)) { irq->pending_latch = true; - if (vgic_irq_is_sgi(intid) && - model == KVM_DEV_TYPE_ARM_VGIC_V2) { - u32 cpuid = val & GICH_LR_PHYSID_CPUID; - - cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + if (is_v2_sgi) irq->source |= (1 << cpuid); - } } /* @@ -133,10 +130,20 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 model = vcpu->kvm->arch.vgic.vgic_model; u64 val = irq->intid; - bool allow_pending = true; + bool allow_pending = true, is_v2_sgi; - if (irq->active) + is_v2_sgi = (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2); + + if (irq->active) { val |= ICH_LR_ACTIVE_BIT; + if (is_v2_sgi) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= ICH_LR_EOI; + } + } if (irq->hw) { val |= ICH_LR_HW; @@ -174,8 +181,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) BUG_ON(!src); val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); - if (irq->source) + if (irq->source) { irq->pending_latch = true; + val |= ICH_LR_EOI; + } } } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 4b6d72939c42..568c65f852e1 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -719,14 +719,6 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) vgic_v3_set_underflow(vcpu); } -static inline void vgic_set_npie(struct kvm_vcpu *vcpu) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_set_npie(vcpu); - else - vgic_v3_set_npie(vcpu); -} - /* Requires the ap_list_lock to be held. */ static int compute_ap_list_depth(struct kvm_vcpu *vcpu, bool *multi_sgi) @@ -740,17 +732,15 @@ static int compute_ap_list_depth(struct kvm_vcpu *vcpu, DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + int w; + spin_lock(&irq->irq_lock); /* GICv2 SGIs can count for more than one... */ - if (vgic_irq_is_sgi(irq->intid) && irq->source) { - int w = hweight8(irq->source); - - count += w; - *multi_sgi |= (w > 1); - } else { - count++; - } + w = vgic_irq_get_lr_count(irq); spin_unlock(&irq->irq_lock); + + count += w; + *multi_sgi |= (w > 1); } return count; } @@ -761,7 +751,6 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; int count; - bool npie = false; bool multi_sgi; u8 prio = 0xff; @@ -791,10 +780,8 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) if (likely(vgic_target_oracle(irq) == vcpu)) { vgic_populate_lr(vcpu, irq, count++); - if (irq->source) { - npie = true; + if (irq->source) prio = irq->priority; - } } spin_unlock(&irq->irq_lock); @@ -807,9 +794,6 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) } } - if (npie) - vgic_set_npie(vcpu); - vcpu->arch.vgic_cpu.used_lrs = count; /* Nuke remaining LRs */ diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 830e815748a0..32c25d42c93f 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -110,6 +110,20 @@ static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq) return irq->config == VGIC_CONFIG_LEVEL && irq->hw; } +static inline int vgic_irq_get_lr_count(struct vgic_irq *irq) +{ + /* Account for the active state as an interrupt */ + if (vgic_irq_is_sgi(irq->intid) && irq->source) + return hweight8(irq->source) + irq->active; + + return irq_is_pending(irq) || irq->active; +} + +static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq) +{ + return vgic_irq_get_lr_count(irq) > 1; +} + /* * This struct provides an intermediate representation of the fields contained * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC -- cgit v1.2.3 From 1975fa56f1c85f5f47ab5cee903b9374a921b122 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 2 May 2018 12:17:02 +0100 Subject: KVM: arm64: Fix order of vcpu_write_sys_reg() arguments A typo in kvm_vcpu_set_be()'s call: | vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr) causes us to use the 32bit register value as an index into the sys_reg[] array, and sail off the end of the linear map when we try to bring up big-endian secondaries. | Unable to handle kernel paging request at virtual address ffff80098b982c00 | Mem abort info: | ESR = 0x96000045 | Exception class = DABT (current EL), IL = 32 bits | SET = 0, FnV = 0 | EA = 0, S1PTW = 0 | Data abort info: | ISV = 0, ISS = 0x00000045 | CM = 0, WnR = 1 | swapper pgtable: 4k pages, 48-bit VAs, pgdp = 000000002ea0571a | [ffff80098b982c00] pgd=00000009ffff8803, pud=0000000000000000 | Internal error: Oops: 96000045 [#1] PREEMPT SMP | Modules linked in: | CPU: 2 PID: 1561 Comm: kvm-vcpu-0 Not tainted 4.17.0-rc3-00001-ga912e2261ca6-dirty #1323 | Hardware name: ARM Juno development board (r1) (DT) | pstate: 60000005 (nZCv daif -PAN -UAO) | pc : vcpu_write_sys_reg+0x50/0x134 | lr : vcpu_write_sys_reg+0x50/0x134 | Process kvm-vcpu-0 (pid: 1561, stack limit = 0x000000006df4728b) | Call trace: | vcpu_write_sys_reg+0x50/0x134 | kvm_psci_vcpu_on+0x14c/0x150 | kvm_psci_0_2_call+0x244/0x2a4 | kvm_hvc_call_handler+0x1cc/0x258 | handle_hvc+0x20/0x3c | handle_exit+0x130/0x1ec | kvm_arch_vcpu_ioctl_run+0x340/0x614 | kvm_vcpu_ioctl+0x4d0/0x840 | do_vfs_ioctl+0xc8/0x8d0 | ksys_ioctl+0x78/0xa8 | sys_ioctl+0xc/0x18 | el0_svc_naked+0x30/0x34 | Code: 73620291 604d00b0 00201891 1ab10194 (957a33f8) |---[ end trace 4b4a4f9628596602 ]--- Fix the order of the arguments. Fixes: 8d404c4c24613 ("KVM: arm64: Rewrite system register accessors to read/write functions") CC: Christoffer Dall Signed-off-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 23b33e8ea03a..1dab3a984608 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -333,7 +333,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) } else { u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); sctlr |= (1 << 25); - vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr); + vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1); } } -- cgit v1.2.3 From c3616a077190435cb540e134d5dfcd15207817ee Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 2 May 2018 11:53:03 +0100 Subject: KVM: arm/arm64: vgic_init: Cleanup reference to process_maintenance One comment still mentioned process_maintenance operations after commit af0614991ab6 ("KVM: arm/arm64: vgic: Get rid of unnecessary process_maintenance operation") Update the comment to point to vgic_fold_lr_state instead, which is where maintenance interrupts are taken care of. Acked-by: Christoffer Dall Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 68378fe17a0e..e07156c30323 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -423,7 +423,7 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) * We cannot rely on the vgic maintenance interrupt to be * delivered synchronously. This means we can only use it to * exit the VM, and we perform the handling of EOIed - * interrupts on the exit path (see vgic_process_maintenance). + * interrupts on the exit path (see vgic_fold_lr_state). */ return IRQ_HANDLED; } -- cgit v1.2.3 From b220244d41798c6592e7d17843256eb0bae456a0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 4 May 2018 16:19:24 +0100 Subject: arm64: vgic-v2: Fix proxying of cpuif access Proxying the cpuif accesses at EL2 makes use of vcpu_data_guest_to_host and co, which check the endianness, which call into vcpu_read_sys_reg... which isn't mapped at EL2 (it was inlined before, and got moved OoL with the VHE optimizations). The result is of course a nice panic. Let's add some specialized cruft to keep the broken platforms that require this hack alive. But, this code used vcpu_data_guest_to_host(), which expected us to write the value to host memory, instead we have trapped the guest's read or write to an mmio-device, and are about to replay it using the host's readl()/writel() which also perform swabbing based on the host endianness. This goes wrong when both host and guest are big-endian, as readl()/writel() will undo the guest's swabbing, causing the big-endian value to be written to device-memory. What needs doing? A big-endian guest will have pre-swabbed data before storing, undo this. If its necessary for the host, writel() will re-swab it. For a read a big-endian guest expects to swab the data after the load. The hosts's readl() will correct for host endianness, giving us the device-memory's value in the register. For a big-endian guest, swab it as if we'd only done the load. For a little-endian guest, nothing needs doing as readl()/writel() leave the correct device-memory value in registers. Tested on Juno with that rarest of things: a big-endian 64K host. Based on a patch from Marc Zyngier. Reported-by: Suzuki K Poulose Fixes: bf8feb39642b ("arm64: KVM: vgic-v2: Add GICV access from HYP") Signed-off-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 86801b6055d6..39be799d0417 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -18,11 +18,20 @@ #include #include #include +#include #include #include #include +static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return !!(read_sysreg_el2(spsr) & COMPAT_PSR_E_BIT); + + return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); +} + /* * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the * guest. @@ -64,14 +73,19 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) addr += fault_ipa - vgic->vgic_cpu_base; if (kvm_vcpu_dabt_iswrite(vcpu)) { - u32 data = vcpu_data_guest_to_host(vcpu, - vcpu_get_reg(vcpu, rd), - sizeof(u32)); + u32 data = vcpu_get_reg(vcpu, rd); + if (__is_be(vcpu)) { + /* guest pre-swabbed data, undo this for writel() */ + data = swab32(data); + } writel_relaxed(data, addr); } else { u32 data = readl_relaxed(addr); - vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, - sizeof(u32))); + if (__is_be(vcpu)) { + /* guest expects swabbed data */ + data = swab32(data); + } + vcpu_set_reg(vcpu, rd, data); } return 1; -- cgit v1.2.3