summaryrefslogtreecommitdiff
path: root/virt
diff options
context:
space:
mode:
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/arch_timer.c244
-rw-r--r--virt/kvm/arm/hyp/timer-sr.c46
-rw-r--r--virt/kvm/arm/vgic/vgic-debug.c283
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c43
-rw-r--r--virt/kvm/arm/vgic/vgic-irqfd.c3
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c126
-rw-r--r--virt/kvm/arm/vgic/vgic-kvm-device.c233
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v2.c110
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c205
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c199
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h24
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c37
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c47
-rw-r--r--virt/kvm/arm/vgic/vgic.c66
-rw-r--r--virt/kvm/arm/vgic/vgic.h120
-rw-r--r--virt/kvm/async_pf.c13
-rw-r--r--virt/kvm/eventfd.c3
-rw-r--r--virt/kvm/kvm_main.c277
-rw-r--r--virt/kvm/vfio.c18
-rw-r--r--virt/lib/irqbypass.c4
20 files changed, 1623 insertions, 478 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 27a1f6341d41..35d7100e0815 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -24,6 +24,7 @@
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
+#include <asm/kvm_hyp.h>
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
@@ -36,10 +37,10 @@ static u32 host_vtimer_irq_flags;
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
- vcpu->arch.timer_cpu.active_cleared_last = false;
+ vcpu_vtimer(vcpu)->active_cleared_last = false;
}
-static cycle_t kvm_phys_timer_read(void)
+u64 kvm_phys_timer_read(void)
{
return timecounter->cc->read(timecounter->cc);
}
@@ -89,9 +90,6 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
struct kvm_vcpu *vcpu;
vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
- vcpu->arch.timer_cpu.armed = false;
-
- WARN_ON(!kvm_timer_should_fire(vcpu));
/*
* If the vcpu is blocked we want to wake it up so that it will see
@@ -100,12 +98,12 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
kvm_vcpu_kick(vcpu);
}
-static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
+static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
- cycle_t cval, now;
+ u64 cval, now;
- cval = vcpu->arch.timer_cpu.cntv_cval;
- now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ cval = timer_ctx->cnt_cval;
+ now = kvm_phys_timer_read() - timer_ctx->cntvoff;
if (now < cval) {
u64 ns;
@@ -120,6 +118,35 @@ static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
return 0;
}
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
+{
+ return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+ (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
+}
+
+/*
+ * Returns the earliest expiration time in ns among guest timers.
+ * Note that it will return 0 if none of timers can fire.
+ */
+static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
+{
+ u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+ if (kvm_timer_irq_can_fire(vtimer))
+ min_virt = kvm_timer_compute_delta(vtimer);
+
+ if (kvm_timer_irq_can_fire(ptimer))
+ min_phys = kvm_timer_compute_delta(ptimer);
+
+ /* If none of timers can fire, then return 0 */
+ if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX))
+ return 0;
+
+ return min(min_virt, min_phys);
+}
+
static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
{
struct arch_timer_cpu *timer;
@@ -134,7 +161,7 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
* PoV (NTP on the host may have forced it to expire
* early). If we should have slept longer, restart it.
*/
- ns = kvm_timer_compute_delta(vcpu);
+ ns = kvm_timer_earliest_exp(vcpu);
if (unlikely(ns)) {
hrtimer_forward_now(hrt, ns_to_ktime(ns));
return HRTIMER_RESTART;
@@ -144,42 +171,33 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
return HRTIMER_NORESTART;
}
-static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
- (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
-}
-
-bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
+bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- cycle_t cval, now;
+ u64 cval, now;
- if (!kvm_timer_irq_can_fire(vcpu))
+ if (!kvm_timer_irq_can_fire(timer_ctx))
return false;
- cval = timer->cntv_cval;
- now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ cval = timer_ctx->cnt_cval;
+ now = kvm_phys_timer_read() - timer_ctx->cntvoff;
return cval <= now;
}
-static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+ struct arch_timer_context *timer_ctx)
{
int ret;
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
BUG_ON(!vgic_initialized(vcpu->kvm));
- timer->active_cleared_last = false;
- timer->irq.level = new_level;
- trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq,
- timer->irq.level);
- ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
- timer->irq.irq,
- timer->irq.level);
+ timer_ctx->active_cleared_last = false;
+ timer_ctx->irq.level = new_level;
+ trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
+ timer_ctx->irq.level);
+
+ ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq,
+ timer_ctx->irq.level);
WARN_ON(ret);
}
@@ -190,22 +208,43 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
* If userspace modified the timer registers via SET_ONE_REG before
- * the vgic was initialized, we mustn't set the timer->irq.level value
+ * the vgic was initialized, we mustn't set the vtimer->irq.level value
* because the guest would never see the interrupt. Instead wait
* until we call this function from kvm_timer_flush_hwstate.
*/
if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
return -ENODEV;
- if (kvm_timer_should_fire(vcpu) != timer->irq.level)
- kvm_timer_update_irq(vcpu, !timer->irq.level);
+ if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
+ kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
+
+ if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+ kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
return 0;
}
+/* Schedule the background timer for the emulated timer. */
+static void kvm_timer_emulate(struct kvm_vcpu *vcpu,
+ struct arch_timer_context *timer_ctx)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ if (kvm_timer_should_fire(timer_ctx))
+ return;
+
+ if (!kvm_timer_irq_can_fire(timer_ctx))
+ return;
+
+ /* The timer has not yet expired, schedule a background timer */
+ timer_arm(timer, kvm_timer_compute_delta(timer_ctx));
+}
+
/*
* Schedule the background timer before calling kvm_vcpu_block, so that this
* thread is removed from its waitqueue and made runnable when there's a timer
@@ -214,26 +253,31 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
void kvm_timer_schedule(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
BUG_ON(timer_is_armed(timer));
/*
- * No need to schedule a background timer if the guest timer has
+ * No need to schedule a background timer if any guest timer has
* already expired, because kvm_vcpu_block will return before putting
* the thread to sleep.
*/
- if (kvm_timer_should_fire(vcpu))
+ if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
return;
/*
- * If the timer is not capable of raising interrupts (disabled or
+ * If both timers are not capable of raising interrupts (disabled or
* masked), then there's no more work for us to do.
*/
- if (!kvm_timer_irq_can_fire(vcpu))
+ if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer))
return;
- /* The timer has not yet expired, schedule a background timer */
- timer_arm(timer, kvm_timer_compute_delta(vcpu));
+ /*
+ * The guest timers have not yet expired, schedule a background timer.
+ * Set the earliest expiration time among the guest timers.
+ */
+ timer_arm(timer, kvm_timer_earliest_exp(vcpu));
}
void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
@@ -251,13 +295,16 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
*/
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
int ret;
if (kvm_timer_update_state(vcpu))
return;
+ /* Set the background timer for the physical timer emulation. */
+ kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+
/*
* If we enter the guest with the virtual input level to the VGIC
* asserted, then we have already told the VGIC what we need to, and
@@ -275,8 +322,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
* to ensure that hardware interrupts from the timer triggers a guest
* exit.
*/
- phys_active = timer->irq.level ||
- kvm_vgic_map_is_active(vcpu, timer->irq.irq);
+ phys_active = vtimer->irq.level ||
+ kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
/*
* We want to avoid hitting the (re)distributor as much as
@@ -298,7 +345,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
* - cached value is "active clear"
* - value to be programmed is "active clear"
*/
- if (timer->active_cleared_last && !phys_active)
+ if (vtimer->active_cleared_last && !phys_active)
return;
ret = irq_set_irqchip_state(host_vtimer_irq,
@@ -306,7 +353,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
phys_active);
WARN_ON(ret);
- timer->active_cleared_last = !phys_active;
+ vtimer->active_cleared_last = !phys_active;
}
/**
@@ -320,7 +367,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- BUG_ON(timer_is_armed(timer));
+ /*
+ * This is to cancel the background timer for the physical timer
+ * emulation if it is set.
+ */
+ timer_disarm(timer);
/*
* The guest could have modified the timer registers or the timer
@@ -330,9 +381,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *irq)
+ const struct kvm_irq_level *virt_irq,
+ const struct kvm_irq_level *phys_irq)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
* The vcpu timer irq number cannot be determined in
@@ -340,7 +393,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* kvm_vcpu_set_target(). To handle this, we determine
* vcpu timer irq number when the vcpu is reset.
*/
- timer->irq.irq = irq->irq;
+ vtimer->irq.irq = virt_irq->irq;
+ ptimer->irq.irq = phys_irq->irq;
/*
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -348,16 +402,40 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* resets the timer to be disabled and unmasked and is compliant with
* the ARMv7 architecture.
*/
- timer->cntv_ctl = 0;
+ vtimer->cnt_ctl = 0;
+ ptimer->cnt_ctl = 0;
kvm_timer_update_state(vcpu);
return 0;
}
+/* Make the updates of cntvoff for all vtimer contexts atomic */
+static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
+{
+ int i;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tmp;
+
+ mutex_lock(&kvm->lock);
+ kvm_for_each_vcpu(i, tmp, kvm)
+ vcpu_vtimer(tmp)->cntvoff = cntvoff;
+
+ /*
+ * When called from the vcpu create path, the CPU being created is not
+ * included in the loop above, so we just set it here as well.
+ */
+ vcpu_vtimer(vcpu)->cntvoff = cntvoff;
+ mutex_unlock(&kvm->lock);
+}
+
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ /* Synchronize cntvoff across all vtimers of a VM. */
+ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
+ vcpu_ptimer(vcpu)->cntvoff = 0;
+
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
timer->timer.function = kvm_timer_expire;
@@ -370,17 +448,17 @@ static void kvm_timer_init_interrupt(void *info)
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
switch (regid) {
case KVM_REG_ARM_TIMER_CTL:
- timer->cntv_ctl = value;
+ vtimer->cnt_ctl = value;
break;
case KVM_REG_ARM_TIMER_CNT:
- vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
+ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
break;
case KVM_REG_ARM_TIMER_CVAL:
- timer->cntv_cval = value;
+ vtimer->cnt_cval = value;
break;
default:
return -1;
@@ -392,15 +470,15 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
switch (regid) {
case KVM_REG_ARM_TIMER_CTL:
- return timer->cntv_ctl;
+ return vtimer->cnt_ctl;
case KVM_REG_ARM_TIMER_CNT:
- return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ return kvm_phys_timer_read() - vtimer->cntvoff;
case KVM_REG_ARM_TIMER_CVAL:
- return timer->cntv_cval;
+ return vtimer->cnt_cval;
}
return (u64)-1;
}
@@ -425,6 +503,11 @@ int kvm_timer_hyp_init(void)
info = arch_timer_get_kvm_info();
timecounter = &info->timecounter;
+ if (!timecounter->cc) {
+ kvm_err("kvm_arch_timer: uninitialized timecounter\n");
+ return -ENODEV;
+ }
+
if (info->virtual_irq <= 0) {
kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
info->virtual_irq);
@@ -451,7 +534,7 @@ int kvm_timer_hyp_init(void)
kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
- "AP_KVM_ARM_TIMER_STARTING", kvm_timer_starting_cpu,
+ "kvm/arm/timer:starting", kvm_timer_starting_cpu,
kvm_timer_dying_cpu);
return err;
}
@@ -459,14 +542,16 @@ int kvm_timer_hyp_init(void)
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
timer_disarm(timer);
- kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq);
+ kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
}
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct irq_desc *desc;
struct irq_data *data;
int phys_irq;
@@ -494,26 +579,33 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
* Tell the VGIC that the virtual interrupt is tied to a
* physical interrupt. We do that once per VCPU.
*/
- ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq);
+ ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq);
if (ret)
return ret;
-
- /*
- * There is a potential race here between VCPUs starting for the first
- * time, which may be enabling the timer multiple times. That doesn't
- * hurt though, because we're just setting a variable to the same
- * variable that it already was. The important thing is that all
- * VCPUs have the enabled variable set, before entering the guest, if
- * the arch timers are enabled.
- */
- if (timecounter)
- timer->enabled = 1;
+ timer->enabled = 1;
return 0;
}
-void kvm_timer_init(struct kvm *kvm)
+/*
+ * On VHE system, we only need to configure trap on physical timer and counter
+ * accesses in EL0 and EL1 once, not for every world switch.
+ * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
+ * and this makes those bits have no effect for the host kernel execution.
+ */
+void kvm_timer_init_vhe(void)
{
- kvm->arch.timer.cntvoff = kvm_phys_timer_read();
+ /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */
+ u32 cnthctl_shift = 10;
+ u64 val;
+
+ /*
+ * Disallow physical timer access for the guest.
+ * Physical counter access is allowed.
+ */
+ val = read_sysreg(cnthctl_el2);
+ val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
+ val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
+ write_sysreg(val, cnthctl_el2);
}
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index 798866a8d875..4734915ab71f 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -25,20 +25,27 @@
void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
u64 val;
if (timer->enabled) {
- timer->cntv_ctl = read_sysreg_el0(cntv_ctl);
- timer->cntv_cval = read_sysreg_el0(cntv_cval);
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
}
/* Disable the virtual timer */
write_sysreg_el0(0, cntv_ctl);
- /* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
- write_sysreg(val, cnthctl_el2);
+ /*
+ * We don't need to do this for VHE since the host kernel runs in EL2
+ * with HCR_EL2.TGE ==1, which makes those bits have no impact.
+ */
+ if (!has_vhe()) {
+ /* Allow physical timer/counter access for the host */
+ val = read_sysreg(cnthctl_el2);
+ val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+ write_sysreg(val, cnthctl_el2);
+ }
/* Clear cntvoff for the host */
write_sysreg(0, cntvoff_el2);
@@ -46,23 +53,26 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
u64 val;
- /*
- * Disallow physical timer access for the guest
- * Physical counter access is allowed
- */
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
+ /* Those bits are already configured at boot on VHE-system */
+ if (!has_vhe()) {
+ /*
+ * Disallow physical timer access for the guest
+ * Physical counter access is allowed
+ */
+ val = read_sysreg(cnthctl_el2);
+ val &= ~CNTHCTL_EL1PCEN;
+ val |= CNTHCTL_EL1PCTEN;
+ write_sysreg(val, cnthctl_el2);
+ }
if (timer->enabled) {
- write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
- write_sysreg_el0(timer->cntv_cval, cntv_cval);
+ write_sysreg(vtimer->cntvoff, cntvoff_el2);
+ write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
isb();
- write_sysreg_el0(timer->cntv_ctl, cntv_ctl);
+ write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
}
}
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c
new file mode 100644
index 000000000000..7072ab743332
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2016 Linaro
+ * Author: Christoffer Dall <christoffer.dall@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/kvm_host.h>
+#include <linux/seq_file.h>
+#include <kvm/arm_vgic.h>
+#include <asm/kvm_mmu.h>
+#include "vgic.h"
+
+/*
+ * Structure to control looping through the entire vgic state. We start at
+ * zero for each field and move upwards. So, if dist_id is 0 we print the
+ * distributor info. When dist_id is 1, we have already printed it and move
+ * on.
+ *
+ * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and
+ * so on.
+ */
+struct vgic_state_iter {
+ int nr_cpus;
+ int nr_spis;
+ int dist_id;
+ int vcpu_id;
+ int intid;
+};
+
+static void iter_next(struct vgic_state_iter *iter)
+{
+ if (iter->dist_id == 0) {
+ iter->dist_id++;
+ return;
+ }
+
+ iter->intid++;
+ if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
+ ++iter->vcpu_id < iter->nr_cpus)
+ iter->intid = 0;
+}
+
+static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
+ loff_t pos)
+{
+ int nr_cpus = atomic_read(&kvm->online_vcpus);
+
+ memset(iter, 0, sizeof(*iter));
+
+ iter->nr_cpus = nr_cpus;
+ iter->nr_spis = kvm->arch.vgic.nr_spis;
+
+ /* Fast forward to the right position if needed */
+ while (pos--)
+ iter_next(iter);
+}
+
+static bool end_of_vgic(struct vgic_state_iter *iter)
+{
+ return iter->dist_id > 0 &&
+ iter->vcpu_id == iter->nr_cpus &&
+ (iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis;
+}
+
+static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
+{
+ struct kvm *kvm = (struct kvm *)s->private;
+ struct vgic_state_iter *iter;
+
+ mutex_lock(&kvm->lock);
+ iter = kvm->arch.vgic.iter;
+ if (iter) {
+ iter = ERR_PTR(-EBUSY);
+ goto out;
+ }
+
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter) {
+ iter = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ iter_init(kvm, iter, *pos);
+ kvm->arch.vgic.iter = iter;
+
+ if (end_of_vgic(iter))
+ iter = NULL;
+out:
+ mutex_unlock(&kvm->lock);
+ return iter;
+}
+
+static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct kvm *kvm = (struct kvm *)s->private;
+ struct vgic_state_iter *iter = kvm->arch.vgic.iter;
+
+ ++*pos;
+ iter_next(iter);
+ if (end_of_vgic(iter))
+ iter = NULL;
+ return iter;
+}
+
+static void vgic_debug_stop(struct seq_file *s, void *v)
+{
+ struct kvm *kvm = (struct kvm *)s->private;
+ struct vgic_state_iter *iter;
+
+ /*
+ * If the seq file wasn't properly opened, there's nothing to clearn
+ * up.
+ */
+ if (IS_ERR(v))
+ return;
+
+ mutex_lock(&kvm->lock);
+ iter = kvm->arch.vgic.iter;
+ kfree(iter);
+ kvm->arch.vgic.iter = NULL;
+ mutex_unlock(&kvm->lock);
+}
+
+static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
+{
+ seq_printf(s, "Distributor\n");
+ seq_printf(s, "===========\n");
+ seq_printf(s, "vgic_model:\t%s\n",
+ (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ?
+ "GICv3" : "GICv2");
+ seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
+ seq_printf(s, "enabled:\t%d\n", dist->enabled);
+ seq_printf(s, "\n");
+
+ seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
+ seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
+}
+
+static void print_header(struct seq_file *s, struct vgic_irq *irq,
+ struct kvm_vcpu *vcpu)
+{
+ int id = 0;
+ char *hdr = "SPI ";
+
+ if (vcpu) {
+ hdr = "VCPU";
+ id = vcpu->vcpu_id;
+ }
+
+ seq_printf(s, "\n");
+ seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
+ seq_printf(s, "---------------------------------------------------------------\n");
+}
+
+static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
+ struct kvm_vcpu *vcpu)
+{
+ char *type;
+ if (irq->intid < VGIC_NR_SGIS)
+ type = "SGI";
+ else if (irq->intid < VGIC_NR_PRIVATE_IRQS)
+ type = "PPI";
+ else
+ type = "SPI";
+
+ if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS)
+ print_header(s, irq, vcpu);
+
+ seq_printf(s, " %s %4d "
+ " %2d "
+ "%d%d%d%d%d%d "
+ "%8d "
+ "%8x "
+ " %2x "
+ "%3d "
+ " %2d "
+ "\n",
+ type, irq->intid,
+ (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1,
+ irq->pending_latch,
+ irq->line_level,
+ irq->active,
+ irq->enabled,
+ irq->hw,
+ irq->config == VGIC_CONFIG_LEVEL,
+ irq->hwintid,
+ irq->mpidr,
+ irq->source,
+ irq->priority,
+ (irq->vcpu) ? irq->vcpu->vcpu_id : -1);
+
+}
+
+static int vgic_debug_show(struct seq_file *s, void *v)
+{
+ struct kvm *kvm = (struct kvm *)s->private;
+ struct vgic_state_iter *iter = (struct vgic_state_iter *)v;
+ struct vgic_irq *irq;
+ struct kvm_vcpu *vcpu = NULL;
+
+ if (iter->dist_id == 0) {
+ print_dist_state(s, &kvm->arch.vgic);
+ return 0;
+ }
+
+ if (!kvm->arch.vgic.initialized)
+ return 0;
+
+ if (iter->vcpu_id < iter->nr_cpus) {
+ vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
+ irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid];
+ } else {
+ irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
+ }
+
+ spin_lock(&irq->irq_lock);
+ print_irq_state(s, irq, vcpu);
+ spin_unlock(&irq->irq_lock);
+
+ return 0;
+}
+
+static struct seq_operations vgic_debug_seq_ops = {
+ .start = vgic_debug_start,
+ .next = vgic_debug_next,
+ .stop = vgic_debug_stop,
+ .show = vgic_debug_show
+};
+
+static int debug_open(struct inode *inode, struct file *file)
+{
+ int ret;
+ ret = seq_open(file, &vgic_debug_seq_ops);
+ if (!ret) {
+ struct seq_file *seq;
+ /* seq_open will have modified file->private_data */
+ seq = file->private_data;
+ seq->private = inode->i_private;
+ }
+
+ return ret;
+};
+
+static struct file_operations vgic_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+int vgic_debug_init(struct kvm *kvm)
+{
+ if (!kvm->debugfs_dentry)
+ return -ENOENT;
+
+ if (!debugfs_create_file("vgic-state", 0444,
+ kvm->debugfs_dentry,
+ kvm,
+ &vgic_debug_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+int vgic_debug_destroy(struct kvm *kvm)
+{
+ return 0;
+}
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 8cebfbc19e90..702f8108608d 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -259,6 +259,8 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
+ vgic_debug_init(kvm);
+
dist->initialized = true;
out:
return ret;
@@ -268,15 +270,11 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- mutex_lock(&kvm->lock);
-
dist->ready = false;
dist->initialized = false;
kfree(dist->spis);
dist->nr_spis = 0;
-
- mutex_unlock(&kvm->lock);
}
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -286,17 +284,27 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
}
-void kvm_vgic_destroy(struct kvm *kvm)
+/* To be called with kvm->lock held */
+static void __kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int i;
+ vgic_debug_destroy(kvm);
+
kvm_vgic_dist_destroy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_destroy(vcpu);
}
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ __kvm_vgic_destroy(kvm);
+ mutex_unlock(&kvm->lock);
+}
+
/**
* vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
* is a GICv2. A GICv3 must be explicitly initialized by the guest using the
@@ -348,6 +356,10 @@ int kvm_vgic_map_resources(struct kvm *kvm)
ret = vgic_v2_map_resources(kvm);
else
ret = vgic_v3_map_resources(kvm);
+
+ if (ret)
+ __kvm_vgic_destroy(kvm);
+
out:
mutex_unlock(&kvm->lock);
return ret;
@@ -380,6 +392,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
}
/**
+ * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
+ *
+ * For a specific CPU, initialize the GIC VE hardware.
+ */
+void kvm_vgic_init_cpu_hardware(void)
+{
+ BUG_ON(preemptible());
+
+ /*
+ * We want to make sure the list registers start out clear so that we
+ * only have the program the used registers.
+ */
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_init_lrs();
+ else
+ kvm_call_hyp(__vgic_v3_init_lrs);
+}
+
+/**
* kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
* according to the host GIC model. Accordingly calls either
* vgic_v2/v3_probe which registers the KVM_DEVICE that can be
@@ -428,7 +459,7 @@ int kvm_vgic_hyp_init(void)
}
ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
- "AP_KVM_ARM_VGIC_INIT_STARTING",
+ "kvm/arm/vgic:starting",
vgic_init_cpu_starting, vgic_init_cpu_dying);
if (ret) {
kvm_err("Cannot register vgic CPU notifier\n");
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index d918dcf26a5a..f138ed2e9c63 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -99,6 +99,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
if (!vgic_has_its(kvm))
return -ENODEV;
+ if (!level)
+ return -1;
+
return vgic_its_inject_msi(kvm, &msi);
}
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 4660a7d04eea..8d1da1af4b09 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -350,7 +350,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
spin_lock(&irq->irq_lock);
- irq->pending = pendmask & (1U << bit_nr);
+ irq->pending_latch = pendmask & (1U << bit_nr);
vgic_queue_irq_unlock(vcpu->kvm, irq);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
return ret;
}
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u32 reg = 0;
-
- mutex_lock(&its->cmd_lock);
- if (its->creadr == its->cwriter)
- reg |= GITS_CTLR_QUIESCENT;
- if (its->enabled)
- reg |= GITS_CTLR_ENABLE;
- mutex_unlock(&its->cmd_lock);
-
- return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
struct vgic_its *its,
gpa_t addr, unsigned int len)
@@ -465,7 +442,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
return -EBUSY;
spin_lock(&itte->irq->irq_lock);
- itte->irq->pending = true;
+ itte->irq->pending_latch = true;
vgic_queue_irq_unlock(kvm, itte->irq);
return 0;
@@ -632,21 +609,22 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
int index;
u64 indirect_ptr;
gfn_t gfn;
+ int esz = GITS_BASER_ENTRY_SIZE(baser);
if (!(baser & GITS_BASER_INDIRECT)) {
phys_addr_t addr;
- if (id >= (l1_tbl_size / GITS_BASER_ENTRY_SIZE(baser)))
+ if (id >= (l1_tbl_size / esz))
return false;
- addr = BASER_ADDRESS(baser) + id * GITS_BASER_ENTRY_SIZE(baser);
+ addr = BASER_ADDRESS(baser) + id * esz;
gfn = addr >> PAGE_SHIFT;
return kvm_is_visible_gfn(its->dev->kvm, gfn);
}
/* calculate and check the index into the 1st level */
- index = id / (SZ_64K / GITS_BASER_ENTRY_SIZE(baser));
+ index = id / (SZ_64K / esz);
if (index >= (l1_tbl_size / sizeof(u64)))
return false;
@@ -670,8 +648,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
indirect_ptr &= GENMASK_ULL(51, 16);
/* Find the address of the actual entry */
- index = id % (SZ_64K / GITS_BASER_ENTRY_SIZE(baser));
- indirect_ptr += index * GITS_BASER_ENTRY_SIZE(baser);
+ index = id % (SZ_64K / esz);
+ indirect_ptr += index * esz;
gfn = indirect_ptr >> PAGE_SHIFT;
return kvm_is_visible_gfn(its->dev->kvm, gfn);
@@ -912,7 +890,7 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
if (!itte)
return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
- itte->irq->pending = false;
+ itte->irq->pending_latch = false;
return 0;
}
@@ -1160,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
#define ITS_CMD_SIZE 32
#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
{
gpa_t cbaser;
u64 cmd_buf[4];
- u32 reg;
- if (!its)
- return;
-
- mutex_lock(&its->cmd_lock);
-
- reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
- reg = ITS_CMD_OFFSET(reg);
- if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
- mutex_unlock(&its->cmd_lock);
+ /* Commands are only processed when the ITS is enabled. */
+ if (!its->enabled)
return;
- }
- its->cwriter = reg;
cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
@@ -1206,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
its->creadr = 0;
}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u64 reg;
+
+ if (!its)
+ return;
+
+ mutex_lock(&its->cmd_lock);
+
+ reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+ reg = ITS_CMD_OFFSET(reg);
+ if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+ mutex_unlock(&its->cmd_lock);
+ return;
+ }
+ its->cwriter = reg;
+
+ vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock);
}
@@ -1286,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
*regptr = reg;
}
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+ struct vgic_its *its,
+ gpa_t addr, unsigned int len)
+{
+ u32 reg = 0;
+
+ mutex_lock(&its->cmd_lock);
+ if (its->creadr == its->cwriter)
+ reg |= GITS_CTLR_QUIESCENT;
+ if (its->enabled)
+ reg |= GITS_CTLR_ENABLE;
+ mutex_unlock(&its->cmd_lock);
+
+ return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ mutex_lock(&its->cmd_lock);
+
+ its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+ /*
+ * Try to process any pending commands. This function bails out early
+ * if the ITS is disabled or no commands have been queued.
+ */
+ vgic_its_process_commands(kvm, its);
+
+ mutex_unlock(&its->cmd_lock);
+}
+
#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \
{ \
.reg_offset = off, \
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index ce1f4ed9daf4..d181d2baee9c 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -17,6 +17,7 @@
#include <kvm/arm_vgic.h>
#include <linux/uaccess.h>
#include <asm/kvm_mmu.h>
+#include <asm/cputype.h>
#include "vgic.h"
/* common helpers */
@@ -221,25 +222,17 @@ int kvm_register_vgic_device(unsigned long type)
ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
KVM_DEV_TYPE_ARM_VGIC_V3);
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
if (ret)
break;
ret = kvm_vgic_register_its_device();
-#endif
break;
}
return ret;
}
-struct vgic_reg_attr {
- struct kvm_vcpu *vcpu;
- gpa_t addr;
-};
-
-static int parse_vgic_v2_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr,
- struct vgic_reg_attr *reg_attr)
+int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+ struct vgic_reg_attr *reg_attr)
{
int cpuid;
@@ -294,14 +287,14 @@ static bool lock_all_vcpus(struct kvm *kvm)
}
/**
- * vgic_attr_regs_access_v2 - allows user space to access VGIC v2 state
+ * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state
*
* @dev: kvm device handle
* @attr: kvm device attribute
* @reg: address the value is read or written
* @is_write: true if userspace is writing a register
*/
-static int vgic_attr_regs_access_v2(struct kvm_device *dev,
+static int vgic_v2_attr_regs_access(struct kvm_device *dev,
struct kvm_device_attr *attr,
u32 *reg, bool is_write)
{
@@ -310,7 +303,7 @@ static int vgic_attr_regs_access_v2(struct kvm_device *dev,
struct kvm_vcpu *vcpu;
int ret;
- ret = parse_vgic_v2_attr(dev, attr, &reg_attr);
+ ret = vgic_v2_parse_attr(dev, attr, &reg_attr);
if (ret)
return ret;
@@ -364,7 +357,7 @@ static int vgic_v2_set_attr(struct kvm_device *dev,
if (get_user(reg, uaddr))
return -EFAULT;
- return vgic_attr_regs_access_v2(dev, attr, &reg, true);
+ return vgic_v2_attr_regs_access(dev, attr, &reg, true);
}
}
@@ -386,7 +379,7 @@ static int vgic_v2_get_attr(struct kvm_device *dev,
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
u32 reg = 0;
- ret = vgic_attr_regs_access_v2(dev, attr, &reg, false);
+ ret = vgic_v2_attr_regs_access(dev, attr, &reg, false);
if (ret)
return ret;
return put_user(reg, uaddr);
@@ -430,16 +423,211 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
.has_attr = vgic_v2_has_attr,
};
+int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+ struct vgic_reg_attr *reg_attr)
+{
+ unsigned long vgic_mpidr, mpidr_reg;
+
+ /*
+ * For KVM_DEV_ARM_VGIC_GRP_DIST_REGS group,
+ * attr might not hold MPIDR. Hence assume vcpu0.
+ */
+ if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS) {
+ vgic_mpidr = (attr->attr & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) >>
+ KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT;
+
+ mpidr_reg = VGIC_TO_MPIDR(vgic_mpidr);
+ reg_attr->vcpu = kvm_mpidr_to_vcpu(dev->kvm, mpidr_reg);
+ } else {
+ reg_attr->vcpu = kvm_get_vcpu(dev->kvm, 0);
+ }
+
+ if (!reg_attr->vcpu)
+ return -EINVAL;
+
+ reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+
+ return 0;
+}
+
+/*
+ * vgic_v3_attr_regs_access - allows user space to access VGIC v3 state
+ *
+ * @dev: kvm device handle
+ * @attr: kvm device attribute
+ * @reg: address the value is read or written
+ * @is_write: true if userspace is writing a register
+ */
+static int vgic_v3_attr_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ u64 *reg, bool is_write)
+{
+ struct vgic_reg_attr reg_attr;
+ gpa_t addr;
+ struct kvm_vcpu *vcpu;
+ int ret;
+ u32 tmp32;
+
+ ret = vgic_v3_parse_attr(dev, attr, &reg_attr);
+ if (ret)
+ return ret;
+
+ vcpu = reg_attr.vcpu;
+ addr = reg_attr.addr;
+
+ mutex_lock(&dev->kvm->lock);
+
+ if (unlikely(!vgic_initialized(dev->kvm))) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (!lock_all_vcpus(dev->kvm)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ if (is_write)
+ tmp32 = *reg;
+
+ ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32);
+ if (!is_write)
+ *reg = tmp32;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
+ if (is_write)
+ tmp32 = *reg;
+
+ ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32);
+ if (!is_write)
+ *reg = tmp32;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+ u64 regid;
+
+ regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
+ ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write,
+ regid, reg);
+ break;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
+ unsigned int info, intid;
+
+ info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >>
+ KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT;
+ if (info == VGIC_LEVEL_INFO_LINE_LEVEL) {
+ intid = attr->attr &
+ KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK;
+ ret = vgic_v3_line_level_info_uaccess(vcpu, is_write,
+ intid, reg);
+ } else {
+ ret = -EINVAL;
+ }
+ break;
+ }
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ unlock_all_vcpus(dev->kvm);
+out:
+ mutex_unlock(&dev->kvm->lock);
+ return ret;
+}
+
static int vgic_v3_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
- return vgic_set_common_attr(dev, attr);
+ int ret;
+
+ ret = vgic_set_common_attr(dev, attr);
+ if (ret != -ENXIO)
+ return ret;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 tmp32;
+ u64 reg;
+
+ if (get_user(tmp32, uaddr))
+ return -EFAULT;
+
+ reg = tmp32;
+ return vgic_v3_attr_regs_access(dev, attr, &reg, true);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 reg;
+
+ if (get_user(reg, uaddr))
+ return -EFAULT;
+
+ return vgic_v3_attr_regs_access(dev, attr, &reg, true);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u64 reg;
+ u32 tmp32;
+
+ if (get_user(tmp32, uaddr))
+ return -EFAULT;
+
+ reg = tmp32;
+ return vgic_v3_attr_regs_access(dev, attr, &reg, true);
+ }
+ }
+ return -ENXIO;
}
static int vgic_v3_get_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
- return vgic_get_common_attr(dev, attr);
+ int ret;
+
+ ret = vgic_get_common_attr(dev, attr);
+ if (ret != -ENXIO)
+ return ret;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u64 reg;
+ u32 tmp32;
+
+ ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
+ if (ret)
+ return ret;
+ tmp32 = reg;
+ return put_user(tmp32, uaddr);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 reg;
+
+ ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
+ if (ret)
+ return ret;
+ return put_user(reg, uaddr);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u64 reg;
+ u32 tmp32;
+
+ ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
+ if (ret)
+ return ret;
+ tmp32 = reg;
+ return put_user(tmp32, uaddr);
+ }
+ }
+ return -ENXIO;
}
static int vgic_v3_has_attr(struct kvm_device *dev,
@@ -453,8 +641,19 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
return 0;
}
break;
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+ return vgic_v3_has_attr_regs(dev, attr);
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
return 0;
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
+ if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >>
+ KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) ==
+ VGIC_LEVEL_INFO_LINE_LEVEL)
+ return 0;
+ break;
+ }
case KVM_DEV_ARM_VGIC_GRP_CTRL:
switch (attr->attr) {
case KVM_DEV_ARM_VGIC_CTRL_INIT:
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index b44b359cbbad..0a4283ed9aa7 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -98,7 +98,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid);
spin_lock(&irq->irq_lock);
- irq->pending = true;
+ irq->pending_latch = true;
irq->source |= 1U << source_vcpu->vcpu_id;
vgic_queue_irq_unlock(source_vcpu->kvm, irq);
@@ -129,6 +129,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
+ u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0);
int i;
/* GICD_ITARGETSR[0-7] are read-only */
@@ -141,7 +142,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
spin_lock(&irq->irq_lock);
- irq->targets = (val >> (i * 8)) & 0xff;
+ irq->targets = (val >> (i * 8)) & cpu_mask;
target = irq->targets ? __ffs(irq->targets) : 0;
irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
@@ -181,7 +182,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
irq->source &= ~((val >> (i * 8)) & 0xff);
if (!irq->source)
- irq->pending = false;
+ irq->pending_latch = false;
spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
@@ -203,7 +204,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
irq->source |= (val >> (i * 8)) & 0xff;
if (irq->source) {
- irq->pending = true;
+ irq->pending_latch = true;
vgic_queue_irq_unlock(vcpu->kvm, irq);
} else {
spin_unlock(&irq->irq_lock);
@@ -212,22 +213,6 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
}
}
-static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_set_vmcr(vcpu, vmcr);
- else
- vgic_v3_set_vmcr(vcpu, vmcr);
-}
-
-static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_get_vmcr(vcpu, vmcr);
- else
- vgic_v3_get_vmcr(vcpu, vmcr);
-}
-
#define GICC_ARCH_VERSION_V2 0x2
/* These are for userland accesses only, there is no guest-facing emulation. */
@@ -244,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
val = vmcr.ctlr;
break;
case GIC_CPU_PRIMASK:
- val = vmcr.pmr;
+ /*
+ * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
+ * the PMR field as GICH_VMCR.VMPriMask rather than
+ * GICC_PMR.Priority, so we expose the upper five bits of
+ * priority mask to userspace using the lower bits in the
+ * unsigned long.
+ */
+ val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >>
+ GICV_PMR_PRIORITY_SHIFT;
break;
case GIC_CPU_BINPOINT:
val = vmcr.bpr;
@@ -277,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
vmcr.ctlr = val;
break;
case GIC_CPU_PRIMASK:
- vmcr.pmr = val;
+ /*
+ * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
+ * the PMR field as GICH_VMCR.VMPriMask rather than
+ * GICC_PMR.Priority, so we expose the upper five bits of
+ * priority mask to userspace using the lower bits in the
+ * unsigned long.
+ */
+ vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) &
+ GICV_PMR_PRIORITY_MASK;
break;
case GIC_CPU_BINPOINT:
vmcr.bpr = val;
@@ -368,21 +369,30 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
{
- int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
- const struct vgic_register_region *regions;
+ const struct vgic_register_region *region;
+ struct vgic_io_device iodev;
+ struct vgic_reg_attr reg_attr;
+ struct kvm_vcpu *vcpu;
gpa_t addr;
- int nr_regions, i, len;
+ int ret;
+
+ ret = vgic_v2_parse_attr(dev, attr, &reg_attr);
+ if (ret)
+ return ret;
- addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ vcpu = reg_attr.vcpu;
+ addr = reg_attr.addr;
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- regions = vgic_v2_dist_registers;
- nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
+ iodev.regions = vgic_v2_dist_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
+ iodev.base_addr = 0;
break;
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
- regions = vgic_v2_cpu_registers;
- nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
+ iodev.regions = vgic_v2_cpu_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
+ iodev.base_addr = 0;
break;
default:
return -ENXIO;
@@ -392,43 +402,11 @@ int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
if (addr & 3)
return -ENXIO;
- for (i = 0; i < nr_regions; i++) {
- if (regions[i].bits_per_irq)
- len = (regions[i].bits_per_irq * nr_irqs) / 8;
- else
- len = regions[i].len;
-
- if (regions[i].reg_offset <= addr &&
- regions[i].reg_offset + len > addr)
- return 0;
- }
-
- return -ENXIO;
-}
-
-/*
- * When userland tries to access the VGIC register handlers, we need to
- * create a usable struct vgic_io_device to be passed to the handlers and we
- * have to set up a buffer similar to what would have happened if a guest MMIO
- * access occurred, including doing endian conversions on BE systems.
- */
-static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
- bool is_write, int offset, u32 *val)
-{
- unsigned int len = 4;
- u8 buf[4];
- int ret;
-
- if (is_write) {
- vgic_data_host_to_mmio_bus(buf, len, *val);
- ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf);
- } else {
- ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf);
- if (!ret)
- *val = vgic_data_mmio_bus_to_host(buf, len);
- }
+ region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32));
+ if (!region)
+ return -ENXIO;
- return ret;
+ return 0;
}
int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 0d3c76a4208b..6afb3b484886 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -18,6 +18,8 @@
#include <kvm/arm_vgic.h>
#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
#include "vgic.h"
#include "vgic-mmio.h"
@@ -42,7 +44,6 @@ u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len,
return reg | ((u64)val << lower);
}
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
bool vgic_has_its(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
@@ -52,7 +53,6 @@ bool vgic_has_its(struct kvm *kvm)
return dist->has_its;
}
-#endif
static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
@@ -209,6 +209,60 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
return 0;
}
+static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ u32 value = 0;
+ int i;
+
+ /*
+ * pending state of interrupt is latched in pending_latch variable.
+ * Userspace will save and restore pending state and line_level
+ * separately.
+ * Refer to Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+ * for handling of ISPENDR and ICPENDR.
+ */
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ if (irq->pending_latch)
+ value |= (1U << i);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return value;
+}
+
+static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+ if (test_bit(i, &val)) {
+ /*
+ * pending_latch is set irrespective of irq type
+ * (level or edge) to avoid dependency that VM should
+ * restore irq config before pending info.
+ */
+ irq->pending_latch = true;
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ } else {
+ irq->pending_latch = false;
+ spin_unlock(&irq->irq_lock);
+ }
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+}
+
/* We want to avoid outer shareable. */
u64 vgic_sanitise_shareability(u64 field)
{
@@ -358,7 +412,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
* We take some special care here to fix the calculation of the register
* offset.
*/
-#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \
+#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, ur, uw, bpi, acc) \
{ \
.reg_offset = off, \
.bits_per_irq = bpi, \
@@ -373,47 +427,54 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
.access_flags = acc, \
.read = rd, \
.write = wr, \
+ .uaccess_read = ur, \
+ .uaccess_write = uw, \
}
static const struct vgic_register_region vgic_v3_dist_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICD_STATUSR,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
- vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
- vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
+ vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
- vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
+ vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
+ vgic_mmio_read_pending, vgic_mmio_write_spending,
+ vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
+ vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
+ vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
- vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
+ 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 8,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR,
- vgic_mmio_read_config, vgic_mmio_write_config, 2,
+ vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER,
- vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64,
+ vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICD_IDREGS,
vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
@@ -424,12 +485,18 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_STATUSR,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER,
vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
@@ -451,11 +518,13 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 4,
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0,
+ vgic_mmio_read_pending, vgic_mmio_write_spending,
+ vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 4,
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_sactive, 4,
@@ -548,6 +617,54 @@ int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address)
return ret;
}
+int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ const struct vgic_register_region *region;
+ struct vgic_io_device iodev;
+ struct vgic_reg_attr reg_attr;
+ struct kvm_vcpu *vcpu;
+ gpa_t addr;
+ int ret;
+
+ ret = vgic_v3_parse_attr(dev, attr, &reg_attr);
+ if (ret)
+ return ret;
+
+ vcpu = reg_attr.vcpu;
+ addr = reg_attr.addr;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ iodev.regions = vgic_v3_dist_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v3_dist_registers);
+ iodev.base_addr = 0;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{
+ iodev.regions = vgic_v3_rdbase_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+ iodev.base_addr = 0;
+ break;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+ u64 reg, id;
+
+ id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
+ return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, &reg);
+ }
+ default:
+ return -ENXIO;
+ }
+
+ /* We only support aligned 32-bit accesses. */
+ if (addr & 3)
+ return -ENXIO;
+
+ region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32));
+ if (!region)
+ return -ENXIO;
+
+ return 0;
+}
/*
* Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
* generation register ICC_SGI1R_EL1) with a given VCPU.
@@ -648,9 +765,55 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
spin_lock(&irq->irq_lock);
- irq->pending = true;
+ irq->pending_latch = true;
vgic_queue_irq_unlock(vcpu->kvm, irq);
vgic_put_irq(vcpu->kvm, irq);
}
}
+
+int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val)
+{
+ struct vgic_io_device dev = {
+ .regions = vgic_v3_dist_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v3_dist_registers),
+ };
+
+ return vgic_uaccess(vcpu, &dev, is_write, offset, val);
+}
+
+int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val)
+{
+ struct vgic_io_device rd_dev = {
+ .regions = vgic_v3_rdbase_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers),
+ };
+
+ struct vgic_io_device sgi_dev = {
+ .regions = vgic_v3_sgibase_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers),
+ };
+
+ /* SGI_base is the next 64K frame after RD_base */
+ if (offset >= SZ_64K)
+ return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K,
+ val);
+ else
+ return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
+}
+
+int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ u32 intid, u64 *val)
+{
+ if (intid % 32)
+ return -EINVAL;
+
+ if (is_write)
+ vgic_write_irq_line_level_info(vcpu, intid, *val);
+ else
+ *val = vgic_read_irq_line_level_info(vcpu, intid);
+
+ return 0;
+}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index ebe1b9fa3c4d..2a5db1352722 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -111,7 +111,7 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- if (irq->pending)
+ if (irq_is_pending(irq))
value |= (1U << i);
vgic_put_irq(vcpu->kvm, irq);
@@ -131,9 +131,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
spin_lock(&irq->irq_lock);
- irq->pending = true;
- if (irq->config == VGIC_CONFIG_LEVEL)
- irq->soft_pending = true;
+ irq->pending_latch = true;
vgic_queue_irq_unlock(vcpu->kvm, irq);
vgic_put_irq(vcpu->kvm, irq);
@@ -152,12 +150,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
spin_lock(&irq->irq_lock);
- if (irq->config == VGIC_CONFIG_LEVEL) {
- irq->soft_pending = false;
- irq->pending = irq->line_level;
- } else {
- irq->pending = false;
- }
+ irq->pending_latch = false;
spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
@@ -187,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool new_active_state)
{
+ struct kvm_vcpu *requester_vcpu;
spin_lock(&irq->irq_lock);
+
+ /*
+ * The vcpu parameter here can mean multiple things depending on how
+ * this function is called; when handling a trap from the kernel it
+ * depends on the GIC version, and these functions are also called as
+ * part of save/restore from userspace.
+ *
+ * Therefore, we have to figure out the requester in a reliable way.
+ *
+ * When accessing VGIC state from user space, the requester_vcpu is
+ * NULL, which is fine, because we guarantee that no VCPUs are running
+ * when accessing VGIC state from user space so irq->vcpu->cpu is
+ * always -1.
+ */
+ requester_vcpu = kvm_arm_get_running_vcpu();
+
/*
* If this virtual IRQ was written into a list register, we
* have to make sure the CPU that runs the VCPU thread has
- * synced back LR state to the struct vgic_irq. We can only
- * know this for sure, when either this irq is not assigned to
- * anyone's AP list anymore, or the VCPU thread is not
- * running on any CPUs.
+ * synced back the LR state to the struct vgic_irq.
*
- * In the opposite case, we know the VCPU thread may be on its
- * way back from the guest and still has to sync back this
- * IRQ, so we release and re-acquire the spin_lock to let the
- * other thread sync back the IRQ.
+ * As long as the conditions below are true, we know the VCPU thread
+ * may be on its way back from the guest (we kicked the VCPU thread in
+ * vgic_change_active_prepare) and still has to sync back this IRQ,
+ * so we release and re-acquire the spin_lock to let the other thread
+ * sync back the IRQ.
*/
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+ irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);
@@ -359,18 +368,70 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
spin_lock(&irq->irq_lock);
- if (test_bit(i * 2 + 1, &val)) {
+ if (test_bit(i * 2 + 1, &val))
irq->config = VGIC_CONFIG_EDGE;
- } else {
+ else
irq->config = VGIC_CONFIG_LEVEL;
- irq->pending = irq->line_level | irq->soft_pending;
- }
spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
}
}
+u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
+{
+ int i;
+ u64 val = 0;
+ int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+
+ for (i = 0; i < 32; i++) {
+ struct vgic_irq *irq;
+
+ if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs)
+ continue;
+
+ irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level)
+ val |= (1U << i);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return val;
+}
+
+void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
+ const u64 val)
+{
+ int i;
+ int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+
+ for (i = 0; i < 32; i++) {
+ struct vgic_irq *irq;
+ bool new_level;
+
+ if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs)
+ continue;
+
+ irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ /*
+ * Line level is set irrespective of irq type
+ * (level or edge) to avoid dependency that VM should
+ * restore irq config before line level.
+ */
+ new_level = !!(val & (1U << i));
+ spin_lock(&irq->irq_lock);
+ irq->line_level = new_level;
+ if (new_level)
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ else
+ spin_unlock(&irq->irq_lock);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+}
+
static int match_region(const void *key, const void *elt)
{
const unsigned int offset = (unsigned long)key;
@@ -394,6 +455,22 @@ vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
sizeof(region[0]), match_region);
}
+void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_set_vmcr(vcpu, vmcr);
+ else
+ vgic_v3_set_vmcr(vcpu, vmcr);
+}
+
+void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_get_vmcr(vcpu, vmcr);
+ else
+ vgic_v3_get_vmcr(vcpu, vmcr);
+}
+
/*
* kvm_mmio_read_buf() returns a value in a format where it can be converted
* to a byte array and be directly observed as the guest wanted it to appear
@@ -484,6 +561,74 @@ static bool check_region(const struct kvm *kvm,
return false;
}
+const struct vgic_register_region *
+vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
+ gpa_t addr, int len)
+{
+ const struct vgic_register_region *region;
+
+ region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
+ addr - iodev->base_addr);
+ if (!region || !check_region(vcpu->kvm, region, addr, len))
+ return NULL;
+
+ return region;
+}
+
+static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, u32 *val)
+{
+ struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
+ const struct vgic_register_region *region;
+ struct kvm_vcpu *r_vcpu;
+
+ region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
+ if (!region) {
+ *val = 0;
+ return 0;
+ }
+
+ r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
+ if (region->uaccess_read)
+ *val = region->uaccess_read(r_vcpu, addr, sizeof(u32));
+ else
+ *val = region->read(r_vcpu, addr, sizeof(u32));
+
+ return 0;
+}
+
+static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, const u32 *val)
+{
+ struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
+ const struct vgic_register_region *region;
+ struct kvm_vcpu *r_vcpu;
+
+ region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
+ if (!region)
+ return 0;
+
+ r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
+ if (region->uaccess_write)
+ region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
+ else
+ region->write(r_vcpu, addr, sizeof(u32), *val);
+
+ return 0;
+}
+
+/*
+ * Userland access to VGIC registers.
+ */
+int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
+ bool is_write, int offset, u32 *val)
+{
+ if (is_write)
+ return vgic_uaccess_write(vcpu, &dev->dev, offset, val);
+ else
+ return vgic_uaccess_read(vcpu, &dev->dev, offset, val);
+}
+
static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
@@ -491,9 +636,8 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
const struct vgic_register_region *region;
unsigned long data = 0;
- region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
- addr - iodev->base_addr);
- if (!region || !check_region(vcpu->kvm, region, addr, len)) {
+ region = vgic_get_mmio_region(vcpu, iodev, addr, len);
+ if (!region) {
memset(val, 0, len);
return 0;
}
@@ -524,9 +668,8 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
const struct vgic_register_region *region;
unsigned long data = vgic_data_mmio_bus_to_host(val, len);
- region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
- addr - iodev->base_addr);
- if (!region || !check_region(vcpu->kvm, region, addr, len))
+ region = vgic_get_mmio_region(vcpu, iodev, addr, len);
+ if (!region)
return 0;
switch (iodev->iodev_type) {
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 84961b4e4422..98bb566b660a 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -34,6 +34,10 @@ struct vgic_register_region {
gpa_t addr, unsigned int len,
unsigned long val);
};
+ unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len);
+ void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val);
};
extern struct kvm_io_device_ops kvm_io_gic_ops;
@@ -86,6 +90,18 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
.write = wr, \
}
+#define REGISTER_DESC_WITH_LENGTH_UACCESS(off, rd, wr, urd, uwr, length, acc) \
+ { \
+ .reg_offset = off, \
+ .bits_per_irq = 0, \
+ .len = length, \
+ .access_flags = acc, \
+ .read = rd, \
+ .write = wr, \
+ .uaccess_read = urd, \
+ .uaccess_write = uwr, \
+ }
+
int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
struct vgic_register_region *reg_desc,
struct vgic_io_device *region,
@@ -158,6 +174,14 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
+int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
+ bool is_write, int offset, u32 *val);
+
+u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
+
+void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
+ const u64 val);
+
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 9bab86757fa4..b637d9c7afe3 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val)
return (unsigned long *)val;
}
+static inline void vgic_v2_write_lr(int lr, u32 val)
+{
+ void __iomem *base = kvm_vgic_global_state.vctrl_base;
+
+ writel_relaxed(val, base + GICH_LR0 + (lr * 4));
+}
+
+void vgic_v2_init_lrs(void)
+{
+ int i;
+
+ for (i = 0; i < kvm_vgic_global_state.nr_lr; i++)
+ vgic_v2_write_lr(i, 0);
+}
+
void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -104,7 +119,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
/* Edge is the only case where we preserve the pending bit */
if (irq->config == VGIC_CONFIG_EDGE &&
(val & GICH_LR_PENDING_BIT)) {
- irq->pending = true;
+ irq->pending_latch = true;
if (vgic_irq_is_sgi(intid)) {
u32 cpuid = val & GICH_LR_PHYSID_CPUID;
@@ -120,9 +135,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
*/
if (irq->config == VGIC_CONFIG_LEVEL) {
if (!(val & GICH_LR_PENDING_BIT))
- irq->soft_pending = false;
-
- irq->pending = irq->line_level || irq->soft_pending;
+ irq->pending_latch = false;
}
spin_unlock(&irq->irq_lock);
@@ -145,11 +158,11 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 val = irq->intid;
- if (irq->pending) {
+ if (irq_is_pending(irq)) {
val |= GICH_LR_PENDING_BIT;
if (irq->config == VGIC_CONFIG_EDGE)
- irq->pending = false;
+ irq->pending_latch = false;
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
@@ -158,7 +171,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source)
- irq->pending = true;
+ irq->pending_latch = true;
}
}
@@ -193,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_MASK;
vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
GICH_VMCR_BINPOINT_MASK;
- vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
- GICH_VMCR_PRIMASK_MASK;
+ vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) <<
+ GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
}
@@ -209,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_SHIFT;
vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
GICH_VMCR_BINPOINT_SHIFT;
- vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
- GICH_VMCR_PRIMASK_SHIFT;
+ vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >>
+ GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
}
void vgic_v2_enable(struct kvm_vcpu *vcpu)
@@ -293,8 +306,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
dist->ready = true;
out:
- if (ret)
- kvm_vgic_destroy(kvm);
return ret;
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 5c9f9745e6ca..be0f4c3e0142 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -94,7 +94,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
/* Edge is the only case where we preserve the pending bit */
if (irq->config == VGIC_CONFIG_EDGE &&
(val & ICH_LR_PENDING_BIT)) {
- irq->pending = true;
+ irq->pending_latch = true;
if (vgic_irq_is_sgi(intid) &&
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
@@ -111,9 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
*/
if (irq->config == VGIC_CONFIG_LEVEL) {
if (!(val & ICH_LR_PENDING_BIT))
- irq->soft_pending = false;
-
- irq->pending = irq->line_level || irq->soft_pending;
+ irq->pending_latch = false;
}
spin_unlock(&irq->irq_lock);
@@ -127,11 +125,11 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u64 val = irq->intid;
- if (irq->pending) {
+ if (irq_is_pending(irq)) {
val |= ICH_LR_PENDING_BIT;
if (irq->config == VGIC_CONFIG_EDGE)
- irq->pending = false;
+ irq->pending_latch = false;
if (vgic_irq_is_sgi(irq->intid) &&
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
@@ -141,7 +139,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source)
- irq->pending = true;
+ irq->pending_latch = true;
}
}
@@ -177,10 +175,18 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
u32 vmcr;
- vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
+ /*
+ * Ignore the FIQen bit, because GIC emulation always implies
+ * SRE=1 which means the vFIQEn bit is also RES1.
+ */
+ vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) <<
+ ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
+ vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
+ vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK;
+ vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK;
vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
}
@@ -189,10 +195,18 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
- vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
+ /*
+ * Ignore the FIQen bit, because GIC emulation always implies
+ * SRE=1 which means the vFIQEn bit is also RES1.
+ */
+ vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) <<
+ ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
+ vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+ vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT;
+ vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT;
}
#define INITIAL_PENDBASER_VALUE \
@@ -215,15 +229,25 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/*
* If we are emulating a GICv3, we do it in an non-GICv2-compatible
* way, so we force SRE to 1 to demonstrate this to the guest.
+ * Also, we don't support any form of IRQ/FIQ bypass.
* This goes with the spec allowing the value to be RAO/WI.
*/
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+ vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+ ICC_SRE_EL1_DFB |
+ ICC_SRE_EL1_SRE);
vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
} else {
vgic_v3->vgic_sre = 0;
}
+ vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_ID_BITS_MASK) >>
+ ICH_VTR_ID_BITS_SHIFT;
+ vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_PRI_BITS_MASK) >>
+ ICH_VTR_PRI_BITS_SHIFT) + 1;
+
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EN;
}
@@ -302,8 +326,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
dist->ready = true;
out:
- if (ret)
- kvm_vgic_destroy(kvm);
return ret;
}
@@ -324,6 +346,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
*/
kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
kvm_vgic_global_state.can_emulate_gicv2 = false;
+ kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;
if (!info->vcpu.start) {
kvm_info("GICv3: no GICV resource entry\n");
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 6440b56ec90e..654dfd40e449 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -160,7 +160,7 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
* If the distributor is disabled, pending interrupts shouldn't be
* forwarded.
*/
- if (irq->enabled && irq->pending) {
+ if (irq->enabled && irq_is_pending(irq)) {
if (unlikely(irq->target_vcpu &&
!irq->target_vcpu->kvm->arch.vgic.enabled))
return NULL;
@@ -204,8 +204,8 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
goto out;
}
- penda = irqa->enabled && irqa->pending;
- pendb = irqb->enabled && irqb->pending;
+ penda = irqa->enabled && irq_is_pending(irqa);
+ pendb = irqb->enabled && irq_is_pending(irqb);
if (!penda || !pendb) {
ret = (int)pendb - (int)penda;
@@ -335,9 +335,22 @@ retry:
return true;
}
-static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
- unsigned int intid, bool level,
- bool mapped_irq)
+/**
+ * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
+ * @kvm: The VM structure pointer
+ * @cpuid: The CPU for PPIs
+ * @intid: The INTID to inject a new state to.
+ * @level: Edge-triggered: true: to trigger the interrupt
+ * false: to ignore the call
+ * Level-sensitive true: raise the input signal
+ * false: lower the input signal
+ *
+ * The VGIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
+ bool level)
{
struct kvm_vcpu *vcpu;
struct vgic_irq *irq;
@@ -357,11 +370,6 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
if (!irq)
return -EINVAL;
- if (irq->hw != mapped_irq) {
- vgic_put_irq(kvm, irq);
- return -EINVAL;
- }
-
spin_lock(&irq->irq_lock);
if (!vgic_validate_injection(irq, level)) {
@@ -371,12 +379,10 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
return 0;
}
- if (irq->config == VGIC_CONFIG_LEVEL) {
+ if (irq->config == VGIC_CONFIG_LEVEL)
irq->line_level = level;
- irq->pending = level || irq->soft_pending;
- } else {
- irq->pending = true;
- }
+ else
+ irq->pending_latch = true;
vgic_queue_irq_unlock(kvm, irq);
vgic_put_irq(kvm, irq);
@@ -384,32 +390,6 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
return 0;
}
-/**
- * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
- * @kvm: The VM structure pointer
- * @cpuid: The CPU for PPIs
- * @intid: The INTID to inject a new state to.
- * @level: Edge-triggered: true: to trigger the interrupt
- * false: to ignore the call
- * Level-sensitive true: raise the input signal
- * false: lower the input signal
- *
- * The VGIC is not concerned with devices being active-LOW or active-HIGH for
- * level-sensitive interrupts. You can think of the level parameter as 1
- * being HIGH and 0 being LOW and all devices being active-HIGH.
- */
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level)
-{
- return vgic_update_irq_pending(kvm, cpuid, intid, level, false);
-}
-
-int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level)
-{
- return vgic_update_irq_pending(kvm, cpuid, intid, level, true);
-}
-
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
@@ -689,7 +669,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
spin_lock(&irq->irq_lock);
- pending = irq->pending && irq->enabled;
+ pending = irq_is_pending(irq) && irq->enabled;
spin_unlock(&irq->irq_lock);
if (pending)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 9d9e014765a2..6cf557e9f718 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -30,13 +30,86 @@
#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
+#define VGIC_AFFINITY_0_SHIFT 0
+#define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT)
+#define VGIC_AFFINITY_1_SHIFT 8
+#define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT)
+#define VGIC_AFFINITY_2_SHIFT 16
+#define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT)
+#define VGIC_AFFINITY_3_SHIFT 24
+#define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT)
+
+#define VGIC_AFFINITY_LEVEL(reg, level) \
+ ((((reg) & VGIC_AFFINITY_## level ##_MASK) \
+ >> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
+
+/*
+ * The Userspace encodes the affinity differently from the MPIDR,
+ * Below macro converts vgic userspace format to MPIDR reg format.
+ */
+#define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \
+ VGIC_AFFINITY_LEVEL(val, 1) | \
+ VGIC_AFFINITY_LEVEL(val, 2) | \
+ VGIC_AFFINITY_LEVEL(val, 3))
+
+/*
+ * As per Documentation/virtual/kvm/devices/arm-vgic-v3.txt,
+ * below macros are defined for CPUREG encoding.
+ */
+#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000
+#define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT 14
+#define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK 0x0000000000003800
+#define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT 11
+#define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK 0x0000000000000780
+#define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT 7
+#define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK 0x0000000000000078
+#define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT 3
+#define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK 0x0000000000000007
+#define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT 0
+
+#define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \
+ KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \
+ KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \
+ KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
+ KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
+
+static inline bool irq_is_pending(struct vgic_irq *irq)
+{
+ if (irq->config == VGIC_CONFIG_EDGE)
+ return irq->pending_latch;
+ else
+ return irq->pending_latch || irq->line_level;
+}
+
+/*
+ * This struct provides an intermediate representation of the fields contained
+ * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
+ * state to userspace can generate either GICv2 or GICv3 CPU interface
+ * registers regardless of the hardware backed GIC used.
+ */
struct vgic_vmcr {
u32 ctlr;
u32 abpr;
u32 bpr;
- u32 pmr;
+ u32 pmr; /* Priority mask field in the GICC_PMR and
+ * ICC_PMR_EL1 priority field format */
+ /* Below member variable are valid only for GICv3 */
+ u32 grpen0;
+ u32 grpen1;
+};
+
+struct vgic_reg_attr {
+ struct kvm_vcpu *vcpu;
+ gpa_t addr;
};
+int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+ struct vgic_reg_attr *reg_attr);
+int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+ struct vgic_reg_attr *reg_attr);
+const struct vgic_register_region *
+vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
+ gpa_t addr, int len);
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
u32 intid);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
@@ -64,6 +137,8 @@ int vgic_v2_map_resources(struct kvm *kvm);
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
enum vgic_type);
+void vgic_v2_init_lrs(void);
+
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{
if (irq->intid < VGIC_MIN_LPI)
@@ -84,40 +159,29 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
int vgic_register_its_iodevs(struct kvm *kvm);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
void vgic_enable_lpis(struct kvm_vcpu *vcpu);
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
-#else
-static inline int vgic_register_its_iodevs(struct kvm *kvm)
-{
- return -ENODEV;
-}
-
-static inline bool vgic_has_its(struct kvm *kvm)
-{
- return false;
-}
-
-static inline int kvm_vgic_register_its_device(void)
-{
- return -ENODEV;
-}
-
-static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
- return -ENODEV;
-}
-#endif
-
+int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
+int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val);
+int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val);
+int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ u64 id, u64 *val);
+int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
+ u64 *reg);
+int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ u32 intid, u64 *val);
int kvm_register_vgic_device(unsigned long type);
+void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
int vgic_lazy_init(struct kvm *kvm);
int vgic_init(struct kvm *kvm);
+int vgic_debug_init(struct kvm *kvm);
+int vgic_debug_destroy(struct kvm *kvm);
+
#endif
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index efeceb0a222d..bb298a200cd3 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>
+#include <linux/sched/mm.h>
#include "async_pf.h"
#include <trace/events/kvm.h>
@@ -76,16 +77,20 @@ static void async_pf_execute(struct work_struct *work)
struct kvm_vcpu *vcpu = apf->vcpu;
unsigned long addr = apf->addr;
gva_t gva = apf->gva;
+ int locked = 1;
might_sleep();
/*
* This work is run asynchromously to the task which owns
* mm and might be done in another context, so we must
- * use FOLL_REMOTE.
+ * access remotely.
*/
- __get_user_pages_unlocked(NULL, mm, addr, 1, NULL,
- FOLL_WRITE | FOLL_REMOTE);
+ down_read(&mm->mmap_sem);
+ get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL,
+ &locked);
+ if (locked)
+ up_read(&mm->mmap_sem);
kvm_async_page_present_sync(vcpu, apf);
@@ -200,7 +205,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
- atomic_inc(&work->mm->mm_users);
+ mmget(work->mm);
kvm_get_kvm(work->vcpu->kvm);
/* this can't really happen otherwise gfn_to_pfn_async
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a29786dd9522..4d28a9ddbee0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
continue;
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
- kvm->buses[bus_idx]->ioeventfd_count--;
+ if (kvm->buses[bus_idx])
+ kvm->buses[bus_idx]->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7f9ee2929cfe..88257b311cb5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -32,7 +32,9 @@
#include <linux/file.h>
#include <linux/syscore_ops.h>
#include <linux/cpu.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/stat.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
@@ -53,7 +55,7 @@
#include <asm/processor.h>
#include <asm/io.h>
#include <asm/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/pgtable.h>
#include "coalesced_mmio.h"
@@ -70,16 +72,19 @@ MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
/* Architectures should define their poll value according to the halt latency */
-static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
+unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */
-static unsigned int halt_poll_ns_grow = 2;
+unsigned int halt_poll_ns_grow = 2;
module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
/* Default resets per-vcpu halt_poll_ns . */
-static unsigned int halt_poll_ns_shrink;
+unsigned int halt_poll_ns_shrink;
module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
/*
* Ordering of locks:
@@ -503,11 +508,6 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
if (!slots)
return NULL;
- /*
- * Init kvm generation close to the maximum to easily test the
- * code of handling generation number wrap-around.
- */
- slots->generation = -150;
for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
slots->id_to_index[i] = slots->memslots[i].id = i;
@@ -595,7 +595,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
stat_data->kvm = kvm;
stat_data->offset = p->offset;
kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- if (!debugfs_create_file(p->name, 0444,
+ if (!debugfs_create_file(p->name, 0644,
kvm->debugfs_dentry,
stat_data,
stat_fops_per_vm[p->kind]))
@@ -613,13 +613,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
return ERR_PTR(-ENOMEM);
spin_lock_init(&kvm->mmu_lock);
- atomic_inc(&current->mm->mm_count);
+ mmgrab(current->mm);
kvm->mm = current->mm;
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
- atomic_set(&kvm->users_count, 1);
+ refcount_set(&kvm->users_count, 1);
INIT_LIST_HEAD(&kvm->devices);
r = kvm_arch_init_vm(kvm, type);
@@ -638,9 +638,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = -ENOMEM;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- kvm->memslots[i] = kvm_alloc_memslots();
- if (!kvm->memslots[i])
+ struct kvm_memslots *slots = kvm_alloc_memslots();
+ if (!slots)
goto out_err_no_srcu;
+ /*
+ * Generations must be different for each address space.
+ * Init kvm generation close to the maximum to easily test the
+ * code of handling generation number wrap-around.
+ */
+ slots->generation = i * 2 - 150;
+ rcu_assign_pointer(kvm->memslots[i], slots);
}
if (init_srcu_struct(&kvm->srcu))
@@ -720,8 +727,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
- for (i = 0; i < KVM_NR_BUSES; i++)
- kvm_io_bus_destroy(kvm->buses[i]);
+ for (i = 0; i < KVM_NR_BUSES; i++) {
+ if (kvm->buses[i])
+ kvm_io_bus_destroy(kvm->buses[i]);
+ kvm->buses[i] = NULL;
+ }
kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -742,13 +752,13 @@ static void kvm_destroy_vm(struct kvm *kvm)
void kvm_get_kvm(struct kvm *kvm)
{
- atomic_inc(&kvm->users_count);
+ refcount_inc(&kvm->users_count);
}
EXPORT_SYMBOL_GPL(kvm_get_kvm);
void kvm_put_kvm(struct kvm *kvm)
{
- if (atomic_dec_and_test(&kvm->users_count))
+ if (refcount_dec_and_test(&kvm->users_count))
kvm_destroy_vm(kvm);
}
EXPORT_SYMBOL_GPL(kvm_put_kvm);
@@ -867,8 +877,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
* Increment the new memslot generation a second time. This prevents
* vm exits that race with memslot updates from caching a memslot
* generation that will (potentially) be valid forever.
+ *
+ * Generations must be unique even across address spaces. We do not need
+ * a global counter for that, instead the generation space is evenly split
+ * across address spaces. For example, with two address spaces, address
+ * space 0 will use generations 0, 4, 8, ... while * address space 1 will
+ * use generations 2, 6, 10, 14, ...
*/
- slots->generation++;
+ slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1;
kvm_arch_memslots_updated(kvm, slots);
@@ -1049,7 +1065,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
* changes) is disallowed above, so any other attribute changes getting
* here can be skipped.
*/
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+ if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) {
r = kvm_iommu_map_pages(kvm, &new);
return r;
}
@@ -1091,37 +1107,31 @@ int kvm_get_dirty_log(struct kvm *kvm,
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int r, i, as_id, id;
+ int i, as_id, id;
unsigned long n;
unsigned long any = 0;
- r = -EINVAL;
as_id = log->slot >> 16;
id = (u16)log->slot;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
- goto out;
+ return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
memslot = id_to_memslot(slots, id);
- r = -ENOENT;
if (!memslot->dirty_bitmap)
- goto out;
+ return -ENOENT;
n = kvm_dirty_bitmap_bytes(memslot);
for (i = 0; !any && i < n/sizeof(long); ++i)
any = memslot->dirty_bitmap[i];
- r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
- goto out;
+ return -EFAULT;
if (any)
*is_dirty = 1;
-
- r = 0;
-out:
- return r;
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
@@ -1153,24 +1163,22 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int r, i, as_id, id;
+ int i, as_id, id;
unsigned long n;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_buffer;
- r = -EINVAL;
as_id = log->slot >> 16;
id = (u16)log->slot;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
- goto out;
+ return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
memslot = id_to_memslot(slots, id);
dirty_bitmap = memslot->dirty_bitmap;
- r = -ENOENT;
if (!dirty_bitmap)
- goto out;
+ return -ENOENT;
n = kvm_dirty_bitmap_bytes(memslot);
@@ -1199,14 +1207,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
}
spin_unlock(&kvm->mmu_lock);
-
- r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
- goto out;
-
- r = 0;
-out:
- return r;
+ return -EFAULT;
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
#endif
@@ -1415,13 +1418,12 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
npages = get_user_page_nowait(addr, write_fault, page);
up_read(&current->mm->mmap_sem);
} else {
- unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON;
+ unsigned int flags = FOLL_HWPOISON;
if (write_fault)
flags |= FOLL_WRITE;
- npages = __get_user_pages_unlocked(current, current->mm, addr, 1,
- page, flags);
+ npages = get_user_pages_unlocked(addr, 1, page, flags);
}
if (npages != 1)
return npages;
@@ -1935,10 +1937,10 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
}
EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest);
-int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- gpa_t gpa, unsigned long len)
+static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
+ struct gfn_to_hva_cache *ghc,
+ gpa_t gpa, unsigned long len)
{
- struct kvm_memslots *slots = kvm_memslots(kvm);
int offset = offset_in_page(gpa);
gfn_t start_gfn = gpa >> PAGE_SHIFT;
gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT;
@@ -1948,7 +1950,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
ghc->gpa = gpa;
ghc->generation = slots->generation;
ghc->len = len;
- ghc->memslot = gfn_to_memslot(kvm, start_gfn);
+ ghc->memslot = __gfn_to_memslot(slots, start_gfn);
ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
ghc->hva += offset;
@@ -1958,7 +1960,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
* verify that the entire region is valid here.
*/
while (start_gfn <= end_gfn) {
- ghc->memslot = gfn_to_memslot(kvm, start_gfn);
+ ghc->memslot = __gfn_to_memslot(slots, start_gfn);
ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
&nr_pages_avail);
if (kvm_is_error_hva(ghc->hva))
@@ -1970,47 +1972,62 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
-int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+ gpa_t gpa, unsigned long len)
+{
+ struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+ return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init);
+
+int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+ void *data, int offset, unsigned long len)
{
- struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
int r;
+ gpa_t gpa = ghc->gpa + offset;
- BUG_ON(len > ghc->len);
+ BUG_ON(len + offset > ghc->len);
if (slots->generation != ghc->generation)
- kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
+ __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
if (unlikely(!ghc->memslot))
- return kvm_write_guest(kvm, ghc->gpa, data, len);
+ return kvm_vcpu_write_guest(vcpu, gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
- r = __copy_to_user((void __user *)ghc->hva, data, len);
+ r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
if (r)
return -EFAULT;
- mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+ mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
+EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached);
-int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
{
- struct kvm_memslots *slots = kvm_memslots(kvm);
+ return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached);
+
+int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
+{
+ struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
int r;
BUG_ON(len > ghc->len);
if (slots->generation != ghc->generation)
- kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
+ __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
if (unlikely(!ghc->memslot))
- return kvm_read_guest(kvm, ghc->gpa, data, len);
+ return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
@@ -2021,7 +2038,7 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
+EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
@@ -2338,9 +2355,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
-static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kvm_vcpu_fault(struct vm_fault *vmf)
{
- struct kvm_vcpu *vcpu = vma->vm_file->private_data;
+ struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
struct page *page;
if (vmf->pgoff == 0)
@@ -3123,10 +3140,9 @@ static long kvm_vm_compat_ioctl(struct file *filp,
struct compat_kvm_dirty_log compat_log;
struct kvm_dirty_log log;
- r = -EFAULT;
if (copy_from_user(&compat_log, (void __user *)arg,
sizeof(compat_log)))
- goto out;
+ return -EFAULT;
log.slot = compat_log.slot;
log.padding1 = compat_log.padding1;
log.padding2 = compat_log.padding2;
@@ -3138,8 +3154,6 @@ static long kvm_vm_compat_ioctl(struct file *filp,
default:
r = kvm_vm_ioctl(filp, ioctl, arg);
}
-
-out:
return r;
}
#endif
@@ -3463,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_write(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3480,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
/* First try the device referenced by cookie. */
if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3530,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_read(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3542,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
+ if (!bus)
+ return -ENOMEM;
+
/* exclude ioeventfd which is limited by maximum fd */
if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
@@ -3561,37 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
}
/* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- int i, r;
+ int i;
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
- r = -ENOENT;
+ if (!bus)
+ return;
+
for (i = 0; i < bus->dev_count; i++)
if (bus->range[i].dev == dev) {
- r = 0;
break;
}
- if (r)
- return r;
+ if (i == bus->dev_count)
+ return;
new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
sizeof(struct kvm_io_range)), GFP_KERNEL);
- if (!new_bus)
- return -ENOMEM;
+ if (!new_bus) {
+ pr_err("kvm: failed to shrink bus, removing it completely\n");
+ goto broken;
+ }
memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
new_bus->dev_count--;
memcpy(new_bus->range + i, bus->range + i + 1,
(new_bus->dev_count - i) * sizeof(struct kvm_io_range));
+broken:
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
- return r;
+ return;
}
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -3604,6 +3631,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
srcu_idx = srcu_read_lock(&kvm->srcu);
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ if (!bus)
+ goto out_unlock;
dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
if (dev_idx < 0)
@@ -3630,7 +3659,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
* To avoid the race between open and the removal of the debugfs
* directory we test against the users count.
*/
- if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0))
+ if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
return -ENOENT;
if (simple_attr_open(inode, file, get, set, fmt)) {
@@ -3661,11 +3690,23 @@ static int vm_stat_get_per_vm(void *data, u64 *val)
return 0;
}
+static int vm_stat_clear_per_vm(void *data, u64 val)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+
+ if (val)
+ return -EINVAL;
+
+ *(ulong *)((void *)stat_data->kvm + stat_data->offset) = 0;
+
+ return 0;
+}
+
static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
{
__simple_attr_check_format("%llu\n", 0ull);
return kvm_debugfs_open(inode, file, vm_stat_get_per_vm,
- NULL, "%llu\n");
+ vm_stat_clear_per_vm, "%llu\n");
}
static const struct file_operations vm_stat_get_per_vm_fops = {
@@ -3691,11 +3732,26 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
return 0;
}
+static int vcpu_stat_clear_per_vm(void *data, u64 val)
+{
+ int i;
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+ struct kvm_vcpu *vcpu;
+
+ if (val)
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
+ *(u64 *)((void *)vcpu + stat_data->offset) = 0;
+
+ return 0;
+}
+
static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
{
__simple_attr_check_format("%llu\n", 0ull);
return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
- NULL, "%llu\n");
+ vcpu_stat_clear_per_vm, "%llu\n");
}
static const struct file_operations vcpu_stat_get_per_vm_fops = {
@@ -3730,7 +3786,26 @@ static int vm_stat_get(void *_offset, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
+static int vm_stat_clear(void *_offset, u64 val)
+{
+ unsigned offset = (long)_offset;
+ struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+
+ if (val)
+ return -EINVAL;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vm_stat_clear_per_vm((void *)&stat_tmp, 0);
+ }
+ spin_unlock(&kvm_lock);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n");
static int vcpu_stat_get(void *_offset, u64 *val)
{
@@ -3750,7 +3825,27 @@ static int vcpu_stat_get(void *_offset, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
+static int vcpu_stat_clear(void *_offset, u64 val)
+{
+ unsigned offset = (long)_offset;
+ struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+
+ if (val)
+ return -EINVAL;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
+ }
+ spin_unlock(&kvm_lock);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear,
+ "%llu\n");
static const struct file_operations *stat_fops[] = {
[KVM_STAT_VCPU] = &vcpu_stat_fops,
@@ -3768,7 +3863,7 @@ static int kvm_init_debug(void)
kvm_debugfs_num_entries = 0;
for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
+ if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))
goto out_dir;
@@ -3868,7 +3963,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_free_1;
}
- r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING",
+ r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
kvm_starting_cpu, kvm_dying_cpu);
if (r)
goto out_free_2;
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 1dd087da6f31..d32f239eb471 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -60,6 +60,19 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
symbol_put(vfio_group_put_external_user);
}
+static void kvm_vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
+{
+ void (*fn)(struct vfio_group *, struct kvm *);
+
+ fn = symbol_get(vfio_group_set_kvm);
+ if (!fn)
+ return;
+
+ fn(group, kvm);
+
+ symbol_put(vfio_group_set_kvm);
+}
+
static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
{
long (*fn)(struct vfio_group *, unsigned long);
@@ -159,6 +172,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
mutex_unlock(&kv->lock);
+ kvm_vfio_group_set_kvm(vfio_group, dev->kvm);
+
kvm_vfio_update_coherency(dev);
return 0;
@@ -196,6 +211,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
mutex_unlock(&kv->lock);
+ kvm_vfio_group_set_kvm(vfio_group, NULL);
+
kvm_vfio_group_put_external_user(vfio_group);
kvm_vfio_update_coherency(dev);
@@ -240,6 +257,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
struct kvm_vfio_group *kvg, *tmp;
list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+ kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
kvm_vfio_group_put_external_user(kvg->vfio_group);
list_del(&kvg->node);
kfree(kvg);
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index 52abac4bb6a2..6d2fcd6fcb25 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -195,7 +195,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
mutex_lock(&lock);
list_for_each_entry(tmp, &consumers, node) {
- if (tmp->token == consumer->token) {
+ if (tmp->token == consumer->token || tmp == consumer) {
mutex_unlock(&lock);
module_put(THIS_MODULE);
return -EBUSY;
@@ -245,7 +245,7 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
mutex_lock(&lock);
list_for_each_entry(tmp, &consumers, node) {
- if (tmp->token != consumer->token)
+ if (tmp != consumer)
continue;
list_for_each_entry(producer, &producers, node) {