From d0bfb940ecabf0b44fb1fd80d8d60594e569e5ec Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: New guest debug interface This rips out the support for KVM_DEBUG_GUEST and introduces a new IOCTL instead: KVM_SET_GUEST_DEBUG. The IOCTL payload consists of a generic part, controlling the "main switch" and the single-step feature. The arch specific part adds an x86 interface for intercepting both types of debug exceptions separately and re-injecting them when the host was not interested. Moveover, the foundation for guest debugging via debug registers is layed. To signal breakpoint events properly back to userland, an arch-specific data block is now returned along KVM_EXIT_DEBUG. For x86, the arch block contains the PC, the debug exception, and relevant debug registers to tell debug events properly apart. The availability of this new interface is signaled by KVM_CAP_SET_GUEST_DEBUG. Empty stubs for not yet supported archs are provided. Note that both SVM and VTX are supported, but only the latter was tested yet. Based on the experience with all those VTX corner case, I would be fairly surprised if SVM will work out of the box. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 29a667ce35b0..f83ef9c7e89b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1755,13 +1755,13 @@ out_free2: r = 0; break; } - case KVM_DEBUG_GUEST: { - struct kvm_debug_guest dbg; + case KVM_SET_GUEST_DEBUG: { + struct kvm_guest_debug dbg; r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof dbg)) goto out; - r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg); + r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); if (r) goto out; r = 0; -- cgit v1.2.3 From 17071fe74fe0fbfdb03cd9b82f2490447cf1f986 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 16:25:11 +0800 Subject: KVM: Add support to disable MSI for assigned device MSI is always enabled by default for msi2intx=1. But if msi2intx=0, we have to disable MSI if guest require to do so. The patch also discard unnecessary msi2intx judgment if guest want to update MSI state. Notice KVM_DEV_IRQ_ASSIGN_MSI_ACTION is a mask which should cover all MSI related operations, though we only got one for now. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ae7a12c77427..73f348066866 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -550,6 +550,7 @@ struct kvm_assigned_irq { #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f83ef9c7e89b..04401e17c758 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -343,6 +343,14 @@ static int assigned_device_update_msi(struct kvm *kvm, adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; adev->guest_irq = airq->guest_irq; adev->ack_notifier.gsi = airq->guest_irq; + } else { + /* + * Guest require to disable device MSI, we disable MSI and + * re-enable INTx by default again. Notice it's only for + * non-msi2intx. + */ + assigned_device_update_intx(kvm, adev, airq); + return 0; } if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) @@ -379,6 +387,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, { int r = 0; struct kvm_assigned_dev_kernel *match; + u32 current_flags = 0, changed_flags; mutex_lock(&kvm->lock); @@ -416,8 +425,13 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, } } - if ((!msi2intx && - (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) || + if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && + (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) + current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI; + + changed_flags = assigned_irq->flags ^ current_flags; + + if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || (msi2intx && match->dev->msi_enabled)) { #ifdef CONFIG_X86 r = assigned_device_update_msi(kvm, match, assigned_irq); -- cgit v1.2.3 From 75858a84a6207f5e60196f6bbd18fde4250e5759 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Jan 2009 17:10:50 +0200 Subject: KVM: Interrupt mask notifiers for ioapic Allow clients to request notifications when the guest masks or unmasks a particular irq line. This complements irq ack notifications, as the guest will not ack an irq line that is masked. Currently implemented for the ioapic only. Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 17 +++++++++++++++++ virt/kvm/ioapic.c | 6 ++++++ virt/kvm/irq_comm.c | 24 ++++++++++++++++++++++++ virt/kvm/kvm_main.c | 3 +++ 4 files changed, 50 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3cf0ede3fd73..99963f36a6db 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -127,6 +127,10 @@ struct kvm { struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; #endif +#ifdef CONFIG_HAVE_KVM_IRQCHIP + struct hlist_head mask_notifier_list; +#endif + #ifdef KVM_ARCH_WANT_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_notifier_seq; @@ -320,6 +324,19 @@ struct kvm_assigned_dev_kernel { struct pci_dev *dev; struct kvm *kvm; }; + +struct kvm_irq_mask_notifier { + void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); + int irq; + struct hlist_node link; +}; + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); + void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 23b81cf242af..e85a2bcd2db1 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -101,6 +101,7 @@ static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; + bool mask_before, mask_after; switch (ioapic->ioregsel) { case IOAPIC_REG_VERSION: @@ -120,6 +121,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) ioapic_debug("change redir index %x val %x\n", index, val); if (index >= IOAPIC_NUM_PINS) return; + mask_before = ioapic->redirtbl[index].fields.mask; if (ioapic->ioregsel & 1) { ioapic->redirtbl[index].bits &= 0xffffffff; ioapic->redirtbl[index].bits |= (u64) val << 32; @@ -128,6 +130,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) ioapic->redirtbl[index].bits |= (u32) val; ioapic->redirtbl[index].fields.remote_irr = 0; } + mask_after = ioapic->redirtbl[index].fields.mask; + if (mask_before != mask_after) + kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); if (ioapic->irr & (1 << index)) ioapic_service(ioapic, index); break; @@ -426,3 +431,4 @@ int kvm_ioapic_init(struct kvm *kvm) kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); return 0; } + diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index aa5d1e5c497e..5162a411e4d2 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -99,3 +99,27 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) clear_bit(irq_source_id, &kvm->arch.irq_states[i]); clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); } + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn) +{ + kimn->irq = irq; + hlist_add_head(&kimn->link, &kvm->mask_notifier_list); +} + +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn) +{ + hlist_del(&kimn->link); +} + +void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) +{ + struct kvm_irq_mask_notifier *kimn; + struct hlist_node *n; + + hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) + if (kimn->irq == irq) + kimn->func(kimn, mask); +} + diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 04401e17c758..786a3ae373b0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -842,6 +842,9 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; +#ifdef CONFIG_HAVE_KVM_IRQCHIP + INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); -- cgit v1.2.3 From 399ec807ddc38ecccf8c06dbde04531cbdc63e11 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 19 Nov 2008 13:58:46 +0200 Subject: KVM: Userspace controlled irq routing Currently KVM has a static routing from GSI numbers to interrupts (namely, 0-15 are mapped 1:1 to both PIC and IOAPIC, and 16:23 are mapped 1:1 to the IOAPIC). This is insufficient for several reasons: - HPET requires non 1:1 mapping for the timer interrupt - MSIs need a new method to assign interrupt numbers and dispatch them - ACPI APIC mode needs to be able to reassign the PCI LINK interrupts to the ioapics This patch implements an interrupt routing table (as a linked list, but this can be easily changed) and a userspace interface to replace the table. The routing table is initialized according to the current hardwired mapping. Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 5 ++ arch/x86/kvm/x86.c | 6 ++ include/linux/kvm.h | 33 ++++++++++ include/linux/kvm_host.h | 31 +++++++++ virt/kvm/irq_comm.c | 168 +++++++++++++++++++++++++++++++++++++++++++++-- virt/kvm/kvm_main.c | 36 ++++++++++ 6 files changed, 275 insertions(+), 4 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 1477f91617a5..dbf527a57341 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -919,6 +919,11 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_ioapic_init(kvm); if (r) goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vioapic); + goto out; + } break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 141a0166e51c..32e3a7ec6ad2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1835,6 +1835,12 @@ long kvm_arch_vm_ioctl(struct file *filp, } } else goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vpic); + kfree(kvm->arch.vioapic); + goto out; + } break; case KVM_CREATE_PIT: mutex_lock(&kvm->lock); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 73f348066866..7a5d73a8d4fa 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -398,6 +398,38 @@ struct kvm_trace_rec { #endif #if defined(CONFIG_X86) #define KVM_CAP_REINJECT_CONTROL 24 +#endif +#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#define KVM_CAP_IRQ_ROUTING 25 +#endif + +#ifdef KVM_CAP_IRQ_ROUTING + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + __u32 pad[8]; + } u; +}; + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + #endif /* @@ -430,6 +462,7 @@ struct kvm_trace_rec { _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ struct kvm_assigned_pci_dev) +#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 99963f36a6db..ce285e01bd57 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -107,6 +107,19 @@ struct kvm_memory_slot { int user_alloc; }; +struct kvm_kernel_irq_routing_entry { + u32 gsi; + void (*set)(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level); + union { + struct { + unsigned irqchip; + unsigned pin; + } irqchip; + }; + struct list_head link; +}; + struct kvm { struct mutex lock; /* protects the vcpus array and APIC accesses */ spinlock_t mmu_lock; @@ -128,6 +141,7 @@ struct kvm { #endif #ifdef CONFIG_HAVE_KVM_IRQCHIP + struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */ struct hlist_head mask_notifier_list; #endif @@ -480,4 +494,21 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se } #endif +#ifdef CONFIG_HAVE_KVM_IRQCHIP + +#define KVM_MAX_IRQ_ROUTES 1024 + +int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *entries, + unsigned nr, + unsigned flags); +void kvm_free_irq_routing(struct kvm *kvm); + +#else + +static inline void kvm_free_irq_routing(struct kvm *kvm) {} + +#endif + #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 5162a411e4d2..a797fa5e6420 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -24,9 +24,24 @@ #include "ioapic.h" +static void kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ +#ifdef CONFIG_X86 + kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); +#endif +} + +static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ + kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); +} + /* This should be called with the kvm->lock mutex held */ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { + struct kvm_kernel_irq_routing_entry *e; unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; /* Logical OR for level trig interrupt */ @@ -39,10 +54,9 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state)); -#ifdef CONFIG_X86 - kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state)); -#endif + list_for_each_entry(e, &kvm->irq_routing, link) + if (e->gsi == irq) + e->set(e, kvm, !!(*irq_state)); } void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) @@ -123,3 +137,149 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) kimn->func(kimn, mask); } +static void __kvm_free_irq_routing(struct list_head *irq_routing) +{ + struct kvm_kernel_irq_routing_entry *e, *n; + + list_for_each_entry_safe(e, n, irq_routing, link) + kfree(e); +} + +void kvm_free_irq_routing(struct kvm *kvm) +{ + __kvm_free_irq_routing(&kvm->irq_routing); +} + +int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + int delta; + + e->gsi = ue->gsi; + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + delta = 0; + switch (ue->u.irqchip.irqchip) { + case KVM_IRQCHIP_PIC_MASTER: + e->set = kvm_set_pic_irq; + break; + case KVM_IRQCHIP_PIC_SLAVE: + e->set = kvm_set_pic_irq; + delta = 8; + break; + case KVM_IRQCHIP_IOAPIC: + e->set = kvm_set_ioapic_irq; + break; + default: + goto out; + } + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin + delta; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + + +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *ue, + unsigned nr, + unsigned flags) +{ + struct list_head irq_list = LIST_HEAD_INIT(irq_list); + struct list_head tmp = LIST_HEAD_INIT(tmp); + struct kvm_kernel_irq_routing_entry *e = NULL; + unsigned i; + int r; + + for (i = 0; i < nr; ++i) { + r = -EINVAL; + if (ue->gsi >= KVM_MAX_IRQ_ROUTES) + goto out; + if (ue->flags) + goto out; + r = -ENOMEM; + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + goto out; + r = setup_routing_entry(e, ue); + if (r) + goto out; + ++ue; + list_add(&e->link, &irq_list); + e = NULL; + } + + mutex_lock(&kvm->lock); + list_splice(&kvm->irq_routing, &tmp); + INIT_LIST_HEAD(&kvm->irq_routing); + list_splice(&irq_list, &kvm->irq_routing); + INIT_LIST_HEAD(&irq_list); + list_splice(&tmp, &irq_list); + mutex_unlock(&kvm->lock); + + r = 0; + +out: + kfree(e); + __kvm_free_irq_routing(&irq_list); + return r; +} + +#define IOAPIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } +#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) + +#ifdef CONFIG_X86 +#define SELECT_PIC(irq) \ + ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) +# define PIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } +# define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) +#else +# define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq) +#endif + +static const struct kvm_irq_routing_entry default_routing[] = { + ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), + ROUTING_ENTRY2(2), ROUTING_ENTRY2(3), + ROUTING_ENTRY2(4), ROUTING_ENTRY2(5), + ROUTING_ENTRY2(6), ROUTING_ENTRY2(7), + ROUTING_ENTRY2(8), ROUTING_ENTRY2(9), + ROUTING_ENTRY2(10), ROUTING_ENTRY2(11), + ROUTING_ENTRY2(12), ROUTING_ENTRY2(13), + ROUTING_ENTRY2(14), ROUTING_ENTRY2(15), + ROUTING_ENTRY1(16), ROUTING_ENTRY1(17), + ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), + ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), + ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), +#ifdef CONFIG_IA64 + ROUTING_ENTRY1(24), ROUTING_ENTRY1(25), + ROUTING_ENTRY1(26), ROUTING_ENTRY1(27), + ROUTING_ENTRY1(28), ROUTING_ENTRY1(29), + ROUTING_ENTRY1(30), ROUTING_ENTRY1(31), + ROUTING_ENTRY1(32), ROUTING_ENTRY1(33), + ROUTING_ENTRY1(34), ROUTING_ENTRY1(35), + ROUTING_ENTRY1(36), ROUTING_ENTRY1(37), + ROUTING_ENTRY1(38), ROUTING_ENTRY1(39), + ROUTING_ENTRY1(40), ROUTING_ENTRY1(41), + ROUTING_ENTRY1(42), ROUTING_ENTRY1(43), + ROUTING_ENTRY1(44), ROUTING_ENTRY1(45), + ROUTING_ENTRY1(46), ROUTING_ENTRY1(47), +#endif +}; + +int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + return kvm_set_irq_routing(kvm, default_routing, + ARRAY_SIZE(default_routing), 0); +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 786a3ae373b0..c65484b471c6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -843,6 +843,7 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP + INIT_LIST_HEAD(&kvm->irq_routing); INIT_HLIST_HEAD(&kvm->mask_notifier_list); #endif @@ -926,6 +927,7 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); + kvm_free_irq_routing(kvm); kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET @@ -1945,6 +1947,36 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } +#endif +#ifdef KVM_CAP_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); @@ -2012,6 +2044,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: return 1; +#ifdef CONFIG_HAVE_KVM_IRQCHIP + case KVM_CAP_IRQ_ROUTING: + return 1; +#endif default: break; } -- cgit v1.2.3 From 79950e1073150909619b7c0f9a39a2fea83a42d8 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 10 Feb 2009 13:57:06 +0800 Subject: KVM: Use irq routing API for MSI Merge MSI userspace interface with IRQ routing table. Notice the API have been changed, and using IRQ routing table would be the only interface kvm-userspace supported. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 9 ++++++ include/linux/kvm_host.h | 2 +- virt/kvm/irq_comm.c | 78 +++++++++++++++++++++++++++++++++++++++++++----- virt/kvm/kvm_main.c | 70 ++++--------------------------------------- 4 files changed, 86 insertions(+), 73 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 869462ca7625..2163b3dd36e7 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -410,8 +410,16 @@ struct kvm_irq_routing_irqchip { __u32 pin; }; +struct kvm_irq_routing_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 pad; +}; + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 +#define KVM_IRQ_ROUTING_MSI 2 struct kvm_irq_routing_entry { __u32 gsi; @@ -420,6 +428,7 @@ struct kvm_irq_routing_entry { __u32 pad; union { struct kvm_irq_routing_irqchip irqchip; + struct kvm_irq_routing_msi msi; __u32 pad[8]; } u; }; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c03a0a9a8584..339eda3ca6ee 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -116,6 +116,7 @@ struct kvm_kernel_irq_routing_entry { unsigned irqchip; unsigned pin; } irqchip; + struct msi_msg msi; }; struct list_head link; }; @@ -327,7 +328,6 @@ struct kvm_assigned_dev_kernel { int host_irq; bool host_irq_disabled; int guest_irq; - struct msi_msg guest_msi; #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) #define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 7aa5086c8622..6bc7439eff6e 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -20,6 +20,11 @@ */ #include + +#ifdef CONFIG_X86 +#include +#endif + #include "irq.h" #include "ioapic.h" @@ -38,17 +43,70 @@ static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } +static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ + int vcpu_id; + struct kvm_vcpu *vcpu; + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) + >> MSI_ADDR_DEST_ID_SHIFT; + int vector = (e->msi.data & MSI_DATA_VECTOR_MASK) + >> MSI_DATA_VECTOR_SHIFT; + int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, + (unsigned long *)&e->msi.address_lo); + int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, + (unsigned long *)&e->msi.data); + int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, + (unsigned long *)&e->msi.data); + u32 deliver_bitmask; + + BUG_ON(!ioapic); + + deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, + dest_id, dest_mode); + /* IOAPIC delivery mode value is the same as MSI here */ + switch (delivery_mode) { + case IOAPIC_LOWEST_PRIORITY: + vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, + deliver_bitmask); + if (vcpu != NULL) + kvm_apic_set_irq(vcpu, vector, trig_mode); + else + printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); + break; + case IOAPIC_FIXED: + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { + if (!(deliver_bitmask & (1 << vcpu_id))) + continue; + deliver_bitmask &= ~(1 << vcpu_id); + vcpu = ioapic->kvm->vcpus[vcpu_id]; + if (vcpu) + kvm_apic_set_irq(vcpu, vector, trig_mode); + } + break; + default: + break; + } +} + /* This should be called with the kvm->lock mutex held */ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { struct kvm_kernel_irq_routing_entry *e; - unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; + unsigned long *irq_state, sig_level; + + if (irq < KVM_IOAPIC_NUM_PINS) { + irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; - /* Logical OR for level trig interrupt */ - if (level) - set_bit(irq_source_id, irq_state); - else - clear_bit(irq_source_id, irq_state); + /* Logical OR for level trig interrupt */ + if (level) + set_bit(irq_source_id, irq_state); + else + clear_bit(irq_source_id, irq_state); + sig_level = !!(*irq_state); + } else /* Deal with MSI/MSI-X */ + sig_level = 1; /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore @@ -56,7 +114,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) */ list_for_each_entry(e, &kvm->irq_routing, link) if (e->gsi == irq) - e->set(e, kvm, !!(*irq_state)); + e->set(e, kvm, sig_level); } void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) @@ -186,6 +244,12 @@ int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->irqchip.irqchip = ue->u.irqchip.irqchip; e->irqchip.pin = ue->u.irqchip.pin + delta; break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; default: goto out; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c65484b471c6..266bdaf0ce44 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -47,10 +47,6 @@ #include #include -#ifdef CONFIG_X86 -#include -#endif - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET #include "coalesced_mmio.h" #endif @@ -85,57 +81,6 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, static bool kvm_rebooting; #ifdef KVM_CAP_DEVICE_ASSIGNMENT - -#ifdef CONFIG_X86 -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) -{ - int vcpu_id; - struct kvm_vcpu *vcpu; - struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm); - int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK) - >> MSI_ADDR_DEST_ID_SHIFT; - int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK) - >> MSI_DATA_VECTOR_SHIFT; - int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.address_lo); - int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, - (unsigned long *)&dev->guest_msi.data); - int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.data); - u32 deliver_bitmask; - - BUG_ON(!ioapic); - - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, - dest_id, dest_mode); - /* IOAPIC delivery mode value is the same as MSI here */ - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); - if (vcpu != NULL) - kvm_apic_set_irq(vcpu, vector, trig_mode); - else - printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); - break; - case IOAPIC_FIXED: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) - kvm_apic_set_irq(vcpu, vector, trig_mode); - } - break; - default: - printk(KERN_INFO "kvm: unsupported MSI delivery mode\n"); - } -} -#else -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {} -#endif - static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) { @@ -162,13 +107,10 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * finer-grained lock, update this */ mutex_lock(&assigned_dev->kvm->lock); - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - else if (assigned_dev->irq_requested_type & - KVM_ASSIGNED_DEV_GUEST_MSI) { - assigned_device_msi_dispatch(assigned_dev); + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) { enable_irq(assigned_dev->host_irq); assigned_dev->host_irq_disabled = false; } @@ -331,17 +273,15 @@ static int assigned_device_update_msi(struct kvm *kvm, { int r; + adev->guest_irq = airq->guest_irq; if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { /* x86 don't care upper address of guest msi message addr */ adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; - adev->guest_msi.address_lo = airq->guest_msi.addr_lo; - adev->guest_msi.data = airq->guest_msi.data; adev->ack_notifier.gsi = -1; } else if (msi2intx) { adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; - adev->guest_irq = airq->guest_irq; adev->ack_notifier.gsi = airq->guest_irq; } else { /* -- cgit v1.2.3 From fc5659c8c6b6c4e02ac354b369017c1bf231f347 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 18 Feb 2009 14:08:58 +0100 Subject: KVM: MMU: handle compound pages in kvm_is_mmio_pfn The function kvm_is_mmio_pfn is called before put_page is called on a page by KVM. This is a problem when when this function is called on some struct page which is part of a compund page. It does not test the reserved flag of the compound page but of the struct page within the compount page. This is a problem when KVM works with hugepages allocated at boot time. These pages have the reserved bit set in all tail pages. Only the flag in the compount head is cleared. KVM would not put such a page which results in a memory leak. Signed-off-by: Joerg Roedel Acked-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 266bdaf0ce44..0ed662dc72d2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -535,8 +535,10 @@ static inline int valid_vcpu(int n) inline int kvm_is_mmio_pfn(pfn_t pfn) { - if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + if (pfn_valid(pfn)) { + struct page *page = compound_head(pfn_to_page(pfn)); + return PageReserved(page); + } return true; } -- cgit v1.2.3 From 4a906e49f103c2e544148a209ba1db316510799f Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 13 Feb 2009 17:27:51 +0800 Subject: KVM: fix kvm_vm_ioctl_deassign_device only need to set assigned_dev_id for deassignment, use match->flags to judge and deassign it. Acked-by: Mark McLoughlin Signed-off-by: Weidong Han Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0ed662dc72d2..c4278975c8ca 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -517,7 +517,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, goto out; } - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) + if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) kvm_deassign_device(kvm, match); kvm_free_assigned_device(kvm, match); -- cgit v1.2.3 From 36463146ffb7eee4582ed785a8c8be213b8ed110 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 16 Mar 2009 16:33:43 +0800 Subject: KVM: Get support IRQ routing entry counts In capability probing ioctl. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c4278975c8ca..605697e9c4dd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1988,7 +1988,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: - return 1; + return KVM_MAX_IRQ_ROUTES; #endif default: break; -- cgit v1.2.3