summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorPeter Xu <peterx@redhat.com>2020-09-30 21:22:22 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2020-11-15 09:49:15 -0500
commitfb04a1eddb1a65b6588a021bdc132270d5ae48bb (patch)
tree0faceb84959c8f1b65f91d51c1bcc0c0a38ece43 /include/linux
parent28bd726aa404c0da8fd6852fe69bb4538a103b71 (diff)
KVM: X86: Implement ring-based dirty memory tracking
This patch is heavily based on previous work from Lei Cao <lei.cao@stratus.com> and Paolo Bonzini <pbonzini@redhat.com>. [1] KVM currently uses large bitmaps to track dirty memory. These bitmaps are copied to userspace when userspace queries KVM for its dirty page information. The use of bitmaps is mostly sufficient for live migration, as large parts of memory are be dirtied from one log-dirty pass to another. However, in a checkpointing system, the number of dirty pages is small and in fact it is often bounded---the VM is paused when it has dirtied a pre-defined number of pages. Traversing a large, sparsely populated bitmap to find set bits is time-consuming, as is copying the bitmap to user-space. A similar issue will be there for live migration when the guest memory is huge while the page dirty procedure is trivial. In that case for each dirty sync we need to pull the whole dirty bitmap to userspace and analyse every bit even if it's mostly zeros. The preferred data structure for above scenarios is a dense list of guest frame numbers (GFN). This patch series stores the dirty list in kernel memory that can be memory mapped into userspace to allow speedy harvesting. This patch enables dirty ring for X86 only. However it should be easily extended to other archs as well. [1] https://patchwork.kernel.org/patch/10471409/ Signed-off-by: Lei Cao <lei.cao@stratus.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Message-Id: <20201001012222.5767-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/kvm_dirty_ring.h103
-rw-r--r--include/linux/kvm_host.h13
2 files changed, 116 insertions, 0 deletions
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
new file mode 100644
index 000000000000..120e5e90fa1d
--- /dev/null
+++ b/include/linux/kvm_dirty_ring.h
@@ -0,0 +1,103 @@
+#ifndef KVM_DIRTY_RING_H
+#define KVM_DIRTY_RING_H
+
+#include <linux/kvm.h>
+
+/**
+ * kvm_dirty_ring: KVM internal dirty ring structure
+ *
+ * @dirty_index: free running counter that points to the next slot in
+ * dirty_ring->dirty_gfns, where a new dirty page should go
+ * @reset_index: free running counter that points to the next dirty page
+ * in dirty_ring->dirty_gfns for which dirty trap needs to
+ * be reenabled
+ * @size: size of the compact list, dirty_ring->dirty_gfns
+ * @soft_limit: when the number of dirty pages in the list reaches this
+ * limit, vcpu that owns this ring should exit to userspace
+ * to allow userspace to harvest all the dirty pages
+ * @dirty_gfns: the array to keep the dirty gfns
+ * @index: index of this dirty ring
+ */
+struct kvm_dirty_ring {
+ u32 dirty_index;
+ u32 reset_index;
+ u32 size;
+ u32 soft_limit;
+ struct kvm_dirty_gfn *dirty_gfns;
+ int index;
+};
+
+#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0)
+/*
+ * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should
+ * not be included as well, so define these nop functions for the arch.
+ */
+static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+ return 0;
+}
+
+static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
+ int index, u32 size)
+{
+ return 0;
+}
+
+static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+ return NULL;
+}
+
+static inline int kvm_dirty_ring_reset(struct kvm *kvm,
+ struct kvm_dirty_ring *ring)
+{
+ return 0;
+}
+
+static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring,
+ u32 slot, u64 offset)
+{
+}
+
+static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring,
+ u32 offset)
+{
+ return NULL;
+}
+
+static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+}
+
+static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+ return true;
+}
+
+#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+u32 kvm_dirty_ring_get_rsvd_entries(void);
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
+
+/*
+ * called with kvm->slots_lock held, returns the number of
+ * processed pages.
+ */
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
+
+/*
+ * returns =0: successfully pushed
+ * <0: unable to push, need to wait
+ */
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset);
+
+/* for use in vm_operations_struct */
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
+
+#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+#endif /* KVM_DIRTY_RING_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ca7c1459a8e3..864b156391c8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,7 @@
#include <linux/kvm_types.h>
#include <asm/kvm_host.h>
+#include <linux/kvm_dirty_ring.h>
#ifndef KVM_MAX_VCPU_ID
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -319,6 +320,7 @@ struct kvm_vcpu {
bool preempted;
bool ready;
struct kvm_vcpu_arch arch;
+ struct kvm_dirty_ring dirty_ring;
};
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -505,6 +507,7 @@ struct kvm {
struct srcu_struct irq_srcu;
pid_t userspace_pid;
unsigned int max_halt_poll_ns;
+ u32 dirty_ring_size;
};
#define kvm_err(fmt, ...) \
@@ -1477,4 +1480,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
+/*
+ * This defines how many reserved entries we want to keep before we
+ * kick the vcpu to the userspace to avoid dirty ring full. This
+ * value can be tuned to higher if e.g. PML is enabled on the host.
+ */
+#define KVM_DIRTY_RING_RSVD_ENTRIES 64
+
+/* Max number of entries allowed for each kvm dirty ring */
+#define KVM_DIRTY_RING_MAX_ENTRIES 65536
+
#endif