From 5972e9535e94bf875eb8eab8a667ba04c7583874 Mon Sep 17 00:00:00 2001 From: Markus Rechberger Date: Mon, 19 Feb 2007 14:37:47 +0200 Subject: KVM: Use page_private()/set_page_private() apis Besides using an established api, this allows using kvm in older kernels. Signed-off-by: Markus Rechberger Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index be793770f31b..a1a93368f314 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -298,18 +298,18 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte) if (!is_rmap_pte(*spte)) return; page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); - if (!page->private) { + if (!page_private(page)) { rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); - page->private = (unsigned long)spte; - } else if (!(page->private & 1)) { + set_page_private(page,(unsigned long)spte); + } else if (!(page_private(page) & 1)) { rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); desc = mmu_alloc_rmap_desc(vcpu); - desc->shadow_ptes[0] = (u64 *)page->private; + desc->shadow_ptes[0] = (u64 *)page_private(page); desc->shadow_ptes[1] = spte; - page->private = (unsigned long)desc | 1; + set_page_private(page,(unsigned long)desc | 1); } else { rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); - desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) desc = desc->more; if (desc->shadow_ptes[RMAP_EXT-1]) { @@ -337,12 +337,12 @@ static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu, if (j != 0) return; if (!prev_desc && !desc->more) - page->private = (unsigned long)desc->shadow_ptes[0]; + set_page_private(page,(unsigned long)desc->shadow_ptes[0]); else if (prev_desc) prev_desc->more = desc->more; else - page->private = (unsigned long)desc->more | 1; + set_page_private(page,(unsigned long)desc->more | 1); mmu_free_rmap_desc(vcpu, desc); } @@ -356,20 +356,20 @@ static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte) if (!is_rmap_pte(*spte)) return; page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); - if (!page->private) { + if (!page_private(page)) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); BUG(); - } else if (!(page->private & 1)) { + } else if (!(page_private(page) & 1)) { rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); - if ((u64 *)page->private != spte) { + if ((u64 *)page_private(page) != spte) { printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", spte, *spte); BUG(); } - page->private = 0; + set_page_private(page,0); } else { rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); - desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); prev_desc = NULL; while (desc) { for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) @@ -398,11 +398,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) BUG_ON(!slot); page = gfn_to_page(slot, gfn); - while (page->private) { - if (!(page->private & 1)) - spte = (u64 *)page->private; + while (page_private(page)) { + if (!(page_private(page) & 1)) + spte = (u64 *)page_private(page); else { - desc = (struct kvm_rmap_desc *)(page->private & ~1ul); + desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); spte = desc->shadow_ptes[0]; } BUG_ON(!spte); @@ -1218,7 +1218,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) INIT_LIST_HEAD(&page_header->link); if ((page = alloc_page(GFP_KERNEL)) == NULL) goto error_1; - page->private = (unsigned long)page_header; + set_page_private(page, (unsigned long)page_header); page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; memset(__va(page_header->page_hpa), 0, PAGE_SIZE); list_add(&page_header->link, &vcpu->free_pages); -- cgit v1.2.3 From ac1b714e78c8f0b252f8d8872e6ce6f898a123b3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 8 Mar 2007 17:13:32 +0200 Subject: KVM: MMU: Fix guest writes to nonpae pde KVM shadow page tables are always in pae mode, regardless of the guest setting. This means that a guest pde (mapping 4MB of memory) is mapped to two shadow pdes (mapping 2MB each). When the guest writes to a pte or pde, we intercept the write and emulate it. We also remove any shadowed mappings corresponding to the write. Since the mmu did not account for the doubling in the number of pdes, it removed the wrong entry, resulting in a mismatch between shadow page tables and guest page tables, followed shortly by guest memory corruption. This patch fixes the problem by detecting the special case of writing to a non-pae pde and adjusting the address and number of shadow pdes zapped accordingly. Acked-by: Ingo Molnar Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index a1a93368f314..2cb48937be44 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1093,22 +1093,40 @@ out: return r; } +static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page, + u64 *spte) +{ + u64 pte; + struct kvm_mmu_page *child; + + pte = *spte; + if (is_present_pte(pte)) { + if (page->role.level == PT_PAGE_TABLE_LEVEL) + rmap_remove(vcpu, spte); + else { + child = page_header(pte & PT64_BASE_ADDR_MASK); + mmu_page_remove_parent_pte(vcpu, child, spte); + } + } + *spte = 0; +} + void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *page; - struct kvm_mmu_page *child; struct hlist_node *node, *n; struct hlist_head *bucket; unsigned index; u64 *spte; - u64 pte; unsigned offset = offset_in_page(gpa); unsigned pte_size; unsigned page_offset; unsigned misaligned; int level; int flooded = 0; + int npte; pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); if (gfn == vcpu->last_pt_write_gfn) { @@ -1144,22 +1162,26 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) } page_offset = offset; level = page->role.level; + npte = 1; if (page->role.glevels == PT32_ROOT_LEVEL) { - page_offset <<= 1; /* 32->64 */ + page_offset <<= 1; /* 32->64 */ + /* + * A 32-bit pde maps 4MB while the shadow pdes map + * only 2MB. So we need to double the offset again + * and zap two pdes instead of one. + */ + if (level == PT32_ROOT_LEVEL) { + page_offset <<= 1; + npte = 2; + } page_offset &= ~PAGE_MASK; } spte = __va(page->page_hpa); spte += page_offset / sizeof(*spte); - pte = *spte; - if (is_present_pte(pte)) { - if (level == PT_PAGE_TABLE_LEVEL) - rmap_remove(vcpu, spte); - else { - child = page_header(pte & PT64_BASE_ADDR_MASK); - mmu_page_remove_parent_pte(vcpu, child, spte); - } + while (npte--) { + mmu_pre_write_zap_pte(vcpu, page, spte); + ++spte; } - *spte = 0; } } -- cgit v1.2.3 From 27aba76615eeb36af84118e8ea6d35ffa51fd1e3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 9 Mar 2007 13:04:31 +0200 Subject: KVM: MMU: Fix host memory corruption on i386 with >= 4GB ram PAGE_MASK is an unsigned long, so using it to mask physical addresses on i386 (which are 64-bit wide) leads to truncation. This can result in page->private of unrelated memory pages being modified, with disasterous results. Fix by not using PAGE_MASK for physical addresses; instead calculate the correct value directly from PAGE_SIZE. Also fix a similar BUG_ON(). Acked-by: Ingo Molnar Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 2cb48937be44..e85b4c7c36f7 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -131,7 +131,7 @@ static int dbg = 1; (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) -#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK) +#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) #define PT64_DIR_BASE_ADDR_MASK \ (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) @@ -406,8 +406,8 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) spte = desc->shadow_ptes[0]; } BUG_ON(!spte); - BUG_ON((*spte & PT64_BASE_ADDR_MASK) != - page_to_pfn(page) << PAGE_SHIFT); + BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT + != page_to_pfn(page)); BUG_ON(!(*spte & PT_PRESENT_MASK)); BUG_ON(!(*spte & PT_WRITABLE_MASK)); rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); -- cgit v1.2.3 From 6b8d0f9b180cb93513bb65f705b299370f0357a1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 18 Apr 2007 11:18:18 +0300 Subject: KVM: Fix off-by-one when writing to a nonpae guest pde Nonpae guest pdes are shadowed by two pae ptes, so we double the offset twice: once to account for the pte size difference, and once because we need to shadow pdes for a single guest pde. But when writing to the upper guest pde we also need to truncate the lower bits, otherwise the multiply shifts these bits into the pde index and causes an access to the wrong shadow pde. If we're at the end of the page (accessing the very last guest pde) we can even overflow into the next host page and oops. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index e85b4c7c36f7..cab26f301eab 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1171,6 +1171,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) * and zap two pdes instead of one. */ if (level == PT32_ROOT_LEVEL) { + page_offset &= ~7; /* kill rounding error */ page_offset <<= 1; npte = 2; } -- cgit v1.2.3