author     Stephen Rothwell <sfr@canb.auug.org.au>  2013-02-25 15:44:56 +1100
committer  Stephen Rothwell <sfr@canb.auug.org.au>  2013-02-25 15:44:56 +1100
commit     b8857e76b29556323c13ce029c5b5963084e1870
tree       0bf3c0569560883d7f8afb9bd5d89ad9b8fabda2  /arch/x86
parent     23d3a6aee3cab5bfb7430bd43eb75ec12d7a99f0
parent     c84bcd63ab87c8f176e11d725ea73c5df0f2c716

Merge branch 'akpm/master'
Diffstat (limited to 'arch/x86')

 arch/x86/Kconfig               |  1
 arch/x86/ia32/ia32_aout.c      |  2
 arch/x86/include/asm/e820.h    |  2
 arch/x86/kernel/e820.c         | 72
 arch/x86/kernel/kprobes/core.c |  8
 arch/x86/kvm/mmu.c             | 26
 arch/x86/mm/fault.c            |  8
 arch/x86/mm/mmap.c             |  2
 arch/x86/mm/numa.c             |  3
 arch/x86/mm/pageattr.c         | 50
 arch/x86/platform/efi/efi.c    | 15
 11 files changed, 141 insertions(+), 48 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6a9383370311..a4f24f5b1218 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,6 +112,7 @@ config X86
select GENERIC_STRNLEN_USER
select HAVE_CONTEXT_TRACKING if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_VIRT_TO_BUS
select MODULES_USE_ELF_REL if X86_32
select MODULES_USE_ELF_RELA if X86_64
select CLONE_BACKWARDS if X86_32
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 03abf9b70011..14fb6f9a7363 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -309,8 +309,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
- current->mm->free_area_cache = TASK_UNMAPPED_BASE;
- current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
if (retval < 0) {
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index cccd07fa5e3a..b8e9224f0b45 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -17,6 +17,8 @@ extern unsigned long pci_mem_start;
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern int e820_all_mapped(u64 start, u64 end, unsigned type);
extern void e820_add_region(u64 start, u64 size, int type);
+extern void e820_add_limit_region(u64 start, u64 size, int type);
+extern void e820_adjust_region(u64 *start, u64 *size);
extern void e820_print_map(char *who);
extern int
sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d32abeabbda5..0d5bb689649a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -47,6 +47,7 @@ unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
+static u64 mem_limit = ~0ULL;
/*
* This function checks if any part of the range <start,end> is mapped
@@ -108,7 +109,7 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
* Add a memory region to the kernel e820 map.
*/
static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
- int type)
+ int type, bool limited)
{
int x = e820x->nr_map;
@@ -119,6 +120,22 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
return;
}
+ if (limited) {
+ if (start >= mem_limit) {
+ printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n",
+ (unsigned long long)start,
+ (unsigned long long)(start + size - 1));
+ return;
+ }
+
+ if (mem_limit - start < size) {
+ printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n",
+ (unsigned long long)mem_limit,
+ (unsigned long long)(start + size - 1));
+ size = mem_limit - start;
+ }
+ }
+
e820x->map[x].addr = start;
e820x->map[x].size = size;
e820x->map[x].type = type;
@@ -127,7 +144,37 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
void __init e820_add_region(u64 start, u64 size, int type)
{
- __e820_add_region(&e820, start, size, type);
+ __e820_add_region(&e820, start, size, type, false);
+}
+
+/*
+ * do_add_efi_memmap() calls this function.
+ *
+ * Note: BOOT_SERVICES_{CODE,DATA} regions on some EFI machines are marked
+ * as E820_RAM and need to remain mapped, so use e820_add_region() (not this
+ * function) to add BOOT_SERVICES_{CODE,DATA} regions.
+ */
+void __init e820_add_limit_region(u64 start, u64 size, int type)
+{
+ /*
+ * efi_init() is called after finish_e820_parsing(), so we should
+ * check whether [start, start + size) contains addresses above
+ * mem_limit if the type is E820_RAM.
+ */
+ __e820_add_region(&e820, start, size, type, type == E820_RAM);
+}
+
+void __init e820_adjust_region(u64 *start, u64 *size)
+{
+ if (*start >= mem_limit) {
+ *size = 0;
+ return;
+ }
+
+ if (mem_limit - *start < *size)
+ *size = mem_limit - *start;
+
+ return;
}
static void __init e820_print_type(u32 type)
@@ -455,8 +502,9 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
/* new range is totally covered? */
if (ei->addr < start && ei_end > end) {
- __e820_add_region(e820x, start, size, new_type);
- __e820_add_region(e820x, end, ei_end - end, ei->type);
+ __e820_add_region(e820x, start, size, new_type, false);
+ __e820_add_region(e820x, end, ei_end - end, ei->type,
+ false);
ei->size = start - ei->addr;
real_updated_size += size;
continue;
@@ -469,7 +517,7 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
continue;
__e820_add_region(e820x, final_start, final_end - final_start,
- new_type);
+ new_type, false);
real_updated_size += final_end - final_start;
@@ -809,7 +857,7 @@ static int userdef __initdata;
/* "mem=nopentium" disables the 4MB page tables. */
static int __init parse_memopt(char *p)
{
- u64 mem_size;
+ char *oldp;
if (!p)
return -EINVAL;
@@ -825,11 +873,11 @@ static int __init parse_memopt(char *p)
}
userdef = 1;
- mem_size = memparse(p, &p);
+ oldp = p;
+ mem_limit = memparse(p, &p);
/* don't remove all of memory when handling "mem={invalid}" param */
- if (mem_size == 0)
+ if (mem_limit == 0 || p == oldp)
return -EINVAL;
- e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
}
@@ -895,6 +943,12 @@ early_param("memmap", parse_memmap_opt);
void __init finish_e820_parsing(void)
{
+ if (mem_limit != ~0ULL) {
+ userdef = 1;
+ e820_remove_range(mem_limit, ULLONG_MAX - mem_limit,
+ E820_RAM, 1);
+ }
+
if (userdef) {
u32 nr = e820.nr_map;
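
The e820.c hunks above change how "mem=" is handled: parse_memopt() now records the value in a global mem_limit, finish_e820_parsing() trims the map once, and __e820_add_region()/e820_adjust_region() clip later additions against the limit. The following user-space sketch shows that clipping arithmetic (an editor's illustration, not part of the patch; the adjust_region() name and the 1G limit are made up for the example):

/* Editor's sketch -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>

static uint64_t mem_limit = 0x40000000ULL;      /* as if booted with mem=1G */

/* Mirrors e820_adjust_region(): clip [*start, *start + *size) to mem_limit. */
static void adjust_region(uint64_t *start, uint64_t *size)
{
        if (*start >= mem_limit) {
                *size = 0;                      /* entirely above the limit */
                return;
        }
        if (mem_limit - *start < *size)
                *size = mem_limit - *start;     /* trim the tail */
}

int main(void)
{
        uint64_t start = 0x3ff00000ULL, size = 0x200000ULL;

        adjust_region(&start, &size);
        printf("kept [mem %#llx-%#llx]\n",
               (unsigned long long)start,
               (unsigned long long)(start + size - 1));
        return 0;
}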
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index e124554598ee..3f06e6149981 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -652,7 +652,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
- struct hlist_node *node, *tmp;
+ struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
kprobe_opcode_t *correct_ret_addr = NULL;
@@ -682,7 +682,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
* will be the real return address, and all the rest will
* point to kretprobe_trampoline.
*/
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
@@ -701,7 +701,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
kretprobe_assert(ri, orig_ret_address, trampoline_address);
correct_ret_addr = ri->ret_addr;
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
@@ -728,7 +728,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
kretprobe_hash_unlock(current, &flags);
- hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
}
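
The kprobes hunks above are a mechanical adaptation to the hlist iterator API change: the entry iterators now derive the current entry directly from the node embedded in it (via container_of), so the separate "struct hlist_node *node" local and macro argument disappear. Below is a minimal user-space analogue of the new-style safe iteration (an editor's sketch; the list, macro, and names are simplified stand-ins, not the kernel's hlist implementation):

#include <stddef.h>
#include <stdio.h>

struct node { struct node *next; };
struct item { int value; struct node link; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/*
 * New-style "safe" iteration: 'pos' is the containing entry and 'tmp' only
 * caches the next node so the current entry may be freed; no separate node
 * cursor is needed any more.
 */
#define for_each_item_safe(pos, tmp, head)                                  \
        for (pos = (head) ? container_of((head), struct item, link) : NULL; \
             pos && ((tmp) = pos->link.next, 1);                            \
             pos = (tmp) ? container_of((tmp), struct item, link) : NULL)

int main(void)
{
        struct item a = { 1, { NULL } };
        struct item b = { 2, { &a.link } };
        struct node *head = &b.link;
        struct node *tmp;
        struct item *pos;

        for_each_item_safe(pos, tmp, head)
                printf("value %d\n", pos->value);
        return 0;
}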
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4ed3edbe06bd..956ca358108a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1644,13 +1644,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);
-#define for_each_gfn_sp(kvm, sp, gfn, pos) \
- hlist_for_each_entry(sp, pos, \
+#define for_each_gfn_sp(kvm, sp, gfn) \
+ hlist_for_each_entry(sp, \
&(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
if ((sp)->gfn != (gfn)) {} else
-#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \
- hlist_for_each_entry(sp, pos, \
+#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \
+ hlist_for_each_entry(sp, \
&(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
if ((sp)->gfn != (gfn) || (sp)->role.direct || \
(sp)->role.invalid) {} else
@@ -1706,11 +1706,10 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct kvm_mmu_page *s;
- struct hlist_node *node;
LIST_HEAD(invalid_list);
bool flush = false;
- for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
if (!s->unsync)
continue;
@@ -1848,7 +1847,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
union kvm_mmu_page_role role;
unsigned quadrant;
struct kvm_mmu_page *sp;
- struct hlist_node *node;
bool need_sync = false;
role = vcpu->arch.mmu.base_role;
@@ -1863,7 +1861,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
- for_each_gfn_sp(vcpu->kvm, sp, gfn, node) {
+ for_each_gfn_sp(vcpu->kvm, sp, gfn) {
if (!need_sync && sp->unsync)
need_sync = true;
@@ -2151,14 +2149,13 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_mmu_page *sp;
- struct hlist_node *node;
LIST_HEAD(invalid_list);
int r;
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
spin_lock(&kvm->mmu_lock);
- for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
+ for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
@@ -2288,9 +2285,8 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct kvm_mmu_page *s;
- struct hlist_node *node;
- for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
if (s->unsync)
continue;
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
@@ -2302,10 +2298,9 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
bool can_unsync)
{
struct kvm_mmu_page *s;
- struct hlist_node *node;
bool need_unsync = false;
- for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
if (!can_unsync)
return 1;
@@ -3933,7 +3928,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn_t gfn = gpa >> PAGE_SHIFT;
union kvm_mmu_page_role mask = { .word = 0 };
struct kvm_mmu_page *sp;
- struct hlist_node *node;
LIST_HEAD(invalid_list);
u64 entry, gentry, *spte;
int npte;
@@ -3964,7 +3958,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
- for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fb674fd3fc22..2b97525246d4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -939,14 +939,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
if (pmd_large(*pmd))
return spurious_fault_check(error_code, (pte_t *) pmd);
- /*
- * Note: don't use pte_present() here, since it returns true
- * if the _PAGE_PROTNONE bit is set. However, this aliases the
- * _PAGE_GLOBAL bit, which for kernel pages give false positives
- * when CONFIG_DEBUG_PAGEALLOC is used.
- */
pte = pte_offset_kernel(pmd, address);
- if (!(pte_flags(*pte) & _PAGE_PRESENT))
+ if (!pte_present(*pte))
return 0;
ret = spurious_fault_check(error_code, pte);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 845df6835f9f..62c29a5bfe26 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -115,10 +115,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = mmap_legacy_base();
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index dfd30259eb89..ff3633c794c6 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -97,8 +97,7 @@ void numa_set_node(int cpu, int node)
#endif
per_cpu(x86_cpu_to_node_map, cpu) = node;
- if (node != NUMA_NO_NODE)
- set_cpu_numa_node(cpu, node);
+ set_cpu_numa_node(cpu, node);
}
void numa_clear_node(int cpu)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index ca1f1c2bb7be..091934e1d0d9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -473,6 +473,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
/*
+ * Set the PSE and GLOBAL flags only if the PRESENT flag is set;
+ * otherwise pmd_present/pmd_huge will return true even on a
+ * non-present pmd. canon_pgprot() will clear _PAGE_GLOBAL for
+ * ancient hardware that doesn't support it.
+ */
+ if (pgprot_val(new_prot) & _PAGE_PRESENT)
+ pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
+ else
+ pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
+
+ new_prot = canon_pgprot(new_prot);
+
+ /*
* old_pte points to the large page base address. So we need
* to add the offset of the virtual address:
*/
@@ -517,7 +530,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* The address is aligned and the number of pages
* covers the full page.
*/
- new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+ new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
__set_pmd_pte(kpte, address, new_pte);
cpa->flags |= CPA_FLUSHTLB;
do_split = 0;
@@ -561,16 +574,35 @@ int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
#ifdef CONFIG_X86_64
if (level == PG_LEVEL_1G) {
pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
- pgprot_val(ref_prot) |= _PAGE_PSE;
+ /*
+ * Set the PSE flag only if the PRESENT flag is set;
+ * otherwise pmd_present/pmd_huge will return true
+ * even on a non-present pmd.
+ */
+ if (pgprot_val(ref_prot) & _PAGE_PRESENT)
+ pgprot_val(ref_prot) |= _PAGE_PSE;
+ else
+ pgprot_val(ref_prot) &= ~_PAGE_PSE;
}
#endif
/*
+ * Set the GLOBAL flag only if the PRESENT flag is set;
+ * otherwise pmd/pte_present will return true even on a
+ * non-present pmd/pte. canon_pgprot() will clear _PAGE_GLOBAL
+ * for ancient hardware that doesn't support it.
+ */
+ if (pgprot_val(ref_prot) & _PAGE_PRESENT)
+ pgprot_val(ref_prot) |= _PAGE_GLOBAL;
+ else
+ pgprot_val(ref_prot) &= ~_PAGE_GLOBAL;
+
+ /*
* Get the target pfn from the original entry:
*/
pfn = pte_pfn(*kpte);
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
- set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
+ set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
PFN_DOWN(__pa(address)) + 1))
@@ -685,6 +717,18 @@ repeat:
new_prot = static_protections(new_prot, address, pfn);
/*
+ * Set the GLOBAL flag only if the PRESENT flag is
+ * set; otherwise pte_present will return true even on
+ * a non-present pte. canon_pgprot() will clear
+ * _PAGE_GLOBAL for ancient hardware that doesn't
+ * support it.
+ */
+ if (pgprot_val(new_prot) & _PAGE_PRESENT)
+ pgprot_val(new_prot) |= _PAGE_GLOBAL;
+ else
+ pgprot_val(new_prot) &= ~_PAGE_GLOBAL;
+
+ /*
* We need to keep the pfn from the existing PTE,
* after all we're only going to change its attributes
* not the memory it points to
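
All four pageattr.c hunks apply the rule spelled out in their comments: only a present entry may carry _PAGE_PSE/_PAGE_GLOBAL, otherwise pmd_present()/pte_present()-style checks would report a non-present entry as present. Reduced to plain bit arithmetic, the pattern looks like this (an editor's sketch, not part of the patch; the X_PAGE_* values are illustrative, not taken from the kernel headers):

#include <assert.h>
#include <stdint.h>

#define X_PAGE_PRESENT (1ULL << 0)
#define X_PAGE_PSE     (1ULL << 7)
#define X_PAGE_GLOBAL  (1ULL << 8)

/* Only a present entry may carry PSE/GLOBAL; on a non-present entry those
 * bits must be cleared so present/huge checks do not misfire. */
static uint64_t fixup_prot(uint64_t prot)
{
        if (prot & X_PAGE_PRESENT)
                prot |= X_PAGE_PSE | X_PAGE_GLOBAL;
        else
                prot &= ~(X_PAGE_PSE | X_PAGE_GLOBAL);
        return prot;
}

int main(void)
{
        assert(fixup_prot(X_PAGE_PRESENT) & X_PAGE_GLOBAL);
        assert((fixup_prot(X_PAGE_GLOBAL) & (X_PAGE_PSE | X_PAGE_GLOBAL)) == 0);
        return 0;
}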
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 70b2a3a305d6..34464b6c0077 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -320,10 +320,17 @@ static void __init do_add_efi_memmap(void)
int e820_type;
switch (md->type) {
- case EFI_LOADER_CODE:
- case EFI_LOADER_DATA:
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
+ /* EFI_BOOT_SERVICES_{CODE,DATA} regions need to be mapped */
+ if (md->attribute & EFI_MEMORY_WB)
+ e820_type = E820_RAM;
+ else
+ e820_type = E820_RESERVED;
+ e820_add_region(start, size, e820_type);
+ continue;
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
case EFI_CONVENTIONAL_MEMORY:
if (md->attribute & EFI_MEMORY_WB)
e820_type = E820_RAM;
@@ -348,7 +355,7 @@ static void __init do_add_efi_memmap(void)
e820_type = E820_RESERVED;
break;
}
- e820_add_region(start, size, e820_type);
+ e820_add_limit_region(start, size, e820_type);
}
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
@@ -455,6 +462,8 @@ void __init efi_free_boot_services(void)
md->type != EFI_BOOT_SERVICES_DATA)
continue;
+ e820_adjust_region(&start, &size);
+
/* Could not reserve boot area */
if (!size)
continue;
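
Taken together, the efi.c hunks route EFI_BOOT_SERVICES_{CODE,DATA} around the mem= clamp (those regions have to stay mapped, per the comment above) while every other region type goes through e820_add_limit_region(), and efi_free_boot_services() first trims each range with e820_adjust_region(), so a range lying entirely above the limit ends up with size 0 and is skipped rather than freed. An editor's sketch of the routing decision follows (not part of the patch; types and names are simplified stand-ins for the kernel/EFI definitions):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum e820_type { RAM, RESERVED };
enum efi_type  { BOOT_SERVICES_CODE, BOOT_SERVICES_DATA,
                 LOADER_CODE, LOADER_DATA, CONVENTIONAL_MEMORY };

static void add_region(uint64_t start, uint64_t size, enum e820_type t)
{
        printf("unclamped [mem %#llx+%#llx] %s\n", (unsigned long long)start,
               (unsigned long long)size, t == RAM ? "RAM" : "RESERVED");
}

static void add_limit_region(uint64_t start, uint64_t size, enum e820_type t)
{
        printf("clamped   [mem %#llx+%#llx] %s\n", (unsigned long long)start,
               (unsigned long long)size, t == RAM ? "RAM" : "RESERVED");
}

/* Mirrors the new switch in do_add_efi_memmap(): boot-services memory
 * bypasses the mem= clamp, everything else goes through the limited path. */
static void add_efi_region(enum efi_type type, bool write_back,
                           uint64_t start, uint64_t size)
{
        enum e820_type t = write_back ? RAM : RESERVED;

        switch (type) {
        case BOOT_SERVICES_CODE:
        case BOOT_SERVICES_DATA:
                add_region(start, size, t);
                break;
        default:
                add_limit_region(start, size, t);
                break;
        }
}

int main(void)
{
        add_efi_region(BOOT_SERVICES_DATA, true, 0x7f000000ULL, 0x100000ULL);
        add_efi_region(CONVENTIONAL_MEMORY, true, 0x100000ULL, 0x3ff00000ULL);
        return 0;
}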