From fd8fc476e5cb07d78e134ed6b64a2eae906b55da Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:34 +1000 Subject: lguest:document-PAE-requirement Impact: documentation update Robert noted that we don't actually document that lguest is 32-bit only, nor that PAE must be off (CONFIG_PAE is now prompted for if HIGHMEM is set to "off). Signed-off-by: Rusty Russell Cc: lguest@ozlabs.org Cc: "Robert P. J. Day" --- Documentation/lguest/lguest.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt index 29510dc51510..28c747362f95 100644 --- a/Documentation/lguest/lguest.txt +++ b/Documentation/lguest/lguest.txt @@ -3,11 +3,11 @@ /, /` - or, A Young Coder's Illustrated Hypervisor \\"--\\ http://lguest.ozlabs.org -Lguest is designed to be a minimal hypervisor for the Linux kernel, for -Linux developers and users to experiment with virtualization with the -minimum of complexity. Nonetheless, it should have sufficient -features to make it useful for specific tasks, and, of course, you are -encouraged to fork and enhance it (see drivers/lguest/README). +Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel, +for Linux developers and users to experiment with virtualization with the +minimum of complexity. Nonetheless, it should have sufficient features to +make it useful for specific tasks, and, of course, you are encouraged to fork +and enhance it (see drivers/lguest/README). Features: @@ -37,6 +37,7 @@ Running Lguest: "Paravirtualized guest support" = Y "Lguest guest support" = Y "High Memory Support" = off/4GB + "PAE (Physical Address Extension) Support" = N "Alignment value to which kernel should be aligned" = 0x100000 (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and CONFIG_PHYSICAL_ALIGN=0x100000) -- cgit v1.2.3 From ba892b6f6d249ed3cf6450e55b2c9bb63c374849 Mon Sep 17 00:00:00 2001 From: Matias Zabaljauregui Date: Wed, 8 Apr 2009 17:58:39 -0300 Subject: lguest: fix KVM-style hypercalls with vmlinux images Impact: fix guest crash 'lguest: unhandled trap 6 at 0x418726 (0x0)' The Launcher mmaps the kernel image. The Guest executes and immediately faults in the first text page (read-only). Then it hits a hypercall, and we rewrite that hypercall, causing a copy-on-write. But the Guest pagetables still refer to the old page: we fault again, but as Host we see the hypercall already rewritten, and pass the fault back to the Guest. The Guest hasn't set up an IDT yet, so we kill it. This doesn't happen with bzImages: they unpack themselves and so the text pages are already read-write. Signed-off-by: Rusty Russell Tested-by: Patrick McHardy --- drivers/lguest/x86/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index a6b717644be0..d6d7ac0982ab 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -324,6 +324,11 @@ static void rewrite_hypercall(struct lg_cpu *cpu) u8 insn[3] = {0xcd, 0x1f, 0x90}; __lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn)); + /* The above write might have caused a copy of that page to be made + * (if it was read-only). We need to make sure the Guest has + * up-to-date pagetables. As this doesn't happen often, we can just + * drop them all. */ + guest_pagetable_clear_all(cpu); } static bool is_hypercall(struct lg_cpu *cpu) -- cgit v1.2.3 From 5929431270a860a79188a2154ded2c0836728b43 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:34 +1000 Subject: lguest:LOAD_GDT_ENTRY Impact: fix guest crash 'lguest: bad read address 0x4800000 len 256' The new per-cpu allocator ends up handing a non-linear address to write_gdt_entry. We do __pa() on it, and hand it to the host, which kills us. I've long wanted to make the hypercall "LOAD_GDT_ENTRY" to match the IDT code, but had no pressing reason until now. Signed-off-by: Rusty Russell Cc: lguest@ozlabs.org --- arch/x86/include/asm/lguest_hcall.h | 2 +- arch/x86/lguest/boot.c | 16 +++++++++------- drivers/lguest/lg.h | 3 ++- drivers/lguest/segments.c | 13 +++++++------ drivers/lguest/x86/core.c | 4 ++-- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index 0f4ee7148afe..faae1996487b 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -5,7 +5,6 @@ #define LHCALL_FLUSH_ASYNC 0 #define LHCALL_LGUEST_INIT 1 #define LHCALL_SHUTDOWN 2 -#define LHCALL_LOAD_GDT 3 #define LHCALL_NEW_PGTABLE 4 #define LHCALL_FLUSH_TLB 5 #define LHCALL_LOAD_IDT_ENTRY 6 @@ -17,6 +16,7 @@ #define LHCALL_SET_PMD 15 #define LHCALL_LOAD_TLS 16 #define LHCALL_NOTIFY 17 +#define LHCALL_LOAD_GDT_ENTRY 18 #define LGUEST_TRAP_ENTRY 0x1F diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e94a11e42f98..a2085368a3dc 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -273,15 +273,15 @@ static void lguest_load_idt(const struct desc_ptr *desc) * controls the entire thing and the Guest asks it to make changes using the * LOAD_GDT hypercall. * - * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY - * hypercall and use that repeatedly to load a new IDT. I don't think it - * really matters, but wouldn't it be nice if they were the same? Wouldn't - * it be even better if you were the one to send the patch to fix it? + * This is the exactly like the IDT code. */ static void lguest_load_gdt(const struct desc_ptr *desc) { - BUG_ON((desc->size + 1) / 8 != GDT_ENTRIES); - kvm_hypercall2(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES); + unsigned int i; + struct desc_struct *gdt = (void *)desc->address; + + for (i = 0; i < (desc->size+1)/8; i++) + kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); } /* For a single GDT entry which changes, we do the lazy thing: alter our GDT, @@ -291,7 +291,9 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, const void *desc, int type) { native_write_gdt_entry(dt, entrynum, desc, type); - kvm_hypercall2(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES); + /* Tell Host about this new entry. */ + kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, + dt[entrynum].a, dt[entrynum].b); } /* OK, I lied. There are three "thread local storage" GDT entries which change diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index ac8a4a3741b8..af92a176697f 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -158,7 +158,8 @@ void free_interrupts(void); /* segments.c: */ void setup_default_gdt_entries(struct lguest_ro_state *state); void setup_guest_gdt(struct lg_cpu *cpu); -void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num); +void load_guest_gdt_entry(struct lg_cpu *cpu, unsigned int i, + u32 low, u32 hi); void guest_load_tls(struct lg_cpu *cpu, unsigned long tls_array); void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt); void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt); diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 4f15439b7f12..7ede64ffeef9 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c @@ -144,18 +144,19 @@ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) gdt[i] = cpu->arch.gdt[i]; } -/*H:620 This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). - * We copy it from the Guest and tweak the entries. */ -void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num) +/*H:620 This is where the Guest asks us to load a new GDT entry + * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. */ +void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) { /* We assume the Guest has the same number of GDT entries as the * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ if (num > ARRAY_SIZE(cpu->arch.gdt)) kill_guest(cpu, "too many gdt entries %i", num); - /* We read the whole thing in, then fix it up. */ - __lgread(cpu, cpu->arch.gdt, table, num * sizeof(cpu->arch.gdt[0])); - fixup_gdt_table(cpu, 0, ARRAY_SIZE(cpu->arch.gdt)); + /* Set it up, then fix it. */ + cpu->arch.gdt[num].a = lo; + cpu->arch.gdt[num].b = hi; + fixup_gdt_table(cpu, num, num+1); /* Mark that the GDT changed so the core knows it has to copy it again, * even if the Guest is run on the same CPU. */ cpu->changed |= CHANGED_GDT; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index d6d7ac0982ab..1a83910f674f 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -568,8 +568,8 @@ void __exit lguest_arch_host_fini(void) int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { switch (args->arg0) { - case LHCALL_LOAD_GDT: - load_guest_gdt(cpu, args->arg1, args->arg2); + case LHCALL_LOAD_GDT_ENTRY: + load_guest_gdt_entry(cpu, args->arg1, args->arg2, args->arg3); break; case LHCALL_LOAD_IDT_ENTRY: load_guest_idt_entry(cpu, args->arg1, args->arg2, args->arg3); -- cgit v1.2.3 From 94c8925fb0adb57db12f508e5f7a831d8393b084 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:34 +1000 Subject: module:cyber2000fb-stop-unload-for-CONFIG_ARCH_SHARK Russell explains the __module_get(): > cyber2000fb.c does it in its module initialization function > to prevent the module (when built for Shark) from being unloaded. It > does this because it's from the days of 2.2 kernels and no one bothered > writing the module unload support for Shark. Since 2.4, the correct answer has been to not define an unload fn. Cc: Russell King Cc: alex@shark-linux.de Signed-off-by: Rusty Russell --- drivers/video/cyber2000fb.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c index 83c5cefc266c..da7c01b39be2 100644 --- a/drivers/video/cyber2000fb.c +++ b/drivers/video/cyber2000fb.c @@ -1736,10 +1736,8 @@ static int __init cyber2000fb_init(void) #ifdef CONFIG_ARCH_SHARK err = cyberpro_vl_probe(); - if (!err) { + if (!err) ret = 0; - __module_get(THIS_MODULE); - } #endif #ifdef CONFIG_PCI err = pci_register_driver(&cyberpro_driver); @@ -1749,14 +1747,15 @@ static int __init cyber2000fb_init(void) return ret ? err : 0; } +module_init(cyber2000fb_init); +#ifndef CONFIG_ARCH_SHARK static void __exit cyberpro_exit(void) { pci_unregister_driver(&cyberpro_driver); } - -module_init(cyber2000fb_init); module_exit(cyberpro_exit); +#endif MODULE_AUTHOR("Russell King"); MODULE_DESCRIPTION("CyberPro 2000, 2010 and 5000 framebuffer driver"); -- cgit v1.2.3 From 593a226ac4d4f875216cf9f57fb51826cecd20c7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:35 +1000 Subject: module:invbool-takes-a-bool It takes an 'int' for historical reasons, and there are only two users: simply switch it over to bool. The other user (uvesafb.c) will get a (harmless-on-x86) warning until the next patch is applied. Cc: Brad Douglas Cc: Michal Januszewski Signed-off-by: Rusty Russell --- drivers/video/aty/aty128fb.c | 2 +- include/linux/moduleparam.h | 2 +- kernel/params.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index 35e8eb02b9e9..e4e4d433b007 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -354,7 +354,7 @@ static int default_crt_on __devinitdata = 0; static int default_lcd_on __devinitdata = 1; #ifdef CONFIG_MTRR -static int mtrr = 1; +static bool mtrr = true; #endif #ifdef CONFIG_PMAC_BACKLIGHT diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index a4f0b931846c..9bbca8e8c19f 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -192,7 +192,7 @@ extern int param_get_bool(char *buffer, struct kernel_param *kp); extern int param_set_invbool(const char *val, struct kernel_param *kp); extern int param_get_invbool(char *buffer, struct kernel_param *kp); -#define param_check_invbool(name, p) __param_check(name, p, int) +#define param_check_invbool(name, p) __param_check(name, p, bool) /* Comma-separated array: *nump is set to number they actually specified. */ #define module_param_array_named(name, array, type, nump, perm) \ diff --git a/kernel/params.c b/kernel/params.c index de273ec85bd2..023abbf5f89f 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -272,13 +272,13 @@ int param_set_invbool(const char *val, struct kernel_param *kp) dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) - *(int *)kp->arg = !boolval; + *(bool *)kp->arg = !boolval; return ret; } int param_get_invbool(char *buffer, struct kernel_param *kp) { - return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y'); + return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y'); } /* We break the rule and mangle the string. */ -- cgit v1.2.3 From f144e0863591f5d33e91edb1bc41b3a5dbd6f752 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:35 +1000 Subject: module:param-flags Impact: cleanup Rather than hack KPARAM_KMALLOCED into the perm field, separate it out. Since the perm field was 32 bits and only needs 16, we don't add bloat. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 8 ++++++-- kernel/params.c | 9 +++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 9bbca8e8c19f..009a5f768768 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -36,9 +36,13 @@ typedef int (*param_set_fn)(const char *val, struct kernel_param *kp); /* Returns length written or -errno. Buffer is 4k (ie. be short!) */ typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); +/* Flag bits for kernel_param.flags */ +#define KPARAM_KMALLOCED 1 + struct kernel_param { const char *name; - unsigned int perm; + u16 perm; + u16 flags; param_set_fn set; param_get_fn get; union { @@ -88,7 +92,7 @@ struct kparam_array static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, perm, set, get, { arg } } + = { __param_str_##name, perm, 0, set, get, { arg } } #define module_param_call(name, set, get, arg, perm) \ __module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm) diff --git a/kernel/params.c b/kernel/params.c index 023abbf5f89f..b4660dc13dbc 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -24,9 +24,6 @@ #include #include -/* We abuse the high bits of "perm" to record whether we kmalloc'ed. */ -#define KPARAM_KMALLOCED 0x80000000 - #if 0 #define DEBUGP printk #else @@ -220,13 +217,13 @@ int param_set_charp(const char *val, struct kernel_param *kp) return -ENOSPC; } - if (kp->perm & KPARAM_KMALLOCED) + if (kp->flags & KPARAM_KMALLOCED) kfree(*(char **)kp->arg); /* This is a hack. We can't need to strdup in early boot, and we * don't need to; this mangled commandline is preserved. */ if (slab_is_available()) { - kp->perm |= KPARAM_KMALLOCED; + kp->flags |= KPARAM_KMALLOCED; *(char **)kp->arg = kstrdup(val, GFP_KERNEL); if (!kp->arg) return -ENOMEM; @@ -591,7 +588,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) unsigned int i; for (i = 0; i < num; i++) - if (params[i].perm & KPARAM_KMALLOCED) + if (params[i].flags & KPARAM_KMALLOCED) kfree(*(char **)params[i].arg); } -- cgit v1.2.3 From a9477308bb1d620d7f94a9a349dc509ffe94f7e1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:35 +1000 Subject: module:__same_type Impact: new API __builtin_types_compatible_p() is a little awkward to use: it takes two types rather than types or variables, and it's just damn long. (typeof(type) == type, so this works on types as well as vars). Signed-off-by: Rusty Russell --- include/linux/compiler.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 37bcb50a4d7c..04fb5135b4e1 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -261,6 +261,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __section(S) __attribute__ ((__section__(#S))) #endif +/* Are two types/vars the same type (ignoring qualifiers)? */ +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3 From 852946e0ed8f6ad20e3dc6e777079197d36e7a8b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:35 +1000 Subject: module:bool-param-can-be-bool Impact: API cleanup For historical reasons, 'bool' parameters must be an int, not a bool. But there are around 600 users, so a conversion seems like useless churn. So we use __same_type() to distinguish, and handle both cases. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 32 +++++++++++++++++++++++--------- kernel/params.c | 33 ++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 009a5f768768..6547c3cdbc4c 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -38,6 +38,7 @@ typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); /* Flag bits for kernel_param.flags */ #define KPARAM_KMALLOCED 1 +#define KPARAM_ISBOOL 2 struct kernel_param { const char *name; @@ -83,7 +84,7 @@ struct kparam_array parameters. perm sets the visibility in sysfs: 000 means it's not there, read bits mean it's readable, write bits mean it's writable. */ -#define __module_param_call(prefix, name, set, get, arg, perm) \ +#define __module_param_call(prefix, name, set, get, arg, isbool, perm) \ /* Default value instead of permissions? */ \ static int __param_perm_check_##name __attribute__((unused)) = \ BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)) \ @@ -92,10 +93,13 @@ struct kparam_array static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, perm, 0, set, get, { arg } } + = { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0, \ + set, get, { arg } } #define module_param_call(name, set, get, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm) + __module_param_call(MODULE_PARAM_PREFIX, \ + name, set, get, arg, \ + __same_type(*(arg), bool), perm) /* Helper functions: type is byte, short, ushort, int, uint, long, ulong, charp, bool or invbool, or XXX if you define param_get_XXX, @@ -124,15 +128,16 @@ struct kparam_array #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ __module_param_call("", name, param_set_##type, param_get_##type, \ - &var, perm) + &var, __same_type(var, bool), perm) #endif /* !MODULE */ /* Actually copy string: maxlen param is usually sizeof(string). */ #define module_param_string(name, string, len, perm) \ static const struct kparam_string __param_string_##name \ = { len, string }; \ - module_param_call(name, param_set_copystring, param_get_string, \ - .str = &__param_string_##name, perm); \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + param_set_copystring, param_get_string, \ + .str = &__param_string_##name, 0, perm); \ __MODULE_PARM_TYPE(name, "string") /* Called on module insert or kernel boot */ @@ -190,9 +195,16 @@ extern int param_set_charp(const char *val, struct kernel_param *kp); extern int param_get_charp(char *buffer, struct kernel_param *kp); #define param_check_charp(name, p) __param_check(name, p, char *) +/* For historical reasons "bool" parameters can be (unsigned) "int". */ extern int param_set_bool(const char *val, struct kernel_param *kp); extern int param_get_bool(char *buffer, struct kernel_param *kp); -#define param_check_bool(name, p) __param_check(name, p, int) +#define param_check_bool(name, p) \ + static inline void __check_##name(void) \ + { \ + BUILD_BUG_ON(!__same_type(*(p), bool) && \ + !__same_type(*(p), unsigned int) && \ + !__same_type(*(p), int)); \ + } extern int param_set_invbool(const char *val, struct kernel_param *kp); extern int param_get_invbool(char *buffer, struct kernel_param *kp); @@ -203,8 +215,10 @@ extern int param_get_invbool(char *buffer, struct kernel_param *kp); static const struct kparam_array __param_arr_##name \ = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\ sizeof(array[0]), array }; \ - module_param_call(name, param_array_set, param_array_get, \ - .arr = &__param_arr_##name, perm); \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + param_array_set, param_array_get, \ + .arr = &__param_arr_##name, \ + __same_type(array[0], bool), perm); \ __MODULE_PARM_TYPE(name, "array of " #type) #define module_param_array(name, type, nump, perm) \ diff --git a/kernel/params.c b/kernel/params.c index b4660dc13dbc..7f6912ced2ba 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -238,35 +238,54 @@ int param_get_charp(char *buffer, struct kernel_param *kp) return sprintf(buffer, "%s", *((char **)kp->arg)); } +/* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, struct kernel_param *kp) { + bool v; + /* No equals means "set"... */ if (!val) val = "1"; /* One of =[yYnN01] */ switch (val[0]) { case 'y': case 'Y': case '1': - *(int *)kp->arg = 1; - return 0; + v = true; + break; case 'n': case 'N': case '0': - *(int *)kp->arg = 0; - return 0; + v = false; + break; + default: + return -EINVAL; } - return -EINVAL; + + if (kp->flags & KPARAM_ISBOOL) + *(bool *)kp->arg = v; + else + *(int *)kp->arg = v; + return 0; } int param_get_bool(char *buffer, struct kernel_param *kp) { + bool val; + if (kp->flags & KPARAM_ISBOOL) + val = *(bool *)kp->arg; + else + val = *(int *)kp->arg; + /* Y and N chosen as being relatively non-coder friendly */ - return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N'); + return sprintf(buffer, "%c", val ? 'Y' : 'N'); } +/* This one must be bool. */ int param_set_invbool(const char *val, struct kernel_param *kp) { - int boolval, ret; + int ret; + bool boolval; struct kernel_param dummy; dummy.arg = &boolval; + dummy.flags = KPARAM_ISBOOL; ret = param_set_bool(val, &dummy); if (ret == 0) *(bool *)kp->arg = !boolval; -- cgit v1.2.3 From 67024f39006f408e24040bf8c064d80a8fc89938 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:36 +1000 Subject: uvesafb:param-cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) Now module_param(..., invbool, ...) requires a bool, and similarly module_param(..., bool, ...) allows it, change pmi_setpal to a bool. 2) #define param_get_scroll to NULL, since it can never be called (perm argument to module_param_named is 0). 3) Return -EINVAL from param_set_scroll if the value is bad, so it's reported. Note that I don't think the old fb_get_options() is required for new drivers: the parameters automatically work as uvesafb.XXX=... anyway. Acked-by: MichaƂ Januszewski Signed-off-by: Rusty Russell --- drivers/video/uvesafb.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 0b370aebdbfd..40463733a1d3 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -45,7 +45,7 @@ static struct fb_fix_screeninfo uvesafb_fix __devinitdata = { static int mtrr __devinitdata = 3; /* enable mtrr by default */ static int blank = 1; /* enable blanking by default */ static int ypan = 1; /* 0: scroll, 1: ypan, 2: ywrap */ -static int pmi_setpal __devinitdata = 1; /* use PMI for palette changes */ +static bool pmi_setpal __devinitdata = true; /* use PMI for palette changes */ static int nocrtc __devinitdata; /* ignore CRTC settings */ static int noedid __devinitdata; /* don't try DDC transfers */ static int vram_remap __devinitdata; /* set amt. of memory to be used */ @@ -2017,11 +2017,7 @@ static void __devexit uvesafb_exit(void) module_exit(uvesafb_exit); -static int param_get_scroll(char *buffer, struct kernel_param *kp) -{ - return 0; -} - +#define param_get_scroll NULL static int param_set_scroll(const char *val, struct kernel_param *kp) { ypan = 0; @@ -2032,6 +2028,8 @@ static int param_set_scroll(const char *val, struct kernel_param *kp) ypan = 1; else if (!strcmp(val, "ywrap")) ypan = 2; + else + return -EINVAL; return 0; } -- cgit v1.2.3 From cf2fb1659cb6d1269487f3c9870f045b95e5e3a2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 16 Apr 2009 10:21:36 +1000 Subject: use smp_call_function_single() in arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c Impact: fix sysbench(oltp)+mysql 10% regression with 2.6.30-rc1 Mike says: It's kondemand, do_dbs_timer() is queueing work 9-14 times/sec/cpu. Tested-by: Mike Galbraith Cc: Ingo Molnar Cc: Venkatesh Pallipadi Cc: Len Brown Cc: Zhao Yakui Cc: Dave Jones Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell (use rdmsr_on_cpu) --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 9d3af380c6bd..1c5f5c3b65df 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -153,7 +153,8 @@ struct drv_cmd { u32 val; }; -static long do_drv_read(void *_cmd) +/* Called via smp_call_function_single(), on the target CPU */ +static void do_drv_read(void *_cmd) { struct drv_cmd *cmd = _cmd; u32 h; @@ -170,10 +171,10 @@ static long do_drv_read(void *_cmd) default: break; } - return 0; } -static long do_drv_write(void *_cmd) +/* Called via smp_call_function_single(), on the target CPU */ +static void do_drv_write(void *_cmd) { struct drv_cmd *cmd = _cmd; u32 lo, hi; @@ -192,23 +193,21 @@ static long do_drv_write(void *_cmd) default: break; } - return 0; } static void drv_read(struct drv_cmd *cmd) { cmd->val = 0; - work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); + smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1); } static void drv_write(struct drv_cmd *cmd) { - unsigned int i; + unsigned int cpu; - for_each_cpu(i, cmd->mask) { - work_on_cpu(i, do_drv_write, cmd); - } + for_each_cpu(cpu, cmd->mask) + smp_call_function_single(cpu, do_drv_write, cmd, 1); } static u32 get_cur_val(const struct cpumask *mask) @@ -253,14 +252,13 @@ struct perf_pair { }; -static long read_measured_perf_ctrs(void *_cur) +/* Called via smp_call_function_single(), on the target CPU */ +static void read_measured_perf_ctrs(void *_cur) { struct perf_pair *cur = _cur; rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); - - return 0; } /* @@ -283,7 +281,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int perf_percent; unsigned int retval; - if (!work_on_cpu(cpu, read_measured_perf_ctrs, &readin)) + if (!smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) return 0; cur.aperf.whole = readin.aperf.whole - -- cgit v1.2.3 From 56bbf054781162efa14168984c33b8635d98eced Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 16 Apr 2009 10:21:36 +1000 Subject: use smp_call_function_single() in arch/x86/kernel/acpi/cstate.c Attempting to rid us of the problematic work_on_cpu(). Just use smp_call_function_single() here. (Includes fix from Wu Fengguang ) Cc: Ingo Molnar Cc: Venkatesh Pallipadi Cc: Len Brown Cc: Zhao Yakui Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell --- arch/x86/kernel/acpi/cstate.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index bbbe4bbb6f34..b866b876446e 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -53,6 +53,12 @@ struct cstate_entry { }; static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ +/* Used for the cross-CPU calls */ +struct acpi_processor_cx_cross_cpu { + struct acpi_processor_cx *cx; + long retval; +}; + static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; #define MWAIT_SUBSTATE_MASK (0xf) @@ -67,10 +73,10 @@ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; #define NATIVE_CSTATE_BEYOND_HALT (2) -static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) +static void acpi_processor_ffh_cstate_probe_cpu(void *_cxcc) { - struct acpi_processor_cx *cx = _cx; - long retval; + struct acpi_processor_cx_cross_cpu *cxcc = _cxcc; + struct acpi_processor_cx *cx = cxcc->cx; unsigned int eax, ebx, ecx, edx; unsigned int edx_part; unsigned int cstate_type; /* C-state type and not ACPI C-state type */ @@ -84,16 +90,16 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; - retval = 0; + cxcc->retval = 0; if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { - retval = -1; + cxcc->retval = -1; goto out; } /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) { - retval = -1; + cxcc->retval = -1; goto out; } @@ -107,7 +113,7 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x", cx->address); out: - return retval; + return; } int acpi_processor_ffh_cstate_probe(unsigned int cpu, @@ -115,6 +121,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, { struct cstate_entry *percpu_entry; struct cpuinfo_x86 *c = &cpu_data(cpu); + struct acpi_processor_cx_cross_cpu cxcc = { .cx = cx, }; long retval; if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF) @@ -127,13 +134,18 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, percpu_entry->states[cx->index].eax = 0; percpu_entry->states[cx->index].ecx = 0; - /* Make sure we are running on right CPU */ + /* Run acpi_processor_ffh_cstate_probe_cpu() on the target CPU */ - retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx); + retval = smp_call_function_single(cpu, + acpi_processor_ffh_cstate_probe_cpu, &cxcc, 1); if (retval == 0) { - /* Use the hint in CST */ - percpu_entry->states[cx->index].eax = cx->address; - percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; + retval = cxcc.retval; + if (retval == 0) { + /* Use the hint in CST */ + percpu_entry->states[cx->index].eax = cx->address; + percpu_entry->states[cx->index].ecx = + MWAIT_ECX_INTERRUPT_BREAK; + } } return retval; } -- cgit v1.2.3 From fc1160e780081a772e03fb52fdd537831d0a0b52 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:36 +1000 Subject: cpumask:remove-cpumask-games-arch-x86-kernel-cpu-cpufreq-speedstep-ich.c Impact: don't play with current's cpumask It's generally a very bad idea to mug some process's cpumask: it could legitimately and reasonably be changed by root, which could break us (if done before our code) or them (if we restore the wrong value). For speedstep_set_state and speedstep_get_state we use smp_call_function_single: this had the advantage of being more efficient, too. For speedstep_get_freqs() in speedstep_cpu_init(), we use work_on_cpu: slightly less efficient, but not performance critical. Signed-off-by: Rusty Russell To: davej@redhat.com To: cpufreq@vger.kernel.org Cc: Dominik Brodowski --- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 81 ++++++++++++++++------------- arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 1 + 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 016c1a4fa3fc..ed9c539ba646 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -89,7 +89,8 @@ static int speedstep_find_register(void) * speedstep_set_state - set the SpeedStep state * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) * - * Tries to change the SpeedStep state. + * Tries to change the SpeedStep state. Can be called from + * smp_call_function_single. */ static void speedstep_set_state(unsigned int state) { @@ -143,6 +144,11 @@ static void speedstep_set_state(unsigned int state) return; } +/* Wrapper for smp_call_function_single. */ +static void _speedstep_set_state(void *_state) +{ + speedstep_set_state(*(unsigned int *)_state); +} /** * speedstep_activate - activate SpeedStep control in the chipset @@ -226,22 +232,28 @@ static unsigned int speedstep_detect_chipset(void) return 0; } -static unsigned int _speedstep_get(const struct cpumask *cpus) -{ +struct get_freq_data { unsigned int speed; - cpumask_t cpus_allowed; - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpus); - speed = speedstep_get_frequency(speedstep_processor); - set_cpus_allowed_ptr(current, &cpus_allowed); - dprintk("detected %u kHz as current frequency\n", speed); - return speed; + unsigned int processor; +}; + +static void get_freq_data(void *_data) +{ + struct get_freq_data *data = _data; + + data->speed = speedstep_get_frequency(data->processor); } static unsigned int speedstep_get(unsigned int cpu) { - return _speedstep_get(cpumask_of(cpu)); + struct get_freq_data data = { .processor = cpu }; + + /* You're supposed to ensure CPU is online. */ + if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) + BUG(); + + dprintk("detected %u kHz as current frequency\n", data.speed); + return data.speed; } /** @@ -257,16 +269,16 @@ static int speedstep_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - unsigned int newstate = 0; + unsigned int newstate = 0, policy_cpu; struct cpufreq_freqs freqs; - cpumask_t cpus_allowed; int i; if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) return -EINVAL; - freqs.old = _speedstep_get(policy->cpus); + policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); + freqs.old = speedstep_get(policy_cpu); freqs.new = speedstep_freqs[newstate].frequency; freqs.cpu = policy->cpu; @@ -276,20 +288,13 @@ static int speedstep_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - cpus_allowed = current->cpus_allowed; - for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } - /* switch to physical CPU where state is to be changed */ - set_cpus_allowed_ptr(current, policy->cpus); - - speedstep_set_state(newstate); - - /* allow to be run on all CPUs */ - set_cpus_allowed_ptr(current, &cpus_allowed); + smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, + true); for_each_cpu(i, policy->cpus) { freqs.cpu = i; @@ -312,33 +317,35 @@ static int speedstep_verify(struct cpufreq_policy *policy) return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); } +static long get_freqs_on_cpu(void *_policy) +{ + struct cpufreq_policy *policy = _policy; + + return speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &policy->cpuinfo.transition_latency, + &speedstep_set_state); +} static int speedstep_cpu_init(struct cpufreq_policy *policy) { - int result = 0; - unsigned int speed; - cpumask_t cpus_allowed; + int result; + unsigned int policy_cpu, speed; /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); #endif - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, policy->cpus); + policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); /* detect low and high frequency and transition latency */ - result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, - &policy->cpuinfo.transition_latency, - &speedstep_set_state); - set_cpus_allowed_ptr(current, &cpus_allowed); + result = work_on_cpu(policy_cpu, get_freqs_on_cpu, policy); if (result) return result; /* get current speed setting */ - speed = _speedstep_get(policy->cpus); + speed = speedstep_get(policy_cpu); if (!speed) return -EIO; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 2e3c6862657b..f4c290b8482f 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -226,6 +226,7 @@ static unsigned int pentium4_get_frequency(void) } +/* Warning: may get called from smp_call_function_single. */ unsigned int speedstep_get_frequency(unsigned int processor) { switch (processor) { -- cgit v1.2.3 From 108277fc1a88336b770af3bb86e837d715f3fb30 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:36 +1000 Subject: cpumask: avoid playing with cpus_allowed in powernow-k8.c Impact: don't play with current's cpumask It's generally a very bad idea to mug some process's cpumask: it could legitimately and reasonably be changed by root, which could break us (if done before our code) or them (if we restore the wrong value). I use work_on_cpu, which is slightly less efficient than the old code, but the code is complex enough that using smp_call_function_single() is not trivial. Signed-off-by: Rusty Russell To: davej@redhat.com To: cpufreq@vger.kernel.org Cc: mark.langsdorf@amd.com --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 134 +++++++++++++++--------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4709ead2db52..e9c6f7736b1c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -510,19 +510,10 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, return 0; } -static int check_supported_cpu(unsigned int cpu) +static long check_supported_cpu(void *unused) { - cpumask_t oldmask; u32 eax, ebx, ecx, edx; - unsigned int rc = 0; - - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); - goto out; - } + unsigned int rc = -ENODEV; if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) goto out; @@ -562,10 +553,9 @@ static int check_supported_cpu(unsigned int cpu) goto out; } - rc = 1; + rc = 0; out: - set_cpus_allowed_ptr(current, &oldmask); return rc; } @@ -1121,11 +1111,16 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, return res; } -/* Driver entry point to switch to the target frequency */ -static int powernowk8_target(struct cpufreq_policy *pol, - unsigned targfreq, unsigned relation) +struct target_data { + struct cpufreq_policy *pol; + unsigned targfreq; + unsigned relation; +}; + +static long powernowk8_target_on_cpu(void *_tdata) { - cpumask_t oldmask; + struct target_data *tdata = _tdata; + struct cpufreq_policy *pol = tdata->pol; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1138,22 +1133,13 @@ static int powernowk8_target(struct cpufreq_policy *pol, checkfid = data->currfid; checkvid = data->currvid; - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; - } - if (pending_bit_stuck()) { printk(KERN_ERR PFX "failing targ, change pending bit set\n"); goto err_out; } dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", - pol->cpu, targfreq, pol->min, pol->max, relation); + pol->cpu, tdata->targfreq, pol->min, pol->max, tdata->relation); if (query_current_values_with_pending_wait(data)) goto err_out; @@ -1173,7 +1159,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, } if (cpufreq_frequency_table_target(pol, data->powernow_table, - targfreq, relation, &newstate)) + tdata->targfreq, tdata->relation, + &newstate)) goto err_out; mutex_lock(&fidvid_mutex); @@ -1200,10 +1187,19 @@ static int powernowk8_target(struct cpufreq_policy *pol, ret = 0; err_out: - set_cpus_allowed_ptr(current, &oldmask); return ret; } +/* Driver entry point to switch to the target frequency */ +static int powernowk8_target(struct cpufreq_policy *pol, + unsigned targfreq, unsigned relation) +{ + struct target_data tdata = { .pol = pol, + .targfreq = targfreq, + .relation = relation }; + return work_on_cpu(pol->cpu, powernowk8_target_on_cpu, &tdata); +} + /* Driver entry point to verify the policy and range of frequencies */ static int powernowk8_verify(struct cpufreq_policy *pol) { @@ -1215,19 +1211,42 @@ static int powernowk8_verify(struct cpufreq_policy *pol) return cpufreq_frequency_table_verify(pol, data->powernow_table); } +static long __cpuinit powernowk8_cpu_init_on_cpu(void *_data) +{ + struct powernow_k8_data *data = _data; + + if (smp_processor_id() != data->cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", data->cpu); + return -EIO; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing init, change pending bit set\n"); + return -ENODEV; + } + + if (query_current_values_with_pending_wait(data)) + return -ENODEV; + + if (cpu_family == CPU_OPTERON) + fidvid_msr_init(); + + return 0; +} + /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; - cpumask_t oldmask; int rc; static int print_once; if (!cpu_online(pol->cpu)) return -ENODEV; - if (!check_supported_cpu(pol->cpu)) - return -ENODEV; + rc = work_on_cpu(pol->cpu, check_supported_cpu, NULL); + if (rc != 0) + return rc; data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); if (!data) { @@ -1278,27 +1297,9 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) pol->cpuinfo.transition_latency = get_transition_latency(data); /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out_unmask; - } - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing init, change pending bit set\n"); - goto err_out_unmask; - } - - if (query_current_values_with_pending_wait(data)) - goto err_out_unmask; - - if (cpu_family == CPU_OPTERON) - fidvid_msr_init(); - - /* run on any CPU again */ - set_cpus_allowed_ptr(current, &oldmask); + rc = work_on_cpu(data->cpu, powernowk8_cpu_init_on_cpu, data); + if (rc != 0) + goto err_out_exit_acpi; if (cpu_family == CPU_HW_PSTATE) cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); @@ -1335,8 +1336,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return 0; -err_out_unmask: - set_cpus_allowed_ptr(current, &oldmask); +err_out_exit_acpi: powernow_k8_cpu_exit_acpi(data); err_out: @@ -1361,12 +1361,20 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) return 0; } +static void query_values_on_cpu(void *_err) +{ + int *err = _err; + struct powernow_k8_data *data = __get_cpu_var(powernow_data); + + *err = query_current_values_with_pending_wait(data); +} + static unsigned int powernowk8_get(unsigned int cpu) { struct powernow_k8_data *data; - cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; unsigned int first; + int err; first = cpumask_first(cpu_core_mask(cpu)); data = per_cpu(powernow_data, first); @@ -1374,15 +1382,8 @@ static unsigned int powernowk8_get(unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX - "limiting to CPU %d failed in powernowk8_get\n", cpu); - set_cpus_allowed_ptr(current, &oldmask); - return 0; - } - - if (query_current_values_with_pending_wait(data)) + smp_call_function_single(first, query_values_on_cpu, &err, true); + if (err) goto out; if (cpu_family == CPU_HW_PSTATE) @@ -1393,7 +1394,6 @@ static unsigned int powernowk8_get(unsigned int cpu) out: - set_cpus_allowed_ptr(current, &oldmask); return khz; } @@ -1419,7 +1419,7 @@ static int __cpuinit powernowk8_init(void) unsigned int i, supported_cpus = 0; for_each_online_cpu(i) { - if (check_supported_cpu(i)) + if (work_on_cpu(i, check_supported_cpu, NULL) == 0) supported_cpus++; } -- cgit v1.2.3 From 0cf8d2f07b38e3a8e1d634d253cbcf4f85e250d1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:37 +1000 Subject: cpumask:remove-cpumask-games-arch-x86-kernel-cpu-cpufreq-speedstep-centrino.c Impact: don't play with current's cpumask It's generally a very bad idea to mug some process's cpumask: it could legitimately and reasonably be changed by root, which could break us (if done before our code) or them (if we restore the wrong value). Use rdmsr_on_cpu and wrmsr_on_cpu instead. Signed-off-by: Rusty Russell To: davej@redhat.com To: cpufreq@vger.kernel.org Cc: Jeremy Fitzhardinge --- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 60 +++++++----------------- 1 file changed, 17 insertions(+), 43 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index c9f1fdc02830..c9727f62a820 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigned int cpu) { unsigned l, h; unsigned clock_freq; - cpumask_t saved_mask; - saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - return 0; - - rdmsr(MSR_IA32_PERF_STATUS, l, h); + rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h); clock_freq = extract_clock(l, cpu, 0); if (unlikely(clock_freq == 0)) { @@ -340,11 +334,9 @@ static unsigned int get_cur_freq(unsigned int cpu) * P-state transition (like TM2). Get the last freq set * in PERF_CTL. */ - rdmsr(MSR_IA32_PERF_CTL, l, h); + rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h); clock_freq = extract_clock(l, cpu, 1); } - - set_cpus_allowed_ptr(current, &saved_mask); return clock_freq; } @@ -467,15 +459,10 @@ static int centrino_target (struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int retval = 0; unsigned int j, k, first_cpu, tmp; - cpumask_var_t saved_mask, covered_cpus; + cpumask_var_t covered_cpus; - if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) - return -ENOMEM; - if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { - free_cpumask_var(saved_mask); + if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) return -ENOMEM; - } - cpumask_copy(saved_mask, ¤t->cpus_allowed); if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { retval = -ENODEV; @@ -493,7 +480,7 @@ static int centrino_target (struct cpufreq_policy *policy, first_cpu = 1; for_each_cpu(j, policy->cpus) { - const struct cpumask *mask; + int good_cpu; /* cpufreq holds the hotplug lock, so we are safe here */ if (!cpu_online(j)) @@ -504,32 +491,30 @@ static int centrino_target (struct cpufreq_policy *policy, * Make sure we are running on CPU that wants to change freq */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - mask = policy->cpus; + good_cpu = cpumask_any_and(policy->cpus, + cpu_online_mask); else - mask = cpumask_of(j); + good_cpu = j; - set_cpus_allowed_ptr(current, mask); - preempt_disable(); - if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { + if (good_cpu >= nr_cpu_ids) { dprintk("couldn't limit to CPUs in this domain\n"); retval = -EAGAIN; if (first_cpu) { /* We haven't started the transition yet. */ - goto migrate_end; + goto out; } - preempt_enable(); break; } msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; if (first_cpu) { - rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); + rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); if (msr == (oldmsr & 0xffff)) { dprintk("no change needed - msr was and needs " "to be %x\n", oldmsr); retval = 0; - goto migrate_end; + goto out; } freqs.old = extract_clock(oldmsr, cpu, 0); @@ -553,14 +538,11 @@ static int centrino_target (struct cpufreq_policy *policy, oldmsr |= msr; } - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - preempt_enable(); + wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) break; - } - cpu_set(j, *covered_cpus); - preempt_enable(); + cpumask_set_cpu(j, covered_cpus); } for_each_cpu(k, policy->cpus) { @@ -578,10 +560,8 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. */ - for_each_cpu_mask_nr(j, *covered_cpus) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - } + for_each_cpu(j, covered_cpus) + wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h); tmp = freqs.new; freqs.new = freqs.old; @@ -593,15 +573,9 @@ static int centrino_target (struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } } - set_cpus_allowed_ptr(current, saved_mask); retval = 0; - goto out; -migrate_end: - preempt_enable(); - set_cpus_allowed_ptr(current, saved_mask); out: - free_cpumask_var(saved_mask); free_cpumask_var(covered_cpus); return retval; } -- cgit v1.2.3 From 51bb9630e04d8f43e2980b9bd42f09a87c6e56fe Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:38 +1000 Subject: misc:work_on_cpu-acpi Impact: don't play with current's cpumask It's generally a very bad idea to mug some process's cpumask: it could legitimately and reasonably be changed by root, which could break us (if done before our code) or them (if we restore the wrong value). So we use work_on_cpu in two places instead of cpumask games. Signed-off-by: Rusty Russell Cc: Len Brown --- drivers/acpi/processor_throttling.c | 72 +++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index d0d1f4d50434..9b37dda58895 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -827,32 +827,22 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) return 0; } -static int acpi_processor_get_throttling(struct acpi_processor *pr) +static long get_throttling(void *_pr) { - cpumask_var_t saved_mask; - int ret; + struct acpi_processor *pr = _pr; + + return pr->throttling.acpi_processor_get_throttling(pr); +} +static int acpi_processor_get_throttling(struct acpi_processor *pr) +{ if (!pr) return -EINVAL; if (!pr->flags.throttling) return -ENODEV; - if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL)) - return -ENOMEM; - - /* - * Migrate task to the cpu pointed by pr. - */ - cpumask_copy(saved_mask, ¤t->cpus_allowed); - /* FIXME: use work_on_cpu() */ - set_cpus_allowed_ptr(current, cpumask_of(pr->id)); - ret = pr->throttling.acpi_processor_get_throttling(pr); - /* restore the previous state */ - set_cpus_allowed_ptr(current, saved_mask); - free_cpumask_var(saved_mask); - - return ret; + return work_on_cpu(pr->id, get_throttling, pr); } static int acpi_processor_get_fadt_info(struct acpi_processor *pr) @@ -993,9 +983,22 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, return 0; } +struct set_throttling_info { + struct acpi_processor *pr; + struct acpi_processor_throttling *p_throttling; + int target_state; +}; + +static long set_throttling(void *_sti) +{ + struct set_throttling_info *s = _sti; + + return s->p_throttling->acpi_processor_set_throttling(s->pr, + s->target_state); +} + int acpi_processor_set_throttling(struct acpi_processor *pr, int state) { - cpumask_var_t saved_mask; int ret = 0; unsigned int i; struct acpi_processor *match_pr; @@ -1012,15 +1015,9 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) if ((state < 0) || (state > (pr->throttling.state_count - 1))) return -EINVAL; - if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL)) - return -ENOMEM; - - if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) { - free_cpumask_var(saved_mask); + if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) return -ENOMEM; - } - cpumask_copy(saved_mask, ¤t->cpus_allowed); t_state.target_state = state; p_throttling = &(pr->throttling); cpumask_and(online_throttling_cpus, cpu_online_mask, @@ -1042,10 +1039,9 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it can be called only for the cpu pointed by pr. */ if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) { - /* FIXME: use work_on_cpu() */ - set_cpus_allowed_ptr(current, cpumask_of(pr->id)); - ret = p_throttling->acpi_processor_set_throttling(pr, - t_state.target_state); + struct set_throttling_info sti + = { pr, p_throttling, t_state.target_state }; + ret = work_on_cpu(pr->id, set_throttling, &sti); } else { /* * When the T-state coordination is SW_ALL or HW_ALL, @@ -1053,6 +1049,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * cpus. */ for_each_cpu(i, online_throttling_cpus) { + struct set_throttling_info sti; + match_pr = per_cpu(processors, i); /* * If the pointer is invalid, we will report the @@ -1074,11 +1072,10 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) continue; } t_state.cpu = i; - /* FIXME: use work_on_cpu() */ - set_cpus_allowed_ptr(current, cpumask_of(i)); - ret = match_pr->throttling. - acpi_processor_set_throttling( - match_pr, t_state.target_state); + sti.pr = match_pr; + sti.p_throttling = &match_pr->throttling; + sti.target_state = t_state.target_state; + ret = work_on_cpu(i, set_throttling, &sti); } } /* @@ -1092,11 +1089,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE, &t_state); } - /* restore the previous state */ - /* FIXME: use work_on_cpu() */ - set_cpus_allowed_ptr(current, saved_mask); - free_cpumask_var(online_throttling_cpus); - free_cpumask_var(saved_mask); return ret; } -- cgit v1.2.3 From ba84e7a365d0dd8792ba41d051f88e992639ee45 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:38 +1000 Subject: misc:work_on_cpu-dcdbas Impact: don't play with current's cpumask It's generally a very bad idea to mug some process's cpumask: it could legitimately and reasonably be changed by root, which could break us (if done before our code) or them (if we restore the wrong value). So we use work_on_cpu instead of cpumask games. Note one subtle change. If we can't get to CPU 0 for some reason, the return will be -EINVAL not -EBUSY. We could fix this in the caller? ie: return work_on_cpu(0, generate_smi, smi_cmd) == 0 ? 0 : -EBUSY; Signed-off-by: Rusty Russell Cc: Matthew Garrett Cc: Matt Domsch Cc: Douglas_Warzecha@dell.com --- drivers/firmware/dcdbas.c | 49 +++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 18d65fb42ee7..e8727817c8b5 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -237,6 +237,23 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, return count; } +static long generate_smi(void *_smi_cmd) +{ + struct smi_cmd *smi_cmd = _smi_cmd; + + /* generate SMI */ + asm volatile ( + "outb %b0,%w1" + : /* no output args */ + : "a" (smi_cmd->command_code), + "d" (smi_cmd->command_address), + "b" (smi_cmd->ebx), + "c" (smi_cmd->ecx) + : "memory" + ); + return 0; +} + /** * dcdbas_smi_request: generate SMI request * @@ -244,9 +261,6 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, */ int dcdbas_smi_request(struct smi_cmd *smi_cmd) { - cpumask_var_t old_mask; - int ret = 0; - if (smi_cmd->magic != SMI_CMD_MAGIC) { dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n", __func__); @@ -254,35 +268,8 @@ int dcdbas_smi_request(struct smi_cmd *smi_cmd) } /* SMI requires CPU 0 */ - if (!alloc_cpumask_var(&old_mask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_copy(old_mask, ¤t->cpus_allowed); - set_cpus_allowed_ptr(current, cpumask_of(0)); - if (smp_processor_id() != 0) { - dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", - __func__); - ret = -EBUSY; - goto out; - } - - /* generate SMI */ - asm volatile ( - "outb %b0,%w1" - : /* no output args */ - : "a" (smi_cmd->command_code), - "d" (smi_cmd->command_address), - "b" (smi_cmd->ebx), - "c" (smi_cmd->ecx) - : "memory" - ); - -out: - set_cpus_allowed_ptr(current, old_mask); - free_cpumask_var(old_mask); - return ret; + return work_on_cpu(0, generate_smi, smi_cmd); } - /** * smi_request_store: * -- cgit v1.2.3 From c254ca7ea0ba50e10b46299cdac3fc31127b3335 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2009 10:21:38 +1000 Subject: misc:remove-start_kernel-prototypes Impact: cleanup No need for redeclaration. Signed-off-by: Rusty Russell --- arch/frv/include/asm/gdb-stub.h | 1 - arch/mn10300/include/asm/gdb-stub.h | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/frv/include/asm/gdb-stub.h b/arch/frv/include/asm/gdb-stub.h index 24f9738670bd..2da716407ff2 100644 --- a/arch/frv/include/asm/gdb-stub.h +++ b/arch/frv/include/asm/gdb-stub.h @@ -90,7 +90,6 @@ extern void gdbstub_do_rx(void); extern asmlinkage void __debug_stub_init_break(void); extern asmlinkage void __break_hijack_kernel_event(void); extern asmlinkage void __break_hijack_kernel_event_breaks_here(void); -extern asmlinkage void start_kernel(void); extern asmlinkage void gdbstub_rx_handler(void); extern asmlinkage void gdbstub_rx_irq(void); diff --git a/arch/mn10300/include/asm/gdb-stub.h b/arch/mn10300/include/asm/gdb-stub.h index e5a6368559af..556cce992548 100644 --- a/arch/mn10300/include/asm/gdb-stub.h +++ b/arch/mn10300/include/asm/gdb-stub.h @@ -109,7 +109,6 @@ extern asmlinkage int gdbstub_intercept(struct pt_regs *, enum exception_code); extern asmlinkage void gdbstub_exception(struct pt_regs *, enum exception_code); extern asmlinkage void __gdbstub_bug_trap(void); extern asmlinkage void __gdbstub_pause(void); -extern asmlinkage void start_kernel(void); #ifndef CONFIG_MN10300_CACHE_DISABLED extern asmlinkage void gdbstub_purge_cache(void); -- cgit v1.2.3 From 7a5e82cb780071541134bc14c2040a0929e5d21c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 17 Feb 2009 17:08:40 +0800 Subject: cpuhotplug: remove cpu_hotplug_init() cpu_hotplug_init() is almost equal to a dummy function, this patch remove it. Signed-off-by: Lai Jiangshan Signed-off-by: Rusty Russell --- include/linux/cpu.h | 5 ----- init/main.c | 1 - kernel/cpu.c | 11 +++-------- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2643d848df90..4d668e05d458 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,7 +69,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); -extern void cpu_hotplug_init(void); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); @@ -84,10 +83,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) { } -static inline void cpu_hotplug_init(void) -{ -} - static inline void cpu_maps_update_begin(void) { } diff --git a/init/main.c b/init/main.c index 3585f073d636..7e508898c49d 100644 --- a/init/main.c +++ b/init/main.c @@ -647,7 +647,6 @@ asmlinkage void __init start_kernel(void) page_cgroup_init(); mem_init(); enable_debug_pagealloc(); - cpu_hotplug_init(); kmem_cache_init(); kmemtrace_init(); debug_objects_mem_init(); diff --git a/kernel/cpu.c b/kernel/cpu.c index 395b6974dc8d..609fae119e57 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -34,14 +34,9 @@ static struct { * an ongoing cpu hotplug operation. */ int refcount; -} cpu_hotplug; - -void __init cpu_hotplug_init(void) -{ - cpu_hotplug.active_writer = NULL; - mutex_init(&cpu_hotplug.lock); - cpu_hotplug.refcount = 0; -} +} cpu_hotplug = { + .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), +}; #ifdef CONFIG_HOTPLUG_CPU -- cgit v1.2.3