diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-12-04 14:35:15 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-12-04 14:35:15 +1100 |
commit | 5f326153c1ce431b4e3d587d20ea876f03d950a4 (patch) | |
tree | 8b30e55dc3447311b2511b622b485c834af839fd | |
parent | 3ce47ce285af9ef7b758f2952a68281ae74eef0d (diff) | |
parent | 0e94068d0522f152ffe279556e5550a8056026db (diff) |
Merge branch 'quilt/rr'
Conflicts:
arch/x86/kernel/setup.c
kernel/cpu.c
72 files changed, 699 insertions, 532 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 804520633fcf..f2dbbf3bdeab 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -481,51 +481,6 @@ static unsigned long load_initrd(const char *name, unsigned long mem) /* We return the initrd size. */ return len; } - -/* Once we know how much memory we have we can construct simple linear page - * tables which set virtual == physical which will get the Guest far enough - * into the boot to create its own. - * - * We lay them out of the way, just below the initrd (which is why we need to - * know its size here). */ -static unsigned long setup_pagetables(unsigned long mem, - unsigned long initrd_size) -{ - unsigned long *pgdir, *linear; - unsigned int mapped_pages, i, linear_pages; - unsigned int ptes_per_page = getpagesize()/sizeof(void *); - - mapped_pages = mem/getpagesize(); - - /* Each PTE page can map ptes_per_page pages: how many do we need? */ - linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; - - /* We put the toplevel page directory page at the top of memory. */ - pgdir = from_guest_phys(mem) - initrd_size - getpagesize(); - - /* Now we use the next linear_pages pages as pte pages */ - linear = (void *)pgdir - linear_pages*getpagesize(); - - /* Linear mapping is easy: put every page's address into the mapping in - * order. PAGE_PRESENT contains the flags Present, Writable and - * Executable. */ - for (i = 0; i < mapped_pages; i++) - linear[i] = ((i * getpagesize()) | PAGE_PRESENT); - - /* The top level points to the linear page table pages above. */ - for (i = 0; i < mapped_pages; i += ptes_per_page) { - pgdir[i/ptes_per_page] - = ((to_guest_phys(linear) + i*sizeof(void *)) - | PAGE_PRESENT); - } - - verbose("Linear mapping of %u pages in %u pte pages at %#lx\n", - mapped_pages, linear_pages, to_guest_phys(linear)); - - /* We return the top level (guest-physical) address: the kernel needs - * to know where it is. */ - return to_guest_phys(pgdir); -} /*:*/ /* Simple routine to roll all the commandline arguments together with spaces @@ -548,13 +503,13 @@ static void concat(char *dst, char *args[]) /*L:185 This is where we actually tell the kernel to initialize the Guest. We * saw the arguments it expects when we looked at initialize() in lguest_user.c: - * the base of Guest "physical" memory, the top physical page to allow, the - * top level pagetable and the entry point for the Guest. */ -static int tell_kernel(unsigned long pgdir, unsigned long start) + * the base of Guest "physical" memory, the top physical page to allow and the + * entry point for the Guest. */ +static int tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, (unsigned long)guest_base, - guest_limit / getpagesize(), pgdir, start }; + guest_limit / getpagesize(), start }; int fd; verbose("Guest: %p - %p (%#lx)\n", @@ -1030,7 +985,7 @@ static void update_device_status(struct device *dev) /* Zero out the virtqueues. */ for (vq = dev->vq; vq; vq = vq->next) { memset(vq->vring.desc, 0, - vring_size(vq->config.num, getpagesize())); + vring_size(vq->config.num, LGUEST_VRING_ALIGN)); lg_last_avail(vq) = 0; } } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { @@ -1211,7 +1166,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, void *p; /* First we need some memory for this virtqueue. */ - pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) + pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) / getpagesize(); p = get_pages(pages); @@ -1228,7 +1183,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, vq->config.pfn = to_guest_phys(p) / getpagesize(); /* Initialize the vring. */ - vring_init(&vq->vring, num_descs, p, getpagesize()); + vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); /* Append virtqueue to this device's descriptor. We use * device_config() to get the end of the device's current virtqueues; @@ -1941,7 +1896,7 @@ int main(int argc, char *argv[]) { /* Memory, top-level pagetable, code startpoint and size of the * (optional) initrd. */ - unsigned long mem = 0, pgdir, start, initrd_size = 0; + unsigned long mem = 0, start, initrd_size = 0; /* Two temporaries and the /dev/lguest file descriptor. */ int i, c, lguest_fd; /* The boot information for the Guest. */ @@ -2040,9 +1995,6 @@ int main(int argc, char *argv[]) boot->hdr.type_of_loader = 0xFF; } - /* Set up the initial linear pagetables, starting below the initrd. */ - pgdir = setup_pagetables(mem, initrd_size); - /* The Linux boot header contains an "E820" memory map: ours is a * simple, single region. */ boot->e820_entries = 1; @@ -2064,7 +2016,7 @@ int main(int argc, char *argv[]) /* We tell the kernel to initialize the Guest: this returns the open * /dev/lguest file descriptor. */ - lguest_fd = tell_kernel(pgdir, start); + lguest_fd = tell_kernel(start); /* We clone off a thread, which wakes the Launcher whenever one of the * input file descriptors needs attention. We call this the Waker, and diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h index 149532e162c4..b4f284c72ff3 100644 --- a/arch/alpha/include/asm/topology.h +++ b/arch/alpha/include/asm/topology.h @@ -39,7 +39,24 @@ static inline cpumask_t node_to_cpumask(int node) return node_cpu_mask; } +extern struct cpumask node_to_cpumask_map[]; +/* FIXME: This is dumb, recalculating every time. But simple. */ +static const struct cpumask *cpumask_of_node(int node) +{ + int cpu; + + cpumask_clear(&node_to_cpumask_map[node]); + + for_each_online_cpu(cpu) { + if (cpu_to_node(cpu) == node) + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); + } + + return &node_to_cpumask_map[node]; +} + #define pcibus_to_cpumask(bus) (cpu_online_map) +#define cpumask_of_pcibus(bus) (cpu_online_mask) #endif /* !CONFIG_NUMA */ # include <asm-generic/topology.h> diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index a449e999027c..02bee6983ce2 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -79,6 +79,11 @@ int alpha_l3_cacheshape; unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON; #endif +#ifdef CONFIG_NUMA +struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_to_cpumask_map); +#endif + /* Which processor we booted from. */ int boot_cpuid; diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 35bcb641c9e5..66f0f1ef9e7f 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -34,6 +34,7 @@ * Returns a bitmask of CPUs on Node 'node'. */ #define node_to_cpumask(node) (node_to_cpu_mask[node]) +#define cpumask_of_node(node) (&node_to_cpu_mask[node]) /* * Returns the number of the node containing Node 'nid'. @@ -45,7 +46,7 @@ /* * Returns the number of the first CPU on Node 'node'. */ -#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node))) +#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node))) /* * Determines the node for a given pci bus @@ -121,6 +122,10 @@ extern void arch_fix_phys_package_id(int num, u32 slot); node_to_cpumask(pcibus_to_node(bus)) \ ) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_from_node(pcibus_to_node(bus))) + #include <asm-generic/topology.h> #endif /* _ASM_IA64_TOPOLOGY_H */ diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index bd7acc71e8a9..54ae373e6e22 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -1001,7 +1001,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node) || - cpus_empty(node_to_cpumask(node))) + cpumask_empty(cpumask_of_node(node))) return AE_OK; /* We know a gsi to node mapping! */ diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index c8adecd5b416..212da4edc8e5 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -695,16 +695,15 @@ get_target_cpu (unsigned int gsi, int irq) #ifdef CONFIG_NUMA { int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; - cpumask_t cpu_mask; + const struct cpumask *cpu_mask; iosapic_index = find_iosapic(gsi); if (iosapic_index < 0 || iosapic_lists[iosapic_index].node == MAX_NUMNODES) goto skip_numa_setup; - cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); - cpus_and(cpu_mask, cpu_mask, domain); - for_each_cpu_mask(numa_cpu, cpu_mask) { + cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node); + for_each_cpu_and(numa_cpu, cpu_mask, &domain) { if (!cpu_online(numa_cpu)) cpu_clear(numa_cpu, cpu_mask); } diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 636588e7e068..be339477f906 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -385,7 +385,6 @@ static int sn_topology_show(struct seq_file *s, void *d) int j; const char *slabname; int ordinal; - cpumask_t cpumask; char slice; struct cpuinfo_ia64 *c; struct sn_hwperf_port_info *ptdata; @@ -473,23 +472,21 @@ static int sn_topology_show(struct seq_file *s, void *d) * CPUs on this node, if any */ if (!SN_HWPERF_IS_IONODE(obj)) { - cpumask = node_to_cpumask(ordinal); - for_each_online_cpu(i) { - if (cpu_isset(i, cpumask)) { - slice = 'a' + cpuid_to_slice(i); - c = cpu_data(i); - seq_printf(s, "cpu %d %s%c local" - " freq %luMHz, arch ia64", - i, obj->location, slice, - c->proc_freq / 1000000); - for_each_online_cpu(j) { - seq_printf(s, j ? ":%d" : ", dist %d", - node_distance( + for_each_cpu_and(i, cpu_online_mask, + cpumask_of_node(ordinal)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( cpu_to_node(i), cpu_to_node(j))); - } - seq_putc(s, '\n'); } + seq_putc(s, '\n'); } } } diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 0f06b3722e96..2547d6c4a827 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -592,7 +592,7 @@ int setup_profiling_timer(unsigned int multiplier) * accounting. At that time they also adjust their APIC timers * accordingly. */ - for (i = 0; i < NR_CPUS; ++i) + for_each_possible_cpu(i) per_cpu(prof_multiplier, i) = multiplier; return 0; diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 7785bec732f2..c1c3f5b2f18f 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -25,11 +25,13 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; #define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid) #define parent_node(node) (node) #define node_to_cpumask(node) (hub_data(node)->h_cpus) -#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node))) +#define cpumask_of_node(node) (&hub_data(node)->h_cpus) +#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node))) struct pci_bus; extern int pcibus_to_node(struct pci_bus *); #define pcibus_to_cpumask(bus) (cpu_online_map) +#define cpumask_of_pcibus(bus) (cpu_online_mask) extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c32da6f97999..bcf25c2b8d21 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node) return numa_cpumask_lookup_table[node]; } +#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) + static inline int node_to_first_cpu(int node) { - cpumask_t tmp; - tmp = node_to_cpumask(node); - return first_cpu(tmp); + return cpumask_first(cpumask_of_node(node)); } int of_node_to_nid(struct device_node *device); @@ -46,6 +46,10 @@ static inline int pcibus_to_node(struct pci_bus *bus) node_to_cpumask(pcibus_to_node(bus)) \ ) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) + /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ .span = CPU_MASK_NONE, \ diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 20885a38237a..4b59649f769f 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -134,36 +134,15 @@ void ppc_enable_pmcs(void) } EXPORT_SYMBOL(ppc_enable_pmcs); -#if defined(CONFIG_6xx) || defined(CONFIG_PPC64) -/* XXX convert to rusty's on_one_cpu */ -static unsigned long run_on_cpu(unsigned long cpu, - unsigned long (*func)(unsigned long), - unsigned long arg) -{ - cpumask_t old_affinity = current->cpus_allowed; - unsigned long ret; - - /* should return -EINVAL to userspace */ - if (set_cpus_allowed(current, cpumask_of_cpu(cpu))) - return 0; - - ret = func(arg); - - set_cpus_allowed(current, old_affinity); - - return ret; -} -#endif - #define SYSFS_PMCSETUP(NAME, ADDRESS) \ -static unsigned long read_##NAME(unsigned long junk) \ +static long read_##NAME(void *junk) \ { \ return mfspr(ADDRESS); \ } \ -static unsigned long write_##NAME(unsigned long val) \ +static long write_##NAME(void *val) \ { \ ppc_enable_pmcs(); \ - mtspr(ADDRESS, val); \ + mtspr(ADDRESS, (unsigned long)val); \ return 0; \ } \ static ssize_t show_##NAME(struct sys_device *dev, \ @@ -171,7 +150,7 @@ static ssize_t show_##NAME(struct sys_device *dev, \ char *buf) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ - unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ + unsigned long val = work_on_cpu(cpu->sysdev.id, read_##NAME, NULL); \ return sprintf(buf, "%lx\n", val); \ } \ static ssize_t __used \ @@ -183,7 +162,7 @@ static ssize_t __used \ int ret = sscanf(buf, "%lx", &val); \ if (ret != 1) \ return -EINVAL; \ - run_on_cpu(cpu->sysdev.id, write_##NAME, val); \ + work_on_cpu(cpu->sysdev.id, write_##NAME, (void *)val); \ return count; \ } diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c index 906a0a2a9fe1..1410443731eb 100644 --- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu) u64 route; if (nr_cpus_node(spu->node)) { - cpumask_t spumask = node_to_cpumask(spu->node); - cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu)); + const struct cpumask *spumask = cpumask_of_node(spu->node), + *cpumask = cpumask_of_node(cpu_to_node(cpu)); - if (!cpus_intersects(spumask, cpumask)) + if (!cpumask_intersects(spumask, cpumask)) return; } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 2ad914c47493..6a0ad196aeb3 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx) static int __node_allowed(struct spu_context *ctx, int node) { if (nr_cpus_node(node)) { - cpumask_t mask = node_to_cpumask(node); + const struct cpumask *mask = cpumask_of_node(node); - if (cpus_intersects(mask, ctx->cpus_allowed)) + if (cpumask_intersects(mask, &ctx->cpus_allowed)) return 1; } diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h index c13568b9351c..0503936f101f 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/asm/kvm_virtio.h @@ -50,6 +50,10 @@ struct kvm_vqconfig { #define KVM_S390_VIRTIO_RESET 1 #define KVM_S390_VIRTIO_SET_STATUS 2 +/* The alignment to use between consumer and producer parts of vring. + * This is pagesize for historical reasons. */ +#define KVM_S390_VIRTIO_RING_ALIGN 4096 + #ifdef __KERNEL__ /* early virtio console setup */ #ifdef CONFIG_S390_GUEST diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index d96c91643458..fff4a86c602a 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -6,6 +6,7 @@ #define mc_capable() (1) cpumask_t cpu_coregroup_map(unsigned int cpu); +const struct cpumask *cpu_coregroup_mask(unsigned int cpu); extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 42e6cb354017..5fd5e606554b 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -97,6 +97,11 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) return mask; } +const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) { unsigned int cpu; diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 95f0085e098a..9aa160d0efe5 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -33,6 +33,7 @@ #define parent_node(node) ((void)(node),0) #define node_to_cpumask(node) ((void)node, cpu_online_map) +#define cpumask_of_node(node) ((void)node, cpu_online_mask) #define node_to_first_cpu(node) ((void)(node),0) #define pcibus_to_node(bus) ((void)(bus), -1) diff --git a/arch/sparc/include/asm/bitops_32.h b/arch/sparc/include/asm/bitops_32.h index 68b98a7e6454..9cf4ae0cd7ba 100644 --- a/arch/sparc/include/asm/bitops_32.h +++ b/arch/sparc/include/asm/bitops_32.h @@ -98,6 +98,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) #include <asm-generic/bitops/sched.h> #include <asm-generic/bitops/ffs.h> #include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> #include <asm-generic/bitops/hweight.h> #include <asm-generic/bitops/lock.h> diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 001c04027c82..3270cfb1ab53 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -16,8 +16,12 @@ static inline cpumask_t node_to_cpumask(int node) { return numa_cpumask_lookup_table[node]; } +#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) -/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +/* + * Returns a pointer to the cpumask of CPUs on Node 'node'. + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ cpumask_t *v = &(numa_cpumask_lookup_table[node]) @@ -26,9 +30,7 @@ static inline cpumask_t node_to_cpumask(int node) static inline int node_to_first_cpu(int node) { - cpumask_t tmp; - tmp = node_to_cpumask(node); - return first_cpu(tmp); + return cpumask_first(cpumask_of_node(node)); } struct pci_bus; @@ -82,5 +84,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #endif /* CONFIG_SMP */ #define cpu_coregroup_map(cpu) (cpu_core_map[cpu]) +#define cpu_coregroup_mask(cpu) (&cpu_core_map[cpu]) #endif /* _ASM_SPARC64_TOPOLOGY_H */ diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c index df2efb7fc14c..4f6098d318ec 100644 --- a/arch/sparc64/kernel/of_device.c +++ b/arch/sparc64/kernel/of_device.c @@ -778,7 +778,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op, out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); irq_set_affinity(irq, &numa_mask); } diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c index 0d0cd815e83e..4ef282e81912 100644 --- a/arch/sparc64/kernel/pci_msi.c +++ b/arch/sparc64/kernel/pci_msi.c @@ -286,7 +286,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); irq_set_affinity(irq, &numa_mask); } diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 647781298e7e..f8959c7a985f 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -100,9 +100,9 @@ static inline void early_quirks(void) { } #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ -static inline int __pcibus_to_node(struct pci_bus *bus) +static inline int __pcibus_to_node(const struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; + const struct pci_sysdata *sd = bus->sysdata; return sd->node; } @@ -111,6 +111,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) { return node_to_cpumask(__pcibus_to_node(bus)); } + +static inline const struct cpumask * +cpumask_of_pcibus(const struct pci_bus *bus) +{ + return cpumask_of_node(__pcibus_to_node(bus)); +} #endif #endif /* _ASM_X86_PCI_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ff386ff50ed7..168203c0c316 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -61,13 +61,19 @@ static inline int cpu_to_node(int cpu) * * Side note: this function creates the returned cpumask on the stack * so with a high NR_CPUS count, excessive stack space is used. The - * node_to_cpumask_ptr function should be used whenever possible. + * cpumask_of_node function should be used whenever possible. */ static inline cpumask_t node_to_cpumask(int node) { return node_to_cpumask_map[node]; } +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline const struct cpumask *cpumask_of_node(int node) +{ + return &node_to_cpumask_map[node]; +} + #else /* CONFIG_X86_64 */ /* Mappings between node number and cpus on that node. */ @@ -82,7 +88,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); extern int early_cpu_to_node(int cpu); -extern const cpumask_t *_node_to_cpumask_ptr(int node); +extern const cpumask_t *cpumask_of_node(int node); extern cpumask_t node_to_cpumask(int node); #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ @@ -103,7 +109,7 @@ static inline int early_cpu_to_node(int cpu) } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ -static inline const cpumask_t *_node_to_cpumask_ptr(int node) +static inline const cpumask_t *cpumask_of_node(int node) { return &node_to_cpumask_map[node]; } @@ -116,12 +122,15 @@ static inline cpumask_t node_to_cpumask(int node) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ -/* Replace default node_to_cpumask_ptr with optimized version */ +/* + * Replace default node_to_cpumask_ptr with optimized version + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ - const cpumask_t *v = _node_to_cpumask_ptr(node) + const cpumask_t *v = cpumask_of_node(node) #define node_to_cpumask_ptr_next(v, node) \ - v = _node_to_cpumask_ptr(node) + v = cpumask_of_node(node) #endif /* CONFIG_X86_64 */ @@ -187,7 +196,7 @@ extern int __node_distance(int, int); #define cpu_to_node(cpu) 0 #define early_cpu_to_node(cpu) 0 -static inline const cpumask_t *_node_to_cpumask_ptr(int node) +static inline const cpumask_t *cpumask_of_node(int node) { return &cpu_online_map; } @@ -200,12 +209,15 @@ static inline int node_to_first_cpu(int node) return first_cpu(cpu_online_map); } -/* Replace default node_to_cpumask_ptr with optimized version */ +/* + * Replace default node_to_cpumask_ptr with optimized version + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ - const cpumask_t *v = _node_to_cpumask_ptr(node) + const cpumask_t *v = cpumask_of_node(node) #define node_to_cpumask_ptr_next(v, node) \ - v = _node_to_cpumask_ptr(node) + v = cpumask_of_node(node) #endif #include <asm-generic/topology.h> @@ -214,12 +226,12 @@ static inline int node_to_first_cpu(int node) /* Returns the number of the first CPU on Node 'node'. */ static inline int node_to_first_cpu(int node) { - node_to_cpumask_ptr(mask, node); - return first_cpu(*mask); + return cpumask_first(cpumask_of_node(node)); } #endif extern cpumask_t cpu_coregroup_map(int cpu); +extern const struct cpumask *cpu_coregroup_mask(int cpu); #ifdef ENABLE_TOPO_DEFINES #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1c2084291f97..8e8b1193add5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -334,25 +334,25 @@ static const cpumask_t cpu_mask_none; /* * Returns a pointer to the bitmask of CPUs on Node 'node'. */ -const cpumask_t *_node_to_cpumask_ptr(int node) +const cpumask_t *cpumask_of_node(int node) { if (node_to_cpumask_map == NULL) { printk(KERN_WARNING - "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", + "cpumask_of_node(%d): no node_to_cpumask_map!\n", node); dump_stack(); return (const cpumask_t *)&cpu_online_map; } if (node >= nr_node_ids) { printk(KERN_WARNING - "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", + "cpumask_of_node(%d): node > nr_node_ids(%d)\n", node, nr_node_ids); dump_stack(); return &cpu_mask_none; } return &node_to_cpumask_map[node]; } -EXPORT_SYMBOL(_node_to_cpumask_ptr); +EXPORT_SYMBOL(cpumask_of_node); /* * Returns a bitmask of CPUs on Node 'node'. diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9d58134e0231..5b044f82c43b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -498,7 +498,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) } /* maps the cpu to the sched domain representing multi-core */ -cpumask_t cpu_coregroup_map(int cpu) +const struct cpumask *cpu_coregroup_mask(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); /* @@ -506,9 +506,14 @@ cpumask_t cpu_coregroup_map(int cpu) * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return per_cpu(cpu_core_map, cpu); + return &per_cpu(cpu_core_map, cpu); else - return c->llc_shared_map; + return &c->llc_shared_map; +} + +cpumask_t cpu_coregroup_map(int cpu) +{ + return *cpu_coregroup_mask(cpu); } static void impress_friends(void) diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 5c7cef34c9e7..10b9bd35a8ff 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -30,21 +30,6 @@ ENTRY(lguest_entry) movl $lguest_data - __PAGE_OFFSET, %edx int $LGUEST_TRAP_ENTRY - /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl - * instruction uses %esi implicitly as the source for the copy we're - * about to do. */ - movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi - - /* Copy first 32 entries of page directory to __PAGE_OFFSET entries. - * This means the first 128M of kernel memory will be mapped at - * PAGE_OFFSET where the kernel expects to run. This will get it far - * enough through boot to switch to its own pagetables. */ - movl $32, %ecx - movl %esi, %edi - addl $((__PAGE_OFFSET >> 22) * 4), %edi - rep - movsl - /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 9c990185e9f2..27c1fb2495e3 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1227,7 +1227,7 @@ int setup_profiling_timer(unsigned int multiplier) * new values until the next timer interrupt in which they do process * accounting. */ - for (i = 0; i < NR_CPUS; ++i) + for_each_possible_cpu(i) per_cpu(prof_multiplier, i) = multiplier; return 0; diff --git a/block/blk.h b/block/blk.h index d2e49af90db5..6e1ed40534e9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -99,8 +99,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) static inline int blk_cpu_to_group(int cpu) { #ifdef CONFIG_SCHED_MC - cpumask_t mask = cpu_coregroup_map(cpu); - return first_cpu(mask); + const struct cpumask *mask = cpu_coregroup_mask(cpu); + return cpumask_first(mask); #elif defined(CONFIG_SCHED_SMT) return first_cpu(per_cpu(cpu_sibling_map, cpu)); #else diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index a0c38c94a8a0..804f4b145f14 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -824,26 +824,22 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) return 0; } -static int acpi_processor_get_throttling(struct acpi_processor *pr) +static long get_throttling(void *_pr) { - cpumask_t saved_mask; - int ret; + struct acpi_processor *pr = _pr; + return pr->throttling.acpi_processor_get_throttling(pr); +} + +static int acpi_processor_get_throttling(struct acpi_processor *pr) +{ if (!pr) return -EINVAL; if (!pr->flags.throttling) return -ENODEV; - /* - * Migrate task to the cpu pointed by pr. - */ - saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); - ret = pr->throttling.acpi_processor_get_throttling(pr); - /* restore the previous state */ - set_cpus_allowed_ptr(current, &saved_mask); - return ret; + return work_on_cpu(pr->id, get_throttling, pr); } static int acpi_processor_get_fadt_info(struct acpi_processor *pr) @@ -984,9 +980,22 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, return 0; } +struct set_throttling_info { + struct acpi_processor *pr; + struct acpi_processor_throttling *p_throttling; + int target_state; +}; + +static long set_throttling(void *_sti) +{ + struct set_throttling_info *s = _sti; + + return s->p_throttling->acpi_processor_set_throttling(s->pr, + s->target_state); +} + int acpi_processor_set_throttling(struct acpi_processor *pr, int state) { - cpumask_t saved_mask; int ret = 0; unsigned int i; struct acpi_processor *match_pr; @@ -1003,7 +1012,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) if ((state < 0) || (state > (pr->throttling.state_count - 1))) return -EINVAL; - saved_mask = current->cpus_allowed; t_state.target_state = state; p_throttling = &(pr->throttling); cpus_and(online_throttling_cpus, cpu_online_map, @@ -1025,9 +1033,9 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it can be called only for the cpu pointed by pr. */ if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); - ret = p_throttling->acpi_processor_set_throttling(pr, - t_state.target_state); + struct set_throttling_info sti + = { pr, p_throttling, t_state.target_state }; + ret = work_on_cpu(pr->id, set_throttling, &sti); } else { /* * When the T-state coordination is SW_ALL or HW_ALL, @@ -1035,6 +1043,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * cpus. */ for_each_cpu_mask_nr(i, online_throttling_cpus) { + struct set_throttling_info sti; + match_pr = per_cpu(processors, i); /* * If the pointer is invalid, we will report the @@ -1056,10 +1066,10 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) continue; } t_state.cpu = i; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); - ret = match_pr->throttling. - acpi_processor_set_throttling( - match_pr, t_state.target_state); + sti.pr = match_pr; + sti.p_throttling = &match_pr->throttling; + sti.target_state = t_state.target_state; + ret = work_on_cpu(i, set_throttling, &sti); } } /* @@ -1073,8 +1083,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE, &t_state); } - /* restore the previous state */ - set_cpus_allowed_ptr(current, &saved_mask); return ret; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 85d79a02d487..300078b02e5d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -6,7 +6,6 @@ #include <linux/virtio_blk.h> #include <linux/scatterlist.h> -#define VIRTIO_MAX_SG (3+MAX_PHYS_SEGMENTS) #define PART_BITS 4 static int major, index; @@ -26,8 +25,11 @@ struct virtio_blk mempool_t *pool; + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + /* Scatterlist: can be too big for stack. */ - struct scatterlist sg[VIRTIO_MAX_SG]; + struct scatterlist sg[/*sg_elems*/]; }; struct virtblk_req @@ -97,8 +99,6 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, if (blk_barrier_rq(vbr->req)) vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; - /* This init could be done at vblk creation time */ - sg_init_table(vblk->sg, VIRTIO_MAX_SG); sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr)); num = blk_rq_map_sg(q, vbr->req, vblk->sg+1); sg_set_buf(&vblk->sg[num+1], &vbr->status, sizeof(vbr->status)); @@ -130,7 +130,7 @@ static void do_virtblk_request(struct request_queue *q) while ((req = elv_next_request(q)) != NULL) { vblk = req->rq_disk->private_data; - BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg)); + BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); /* If this request fails, stop queue and wait for something to finish to restart it. */ @@ -196,12 +196,22 @@ static int virtblk_probe(struct virtio_device *vdev) int err; u64 cap; u32 v; - u32 blk_size; + u32 blk_size, sg_elems; if (index_to_minor(index) >= 1 << MINORBITS) return -ENOSPC; - vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); + /* We need to know how many segments before we allocate. */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, + offsetof(struct virtio_blk_config, seg_max), + &sg_elems); + if (err) + sg_elems = 1; + + /* We need an extra sg elements at head and tail. */ + sg_elems += 2; + vdev->priv = vblk = kmalloc(sizeof(*vblk) + + sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL); if (!vblk) { err = -ENOMEM; goto out; @@ -210,6 +220,8 @@ static int virtblk_probe(struct virtio_device *vdev) INIT_LIST_HEAD(&vblk->reqs); spin_lock_init(&vblk->lock); vblk->vdev = vdev; + vblk->sg_elems = sg_elems; + sg_init_table(vblk->sg, vblk->sg_elems); /* We expect one virtqueue, for output. */ vblk->vq = vdev->config->find_vq(vdev, 0, blk_done); @@ -277,6 +289,13 @@ static int virtblk_probe(struct virtio_device *vdev) } set_capacity(vblk->disk, cap); + /* We can handle whatever the host told us to handle. */ + blk_queue_max_phys_segments(vblk->disk->queue, vblk->sg_elems-2); + blk_queue_max_hw_segments(vblk->disk->queue, vblk->sg_elems-2); + + /* No real sector limit. */ + blk_queue_max_sectors(vblk->disk->queue, -1U); + /* Host can optionally specify maximum segment size and number of * segments. */ err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, @@ -284,12 +303,8 @@ static int virtblk_probe(struct virtio_device *vdev) &v); if (!err) blk_queue_max_segment_size(vblk->disk->queue, v); - - err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, - offsetof(struct virtio_blk_config, seg_max), - &v); - if (!err) - blk_queue_max_hw_segments(vblk->disk->queue, v); + else + blk_queue_max_segment_size(vblk->disk->queue, -1U); /* Host can optionally specify the block size of the device */ err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 74ecb5b2968e..178638fa226f 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -692,6 +692,7 @@ void hvc_resize(struct hvc_struct *hp, struct winsize ws) hp->ws = ws; schedule_work(&hp->tty_resize); } +EXPORT_SYMBOL_GPL(hvc_resize); /* * This kthread is either polling or interrupt driven. This is determined by diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 3fb0d2c88ba5..ff6f5a4b58fb 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -138,12 +138,33 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) } /* + * virtio console configuration. This supports: + * - console resize + */ +static void virtcons_apply_config(struct virtio_device *dev) +{ + struct winsize ws; + + if (virtio_has_feature(dev, VIRTIO_CONSOLE_F_SIZE)) { + dev->config->get(dev, + offsetof(struct virtio_console_config, cols), + &ws.ws_col, sizeof(u16)); + dev->config->get(dev, + offsetof(struct virtio_console_config, rows), + &ws.ws_row, sizeof(u16)); + hvc_resize(hvc, ws); + } +} + +/* * we support only one console, the hvc struct is a global var - * There is no need to do anything + * We set the configuration at this point, since we now have a tty */ static int notifier_add_vio(struct hvc_struct *hp, int data) { hp->irq_requested = 1; + virtcons_apply_config(vdev); + return 0; } @@ -234,11 +255,18 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +static unsigned int features[] = { + VIRTIO_CONSOLE_F_SIZE, +}; + static struct virtio_driver virtio_console = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtcons_probe, + .config_changed = virtcons_apply_config, }; static int __init init(void) diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 50a071f1c945..924097ccc860 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -237,6 +237,23 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, return count; } +static long generate_smi(void *_smi_cmd) +{ + struct smi_cmd *smi_cmd = _smi_cmd; + + /* generate SMI */ + asm volatile ( + "outb %b0,%w1" + : /* no output args */ + : "a" (smi_cmd->command_code), + "d" (smi_cmd->command_address), + "b" (smi_cmd->ebx), + "c" (smi_cmd->ecx) + : "memory" + ); + return 0; +} + /** * smi_request: generate SMI request * @@ -244,9 +261,6 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, */ static int smi_request(struct smi_cmd *smi_cmd) { - cpumask_t old_mask; - int ret = 0; - if (smi_cmd->magic != SMI_CMD_MAGIC) { dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n", __func__); @@ -254,29 +268,7 @@ static int smi_request(struct smi_cmd *smi_cmd) } /* SMI requires CPU 0 */ - old_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); - if (smp_processor_id() != 0) { - dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", - __func__); - ret = -EBUSY; - goto out; - } - - /* generate SMI */ - asm volatile ( - "outb %b0,%w1" - : /* no output args */ - : "a" (smi_cmd->command_code), - "d" (smi_cmd->command_address), - "b" (smi_cmd->ebx), - "c" (smi_cmd->ecx) - : "memory" - ); - -out: - set_cpus_allowed_ptr(current, &old_mask); - return ret; + return work_on_cpu(0, generate_smi, smi_cmd); } /** diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 757035ea246f..3128a5090dbd 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -659,12 +659,12 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) WARN_ON_ONCE(!in_interrupt()); if (ehca_debug_level >= 3) - ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); + ehca_dmp(cpu_online_mask, cpumask_size(), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = next_cpu_nr(pool->last_cpu, cpu_online_map); + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); if (cpu >= nr_cpu_ids) - cpu = first_cpu(cpu_online_map); + cpu = cpumask_first(cpu_online_mask); pool->last_cpu = cpu; spin_unlock_irqrestore(&pool->last_cpu_lock, flags); @@ -855,7 +855,7 @@ static int __cpuinit comp_pool_callback(struct notifier_block *nfb, case CPU_UP_CANCELED_FROZEN: ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, any_online_cpu(cpu_online_map)); + kthread_bind(cct->task, cpumask_any(cpu_online_mask)); destroy_comp_task(pool, cpu); break; case CPU_ONLINE: @@ -902,7 +902,7 @@ int ehca_create_comp_pool(void) return -ENOMEM; spin_lock_init(&pool->last_cpu_lock); - pool->last_cpu = any_online_cpu(cpu_online_map); + pool->last_cpu = cpumask_any(cpu_online_mask); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); if (pool->cpu_comp_tasks == NULL) { @@ -934,10 +934,9 @@ void ehca_destroy_comp_pool(void) unregister_hotcpu_notifier(&comp_pool_callback_nb); - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online(i)) - destroy_comp_task(pool, i); - } + for_each_online_cpu(i) + destroy_comp_task(pool, i); + free_percpu(pool->cpu_comp_tasks); kfree(pool); } diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 1af1f3a907c6..b4c2b98527b2 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1674,7 +1674,7 @@ static int find_best_unit(struct file *fp, * InfiniPath chip to that processor (we assume reasonable connectivity, * for now). This code assumes that if affinity has been set * before this point, that at most one cpu is set; for now this - * is reasonable. I check for both cpus_empty() and cpus_full(), + * is reasonable. I check for both cpumask_empty() and cpumask_full(), * in case some kernel variant sets none of the bits when no * affinity is set. 2.6.11 and 12 kernels have all present * cpus set. Some day we'll have to fix it up further to handle @@ -1683,11 +1683,11 @@ static int find_best_unit(struct file *fp, * information. There may be some issues with dual core numbering * as well. This needs more work prior to release. */ - if (!cpus_empty(current->cpus_allowed) && - !cpus_full(current->cpus_allowed)) { + if (!cpumask_empty(¤t->cpus_allowed) && + !cpumask_full(¤t->cpus_allowed)) { int ncpus = num_online_cpus(), curcpu = -1, nset = 0; for (i = 0; i < ncpus; i++) - if (cpu_isset(i, current->cpus_allowed)) { + if (cpumask_test_cpu(i, ¤t->cpus_allowed)) { ipath_cdbg(PROC, "%s[%u] affinity set for " "cpu %d/%d\n", current->comm, current->pid, i, ncpus); diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 5faefeaf6790..f2c641e0bdde 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -164,7 +164,7 @@ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt); void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt); /* page_tables.c: */ -int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); +int init_guest_pagetable(struct lguest *lg); void free_guest_pagetable(struct lguest *lg); void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable); void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i); diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index a661bbdae3d6..b02f6bcb64c4 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -250,7 +250,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* Figure out how many pages the ring will take, and map that memory */ lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, DIV_ROUND_UP(vring_size(lvq->config.num, - PAGE_SIZE), + LGUEST_VRING_ALIGN), PAGE_SIZE)); if (!lvq->pages) { err = -ENOMEM; @@ -259,8 +259,8 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* OK, tell virtio_ring.c to set up a virtqueue now we know its size * and we've got a pointer to its pages. */ - vq = vring_new_virtqueue(lvq->config.num, vdev, lvq->pages, - lg_notify, callback); + vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, + vdev, lvq->pages, lg_notify, callback); if (!vq) { err = -ENOMEM; goto unmap; diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index e73a000473cc..34bc017b8b3c 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -146,7 +146,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) return 0; } -/*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit) +/*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit) * values (in addition to the LHREQ_INITIALIZE value). These are: * * base: The start of the Guest-physical memory inside the Launcher memory. @@ -155,9 +155,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) * allowed to access. The Guest memory lives inside the Launcher, so it sets * this to ensure the Guest can only reach its own memory. * - * pgdir: The (Guest-physical) address of the top of the initial Guest - * pagetables (which are set up by the Launcher). - * * start: The first instruction to execute ("eip" in x86-speak). */ static int initialize(struct file *file, const unsigned long __user *input) @@ -166,7 +163,7 @@ static int initialize(struct file *file, const unsigned long __user *input) * Guest. */ struct lguest *lg; int err; - unsigned long args[4]; + unsigned long args[3]; /* We grab the Big Lguest lock, which protects against multiple * simultaneous initializations. */ @@ -192,14 +189,14 @@ static int initialize(struct file *file, const unsigned long __user *input) lg->mem_base = (void __user *)args[0]; lg->pfn_limit = args[1]; - /* This is the first cpu (cpu 0) and it will start booting at args[3] */ - err = lg_cpu_start(&lg->cpus[0], 0, args[3]); + /* This is the first cpu (cpu 0) and it will start booting at args[2] */ + err = lg_cpu_start(&lg->cpus[0], 0, args[2]); if (err) goto release_guest; /* Initialize the Guest's shadow page tables, using the toplevel * address the Launcher gave us. This allocates memory, so can fail. */ - err = init_guest_pagetable(lg, args[2]); + err = init_guest_pagetable(lg); if (err) goto free_regs; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 81d0c6053447..576a8318221c 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -14,6 +14,7 @@ #include <linux/percpu.h> #include <asm/tlbflush.h> #include <asm/uaccess.h> +#include <asm/bootparam.h> #include "lg.h" /*M:008 We hold reference to pages, which prevents them from being swapped. @@ -581,15 +582,82 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx) release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); } +/* Once we know how much memory we have we can construct simple identity + * (which set virtual == physical) and linear mappings + * which will get the Guest far enough into the boot to create its own. + * + * We lay them out of the way, just below the initrd (which is why we need to + * know its size here). */ +static unsigned long setup_pagetables(struct lguest *lg, + unsigned long mem, + unsigned long initrd_size) +{ + pgd_t __user *pgdir; + pte_t __user *linear; + unsigned int mapped_pages, i, linear_pages, phys_linear; + unsigned long mem_base = (unsigned long)lg->mem_base; + + /* We have mapped_pages frames to map, so we need + * linear_pages page tables to map them. */ + mapped_pages = mem / PAGE_SIZE; + linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; + + /* We put the toplevel page directory page at the top of memory. */ + pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE); + + /* Now we use the next linear_pages pages as pte pages */ + linear = (void *)pgdir - linear_pages * PAGE_SIZE; + + /* Linear mapping is easy: put every page's address into the + * mapping in order. */ + for (i = 0; i < mapped_pages; i++) { + pte_t pte; + pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); + if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0) + return -EFAULT; + } + + /* The top level points to the linear page table pages above. + * We setup the identity and linear mappings here. */ + phys_linear = (unsigned long)linear - mem_base; + for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { + pgd_t pgd; + pgd = __pgd((phys_linear + i * sizeof(pte_t)) | + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); + + if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd)) + || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET) + + i / PTRS_PER_PTE], + &pgd, sizeof(pgd))) + return -EFAULT; + } + + /* We return the top level (guest-physical) address: remember where + * this is. */ + return (unsigned long)pgdir - mem_base; +} + /*H:500 (vii) Setting up the page tables initially. * * When a Guest is first created, the Launcher tells us where the toplevel of * its first page table is. We set some things up here: */ -int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) +int init_guest_pagetable(struct lguest *lg) { + u64 mem; + u32 initrd_size; + struct boot_params __user *boot = (struct boot_params *)lg->mem_base; + + /* Get the Guest memory size and the ramdisk size from the boot header + * located at lg->mem_base (Guest address 0). */ + if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) + || get_user(initrd_size, &boot->hdr.ramdisk_size)) + return -EFAULT; + /* We start on the first shadow page table, and give it a blank PGD * page. */ - lg->pgdirs[0].gpgdir = pgtable; + lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); + if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) + return lg->pgdirs[0].gpgdir; lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); if (!lg->pgdirs[0].pgdir) return -ENOMEM; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 19f924429591..ef7ba89b3bd1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/cpu.h> #include "pci.h" /* @@ -185,32 +186,43 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, return pci_match_id(drv->id_table, dev); } +struct drv_dev_and_id { + struct pci_driver *drv; + struct pci_dev *dev; + const struct pci_device_id *id; +}; + +static long local_pci_probe(void *_ddi) +{ + struct drv_dev_and_id *ddi = _ddi; + + return ddi->drv->probe(ddi->dev, ddi->id); +} + static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, const struct pci_device_id *id) { - int error; -#ifdef CONFIG_NUMA - /* Execute driver initialization on node where the - device's bus is attached to. This way the driver likely - allocates its local memory on the right node without - any need to change it. */ - struct mempolicy *oldpol; - cpumask_t oldmask = current->cpus_allowed; - int node = dev_to_node(&dev->dev); + int error, node; + struct drv_dev_and_id ddi = { drv, dev, id }; + /* Execute driver initialization on node where the device's + bus is attached to. This way the driver likely allocates + its local memory on the right node without any need to + change it. */ + node = dev_to_node(&dev->dev); if (node >= 0) { + int cpu; node_to_cpumask_ptr(nodecpumask, node); - set_cpus_allowed_ptr(current, nodecpumask); - } - /* And set default memory allocation policy */ - oldpol = current->mempolicy; - current->mempolicy = NULL; /* fall back to system default policy */ -#endif - error = drv->probe(dev, id); -#ifdef CONFIG_NUMA - set_cpus_allowed_ptr(current, &oldmask); - current->mempolicy = oldpol; -#endif + + get_online_cpus(); + cpu = cpumask_any_and(nodecpumask, cpu_online_mask); + if (cpu < nr_cpu_ids) + error = work_on_cpu(cpu, local_pci_probe, &ddi); + else + error = local_pci_probe(&ddi); + put_online_cpus(); + } else + error = local_pci_probe(&ddi); return error; } diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c index 7ff824496b39..7e6b5a3b3281 100644 --- a/drivers/pnp/pnpbios/bioscalls.c +++ b/drivers/pnp/pnpbios/bioscalls.c @@ -481,7 +481,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header) set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); - for (i = 0; i < NR_CPUS; i++) { + for_each_possible_cpu(i) { struct desc_struct *gdt = get_cpu_gdt_table(i); if (!gdt) continue; diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 3d442444c618..c79cf0515374 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -188,11 +188,13 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, config = kvm_vq_config(kdev->desc)+index; err = vmem_add_mapping(config->address, - vring_size(config->num, PAGE_SIZE)); + vring_size(config->num, + KVM_S390_VIRTIO_RING_ALIGN)); if (err) goto out; - vq = vring_new_virtqueue(config->num, vdev, (void *) config->address, + vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN, + vdev, (void *) config->address, kvm_notify, callback); if (!vq) { err = -ENOMEM; @@ -209,7 +211,8 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, return vq; unmap: vmem_remove_mapping(config->address, - vring_size(config->num, PAGE_SIZE)); + vring_size(config->num, + KVM_S390_VIRTIO_RING_ALIGN)); out: return ERR_PTR(err); } @@ -220,7 +223,8 @@ static void kvm_del_vq(struct virtqueue *vq) vring_del_virtqueue(vq); vmem_remove_mapping(config->address, - vring_size(config->num, PAGE_SIZE)); + vring_size(config->num, + KVM_S390_VIRTIO_RING_ALIGN)); } /* @@ -295,13 +299,29 @@ static void scan_devices(void) */ static void kvm_extint_handler(u16 code) { - void *data = (void *) *(long *) __LC_PFAULT_INTPARM; - u16 subcode = S390_lowcore.cpu_addr; + struct virtqueue *vq; + u16 subcode; + int config_changed; + subcode = S390_lowcore.cpu_addr; if ((subcode & 0xff00) != VIRTIO_SUBCODE_64) return; - vring_interrupt(0, data); + /* The LSB might be overloaded, we have to mask it */ + vq = (struct virtqueue *) ((*(long *) __LC_PFAULT_INTPARM) & ~1UL); + + /* We use the LSB of extparam, to decide, if this interrupt is a config + * change or a "standard" interrupt */ + config_changed = (*(int *) __LC_EXT_PARAMS & 1); + + if (config_changed) { + struct virtio_driver *drv; + drv = container_of(vq->vdev->dev.driver, + struct virtio_driver, driver); + if (drv->config_changed) + drv->config_changed(vq->vdev); + } else + vring_interrupt(0, vq); } /* diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 5b78fd0aff0a..018c070a357f 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -176,7 +176,7 @@ int register_virtio_device(struct virtio_device *dev) /* Assign a unique device index and hence name. */ dev->index = dev_index++; - sprintf(dev->dev.bus_id, "virtio%u", dev->index); + dev_set_name(&dev->dev, "virtio%u", dev->index); /* We always start by resetting the device, in case a previous * driver messed it up. This also tests that code path a little. */ diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 62eab43152d2..59268266b79a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -56,6 +56,15 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +static u32 page_to_balloon_pfn(struct page *page) +{ + unsigned long pfn = page_to_pfn(page); + + BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT); + /* Convert pfn from Linux page size to balloon page size. */ + return pfn >> (PAGE_SHIFT - VIRTIO_BALLOON_PFN_SHIFT); +} + static void balloon_ack(struct virtqueue *vq) { struct virtio_balloon *vb; @@ -99,7 +108,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) msleep(200); break; } - vb->pfns[vb->num_pfns] = page_to_pfn(page); + vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page); totalram_pages--; vb->num_pages++; list_add(&page->lru, &vb->pages); @@ -132,7 +141,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { page = list_first_entry(&vb->pages, struct page, lru); list_del(&page->lru); - vb->pfns[vb->num_pfns] = page_to_pfn(page); + vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page); vb->num_pages--; } diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index c7dc37c7cce9..7462a51e820b 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -75,7 +75,7 @@ MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); * would make more sense for virtio to not insist on having it's own device. */ static struct device virtio_pci_root = { .parent = NULL, - .bus_id = "virtio-pci", + .init_name = "virtio-pci", }; /* Convert a generic virtio device to our structure */ @@ -216,7 +216,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq; - unsigned long flags; + unsigned long flags, size; u16 num; int err; @@ -237,19 +237,20 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, info->queue_index = index; info->num = num; - info->queue = kzalloc(PAGE_ALIGN(vring_size(num,PAGE_SIZE)), GFP_KERNEL); + size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); + info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); if (info->queue == NULL) { err = -ENOMEM; goto out_info; } /* activate the queue */ - iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT, + iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); /* create the vring */ - vq = vring_new_virtqueue(info->num, vdev, info->queue, - vp_notify, callback); + vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, + vdev, info->queue, vp_notify, callback); if (!vq) { err = -ENOMEM; goto out_activate_queue; @@ -266,7 +267,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, out_activate_queue: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - kfree(info->queue); + free_pages_exact(info->queue, size); out_info: kfree(info); return ERR_PTR(err); @@ -277,7 +278,7 @@ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_vq_info *info = vq->priv; - unsigned long flags; + unsigned long flags, size; spin_lock_irqsave(&vp_dev->lock, flags); list_del(&info->node); @@ -289,7 +290,8 @@ static void vp_del_vq(struct virtqueue *vq) iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - kfree(info->queue); + size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); + free_pages_exact(info->queue, size); kfree(info); } @@ -357,7 +359,7 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, /* register a handler for the queue with the PCI device's interrupt */ err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, - vp_dev->vdev.dev.bus_id, vp_dev); + dev_name(&vp_dev->vdev.dev), vp_dev); if (err) goto out_set_drvdata; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6eb5303fed11..5777196bf6c9 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -274,6 +274,7 @@ static struct virtqueue_ops vring_vq_ops = { }; struct virtqueue *vring_new_virtqueue(unsigned int num, + unsigned int vring_align, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *), @@ -292,7 +293,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, if (!vq) return NULL; - vring_init(&vq->vring, num, pages, PAGE_SIZE); + vring_init(&vq->vring, num, pages, vring_align); vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.vq_ops = &vring_vq_ops; diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 54bbf6e04ee8..0e9e2bc0ee96 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -40,6 +40,9 @@ #ifndef node_to_cpumask #define node_to_cpumask(node) ((void)node, cpu_online_map) #endif +#ifndef cpumask_of_node +#define cpumask_of_node(node) ((void)node, cpu_online_mask) +#endif #ifndef node_to_first_cpu #define node_to_first_cpu(node) ((void)(node),0) #endif @@ -54,9 +57,18 @@ ) #endif +#ifndef cpumask_of_pcibus +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) +#endif + #endif /* CONFIG_NUMA */ -/* returns pointer to cpumask for specified node */ +/* + * returns pointer to cpumask for specified node + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #ifndef node_to_cpumask_ptr #define node_to_cpumask_ptr(v, node) \ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index a08c33a26ca9..2878811c6134 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -137,9 +137,12 @@ extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ ) +#define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) + static inline void bitmap_zero(unsigned long *dst, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = 0UL; else { int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); @@ -150,7 +153,7 @@ static inline void bitmap_zero(unsigned long *dst, int nbits) static inline void bitmap_fill(unsigned long *dst, int nbits) { size_t nlongs = BITS_TO_LONGS(nbits); - if (nlongs > 1) { + if (!small_const_nbits(nbits)) { int len = (nlongs - 1) * sizeof(unsigned long); memset(dst, 0xff, len); } @@ -160,7 +163,7 @@ static inline void bitmap_fill(unsigned long *dst, int nbits) static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src; else { int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); @@ -171,7 +174,7 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, static inline void bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src1 & *src2; else __bitmap_and(dst, src1, src2, nbits); @@ -180,7 +183,7 @@ static inline void bitmap_and(unsigned long *dst, const unsigned long *src1, static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src1 | *src2; else __bitmap_or(dst, src1, src2, nbits); @@ -189,7 +192,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; else __bitmap_xor(dst, src1, src2, nbits); @@ -198,7 +201,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src1 & ~(*src2); else __bitmap_andnot(dst, src1, src2, nbits); @@ -207,7 +210,7 @@ static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1, static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); else __bitmap_complement(dst, src, nbits); @@ -216,7 +219,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr static inline int bitmap_equal(const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_equal(src1, src2, nbits); @@ -225,7 +228,7 @@ static inline int bitmap_equal(const unsigned long *src1, static inline int bitmap_intersects(const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; else return __bitmap_intersects(src1, src2, nbits); @@ -234,7 +237,7 @@ static inline int bitmap_intersects(const unsigned long *src1, static inline int bitmap_subset(const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_subset(src1, src2, nbits); @@ -242,7 +245,7 @@ static inline int bitmap_subset(const unsigned long *src1, static inline int bitmap_empty(const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_empty(src, nbits); @@ -250,7 +253,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits) static inline int bitmap_full(const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_full(src, nbits); @@ -258,7 +261,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits) static inline int bitmap_weight(const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); return __bitmap_weight(src, nbits); } @@ -266,7 +269,7 @@ static inline int bitmap_weight(const unsigned long *src, int nbits) static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, int n, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src >> n; else __bitmap_shift_right(dst, src, n, nbits); @@ -275,7 +278,7 @@ static inline void bitmap_shift_right(unsigned long *dst, static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, int n, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits); else __bitmap_shift_left(dst, src, n, nbits); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 94a2ab88ae85..5ac47bcf4b28 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -416,65 +416,54 @@ int __next_cpu_nr(int n, const cpumask_t *srcp); /* * The following particular system cpumasks and operations manage - * possible, present, active and online cpus. Each of them is a fixed size - * bitmap of size NR_CPUS. + * possible, present, active and online cpus. * - * #ifdef CONFIG_HOTPLUG_CPU - * cpu_possible_map - has bit 'cpu' set iff cpu is populatable - * cpu_present_map - has bit 'cpu' set iff cpu is populated - * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler - * cpu_active_map - has bit 'cpu' set iff cpu available to migration - * #else - * cpu_possible_map - has bit 'cpu' set iff cpu is populated - * cpu_present_map - copy of cpu_possible_map - * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler - * #endif + * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable + * cpu_present_mask - has bit 'cpu' set iff cpu is populated + * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler + * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * - * In either case, NR_CPUS is fixed at compile time, as the static - * size of these bitmaps. The cpu_possible_map is fixed at boot - * time, as the set of CPU id's that it is possible might ever - * be plugged in at anytime during the life of that system boot. - * The cpu_present_map is dynamic(*), representing which CPUs - * are currently plugged in. And cpu_online_map is the dynamic - * subset of cpu_present_map, indicating those CPUs available - * for scheduling. + * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * - * If HOTPLUG is enabled, then cpu_possible_map is forced to have + * The cpu_possible_mask is fixed at boot time, as the set of CPU id's + * that it is possible might ever be plugged in at anytime during the + * life of that system boot. The cpu_present_mask is dynamic(*), + * representing which CPUs are currently plugged in. And + * cpu_online_mask is the dynamic subset of cpu_present_mask, + * indicating those CPUs available for scheduling. + * + * If HOTPLUG is enabled, then cpu_possible_mask is forced to have * all NR_CPUS bits set, otherwise it is just the set of CPUs that * ACPI reports present at boot. * - * If HOTPLUG is enabled, then cpu_present_map varies dynamically, + * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise - * cpu_present_map is just a copy of cpu_possible_map. + * cpu_present_mask is just a copy of cpu_possible_mask. * - * (*) Well, cpu_present_map is dynamic in the hotplug case. If not - * hotplug, it's a copy of cpu_possible_map, hence fixed at boot. + * (*) Well, cpu_present_mask is dynamic in the hotplug case. If not + * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP - * cpu_{online,possible,present}_maps are placebos. Changing them + * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() * and cpu_*() macros in the UP case. This ugliness is a UP * optimization - don't waste any instructions or memory references * asking if you're online or how many CPUs there are if there is * only one CPU. - * 2) Most SMP arch's #define some of these maps to be some - * other map specific to that arch. Therefore, the following - * must be #define macros, not inlines. To see why, examine - * the assembly code produced by the following. Note that - * set1() writes phys_x_map, but set2() writes x_map: - * int x_map, phys_x_map; - * #define set1(a) x_map = a - * inline void set2(int a) { x_map = a; } - * #define x_map phys_x_map - * main(){ set1(3); set2(5); } */ -extern cpumask_t cpu_possible_map; -extern cpumask_t cpu_online_map; -extern cpumask_t cpu_present_map; -extern cpumask_t cpu_active_map; +extern const struct cpumask *const cpu_possible_mask; +extern const struct cpumask *const cpu_online_mask; +extern const struct cpumask *const cpu_present_mask; +extern const struct cpumask *const cpu_active_mask; + +/* These strip const, as traditionally they weren't const. */ +#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) +#define cpu_online_map (*(cpumask_t *)cpu_online_mask) +#define cpu_present_map (*(cpumask_t *)cpu_present_mask) +#define cpu_active_map (*(cpumask_t *)cpu_active_mask) #if NR_CPUS > 1 #define num_online_cpus() cpus_weight_nr(cpu_online_map) @@ -496,10 +485,6 @@ extern cpumask_t cpu_active_map; #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) -#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map) -#define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) -#define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) - /* These are the new versions of the cpumask operators: passed by pointer. * The older versions will be implemented in terms of these, then deleted. */ #define cpumask_bits(maskp) ((maskp)->bits) @@ -1048,12 +1033,6 @@ static inline void free_bootmem_cpumask_var(cpumask_var_t mask) } #endif /* CONFIG_CPUMASK_OFFSTACK */ -/* The pointer versions of the maps, these will become the primary versions. */ -#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map) -#define cpu_online_mask ((const struct cpumask *)&cpu_online_map) -#define cpu_present_mask ((const struct cpumask *)&cpu_present_map) -#define cpu_active_mask ((const struct cpumask *)&cpu_active_map) - /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); @@ -1062,6 +1041,10 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); /* First bits of cpu_bit_bitmap are in fact unset. */ #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) +#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) +#define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) +#define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) + /* Wrappers for arch boot code to manipulate normally-constant masks */ static inline void set_cpu_possible(unsigned int cpu, bool possible) { diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index e7217dc58f39..a53407a4165c 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -54,9 +54,13 @@ struct lguest_vqconfig { /* Write command first word is a request. */ enum lguest_req { - LHREQ_INITIALIZE, /* + base, pfnlimit, pgdir, start */ + LHREQ_INITIALIZE, /* + base, pfnlimit, start */ LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ }; + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize for historical reasons. */ +#define LGUEST_VRING_ALIGN 4096 #endif /* _LINUX_LGUEST_LAUNCHER */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index b3dfa72f13b9..389fc4a99419 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -53,7 +53,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root, int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits); static inline int seq_cpumask(struct seq_file *m, cpumask_t *mask) { - return seq_bitmap(m, mask->bits, NR_CPUS); + return seq_bitmap(m, mask->bits, nr_cpu_ids); } static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask) diff --git a/include/linux/smp.h b/include/linux/smp.h index 3f9a60043a97..e467fc940e81 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -64,15 +64,16 @@ extern void smp_cpus_done(unsigned int max_cpus); * Call a function on all other processors */ int smp_call_function(void(*func)(void *info), void *info, int wait); -/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. */ -int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, - int wait); +void smp_call_function_many(const struct cpumask *mask, + void (*func)(void *info), void *info, bool wait); -static inline void smp_call_function_many(const struct cpumask *mask, - void (*func)(void *info), void *info, - int wait) +/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */ +static inline int +smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, + int wait) { - smp_call_function_mask(*mask, func, info, wait); + smp_call_function_many(&mask, func, info, wait); + return 0; } int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, @@ -146,6 +147,8 @@ static inline void smp_send_reschedule(int cpu) { } }) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) +#define smp_call_function_many(mask, func, info, wait) \ + (up_smp_call_function(func, info)) static inline void init_call_single_data(void) { } diff --git a/include/linux/threads.h b/include/linux/threads.h index 38d1a5d6568e..052b12bec8bd 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -8,17 +8,17 @@ */ /* - * Maximum supported processors that can run under SMP. This value is - * set via configure setting. The maximum is equal to the size of the - * bitmasks used on that platform, i.e. 32 or 64. Setting this smaller - * saves quite a bit of memory. + * Maximum supported processors. Setting this smaller saves quite a + * bit of memory. Use nr_cpu_ids instead of this except for static bitmaps. */ -#ifdef CONFIG_SMP -#define NR_CPUS CONFIG_NR_CPUS -#else -#define NR_CPUS 1 +#ifndef CONFIG_NR_CPUS +/* FIXME: This should be fixed in the arch's Kconfig */ +#define CONFIG_NR_CPUS 1 #endif +/* Places which use this should consider cpumask_var_t. */ +#define NR_CPUS CONFIG_NR_CPUS + #define MIN_THREADS_LEFT_FOR_ROOT 4 /* diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index c30c7bfbf39b..8726ff77763e 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -10,6 +10,9 @@ /* The feature bitmap for virtio balloon */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ +/* Size of a PFN in the balloon interface. */ +#define VIRTIO_BALLOON_PFN_SHIFT 12 + struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index 19a0da0dba41..7615ffcdd555 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -7,6 +7,17 @@ /* The ID for virtio console */ #define VIRTIO_ID_CONSOLE 3 +/* Feature bits */ +#define VIRTIO_CONSOLE_F_SIZE 0 /* Does host provide console size? */ + +struct virtio_console_config { + /* colums of the screens */ + __u16 cols; + /* rows of the screens */ + __u16 rows; +} __attribute__((packed)); + + #ifdef __KERNEL__ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)); #endif /* __KERNEL__ */ diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index cdef35742932..cd0fd5d181a6 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -53,4 +53,12 @@ /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 + +/* How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize again. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 #endif diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index c4a598fb3826..71e03722fb59 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -83,7 +83,7 @@ struct vring { * __u16 avail_idx; * __u16 available[num]; * - * // Padding to the next page boundary. + * // Padding to the next align boundary. * char pad[]; * * // A ring of used descriptor heads with free-running index. @@ -93,19 +93,19 @@ struct vring { * }; */ static inline void vring_init(struct vring *vr, unsigned int num, void *p, - unsigned long pagesize) + unsigned long align) { vr->num = num; vr->desc = p; vr->avail = p + num*sizeof(struct vring_desc); - vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1) - & ~(pagesize - 1)); + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1) + & ~(align - 1)); } -static inline unsigned vring_size(unsigned int num, unsigned long pagesize) +static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) - + pagesize - 1) & ~(pagesize - 1)) + + align - 1) & ~(align - 1)) + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; } @@ -115,6 +115,7 @@ struct virtio_device; struct virtqueue; struct virtqueue *vring_new_virtqueue(unsigned int num, + unsigned int vring_align, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *vq), diff --git a/init/Kconfig b/init/Kconfig index 15f73066aada..ce75d2dff948 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -919,12 +919,6 @@ config MODULE_SRCVERSION_ALL the version). With this option, such a "srcversion" field will be created for all modules. If unsure, say N. -config KMOD - def_bool y - help - This is being removed soon. These days, CONFIG_MODULES - implies CONFIG_KMOD, so use that instead. - endif # MODULES config INIT_ALL_POSSIBLE diff --git a/init/main.c b/init/main.c index 59414b5cf40d..5a819ac2b696 100644 --- a/init/main.c +++ b/init/main.c @@ -528,9 +528,9 @@ static void __init boot_cpu_init(void) { int cpu = smp_processor_id(); /* Mark the boot cpu "present", "online" etc for SMP and UP case */ - cpu_set(cpu, cpu_online_map); - cpu_set(cpu, cpu_present_map); - cpu_set(cpu, cpu_possible_map); + set_cpu_online(cpu, true); + set_cpu_present(cpu, true); + set_cpu_possible(cpu, true); } void __init __weak smp_setup_processor_id(void) diff --git a/kernel/cpu.c b/kernel/cpu.c index bae131a1211b..3ddc509b19c5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -15,30 +15,8 @@ #include <linux/stop_machine.h> #include <linux/mutex.h> -/* - * Represents all cpu's present in the system - * In systems capable of hotplug, this map could dynamically grow - * as new cpu's are detected in the system via any platform specific - * method, such as ACPI for e.g. - */ -cpumask_t cpu_present_map __read_mostly; -EXPORT_SYMBOL(cpu_present_map); - -/* - * Represents all cpu's that are currently online. - */ -cpumask_t cpu_online_map __read_mostly; -EXPORT_SYMBOL(cpu_online_map); - -#ifdef CONFIG_INIT_ALL_POSSIBLE -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; -#else -cpumask_t cpu_possible_map __read_mostly; -#endif -EXPORT_SYMBOL(cpu_possible_map); - #ifdef CONFIG_SMP -/* Serializes the updates to cpu_online_map, cpu_present_map */ +/* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); @@ -65,8 +43,6 @@ void __init cpu_hotplug_init(void) cpu_hotplug.refcount = 0; } -cpumask_t cpu_active_map; - #ifdef CONFIG_HOTPLUG_CPU void get_online_cpus(void) @@ -97,7 +73,7 @@ EXPORT_SYMBOL_GPL(put_online_cpus); /* * The following two API's must be used when attempting - * to serialize the updates to cpu_online_map, cpu_present_map. + * to serialize the updates to cpu_online_mask, cpu_present_mask. */ void cpu_maps_update_begin(void) { @@ -503,3 +479,24 @@ EXPORT_SYMBOL_GPL(cpu_bit_bitmap); const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; EXPORT_SYMBOL(cpu_all_bits); + +#ifdef CONFIG_INIT_ALL_POSSIBLE +static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly + = CPU_BITS_ALL; +#else +static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly; +#endif +const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits); +EXPORT_SYMBOL(cpu_possible_mask); + +static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits); +EXPORT_SYMBOL(cpu_online_mask); + +static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits); +EXPORT_SYMBOL(cpu_present_mask); + +static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits); +EXPORT_SYMBOL(cpu_active_mask); diff --git a/kernel/kexec.c b/kernel/kexec.c index ac0fde7b54d0..3fb855ad6aa0 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1116,7 +1116,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu) struct elf_prstatus prstatus; u32 *buf; - if ((cpu < 0) || (cpu >= NR_CPUS)) + if ((cpu < 0) || (cpu >= nr_cpu_ids)) return; /* Using ELF notes here is opportunistic. diff --git a/kernel/module.c b/kernel/module.c index f5cd963e8cc4..b7c884bd56ab 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1726,15 +1726,15 @@ static const struct kernel_symbol *lookup_symbol(const char *name, return NULL; } -static int is_exported(const char *name, const struct module *mod) +static int is_exported(const char *name, unsigned long value, + const struct module *mod) { - if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) - return 1; + const struct kernel_symbol *ks; + if (!mod) + ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); else - if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) - return 1; - else - return 0; + ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); + return ks != NULL && ks->value == value; } /* As per nm */ @@ -2505,7 +2505,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); - *exported = is_exported(name, mod); + *exported = is_exported(name, *value, mod); preempt_enable(); return 0; } diff --git a/kernel/sched.c b/kernel/sched.c index 720b141501c4..f56c48c0dcd7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7072,7 +7072,7 @@ cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, { int group; #ifdef CONFIG_SCHED_MC - *mask = cpu_coregroup_map(cpu); + *mask = *cpu_coregroup_mask(cpu); cpus_and(*mask, *mask, *cpu_map); group = first_cpu(*mask); #elif defined(CONFIG_SCHED_SMT) @@ -7445,7 +7445,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, sd = &per_cpu(core_domains, i); SD_INIT(sd, MC); set_domain_attribute(sd, attr); - sd->span = cpu_coregroup_map(i); + sd->span = *cpu_coregroup_mask(i); cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; @@ -7488,7 +7488,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, SCHED_CPUMASK_VAR(this_core_map, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks); - *this_core_map = cpu_coregroup_map(i); + *this_core_map = *cpu_coregroup_mask(i); cpus_and(*this_core_map, *this_core_map, *cpu_map); if (i != first_cpu(*this_core_map)) continue; diff --git a/kernel/smp.c b/kernel/smp.c index 75c8dde58c55..461b08d83d46 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -24,8 +24,8 @@ struct call_function_data { struct call_single_data csd; spinlock_t lock; unsigned int refs; - cpumask_t cpumask; struct rcu_head rcu_head; + unsigned long cpumask_bits[]; }; struct call_single_queue { @@ -110,13 +110,13 @@ void generic_smp_call_function_interrupt(void) list_for_each_entry_rcu(data, &call_function_queue, csd.list) { int refs; - if (!cpu_isset(cpu, data->cpumask)) + if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits))) continue; data->csd.func(data->csd.info); spin_lock(&data->lock); - cpu_clear(cpu, data->cpumask); + cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits)); WARN_ON(data->refs == 0); data->refs--; refs = data->refs; @@ -223,7 +223,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, local_irq_save(flags); func(info); local_irq_restore(flags); - } else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) { + } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { struct call_single_data *data = NULL; if (!wait) { @@ -266,51 +266,13 @@ void __smp_call_function_single(int cpu, struct call_single_data *data) generic_exec_single(cpu, data); } -/* Dummy function */ -static void quiesce_dummy(void *unused) -{ -} - -/* - * Ensure stack based data used in call function mask is safe to free. - * - * This is needed by smp_call_function_mask when using on-stack data, because - * a single call function queue is shared by all CPUs, and any CPU may pick up - * the data item on the queue at any time before it is deleted. So we need to - * ensure that all CPUs have transitioned through a quiescent state after - * this call. - * - * This is a very slow function, implemented by sending synchronous IPIs to - * all possible CPUs. For this reason, we have to alloc data rather than use - * stack based data even in the case of synchronous calls. The stack based - * data is then just used for deadlock/oom fallback which will be very rare. - * - * If a faster scheme can be made, we could go back to preferring stack based - * data -- the data allocation/free is non-zero cost. - */ -static void smp_call_function_mask_quiesce_stack(cpumask_t mask) -{ - struct call_single_data data; - int cpu; - - data.func = quiesce_dummy; - data.info = NULL; - - for_each_cpu_mask(cpu, mask) { - data.flags = CSD_FLAG_WAIT; - generic_exec_single(cpu, &data); - } -} - /** - * smp_call_function_mask(): Run a function on a set of other CPUs. - * @mask: The set of cpus to run on. + * smp_call_function_many(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on (only runs on online subset). * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. * @wait: If true, wait (atomically) until function has completed on other CPUs. * - * Returns 0 on success, else a negative status code. - * * If @wait is true, then returns once @func has returned. Note that @wait * will be implicitly turned on in case of allocation failures, since * we fall back to on-stack allocation. @@ -319,53 +281,59 @@ static void smp_call_function_mask_quiesce_stack(cpumask_t mask) * hardware interrupt handler or from a bottom half handler. Preemption * must be disabled when calling this function. */ -int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, - int wait) +void smp_call_function_many(const struct cpumask *mask, + void (*func)(void *), void *info, + bool wait) { - struct call_function_data d; - struct call_function_data *data = NULL; - cpumask_t allbutself; + struct call_function_data *data; unsigned long flags; - int cpu, num_cpus; - int slowpath = 0; + int cpu, next_cpu; /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); - cpu = smp_processor_id(); - allbutself = cpu_online_map; - cpu_clear(cpu, allbutself); - cpus_and(mask, mask, allbutself); - num_cpus = cpus_weight(mask); - - /* - * If zero CPUs, return. If just a single CPU, turn this request - * into a targetted single call instead since it's faster. - */ - if (!num_cpus) - return 0; - else if (num_cpus == 1) { - cpu = first_cpu(mask); - return smp_call_function_single(cpu, func, info, wait); + /* So, what's a CPU they want? Ignoring this one. */ + cpu = cpumask_first_and(mask, cpu_online_mask); + if (cpu == smp_processor_id()) + cpu = cpumask_next_and(cpu, mask, cpu_online_mask); + /* No online cpus? We're done. */ + if (cpu >= nr_cpu_ids) + return; + + /* Do we have another CPU which isn't us? */ + next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); + if (next_cpu == smp_processor_id()) + next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); + + /* Fastpath: do that cpu by itself. */ + if (next_cpu >= nr_cpu_ids) { + smp_call_function_single(cpu, func, info, wait); + return; } - data = kmalloc(sizeof(*data), GFP_ATOMIC); - if (data) { - data->csd.flags = CSD_FLAG_ALLOC; - if (wait) - data->csd.flags |= CSD_FLAG_WAIT; - } else { - data = &d; - data->csd.flags = CSD_FLAG_WAIT; - wait = 1; - slowpath = 1; + data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC); + if (unlikely(!data)) { + /* Slow path. */ + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + if (cpumask_test_cpu(cpu, mask)) + smp_call_function_single(cpu, func, info, wait); + } + return; } spin_lock_init(&data->lock); + data->csd.flags = CSD_FLAG_ALLOC; + if (wait) + data->csd.flags |= CSD_FLAG_WAIT; data->csd.func = func; data->csd.info = info; - data->refs = num_cpus; - data->cpumask = mask; + /* FIXME: Make archs use new iterators and ignore bits >= nr_cpu_ids */ + memset(to_cpumask(data->cpumask_bits), 0, cpumask_size()); + cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits)); + data->refs = cpumask_weight(to_cpumask(data->cpumask_bits)); spin_lock_irqsave(&call_function_lock, flags); list_add_tail_rcu(&data->csd.list, &call_function_queue); @@ -377,18 +345,13 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, smp_mb(); /* Send a message to all CPUs in the map */ - arch_send_call_function_ipi(mask); + arch_send_call_function_ipi(*to_cpumask(data->cpumask_bits)); /* optionally wait for the CPUs to complete */ - if (wait) { + if (wait) csd_flag_wait(&data->csd); - if (unlikely(slowpath)) - smp_call_function_mask_quiesce_stack(mask); - } - - return 0; } -EXPORT_SYMBOL(smp_call_function_mask); +EXPORT_SYMBOL(smp_call_function_many); /** * smp_call_function(): Run a function on all other CPUs. @@ -396,7 +359,7 @@ EXPORT_SYMBOL(smp_call_function_mask); * @info: An arbitrary pointer to pass to the function. * @wait: If true, wait (atomically) until function has completed on other CPUs. * - * Returns 0 on success, else a negative status code. + * Returns 0. * * If @wait is true, then returns once @func has returned; otherwise * it returns just before the target cpu calls @func. In case of allocation @@ -407,12 +370,10 @@ EXPORT_SYMBOL(smp_call_function_mask); */ int smp_call_function(void (*func)(void *), void *info, int wait) { - int ret; - preempt_disable(); - ret = smp_call_function_mask(cpu_online_map, func, info, wait); + smp_call_function_many(cpu_online_mask, func, info, wait); preempt_enable(); - return ret; + return 0; } EXPORT_SYMBOL(smp_call_function); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d4dc69ddebd7..4952322cba45 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -84,21 +84,21 @@ static cpumask_t cpu_singlethread_map __read_mostly; static cpumask_t cpu_populated_map __read_mostly; /* If it's single threaded, it isn't in the list of workqueues. */ -static inline int is_single_threaded(struct workqueue_struct *wq) +static inline int is_wq_single_threaded(struct workqueue_struct *wq) { return wq->singlethread; } static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq) { - return is_single_threaded(wq) + return is_wq_single_threaded(wq) ? &cpu_singlethread_map : &cpu_populated_map; } static struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu) { - if (unlikely(is_single_threaded(wq))) + if (unlikely(is_wq_single_threaded(wq))) cpu = singlethread_cpu; return per_cpu_ptr(wq->cpu_wq, cpu); } @@ -769,7 +769,7 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct workqueue_struct *wq = cwq->wq; - const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d"; + const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d"; struct task_struct *p; p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu); diff --git a/lib/Kconfig b/lib/Kconfig index 85cf7ea978aa..7823f8342abf 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -157,4 +157,11 @@ config CHECK_SIGNATURE config HAVE_LMB boolean +config CPUMASK_OFFSTACK + bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS + help + Use dynamic allocation for cpumask_var_t, instead of putting + them on the stack. This is a bit more expensive, but avoids + stack overflow. + endmenu diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c new file mode 100644 index 000000000000..f1ed2fe76c65 --- /dev/null +++ b/lib/is_single_threaded.c @@ -0,0 +1,45 @@ +/* Function to determine if a thread group is single threaded or not + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * - Derived from security/selinux/hooks.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/sched.h> + +/** + * is_single_threaded - Determine if a thread group is single-threaded or not + * @p: A task in the thread group in question + * + * This returns true if the thread group to which a task belongs is single + * threaded, false if it is not. + */ +bool is_single_threaded(struct task_struct *p) +{ + struct task_struct *g, *t; + struct mm_struct *mm = p->mm; + + if (atomic_read(&p->signal->count) != 1) + goto no; + + if (atomic_read(&p->mm->mm_users) != 1) { + read_lock(&tasklist_lock); + do_each_thread(g, t) { + if (t->mm == mm && t != p) + goto no_unlock; + } while_each_thread(g, t); + read_unlock(&tasklist_lock); + } + + return true; + +no_unlock: + read_unlock(&tasklist_lock); +no: + return false; +} diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9c3717a23cf7..f66c58df8953 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2414,7 +2414,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -2429,7 +2429,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct neigh_table *tbl = pde->data; int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 313ebf00ee36..6ba5c557690c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -291,7 +291,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -306,7 +306,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct net *net = seq_file_net(seq); int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 77bfba975959..97f71153584f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -429,7 +429,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -442,7 +442,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f37b9b74c6a8..4da54b0b9233 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -200,7 +200,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; @@ -215,7 +215,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct net *net = seq_file_net(seq); int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 69c9dccc8cf0..93c067b2fa8c 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1206,7 +1206,7 @@ static struct avc_cache_stats *sel_avc_get_stat_idx(loff_t *idx) { int cpu; - for (cpu = *idx; cpu < NR_CPUS; ++cpu) { + for (cpu = *idx; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *idx = cpu + 1; |