diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-12-09 11:32:15 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-12-09 11:32:15 +1100 |
commit | fdea1faf14bc610d88623f1c42f7641276c9a9f0 (patch) | |
tree | ed7f2fae54c67820894b9828a7727c4ab935a6db /arch/arm | |
parent | ca99692aed2d84002f00646512770f3196720e92 (diff) | |
parent | 292d10a14990483e5628291bd1c626a28d729659 (diff) |
Merge remote branch 'arm/devel'
Diffstat (limited to 'arch/arm')
172 files changed, 4633 insertions, 4402 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f1d9297b1050..e77fe0f2ff5b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,7 @@ config ARM select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) + select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO @@ -23,6 +24,7 @@ config ARM select PERF_USE_VMALLOC select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V7)) + select HAVE_C_RECORDMCOUNT help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and @@ -34,6 +36,9 @@ config ARM config HAVE_PWM bool +config MIGHT_HAVE_PCI + bool + config SYS_SUPPORTS_APM_EMULATION bool @@ -221,7 +226,7 @@ config ARCH_INTEGRATOR bool "ARM Ltd. Integrator family" select ARM_AMBA select ARCH_HAS_CPUFREQ - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select PLAT_VERSATILE @@ -231,7 +236,7 @@ config ARCH_INTEGRATOR config ARCH_REALVIEW bool "ARM Ltd. RealView family" select ARM_AMBA - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -245,7 +250,7 @@ config ARCH_VERSATILE bool "ARM Ltd. Versatile family" select ARM_AMBA select ARM_VIC - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -259,7 +264,7 @@ config ARCH_VEXPRESS select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_AMBA select ARM_TIMER_SP804 - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_CLK select ICST @@ -280,7 +285,7 @@ config ARCH_BCMRING depends on MMU select CPU_V6 select ARM_AMBA - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB help @@ -298,6 +303,7 @@ config ARCH_CNS3XXX select CPU_V6 select GENERIC_CLOCKEVENTS select ARM_GIC + select MIGHT_HAVE_PCI select PCI_DOMAINS if PCI help Support for Cavium Networks CNS3XXX platform. @@ -327,7 +333,7 @@ config ARCH_EP93XX select CPU_ARM920T select ARM_AMBA select ARM_VIC - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_USES_GETTIMEOFFSET @@ -347,14 +353,14 @@ config ARCH_MXC bool "Freescale MXC/iMX-based" select GENERIC_CLOCKEVENTS select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP help Support for Freescale MXC/iMX-based family of processors config ARCH_STMP3XXX bool "Freescale STMP3xxx" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS select USB_ARCH_HAS_EHCI @@ -433,6 +439,7 @@ config ARCH_IXP4XX select CPU_XSCALE select GENERIC_GPIO select GENERIC_CLOCKEVENTS + select MIGHT_HAVE_PCI select DMABOUNCE if PCI help Support for Intel's IXP4XX (XScale) family of processors. @@ -472,7 +479,7 @@ config ARCH_LPC32XX select HAVE_IDE select ARM_AMBA select USB_ARCH_HAS_OHCI - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_TIME select GENERIC_CLOCKEVENTS help @@ -506,7 +513,7 @@ config ARCH_MMP bool "Marvell PXA168/910/MMP2" depends on MMU select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select TICK_ONESHOT select PLAT_PXA @@ -539,7 +546,7 @@ config ARCH_W90X900 bool "Nuvoton W90X900 CPU" select CPU_ARM926T select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS help Support for Nuvoton (Winbond logic dept.) ARM9 processor, @@ -553,7 +560,7 @@ config ARCH_W90X900 config ARCH_NUC93X bool "Nuvoton NUC93X CPU" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP help Support for Nuvoton (Winbond logic dept.) NUC93X MCU,The NUC93X is a low-power and high performance MPEG-4/JPEG multimedia controller chip. @@ -564,7 +571,7 @@ config ARCH_TEGRA select GENERIC_CLOCKEVENTS select GENERIC_GPIO select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_HAS_BARRIERS if CACHE_L2X0 select ARCH_HAS_CPUFREQ help @@ -574,7 +581,7 @@ config ARCH_TEGRA config ARCH_PNX4008 bool "Philips Nexperia PNX4008 Mobile" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_USES_GETTIMEOFFSET help This enables support for Philips PNX4008 mobile platform. @@ -584,7 +591,7 @@ config ARCH_PXA depends on MMU select ARCH_MTD_XIP select ARCH_HAS_CPUFREQ - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS select TICK_ONESHOT @@ -761,7 +768,7 @@ config ARCH_TCC_926 bool "Telechips TCC ARM926-based systems" select CPU_ARM926T select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS help Support for Telechips TCC ARM926-based systems. @@ -785,7 +792,7 @@ config ARCH_U300 select ARM_AMBA select ARM_VIC select GENERIC_CLOCKEVENTS - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_GPIO help Support for ST-Ericsson U300 series mobile platforms. @@ -795,7 +802,7 @@ config ARCH_U8500 select CPU_V7 select ARM_AMBA select GENERIC_CLOCKEVENTS - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB help Support for ST-Ericsson's Ux500 architecture @@ -805,7 +812,7 @@ config ARCH_NOMADIK select ARM_AMBA select ARM_VIC select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select ARCH_REQUIRE_GPIOLIB help @@ -817,7 +824,7 @@ config ARCH_DAVINCI select ARCH_REQUIRE_GPIOLIB select ZONE_DMA select HAVE_IDE - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_ALLOCATOR select ARCH_HAS_HOLES_MEMORYMODEL help @@ -837,7 +844,7 @@ config PLAT_SPEAR bool "ST SPEAr" select ARM_AMBA select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_CLK help @@ -1164,7 +1171,7 @@ config ISA_DMA_API bool config PCI - bool "PCI support" if ARCH_INTEGRATOR_AP || ARCH_VERSATILE_PB || ARCH_IXP4XX || ARCH_KS8695 || MACH_ARMCORE || ARCH_CNS3XXX + bool "PCI support" if MIGHT_HAVE_PCI help Find out whether you have a PCI motherboard. PCI is the name of a bus system, i.e. the way the CPU talks to the other stuff inside @@ -1205,10 +1212,11 @@ config SMP depends on EXPERIMENTAL depends on GENERIC_CLOCKEVENTS depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \ - MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\ - ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 + MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \ + ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \ + ARCH_MSM_SCORPIONMP select USE_GENERIC_SMP_HELPERS - select HAVE_ARM_SCU + select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If @@ -1229,7 +1237,7 @@ config SMP config SMP_ON_UP bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)" depends on EXPERIMENTAL - depends on SMP && !XIP && !THUMB2_KERNEL + depends on SMP && !XIP default y help SMP kernels contain instructions which fail on non-SMP processors. @@ -1283,6 +1291,7 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" depends on SMP && HOTPLUG && EXPERIMENTAL + depends on !ARCH_MSM help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. @@ -1291,7 +1300,7 @@ config LOCAL_TIMERS bool "Use local timer interrupts" depends on SMP default y - select HAVE_ARM_TWD + select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP help Enable support for local timers on SMP platforms, rather then the legacy IPI broadcast method. Local timers allows the system @@ -1310,7 +1319,7 @@ config HZ default 100 config THUMB2_KERNEL - bool "Compile the kernel in Thumb-2 mode" + bool "Compile the kernel in Thumb-2 mode (EXPERIMENTAL)" depends on CPU_V7 && EXPERIMENTAL select AEABI select ARM_ASM_UNIFIED @@ -1524,6 +1533,7 @@ config SECCOMP config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" + depends on EXPERIMENTAL help This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of functions, a canary value on @@ -1650,6 +1660,19 @@ config ATAGS_PROC Should the atags used to boot the kernel be exported in an "atags" file in procfs. Useful with kexec. +config CRASH_DUMP + bool "Build kdump crash kernel (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Generate crash dump after being started by kexec. This should + be normally only set in special crash dump kernels which are + loaded in the main kernel with kexec-tools into a specially + reserved region and then later executed after a crash by + kdump/kexec. The crash dump kernel must be compiled to a + memory address not used by the main kernel + + For more details see Documentation/kdump/kdump.txt + config AUTO_ZRELADDR bool "Auto calculation of the decompressed kernel image address" depends on !ZBOOT_ROM && !ARCH_U300 @@ -1707,7 +1730,7 @@ config CPU_FREQ_S3C Internal configuration node for common cpufreq on Samsung SoC config CPU_FREQ_S3C24XX - bool "CPUfreq driver for Samsung S3C24XX series CPUs" + bool "CPUfreq driver for Samsung S3C24XX series CPUs (EXPERIMENTAL)" depends on ARCH_S3C2410 && CPU_FREQ && EXPERIMENTAL select CPU_FREQ_S3C help @@ -1719,7 +1742,7 @@ config CPU_FREQ_S3C24XX If in doubt, say N. config CPU_FREQ_S3C24XX_PLL - bool "Support CPUfreq changing of PLL frequency" + bool "Support CPUfreq changing of PLL frequency (EXPERIMENTAL)" depends on CPU_FREQ_S3C24XX && EXPERIMENTAL help Compile in support for changing the PLL frequency from the diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 2fd0b99afc4b..494224a9b459 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -23,7 +23,7 @@ config STRICT_DEVMEM config FRAME_POINTER bool depends on !THUMB2_KERNEL - default y if !ARM_UNWIND + default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER help If you say N here, the resulting kernel will be slightly smaller and faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled, @@ -31,7 +31,7 @@ config FRAME_POINTER reported is severely limited. config ARM_UNWIND - bool "Enable stack unwinding support" + bool "Enable stack unwinding support (EXPERIMENTAL)" depends on AEABI && EXPERIMENTAL default y help diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 65a7c1c588a9..0a8f748e506a 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -45,6 +45,10 @@ else endif endif +ifeq ($(CONFIG_ARCH_SHMOBILE),y) +OBJS += head-shmobile.o +endif + # # We now have a PIC decompressor implementation. Decompressors running # from RAM should not define ZTEXTADDR. Decompressors running directly diff --git a/arch/arm/boot/compressed/head-shmobile.S b/arch/arm/boot/compressed/head-shmobile.S new file mode 100644 index 000000000000..30973b76e6ae --- /dev/null +++ b/arch/arm/boot/compressed/head-shmobile.S @@ -0,0 +1,53 @@ +/* + * The head-file for SH-Mobile ARM platforms + * + * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> + * Simon Horman <horms@verge.net.au> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef CONFIG_ZBOOT_ROM + + .section ".start", "ax" + + /* load board-specific initialization code */ +#include <mach/zboot.h> + + b 1f +__atags:@ tag #1 + .long 12 @ tag->hdr.size = tag_size(tag_core); + .long 0x54410001 @ tag->hdr.tag = ATAG_CORE; + .long 0 @ tag->u.core.flags = 0; + .long 0 @ tag->u.core.pagesize = 0; + .long 0 @ tag->u.core.rootdev = 0; + @ tag #2 + .long 8 @ tag->hdr.size = tag_size(tag_mem32); + .long 0x54410002 @ tag->hdr.tag = ATAG_MEM; + .long CONFIG_MEMORY_SIZE @ tag->u.mem.size = CONFIG_MEMORY_SIZE; + .long CONFIG_MEMORY_START @ @ tag->u.mem.start = CONFIG_MEMORY_START; + @ tag #3 + .long 0 @ tag->hdr.size = 0 + .long 0 @ tag->hdr.tag = ATAG_NONE; +1: + + /* Set board ID necessary for boot */ + ldr r7, 1f @ Set machine type register + adr r8, __atags @ Set atag register + b 2f + +1 : .long MACH_TYPE +2 : + +#endif /* CONFIG_ZBOOT_ROM */ diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index 0a34c8186924..778655f0257a 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -37,7 +37,3 @@ config SHARP_PARAM config SHARP_SCOOP bool - -config COMMON_CLKDEV - bool - select HAVE_CLK diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index e6e8664a9413..e7521bca2c35 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_ARCH_IXP2000) += uengine.o obj-$(CONFIG_ARCH_IXP23XX) += uengine.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o obj-$(CONFIG_COMMON_CLKDEV) += clkdev.o +obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c deleted file mode 100644 index e2b2bb66e094..000000000000 --- a/arch/arm/common/clkdev.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * arch/arm/common/clkdev.c - * - * Copyright (C) 2008 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Helper for the clk API to assist looking up a struct clk. - */ -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/list.h> -#include <linux/errno.h> -#include <linux/err.h> -#include <linux/string.h> -#include <linux/mutex.h> -#include <linux/clk.h> -#include <linux/slab.h> - -#include <asm/clkdev.h> -#include <mach/clkdev.h> - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -/* - * Find the correct struct clk for the device and connection ID. - * We do slightly fuzzy matching here: - * An entry with a NULL ID is assumed to be a wildcard. - * If an entry has a device ID, it must match - * If an entry has a connection ID, it must match - * Then we take the most specific entry - with the following - * order of precedence: dev+con > dev only > con only. - */ -static struct clk *clk_find(const char *dev_id, const char *con_id) -{ - struct clk_lookup *p; - struct clk *clk = NULL; - int match, best = 0; - - list_for_each_entry(p, &clocks, node) { - match = 0; - if (p->dev_id) { - if (!dev_id || strcmp(p->dev_id, dev_id)) - continue; - match += 2; - } - if (p->con_id) { - if (!con_id || strcmp(p->con_id, con_id)) - continue; - match += 1; - } - - if (match > best) { - clk = p->clk; - if (match != 3) - best = match; - else - break; - } - } - return clk; -} - -struct clk *clk_get_sys(const char *dev_id, const char *con_id) -{ - struct clk *clk; - - mutex_lock(&clocks_mutex); - clk = clk_find(dev_id, con_id); - if (clk && !__clk_get(clk)) - clk = NULL; - mutex_unlock(&clocks_mutex); - - return clk ? clk : ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL(clk_get_sys); - -struct clk *clk_get(struct device *dev, const char *con_id) -{ - const char *dev_id = dev ? dev_name(dev) : NULL; - - return clk_get_sys(dev_id, con_id); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ - __clk_put(clk); -} -EXPORT_SYMBOL(clk_put); - -void clkdev_add(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_add_tail(&cl->node, &clocks); - mutex_unlock(&clocks_mutex); -} -EXPORT_SYMBOL(clkdev_add); - -void __init clkdev_add_table(struct clk_lookup *cl, size_t num) -{ - mutex_lock(&clocks_mutex); - while (num--) { - list_add_tail(&cl->node, &clocks); - cl++; - } - mutex_unlock(&clocks_mutex); -} - -#define MAX_DEV_ID 20 -#define MAX_CON_ID 16 - -struct clk_lookup_alloc { - struct clk_lookup cl; - char dev_id[MAX_DEV_ID]; - char con_id[MAX_CON_ID]; -}; - -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...) -{ - struct clk_lookup_alloc *cla; - - cla = kzalloc(sizeof(*cla), GFP_KERNEL); - if (!cla) - return NULL; - - cla->cl.clk = clk; - if (con_id) { - strlcpy(cla->con_id, con_id, sizeof(cla->con_id)); - cla->cl.con_id = cla->con_id; - } - - if (dev_fmt) { - va_list ap; - - va_start(ap, dev_fmt); - vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap); - cla->cl.dev_id = cla->dev_id; - va_end(ap); - } - - return &cla->cl; -} -EXPORT_SYMBOL(clkdev_alloc); - -int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, - struct device *dev) -{ - struct clk *r = clk_get(dev, id); - struct clk_lookup *l; - - if (IS_ERR(r)) - return PTR_ERR(r); - - l = clkdev_alloc(r, alias, alias_dev_name); - clk_put(r); - if (!l) - return -ENODEV; - clkdev_add(l); - return 0; -} -EXPORT_SYMBOL(clk_add_alias); - -/* - * clkdev_drop - remove a clock dynamically allocated - */ -void clkdev_drop(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_del(&cl->node); - mutex_unlock(&clocks_mutex); - kfree(cl); -} -EXPORT_SYMBOL(clkdev_drop); diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index e6388dcd8cfa..0b89ef001330 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -35,6 +35,9 @@ static DEFINE_SPINLOCK(irq_controller_lock); +/* Address of GIC 0 CPU interface */ +void __iomem *gic_cpu_base_addr __read_mostly; + struct gic_chip_data { unsigned int irq_offset; void __iomem *dist_base; @@ -45,7 +48,7 @@ struct gic_chip_data { #define MAX_GIC_NR 1 #endif -static struct gic_chip_data gic_data[MAX_GIC_NR]; +static struct gic_chip_data gic_data[MAX_GIC_NR] __read_mostly; static inline void __iomem *gic_dist_base(unsigned int irq) { @@ -213,21 +216,16 @@ void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) set_irq_chained_handler(irq, gic_handle_cascade_irq); } -void __init gic_dist_init(unsigned int gic_nr, void __iomem *base, - unsigned int irq_start) +static void __init gic_dist_init(struct gic_chip_data *gic, + unsigned int irq_start) { unsigned int gic_irqs, irq_limit, i; + void __iomem *base = gic->dist_base; u32 cpumask = 1 << smp_processor_id(); - if (gic_nr >= MAX_GIC_NR) - BUG(); - cpumask |= cpumask << 8; cpumask |= cpumask << 16; - gic_data[gic_nr].dist_base = base; - gic_data[gic_nr].irq_offset = (irq_start - 1) & ~31; - writel(0, base + GIC_DIST_CTRL); /* @@ -267,7 +265,7 @@ void __init gic_dist_init(unsigned int gic_nr, void __iomem *base, /* * Limit number of interrupts registered to the platform maximum */ - irq_limit = gic_data[gic_nr].irq_offset + gic_irqs; + irq_limit = gic->irq_offset + gic_irqs; if (WARN_ON(irq_limit > NR_IRQS)) irq_limit = NR_IRQS; @@ -276,7 +274,7 @@ void __init gic_dist_init(unsigned int gic_nr, void __iomem *base, */ for (i = irq_start; i < irq_limit; i++) { set_irq_chip(i, &gic_chip); - set_irq_chip_data(i, &gic_data[gic_nr]); + set_irq_chip_data(i, gic); set_irq_handler(i, handle_level_irq); set_irq_flags(i, IRQF_VALID | IRQF_PROBE); } @@ -284,19 +282,12 @@ void __init gic_dist_init(unsigned int gic_nr, void __iomem *base, writel(1, base + GIC_DIST_CTRL); } -void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base) +static void __cpuinit gic_cpu_init(struct gic_chip_data *gic) { - void __iomem *dist_base; + void __iomem *dist_base = gic->dist_base; + void __iomem *base = gic->cpu_base; int i; - if (gic_nr >= MAX_GIC_NR) - BUG(); - - dist_base = gic_data[gic_nr].dist_base; - BUG_ON(!dist_base); - - gic_data[gic_nr].cpu_base = base; - /* * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. @@ -314,6 +305,42 @@ void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base) writel(1, base + GIC_CPU_CTRL); } +void __init gic_init(unsigned int gic_nr, unsigned int irq_start, + void __iomem *dist_base, void __iomem *cpu_base) +{ + struct gic_chip_data *gic; + + BUG_ON(gic_nr >= MAX_GIC_NR); + + gic = &gic_data[gic_nr]; + gic->dist_base = dist_base; + gic->cpu_base = cpu_base; + gic->irq_offset = (irq_start - 1) & ~31; + + if (gic_nr == 0) + gic_cpu_base_addr = cpu_base; + + gic_dist_init(gic, irq_start); + gic_cpu_init(gic); +} + +void __cpuinit gic_secondary_init(unsigned int gic_nr) +{ + BUG_ON(gic_nr >= MAX_GIC_NR); + + gic_cpu_init(&gic_data[gic_nr]); +} + +void __cpuinit gic_enable_ppi(unsigned int irq) +{ + unsigned long flags; + + local_irq_save(flags); + irq_to_desc(irq)->status |= IRQ_NOPROBE; + gic_unmask_irq(irq); + local_irq_restore(flags); +} + #ifdef CONFIG_SMP void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { diff --git a/arch/arm/plat-versatile/timer-sp.c b/arch/arm/common/timer-sp.c index fb0d1c299718..4740313daa5b 100644 --- a/arch/arm/plat-versatile/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -1,5 +1,5 @@ /* - * linux/arch/arm/plat-versatile/timer-sp.c + * linux/arch/arm/common/timer-sp.c * * Copyright (C) 1999 - 2003 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd @@ -26,8 +26,6 @@ #include <asm/hardware/arm_timer.h> -#include <plat/timer-sp.h> - /* * These timers are currently always setup to be clocked at 1MHz. */ diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 749bb6622404..bc2d2d75f706 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -18,6 +18,7 @@ #endif #include <asm/ptrace.h> +#include <asm/domain.h> /* * Endian independent macros for shifting bytes within registers. @@ -157,16 +158,24 @@ #ifdef CONFIG_SMP #define ALT_SMP(instr...) \ 9998: instr +/* + * Note: if you get assembler errors from ALT_UP() when building with + * CONFIG_THUMB2_KERNEL, you almost certainly need to use + * ALT_SMP( W(instr) ... ) + */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ - instr ;\ +9997: instr ;\ + .if . - 9997b != 4 ;\ + .error "ALT_UP() content must assemble to exactly 4 bytes";\ + .endif ;\ .popsection #define ALT_UP_B(label) \ .equ up_b_offset, label - 9998b ;\ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ - b . + up_b_offset ;\ + W(b) . + up_b_offset ;\ .popsection #else #define ALT_SMP(instr...) @@ -177,16 +186,24 @@ /* * SMP data memory barrier */ - .macro smp_dmb + .macro smp_dmb mode #ifdef CONFIG_SMP #if __LINUX_ARM_ARCH__ >= 7 + .ifeqs "\mode","arm" ALT_SMP(dmb) + .else + ALT_SMP(W(dmb)) + .endif #elif __LINUX_ARM_ARCH__ == 6 ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb #else #error Incompatible SMP platform #endif + .ifeqs "\mode","arm" ALT_UP(nop) + .else + ALT_UP(W(nop)) + .endif #endif .endm @@ -206,12 +223,12 @@ */ #ifdef CONFIG_THUMB2_KERNEL - .macro usraccoff, instr, reg, ptr, inc, off, cond, abort + .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T() 9999: .if \inc == 1 - \instr\cond\()bt \reg, [\ptr, #\off] + \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] .elseif \inc == 4 - \instr\cond\()t \reg, [\ptr, #\off] + \instr\cond\()\t\().w \reg, [\ptr, #\off] .else .error "Unsupported inc macro argument" .endif @@ -246,13 +263,13 @@ #else /* !CONFIG_THUMB2_KERNEL */ - .macro usracc, instr, reg, ptr, inc, cond, rept, abort + .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=T() .rept \rept 9999: .if \inc == 1 - \instr\cond\()bt \reg, [\ptr], #\inc + \instr\cond\()b\()\t \reg, [\ptr], #\inc .elseif \inc == 4 - \instr\cond\()t \reg, [\ptr], #\inc + \instr\cond\()\t \reg, [\ptr], #\inc .else .error "Unsupported inc macro argument" .endif diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index 9d6122096fbe..75fe66bc02b4 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -23,4 +23,6 @@ #define ARCH_SLAB_MINALIGN 8 #endif +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + #endif diff --git a/arch/arm/include/asm/clkdev.h b/arch/arm/include/asm/clkdev.h index b56c1389b6fa..765d33222369 100644 --- a/arch/arm/include/asm/clkdev.h +++ b/arch/arm/include/asm/clkdev.h @@ -12,23 +12,13 @@ #ifndef __ASM_CLKDEV_H #define __ASM_CLKDEV_H -struct clk; -struct device; +#include <linux/slab.h> -struct clk_lookup { - struct list_head node; - const char *dev_id; - const char *con_id; - struct clk *clk; -}; +#include <mach/clkdev.h> -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); - -void clkdev_add(struct clk_lookup *cl); -void clkdev_drop(struct clk_lookup *cl); - -void clkdev_add_table(struct clk_lookup *, size_t); -int clk_add_alias(const char *, const char *, char *, struct device *); +static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size) +{ + return kzalloc(size, GFP_KERNEL); +} #endif diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index cc7ef4080711..af18ceaacf5d 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -45,13 +45,17 @@ */ #define DOMAIN_NOACCESS 0 #define DOMAIN_CLIENT 1 +#ifdef CONFIG_CPU_USE_DOMAINS #define DOMAIN_MANAGER 3 +#else +#define DOMAIN_MANAGER 1 +#endif #define domain_val(dom,type) ((type) << (2*(dom))) #ifndef __ASSEMBLY__ -#ifdef CONFIG_MMU +#ifdef CONFIG_CPU_USE_DOMAINS #define set_domain(x) \ do { \ __asm__ __volatile__( \ @@ -74,5 +78,28 @@ #define modify_domain(dom,type) do { } while (0) #endif +/* + * Generate the T (user) versions of the LDR/STR and related + * instructions (inline assembly) + */ +#ifdef CONFIG_CPU_USE_DOMAINS +#define T(instr) #instr "t" +#else +#define T(instr) #instr #endif -#endif /* !__ASSEMBLY__ */ + +#else /* __ASSEMBLY__ */ + +/* + * Generate the T (user) versions of the LDR/STR and related + * instructions + */ +#ifdef CONFIG_CPU_USE_DOMAINS +#define T(instr) instr ## t +#else +#define T(instr) instr +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* !__ASM_PROC_DOMAIN_H */ diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 8bb66bca2e3e..c3cd8755e648 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -99,6 +99,8 @@ struct elf32_hdr; extern int elf_check_arch(const struct elf32_hdr *); #define elf_check_arch elf_check_arch +#define vmcore_elf64_check_arch(x) (0) + extern int arm_elf_read_implies_exec(const struct elf32_hdr *, int); #define elf_read_implies_exec(ex,stk) arm_elf_read_implies_exec(&(ex), stk) diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 540a044153a5..b33fe7065b38 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -13,12 +13,13 @@ #include <linux/preempt.h> #include <linux/uaccess.h> #include <asm/errno.h> +#include <asm/domain.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ __asm__ __volatile__( \ - "1: ldrt %1, [%2]\n" \ + "1: " T(ldr) " %1, [%2]\n" \ " " insn "\n" \ - "2: strt %0, [%2]\n" \ + "2: " T(str) " %0, [%2]\n" \ " mov %0, #0\n" \ "3:\n" \ " .pushsection __ex_table,\"a\"\n" \ @@ -97,10 +98,10 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) pagefault_disable(); /* implies preempt_disable() */ __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" - "1: ldrt %0, [%3]\n" + "1: " T(ldr) " %0, [%3]\n" " teq %0, %1\n" " it eq @ explicit IT needed for the 2b label\n" - "2: streqt %2, [%3]\n" + "2: " T(streq) " %2, [%3]\n" "3:\n" " .pushsection __ex_table,\"a\"\n" " .align 3\n" diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index 6d7485aff955..89ad1805e579 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,13 +5,31 @@ #include <linux/threads.h> #include <asm/irq.h> +#define NR_IPI 5 + typedef struct { unsigned int __softirq_pending; +#ifdef CONFIG_LOCAL_TIMERS unsigned int local_timer_irqs; +#endif +#ifdef CONFIG_SMP + unsigned int ipi_irqs[NR_IPI]; +#endif } ____cacheline_aligned irq_cpustat_t; #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ +#define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ +#define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) + +#ifdef CONFIG_SMP +u64 smp_irq_stat_cpu(unsigned int cpu); +#else +#define smp_irq_stat_cpu(cpu) 0 +#endif + +#define arch_irq_stat_cpu smp_irq_stat_cpu + #if NR_IRQS > 512 #define HARDIRQ_BITS 10 #elif NR_IRQS > 256 diff --git a/arch/arm/include/asm/hardware/entry-macro-gic.S b/arch/arm/include/asm/hardware/entry-macro-gic.S new file mode 100644 index 000000000000..c115b82fe80a --- /dev/null +++ b/arch/arm/include/asm/hardware/entry-macro-gic.S @@ -0,0 +1,75 @@ +/* + * arch/arm/include/asm/hardware/entry-macro-gic.S + * + * Low-level IRQ helper macros for GIC + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <asm/hardware/gic.h> + +#ifndef HAVE_GET_IRQNR_PREAMBLE + .macro get_irqnr_preamble, base, tmp + ldr \base, =gic_cpu_base_addr + ldr \base, [\base] + .endm +#endif + +/* + * The interrupt numbering scheme is defined in the + * interrupt controller spec. To wit: + * + * Interrupts 0-15 are IPI + * 16-28 are reserved + * 29-31 are local. We allow 30 to be used for the watchdog. + * 32-1020 are global + * 1021-1022 are reserved + * 1023 is "spurious" (no interrupt) + * + * For now, we ignore all local interrupts so only return an interrupt if it's + * between 30 and 1020. The test_for_ipi routine below will pick up on IPIs. + * + * A simple read from the controller will tell us the number of the highest + * priority enabled interrupt. We then just need to check whether it is in the + * valid range for an IRQ (30-1020 inclusive). + */ + + .macro get_irqnr_and_base, irqnr, irqstat, base, tmp + + ldr \irqstat, [\base, #GIC_CPU_INTACK] + /* bits 12-10 = src CPU, 9-0 = int # */ + + ldr \tmp, =1021 + bic \irqnr, \irqstat, #0x1c00 + cmp \irqnr, #29 + cmpcc \irqnr, \irqnr + cmpne \irqnr, \tmp + cmpcs \irqnr, \irqnr + .endm + +/* We assume that irqstat (the raw value of the IRQ acknowledge + * register) is preserved from the macro above. + * If there is an IPI, we immediately signal end of interrupt on the + * controller, since this requires the original irqstat value which + * we won't easily be able to recreate later. + */ + + .macro test_for_ipi, irqnr, irqstat, base, tmp + bic \irqnr, \irqstat, #0x1c00 + cmp \irqnr, #16 + strcc \irqstat, [\base, #GIC_CPU_EOI] + cmpcs \irqnr, \irqnr + .endm + +/* As above, this assumes that irqstat and base are preserved.. */ + + .macro test_for_ltirq, irqnr, irqstat, base, tmp + bic \irqnr, \irqstat, #0x1c00 + mov \tmp, #0 + cmp \irqnr, #29 + moveq \tmp, #1 + streq \irqstat, [\base, #GIC_CPU_EOI] + cmp \tmp, #0 + .endm diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h index 7f34333bb545..84557d321001 100644 --- a/arch/arm/include/asm/hardware/gic.h +++ b/arch/arm/include/asm/hardware/gic.h @@ -33,10 +33,13 @@ #define GIC_DIST_SOFTINT 0xf00 #ifndef __ASSEMBLY__ -void gic_dist_init(unsigned int gic_nr, void __iomem *base, unsigned int irq_start); -void gic_cpu_init(unsigned int gic_nr, void __iomem *base); +extern void __iomem *gic_cpu_base_addr; + +void gic_init(unsigned int, unsigned int, void __iomem *, void __iomem *); +void gic_secondary_init(unsigned int); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); void gic_raise_softirq(const struct cpumask *mask, unsigned int irq); +void gic_enable_ppi(unsigned int); #endif #endif diff --git a/arch/arm/plat-versatile/include/plat/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h index 21e75e30d497..21e75e30d497 100644 --- a/arch/arm/plat-versatile/include/plat/timer-sp.h +++ b/arch/arm/include/asm/hardware/timer-sp.h diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 815efa2d4e07..20e0f7c9e03e 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -241,18 +241,15 @@ extern void _memset_io(volatile void __iomem *, int, size_t); * */ #ifndef __arch_ioremap -#define ioremap(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE) -#define ioremap_nocache(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE) -#define ioremap_cached(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE_CACHED) -#define ioremap_wc(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE_WC) -#define iounmap(cookie) __iounmap(cookie) -#else +#define __arch_ioremap __arm_ioremap +#define __arch_iounmap __iounmap +#endif + #define ioremap(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE) #define ioremap_nocache(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cached(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE_CACHED) #define ioremap_wc(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE_WC) -#define iounmap(cookie) __arch_iounmap(cookie) -#endif +#define iounmap __arch_iounmap /* * io{read,write}{8,16,32} macros diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index 8ec9ef5c3c7b..c0094d8edae4 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -33,10 +33,20 @@ static inline void crash_setup_regs(struct pt_regs *newregs, if (oldregs) { memcpy(newregs, oldregs, sizeof(*newregs)); } else { - __asm__ __volatile__ ("stmia %0, {r0 - r15}" - : : "r" (&newregs->ARM_r0)); - __asm__ __volatile__ ("mrs %0, cpsr" - : "=r" (newregs->ARM_cpsr)); + __asm__ __volatile__ ( + "stmia %[regs_base], {r0-r12}\n\t" + "mov %[_ARM_sp], sp\n\t" + "str lr, %[_ARM_lr]\n\t" + "adr %[_ARM_pc], 1f\n\t" + "mrs %[_ARM_cpsr], cpsr\n\t" + "1:" + : [_ARM_pc] "=r" (newregs->ARM_pc), + [_ARM_cpsr] "=r" (newregs->ARM_cpsr), + [_ARM_sp] "=r" (newregs->ARM_sp), + [_ARM_lr] "=o" (newregs->ARM_lr) + : [regs_base] "r" (&newregs->ARM_r0) + : "memory" + ); } } diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index ce3eee9fe26c..2bc47fb94d51 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -20,7 +20,7 @@ struct seq_file; extern unsigned int arch_nr_irqs; extern void (*init_arch_irq)(void); extern void init_FIQ(void); -extern int show_fiq_list(struct seq_file *, void *); +extern int show_fiq_list(struct seq_file *, int); /* * This is for easy migration, but should be changed in the source diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index cbb0bc295d2b..12c8e680cbff 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -8,11 +8,6 @@ struct unwind_table; #ifdef CONFIG_ARM_UNWIND -struct arm_unwind_mapping { - Elf_Shdr *unw_sec; - Elf_Shdr *sec_text; - struct unwind_table *unwind; -}; enum { ARM_SEC_INIT, ARM_SEC_DEVINIT, @@ -21,13 +16,13 @@ enum { ARM_SEC_DEVEXIT, ARM_SEC_MAX, }; +#endif + struct mod_arch_specific { - struct arm_unwind_mapping map[ARM_SEC_MAX]; -}; -#else -struct mod_arch_specific { -}; +#ifdef CONFIG_ARM_UNWIND + struct unwind_table *unwind[ARM_SEC_MAX]; #endif +}; /* * Include the ARM architecture version. diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index a485ac3c8696..f51a69595f6e 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -151,13 +151,15 @@ extern void __cpu_copy_user_highpage(struct page *to, struct page *from, #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) extern void copy_page(void *to, const void *from); +typedef unsigned long pteval_t; + #undef STRICT_MM_TYPECHECKS #ifdef STRICT_MM_TYPECHECKS /* * These are used to make use of C type-checking.. */ -typedef struct { unsigned long pte; } pte_t; +typedef struct { pteval_t pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd[2]; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; @@ -175,7 +177,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* * .. while these make it easier on the compiler */ -typedef unsigned long pte_t; +typedef pteval_t pte_t; typedef unsigned long pmd_t; typedef unsigned long pgd_t[2]; typedef unsigned long pgprot_t; diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index b12cc98bbe04..bdbebda5611c 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -28,16 +28,18 @@ */ #define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(mm, pmd) do { } while (0) -#define pgd_populate(mm,pmd,pte) BUG() +#define pud_populate(mm,pmd,pte) BUG() -extern pgd_t *get_pgd_slow(struct mm_struct *mm); -extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); - -#define pgd_alloc(mm) get_pgd_slow(mm) -#define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) +extern pgd_t *pgd_alloc(struct mm_struct *mm); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +static inline void clean_pte_table(pte_t *pte) +{ + clean_dcache_area(pte + PTE_HWTABLE_OFF, PTE_HWTABLE_SIZE); +} + /* * Allocate one PTE table. * @@ -45,14 +47,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); * into one table thus: * * +------------+ - * | h/w pt 0 | - * +------------+ - * | h/w pt 1 | - * +------------+ * | Linux pt 0 | * +------------+ * | Linux pt 1 | * +------------+ + * | h/w pt 0 | + * +------------+ + * | h/w pt 1 | + * +------------+ */ static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) @@ -60,10 +62,8 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) pte_t *pte; pte = (pte_t *)__get_free_page(PGALLOC_GFP); - if (pte) { - clean_dcache_area(pte, sizeof(pte_t) * PTRS_PER_PTE); - pte += PTRS_PER_PTE; - } + if (pte) + clean_pte_table(pte); return pte; } @@ -79,10 +79,8 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) pte = alloc_pages(PGALLOC_GFP, 0); #endif if (pte) { - if (!PageHighMem(pte)) { - void *page = page_address(pte); - clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE); - } + if (!PageHighMem(pte)) + clean_pte_table(page_address(pte)); pgtable_page_ctor(pte); } @@ -94,10 +92,8 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) */ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - if (pte) { - pte -= PTRS_PER_PTE; + if (pte) free_page((unsigned long)pte); - } } static inline void pte_free(struct mm_struct *mm, pgtable_t pte) @@ -106,8 +102,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) __free_page(pte); } -static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval) +static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, + unsigned long prot) { + unsigned long pmdval = (pte + PTE_HWTABLE_OFF) | prot; pmdp[0] = __pmd(pmdval); pmdp[1] = __pmd(pmdval + 256 * sizeof(pte_t)); flush_pmd_entry(pmdp); @@ -122,20 +120,16 @@ static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - unsigned long pte_ptr = (unsigned long)ptep; - /* - * The pmd must be loaded with the physical - * address of the PTE table + * The pmd must be loaded with the physical address of the PTE table */ - pte_ptr -= PTRS_PER_PTE * sizeof(void *); - __pmd_populate(pmdp, __pa(pte_ptr) | _PAGE_KERNEL_TABLE); + __pmd_populate(pmdp, __pa(ptep), _PAGE_KERNEL_TABLE); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { - __pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE); + __pmd_populate(pmdp, page_to_phys(ptep), _PAGE_USER_TABLE); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 53d1d5deb111..0a8dad013367 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -10,15 +10,17 @@ #ifndef _ASMARM_PGTABLE_H #define _ASMARM_PGTABLE_H -#include <asm-generic/4level-fixup.h> +#include <linux/const.h> #include <asm/proc-fns.h> #ifndef CONFIG_MMU +#include <asm-generic/4level-fixup.h> #include "pgtable-nommu.h" #else +#include <asm-generic/pgtable-nopud.h> #include <asm/memory.h> #include <mach/vmalloc.h> #include <asm/pgtable-hwdef.h> @@ -54,7 +56,7 @@ * Therefore, we tweak the implementation slightly - we tell Linux that we * have 2048 entries in the first level, each of which is 8 bytes (iow, two * hardware pointers to the second level.) The second level contains two - * hardware PTE tables arranged contiguously, followed by Linux versions + * hardware PTE tables arranged contiguously, preceded by Linux versions * which contain the state information Linux needs. We, therefore, end up * with 512 entries in the "PTE" level. * @@ -62,15 +64,15 @@ * * pgd pte * | | - * +--------+ +0 - * | |-----> +------------+ +0 + * +--------+ + * | | +------------+ +0 + * +- - - - + | Linux pt 0 | + * | | +------------+ +1024 + * +--------+ +0 | Linux pt 1 | + * | |-----> +------------+ +2048 * +- - - - + +4 | h/w pt 0 | - * | |-----> +------------+ +1024 + * | |-----> +------------+ +3072 * +--------+ +8 | h/w pt 1 | - * | | +------------+ +2048 - * +- - - - + | Linux pt 0 | - * | | +------------+ +3072 - * +--------+ | Linux pt 1 | * | | +------------+ +4096 * * See L_PTE_xxx below for definitions of bits in the "Linux pt", and @@ -102,6 +104,10 @@ #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 2048 +#define PTE_HWTABLE_PTRS (PTRS_PER_PTE) +#define PTE_HWTABLE_OFF (PTE_HWTABLE_PTRS * sizeof(pte_t)) +#define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u32)) + /* * PMD_SHIFT determines the size of the area a second-level page table can map * PGDIR_SHIFT determines what a third-level page table entry can map @@ -112,13 +118,13 @@ #define LIBRARY_TEXT_START 0x0c000000 #ifndef __ASSEMBLY__ -extern void __pte_error(const char *file, int line, unsigned long val); -extern void __pmd_error(const char *file, int line, unsigned long val); -extern void __pgd_error(const char *file, int line, unsigned long val); +extern void __pte_error(const char *file, int line, pte_t); +extern void __pmd_error(const char *file, int line, pmd_t); +extern void __pgd_error(const char *file, int line, pgd_t); -#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) -#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) -#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) +#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte) +#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd) +#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd) #endif /* !__ASSEMBLY__ */ #define PMD_SIZE (1UL << PMD_SHIFT) @@ -133,8 +139,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); */ #define FIRST_USER_ADDRESS PAGE_SIZE -#define FIRST_USER_PGD_NR 1 -#define USER_PTRS_PER_PGD ((TASK_SIZE/PGDIR_SIZE) - FIRST_USER_PGD_NR) +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) /* * section address mask and size definitions. @@ -161,30 +166,30 @@ extern void __pgd_error(const char *file, int line, unsigned long val); * The PTE table pointer refers to the hardware entries; the "Linux" * entries are stored 1024 bytes below. */ -#define L_PTE_PRESENT (1 << 0) -#define L_PTE_YOUNG (1 << 1) -#define L_PTE_FILE (1 << 2) /* only when !PRESENT */ -#define L_PTE_DIRTY (1 << 6) -#define L_PTE_WRITE (1 << 7) -#define L_PTE_USER (1 << 8) -#define L_PTE_EXEC (1 << 9) -#define L_PTE_SHARED (1 << 10) /* shared(v6), coherent(xsc3) */ +#define L_PTE_PRESENT (_AT(pteval_t, 1) << 0) +#define L_PTE_YOUNG (_AT(pteval_t, 1) << 1) +#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ +#define L_PTE_DIRTY (_AT(pteval_t, 1) << 6) +#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) +#define L_PTE_USER (_AT(pteval_t, 1) << 8) +#define L_PTE_XN (_AT(pteval_t, 1) << 9) +#define L_PTE_SHARED (_AT(pteval_t, 1) << 10) /* shared(v6), coherent(xsc3) */ /* * These are the memory types, defined to be compatible with * pre-ARMv6 CPUs cacheable and bufferable bits: XXCB */ -#define L_PTE_MT_UNCACHED (0x00 << 2) /* 0000 */ -#define L_PTE_MT_BUFFERABLE (0x01 << 2) /* 0001 */ -#define L_PTE_MT_WRITETHROUGH (0x02 << 2) /* 0010 */ -#define L_PTE_MT_WRITEBACK (0x03 << 2) /* 0011 */ -#define L_PTE_MT_MINICACHE (0x06 << 2) /* 0110 (sa1100, xscale) */ -#define L_PTE_MT_WRITEALLOC (0x07 << 2) /* 0111 */ -#define L_PTE_MT_DEV_SHARED (0x04 << 2) /* 0100 */ -#define L_PTE_MT_DEV_NONSHARED (0x0c << 2) /* 1100 */ -#define L_PTE_MT_DEV_WC (0x09 << 2) /* 1001 */ -#define L_PTE_MT_DEV_CACHED (0x0b << 2) /* 1011 */ -#define L_PTE_MT_MASK (0x0f << 2) +#define L_PTE_MT_UNCACHED (_AT(pteval_t, 0x00) << 2) /* 0000 */ +#define L_PTE_MT_BUFFERABLE (_AT(pteval_t, 0x01) << 2) /* 0001 */ +#define L_PTE_MT_WRITETHROUGH (_AT(pteval_t, 0x02) << 2) /* 0010 */ +#define L_PTE_MT_WRITEBACK (_AT(pteval_t, 0x03) << 2) /* 0011 */ +#define L_PTE_MT_MINICACHE (_AT(pteval_t, 0x06) << 2) /* 0110 (sa1100, xscale) */ +#define L_PTE_MT_WRITEALLOC (_AT(pteval_t, 0x07) << 2) /* 0111 */ +#define L_PTE_MT_DEV_SHARED (_AT(pteval_t, 0x04) << 2) /* 0100 */ +#define L_PTE_MT_DEV_NONSHARED (_AT(pteval_t, 0x0c) << 2) /* 1100 */ +#define L_PTE_MT_DEV_WC (_AT(pteval_t, 0x09) << 2) /* 1001 */ +#define L_PTE_MT_DEV_CACHED (_AT(pteval_t, 0x0b) << 2) /* 1011 */ +#define L_PTE_MT_MASK (_AT(pteval_t, 0x0f) << 2) #ifndef __ASSEMBLY__ @@ -201,23 +206,44 @@ extern pgprot_t pgprot_kernel; #define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b)) -#define PAGE_NONE pgprot_user -#define PAGE_SHARED _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_WRITE) -#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_WRITE | L_PTE_EXEC) -#define PAGE_COPY _MOD_PROT(pgprot_user, L_PTE_USER) -#define PAGE_COPY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_EXEC) -#define PAGE_READONLY _MOD_PROT(pgprot_user, L_PTE_USER) -#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_EXEC) -#define PAGE_KERNEL pgprot_kernel -#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_kernel, L_PTE_EXEC) - -#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT) -#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_WRITE) -#define __PAGE_SHARED_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_WRITE | L_PTE_EXEC) -#define __PAGE_COPY __pgprot(_L_PTE_DEFAULT | L_PTE_USER) -#define __PAGE_COPY_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_EXEC) -#define __PAGE_READONLY __pgprot(_L_PTE_DEFAULT | L_PTE_USER) -#define __PAGE_READONLY_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_EXEC) +#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY) +#define PAGE_SHARED _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN) +#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_user, L_PTE_USER) +#define PAGE_COPY _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) +#define PAGE_COPY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY) +#define PAGE_READONLY _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) +#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY) +#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) +#define PAGE_KERNEL_EXEC pgprot_kernel + +#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) +#define __PAGE_SHARED_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER) +#define __PAGE_COPY __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_COPY_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY) +#define __PAGE_READONLY __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_READONLY_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY) + +#define __pgprot_modify(prot,mask,bits) \ + __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) + +#define pgprot_noncached(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED) + +#define pgprot_writecombine(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE) + +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +#define pgprot_dmacoherent(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN) +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#else +#define pgprot_dmacoherent(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED | L_PTE_XN) +#endif #endif /* __ASSEMBLY__ */ @@ -255,26 +281,87 @@ extern pgprot_t pgprot_kernel; extern struct page *empty_zero_page; #define ZERO_PAGE(vaddr) (empty_zero_page) -#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) -#define pfn_pte(pfn,prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) -#define pte_none(pte) (!pte_val(pte)) -#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) -#define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* to find an entry in a page-table-directory */ +#define pgd_index(addr) ((addr) >> PGDIR_SHIFT) + +#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) + +/* + * The "pud_xxx()" functions here are trivial when the pmd is folded into + * the pud: the pud entry is never bad, always exists, and can't be set or + * cleared. + */ +#define pud_none(pud) (0) +#define pud_bad(pud) (0) +#define pud_present(pud) (1) +#define pud_clear(pudp) do { } while (0) +#define set_pud(pud,pudp) do { } while (0) + + +/* Find an entry in the second-level page table.. */ +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) +{ + return (pmd_t *)pud; +} + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_present(pmd) (pmd_val(pmd)) +#define pmd_bad(pmd) (pmd_val(pmd) & 2) + +#define copy_pmd(pmdpd,pmdps) \ + do { \ + pmdpd[0] = pmdps[0]; \ + pmdpd[1] = pmdps[1]; \ + flush_pmd_entry(pmdpd); \ + } while (0) + +#define pmd_clear(pmdp) \ + do { \ + pmdp[0] = __pmd(0); \ + pmdp[1] = __pmd(0); \ + clean_pmd_entry(pmdp); \ + } while (0) + +static inline pte_t *pmd_page_vaddr(pmd_t pmd) +{ + return __va(pmd_val(pmd) & PAGE_MASK); +} + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) + +/* we don't need complex calculations here as the pmd is folded into the pgd */ +#define pmd_addr_end(addr,end) (end) -#define pte_offset_map(dir,addr) (__pte_map(dir) + __pte_index(addr)) -#define pte_unmap(pte) __pte_unmap(pte) #ifndef CONFIG_HIGHPTE -#define __pte_map(dir) pmd_page_vaddr(*(dir)) +#define __pte_map(pmd) pmd_page_vaddr(*(pmd)) #define __pte_unmap(pte) do { } while (0) #else -#define __pte_map(dir) ((pte_t *)kmap_atomic(pmd_page(*(dir))) + PTRS_PER_PTE) -#define __pte_unmap(pte) kunmap_atomic((pte - PTRS_PER_PTE)) +#define __pte_map(pmd) (pte_t *)kmap_atomic(pmd_page(*(pmd))) +#define __pte_unmap(pte) kunmap_atomic(pte) #endif +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + +#define pte_offset_kernel(pmd,addr) (pmd_page_vaddr(*(pmd)) + pte_index(addr)) + +#define pte_offset_map(pmd,addr) (__pte_map(pmd) + pte_index(addr)) +#define pte_unmap(pte) __pte_unmap(pte) + +#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) +#define pfn_pte(pfn,prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) + +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define mk_pte(page,prot) pfn_pte(page_to_pfn(page), prot) + #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) +#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) @@ -295,15 +382,12 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, } } -/* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ +#define pte_none(pte) (!pte_val(pte)) #define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) -#define pte_write(pte) (pte_val(pte) & L_PTE_WRITE) +#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) #define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) -#define pte_exec(pte) (pte_val(pte) & L_PTE_EXEC) +#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) #define pte_special(pte) (0) #define pte_present_user(pte) \ @@ -313,8 +397,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define PTE_BIT_FUNC(fn,op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } -PTE_BIT_FUNC(wrprotect, &= ~L_PTE_WRITE); -PTE_BIT_FUNC(mkwrite, |= L_PTE_WRITE); +PTE_BIT_FUNC(wrprotect, |= L_PTE_RDONLY); +PTE_BIT_FUNC(mkwrite, &= ~L_PTE_RDONLY); PTE_BIT_FUNC(mkclean, &= ~L_PTE_DIRTY); PTE_BIT_FUNC(mkdirty, |= L_PTE_DIRTY); PTE_BIT_FUNC(mkold, &= ~L_PTE_YOUNG); @@ -322,101 +406,13 @@ PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG); static inline pte_t pte_mkspecial(pte_t pte) { return pte; } -#define __pgprot_modify(prot,mask,bits) \ - __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) - -/* - * Mark the prot value as uncacheable and unbufferable. - */ -#define pgprot_noncached(prot) \ - __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED) -#define pgprot_writecombine(prot) \ - __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE) -#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE -#define pgprot_dmacoherent(prot) \ - __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_BUFFERABLE) -#define __HAVE_PHYS_MEM_ACCESS_PROT -struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#else -#define pgprot_dmacoherent(prot) \ - __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_UNCACHED) -#endif - -#define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_present(pmd) (pmd_val(pmd)) -#define pmd_bad(pmd) (pmd_val(pmd) & 2) - -#define copy_pmd(pmdpd,pmdps) \ - do { \ - pmdpd[0] = pmdps[0]; \ - pmdpd[1] = pmdps[1]; \ - flush_pmd_entry(pmdpd); \ - } while (0) - -#define pmd_clear(pmdp) \ - do { \ - pmdp[0] = __pmd(0); \ - pmdp[1] = __pmd(0); \ - clean_pmd_entry(pmdp); \ - } while (0) - -static inline pte_t *pmd_page_vaddr(pmd_t pmd) -{ - unsigned long ptr; - - ptr = pmd_val(pmd) & ~(PTRS_PER_PTE * sizeof(void *) - 1); - ptr += PTRS_PER_PTE * sizeof(void *); - - return __va(ptr); -} - -#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) - -/* we don't need complex calculations here as the pmd is folded into the pgd */ -#define pmd_addr_end(addr,end) (end) - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ -#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) - -/* - * The "pgd_xxx()" functions here are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's folded - * into the pgd entry) - */ -#define pgd_none(pgd) (0) -#define pgd_bad(pgd) (0) -#define pgd_present(pgd) (1) -#define pgd_clear(pgdp) do { } while (0) -#define set_pgd(pgd,pgdp) do { } while (0) - -/* to find an entry in a page-table-directory */ -#define pgd_index(addr) ((addr) >> PGDIR_SHIFT) - -#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr)) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) - -/* Find an entry in the second-level page table.. */ -#define pmd_offset(dir, addr) ((pmd_t *)(dir)) - -/* Find an entry in the third-level page table.. */ -#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - const unsigned long mask = L_PTE_EXEC | L_PTE_WRITE | L_PTE_USER; + const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; - /* * Encode and decode a swap entry. Swap entries are stored in the Linux * page tables as follows: @@ -481,6 +477,9 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define pgtable_cache_init() do { } while (0) +void identity_mapping_add(pgd_t *, unsigned long, unsigned long); +void identity_mapping_del(pgd_t *, unsigned long, unsigned long); + #endif /* !__ASSEMBLY__ */ #endif /* CONFIG_MMU */ diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 3d05190797cb..96ed521f2408 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -33,27 +33,23 @@ struct seq_file; /* * generate IPI list text */ -extern void show_ipi_list(struct seq_file *p); +extern void show_ipi_list(struct seq_file *, int); /* * Called from assembly code, this handles an IPI. */ -asmlinkage void do_IPI(struct pt_regs *regs); +asmlinkage void do_IPI(int ipinr, struct pt_regs *regs); /* * Setup the set of possible CPUs (via set_cpu_possible) */ extern void smp_init_cpus(void); -/* - * Move global data into per-processor storage. - */ -extern void smp_store_cpu_info(unsigned int cpuid); /* * Raise an IPI cross call on CPUs in callmap. */ -extern void smp_cross_call(const struct cpumask *mask); +extern void smp_cross_call(const struct cpumask *mask, int ipi); /* * Boot a secondary CPU, and assign it the specified idle task. @@ -73,6 +69,11 @@ asmlinkage void secondary_start_kernel(void); extern void platform_secondary_init(unsigned int cpu); /* + * Initialize cpu_possible map, and enable coherency + */ +extern void platform_smp_prepare_cpus(unsigned int); + +/* * Initial data for bringing up a secondary CPU. */ struct secondary_data { @@ -97,6 +98,6 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); /* * show local interrupt info */ -extern void show_local_irqs(struct seq_file *); +extern void show_local_irqs(struct seq_file *, int); #endif /* ifndef __ASM_ARM_SMP_H */ diff --git a/arch/arm/include/asm/smp_mpidr.h b/arch/arm/include/asm/smp_mpidr.h deleted file mode 100644 index 6a9307d64900..000000000000 --- a/arch/arm/include/asm/smp_mpidr.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef ASMARM_SMP_MIDR_H -#define ASMARM_SMP_MIDR_H - -#define hard_smp_processor_id() \ - ({ \ - unsigned int cpunum; \ - __asm__("\n" \ - "1: mrc p15, 0, %0, c0, c0, 5\n" \ - " .pushsection \".alt.smp.init\", \"a\"\n"\ - " .long 1b\n" \ - " mov %0, #0\n" \ - " .popsection" \ - : "=r" (cpunum)); \ - cpunum &= 0x0F; \ - }) - -#endif diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 1120f18a6b17..ec4327a4653d 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -63,6 +63,11 @@ #include <asm/outercache.h> #define __exception __attribute__((section(".exception.text"))) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#define __exception_irq_entry __irq_entry +#else +#define __exception_irq_entry __exception +#endif struct thread_info; struct task_struct; diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index f41a6f57cd12..a116bef20cbe 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -104,6 +104,7 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) #define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) #define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep) #define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp) +#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) #define tlb_migrate_finish(mm) do { } while (0) diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index 491960bf4260..1b960d5ef6a5 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -15,16 +15,37 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static inline int __in_irqentry_text(unsigned long ptr) +{ + extern char __irqentry_text_start[]; + extern char __irqentry_text_end[]; + + return ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end; +} +#else +static inline int __in_irqentry_text(unsigned long ptr) +{ + return 0; +} +#endif + static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; extern char __exception_text_end[]; + int in; - return ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; + in = ptr >= (unsigned long)&__exception_text_start && + ptr < (unsigned long)&__exception_text_end; + + return in ? : __in_irqentry_text(ptr); } extern void __init early_trap_init(void); extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame); +extern void *vectors_page; + #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 33e4a48fe103..b293616a1a1a 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -227,7 +227,7 @@ do { \ #define __get_user_asm_byte(x,addr,err) \ __asm__ __volatile__( \ - "1: ldrbt %1,[%2]\n" \ + "1: " T(ldrb) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -263,7 +263,7 @@ do { \ #define __get_user_asm_word(x,addr,err) \ __asm__ __volatile__( \ - "1: ldrt %1,[%2]\n" \ + "1: " T(ldr) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -308,7 +308,7 @@ do { \ #define __put_user_asm_byte(x,__pu_addr,err) \ __asm__ __volatile__( \ - "1: strbt %1,[%2]\n" \ + "1: " T(strb) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -341,7 +341,7 @@ do { \ #define __put_user_asm_word(x,__pu_addr,err) \ __asm__ __volatile__( \ - "1: strt %1,[%2]\n" \ + "1: " T(str) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -366,10 +366,10 @@ do { \ #define __put_user_asm_dword(x,__pu_addr,err) \ __asm__ __volatile__( \ - ARM( "1: strt " __reg_oper1 ", [%1], #4\n" ) \ - ARM( "2: strt " __reg_oper0 ", [%1]\n" ) \ - THUMB( "1: strt " __reg_oper1 ", [%1]\n" ) \ - THUMB( "2: strt " __reg_oper0 ", [%1, #4]\n" ) \ + ARM( "1: " T(str) " " __reg_oper1 ", [%1], #4\n" ) \ + ARM( "2: " T(str) " " __reg_oper0 ", [%1]\n" ) \ + THUMB( "1: " T(str) " " __reg_oper1 ", [%1]\n" ) \ + THUMB( "2: " T(str) " " __reg_oper0 ", [%1, #4]\n" ) \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fbb..806d5e135947 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -5,7 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg endif @@ -33,6 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o @@ -42,6 +43,8 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_ARM_UNWIND) += unwind.o obj-$(CONFIG_HAVE_TCM) += tcm.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o +CFLAGS_swp_emulate.o := -Wa,-march=armv7-a obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bb96a7d4bbf5..25c9b52448fa 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -48,7 +48,7 @@ */ ALT_SMP(test_for_ipi r0, r6, r5, lr) ALT_UP_B(9997f) - movne r0, sp + movne r1, sp adrne lr, BSYM(1b) bne do_IPI @@ -735,7 +735,7 @@ ENTRY(__switch_to) THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack THUMB( str sp, [ip], #4 ) THUMB( str lr, [ip], #4 ) -#ifdef CONFIG_MMU +#ifdef CONFIG_CPU_USE_DOMAINS ldr r6, [r2, #TI_CPU_DOMAIN] #endif set_tls r3, r4, r5 @@ -744,7 +744,7 @@ ENTRY(__switch_to) ldr r8, =__stack_chk_guard ldr r7, [r7, #TSK_STACK_CANARY] #endif -#ifdef CONFIG_MMU +#ifdef CONFIG_CPU_USE_DOMAINS mcr p15, 0, r6, c3, c0, 0 @ Set domain register #endif mov r5, r0 @@ -842,7 +842,7 @@ __kuser_helper_start: */ __kuser_memory_barrier: @ 0xffff0fa0 - smp_dmb + smp_dmb arm usr_ret lr .align 5 @@ -959,7 +959,7 @@ kuser_cmpxchg_fixup: #else - smp_dmb + smp_dmb arm 1: ldrex r3, [r2] subs r3, r3, r0 strexeq r3, r1, [r2] diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8bfa98757cd2..aae802ee12f8 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -141,98 +141,170 @@ ENDPROC(ret_from_fork) #endif #endif -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(__gnu_mcount_nc) - mov ip, lr - ldmia sp!, {lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) +.macro __mcount suffix + mcount_enter + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, .Lftrace_stub + cmp r0, r2 + bne 1f + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldr r1, =ftrace_graph_return + ldr r2, [r1] + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr r1, =ftrace_graph_entry + ldr r2, [r1] + ldr r0, =ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix +#endif -ENTRY(ftrace_caller) - stmdb sp!, {r0-r3, lr} - mov r0, lr + mcount_exit + +1: mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function + sub r0, r0, #MCOUNT_INSN_SIZE + adr lr, BSYM(2f) + mov pc, r2 +2: mcount_exit +.endm + +.macro __ftrace_caller suffix + mcount_enter + + mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function sub r0, r0, #MCOUNT_INSN_SIZE - ldr r1, [sp, #20] - .global ftrace_call -ftrace_call: + .globl ftrace_call\suffix +ftrace_call\suffix: bl ftrace_stub - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(ftrace_caller) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call\suffix +ftrace_graph_call\suffix: + mov r0, r0 +#endif + + mcount_exit +.endm + +.macro __ftrace_graph_caller + sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#ifdef CONFIG_DYNAMIC_FTRACE + @ called from __ftrace_caller, saved in mcount_enter + ldr r1, [sp, #16] @ instrumented routine (func) +#else + @ called from __mcount, untouched in lr + mov r1, lr @ instrumented routine (func) +#endif + sub r1, r1, #MCOUNT_INSN_SIZE + mov r2, fp @ frame pointer + bl prepare_ftrace_return + mcount_exit +.endm #ifdef CONFIG_OLD_MCOUNT +/* + * mcount + */ + +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [fp, #-4] +.endm + +.macro mcount_exit + ldr lr, [fp, #-4] + ldmia sp!, {r0-r3, pc} +.endm + ENTRY(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE stmdb sp!, {lr} ldr lr, [fp, #-4] ldmia sp!, {pc} +#else + __mcount _old +#endif ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) - stmdb sp!, {r0-r3, lr} - ldr r1, [fp, #-4] - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - - .globl ftrace_call_old -ftrace_call_old: - bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} + __ftrace_caller _old ENDPROC(ftrace_caller_old) #endif -#else +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller_old) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller_old) +#endif -ENTRY(__gnu_mcount_nc) +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit +#endif + +/* + * __gnu_mcount_nc + */ + +.macro mcount_enter stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne gnu_trace +.endm + +.macro mcount_get_lr reg + ldr \reg, [sp, #20] +.endm + +.macro mcount_exit ldmia sp!, {r0-r3, ip, lr} mov pc, ip +.endm -gnu_trace: - ldr r1, [sp, #20] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(1f) - mov pc, r2 -1: - ldmia sp!, {r0-r3, ip, lr} +ENTRY(__gnu_mcount_nc) +#ifdef CONFIG_DYNAMIC_FTRACE + mov ip, lr + ldmia sp!, {lr} mov pc, ip +#else + __mcount +#endif ENDPROC(__gnu_mcount_nc) -#ifdef CONFIG_OLD_MCOUNT -/* - * This is under an ifdef in order to force link-time errors for people trying - * to build with !FRAME_POINTER with a GCC which doesn't use the new-style - * mcount. - */ -ENTRY(mcount) - stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, ftrace_stub - cmp r0, r2 - bne trace - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller) + __ftrace_caller +ENDPROC(ftrace_caller) +#endif -trace: - ldr r1, [fp, #-4] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - mov lr, pc - mov pc, r2 - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -ENDPROC(mcount) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller) #endif -#endif /* CONFIG_DYNAMIC_FTRACE */ +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl return_to_handler +return_to_handler: + stmdb sp!, {r0-r3} + mov r0, fp @ frame pointer + bl ftrace_return_to_handler + mov lr, r0 @ r0 has real ret addr + ldmia sp!, {r0-r3} + mov pc, lr +#endif ENTRY(ftrace_stub) .Lftrace_stub: diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index 6ff7919613d7..e72dc34eea1c 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -45,6 +45,7 @@ #include <asm/fiq.h> #include <asm/irq.h> #include <asm/system.h> +#include <asm/traps.h> static unsigned long no_fiq_insn; @@ -67,17 +68,22 @@ static struct fiq_handler default_owner = { static struct fiq_handler *current_fiq = &default_owner; -int show_fiq_list(struct seq_file *p, void *v) +int show_fiq_list(struct seq_file *p, int prec) { if (current_fiq != &default_owner) - seq_printf(p, "FIQ: %s\n", current_fiq->name); + seq_printf(p, "%*s: %s\n", prec, "FIQ", + current_fiq->name); return 0; } void set_fiq_handler(void *start, unsigned int length) { +#if defined(CONFIG_CPU_USE_DOMAINS) memcpy((void *)0xffff001c, start, length); +#else + memcpy(vectors_page + 0x1c, start, length); +#endif flush_icache_range(0xffff001c, 0xffff001c + length); if (!vectors_high()) flush_icache_range(0x1c, 0x1c + length); diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 971ac8c36ea7..c0062ad1e847 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -24,6 +24,7 @@ #define NOP 0xe8bd4000 /* pop {lr} */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) @@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) } #endif -/* construct a branch (BL) instruction to addr */ #ifdef CONFIG_THUMB2_KERNEL -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) j2 = (!i2) ^ s; first = 0xf000 | (s << 10) | imm10; - second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; + second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; + if (link) + second |= 1 << 14; return (second << 16) | first; } #else -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { + unsigned long opcode = 0xea000000; long offset; + if (link) + opcode |= 1 << 24; + offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace @@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) offset = (offset >> 2) & 0x00ffffff; - return 0xeb000000 | offset; + return opcode | offset; } #endif +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) { @@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; +extern unsigned long ftrace_graph_call_old; +extern void ftrace_graph_caller_old(void); + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_call_old, + ftrace_graph_caller_old, + enable); +#endif + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 6bd82d25683c..f17d9a09e8fb 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -91,6 +91,11 @@ ENTRY(stext) movs r8, r5 @ invalid machine (r5=0)? THUMB( it eq ) @ force fixup-able long branch encoding beq __error_a @ yes, error 'a' + + /* + * r1 = machine no, r2 = atags, + * r8 = machinfo, r9 = cpuid, r10 = procinfo + */ bl __vet_atags #ifdef CONFIG_SMP_ON_UP bl __fixup_smp @@ -387,19 +392,19 @@ ENDPROC(__turn_mmu_on) #ifdef CONFIG_SMP_ON_UP __fixup_smp: - mov r7, #0x00070000 - orr r6, r7, #0xff000000 @ mask 0xff070000 - orr r7, r7, #0x41000000 @ val 0x41070000 - and r0, r9, r6 - teq r0, r7 @ ARM CPU and ARMv6/v7? + mov r4, #0x00070000 + orr r3, r4, #0xff000000 @ mask 0xff070000 + orr r4, r4, #0x41000000 @ val 0x41070000 + and r0, r9, r3 + teq r0, r4 @ ARM CPU and ARMv6/v7? bne __fixup_smp_on_up @ no, assume UP - orr r6, r6, #0x0000ff00 - orr r6, r6, #0x000000f0 @ mask 0xff07fff0 - orr r7, r7, #0x0000b000 - orr r7, r7, #0x00000020 @ val 0x4107b020 - and r0, r9, r6 - teq r0, r7 @ ARM 11MPCore? + orr r3, r3, #0x0000ff00 + orr r3, r3, #0x000000f0 @ mask 0xff07fff0 + orr r4, r4, #0x0000b000 + orr r4, r4, #0x00000020 @ val 0x4107b020 + and r0, r9, r3 + teq r0, r4 @ ARM 11MPCore? moveq pc, lr @ yes, assume SMP mrc p15, 0, r0, c0, c0, 5 @ read MPIDR @@ -408,15 +413,22 @@ __fixup_smp: __fixup_smp_on_up: adr r0, 1f - ldmia r0, {r3, r6, r7} + ldmia r0, {r3 - r5} sub r3, r0, r3 - add r6, r6, r3 - add r7, r7, r3 -2: cmp r6, r7 - ldmia r6!, {r0, r4} - strlo r4, [r0, r3] - blo 2b - mov pc, lr + add r4, r4, r3 + add r5, r5, r3 +2: cmp r4, r5 + movhs pc, lr + ldmia r4!, {r0, r6} + ARM( str r6, [r0, r3] ) + THUMB( add r0, r0, r3 ) +#ifdef __ARMEB__ + THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. +#endif + THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords + THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. + THUMB( strh r6, [r0] ) + b 2b ENDPROC(__fixup_smp) .align diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692a..d57476792120 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -35,6 +35,7 @@ #include <linux/list.h> #include <linux/kallsyms.h> #include <linux/proc_fs.h> +#include <linux/ftrace.h> #include <asm/system.h> #include <asm/mach/irq.h> @@ -57,11 +58,20 @@ int show_interrupts(struct seq_file *p, void *v) struct irq_desc *desc; struct irqaction * action; unsigned long flags; + int prec, n; + + for (prec = 3, n = 1000; prec < 10 && n <= nr_irqs; prec++) + n *= 10; + +#ifdef CONFIG_SMP + if (prec < 4) + prec = 4; +#endif if (i == 0) { char cpuname[12]; - seq_printf(p, " "); + seq_printf(p, "%*s ", prec, ""); for_each_present_cpu(cpu) { sprintf(cpuname, "CPU%d", cpu); seq_printf(p, " %10s", cpuname); @@ -76,7 +86,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!action) goto unlock; - seq_printf(p, "%3d: ", i); + seq_printf(p, "%*d: ", prec, i); for_each_present_cpu(cpu) seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); seq_printf(p, " %10s", desc->chip->name ? : "-"); @@ -89,13 +99,15 @@ unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } else if (i == nr_irqs) { #ifdef CONFIG_FIQ - show_fiq_list(p, v); + show_fiq_list(p, prec); #endif #ifdef CONFIG_SMP - show_ipi_list(p); - show_local_irqs(p); + show_ipi_list(p, prec); +#endif +#ifdef CONFIG_LOCAL_TIMERS + show_local_irqs(p, prec); #endif - seq_printf(p, "Err: %10lu\n", irq_err_count); + seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); } return 0; } @@ -105,7 +117,8 @@ unlock: * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 3a8fd5140d7a..30ead135ff5f 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -23,6 +23,8 @@ extern unsigned long kexec_indirection_page; extern unsigned long kexec_mach_type; extern unsigned long kexec_boot_atags; +static atomic_t waiting_for_crash_ipi; + /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -37,9 +39,37 @@ void machine_kexec_cleanup(struct kimage *image) { } +void machine_crash_nonpanic_core(void *unused) +{ + struct pt_regs regs; + + crash_setup_regs(®s, NULL); + printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n", + smp_processor_id()); + crash_save_cpu(®s, smp_processor_id()); + flush_cache_all(); + + atomic_dec(&waiting_for_crash_ipi); + while (1) + cpu_relax(); +} + void machine_crash_shutdown(struct pt_regs *regs) { + unsigned long msecs; + local_irq_disable(); + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n"); + crash_save_cpu(regs, smp_processor_id()); printk(KERN_INFO "Loading crashdump kernel...\n"); diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index d9bd786ce23d..0c1bb68ff4a8 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -67,35 +67,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, char *secstrings, struct module *mod) { -#ifdef CONFIG_ARM_UNWIND - Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum; - struct arm_unwind_mapping *maps = mod->arch.map; - - for (s = sechdrs; s < sechdrs_end; s++) { - char const *secname = secstrings + s->sh_name; - - if (strcmp(".ARM.exidx.init.text", secname) == 0) - maps[ARM_SEC_INIT].unw_sec = s; - else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) - maps[ARM_SEC_DEVINIT].unw_sec = s; - else if (strcmp(".ARM.exidx", secname) == 0) - maps[ARM_SEC_CORE].unw_sec = s; - else if (strcmp(".ARM.exidx.exit.text", secname) == 0) - maps[ARM_SEC_EXIT].unw_sec = s; - else if (strcmp(".ARM.exidx.devexit.text", secname) == 0) - maps[ARM_SEC_DEVEXIT].unw_sec = s; - else if (strcmp(".init.text", secname) == 0) - maps[ARM_SEC_INIT].sec_text = s; - else if (strcmp(".devinit.text", secname) == 0) - maps[ARM_SEC_DEVINIT].sec_text = s; - else if (strcmp(".text", secname) == 0) - maps[ARM_SEC_CORE].sec_text = s; - else if (strcmp(".exit.text", secname) == 0) - maps[ARM_SEC_EXIT].sec_text = s; - else if (strcmp(".devexit.text", secname) == 0) - maps[ARM_SEC_DEVEXIT].sec_text = s; - } -#endif return 0; } @@ -300,41 +271,69 @@ apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, return -ENOEXEC; } -#ifdef CONFIG_ARM_UNWIND -static void register_unwind_tables(struct module *mod) +struct mod_unwind_map { + const Elf_Shdr *unw_sec; + const Elf_Shdr *txt_sec; +}; + +int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { +#ifdef CONFIG_ARM_UNWIND + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + const Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum; + struct mod_unwind_map maps[ARM_SEC_MAX]; int i; - for (i = 0; i < ARM_SEC_MAX; ++i) { - struct arm_unwind_mapping *map = &mod->arch.map[i]; - if (map->unw_sec && map->sec_text) - map->unwind = unwind_table_add(map->unw_sec->sh_addr, - map->unw_sec->sh_size, - map->sec_text->sh_addr, - map->sec_text->sh_size); + + memset(maps, 0, sizeof(maps)); + + for (s = sechdrs; s < sechdrs_end; s++) { + const char *secname = secstrs + s->sh_name; + + if (!(s->sh_flags & SHF_ALLOC)) + continue; + + if (strcmp(".ARM.exidx.init.text", secname) == 0) + maps[ARM_SEC_INIT].unw_sec = s; + else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) + maps[ARM_SEC_DEVINIT].unw_sec = s; + else if (strcmp(".ARM.exidx", secname) == 0) + maps[ARM_SEC_CORE].unw_sec = s; + else if (strcmp(".ARM.exidx.exit.text", secname) == 0) + maps[ARM_SEC_EXIT].unw_sec = s; + else if (strcmp(".ARM.exidx.devexit.text", secname) == 0) + maps[ARM_SEC_DEVEXIT].unw_sec = s; + else if (strcmp(".init.text", secname) == 0) + maps[ARM_SEC_INIT].txt_sec = s; + else if (strcmp(".devinit.text", secname) == 0) + maps[ARM_SEC_DEVINIT].txt_sec = s; + else if (strcmp(".text", secname) == 0) + maps[ARM_SEC_CORE].txt_sec = s; + else if (strcmp(".exit.text", secname) == 0) + maps[ARM_SEC_EXIT].txt_sec = s; + else if (strcmp(".devexit.text", secname) == 0) + maps[ARM_SEC_DEVEXIT].txt_sec = s; } -} -static void unregister_unwind_tables(struct module *mod) -{ - int i = ARM_SEC_MAX; - while (--i >= 0) - unwind_table_del(mod->arch.map[i].unwind); -} -#else -static inline void register_unwind_tables(struct module *mod) { } -static inline void unregister_unwind_tables(struct module *mod) { } + for (i = 0; i < ARM_SEC_MAX; i++) + if (maps[i].unw_sec && maps[i].txt_sec) + mod->arch.unwind[i] = + unwind_table_add(maps[i].unw_sec->sh_addr, + maps[i].unw_sec->sh_size, + maps[i].txt_sec->sh_addr, + maps[i].txt_sec->sh_size); #endif - -int -module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *module) -{ - register_unwind_tables(module); return 0; } void module_arch_cleanup(struct module *mod) { - unregister_unwind_tables(mod); +#ifdef CONFIG_ARM_UNWIND + int i; + + for (i = 0; i < ARM_SEC_MAX; i++) + if (mod->arch.unwind[i]) + unwind_table_del(mod->arch.unwind[i]); +#endif } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492a..624e2a5de2b3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -4,9 +4,7 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv7 support: Jean Pihet <jpihet@mvista.com> - * 2010 (c) MontaVista Software, LLC. + * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -34,7 +32,7 @@ static struct platform_device *pmu_device; * Hardware lock to serialize accesses to PMU registers. Needed for the * read/modify/write sequences. */ -DEFINE_SPINLOCK(pmu_lock); +static DEFINE_RAW_SPINLOCK(pmu_lock); /* * ARMv6 supports a maximum of 3 events, starting from index 1. If we add @@ -67,31 +65,25 @@ struct cpu_hw_events { */ unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; }; -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); - -/* PMU names. */ -static const char *arm_pmu_names[] = { - [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", - [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", - [ARM_PERF_PMU_ID_V6] = "v6", - [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", - [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", - [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", -}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); struct arm_pmu { enum arm_perf_pmu_ids id; + const char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); - int (*event_map)(int evt); - u64 (*raw_event)(u64); int (*get_event_idx)(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc); u32 (*read_counter)(int idx); void (*write_counter)(int idx, u32 val); void (*start)(void); void (*stop)(void); + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; + u32 raw_event_mask; int num_events; u64 max_period; }; @@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF -static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - static int armpmu_map_cache_event(u64 config) { @@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -166,6 +154,19 @@ armpmu_map_cache_event(u64 config) } static int +armpmu_map_event(u64 config) +{ + int mapping = (*armpmu->event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; +} + +static int +armpmu_map_raw_event(u64 config) +{ + return (int)(config & armpmu->raw_event_mask); +} + +static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) @@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event) /* Decode the generic type into an ARM event identifier. */ if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu->event_map(event->attr.config); + mapping = armpmu_map_event(event->attr.config); } else if (PERF_TYPE_HW_CACHE == event->attr.type) { mapping = armpmu_map_cache_event(event->attr.config); } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu->raw_event(event->attr.config); + mapping = armpmu_map_raw_event(event->attr.config); } else { pr_debug("event type %x not supported\n", event->attr.type); return -EOPNOTSUPP; @@ -603,2366 +604,10 @@ static struct pmu pmu = { .read = armpmu_read, }; -/* - * ARMv6 Performance counter handling code. - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void -armv6pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -void -armv6pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline int -armv6pmu_event_map(int config) -{ - int mapping = armv6_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int -armv6mpcore_pmu_event_map(int config) -{ - int mapping = armv6mpcore_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -armv6pmu_raw_event(u64 config) -{ - return config & 0xff; -} - -static int -armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. - */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { - return ARMV6_COUNTER1; - } - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { - return ARMV6_COUNTER0; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, flags, evt = 0; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Unlike UP ARMv6, we don't have a way of stopping the counters. We - * simply disable the interrupt reporting. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static const struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .event_map = armv6pmu_event_map, - .raw_event = armv6pmu_raw_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -/* - * ARMv6mpcore is almost identical to single core ARMv6 with the exception - * that some of the events have different enumerations and that there is no - * *hack* to stop the programmable counters. To stop the counters we simply - * disable the interrupt reporting and update the event. When unthrottling we - * reset the period and enable the interrupt reporting. - */ -static const struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .event_map = armv6mpcore_pmu_event_map, - .raw_event = armv6pmu_raw_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -/* Common ARMv7 event types */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_IFETCH_MISS = 0x01, - ARMV7_PERFCTR_ITLB_MISS = 0x02, - ARMV7_PERFCTR_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_DREAD = 0x06, - ARMV7_PERFCTR_DWRITE = 0x07, - - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, - /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - - ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; - -/* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - - ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, - ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, - ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, - ARMV7_PERFCTR_L2_ACCESS = 0x43, - ARMV7_PERFCTR_L2_CACH_MISS = 0x44, - ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, - ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, - ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, - ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, - ARMV7_PERFCTR_L1_DATA_MISS = 0x49, - ARMV7_PERFCTR_L1_INST_MISS = 0x4A, - ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, - ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, - ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, - ARMV7_PERFCTR_L2_NEON = 0x4E, - ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, - ARMV7_PERFCTR_L1_INST = 0x50, - ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, - ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, - ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, - ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, - ARMV7_PERFCTR_OP_EXECUTED = 0x55, - ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, - ARMV7_PERFCTR_CYCLES_INST = 0x57, - ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, - ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, - ARMV7_PERFCTR_NEON_CYCLES = 0x5A, - - ARMV7_PERFCTR_PMU0_EVENTS = 0x70, - ARMV7_PERFCTR_PMU1_EVENTS = 0x71, - ARMV7_PERFCTR_PMU_EVENTS = 0x72, -}; - -/* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, - ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, - ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, - - ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, - ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, - - ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, - ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, - ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, - ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, - ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, - ARMV7_PERFCTR_DATA_EVICTION = 0x65, - ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, - ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, - - ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, - - ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, - ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, - ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, - ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, - ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, - - ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, - ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, - ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, - ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, - ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, - ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, - ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, - - ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, - ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, - - ARMV7_PERFCTR_ISB_INST = 0x90, - ARMV7_PERFCTR_DSB_INST = 0x91, - ARMV7_PERFCTR_DMB_INST = 0x92, - ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, - - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, - ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, - ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, - ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, - ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 -}; - -/* - * Cortex-A8 HW events mapping - * - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Perf Events counters - */ -enum armv7_counters { - ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ - ARMV7_COUNTER0 = 2, /* First event counter */ -}; - -/* - * The cycle counter is ARMV7_CYCLE_COUNTER. - * The first event counter is ARMV7_COUNTER0. - * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). - */ -#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * Available counters - */ -#define ARMV7_CNT0 0 /* First event counter */ -#define ARMV7_CCNT 31 /* Cycle counter */ - -/* Perf Event to low level counters mapping */ -#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) - -/* - * CNTENS: counters enable reg - */ -#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) - -/* - * CNTENC: counters disable reg - */ -#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) - -/* - * INTENS: counters overflow interrupt enable reg - */ -#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENS_C (1 << ARMV7_CCNT) - -/* - * INTENC: counters overflow interrupt disable reg - */ -#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENC_C (1 << ARMV7_CCNT) - -/* - * EVTSEL: Event selection reg - */ -#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ - -/* - * SELECT: Counter selection reg - */ -#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_FLAG_C (1 << ARMV7_CCNT) -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -static inline unsigned long armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; -} - -static inline void armv7_pmnc_write(unsigned long val) -{ - val &= ARMV7_PMNC_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} - -static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} - -static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, - enum armv7_counters counter) -{ - int ret = 0; - - if (counter == ARMV7_CYCLE_COUNTER) - ret = pmnc & ARMV7_FLAG_C; - else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) - ret = pmnc & ARMV7_FLAG_P(counter); - else - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), counter); - - return ret; -} - -static inline int armv7_pmnc_select_counter(unsigned int idx) -{ - u32 val; - - if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { - pr_err("CPU%u selecting wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); - - return idx; -} - -static inline u32 armv7pmu_read_counter(int idx) -{ - unsigned long value = 0; - - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mrc p15, 0, %0, c9, c13, 2" - : "=r" (value)); - } else - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - - return value; -} - -static inline void armv7pmu_write_counter(int idx, u32 value) -{ - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mcr p15, 0, %0, c9, c13, 2" - : : "r" (value)); - } else - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); -} - -static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) -{ - if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTSEL_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); - } -} - -static inline u32 armv7_pmnc_enable_counter(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENS_C; - else - val = ARMV7_CNTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_counter(unsigned int idx) -{ - u32 val; - - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENC_C; - else - val = ARMV7_CNTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_enable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENS_C; - else - val = ARMV7_INTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENC_C; - else - val = ARMV7_INTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; -} - -#ifdef DEBUG -static void armv7_pmnc_dump_regs(void) -{ - u32 val; - unsigned int cnt; - - printk(KERN_INFO "PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); - - for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - printk(KERN_INFO "CNT[%d] count =0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - } -} -#endif - -void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We don't need to set the event if it's a cycle count - */ - if (idx != ARMV7_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Disable counter and interrupt - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_stop(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline int armv7_a8_pmu_event_map(int config) -{ - int mapping = armv7_a8_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int armv7_a9_pmu_event_map(int config) -{ - int mapping = armv7_a9_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 armv7pmu_raw_event(u64 config) -{ - return config & 0xff; -} - -static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx; - - /* Always place a cycle counter into the cycle counter. */ - if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .raw_event = armv7pmu_raw_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .max_period = (1LLU << 32) - 1, -}; - -static u32 __init armv7_reset_read_pmnc(void) -{ - u32 nb_cnt; - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); - - /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; -} - -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Based on xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. - */ - -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 1, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -static inline int -xscalepmu_event_map(int config) -{ - int mapping = xscale_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -xscalepmu_raw_event(u64 config) -{ - return config & 0xff; -} - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 -#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - if (XSCALE_PERFCTR_CCNT == event->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { - return XSCALE_COUNTER1; - } - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { - return XSCALE_COUNTER0; - } - - return -EAGAIN; - } -} - -static void -xscale1pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale1pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale1pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, - .handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* Disable the PMU. */ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void -xscale2pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale2pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale2pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; +/* Include the PMU-specific implementations. */ +#include "perf_event_xscale.c" +#include "perf_event_v6.c" +#include "perf_event_v7.c" static int __init init_hw_perf_events(void) @@ -2977,37 +622,16 @@ init_hw_perf_events(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - armpmu = &armv6pmu; - memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, - sizeof(armv6_perf_cache_map)); + armpmu = armv6pmu_init(); break; case 0xB020: /* ARM11mpcore */ - armpmu = &armv6mpcore_pmu; - memcpy(armpmu_perf_cache_map, - armv6mpcore_perf_cache_map, - sizeof(armv6mpcore_perf_cache_map)); + armpmu = armv6mpcore_pmu_init(); break; case 0xC080: /* Cortex-A8 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, - sizeof(armv7_a8_perf_cache_map)); - armv7pmu.event_map = armv7_a8_pmu_event_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a8_pmu_init(); break; case 0xC090: /* Cortex-A9 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, - sizeof(armv7_a9_perf_cache_map)); - armv7pmu.event_map = armv7_a9_pmu_event_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a9_pmu_init(); break; } /* Intel CPUs [xscale]. */ @@ -3015,21 +639,17 @@ init_hw_perf_events(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - armpmu = &xscale1pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); + armpmu = xscale1pmu_init(); break; case 2: - armpmu = &xscale2pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); + armpmu = xscale2pmu_init(); break; } } if (armpmu) { pr_info("enabled with %s PMU driver, %d counters available\n", - arm_pmu_names[armpmu->id], armpmu->num_events); + armpmu->name, armpmu->num_events); } else { pr_info("no hardware support available\n"); } @@ -3053,17 +673,17 @@ arch_initcall(init_hw_perf_events); * This code has been adapted from the ARM OProfile support. */ struct frame_tail { - struct frame_tail *fp; - unsigned long sp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long sp; + unsigned long lr; } __attribute__((packed)); /* * Get the return address for a single stackframe and return a pointer to the * next frame tail. */ -static struct frame_tail * -user_backtrace(struct frame_tail *tail, +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, struct perf_callchain_entry *entry) { struct frame_tail buftail; @@ -3089,10 +709,10 @@ user_backtrace(struct frame_tail *tail, void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail *tail; + struct frame_tail __user *tail; - tail = (struct frame_tail *)regs->ARM_fp - 1; + tail = (struct frame_tail __user *)regs->ARM_fp - 1; while (tail && !((unsigned long)tail & 0x3)) tail = user_backtrace(tail, entry); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c new file mode 100644 index 000000000000..c058bfc8532b --- /dev/null +++ b/arch/arm/kernel/perf_event_v6.c @@ -0,0 +1,672 @@ +/* + * ARMv6 Performance counter handling code. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has it's + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt. + */ + +#ifdef CONFIG_CPU_V6 +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 1, + ARMV6_COUNTER0, + ARMV6_COUNTER1, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +enum armv6mpcore_perf_types { + ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, + ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, + ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, + ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, + ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, + ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, + ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, + ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, + ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, + ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, + ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, + ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, + ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, + ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, + ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, + ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long pmcr) +{ + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u32 +armv6pmu_read_counter(int counter) +{ + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void +armv6pmu_write_counter(int counter, + u32 value) +{ + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +static void +armv6pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + raw_spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t +armv6pmu_handle_irq(int irq_num, + void *dev) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void +armv6pmu_start(void) +{ + unsigned long flags, val; + + raw_spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6pmu_stop(void) +{ + unsigned long flags, val; + + raw_spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + /* Always place a cycle counter into the cycle counter. */ + if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) + return ARMV6_COUNTER1; + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) + return ARMV6_COUNTER0; + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static void +armv6pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. The external reporting should + * be disabled and so this should never increment. + */ + raw_spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, flags, evt = 0; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Unlike UP ARMv6, we don't have a way of stopping the counters. We + * simply disable the interrupt reporting. + */ + raw_spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static const struct arm_pmu armv6pmu = { + .id = ARM_PERF_PMU_ID_V6, + .name = "v6", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +static const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + +/* + * ARMv6mpcore is almost identical to single core ARMv6 with the exception + * that some of the events have different enumerations and that there is no + * *hack* to stop the programmable counters. To stop the counters we simply + * disable the interrupt reporting and update the event. When unthrottling we + * reset the period and enable the interrupt reporting. + */ +static const struct arm_pmu armv6mpcore_pmu = { + .id = ARM_PERF_PMU_ID_V6MP, + .name = "v6mpcore", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6mpcore_pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +static const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} +#else +static const struct arm_pmu *__init armv6pmu_init(void) +{ + return NULL; +} + +static const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V6 */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c new file mode 100644 index 000000000000..2e1402556fa0 --- /dev/null +++ b/arch/arm/kernel/perf_event_v7.c @@ -0,0 +1,906 @@ +/* + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. + * + * ARMv7 support: Jean Pihet <jpihet@mvista.com> + * 2010 (c) MontaVista Software, LLC. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. + * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +#ifdef CONFIG_CPU_V7 +/* Common ARMv7 event types */ +enum armv7_perf_types { + ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, + ARMV7_PERFCTR_IFETCH_MISS = 0x01, + ARMV7_PERFCTR_ITLB_MISS = 0x02, + ARMV7_PERFCTR_DCACHE_REFILL = 0x03, + ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, + ARMV7_PERFCTR_DTLB_REFILL = 0x05, + ARMV7_PERFCTR_DREAD = 0x06, + ARMV7_PERFCTR_DWRITE = 0x07, + + ARMV7_PERFCTR_EXC_TAKEN = 0x09, + ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, + ARMV7_PERFCTR_CID_WRITE = 0x0B, + /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ + ARMV7_PERFCTR_PC_WRITE = 0x0C, + ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, + ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, + ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, + ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, + + ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, + + ARMV7_PERFCTR_CPU_CYCLES = 0xFF +}; + +/* ARMv7 Cortex-A8 specific event types */ +enum armv7_a8_perf_types { + ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, + + ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, + + ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, + ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, + ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, + ARMV7_PERFCTR_L2_ACCESS = 0x43, + ARMV7_PERFCTR_L2_CACH_MISS = 0x44, + ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, + ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, + ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, + ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, + ARMV7_PERFCTR_L1_DATA_MISS = 0x49, + ARMV7_PERFCTR_L1_INST_MISS = 0x4A, + ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, + ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, + ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, + ARMV7_PERFCTR_L2_NEON = 0x4E, + ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, + ARMV7_PERFCTR_L1_INST = 0x50, + ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, + ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, + ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, + ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, + ARMV7_PERFCTR_OP_EXECUTED = 0x55, + ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, + ARMV7_PERFCTR_CYCLES_INST = 0x57, + ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, + ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, + ARMV7_PERFCTR_NEON_CYCLES = 0x5A, + + ARMV7_PERFCTR_PMU0_EVENTS = 0x70, + ARMV7_PERFCTR_PMU1_EVENTS = 0x71, + ARMV7_PERFCTR_PMU_EVENTS = 0x72, +}; + +/* ARMv7 Cortex-A9 specific event types */ +enum armv7_a9_perf_types { + ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, + ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, + ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, + + ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, + ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, + + ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, + ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, + ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, + ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, + ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, + ARMV7_PERFCTR_DATA_EVICTION = 0x65, + ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, + ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, + + ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, + + ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, + ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, + ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, + ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, + ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, + + ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, + ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, + ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, + ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, + ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, + ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, + ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, + + ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, + ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, + + ARMV7_PERFCTR_ISB_INST = 0x90, + ARMV7_PERFCTR_DSB_INST = 0x91, + ARMV7_PERFCTR_DMB_INST = 0x92, + ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, + + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, + ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, + ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, + ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, + ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 +}; + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events counters + */ +enum armv7_counters { + ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ + ARMV7_COUNTER0 = 2, /* First event counter */ +}; + +/* + * The cycle counter is ARMV7_CYCLE_COUNTER. + * The first event counter is ARMV7_COUNTER0. + * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). + */ +#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) + +/* + * ARMv7 low level PMNC access + */ + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * Available counters + */ +#define ARMV7_CNT0 0 /* First event counter */ +#define ARMV7_CCNT 31 /* Cycle counter */ + +/* Perf Event to low level counters mapping */ +#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) + +/* + * CNTENS: counters enable reg + */ +#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) + +/* + * CNTENC: counters disable reg + */ +#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) + +/* + * INTENS: counters overflow interrupt enable reg + */ +#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENS_C (1 << ARMV7_CCNT) + +/* + * INTENC: counters overflow interrupt disable reg + */ +#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENC_C (1 << ARMV7_CCNT) + +/* + * EVTSEL: Event selection reg + */ +#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ + +/* + * SELECT: Counter selection reg + */ +#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_FLAG_C (1 << ARMV7_CCNT) +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +static inline unsigned long armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(unsigned long val) +{ + val &= ARMV7_PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, + enum armv7_counters counter) +{ + int ret = 0; + + if (counter == ARMV7_CYCLE_COUNTER) + ret = pmnc & ARMV7_FLAG_C; + else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) + ret = pmnc & ARMV7_FLAG_P(counter); + else + pr_err("CPU%u checking wrong counter %d overflow status\n", + smp_processor_id(), counter); + + return ret; +} + +static inline int armv7_pmnc_select_counter(unsigned int idx) +{ + u32 val; + + if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { + pr_err("CPU%u selecting wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return idx; +} + +static inline u32 armv7pmu_read_counter(int idx) +{ + unsigned long value = 0; + + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r" (value)); + } else + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + + return value; +} + +static inline void armv7pmu_write_counter(int idx, u32 value) +{ + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r" (value)); + } else + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); +} + +static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) +{ + if (armv7_pmnc_select_counter(idx) == idx) { + val &= ARMV7_EVTSEL_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static inline u32 armv7_pmnc_enable_counter(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENS_C; + else + val = ARMV7_CNTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_counter(unsigned int idx) +{ + u32 val; + + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENC_C; + else + val = ARMV7_CNTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_enable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENS_C; + else + val = ARMV7_INTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENC_C; + else + val = ARMV7_INTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(void) +{ + u32 val; + unsigned int cnt; + + printk(KERN_INFO "PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + printk(KERN_INFO "PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + printk(KERN_INFO "CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + printk(KERN_INFO "INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + printk(KERN_INFO "FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + printk(KERN_INFO "SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + printk(KERN_INFO "CCNT =0x%08x\n", val); + + for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + printk(KERN_INFO "CNT[%d] count =0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + } +} +#endif + +static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't need to set the event if it's a cycle count + */ + if (idx != ARMV7_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); + + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Disable counter and interrupt + */ + raw_spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); + + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pmu_lock, flags); + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_stop(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pmu_lock, flags); + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx; + + /* Always place a cycle counter into the cycle counter. */ + if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static struct arm_pmu armv7pmu = { + .handle_irq = armv7pmu_handle_irq, + .enable = armv7pmu_enable_event, + .disable = armv7pmu_disable_event, + .read_counter = armv7pmu_read_counter, + .write_counter = armv7pmu_write_counter, + .get_event_idx = armv7pmu_get_event_idx, + .start = armv7pmu_start, + .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, + .max_period = (1LLU << 32) - 1, +}; + +static u32 __init armv7_reset_read_pmnc(void) +{ + u32 nb_cnt; + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +static const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.name = "ARMv7 Cortex-A8"; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +static const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.name = "ARMv7 Cortex-A9"; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} +#else +static const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + return NULL; +} + +static const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c new file mode 100644 index 000000000000..28cd3b025bc3 --- /dev/null +++ b/arch/arm/kernel/perf_event_xscale.c @@ -0,0 +1,807 @@ +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com> + * + * Based on the previous xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. + */ + +#ifdef CONFIG_CPU_XSCALE +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 1, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +#define XSCALE_PMU_ENABLE 0x001 +#define XSCALE_PMN_RESET 0x002 +#define XSCALE_CCNT_RESET 0x004 +#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) +#define XSCALE_PMU_CNT64 0x008 + +#define XSCALE1_OVERFLOWED_MASK 0x700 +#define XSCALE1_CCOUNT_OVERFLOW 0x400 +#define XSCALE1_COUNT0_OVERFLOW 0x100 +#define XSCALE1_COUNT1_OVERFLOW 0x200 +#define XSCALE1_CCOUNT_INT_EN 0x040 +#define XSCALE1_COUNT0_INT_EN 0x010 +#define XSCALE1_COUNT1_INT_EN 0x020 +#define XSCALE1_COUNT0_EVT_SHFT 12 +#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) +#define XSCALE1_COUNT1_EVT_SHFT 20 +#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) + +static inline u32 +xscale1pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline void +xscale1pmu_write_pmnc(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0xffff77f; + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); +} + +static inline int +xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale1pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * NOTE: there's an A stepping erratum that states if an overflow + * bit already exists and another occurs, the previous + * Overflow bit gets cleared. There's no workaround. + * Fixed in B stepping or later. + */ + pmnc = xscale1pmu_read_pmnc(); + + /* + * Write the value back to clear the overflow flags. Overflow + * flags remain in pmnc for use below. We also disable the PMU + * while we process the interrupt. + */ + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) + return IRQ_NONE; + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + raw_spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + raw_spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + if (XSCALE_PERFCTR_CCNT == event->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) + return XSCALE_COUNTER1; + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) + return XSCALE_COUNTER0; + + return -EAGAIN; + } +} + +static void +xscale1pmu_start(void) +{ + unsigned long flags, val; + + raw_spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_stop(void) +{ + unsigned long flags, val; + + raw_spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale1pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale1pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale1pmu = { + .id = ARM_PERF_PMU_ID_XSCALE1, + .name = "xscale1", + .handle_irq = xscale1pmu_handle_irq, + .enable = xscale1pmu_enable_event, + .disable = xscale1pmu_disable_event, + .read_counter = xscale1pmu_read_counter, + .write_counter = xscale1pmu_write_counter, + .get_event_idx = xscale1pmu_get_event_idx, + .start = xscale1pmu_start, + .stop = xscale1pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +static const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. */ + of_flags = xscale2pmu_read_overflow_flags(); + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) + return IRQ_NONE; + + /* Clear the overflow bits. */ + xscale2pmu_write_overflow_flags(of_flags); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien |= XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien |= XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien |= XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien |= XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien |= XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + raw_spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien &= ~XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien &= ~XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien &= ~XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien &= ~XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien &= ~XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + raw_spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx = xscale1pmu_get_event_idx(cpuc, event); + if (idx >= 0) + goto out; + + if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) + idx = XSCALE_COUNTER3; + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) + idx = XSCALE_COUNTER2; +out: + return idx; +} + +static void +xscale2pmu_start(void) +{ + unsigned long flags, val; + + raw_spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; + val |= XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_stop(void) +{ + unsigned long flags, val; + + raw_spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale2pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale2pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale2pmu = { + .id = ARM_PERF_PMU_ID_XSCALE2, + .name = "xscale2", + .handle_irq = xscale2pmu_handle_irq, + .enable = xscale2pmu_enable_event, + .disable = xscale2pmu_disable_event, + .read_counter = xscale2pmu_read_counter, + .write_counter = xscale2pmu_write_counter, + .get_event_idx = xscale2pmu_get_event_idx, + .start = xscale2pmu_start, + .stop = xscale2pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 5, + .max_period = (1LLU << 32) - 1, +}; + +static const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} +#else +static const struct arm_pmu *__init xscale1pmu_init(void) +{ + return NULL; +} + +static const struct arm_pmu *__init xscale2pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 336f14e0e5c2..8075e592f902 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -75,9 +75,9 @@ extern void reboot_setup(char *str); unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int __machine_arch_type; +unsigned int __machine_arch_type __read_mostly; EXPORT_SYMBOL(__machine_arch_type); -unsigned int cacheid; +unsigned int cacheid __read_mostly; EXPORT_SYMBOL(cacheid); unsigned int __atags_pointer __initdata; @@ -91,24 +91,24 @@ EXPORT_SYMBOL(system_serial_low); unsigned int system_serial_high; EXPORT_SYMBOL(system_serial_high); -unsigned int elf_hwcap; +unsigned int elf_hwcap __read_mostly; EXPORT_SYMBOL(elf_hwcap); #ifdef MULTI_CPU -struct processor processor; +struct processor processor __read_mostly; #endif #ifdef MULTI_TLB -struct cpu_tlb_fns cpu_tlb; +struct cpu_tlb_fns cpu_tlb __read_mostly; #endif #ifdef MULTI_USER -struct cpu_user_fns cpu_user; +struct cpu_user_fns cpu_user __read_mostly; #endif #ifdef MULTI_CACHE -struct cpu_cache_fns cpu_cache; +struct cpu_cache_fns cpu_cache __read_mostly; #endif #ifdef CONFIG_OUTER_CACHE -struct outer_cache_fns outer_cache; +struct outer_cache_fns outer_cache __read_mostly; EXPORT_SYMBOL(outer_cache); #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c1959590252..c434fa2dcbda 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -16,6 +16,7 @@ #include <linux/cache.h> #include <linux/profile.h> #include <linux/errno.h> +#include <linux/ftrace.h> #include <linux/mm.h> #include <linux/err.h> #include <linux/cpu.h> @@ -24,6 +25,7 @@ #include <linux/irq.h> #include <linux/percpu.h> #include <linux/clockchips.h> +#include <linux/completion.h> #include <asm/atomic.h> #include <asm/cacheflush.h> @@ -46,64 +48,14 @@ */ struct secondary_data secondary_data; -/* - * structures for inter-processor calls - * - A collection of single bit ipi messages. - */ -struct ipi_data { - spinlock_t lock; - unsigned long ipi_count; - unsigned long bits; -}; - -static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { - .lock = SPIN_LOCK_UNLOCKED, -}; - enum ipi_msg_type { - IPI_TIMER, + IPI_TIMER = 2, IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, }; -static inline void identity_mapping_add(pgd_t *pgd, unsigned long start, - unsigned long end) -{ - unsigned long addr, prot; - pmd_t *pmd; - - prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE; - if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) - prot |= PMD_BIT4; - - for (addr = start & PGDIR_MASK; addr < end;) { - pmd = pmd_offset(pgd + pgd_index(addr), addr); - pmd[0] = __pmd(addr | prot); - addr += SECTION_SIZE; - pmd[1] = __pmd(addr | prot); - addr += SECTION_SIZE; - flush_pmd_entry(pmd); - outer_clean_range(__pa(pmd), __pa(pmd + 1)); - } -} - -static inline void identity_mapping_del(pgd_t *pgd, unsigned long start, - unsigned long end) -{ - unsigned long addr; - pmd_t *pmd; - - for (addr = start & PGDIR_MASK; addr < end; addr += PGDIR_SIZE) { - pmd = pmd_offset(pgd + pgd_index(addr), addr); - pmd[0] = __pmd(0); - pmd[1] = __pmd(0); - clean_pmd_entry(pmd); - outer_clean_range(__pa(pmd), __pa(pmd + 1)); - } -} - int __cpuinit __cpu_up(unsigned int cpu) { struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu); @@ -252,12 +204,20 @@ int __cpu_disable(void) return 0; } +static DECLARE_COMPLETION(cpu_died); + /* * called on the thread which is asking for a CPU to be shutdown - * waits until shutdown has completed, or it is timed out. */ void __cpu_die(unsigned int cpu) { + if (!wait_for_completion_timeout(&cpu_died, 5000)) { + pr_err("CPU%u: cpu didn't die\n", cpu); + return; + } + printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); + if (!platform_cpu_kill(cpu)) printk("CPU%u: unable to kill\n", cpu); } @@ -274,12 +234,17 @@ void __ref cpu_die(void) { unsigned int cpu = smp_processor_id(); - local_irq_disable(); idle_task_exit(); + local_irq_disable(); + mb(); + + /* Tell __cpu_die() that this CPU is now safe to dispose of */ + complete(&cpu_died); + /* * actual CPU shutdown procedure is at least platform (if not - * CPU) specific + * CPU) specific. */ platform_cpu_die(cpu); @@ -296,6 +261,17 @@ void __ref cpu_die(void) #endif /* CONFIG_HOTPLUG_CPU */ /* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +static void __cpuinit smp_store_cpu_info(unsigned int cpuid) +{ + struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); + + cpu_info->loops_per_jiffy = loops_per_jiffy; +} + +/* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ @@ -320,6 +296,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_init(); preempt_disable(); + trace_hardirqs_off(); /* * Give the platform a chance to do its own initialisation. @@ -353,17 +330,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_idle(); } -/* - * Called by both boot and secondaries to move global data into - * per-processor storage. - */ -void __cpuinit smp_store_cpu_info(unsigned int cpuid) -{ - struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); - - cpu_info->loops_per_jiffy = loops_per_jiffy; -} - void __init smp_cpus_done(unsigned int max_cpus) { int cpu; @@ -386,27 +352,39 @@ void __init smp_prepare_boot_cpu(void) per_cpu(cpu_data, cpu).idle = current; } -static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) +void __init smp_prepare_cpus(unsigned int max_cpus) { - unsigned long flags; - unsigned int cpu; + unsigned int ncores = num_possible_cpus(); + + smp_store_cpu_info(smp_processor_id()); - local_irq_save(flags); + /* + * are we trying to boot more cores than exist? + */ + if (max_cpus > ncores) + max_cpus = ncores; - for_each_cpu(cpu, mask) { - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); + if (max_cpus > 1) { + /* + * Enable the local timer or broadcast device for the + * boot CPU, but only if we have more than one CPU. + */ + percpu_timer_setup(); - spin_lock(&ipi->lock); - ipi->bits |= 1 << msg; - spin_unlock(&ipi->lock); + /* + * Initialise the SCU if there are more than one CPU + * and let them know where to start. + */ + platform_smp_prepare_cpus(max_cpus); } +} +static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) +{ /* * Call the platform specific cross-CPU call function. */ - smp_cross_call(mask); - - local_irq_restore(flags); + smp_cross_call(mask, msg); } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -419,28 +397,43 @@ void arch_send_call_function_single_ipi(int cpu) send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } -void show_ipi_list(struct seq_file *p) +static const char *ipi_types[NR_IPI] = { +#define S(x,s) [x - IPI_TIMER] = s + S(IPI_TIMER, "Timer broadcast interrupts"), + S(IPI_RESCHEDULE, "Rescheduling interrupts"), + S(IPI_CALL_FUNC, "Function call interrupts"), + S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), + S(IPI_CPU_STOP, "CPU stop interrupts"), +}; + +void show_ipi_list(struct seq_file *p, int prec) { - unsigned int cpu; + unsigned int cpu, i; - seq_puts(p, "IPI:"); + for (i = 0; i < NR_IPI; i++) { + seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); - for_each_present_cpu(cpu) - seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count); + for_each_present_cpu(cpu) + seq_printf(p, "%10u ", + __get_irq_stat(cpu, ipi_irqs[i])); - seq_putc(p, '\n'); + seq_printf(p, " %s\n", ipi_types[i]); + } } -void show_local_irqs(struct seq_file *p) +u64 smp_irq_stat_cpu(unsigned int cpu) { - unsigned int cpu; + u64 sum = 0; + int i; - seq_printf(p, "LOC: "); + for (i = 0; i < NR_IPI; i++) + sum += __get_irq_stat(cpu, ipi_irqs[i]); - for_each_present_cpu(cpu) - seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs); +#ifdef CONFIG_LOCAL_TIMERS + sum += __get_irq_stat(cpu, local_timer_irqs); +#endif - seq_putc(p, '\n'); + return sum; } /* @@ -457,18 +450,30 @@ static void ipi_timer(void) } #ifdef CONFIG_LOCAL_TIMERS -asmlinkage void __exception do_local_timer(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); int cpu = smp_processor_id(); if (local_timer_ack()) { - irq_stat[cpu].local_timer_irqs++; + __inc_irq_stat(cpu, local_timer_irqs); ipi_timer(); } set_irq_regs(old_regs); } + +void show_local_irqs(struct seq_file *p, int prec) +{ + unsigned int cpu; + + seq_printf(p, "%*s: ", prec, "LOC"); + + for_each_present_cpu(cpu) + seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs)); + + seq_printf(p, " Local timer interrupts\n"); +} #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST @@ -537,71 +542,44 @@ static void ipi_cpu_stop(unsigned int cpu) /* * Main handler for inter-processor interrupts - * - * For ARM, the ipimask now only identifies a single - * category of IPI (Bit 1 IPIs have been replaced by a - * different mechanism): - * - * Bit 0 - Inter-processor function call */ -asmlinkage void __exception do_IPI(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); struct pt_regs *old_regs = set_irq_regs(regs); - ipi->ipi_count++; - - for (;;) { - unsigned long msgs; - - spin_lock(&ipi->lock); - msgs = ipi->bits; - ipi->bits = 0; - spin_unlock(&ipi->lock); + if (ipinr >= IPI_TIMER && ipinr < IPI_TIMER + NR_IPI) + __inc_irq_stat(cpu, ipi_irqs[ipinr - IPI_TIMER]); - if (!msgs) - break; - - do { - unsigned nextmsg; - - nextmsg = msgs & -msgs; - msgs &= ~nextmsg; - nextmsg = ffz(~nextmsg); - - switch (nextmsg) { - case IPI_TIMER: - ipi_timer(); - break; + switch (ipinr) { + case IPI_TIMER: + ipi_timer(); + break; - case IPI_RESCHEDULE: - /* - * nothing more to do - eveything is - * done on the interrupt return path - */ - break; + case IPI_RESCHEDULE: + /* + * nothing more to do - eveything is + * done on the interrupt return path + */ + break; - case IPI_CALL_FUNC: - generic_smp_call_function_interrupt(); - break; + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; + case IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; - case IPI_CPU_STOP: - ipi_cpu_stop(cpu); - break; + case IPI_CPU_STOP: + ipi_cpu_stop(cpu); + break; - default: - printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", - cpu, nextmsg); - break; - } - } while (msgs); + default: + printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", + cpu, ipinr); + break; } - set_irq_regs(old_regs); } @@ -612,10 +590,22 @@ void smp_send_reschedule(int cpu) void smp_send_stop(void) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) + unsigned long timeout; + + if (num_online_cpus() > 1) { + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + send_ipi_message(&mask, IPI_CPU_STOP); + } + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while (num_online_cpus() > 1 && timeout--) + udelay(1); + + if (num_online_cpus() > 1) + pr_warning("SMP: failed to stop secondary CPUs\n"); } /* diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 35882fbf37f9..948d14944260 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -143,10 +143,7 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk) clk->min_delta_ns = clockevent_delta2ns(0xf, clk); /* Make sure our local interrupt controller has this enabled */ - local_irq_save(flags); - irq_to_desc(clk->irq)->status |= IRQ_NOPROBE; - get_irq_chip(clk->irq)->unmask(clk->irq); - local_irq_restore(flags); + gic_enable_ppi(clk->irq); clockevents_register_device(clk); } diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c new file mode 100644 index 000000000000..7a5760922914 --- /dev/null +++ b/arch/arm/kernel/swp_emulate.c @@ -0,0 +1,267 @@ +/* + * linux/arch/arm/kernel/swp_emulate.c + * + * Copyright (C) 2009 ARM Limited + * __user_* functions adapted from include/asm/uaccess.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Implements emulation of the SWP/SWPB instructions using load-exclusive and + * store-exclusive for processors that have them disabled (or future ones that + * might not implement them). + * + * Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>] + * Where: Rt = destination + * Rt2 = source + * Rn = address + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/perf_event.h> + +#include <asm/traps.h> +#include <asm/uaccess.h> + +/* + * Error-checking SWP macros implemented using ldrex{b}/strex{b} + */ +#define __user_swpX_asm(data, addr, res, temp, B) \ + __asm__ __volatile__( \ + " mov %2, %1\n" \ + "0: ldrex"B" %1, [%3]\n" \ + "1: strex"B" %0, %2, [%3]\n" \ + " cmp %0, #0\n" \ + " movne %0, %4\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %0, %5\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 0b, 3b\n" \ + " .long 1b, 3b\n" \ + " .previous" \ + : "=&r" (res), "+r" (data), "=&r" (temp) \ + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ + : "cc", "memory") + +#define __user_swp_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "") +#define __user_swpb_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "b") + +/* + * Macros/defines for extracting register numbers from instruction. + */ +#define EXTRACT_REG_NUM(instruction, offset) \ + (((instruction) & (0xf << (offset))) >> (offset)) +#define RN_OFFSET 16 +#define RT_OFFSET 12 +#define RT2_OFFSET 0 +/* + * Bit 22 of the instruction encoding distinguishes between + * the SWP and SWPB variants (bit set means SWPB). + */ +#define TYPE_SWPB (1 << 22) + +static unsigned long swpcounter; +static unsigned long swpbcounter; +static unsigned long abtcounter; +static pid_t previous_pid; + +#ifdef CONFIG_PROC_FS +static int proc_read_status(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + char *p = page; + int len; + + p += sprintf(p, "Emulated SWP:\t\t%lu\n", swpcounter); + p += sprintf(p, "Emulated SWPB:\t\t%lu\n", swpbcounter); + p += sprintf(p, "Aborted SWP{B}:\t\t%lu\n", abtcounter); + if (previous_pid != 0) + p += sprintf(p, "Last process:\t\t%d\n", previous_pid); + + len = (p - page) - off; + if (len < 0) + len = 0; + + *eof = (len <= count) ? 1 : 0; + *start = page + off; + + return len; +} +#endif + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +static void set_segfault(struct pt_regs *regs, unsigned long addr) +{ + siginfo_t info; + + if (find_vma(current->mm, addr) == NULL) + info.si_code = SEGV_MAPERR; + else + info.si_code = SEGV_ACCERR; + + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_addr = (void *) instruction_pointer(regs); + + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm_notify_die("Illegal memory access", regs, &info, 0, 0); + + abtcounter++; +} + +static int emulate_swpX(unsigned int address, unsigned int *data, + unsigned int type) +{ + unsigned int res = 0; + + if ((type != TYPE_SWPB) && (address & 0x3)) { + /* SWP to unaligned address not permitted */ + pr_debug("SWP instruction on unaligned pointer!\n"); + return -EFAULT; + } + + while (1) { + unsigned long temp; + + /* + * Barrier required between accessing protected resource and + * releasing a lock for it. Legacy code might not have done + * this, and we cannot determine that this is not the case + * being emulated, so insert always. + */ + smp_mb(); + + if (type == TYPE_SWPB) + __user_swpb_asm(*data, address, res, temp); + else + __user_swp_asm(*data, address, res, temp); + + if (likely(res != -EAGAIN) || signal_pending(current)) + break; + + cond_resched(); + } + + if (res == 0) { + /* + * Barrier also required between aquiring a lock for a + * protected resource and accessing the resource. Inserted for + * same reason as above. + */ + smp_mb(); + + if (type == TYPE_SWPB) + swpbcounter++; + else + swpcounter++; + } + + return res; +} + +/* + * swp_handler logs the id of calling process, dissects the instruction, sanity + * checks the memory location, calls emulate_swpX for the actual operation and + * deals with fixup/error handling before returning + */ +static int swp_handler(struct pt_regs *regs, unsigned int instr) +{ + unsigned int address, destreg, data, type; + unsigned int res = 0; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc); + + if (current->pid != previous_pid) { + pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n", + current->comm, (unsigned long)current->pid); + previous_pid = current->pid; + } + + address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)]; + data = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)]; + destreg = EXTRACT_REG_NUM(instr, RT_OFFSET); + + type = instr & TYPE_SWPB; + + pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n", + EXTRACT_REG_NUM(instr, RN_OFFSET), address, + destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data); + + /* Check access in reasonable access range for both SWP and SWPB */ + if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + pr_debug("SWP{B} emulation: access to %p not allowed!\n", + (void *)address); + res = -EFAULT; + } else { + res = emulate_swpX(address, &data, type); + } + + if (res == 0) { + /* + * On successful emulation, revert the adjustment to the PC + * made in kernel/traps.c in order to resume execution at the + * instruction following the SWP{B}. + */ + regs->ARM_pc += 4; + regs->uregs[destreg] = data; + } else if (res == -EFAULT) { + /* + * Memory errors do not mean emulation failed. + * Set up signal info to return SEGV, then return OK + */ + set_segfault(regs, address); + } + + return 0; +} + +/* + * Only emulate SWP/SWPB executed in ARM state/User mode. + * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE. + */ +static struct undef_hook swp_hook = { + .instr_mask = 0x0fb00ff0, + .instr_val = 0x01000090, + .cpsr_mask = MODE_MASK | PSR_T_BIT | PSR_J_BIT, + .cpsr_val = USR_MODE, + .fn = swp_handler +}; + +/* + * Register handler and create status file in /proc/cpu + * Invoked as late_initcall, since not needed before init spawned. + */ +static int __init swp_emulation_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *res; + + res = create_proc_entry("cpu/swp_emulation", S_IRUGO, NULL); + + if (!res) + return -ENOMEM; + + res->read_proc = proc_read_status; +#endif /* CONFIG_PROC_FS */ + + printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n"); + register_undef_hook(&swp_hook); + + return 0; +} + +late_initcall(swp_emulation_init); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 446aee97436f..ee57640ba2bb 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -37,6 +37,8 @@ static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; +void *vectors_page; + #ifdef CONFIG_DEBUG_USER unsigned int user_debug; @@ -708,19 +710,19 @@ void __readwrite_bug(const char *fn) } EXPORT_SYMBOL(__readwrite_bug); -void __pte_error(const char *file, int line, unsigned long val) +void __pte_error(const char *file, int line, pte_t pte) { - printk("%s:%d: bad pte %08lx.\n", file, line, val); + printk("%s:%d: bad pte %08lx.\n", file, line, pte_val(pte)); } -void __pmd_error(const char *file, int line, unsigned long val) +void __pmd_error(const char *file, int line, pmd_t pmd) { - printk("%s:%d: bad pmd %08lx.\n", file, line, val); + printk("%s:%d: bad pmd %08lx.\n", file, line, pmd_val(pmd)); } -void __pgd_error(const char *file, int line, unsigned long val) +void __pgd_error(const char *file, int line, pgd_t pgd) { - printk("%s:%d: bad pgd %08lx.\n", file, line, val); + printk("%s:%d: bad pgd %08lx.\n", file, line, pgd_val(pgd)); } asmlinkage void __div0(void) @@ -756,7 +758,11 @@ static void __init kuser_get_tls_init(unsigned long vectors) void __init early_trap_init(void) { +#if defined(CONFIG_CPU_USE_DOMAINS) unsigned long vectors = CONFIG_VECTORS_BASE; +#else + unsigned long vectors = (unsigned long)vectors_page; +#endif extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; extern char __kuser_helper_start[], __kuser_helper_end[]; @@ -780,10 +786,10 @@ void __init early_trap_init(void) * Copy signal return handlers into the vector page, and * set sigreturn to be a pointer to these. */ - memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes, - sizeof(sigreturn_codes)); - memcpy((void *)KERN_RESTART_CODE, syscall_restart_code, - sizeof(syscall_restart_code)); + memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE), + sigreturn_codes, sizeof(sigreturn_codes)); + memcpy((void *)(vectors + KERN_RESTART_CODE - CONFIG_VECTORS_BASE), + syscall_restart_code, sizeof(syscall_restart_code)); flush_icache_range(vectors, vectors + PAGE_SIZE); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index cead8893b46b..86b66f3f2031 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS __exception_text_start = .; *(.exception.text) __exception_text_end = .; + IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -167,6 +168,7 @@ SECTIONS NOSAVE_DATA CACHELINE_ALIGNED_DATA(32) + READ_MOSTLY_DATA(32) /* * The exception fixup table (might need resorting at runtime) diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index b1631a7dbe75..1b049cd7a49a 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -28,20 +28,21 @@ */ #include <linux/linkage.h> #include <asm/errno.h> +#include <asm/domain.h> ENTRY(__get_user_1) -1: ldrbt r2, [r0] +1: T(ldrb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_1) ENTRY(__get_user_2) #ifdef CONFIG_THUMB2_KERNEL -2: ldrbt r2, [r0] -3: ldrbt r3, [r0, #1] +2: T(ldrb) r2, [r0] +3: T(ldrb) r3, [r0, #1] #else -2: ldrbt r2, [r0], #1 -3: ldrbt r3, [r0] +2: T(ldrb) r2, [r0], #1 +3: T(ldrb) r3, [r0] #endif #ifndef __ARMEB__ orr r2, r2, r3, lsl #8 @@ -53,7 +54,7 @@ ENTRY(__get_user_2) ENDPROC(__get_user_2) ENTRY(__get_user_4) -4: ldrt r2, [r0] +4: T(ldr) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_4) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 5a01a23c6c06..c023fc11e86c 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -28,9 +28,10 @@ */ #include <linux/linkage.h> #include <asm/errno.h> +#include <asm/domain.h> ENTRY(__put_user_1) -1: strbt r2, [r0] +1: T(strb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_1) @@ -39,19 +40,19 @@ ENTRY(__put_user_2) mov ip, r2, lsr #8 #ifdef CONFIG_THUMB2_KERNEL #ifndef __ARMEB__ -2: strbt r2, [r0] -3: strbt ip, [r0, #1] +2: T(strb) r2, [r0] +3: T(strb) ip, [r0, #1] #else -2: strbt ip, [r0] -3: strbt r2, [r0, #1] +2: T(strb) ip, [r0] +3: T(strb) r2, [r0, #1] #endif #else /* !CONFIG_THUMB2_KERNEL */ #ifndef __ARMEB__ -2: strbt r2, [r0], #1 -3: strbt ip, [r0] +2: T(strb) r2, [r0], #1 +3: T(strb) ip, [r0] #else -2: strbt ip, [r0], #1 -3: strbt r2, [r0] +2: T(strb) ip, [r0], #1 +3: T(strb) r2, [r0] #endif #endif /* CONFIG_THUMB2_KERNEL */ mov r0, #0 @@ -59,18 +60,18 @@ ENTRY(__put_user_2) ENDPROC(__put_user_2) ENTRY(__put_user_4) -4: strt r2, [r0] +4: T(str) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_4) ENTRY(__put_user_8) #ifdef CONFIG_THUMB2_KERNEL -5: strt r2, [r0] -6: strt r3, [r0, #4] +5: T(str) r2, [r0] +6: T(str) r3, [r0, #4] #else -5: strt r2, [r0], #4 -6: strt r3, [r0] +5: T(str) r2, [r0], #4 +6: T(str) r3, [r0] #endif mov r0, #0 mov pc, lr diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index fee9f6f88adb..d0ece2aeb70d 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -14,6 +14,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/errno.h> +#include <asm/domain.h> .text @@ -31,11 +32,11 @@ rsb ip, ip, #4 cmp ip, #2 ldrb r3, [r1], #1 -USER( strbt r3, [r0], #1) @ May fault +USER( T(strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( strgebt r3, [r0], #1) @ May fault +USER( T(strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( strgtbt r3, [r0], #1) @ May fault +USER( T(strgtb) r3, [r0], #1) @ May fault sub r2, r2, ip b .Lc2u_dest_aligned @@ -58,7 +59,7 @@ ENTRY(__copy_to_user) addmi ip, r2, #4 bmi .Lc2u_0nowords ldr r3, [r1], #4 -USER( strt r3, [r0], #4) @ May fault +USER( T(str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -87,18 +88,18 @@ USER( strt r3, [r0], #4) @ May fault stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 ldrne r3, [r1], #4 - strnet r3, [r0], #4 @ Shouldnt fault + T(strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_0fupi .Lc2u_0nowords: teq ip, #0 beq .Lc2u_finished .Lc2u_nowords: cmp ip, #2 ldrb r3, [r1], #1 -USER( strbt r3, [r0], #1) @ May fault +USER( T(strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( strgebt r3, [r0], #1) @ May fault +USER( T(strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( strgtbt r3, [r0], #1) @ May fault +USER( T(strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_not_enough: @@ -119,7 +120,7 @@ USER( strgtbt r3, [r0], #1) @ May fault mov r3, r7, pull #8 ldr r7, [r1], #4 orr r3, r3, r7, push #24 -USER( strt r3, [r0], #4) @ May fault +USER( T(str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -154,18 +155,18 @@ USER( strt r3, [r0], #4) @ May fault movne r3, r7, pull #8 ldrne r7, [r1], #4 orrne r3, r3, r7, push #24 - strnet r3, [r0], #4 @ Shouldnt fault + T(strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_1fupi .Lc2u_1nowords: mov r3, r7, get_byte_1 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( strbt r3, [r0], #1) @ May fault +USER( T(strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_2 -USER( strgebt r3, [r0], #1) @ May fault +USER( T(strgeb) r3, [r0], #1) @ May fault movgt r3, r7, get_byte_3 -USER( strgtbt r3, [r0], #1) @ May fault +USER( T(strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_2fupi: subs r2, r2, #4 @@ -174,7 +175,7 @@ USER( strgtbt r3, [r0], #1) @ May fault mov r3, r7, pull #16 ldr r7, [r1], #4 orr r3, r3, r7, push #16 -USER( strt r3, [r0], #4) @ May fault +USER( T(str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -209,18 +210,18 @@ USER( strt r3, [r0], #4) @ May fault movne r3, r7, pull #16 ldrne r7, [r1], #4 orrne r3, r3, r7, push #16 - strnet r3, [r0], #4 @ Shouldnt fault + T(strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_2fupi .Lc2u_2nowords: mov r3, r7, get_byte_2 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( strbt r3, [r0], #1) @ May fault +USER( T(strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_3 -USER( strgebt r3, [r0], #1) @ May fault +USER( T(strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( strgtbt r3, [r0], #1) @ May fault +USER( T(strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_3fupi: subs r2, r2, #4 @@ -229,7 +230,7 @@ USER( strgtbt r3, [r0], #1) @ May fault mov r3, r7, pull #24 ldr r7, [r1], #4 orr r3, r3, r7, push #8 -USER( strt r3, [r0], #4) @ May fault +USER( T(str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -264,18 +265,18 @@ USER( strt r3, [r0], #4) @ May fault movne r3, r7, pull #24 ldrne r7, [r1], #4 orrne r3, r3, r7, push #8 - strnet r3, [r0], #4 @ Shouldnt fault + T(strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_3fupi .Lc2u_3nowords: mov r3, r7, get_byte_3 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( strbt r3, [r0], #1) @ May fault +USER( T(strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( strgebt r3, [r0], #1) @ May fault +USER( T(strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( strgtbt r3, [r0], #1) @ May fault +USER( T(strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished ENDPROC(__copy_to_user) @@ -294,11 +295,11 @@ ENDPROC(__copy_to_user) .Lcfu_dest_not_aligned: rsb ip, ip, #4 cmp ip, #2 -USER( ldrbt r3, [r1], #1) @ May fault +USER( T(ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( ldrgebt r3, [r1], #1) @ May fault +USER( T(ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( ldrgtbt r3, [r1], #1) @ May fault +USER( T(ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 sub r2, r2, ip b .Lcfu_dest_aligned @@ -321,7 +322,7 @@ ENTRY(__copy_from_user) .Lcfu_0fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_0nowords -USER( ldrt r3, [r1], #4) +USER( T(ldr) r3, [r1], #4) str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 @@ -350,18 +351,18 @@ USER( ldrt r3, [r1], #4) ldmneia r1!, {r3 - r4} @ Shouldnt fault stmneia r0!, {r3 - r4} tst ip, #4 - ldrnet r3, [r1], #4 @ Shouldnt fault + T(ldrne) r3, [r1], #4 @ Shouldnt fault strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_0fupi .Lcfu_0nowords: teq ip, #0 beq .Lcfu_finished .Lcfu_nowords: cmp ip, #2 -USER( ldrbt r3, [r1], #1) @ May fault +USER( T(ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( ldrgebt r3, [r1], #1) @ May fault +USER( T(ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( ldrgtbt r3, [r1], #1) @ May fault +USER( T(ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -374,7 +375,7 @@ USER( ldrgtbt r3, [r1], #1) @ May fault .Lcfu_src_not_aligned: bic r1, r1, #3 -USER( ldrt r7, [r1], #4) @ May fault +USER( T(ldr) r7, [r1], #4) @ May fault cmp ip, #2 bgt .Lcfu_3fupi beq .Lcfu_2fupi @@ -382,7 +383,7 @@ USER( ldrt r7, [r1], #4) @ May fault addmi ip, r2, #4 bmi .Lcfu_1nowords mov r3, r7, pull #8 -USER( ldrt r7, [r1], #4) @ May fault +USER( T(ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -417,7 +418,7 @@ USER( ldrt r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #8 -USER( ldrnet r7, [r1], #4) @ May fault +USER( T(ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #24 strne r3, [r0], #4 ands ip, ip, #3 @@ -437,7 +438,7 @@ USER( ldrnet r7, [r1], #4) @ May fault addmi ip, r2, #4 bmi .Lcfu_2nowords mov r3, r7, pull #16 -USER( ldrt r7, [r1], #4) @ May fault +USER( T(ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -473,7 +474,7 @@ USER( ldrt r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #16 -USER( ldrnet r7, [r1], #4) @ May fault +USER( T(ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #16 strne r3, [r0], #4 ands ip, ip, #3 @@ -485,7 +486,7 @@ USER( ldrnet r7, [r1], #4) @ May fault strb r3, [r0], #1 movge r3, r7, get_byte_3 strgeb r3, [r0], #1 -USER( ldrgtbt r3, [r1], #0) @ May fault +USER( T(ldrgtb) r3, [r1], #0) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -493,7 +494,7 @@ USER( ldrgtbt r3, [r1], #0) @ May fault addmi ip, r2, #4 bmi .Lcfu_3nowords mov r3, r7, pull #24 -USER( ldrt r7, [r1], #4) @ May fault +USER( T(ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -528,7 +529,7 @@ USER( ldrt r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #24 -USER( ldrnet r7, [r1], #4) @ May fault +USER( T(ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #8 strne r3, [r0], #4 ands ip, ip, #3 @@ -538,9 +539,9 @@ USER( ldrnet r7, [r1], #4) @ May fault beq .Lcfu_finished cmp ip, #2 strb r3, [r0], #1 -USER( ldrgebt r3, [r1], #1) @ May fault +USER( T(ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( ldrgtbt r3, [r1], #1) @ May fault +USER( T(ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished ENDPROC(__copy_from_user) diff --git a/arch/arm/mach-bcmring/clock.c b/arch/arm/mach-bcmring/clock.c index 14bafc38f2dc..ad237a42d265 100644 --- a/arch/arm/mach-bcmring/clock.c +++ b/arch/arm/mach-bcmring/clock.c @@ -21,13 +21,12 @@ #include <linux/string.h> #include <linux/clk.h> #include <linux/spinlock.h> +#include <linux/clkdev.h> #include <mach/csp/hw_cfg.h> #include <mach/csp/chipcHw_def.h> #include <mach/csp/chipcHw_reg.h> #include <mach/csp/chipcHw_inline.h> -#include <asm/clkdev.h> - #include "clock.h" #define clk_is_primary(x) ((x)->type & CLK_TYPE_PRIMARY) diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c index d3f959e92b2d..ed96ef400474 100644 --- a/arch/arm/mach-bcmring/core.c +++ b/arch/arm/mach-bcmring/core.c @@ -30,10 +30,10 @@ #include <linux/amba/bus.h> #include <linux/clocksource.h> #include <linux/clockchips.h> +#include <linux/clkdev.h> #include <mach/csp/mm_addr.h> #include <mach/hardware.h> -#include <asm/clkdev.h> #include <linux/io.h> #include <asm/irq.h> #include <asm/hardware/arm_timer.h> diff --git a/arch/arm/mach-cns3xxx/Kconfig b/arch/arm/mach-cns3xxx/Kconfig index 9ebfcc46feb1..29b13f249aa9 100644 --- a/arch/arm/mach-cns3xxx/Kconfig +++ b/arch/arm/mach-cns3xxx/Kconfig @@ -3,6 +3,7 @@ menu "CNS3XXX platform type" config MACH_CNS3420VB bool "Support for CNS3420 Validation Board" + select MIGHT_HAVE_PCI help Include support for the Cavium Networks CNS3420 MPCore Platform Baseboard. diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index 9ca4d581016f..da30078a80c1 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -69,13 +69,10 @@ void __init cns3xxx_map_io(void) } /* used by entry-macro.S */ -void __iomem *gic_cpu_base_addr; - void __init cns3xxx_init_irq(void) { - gic_cpu_base_addr = __io(CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT); - gic_dist_init(0, __io(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, __io(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT), + __io(CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT)); } void cns3xxx_power_off(void) diff --git a/arch/arm/mach-cns3xxx/core.h b/arch/arm/mach-cns3xxx/core.h index 6b33ec11346e..ef9e5116b1a9 100644 --- a/arch/arm/mach-cns3xxx/core.h +++ b/arch/arm/mach-cns3xxx/core.h @@ -11,7 +11,6 @@ #ifndef __CNS3XXX_CORE_H #define __CNS3XXX_CORE_H -extern void __iomem *gic_cpu_base_addr; extern struct sys_timer cns3xxx_timer; void __init cns3xxx_map_io(void); diff --git a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S index 5e1c5545680f..6bd83ed90afe 100644 --- a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S +++ b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S @@ -9,74 +9,10 @@ */ #include <mach/hardware.h> -#include <asm/hardware/gic.h> +#include <asm/hardware/entry-macro-gic.S> .macro disable_fiq .endm - .macro get_irqnr_preamble, base, tmp - ldr \base, =gic_cpu_base_addr - ldr \base, [\base] - .endm - .macro arch_ret_to_user, tmp1, tmp2 .endm - - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an interrupt if it's - * between 30 and 1020. The test_for_ipi routine below will pick up on IPIs. - * - * A simple read from the controller will tell us the number of the highest - * priority enabled interrupt. We then just need to check whether it is in the - * valid range for an IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - - ldr \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */ - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - - .endm - - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt on the - * controller, since this requires the original irqstat value which - * we won't easily be able to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved.. */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm diff --git a/arch/arm/mach-davinci/clock.h b/arch/arm/mach-davinci/clock.h index 11099980b58b..0dd22031ec62 100644 --- a/arch/arm/mach-davinci/clock.h +++ b/arch/arm/mach-davinci/clock.h @@ -68,7 +68,7 @@ #ifndef __ASSEMBLER__ #include <linux/list.h> -#include <asm/clkdev.h> +#include <linux/clkdev.h> #define PLLSTAT_GOSTAT BIT(0) #define PLLCMD_GOSET BIT(0) diff --git a/arch/arm/mach-davinci/include/mach/io.h b/arch/arm/mach-davinci/include/mach/io.h index 62b0a90309ad..d1b954955c12 100644 --- a/arch/arm/mach-davinci/include/mach/io.h +++ b/arch/arm/mach-davinci/include/mach/io.h @@ -22,8 +22,8 @@ #define __mem_isa(a) (a) #ifndef __ASSEMBLER__ -#define __arch_ioremap(p, s, t) davinci_ioremap(p, s, t) -#define __arch_iounmap(v) davinci_iounmap(v) +#define __arch_ioremap davinci_ioremap +#define __arch_iounmap davinci_iounmap void __iomem *davinci_ioremap(unsigned long phys, size_t size, unsigned int type); diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index ef06c66a6f16..ca4de7105097 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -19,10 +19,10 @@ #include <linux/string.h> #include <linux/io.h> #include <linux/spinlock.h> +#include <linux/clkdev.h> #include <mach/hardware.h> -#include <asm/clkdev.h> #include <asm/div64.h> diff --git a/arch/arm/mach-imx/clock-imx1.c b/arch/arm/mach-imx/clock-imx1.c index daca30b2d5b1..3938a563b280 100644 --- a/arch/arm/mach-imx/clock-imx1.c +++ b/arch/arm/mach-imx/clock-imx1.c @@ -22,8 +22,7 @@ #include <linux/err.h> #include <linux/clk.h> #include <linux/io.h> - -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include <mach/clock.h> #include <mach/hardware.h> diff --git a/arch/arm/mach-imx/clock-imx21.c b/arch/arm/mach-imx/clock-imx21.c index cf15ea516a72..d7056559715a 100644 --- a/arch/arm/mach-imx/clock-imx21.c +++ b/arch/arm/mach-imx/clock-imx21.c @@ -21,11 +21,11 @@ #include <linux/clk.h> #include <linux/io.h> #include <linux/module.h> +#include <linux/clkdev.h> #include <mach/clock.h> #include <mach/hardware.h> #include <mach/common.h> -#include <asm/clkdev.h> #include <asm/div64.h> #define IO_ADDR_CCM(off) (MX21_IO_ADDRESS(MX21_CCM_BASE_ADDR + (off))) diff --git a/arch/arm/mach-imx/clock-imx27.c b/arch/arm/mach-imx/clock-imx27.c index 98a25bada783..ca1017b9028d 100644 --- a/arch/arm/mach-imx/clock-imx27.c +++ b/arch/arm/mach-imx/clock-imx27.c @@ -21,8 +21,8 @@ #include <linux/clk.h> #include <linux/io.h> #include <linux/module.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <asm/div64.h> #include <mach/clock.h> diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig index 27db275b367c..769b0f10c834 100644 --- a/arch/arm/mach-integrator/Kconfig +++ b/arch/arm/mach-integrator/Kconfig @@ -4,6 +4,7 @@ menu "Integrator Options" config ARCH_INTEGRATOR_AP bool "Support Integrator/AP and Integrator/PP2 platforms" + select MIGHT_HAVE_PCI help Include support for the ARM(R) Integrator/AP and Integrator/PP2 platforms. diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 8f4fb6d638f7..b8e884b450da 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -21,9 +21,8 @@ #include <linux/amba/bus.h> #include <linux/amba/serial.h> #include <linux/io.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> -#include <mach/clkdev.h> #include <mach/hardware.h> #include <mach/platform.h> #include <asm/irq.h> diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index fd684bf205e5..5db574f8ae3f 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -22,9 +22,8 @@ #include <linux/amba/clcd.h> #include <linux/io.h> #include <linux/slab.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> -#include <mach/clkdev.h> #include <asm/hardware/icst.h> #include <mach/lm.h> #include <mach/impd1.h> diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 6258c90d020c..85e48a5f77b9 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -21,9 +21,8 @@ #include <linux/amba/mmci.h> #include <linux/io.h> #include <linux/gfp.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> -#include <mach/clkdev.h> #include <mach/hardware.h> #include <mach/platform.h> #include <asm/irq.h> @@ -41,7 +40,7 @@ #include <asm/mach/map.h> #include <asm/mach/time.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> #include "common.h" diff --git a/arch/arm/mach-iop13xx/include/mach/io.h b/arch/arm/mach-iop13xx/include/mach/io.h index a6e0f9e6ddcf..dffb234bb967 100644 --- a/arch/arm/mach-iop13xx/include/mach/io.h +++ b/arch/arm/mach-iop13xx/include/mach/io.h @@ -35,7 +35,7 @@ extern u32 iop13xx_atux_mem_base; extern size_t iop13xx_atue_mem_size; extern size_t iop13xx_atux_mem_size; -#define __arch_ioremap(a, s, f) __iop13xx_ioremap(a, s, f) -#define __arch_iounmap(a) __iop13xx_iounmap(a) +#define __arch_ioremap __iop13xx_ioremap +#define __arch_iounmap __iop13xx_iounmap #endif diff --git a/arch/arm/mach-iop32x/include/mach/io.h b/arch/arm/mach-iop32x/include/mach/io.h index 339e5854728b..059c783ce0b2 100644 --- a/arch/arm/mach-iop32x/include/mach/io.h +++ b/arch/arm/mach-iop32x/include/mach/io.h @@ -21,7 +21,7 @@ extern void __iop3xx_iounmap(void __iomem *addr); #define __io(p) ((void __iomem *)IOP3XX_PCI_IO_PHYS_TO_VIRT(p)) #define __mem_pci(a) (a) -#define __arch_ioremap(a, s, f) __iop3xx_ioremap(a, s, f) -#define __arch_iounmap(a) __iop3xx_iounmap(a) +#define __arch_ioremap __iop3xx_ioremap +#define __arch_iounmap __iop3xx_iounmap #endif diff --git a/arch/arm/mach-iop33x/include/mach/io.h b/arch/arm/mach-iop33x/include/mach/io.h index e99a7ed6d050..39e893e97c21 100644 --- a/arch/arm/mach-iop33x/include/mach/io.h +++ b/arch/arm/mach-iop33x/include/mach/io.h @@ -21,7 +21,7 @@ extern void __iop3xx_iounmap(void __iomem *addr); #define __io(p) ((void __iomem *)IOP3XX_PCI_IO_PHYS_TO_VIRT(p)) #define __mem_pci(a) (a) -#define __arch_ioremap(a, s, f) __iop3xx_ioremap(a, s, f) -#define __arch_iounmap(a) __iop3xx_iounmap(a) +#define __arch_ioremap __iop3xx_ioremap +#define __arch_iounmap __iop3xx_iounmap #endif diff --git a/arch/arm/mach-ixp23xx/include/mach/io.h b/arch/arm/mach-ixp23xx/include/mach/io.h index fd9ef8e519f7..a1749d0fd896 100644 --- a/arch/arm/mach-ixp23xx/include/mach/io.h +++ b/arch/arm/mach-ixp23xx/include/mach/io.h @@ -45,8 +45,8 @@ ixp23xx_iounmap(void __iomem *addr) __iounmap(addr); } -#define __arch_ioremap(a,s,f) ixp23xx_ioremap(a,s,f) -#define __arch_iounmap(a) ixp23xx_iounmap(a) +#define __arch_ioremap ixp23xx_ioremap +#define __arch_iounmap ixp23xx_iounmap #endif diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h index de274a1f19d7..57b5410c31f4 100644 --- a/arch/arm/mach-ixp4xx/include/mach/io.h +++ b/arch/arm/mach-ixp4xx/include/mach/io.h @@ -74,8 +74,8 @@ static inline void __indirect_iounmap(void __iomem *addr) __iounmap(addr); } -#define __arch_ioremap(a, s, f) __indirect_ioremap(a, s, f) -#define __arch_iounmap(a) __indirect_iounmap(a) +#define __arch_ioremap __indirect_ioremap +#define __arch_iounmap __indirect_iounmap #define writeb(v, p) __indirect_writeb(v, p) #define writew(v, p) __indirect_writew(v, p) diff --git a/arch/arm/mach-kirkwood/include/mach/io.h b/arch/arm/mach-kirkwood/include/mach/io.h index 44e8be04f259..1aaddc364f2e 100644 --- a/arch/arm/mach-kirkwood/include/mach/io.h +++ b/arch/arm/mach-kirkwood/include/mach/io.h @@ -42,8 +42,8 @@ __arch_iounmap(void __iomem *addr) __iounmap(addr); } -#define __arch_ioremap(p, s, m) __arch_ioremap(p, s, m) -#define __arch_iounmap(a) __arch_iounmap(a) +#define __arch_ioremap __arch_ioremap +#define __arch_iounmap __arch_iounmap #define __io(a) __io(a) #define __mem_pci(a) (a) diff --git a/arch/arm/mach-ks8695/Kconfig b/arch/arm/mach-ks8695/Kconfig index fe0c82e30b2d..f5c39a8c2b00 100644 --- a/arch/arm/mach-ks8695/Kconfig +++ b/arch/arm/mach-ks8695/Kconfig @@ -4,6 +4,7 @@ menu "Kendin/Micrel KS8695 Implementations" config MACH_KS8695 bool "KS8695 development board" + select MIGHT_HAVE_PCI help Say 'Y' here if you want your kernel to run on the original Kendin-Micrel KS8695 development board. diff --git a/arch/arm/mach-lpc32xx/clock.c b/arch/arm/mach-lpc32xx/clock.c index 32d63796430a..da0e6498110a 100644 --- a/arch/arm/mach-lpc32xx/clock.c +++ b/arch/arm/mach-lpc32xx/clock.c @@ -90,10 +90,9 @@ #include <linux/clk.h> #include <linux/amba/bus.h> #include <linux/amba/clcd.h> +#include <linux/clkdev.h> #include <mach/hardware.h> -#include <asm/clkdev.h> -#include <mach/clkdev.h> #include <mach/platform.h> #include "clock.h" #include "common.h" diff --git a/arch/arm/mach-mmp/clock.h b/arch/arm/mach-mmp/clock.h index 016ae94691c0..9b027d7491f5 100644 --- a/arch/arm/mach-mmp/clock.h +++ b/arch/arm/mach-mmp/clock.h @@ -6,7 +6,7 @@ * published by the Free Software Foundation. */ -#include <asm/clkdev.h> +#include <linux/clkdev.h> struct clkops { void (*enable)(struct clk *); diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig index dbbcfeb919db..31e5fd63ec9a 100644 --- a/arch/arm/mach-msm/Kconfig +++ b/arch/arm/mach-msm/Kconfig @@ -49,6 +49,8 @@ endchoice config MSM_SOC_REV_A bool +config ARCH_MSM_SCORPIONMP + bool config ARCH_MSM_ARM11 bool diff --git a/arch/arm/mach-msm/board-msm8x60.c b/arch/arm/mach-msm/board-msm8x60.c index 7486a681cc71..9b5eb2b4ae1b 100644 --- a/arch/arm/mach-msm/board-msm8x60.c +++ b/arch/arm/mach-msm/board-msm8x60.c @@ -28,8 +28,6 @@ #include <mach/board.h> #include <mach/msm_iomap.h> -void __iomem *gic_cpu_base_addr; - unsigned long clk_get_max_axi_khz(void) { return 0; @@ -44,9 +42,8 @@ static void __init msm8x60_init_irq(void) { unsigned int i; - gic_dist_init(0, MSM_QGIC_DIST_BASE, GIC_PPI_START); - gic_cpu_base_addr = (void *)MSM_QGIC_CPU_BASE; - gic_cpu_init(0, MSM_QGIC_CPU_BASE); + gic_init(0, GIC_PPI_START, MSM_QGIC_DIST_BASE, + (void *)MSM_QGIC_CPU_BASE); /* Edge trigger PPIs except AVS_SVICINT and AVS_SVICINTSWDONE */ writel(0xFFFFD7FF, MSM_QGIC_DIST_BASE + GIC_DIST_CONFIG + 4); diff --git a/arch/arm/mach-msm/include/mach/smp.h b/arch/arm/mach-msm/include/mach/smp.h index 3ff7bf5e679e..a95f7b9efe31 100644 --- a/arch/arm/mach-msm/include/mach/smp.h +++ b/arch/arm/mach-msm/include/mach/smp.h @@ -31,9 +31,9 @@ #include <asm/hardware/gic.h> -static inline void smp_cross_call(const struct cpumask *mask) +static inline void smp_cross_call(const struct cpumask *mask, int ipi) { - gic_raise_softirq(mask, 1); + gic_raise_softirq(mask, ipi); } #endif diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index 9e4a5578c2fb..00dcb08019e9 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -21,8 +21,7 @@ #include <linux/list.h> #include <linux/clk.h> #include <linux/io.h> - -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include <mach/clock.h> #include <mach/hardware.h> diff --git a/arch/arm/mach-mx3/clock-imx31.c b/arch/arm/mach-mx3/clock-imx31.c index 109e98f323e0..1cd8b40b7676 100644 --- a/arch/arm/mach-mx3/clock-imx31.c +++ b/arch/arm/mach-mx3/clock-imx31.c @@ -23,8 +23,8 @@ #include <linux/clk.h> #include <linux/err.h> #include <linux/io.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <asm/div64.h> #include <mach/clock.h> diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c index 61e4a318980a..819dd809615a 100644 --- a/arch/arm/mach-mx3/clock-imx35.c +++ b/arch/arm/mach-mx3/clock-imx35.c @@ -21,8 +21,7 @@ #include <linux/list.h> #include <linux/clk.h> #include <linux/io.h> - -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include <mach/clock.h> #include <mach/hardware.h> diff --git a/arch/arm/mach-mx5/clock-mx51.c b/arch/arm/mach-mx5/clock-mx51.c index 8ac36d882927..5975edb47de8 100644 --- a/arch/arm/mach-mx5/clock-mx51.c +++ b/arch/arm/mach-mx5/clock-mx51.c @@ -14,8 +14,8 @@ #include <linux/delay.h> #include <linux/clk.h> #include <linux/io.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <asm/div64.h> #include <mach/hardware.h> diff --git a/arch/arm/mach-mxc91231/clock.c b/arch/arm/mach-mxc91231/clock.c index 5c85075d8a56..9fab505f1eb1 100644 --- a/arch/arm/mach-mxc91231/clock.c +++ b/arch/arm/mach-mxc91231/clock.c @@ -2,12 +2,12 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/io.h> +#include <linux/clkdev.h> #include <mach/clock.h> #include <mach/hardware.h> #include <mach/common.h> -#include <asm/clkdev.h> #include <asm/bug.h> #include <asm/div64.h> diff --git a/arch/arm/mach-nomadik/clock.c b/arch/arm/mach-nomadik/clock.c index 89f793adf776..48a59f24e10c 100644 --- a/arch/arm/mach-nomadik/clock.c +++ b/arch/arm/mach-nomadik/clock.c @@ -7,7 +7,7 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/clk.h> -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include "clock.h" /* diff --git a/arch/arm/mach-nuc93x/clock.h b/arch/arm/mach-nuc93x/clock.h index 18e51be4816f..4de1f1da9dc5 100644 --- a/arch/arm/mach-nuc93x/clock.h +++ b/arch/arm/mach-nuc93x/clock.h @@ -10,7 +10,7 @@ * the Free Software Foundation; either version 2 of the License. */ -#include <asm/clkdev.h> +#include <linux/clkdev.h> void nuc93x_clk_enable(struct clk *clk, int enable); void clks_register(struct clk_lookup *clks, size_t num); diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index b8c7fb9d7921..84ef70476b51 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -17,9 +17,9 @@ #include <linux/err.h> #include <linux/clk.h> #include <linux/io.h> +#include <linux/clkdev.h> #include <asm/mach-types.h> -#include <asm/clkdev.h> #include <plat/cpu.h> #include <plat/usb.h> diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index ed8d330522f1..ebb888f59365 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -26,10 +26,10 @@ #include <linux/clk.h> #include <linux/io.h> #include <linux/bitops.h> +#include <linux/clkdev.h> #include <plat/cpu.h> #include <plat/clock.h> -#include <asm/clkdev.h> #include "clock.h" #include "prm.h" diff --git a/arch/arm/mach-omap2/include/mach/entry-macro.S b/arch/arm/mach-omap2/include/mach/entry-macro.S index 06e64e1fc28a..d54c4f89a8bd 100644 --- a/arch/arm/mach-omap2/include/mach/entry-macro.S +++ b/arch/arm/mach-omap2/include/mach/entry-macro.S @@ -105,6 +105,35 @@ omap_irq_base: .word 0 9999: .endm +#ifdef CONFIG_SMP + /* We assume that irqstat (the raw value of the IRQ acknowledge + * register) is preserved from the macro above. + * If there is an IPI, we immediately signal end of interrupt + * on the controller, since this requires the original irqstat + * value which we won't easily be able to recreate later. + */ + + .macro test_for_ipi, irqnr, irqstat, base, tmp + bic \irqnr, \irqstat, #0x1c00 + cmp \irqnr, #16 + it cc + strcc \irqstat, [\base, #GIC_CPU_EOI] + it cs + cmpcs \irqnr, \irqnr + .endm + + /* As above, this assumes that irqstat and base are preserved */ + + .macro test_for_ltirq, irqnr, irqstat, base, tmp + bic \irqnr, \irqstat, #0x1c00 + mov \tmp, #0 + cmp \irqnr, #29 + itt eq + moveq \tmp, #1 + streq \irqstat, [\base, #GIC_CPU_EOI] + cmp \tmp, #0 + .endm +#endif /* CONFIG_SMP */ #else /* MULTI_OMAP2 */ @@ -141,74 +170,16 @@ omap_irq_base: .word 0 #ifdef CONFIG_ARCH_OMAP4 +#define HAVE_GET_IRQNR_PREAMBLE +#include <asm/hardware/entry-macro-gic.S> .macro get_irqnr_preamble, base, tmp ldr \base, =OMAP4_IRQ_BASE .endm - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an - * interrupt if it's between 30 and 1020. The test_for_ipi - * routine below will pick up on IPIs. - * A simple read from the controller will tell us the number - * of the highest priority enabled interrupt. - * We then just need to check whether it is in the - * valid range for an IRQ (30-1020 inclusive). - */ - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldr \irqstat, [\base, #GIC_CPU_INTACK] - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - .endm #endif -#endif /* MULTI_OMAP2 */ - -#ifdef CONFIG_SMP - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt - * on the controller, since this requires the original irqstat - * value which we won't easily be able to recreate later. - */ - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - it cc - strcc \irqstat, [\base, #GIC_CPU_EOI] - it cs - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - itt eq - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm -#endif /* CONFIG_SMP */ +#endif /* MULTI_OMAP2 */ .macro irq_prio_table .endm diff --git a/arch/arm/mach-omap2/include/mach/omap4-common.h b/arch/arm/mach-omap2/include/mach/omap4-common.h index 2744dfee1ff4..5b0270b28934 100644 --- a/arch/arm/mach-omap2/include/mach/omap4-common.h +++ b/arch/arm/mach-omap2/include/mach/omap4-common.h @@ -24,7 +24,6 @@ extern void __iomem *l2cache_base; #endif -extern void __iomem *gic_cpu_base_addr; extern void __iomem *gic_dist_base_addr; extern void __init gic_init_irq(void); diff --git a/arch/arm/mach-omap2/omap-hotplug.c b/arch/arm/mach-omap2/omap-hotplug.c index 6cee456ca542..4976b9393e49 100644 --- a/arch/arm/mach-omap2/omap-hotplug.c +++ b/arch/arm/mach-omap2/omap-hotplug.c @@ -17,16 +17,13 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/smp.h> -#include <linux/completion.h> #include <asm/cacheflush.h> #include <mach/omap4-common.h> -static DECLARE_COMPLETION(cpu_killed); - int platform_cpu_kill(unsigned int cpu) { - return wait_for_completion_timeout(&cpu_killed, 5000); + return 1; } /* @@ -35,15 +32,6 @@ int platform_cpu_kill(unsigned int cpu) */ void platform_cpu_die(unsigned int cpu) { - unsigned int this_cpu = hard_smp_processor_id(); - - if (cpu != this_cpu) { - pr_crit("platform_cpu_die running on %u, should be %u\n", - this_cpu, cpu); - BUG(); - } - pr_notice("CPU%u: shutdown\n", cpu); - complete(&cpu_killed); flush_cache_all(); dsb(); diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 9e9f70e18e3c..b66cfe8bc464 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -21,7 +21,6 @@ #include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/localtimer.h> #include <asm/smp_scu.h> #include <mach/hardware.h> #include <mach/omap4-common.h> @@ -29,28 +28,16 @@ /* SCU base address */ static void __iomem *scu_base; -/* - * Use SCU config register to count number of cores - */ -static inline unsigned int get_core_count(void) -{ - if (scu_base) - return scu_get_core_count(scu_base); - return 1; -} - static DEFINE_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { - trace_hardirqs_off(); - /* * If any interrupts are already enabled for the primary * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, gic_cpu_base_addr); + gic_secondary_init(0); /* * Synchronise with the boot thread. @@ -76,7 +63,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) omap_modify_auxcoreboot0(0x200, 0xfffffdff); flush_cache_all(); smp_wmb(); - smp_cross_call(cpumask_of(cpu)); + smp_cross_call(cpumask_of(cpu), 1); /* * Now the secondary core is starting up let it run its @@ -118,25 +105,9 @@ void __init smp_init_cpus(void) scu_base = ioremap(OMAP44XX_SCU_BASE, SZ_256); BUG_ON(!scu_base); - ncores = get_core_count(); - - for (i = 0; i < ncores; i++) - set_cpu_possible(i, true); -} - -void __init smp_prepare_cpus(unsigned int max_cpus) -{ - unsigned int ncores = get_core_count(); - unsigned int cpu = smp_processor_id(); - int i; + ncores = scu_get_core_count(scu_base); /* sanity check */ - if (ncores == 0) { - printk(KERN_ERR - "OMAP4: strange core count of 0? Default to 1\n"); - ncores = 1; - } - if (ncores > NR_CPUS) { printk(KERN_WARNING "OMAP4: no. of cores (%d) greater than configured " @@ -144,13 +115,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus) ncores, NR_CPUS); ncores = NR_CPUS; } - smp_store_cpu_info(cpu); - /* - * are we trying to boot more cores than exist? - */ - if (max_cpus > ncores) - max_cpus = ncores; + for (i = 0; i < ncores; i++) + set_cpu_possible(i, true); +} + +void __init platform_smp_prepare_cpus(unsigned int max_cpus) +{ + int i; /* * Initialise the present map, which describes the set of CPUs @@ -159,18 +131,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); - if (max_cpus > 1) { - /* - * Enable the local timer or broadcast device for the - * boot CPU, but only if we have more than one CPU. - */ - percpu_timer_setup(); - - /* - * Initialise the SCU and wake up the secondary core using - * wakeup_secondary(). - */ - scu_enable(scu_base); - wakeup_secondary(); - } + /* + * Initialise the SCU and wake up the secondary core using + * wakeup_secondary(). + */ + scu_enable(scu_base); + wakeup_secondary(); } diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 2f895553e6a8..666e852988d5 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -26,21 +26,22 @@ void __iomem *l2cache_base; #endif -void __iomem *gic_cpu_base_addr; void __iomem *gic_dist_base_addr; void __init gic_init_irq(void) { + void __iomem *gic_cpu_base; + /* Static mapping, never released */ gic_dist_base_addr = ioremap(OMAP44XX_GIC_DIST_BASE, SZ_4K); BUG_ON(!gic_dist_base_addr); - gic_dist_init(0, gic_dist_base_addr, 29); /* Static mapping, never released */ - gic_cpu_base_addr = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512); - BUG_ON(!gic_cpu_base_addr); - gic_cpu_init(0, gic_cpu_base_addr); + gic_cpu_base = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512); + BUG_ON(!gic_cpu_base); + + gic_init(0, 29, gic_dist_base_addr, gic_cpu_base); } #ifdef CONFIG_CACHE_L2X0 diff --git a/arch/arm/mach-orion5x/include/mach/io.h b/arch/arm/mach-orion5x/include/mach/io.h index c47b033bd999..c5196101a237 100644 --- a/arch/arm/mach-orion5x/include/mach/io.h +++ b/arch/arm/mach-orion5x/include/mach/io.h @@ -38,8 +38,8 @@ __arch_iounmap(void __iomem *addr) __iounmap(addr); } -#define __arch_ioremap(p, s, m) __arch_ioremap(p, s, m) -#define __arch_iounmap(a) __arch_iounmap(a) +#define __arch_ioremap __arch_ioremap +#define __arch_iounmap __arch_iounmap #define __io(a) __typesafe_io(a) #define __mem_pci(a) (a) diff --git a/arch/arm/mach-pnx4008/clock.c b/arch/arm/mach-pnx4008/clock.c index 9d1975fa4d9f..a4a3819c96cb 100644 --- a/arch/arm/mach-pnx4008/clock.c +++ b/arch/arm/mach-pnx4008/clock.c @@ -21,8 +21,7 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/io.h> - -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include <mach/hardware.h> #include <mach/clock.h> diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index dd235ecc9d6c..716a2e159c2c 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -94,6 +94,7 @@ config MACH_ARMCORE select PXA27x select IWMMXT select PXA25x + select MIGHT_HAVE_PCI config MACH_EM_X270 bool "CompuLab EM-x270 platform" diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c index abba0089a2ae..4e4a84be96ba 100644 --- a/arch/arm/mach-pxa/clock.c +++ b/arch/arm/mach-pxa/clock.c @@ -11,8 +11,8 @@ #include <linux/spinlock.h> #include <linux/platform_device.h> #include <linux/delay.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <mach/pxa2xx-regs.h> #include <mach/hardware.h> diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h index d8488742b807..12cc0e87e6c4 100644 --- a/arch/arm/mach-pxa/clock.h +++ b/arch/arm/mach-pxa/clock.h @@ -1,4 +1,4 @@ -#include <asm/clkdev.h> +#include <linux/clkdev.h> struct clkops { void (*enable)(struct clk *); diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 07c08151dfe6..c2ea56ff647a 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -30,8 +30,8 @@ #include <linux/ata_platform.h> #include <linux/amba/mmci.h> #include <linux/gfp.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <asm/system.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -47,16 +47,12 @@ #include <asm/hardware/gic.h> -#include <mach/clkdev.h> #include <mach/platform.h> #include <mach/irqs.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> #include "core.h" -/* used by entry-macro.S and platsmp.c */ -void __iomem *gic_cpu_base_addr; - #ifdef CONFIG_ZONE_DMA /* * Adjust the zones if there are restrictions for DMA access. diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h index 781bca68a9fa..693239ddc39e 100644 --- a/arch/arm/mach-realview/core.h +++ b/arch/arm/mach-realview/core.h @@ -53,7 +53,6 @@ extern struct platform_device realview_i2c_device; extern struct mmci_platform_data realview_mmc0_plat_data; extern struct mmci_platform_data realview_mmc1_plat_data; extern struct clcd_board clcd_plat_data; -extern void __iomem *gic_cpu_base_addr; extern void __iomem *timer0_va_base; extern void __iomem *timer1_va_base; extern void __iomem *timer2_va_base; diff --git a/arch/arm/mach-realview/hotplug.c b/arch/arm/mach-realview/hotplug.c index f95521a5e5ce..e06572e6c490 100644 --- a/arch/arm/mach-realview/hotplug.c +++ b/arch/arm/mach-realview/hotplug.c @@ -11,14 +11,11 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/smp.h> -#include <linux/completion.h> #include <asm/cacheflush.h> extern volatile int pen_release; -static DECLARE_COMPLETION(cpu_killed); - static inline void cpu_enter_lowpower(void) { unsigned int v; @@ -95,7 +92,7 @@ static inline void platform_do_lowpower(unsigned int cpu) int platform_cpu_kill(unsigned int cpu) { - return wait_for_completion_timeout(&cpu_killed, 5000); + return 1; } /* @@ -105,19 +102,6 @@ int platform_cpu_kill(unsigned int cpu) */ void platform_cpu_die(unsigned int cpu) { -#ifdef DEBUG - unsigned int this_cpu = hard_smp_processor_id(); - - if (cpu != this_cpu) { - printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n", - this_cpu, cpu); - BUG(); - } -#endif - - printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); - complete(&cpu_killed); - /* * we're ready for shutdown now, so do it */ diff --git a/arch/arm/mach-realview/include/mach/entry-macro.S b/arch/arm/mach-realview/include/mach/entry-macro.S index 340a5c276946..4071164aebaa 100644 --- a/arch/arm/mach-realview/include/mach/entry-macro.S +++ b/arch/arm/mach-realview/include/mach/entry-macro.S @@ -8,74 +8,11 @@ * warranty of any kind, whether express or implied. */ #include <mach/hardware.h> -#include <asm/hardware/gic.h> +#include <asm/hardware/entry-macro-gic.S> .macro disable_fiq .endm - .macro get_irqnr_preamble, base, tmp - ldr \base, =gic_cpu_base_addr - ldr \base, [\base] - .endm - .macro arch_ret_to_user, tmp1, tmp2 .endm - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an interrupt if it's - * between 30 and 1020. The test_for_ipi routine below will pick up on IPIs. - * - * A simple read from the controller will tell us the number of the highest - * priority enabled interrupt. We then just need to check whether it is in the - * valid range for an IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - - ldr \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */ - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - - .endm - - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt on the - * controller, since this requires the original irqstat value which - * we won't easily be able to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved.. */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm diff --git a/arch/arm/mach-realview/include/mach/smp.h b/arch/arm/mach-realview/include/mach/smp.h index d3cd265cb058..c8221b38ee7c 100644 --- a/arch/arm/mach-realview/include/mach/smp.h +++ b/arch/arm/mach-realview/include/mach/smp.h @@ -2,14 +2,13 @@ #define ASMARM_ARCH_SMP_H #include <asm/hardware/gic.h> -#include <asm/smp_mpidr.h> /* * We use IRQ1 as the IPI */ -static inline void smp_cross_call(const struct cpumask *mask) +static inline void smp_cross_call(const struct cpumask *mask, int ipi) { - gic_raise_softirq(mask, 1); + gic_raise_softirq(mask, ipi); } #endif diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c index 009265818d55..dc7c912e8fac 100644 --- a/arch/arm/mach-realview/platsmp.c +++ b/arch/arm/mach-realview/platsmp.c @@ -19,7 +19,6 @@ #include <asm/cacheflush.h> #include <mach/hardware.h> #include <asm/mach-types.h> -#include <asm/localtimer.h> #include <asm/unified.h> #include <mach/board-eb.h> @@ -50,26 +49,16 @@ static void __iomem *scu_base_addr(void) return (void __iomem *)0; } -static inline unsigned int get_core_count(void) -{ - void __iomem *scu_base = scu_base_addr(); - if (scu_base) - return scu_get_core_count(scu_base); - return 1; -} - static DEFINE_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { - trace_hardirqs_off(); - /* * if any interrupts are already enabled for the primary * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, gic_cpu_base_addr); + gic_secondary_init(0); /* * let the primary processor know we're out of the @@ -107,16 +96,11 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) flush_cache_all(); /* - * XXX - * - * This is a later addition to the booting protocol: the - * bootMonitor now puts secondary cores into WFI, so - * poke_milo() no longer gets the cores moving; we need - * to send a soft interrupt to wake the secondary core. - * Use smp_cross_call() for this, since there's little - * point duplicating the code here + * Send the secondary CPU a soft interrupt, thereby causing + * the boot monitor to read the system wide flags register, + * and branch to the address found there. */ - smp_cross_call(cpumask_of(cpu)); + smp_cross_call(cpumask_of(cpu), 1); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { @@ -136,48 +120,18 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) return pen_release != -1 ? -ENOSYS : 0; } -static void __init poke_milo(void) -{ - /* nobody is to be released from the pen yet */ - pen_release = -1; - - /* - * Write the address of secondary startup into the system-wide flags - * register. The BootMonitor waits for this register to become - * non-zero. - */ - __raw_writel(BSYM(virt_to_phys(realview_secondary_startup)), - __io_address(REALVIEW_SYS_FLAGSSET)); - - mb(); -} - /* * Initialise the CPU possible map early - this describes the CPUs * which may be present or become present in the system. */ void __init smp_init_cpus(void) { - unsigned int i, ncores = get_core_count(); - - for (i = 0; i < ncores; i++) - set_cpu_possible(i, true); -} + void __iomem *scu_base = scu_base_addr(); + unsigned int i, ncores; -void __init smp_prepare_cpus(unsigned int max_cpus) -{ - unsigned int ncores = get_core_count(); - unsigned int cpu = smp_processor_id(); - int i; + ncores = scu_base ? scu_get_core_count(scu_base) : 1; /* sanity check */ - if (ncores == 0) { - printk(KERN_ERR - "Realview: strange CM count of 0? Default to 1\n"); - - ncores = 1; - } - if (ncores > NR_CPUS) { printk(KERN_WARNING "Realview: no. of cores (%d) greater than configured " @@ -186,13 +140,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) ncores = NR_CPUS; } - smp_store_cpu_info(cpu); + for (i = 0; i < ncores; i++) + set_cpu_possible(i, true); +} - /* - * are we trying to boot more cores than exist? - */ - if (max_cpus > ncores) - max_cpus = ncores; +void __init platform_smp_prepare_cpus(unsigned int max_cpus) +{ + int i; /* * Initialise the present map, which describes the set of CPUs @@ -201,21 +155,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); + scu_enable(scu_base_addr()); + /* - * Initialise the SCU if there are more than one CPU and let - * them know where to start. Note that, on modern versions of - * MILO, the "poke" doesn't actually do anything until each - * individual core is sent a soft interrupt to get it out of - * WFI + * Write the address of secondary startup into the + * system-wide flags register. The BootMonitor waits + * until it receives a soft interrupt, and then the + * secondary CPU branches to this address. */ - if (max_cpus > 1) { - /* - * Enable the local timer or broadcast device for the - * boot CPU, but only if we have more than one CPU. - */ - percpu_timer_setup(); - - scu_enable(scu_base_addr()); - poke_milo(); - } + __raw_writel(BSYM(virt_to_phys(realview_secondary_startup)), + __io_address(REALVIEW_SYS_FLAGSSET)); } diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index f2697106f809..6ef5c5e528b2 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -364,21 +364,19 @@ static void __init gic_init_irq(void) writel(0x00000000, __io_address(REALVIEW_SYS_LOCK)); /* core tile GIC, primary */ - gic_cpu_base_addr = __io_address(REALVIEW_EB11MP_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_EB11MP_GIC_DIST_BASE), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, __io_address(REALVIEW_EB11MP_GIC_DIST_BASE), + __io_address(REALVIEW_EB11MP_GIC_CPU_BASE)); #ifndef CONFIG_REALVIEW_EB_ARM11MP_REVB /* board GIC, secondary */ - gic_dist_init(1, __io_address(REALVIEW_EB_GIC_DIST_BASE), 64); - gic_cpu_init(1, __io_address(REALVIEW_EB_GIC_CPU_BASE)); + gic_init(1, 64, __io_address(REALVIEW_EB_GIC_DIST_BASE), + __io_address(REALVIEW_EB_GIC_CPU_BASE)); gic_cascade_irq(1, IRQ_EB11MP_EB_IRQ1); #endif } else { /* board GIC, primary */ - gic_cpu_base_addr = __io_address(REALVIEW_EB_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_EB_GIC_DIST_BASE), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, __io_address(REALVIEW_EB_GIC_DIST_BASE), + __io_address(REALVIEW_EB_GIC_CPU_BASE)); } } diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c index a4125619d71b..cbdc97a5685f 100644 --- a/arch/arm/mach-realview/realview_pb1176.c +++ b/arch/arm/mach-realview/realview_pb1176.c @@ -304,13 +304,14 @@ static struct platform_device char_lcd_device = { static void __init gic_init_irq(void) { /* ARM1176 DevChip GIC, primary */ - gic_cpu_base_addr = __io_address(REALVIEW_DC1176_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_DC1176_GIC_DIST_BASE), IRQ_DC1176_GIC_START); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, IRQ_DC1176_GIC_START, + __io_address(REALVIEW_DC1176_GIC_DIST_BASE), + __io_address(REALVIEW_DC1176_GIC_CPU_BASE)); /* board GIC, secondary */ - gic_dist_init(1, __io_address(REALVIEW_PB1176_GIC_DIST_BASE), IRQ_PB1176_GIC_START); - gic_cpu_init(1, __io_address(REALVIEW_PB1176_GIC_CPU_BASE)); + gic_init(1, IRQ_PB1176_GIC_START, + __io_address(REALVIEW_PB1176_GIC_DIST_BASE), + __io_address(REALVIEW_PB1176_GIC_CPU_BASE)); gic_cascade_irq(1, IRQ_DC1176_PB_IRQ1); } diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c index 117b95b2ca15..8e8ab7d29a6a 100644 --- a/arch/arm/mach-realview/realview_pb11mp.c +++ b/arch/arm/mach-realview/realview_pb11mp.c @@ -309,13 +309,13 @@ static void __init gic_init_irq(void) writel(0x00000000, __io_address(REALVIEW_SYS_LOCK)); /* ARM11MPCore test chip GIC, primary */ - gic_cpu_base_addr = __io_address(REALVIEW_TC11MP_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_TC11MP_GIC_DIST_BASE), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, __io_address(REALVIEW_TC11MP_GIC_DIST_BASE), + __io_address(REALVIEW_TC11MP_GIC_CPU_BASE)); /* board GIC, secondary */ - gic_dist_init(1, __io_address(REALVIEW_PB11MP_GIC_DIST_BASE), IRQ_PB11MP_GIC_START); - gic_cpu_init(1, __io_address(REALVIEW_PB11MP_GIC_CPU_BASE)); + gic_init(1, IRQ_PB11MP_GIC_START, + __io_address(REALVIEW_PB11MP_GIC_DIST_BASE), + __io_address(REALVIEW_PB11MP_GIC_CPU_BASE)); gic_cascade_irq(1, IRQ_TC11MP_PB_IRQ1); } diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c index 929b8dc12e81..841118e3e118 100644 --- a/arch/arm/mach-realview/realview_pba8.c +++ b/arch/arm/mach-realview/realview_pba8.c @@ -273,9 +273,9 @@ static struct platform_device pmu_device = { static void __init gic_init_irq(void) { /* ARM PB-A8 on-board GIC */ - gic_cpu_base_addr = __io_address(REALVIEW_PBA8_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_PBA8_GIC_DIST_BASE), IRQ_PBA8_GIC_START); - gic_cpu_init(0, __io_address(REALVIEW_PBA8_GIC_CPU_BASE)); + gic_init(0, IRQ_PBA8_GIC_START, + __io_address(REALVIEW_PBA8_GIC_DIST_BASE), + __io_address(REALVIEW_PBA8_GIC_CPU_BASE)); } static void __init realview_pba8_timer_init(void) diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c index b9f9e20031a7..02b755b009db 100644 --- a/arch/arm/mach-realview/realview_pbx.c +++ b/arch/arm/mach-realview/realview_pbx.c @@ -313,15 +313,12 @@ static void __init gic_init_irq(void) { /* ARM PBX on-board GIC */ if (core_tile_pbx11mp() || core_tile_pbxa9mp()) { - gic_cpu_base_addr = __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_PBX_TILE_GIC_DIST_BASE), - 29); - gic_cpu_init(0, __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE)); + gic_init(0, 29, __io_address(REALVIEW_PBX_TILE_GIC_DIST_BASE), + __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE)); } else { - gic_cpu_base_addr = __io_address(REALVIEW_PBX_GIC_CPU_BASE); - gic_dist_init(0, __io_address(REALVIEW_PBX_GIC_DIST_BASE), - IRQ_PBX_GIC_START); - gic_cpu_init(0, __io_address(REALVIEW_PBX_GIC_CPU_BASE)); + gic_init(0, IRQ_PBX_GIC_START, + __io_address(REALVIEW_PBX_GIC_DIST_BASE), + __io_address(REALVIEW_PBX_GIC_CPU_BASE)); } } diff --git a/arch/arm/mach-s3c2412/Kconfig b/arch/arm/mach-s3c2412/Kconfig index fa2e5bffbb8e..3a8a1bfd7740 100644 --- a/arch/arm/mach-s3c2412/Kconfig +++ b/arch/arm/mach-s3c2412/Kconfig @@ -52,7 +52,7 @@ config MACH_JIVE Say Y here if you are using the Logitech Jive. config MACH_JIVE_SHOW_BOOTLOADER - bool "Allow access to bootloader partitions in MTD" + bool "Allow access to bootloader partitions in MTD (EXPERIMENTAL)" depends on MACH_JIVE && EXPERIMENTAL config MACH_SMDK2413 diff --git a/arch/arm/mach-s5pv310/cpu.c b/arch/arm/mach-s5pv310/cpu.c index 82ce4aa6d61a..72ab289e7816 100644 --- a/arch/arm/mach-s5pv310/cpu.c +++ b/arch/arm/mach-s5pv310/cpu.c @@ -24,8 +24,6 @@ #include <mach/regs-irq.h> -void __iomem *gic_cpu_base_addr; - extern int combiner_init(unsigned int combiner_nr, void __iomem *base, unsigned int irq_start); extern void combiner_cascade_irq(unsigned int combiner_nr, unsigned int irq); @@ -122,9 +120,7 @@ void __init s5pv310_init_irq(void) { int irq; - gic_cpu_base_addr = S5P_VA_GIC_CPU; - gic_dist_init(0, S5P_VA_GIC_DIST, IRQ_LOCALTIMER); - gic_cpu_init(0, S5P_VA_GIC_CPU); + gic_init(0, IRQ_LOCALTIMER, S5P_VA_GIC_DIST, S5P_VA_GIC_CPU); for (irq = 0; irq < MAX_COMBINER_NR; irq++) { combiner_init(irq, (void __iomem *)S5P_VA_COMBINER(irq), diff --git a/arch/arm/mach-s5pv310/hotplug.c b/arch/arm/mach-s5pv310/hotplug.c index 03652c3605f6..ea951ef6ea54 100644 --- a/arch/arm/mach-s5pv310/hotplug.c +++ b/arch/arm/mach-s5pv310/hotplug.c @@ -13,14 +13,11 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/smp.h> -#include <linux/completion.h> #include <asm/cacheflush.h> extern volatile int pen_release; -static DECLARE_COMPLETION(cpu_killed); - static inline void cpu_enter_lowpower(void) { unsigned int v; @@ -98,7 +95,7 @@ static inline void platform_do_lowpower(unsigned int cpu) int platform_cpu_kill(unsigned int cpu) { - return wait_for_completion_timeout(&cpu_killed, 5000); + return 1; } /* @@ -108,19 +105,6 @@ int platform_cpu_kill(unsigned int cpu) */ void platform_cpu_die(unsigned int cpu) { -#ifdef DEBUG - unsigned int this_cpu = hard_smp_processor_id(); - - if (cpu != this_cpu) { - printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n", - this_cpu, cpu); - BUG(); - } -#endif - - printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); - complete(&cpu_killed); - /* * we're ready for shutdown now, so do it */ diff --git a/arch/arm/mach-s5pv310/include/mach/smp.h b/arch/arm/mach-s5pv310/include/mach/smp.h index b7ec252384f4..393ccbd52c4a 100644 --- a/arch/arm/mach-s5pv310/include/mach/smp.h +++ b/arch/arm/mach-s5pv310/include/mach/smp.h @@ -7,16 +7,13 @@ #define ASM_ARCH_SMP_H __FILE__ #include <asm/hardware/gic.h> -#include <asm/smp_mpidr.h> - -extern void __iomem *gic_cpu_base_addr; /* * We use IRQ1 as the IPI */ -static inline void smp_cross_call(const struct cpumask *mask) +static inline void smp_cross_call(const struct cpumask *mask, int ipi) { - gic_raise_softirq(mask, 1); + gic_raise_softirq(mask, ipi); } #endif diff --git a/arch/arm/mach-s5pv310/platsmp.c b/arch/arm/mach-s5pv310/platsmp.c index d357c198edee..307ee7b72763 100644 --- a/arch/arm/mach-s5pv310/platsmp.c +++ b/arch/arm/mach-s5pv310/platsmp.c @@ -22,7 +22,6 @@ #include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/localtimer.h> #include <asm/smp_scu.h> #include <asm/unified.h> @@ -47,14 +46,12 @@ static DEFINE_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { - trace_hardirqs_off(); - /* * if any interrupts are already enabled for the primary * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, gic_cpu_base_addr); + gic_secondary_init(0); /* * let the primary processor know we're out of the @@ -97,7 +94,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) * the boot monitor to read the system wide flags register, * and branch to the address found there. */ - smp_cross_call(cpumask_of(cpu)); + smp_cross_call(cpumask_of(cpu), 1); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { @@ -130,13 +127,6 @@ void __init smp_init_cpus(void) ncores = scu_base ? scu_get_core_count(scu_base) : 1; /* sanity check */ - if (ncores == 0) { - printk(KERN_ERR - "S5PV310: strange CM count of 0? Default to 1\n"); - - ncores = 1; - } - if (ncores > NR_CPUS) { printk(KERN_WARNING "S5PV310: no. of cores (%d) greater than configured " @@ -149,18 +139,10 @@ void __init smp_init_cpus(void) set_cpu_possible(i, true); } -void __init smp_prepare_cpus(unsigned int max_cpus) +void __init platform_smp_prepare_cpus(unsigned int max_cpus) { - unsigned int ncores = num_possible_cpus(); - unsigned int cpu = smp_processor_id(); int i; - smp_store_cpu_info(cpu); - - /* are we trying to boot more cores than exist? */ - if (max_cpus > ncores) - max_cpus = ncores; - /* * Initialise the present map, which describes the set of CPUs * actually populated at the present time. @@ -168,25 +150,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); + scu_enable(scu_base_addr()); + /* - * Initialise the SCU if there are more than one CPU and let - * them know where to start. + * Write the address of secondary startup into the + * system-wide flags register. The boot monitor waits + * until it receives a soft interrupt, and then the + * secondary CPU branches to this address. */ - if (max_cpus > 1) { - /* - * Enable the local timer or broadcast device for the - * boot CPU, but only if we have more than one CPU. - */ - percpu_timer_setup(); - - scu_enable(scu_base_addr()); - - /* - * Write the address of secondary startup into the - * system-wide flags register. The boot monitor waits - * until it receives a soft interrupt, and then the - * secondary CPU branches to this address. - */ __raw_writel(BSYM(virt_to_phys(s5pv310_secondary_startup)), S5P_VA_SYSRAM); - } } diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c index 96f7dc103b59..07d4e8ba3719 100644 --- a/arch/arm/mach-sa1100/cpu-sa1100.c +++ b/arch/arm/mach-sa1100/cpu-sa1100.c @@ -94,48 +94,47 @@ #include "generic.h" -typedef struct { +struct sa1100_dram_regs { int speed; u32 mdcnfg; u32 mdcas0; u32 mdcas1; u32 mdcas2; -} sa1100_dram_regs_t; +}; static struct cpufreq_driver sa1100_driver; -static sa1100_dram_regs_t sa1100_dram_settings[] = -{ - /* speed, mdcnfg, mdcas0, mdcas1, mdcas2 clock frequency */ - { 59000, 0x00dc88a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 59.0 MHz */ - { 73700, 0x011490a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 73.7 MHz */ - { 88500, 0x014e90a3, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 88.5 MHz */ - { 103200, 0x01889923, 0xcccccccf, 0xfffffffc, 0xffffffff }, /* 103.2 MHz */ - { 118000, 0x01c29923, 0x9999998f, 0xfffffff9, 0xffffffff }, /* 118.0 MHz */ - { 132700, 0x01fb2123, 0x9999998f, 0xfffffff9, 0xffffffff }, /* 132.7 MHz */ - { 147500, 0x02352123, 0x3333330f, 0xfffffff3, 0xffffffff }, /* 147.5 MHz */ - { 162200, 0x026b29a3, 0x38e38e1f, 0xfff8e38e, 0xffffffff }, /* 162.2 MHz */ - { 176900, 0x02a329a3, 0x71c71c1f, 0xfff1c71c, 0xffffffff }, /* 176.9 MHz */ - { 191700, 0x02dd31a3, 0xe38e383f, 0xffe38e38, 0xffffffff }, /* 191.7 MHz */ - { 206400, 0x03153223, 0xc71c703f, 0xffc71c71, 0xffffffff }, /* 206.4 MHz */ - { 221200, 0x034fba23, 0xc71c703f, 0xffc71c71, 0xffffffff }, /* 221.2 MHz */ - { 235900, 0x03853a23, 0xe1e1e07f, 0xe1e1e1e1, 0xffffffe1 }, /* 235.9 MHz */ - { 250700, 0x03bf3aa3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3 }, /* 250.7 MHz */ - { 265400, 0x03f7c2a3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3 }, /* 265.4 MHz */ - { 280200, 0x0431c2a3, 0x878780ff, 0x87878787, 0xffffff87 }, /* 280.2 MHz */ +static struct sa1100_dram_regs sa1100_dram_settings[] = { + /*speed, mdcnfg, mdcas0, mdcas1, mdcas2, clock freq */ + { 59000, 0x00dc88a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 59.0 MHz */ + { 73700, 0x011490a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 73.7 MHz */ + { 88500, 0x014e90a3, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 88.5 MHz */ + {103200, 0x01889923, 0xcccccccf, 0xfffffffc, 0xffffffff},/* 103.2 MHz */ + {118000, 0x01c29923, 0x9999998f, 0xfffffff9, 0xffffffff},/* 118.0 MHz */ + {132700, 0x01fb2123, 0x9999998f, 0xfffffff9, 0xffffffff},/* 132.7 MHz */ + {147500, 0x02352123, 0x3333330f, 0xfffffff3, 0xffffffff},/* 147.5 MHz */ + {162200, 0x026b29a3, 0x38e38e1f, 0xfff8e38e, 0xffffffff},/* 162.2 MHz */ + {176900, 0x02a329a3, 0x71c71c1f, 0xfff1c71c, 0xffffffff},/* 176.9 MHz */ + {191700, 0x02dd31a3, 0xe38e383f, 0xffe38e38, 0xffffffff},/* 191.7 MHz */ + {206400, 0x03153223, 0xc71c703f, 0xffc71c71, 0xffffffff},/* 206.4 MHz */ + {221200, 0x034fba23, 0xc71c703f, 0xffc71c71, 0xffffffff},/* 221.2 MHz */ + {235900, 0x03853a23, 0xe1e1e07f, 0xe1e1e1e1, 0xffffffe1},/* 235.9 MHz */ + {250700, 0x03bf3aa3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3},/* 250.7 MHz */ + {265400, 0x03f7c2a3, 0xc3c3c07f, 0xc3c3c3c3, 0xffffffc3},/* 265.4 MHz */ + {280200, 0x0431c2a3, 0x878780ff, 0x87878787, 0xffffff87},/* 280.2 MHz */ { 0, 0, 0, 0, 0 } /* last entry */ }; static void sa1100_update_dram_timings(int current_speed, int new_speed) { - sa1100_dram_regs_t *settings = sa1100_dram_settings; + struct sa1100_dram_regs *settings = sa1100_dram_settings; /* find speed */ while (settings->speed != 0) { - if(new_speed == settings->speed) + if (new_speed == settings->speed) break; - + settings++; } @@ -149,7 +148,7 @@ static void sa1100_update_dram_timings(int current_speed, int new_speed) /* We're going FASTER, so first relax the memory * timings before changing the core frequency */ - + /* Half the memory access clock */ MDCNFG |= MDCNFG_CDB2; @@ -187,7 +186,7 @@ static int sa1100_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; new_ppcr = sa11x0_freq_to_ppcr(target_freq); - switch(relation){ + switch (relation) { case CPUFREQ_RELATION_L: if (sa11x0_ppcr_to_freq(new_ppcr) > policy->max) new_ppcr--; diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 51dcd59eda6a..632933357242 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -6,7 +6,7 @@ config ARCH_SH7367 bool "SH-Mobile G3 (SH7367)" select CPU_V6 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS @@ -14,7 +14,7 @@ config ARCH_SH7377 bool "SH-Mobile G4 (SH7377)" select CPU_V7 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS @@ -22,7 +22,7 @@ config ARCH_SH7372 bool "SH-Mobile AP4 (SH7372)" select CPU_V7 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index d440e5f456ad..ac429ff2c20d 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -61,6 +61,7 @@ #include <asm/mach/arch.h> #include <asm/mach/map.h> #include <asm/mach/time.h> +#include <asm/setup.h> /* * Address Interface BusWidth note diff --git a/arch/arm/mach-shmobile/clock-sh7367.c b/arch/arm/mach-shmobile/clock-sh7367.c index 9f78729098f2..6b186aefcbd6 100644 --- a/arch/arm/mach-shmobile/clock-sh7367.c +++ b/arch/arm/mach-shmobile/clock-sh7367.c @@ -20,8 +20,8 @@ #include <linux/kernel.h> #include <linux/io.h> #include <linux/sh_clk.h> +#include <linux/clkdev.h> #include <mach/common.h> -#include <asm/clkdev.h> /* SH7367 registers */ #define RTFRQCR 0xe6150000 diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 3aa026069435..d98deb497c2f 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -20,8 +20,8 @@ #include <linux/kernel.h> #include <linux/io.h> #include <linux/sh_clk.h> +#include <linux/clkdev.h> #include <mach/common.h> -#include <asm/clkdev.h> /* SH7372 registers */ #define FRQCRA 0xe6150000 diff --git a/arch/arm/mach-shmobile/clock-sh7377.c b/arch/arm/mach-shmobile/clock-sh7377.c index f91395aeb9ab..95942466e63f 100644 --- a/arch/arm/mach-shmobile/clock-sh7377.c +++ b/arch/arm/mach-shmobile/clock-sh7377.c @@ -20,8 +20,8 @@ #include <linux/kernel.h> #include <linux/io.h> #include <linux/sh_clk.h> +#include <linux/clkdev.h> #include <mach/common.h> -#include <asm/clkdev.h> /* SH7377 registers */ #define RTFRQCR 0xe6150000 diff --git a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt new file mode 100644 index 000000000000..e3ebfa73956e --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt @@ -0,0 +1,87 @@ +LIST "partner-jet-setup.txt" +LIST "(C) Copyright 2010 Renesas Solutions Corp" +LIST "Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>" + +LIST "RWT Setting" +EW 0xE6020004, 0xA500 +EW 0xE6030004, 0xA500 + +DD 0x01001000, 0x01001000 + +LIST "GPIO Setting" +EB 0xE6051013, 0xA2 + +LIST "CPG" +ED 0xE6150080, 0x00000180 +ED 0xE61500C0, 0x00000002 + +WAIT 1, 0xFE40009C + +LIST "FRQCR" +ED 0xE6150000, 0x2D1305C3 +ED 0xE61500E0, 0x9E40358E +ED 0xE6150004, 0x80331050 + +WAIT 1, 0xFE40009C + +ED 0xE61500E4, 0x00002000 + +WAIT 1, 0xFE40009C + +LIST "PLL" +ED 0xE6150028, 0x00004000 + +WAIT 1, 0xFE40009C + +ED 0xE615002C, 0x93000040 + +WAIT 1, 0xFE40009C + +LIST "BSC" +ED 0xFEC10000, 0x00E0001B + +LIST "SBSC1" +ED 0xFE400354, 0x01AD8000 +ED 0xFE400354, 0x01AD8001 + +WAIT 5, 0xFE40009C + +ED 0xFE400008, 0xBCC90151 +ED 0xFE400040, 0x41774113 +ED 0xFE400044, 0x2712E229 +ED 0xFE400048, 0x20C18505 +ED 0xFE40004C, 0x00110209 +ED 0xFE400010, 0x00000087 + +WAIT 10, 0xFE40009C + +ED 0xFE400084, 0x0000003F +EB 0xFE500000, 0x00 + +WAIT 5, 0xFE40009C + +ED 0xFE400084, 0x0000FF0A +EB 0xFE500000, 0x00 + +WAIT 1, 0xFE40009C + +ED 0xFE400084, 0x00002201 +EB 0xFE500000, 0x00 +ED 0xFE400084, 0x00000302 +EB 0xFE500000, 0x00 +EB 0xFE5C0000, 0x00 +ED 0xFE400008, 0xBCC90159 +ED 0xFE40008C, 0x88800004 +ED 0xFE400094, 0x00000004 +ED 0xFE400028, 0xA55A0032 +ED 0xFE40002C, 0xA55A000C +ED 0xFE400020, 0xA55A2048 +ED 0xFE400008, 0xBCC90959 + +LIST "Change CPGA setting" +ED 0xE61500E0, 0x9E40352E +ED 0xE6150004, 0x80331050 + +WAIT 1, 0xFE40009C + +ED 0xE6150354, 0x00000002 diff --git a/arch/arm/mach-shmobile/include/mach/zboot.h b/arch/arm/mach-shmobile/include/mach/zboot.h new file mode 100644 index 000000000000..3ad86b7708e9 --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/zboot.h @@ -0,0 +1,20 @@ +#ifndef ZBOOT_H +#define ZBOOT_H + +#include <asm/mach-types.h> +#include <mach/zboot_macros.h> + +/************************************************** + * + * board specific settings + * + **************************************************/ + +#ifdef CONFIG_MACH_AP4EVB +#define MACH_TYPE MACH_TYPE_AP4EVB +#include "mach/head-ap4evb.txt" +#else +#error "unsupported board." +#endif + +#endif /* ZBOOT_H */ diff --git a/arch/arm/mach-shmobile/include/mach/zboot_macros.h b/arch/arm/mach-shmobile/include/mach/zboot_macros.h new file mode 100644 index 000000000000..aa6111fbc989 --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/zboot_macros.h @@ -0,0 +1,65 @@ +#ifndef __ZBOOT_MACRO_H +#define __ZBOOT_MACRO_H + +/* The LIST command is used to include comments in the script */ +.macro LIST comment +.endm + +/* The ED command is used to write a 32-bit word */ +.macro ED, addr, data + LDR r0, 1f + LDR r1, 2f + STR r1, [r0] + B 3f +1 : .long \addr +2 : .long \data +3 : +.endm + +/* The EW command is used to write a 16-bit word */ +.macro EW, addr, data + LDR r0, 1f + LDR r1, 2f + STRH r1, [r0] + B 3f +1 : .long \addr +2 : .long \data +3 : +.endm + +/* The EB command is used to write an 8-bit word */ +.macro EB, addr, data + LDR r0, 1f + LDR r1, 2f + STRB r1, [r0] + B 3f +1 : .long \addr +2 : .long \data +3 : +.endm + +/* The WAIT command is used to delay the execution */ +.macro WAIT, time, reg + LDR r1, 1f + LDR r0, 2f + STR r0, [r1] +10 : + LDR r0, [r1] + CMP r0, #0x00000000 + BNE 10b + NOP + B 3f +1 : .long \reg +2 : .long \time * 100 +3 : +.endm + +/* The DD command is used to read a 32-bit word */ +.macro DD, start, end + LDR r1, 1f + B 2f +1 : .long \start +2 : +.endm + +#endif /* __ZBOOT_MACRO_H */ diff --git a/arch/arm/mach-tcc8k/clock.c b/arch/arm/mach-tcc8k/clock.c index ba32a15127ab..3970a9cdce26 100644 --- a/arch/arm/mach-tcc8k/clock.c +++ b/arch/arm/mach-tcc8k/clock.c @@ -12,8 +12,7 @@ #include <linux/io.h> #include <linux/module.h> #include <linux/spinlock.h> - -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include <mach/clock.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c index ae19f95585be..77948e0f4909 100644 --- a/arch/arm/mach-tegra/clock.c +++ b/arch/arm/mach-tegra/clock.c @@ -25,7 +25,7 @@ #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/regulator/consumer.h> -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include "clock.h" #include "board.h" diff --git a/arch/arm/mach-tegra/clock.h b/arch/arm/mach-tegra/clock.h index 94fd859770f1..083a4cfc6cf0 100644 --- a/arch/arm/mach-tegra/clock.h +++ b/arch/arm/mach-tegra/clock.h @@ -21,7 +21,7 @@ #define __MACH_TEGRA_CLOCK_H #include <linux/list.h> -#include <asm/clkdev.h> +#include <linux/clkdev.h> #define DIV_BUS (1 << 0) #define DIV_U71 (1 << 1) diff --git a/arch/arm/mach-tegra/hotplug.c b/arch/arm/mach-tegra/hotplug.c index 8e7f115aa21e..a5eeb9126645 100644 --- a/arch/arm/mach-tegra/hotplug.c +++ b/arch/arm/mach-tegra/hotplug.c @@ -11,12 +11,9 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/smp.h> -#include <linux/completion.h> #include <asm/cacheflush.h> -static DECLARE_COMPLETION(cpu_killed); - static inline void cpu_enter_lowpower(void) { unsigned int v; @@ -94,7 +91,7 @@ static inline void platform_do_lowpower(unsigned int cpu) int platform_cpu_kill(unsigned int cpu) { - return wait_for_completion_timeout(&cpu_killed, 5000); + return 1; } /* @@ -104,19 +101,6 @@ int platform_cpu_kill(unsigned int cpu) */ void platform_cpu_die(unsigned int cpu) { -#ifdef DEBUG - unsigned int this_cpu = hard_smp_processor_id(); - - if (cpu != this_cpu) { - printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n", - this_cpu, cpu); - BUG(); - } -#endif - - printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); - complete(&cpu_killed); - /* * we're ready for shutdown now, so do it */ diff --git a/arch/arm/mach-tegra/include/mach/entry-macro.S b/arch/arm/mach-tegra/include/mach/entry-macro.S index 2ba9e5c9d2f6..dd165c53889d 100644 --- a/arch/arm/mach-tegra/include/mach/entry-macro.S +++ b/arch/arm/mach-tegra/include/mach/entry-macro.S @@ -16,8 +16,8 @@ #include <mach/io.h> #if defined(CONFIG_ARM_GIC) - -#include <asm/hardware/gic.h> +#define HAVE_GET_IRQNR_PREAMBLE +#include <asm/hardware/entry-macro-gic.S> /* Uses the GIC interrupt controller built into the cpu */ #define ICTRL_BASE (IO_CPU_VIRT + 0x100) @@ -32,68 +32,6 @@ .macro arch_ret_to_user, tmp1, tmp2 .endm - - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an interrupt - * if it's between 30 and 1020. The test_for_ipi routine below will - * pick up on IPIs. - * - * A simple read from the controller will tell us the number of the - * highest priority enabled interrupt. We then just need to check - * whether it is in the valid range for an IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - - /* bits 12-10 = src CPU, 9-0 = int # */ - ldr \irqstat, [\base, #GIC_CPU_INTACK] - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - - .endm - - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt on the - * controller, since this requires the original irqstat value which - * we won't easily be able to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved.. */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm - #else /* legacy interrupt controller for AP16 */ .macro disable_fiq diff --git a/arch/arm/mach-tegra/include/mach/io.h b/arch/arm/mach-tegra/include/mach/io.h index f0981b1ac59e..4cea2230c8dc 100644 --- a/arch/arm/mach-tegra/include/mach/io.h +++ b/arch/arm/mach-tegra/include/mach/io.h @@ -65,8 +65,8 @@ #ifndef __ASSEMBLER__ -#define __arch_ioremap(p, s, t) tegra_ioremap(p, s, t) -#define __arch_iounmap(v) tegra_iounmap(v) +#define __arch_ioremap tegra_ioremap +#define __arch_iounmap tegra_iounmap void __iomem *tegra_ioremap(unsigned long phys, size_t size, unsigned int type); void tegra_iounmap(volatile void __iomem *addr); diff --git a/arch/arm/mach-tegra/include/mach/smp.h b/arch/arm/mach-tegra/include/mach/smp.h index e4a34a35a544..c8221b38ee7c 100644 --- a/arch/arm/mach-tegra/include/mach/smp.h +++ b/arch/arm/mach-tegra/include/mach/smp.h @@ -2,21 +2,13 @@ #define ASMARM_ARCH_SMP_H #include <asm/hardware/gic.h> -#include <asm/smp_mpidr.h> /* * We use IRQ1 as the IPI */ -static inline void smp_cross_call(const struct cpumask *mask) -{ - gic_raise_softirq(mask, 1); -} - -/* - * Do nothing on MPcore. - */ -static inline void smp_cross_call_done(cpumask_t callmap) +static inline void smp_cross_call(const struct cpumask *mask, int ipi) { + gic_raise_softirq(mask, ipi); } #endif diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c index 50a8dfb9a0cf..5407de01abf0 100644 --- a/arch/arm/mach-tegra/irq.c +++ b/arch/arm/mach-tegra/irq.c @@ -94,8 +94,8 @@ void __init tegra_init_irq(void) writel(0, ictlr_to_virt(i) + ICTLR_CPU_IEP_CLASS); } - gic_dist_init(0, IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE), 29); - gic_cpu_init(0, IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100)); + gic_init(0, 29, IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE), + IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100)); gic = get_irq_chip(29); gic_unmask_irq = gic->unmask; diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c index 1c0fd92cab39..ec1f68924edf 100644 --- a/arch/arm/mach-tegra/platsmp.c +++ b/arch/arm/mach-tegra/platsmp.c @@ -22,7 +22,6 @@ #include <asm/cacheflush.h> #include <mach/hardware.h> #include <asm/mach-types.h> -#include <asm/localtimer.h> #include <asm/smp_scu.h> #include <mach/iomap.h> @@ -41,14 +40,12 @@ static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE); void __cpuinit platform_secondary_init(unsigned int cpu) { - trace_hardirqs_off(); - /* * if any interrupts are already enabled for the primary * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x100); + gic_secondary_init(0); /* * Synchronise with the boot thread. @@ -117,24 +114,20 @@ void __init smp_init_cpus(void) { unsigned int i, ncores = scu_get_core_count(scu_base); + if (ncores > NR_CPUS) { + printk(KERN_ERR "Tegra: no. of cores (%u) greater than configured (%u), clipping\n", + ncores, NR_CPUS); + ncores = NR_CPUS; + } + for (i = 0; i < ncores; i++) cpu_set(i, cpu_possible_map); } -void __init smp_prepare_cpus(unsigned int max_cpus) +void __init platform_smp_prepare_cpus(unsigned int max_cpus) { - unsigned int ncores = scu_get_core_count(scu_base); - unsigned int cpu = smp_processor_id(); int i; - smp_store_cpu_info(cpu); - - /* - * are we trying to boot more cores than exist? - */ - if (max_cpus > ncores) - max_cpus = ncores; - /* * Initialise the present map, which describes the set of CPUs * actually populated at the present time. @@ -142,15 +135,5 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); - /* - * Initialise the SCU if there are more than one CPU and let - * them know where to start. Note that, on modern versions of - * MILO, the "poke" doesn't actually do anything until each - * individual core is sent a soft interrupt to get it out of - * WFI - */ - if (max_cpus > 1) { - percpu_timer_setup(); - scu_enable(scu_base); - } + scu_enable(scu_base); } diff --git a/arch/arm/mach-tegra/tegra2_clocks.c b/arch/arm/mach-tegra/tegra2_clocks.c index ae3b308e22a4..f0dae6d8ba52 100644 --- a/arch/arm/mach-tegra/tegra2_clocks.c +++ b/arch/arm/mach-tegra/tegra2_clocks.c @@ -24,8 +24,7 @@ #include <linux/delay.h> #include <linux/io.h> #include <linux/hrtimer.h> - -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include <mach/iomap.h> diff --git a/arch/arm/mach-u300/clock.c b/arch/arm/mach-u300/clock.c index 7458fc6df5c6..fabcc49abe80 100644 --- a/arch/arm/mach-u300/clock.c +++ b/arch/arm/mach-u300/clock.c @@ -25,8 +25,8 @@ #include <linux/timer.h> #include <linux/io.h> #include <linux/seq_file.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <mach/hardware.h> #include <mach/syscon.h> diff --git a/arch/arm/mach-ux500/clock.c b/arch/arm/mach-ux500/clock.c index 1675047daf20..531de5c63641 100644 --- a/arch/arm/mach-ux500/clock.c +++ b/arch/arm/mach-ux500/clock.c @@ -13,8 +13,7 @@ #include <linux/err.h> #include <linux/clk.h> #include <linux/io.h> - -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include <plat/mtu.h> #include <mach/hardware.h> diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 608a1372b172..7328c0179769 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -61,8 +61,8 @@ void __init ux500_init_devices(void) void __init ux500_init_irq(void) { - gic_dist_init(0, __io_address(UX500_GIC_DIST_BASE), 29); - gic_cpu_init(0, __io_address(UX500_GIC_CPU_BASE)); + gic_init(0, 29, __io_address(UX500_GIC_DIST_BASE), + __io_address(UX500_GIC_CPU_BASE)); /* * Init clocks here so that they are available for system timer diff --git a/arch/arm/mach-ux500/hotplug.c b/arch/arm/mach-ux500/hotplug.c index b782a03024be..dd8037ebccf8 100644 --- a/arch/arm/mach-ux500/hotplug.c +++ b/arch/arm/mach-ux500/hotplug.c @@ -11,14 +11,11 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/smp.h> -#include <linux/completion.h> #include <asm/cacheflush.h> extern volatile int pen_release; -static DECLARE_COMPLETION(cpu_killed); - static inline void platform_do_lowpower(unsigned int cpu) { flush_cache_all(); @@ -38,7 +35,7 @@ static inline void platform_do_lowpower(unsigned int cpu) int platform_cpu_kill(unsigned int cpu) { - return wait_for_completion_timeout(&cpu_killed, 5000); + return 1; } /* @@ -48,19 +45,6 @@ int platform_cpu_kill(unsigned int cpu) */ void platform_cpu_die(unsigned int cpu) { -#ifdef DEBUG - unsigned int this_cpu = hard_smp_processor_id(); - - if (cpu != this_cpu) { - printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n", - this_cpu, cpu); - BUG(); - } -#endif - - printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); - complete(&cpu_killed); - /* directly enter low power state, skipping secure registers */ platform_do_lowpower(cpu); } diff --git a/arch/arm/mach-ux500/include/mach/entry-macro.S b/arch/arm/mach-ux500/include/mach/entry-macro.S index 60ea88db8283..a37f585a3ecb 100644 --- a/arch/arm/mach-ux500/include/mach/entry-macro.S +++ b/arch/arm/mach-ux500/include/mach/entry-macro.S @@ -11,7 +11,8 @@ * warranty of any kind, whether express or implied. */ #include <mach/hardware.h> -#include <asm/hardware/gic.h> +#define HAVE_GET_IRQNR_PREAMBLE +#include <asm/hardware/entry-macro-gic.S> .macro disable_fiq .endm @@ -22,68 +23,3 @@ .macro arch_ret_to_user, tmp1, tmp2 .endm - - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an - * interrupt if it's between 30 and 1020. The test_for_ipi - * routine below will pick up on IPIs. - * - * A simple read from the controller will tell us the number - * of the highest priority enabled interrupt. We then just - * need to check whether it is in the valid range for an - * IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - - /* bits 12-10 = src CPU, 9-0 = int # */ - ldr \irqstat, [\base, #GIC_CPU_INTACK] - - ldr \tmp, =1021 - - bic \irqnr, \irqstat, #0x1c00 - - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - - .endm - - /* We assume that irqstat (the raw value of the IRQ - * acknowledge register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of - * interrupt on the controller, since this requires the - * original irqstat value which we won't easily be able - * to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base - * are preserved.. - */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm diff --git a/arch/arm/mach-ux500/include/mach/smp.h b/arch/arm/mach-ux500/include/mach/smp.h index 197e8417375e..ca2b15b1b3b1 100644 --- a/arch/arm/mach-ux500/include/mach/smp.h +++ b/arch/arm/mach-ux500/include/mach/smp.h @@ -10,7 +10,6 @@ #define ASMARM_ARCH_SMP_H #include <asm/hardware/gic.h> -#include <asm/smp_mpidr.h> /* This is required to wakeup the secondary core */ extern void u8500_secondary_startup(void); @@ -18,8 +17,8 @@ extern void u8500_secondary_startup(void); /* * We use IRQ1 as the IPI */ -static inline void smp_cross_call(const struct cpumask *mask) +static inline void smp_cross_call(const struct cpumask *mask, int ipi) { - gic_raise_softirq(mask, 1); + gic_raise_softirq(mask, ipi); } #endif diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index 9e4c678de785..7f0a83e592a7 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -18,7 +18,6 @@ #include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/localtimer.h> #include <asm/smp_scu.h> #include <mach/hardware.h> @@ -28,23 +27,16 @@ */ volatile int __cpuinitdata pen_release = -1; -static unsigned int __init get_core_count(void) -{ - return scu_get_core_count(__io_address(UX500_SCU_BASE)); -} - static DEFINE_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { - trace_hardirqs_off(); - /* * if any interrupts are already enabled for the primary * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, __io_address(UX500_GIC_CPU_BASE)); + gic_secondary_init(0); /* * let the primary processor know we're out of the @@ -78,7 +70,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) __cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release)); outer_clean_range(__pa(&pen_release), __pa(&pen_release) + 1); - smp_cross_call(cpumask_of(cpu)); + smp_cross_call(cpumask_of(cpu), 1); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { @@ -126,40 +118,26 @@ static void __init wakeup_secondary(void) */ void __init smp_init_cpus(void) { - unsigned int i, ncores = get_core_count(); - - for (i = 0; i < ncores; i++) - set_cpu_possible(i, true); -} + unsigned int i, ncores; -void __init smp_prepare_cpus(unsigned int max_cpus) -{ - unsigned int ncores = get_core_count(); - unsigned int cpu = smp_processor_id(); - int i; + ncores = scu_get_core_count(__io_address(UX500_SCU_BASE)); /* sanity check */ - if (ncores == 0) { - printk(KERN_ERR - "U8500: strange CM count of 0? Default to 1\n"); - ncores = 1; - } - - if (ncores > num_possible_cpus()) { + if (ncores > NR_CPUS) { printk(KERN_WARNING "U8500: no. of cores (%d) greater than configured " "maximum of %d - clipping\n", - ncores, num_possible_cpus()); - ncores = num_possible_cpus(); + ncores, NR_CPUS); + ncores = NR_CPUS; } - smp_store_cpu_info(cpu); + for (i = 0; i < ncores; i++) + set_cpu_possible(i, true); +} - /* - * are we trying to boot more cores than exist? - */ - if (max_cpus > ncores) - max_cpus = ncores; +void __init platform_smp_prepare_cpus(unsigned int max_cpus) +{ + int i; /* * Initialise the present map, which describes the set of CPUs @@ -168,13 +146,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); - if (max_cpus > 1) { - /* - * Enable the local timer or broadcast device for the - * boot CPU, but only if we have more than one CPU. - */ - percpu_timer_setup(); - scu_enable(__io_address(UX500_SCU_BASE)); - wakeup_secondary(); - } + scu_enable(__io_address(UX500_SCU_BASE)); + wakeup_secondary(); } diff --git a/arch/arm/mach-versatile/Kconfig b/arch/arm/mach-versatile/Kconfig index c781f30c8368..3f7b5e9d83c5 100644 --- a/arch/arm/mach-versatile/Kconfig +++ b/arch/arm/mach-versatile/Kconfig @@ -4,6 +4,7 @@ menu "Versatile platform type" config ARCH_VERSATILE_PB bool "Support Versatile/PB platform" select CPU_ARM926T + select MIGHT_HAVE_PCI default y help Include support for the ARM(R) Versatile/PB platform. diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index e38acb0f89c8..4f89864b3c6d 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -31,8 +31,8 @@ #include <linux/amba/pl022.h> #include <linux/io.h> #include <linux/gfp.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <asm/system.h> #include <asm/irq.h> #include <asm/leds.h> @@ -46,10 +46,9 @@ #include <asm/mach/irq.h> #include <asm/mach/time.h> #include <asm/mach/map.h> -#include <mach/clkdev.h> #include <mach/hardware.h> #include <mach/platform.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> #include "core.h" diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h index 57dd95ce41f9..362780d868de 100644 --- a/arch/arm/mach-vexpress/core.h +++ b/arch/arm/mach-vexpress/core.h @@ -22,5 +22,3 @@ struct map_desc; void v2m_map_io(struct map_desc *tile, size_t num); extern struct sys_timer v2m_timer; - -extern void __iomem *gic_cpu_base_addr; diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index fd25ccd7272f..e628402b754c 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -8,8 +8,8 @@ #include <linux/platform_device.h> #include <linux/amba/bus.h> #include <linux/amba/clcd.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <asm/pgtable.h> #include <asm/hardware/arm_timer.h> #include <asm/hardware/cache-l2x0.h> @@ -18,10 +18,9 @@ #include <asm/pmu.h> #include <asm/smp_twd.h> -#include <mach/clkdev.h> #include <mach/ct-ca9x4.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -60,13 +59,10 @@ static void __init ct_ca9x4_map_io(void) v2m_map_io(ct_ca9x4_io_desc, ARRAY_SIZE(ct_ca9x4_io_desc)); } -void __iomem *gic_cpu_base_addr; - static void __init ct_ca9x4_init_irq(void) { - gic_cpu_base_addr = MMIO_P2V(A9_MPCORE_GIC_CPU); - gic_dist_init(0, MMIO_P2V(A9_MPCORE_GIC_DIST), 29); - gic_cpu_init(0, gic_cpu_base_addr); + gic_init(0, 29, MMIO_P2V(A9_MPCORE_GIC_DIST), + MMIO_P2V(A9_MPCORE_GIC_CPU)); } #if 0 diff --git a/arch/arm/mach-vexpress/include/mach/entry-macro.S b/arch/arm/mach-vexpress/include/mach/entry-macro.S index 20e9fb514f0a..73c11297509e 100644 --- a/arch/arm/mach-vexpress/include/mach/entry-macro.S +++ b/arch/arm/mach-vexpress/include/mach/entry-macro.S @@ -1,67 +1,7 @@ -#include <asm/hardware/gic.h> +#include <asm/hardware/entry-macro-gic.S> .macro disable_fiq .endm - .macro get_irqnr_preamble, base, tmp - ldr \base, =gic_cpu_base_addr - ldr \base, [\base] - .endm - .macro arch_ret_to_user, tmp1, tmp2 .endm - - /* - * The interrupt numbering scheme is defined in the - * interrupt controller spec. To wit: - * - * Interrupts 0-15 are IPI - * 16-28 are reserved - * 29-31 are local. We allow 30 to be used for the watchdog. - * 32-1020 are global - * 1021-1022 are reserved - * 1023 is "spurious" (no interrupt) - * - * For now, we ignore all local interrupts so only return an interrupt if it's - * between 30 and 1020. The test_for_ipi routine below will pick up on IPIs. - * - * A simple read from the controller will tell us the number of the highest - * priority enabled interrupt. We then just need to check whether it is in the - * valid range for an IRQ (30-1020 inclusive). - */ - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldr \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 = src CPU, 9-0 = int # */ - ldr \tmp, =1021 - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #29 - cmpcc \irqnr, \irqnr - cmpne \irqnr, \tmp - cmpcs \irqnr, \irqnr - .endm - - /* We assume that irqstat (the raw value of the IRQ acknowledge - * register) is preserved from the macro above. - * If there is an IPI, we immediately signal end of interrupt on the - * controller, since this requires the original irqstat value which - * we won't easily be able to recreate later. - */ - - .macro test_for_ipi, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - cmp \irqnr, #16 - strcc \irqstat, [\base, #GIC_CPU_EOI] - cmpcs \irqnr, \irqnr - .endm - - /* As above, this assumes that irqstat and base are preserved.. */ - - .macro test_for_ltirq, irqnr, irqstat, base, tmp - bic \irqnr, \irqstat, #0x1c00 - mov \tmp, #0 - cmp \irqnr, #29 - moveq \tmp, #1 - streq \irqstat, [\base, #GIC_CPU_EOI] - cmp \tmp, #0 - .endm - diff --git a/arch/arm/mach-vexpress/include/mach/smp.h b/arch/arm/mach-vexpress/include/mach/smp.h index 5a6da4fd247e..4c05e4a9713a 100644 --- a/arch/arm/mach-vexpress/include/mach/smp.h +++ b/arch/arm/mach-vexpress/include/mach/smp.h @@ -2,13 +2,12 @@ #define __MACH_SMP_H #include <asm/hardware/gic.h> -#include <asm/smp_mpidr.h> /* * We use IRQ1 as the IPI */ -static inline void smp_cross_call(const struct cpumask *mask) +static inline void smp_cross_call(const struct cpumask *mask, int ipi) { - gic_raise_softirq(mask, 1); + gic_raise_softirq(mask, ipi); } #endif diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c index 670970699ba9..24f14936eb6f 100644 --- a/arch/arm/mach-vexpress/platsmp.c +++ b/arch/arm/mach-vexpress/platsmp.c @@ -17,7 +17,6 @@ #include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/localtimer.h> #include <asm/smp_scu.h> #include <asm/unified.h> @@ -44,14 +43,12 @@ static DEFINE_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { - trace_hardirqs_off(); - /* * if any interrupts are already enabled for the primary * core (e.g. timer irq), then they will not have been enabled * for us: do so */ - gic_cpu_init(0, gic_cpu_base_addr); + gic_secondary_init(0); /* * let the primary processor know we're out of the @@ -92,7 +89,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) * the boot monitor to read the system wide flags register, * and branch to the address found there. */ - smp_cross_call(cpumask_of(cpu)); + smp_cross_call(cpumask_of(cpu), 1); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { @@ -124,13 +121,6 @@ void __init smp_init_cpus(void) ncores = scu_base ? scu_get_core_count(scu_base) : 1; /* sanity check */ - if (ncores == 0) { - printk(KERN_ERR - "vexpress: strange CM count of 0? Default to 1\n"); - - ncores = 1; - } - if (ncores > NR_CPUS) { printk(KERN_WARNING "vexpress: no. of cores (%d) greater than configured " @@ -143,20 +133,10 @@ void __init smp_init_cpus(void) set_cpu_possible(i, true); } -void __init smp_prepare_cpus(unsigned int max_cpus) +void __init platform_smp_prepare_cpus(unsigned int max_cpus) { - unsigned int ncores = num_possible_cpus(); - unsigned int cpu = smp_processor_id(); int i; - smp_store_cpu_info(cpu); - - /* - * are we trying to boot more cores than exist? - */ - if (max_cpus > ncores) - max_cpus = ncores; - /* * Initialise the present map, which describes the set of CPUs * actually populated at the present time. @@ -164,27 +144,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); + scu_enable(scu_base_addr()); + /* - * Initialise the SCU if there are more than one CPU and let - * them know where to start. + * Write the address of secondary startup into the + * system-wide flags register. The boot monitor waits + * until it receives a soft interrupt, and then the + * secondary CPU branches to this address. */ - if (max_cpus > 1) { - /* - * Enable the local timer or broadcast device for the - * boot CPU, but only if we have more than one CPU. - */ - percpu_timer_setup(); - - scu_enable(scu_base_addr()); - - /* - * Write the address of secondary startup into the - * system-wide flags register. The boot monitor waits - * until it receives a soft interrupt, and then the - * secondary CPU branches to this address. - */ - writel(~0, MMIO_P2V(V2M_SYS_FLAGSCLR)); - writel(BSYM(virt_to_phys(vexpress_secondary_startup)), - MMIO_P2V(V2M_SYS_FLAGSSET)); - } + writel(~0, MMIO_P2V(V2M_SYS_FLAGSCLR)); + writel(BSYM(virt_to_phys(vexpress_secondary_startup)), + MMIO_P2V(V2M_SYS_FLAGSSET)); } diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 7eaa232180a5..314cedc0e09d 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -11,18 +11,17 @@ #include <linux/spinlock.h> #include <linux/sysdev.h> #include <linux/usb/isp1760.h> +#include <linux/clkdev.h> -#include <asm/clkdev.h> #include <asm/sizes.h> #include <asm/mach/flash.h> #include <asm/mach/map.h> #include <asm/mach/time.h> #include <asm/hardware/arm_timer.h> -#include <mach/clkdev.h> #include <mach/motherboard.h> -#include <plat/timer-sp.h> +#include <asm/hardware/timer-sp.h> #include "core.h" diff --git a/arch/arm/mach-w90x900/clock.h b/arch/arm/mach-w90x900/clock.h index c56ddab3d912..b88a1b16b2e9 100644 --- a/arch/arm/mach-w90x900/clock.h +++ b/arch/arm/mach-w90x900/clock.h @@ -10,7 +10,7 @@ * the Free Software Foundation; either version 2 of the License. */ -#include <asm/clkdev.h> +#include <linux/clkdev.h> void nuc900_clk_enable(struct clk *clk, int enable); void nuc900_subclk_enable(struct clk *clk, int enable); diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 4414a01e1e8a..8493ed04797a 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -599,6 +599,14 @@ config CPU_CP15_MPU help Processor has the CP15 register, which has MPU related registers. +config CPU_USE_DOMAINS + bool + depends on MMU + default y if !CPU_32v6K + help + This option enables or disables the use of domain switching + via the set_fs() function. + # # CPU supports 36-bit I/O # @@ -628,6 +636,33 @@ config ARM_THUMBEE Say Y here if you have a CPU with the ThumbEE extension and code to make use of it. Say N for code that can run on CPUs without ThumbEE. +config SWP_EMULATE + bool "Emulate SWP/SWPB instructions" + depends on CPU_V7 + select HAVE_PROC_CPU if PROC_FS + default y if SMP + help + ARMv6 architecture deprecates use of the SWP/SWPB instructions. + ARMv7 multiprocessing extensions introduce the ability to disable + these instructions, triggering an undefined instruction exception + when executed. Say Y here to enable software emulation of these + instructions for userspace (not kernel) using LDREX/STREX. + Also creates /proc/cpu/swp_emulation for statistics. + + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDREX/STREX rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y. + config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index d63b6c413758..00d74a04af3a 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -5,8 +5,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ iomap.o -obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \ - pgd.o mmu.o vmregion.o +obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ + mmap.o pgd.o mmu.o vmregion.o ifneq ($(CONFIG_MMU),y) obj-y += nommu.o diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ac6a36142fcd..a9bdfcda23f4 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -148,6 +148,7 @@ static int __init consistent_init(void) { int ret = 0; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; int i = 0; @@ -155,7 +156,15 @@ static int __init consistent_init(void) do { pgd = pgd_offset(&init_mm, base); - pmd = pmd_alloc(&init_mm, pgd, base); + + pud = pud_alloc(&init_mm, pgd, base); + if (!pud) { + printk(KERN_ERR "%s: no pud tables\n", __func__); + ret = -ENOMEM; + break; + } + + pmd = pmd_alloc(&init_mm, pud, base); if (!pmd) { printk(KERN_ERR "%s: no pmd tables\n", __func__); ret = -ENOMEM; diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 83e59f870426..01210dba0221 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -26,7 +26,7 @@ #include "mm.h" -static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE; +static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE; #if __LINUX_ARM_ARCH__ < 6 /* diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 1e21e125fe3a..00e9bc76982d 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -79,6 +79,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd)); do { + pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -90,7 +91,19 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; } - pmd = pmd_offset(pgd, addr); + pud = pud_offset(pgd, addr); + if (PTRS_PER_PUD != 1) + printk(", *pud=%08lx", pud_val(*pud)); + + if (pud_none(*pud)) + break; + + if (pud_bad(*pud)) { + printk("(bad)"); + break; + } + + pmd = pmd_offset(pud, addr); if (PTRS_PER_PMD != 1) printk(", *pmd=%08lx", pmd_val(*pmd)); @@ -108,7 +121,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) pte = pte_offset_map(pmd, addr); printk(", *pte=%08lx", pte_val(*pte)); - printk(", *ppte=%08lx", pte_val(pte[-PTRS_PER_PTE])); + printk(", *ppte=%08lx", pte_val(pte[PTE_HWTABLE_PTRS])); pte_unmap(pte); } while(0); @@ -388,6 +401,7 @@ do_translation_fault(unsigned long addr, unsigned int fsr, { unsigned int index; pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; if (addr < TASK_SIZE) @@ -406,12 +420,19 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (pgd_none(*pgd_k)) goto bad_area; - if (!pgd_present(*pgd)) set_pgd(pgd, *pgd_k); - pmd_k = pmd_offset(pgd_k, addr); - pmd = pmd_offset(pgd, addr); + pud = pud_offset(pgd, addr); + pud_k = pud_offset(pgd_k, addr); + + if (pud_none(*pud_k)) + goto bad_area; + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); /* * On ARM one Linux PGD entry contains two hardware entries (see page diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c new file mode 100644 index 000000000000..2be9139a4ef3 --- /dev/null +++ b/arch/arm/mm/idmap.c @@ -0,0 +1,90 @@ +#include <linux/kernel.h> + +#include <asm/cputype.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> + +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd = pmd_offset(pud, addr); + + addr = (addr & PMD_MASK) | prot; + pmd[0] = __pmd(addr); + addr += SECTION_SIZE; + pmd[1] = __pmd(addr); + flush_pmd_entry(pmd); +} + +static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + idmap_add_pmd(pud, addr, next, prot); + } while (pud++, addr = next, addr != end); +} + +void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + unsigned long prot, next; + + prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE; + if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) + prot |= PMD_BIT4; + + pgd += pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + idmap_add_pud(pgd, addr, next, prot); + } while (pgd++, addr = next, addr != end); +} + +#ifdef CONFIG_SMP +static void idmap_del_pmd(pud_t *pud, unsigned long addr, unsigned long end) +{ + pmd_t *pmd = pmd_offset(pud, addr); + pmd_clear(pmd); +} + +static void idmap_del_pud(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + idmap_del_pmd(pud, addr, next); + } while (pud++, addr = next, addr != end); +} + +void identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + unsigned long next; + + pgd += pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + idmap_del_pud(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} +#endif + +/* + * In order to soft-boot, we need to insert a 1:1 mapping in place of + * the user-mode pages. This will then ensure that we have predictable + * results when turning the mmu off + */ +void setup_mm_for_reboot(char mode) +{ + /* + * We need to access to user-mode page tables here. For kernel threads + * we don't have any user-mode mappings so we use the context that we + * "borrowed". + */ + identity_mapping_add(current->active_mm->pgd, 0, TASK_SIZE); + local_flush_tlb_all(); +} diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 6630620380a4..d2384106af9c 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -7,7 +7,7 @@ extern pmd_t *top_pmd; static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt) { - return pmd_offset(pgd, virt); + return pmd_offset(pud_offset(pgd, virt), virt); } static inline pmd_t *pmd_off_k(unsigned long virt) @@ -16,7 +16,7 @@ static inline pmd_t *pmd_off_k(unsigned long virt) } struct mem_type { - unsigned int prot_pte; + pteval_t prot_pte; unsigned int prot_l1; unsigned int prot_sect; unsigned int domain; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 72ad3e1f56cf..e94888bc5cd3 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -24,6 +24,7 @@ #include <asm/smp_plat.h> #include <asm/tlb.h> #include <asm/highmem.h> +#include <asm/traps.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -62,7 +63,7 @@ struct cachepolicy { const char policy[16]; unsigned int cr_mask; unsigned int pmd; - unsigned int pte; + pteval_t pte; }; static struct cachepolicy cache_policies[] __initdata = { @@ -190,7 +191,7 @@ void adjust_cr(unsigned long mask, unsigned long set) } #endif -#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE +#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN #define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE static struct mem_type mem_types[] = { @@ -235,19 +236,18 @@ static struct mem_type mem_types[] = { }, [MT_LOW_VECTORS] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_EXEC, + L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_USER, }, [MT_HIGH_VECTORS] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_USER | L_PTE_EXEC, + L_PTE_USER | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_USER, }, [MT_MEMORY] = { - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE | L_PTE_EXEC, + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, @@ -258,21 +258,20 @@ static struct mem_type mem_types[] = { }, [MT_MEMORY_NONCACHED] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE | L_PTE_EXEC | L_PTE_MT_BUFFERABLE, + L_PTE_MT_BUFFERABLE, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, [MT_MEMORY_DTCM] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE, + L_PTE_XN, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, .domain = DOMAIN_KERNEL, }, [MT_MEMORY_ITCM] = { - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE | L_PTE_EXEC, + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_KERNEL, }, @@ -479,7 +478,7 @@ static void __init build_mem_type_table(void) pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | - L_PTE_DIRTY | L_PTE_WRITE | kern_pgprot); + L_PTE_DIRTY | kern_pgprot); mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; @@ -535,7 +534,7 @@ static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned l { if (pmd_none(*pmd)) { pte_t *pte = early_alloc(2 * PTRS_PER_PTE * sizeof(pte_t)); - __pmd_populate(pmd, __pa(pte) | prot); + __pmd_populate(pmd, __pa(pte), prot); } BUG_ON(pmd_bad(*pmd)); return pte_offset_kernel(pmd, addr); @@ -552,11 +551,11 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_section(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys, +static void __init alloc_init_section(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, const struct mem_type *type) { - pmd_t *pmd = pmd_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); /* * Try a section mapping - end, addr and phys must all be aligned @@ -585,10 +584,24 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr, } } +static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long phys, const struct mem_type *type) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + alloc_init_section(pud, addr, next, phys, type); + phys += next - addr; + } while (pud++, addr = next, addr != end); +} + static void __init create_36bit_mapping(struct map_desc *md, const struct mem_type *type) { - unsigned long phys, addr, length, end; + unsigned long addr, length, end; + phys_addr_t phys; pgd_t *pgd; addr = md->virtual; @@ -631,7 +644,8 @@ static void __init create_36bit_mapping(struct map_desc *md, pgd = pgd_offset_k(addr); end = addr + length; do { - pmd_t *pmd = pmd_offset(pgd, addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); int i; for (i = 0; i < 16; i++) @@ -696,7 +710,7 @@ static void __init create_mapping(struct map_desc *md) do { unsigned long next = pgd_addr_end(addr, end); - alloc_init_section(pgd, addr, next, phys, type); + alloc_init_pud(pgd, addr, next, phys, type); phys += next - addr; addr = next; @@ -914,12 +928,11 @@ static void __init devicemaps_init(struct machine_desc *mdesc) { struct map_desc map; unsigned long addr; - void *vectors; /* * Allocate the vector page early. */ - vectors = early_alloc(PAGE_SIZE); + vectors_page = early_alloc(PAGE_SIZE); for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE) pmd_clear(pmd_off_k(addr)); @@ -959,7 +972,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc) * location (0xffff0000). If we aren't using high-vectors, also * create a mapping at the low-vectors virtual address. */ - map.pfn = __phys_to_pfn(virt_to_phys(vectors)); + map.pfn = __phys_to_pfn(virt_to_phys(vectors_page)); map.virtual = 0xffff0000; map.length = PAGE_SIZE; map.type = MT_HIGH_VECTORS; @@ -1044,38 +1057,3 @@ void __init paging_init(struct machine_desc *mdesc) empty_zero_page = virt_to_page(zero_page); __flush_dcache_page(NULL, empty_zero_page); } - -/* - * In order to soft-boot, we need to insert a 1:1 mapping in place of - * the user-mode pages. This will then ensure that we have predictable - * results when turning the mmu off - */ -void setup_mm_for_reboot(char mode) -{ - unsigned long base_pmdval; - pgd_t *pgd; - int i; - - /* - * We need to access to user-mode page tables here. For kernel threads - * we don't have any user-mode mappings so we use the context that we - * "borrowed". - */ - pgd = current->active_mm->pgd; - - base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT; - if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) - base_pmdval |= PMD_BIT4; - - for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) { - unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval; - pmd_t *pmd; - - pmd = pmd_off(pgd, i << PGDIR_SHIFT); - pmd[0] = __pmd(pmdval); - pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1))); - flush_pmd_entry(pmd); - } - - local_flush_tlb_all(); -} diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 69bbfc6645a6..f7fafb1741f4 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -17,14 +17,13 @@ #include "mm.h" -#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD) - /* * need to get a 16k page for level 1 */ -pgd_t *get_pgd_slow(struct mm_struct *mm) +pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *new_pgd, *init_pgd; + pud_t *new_pud, *init_pud; pmd_t *new_pmd, *init_pmd; pte_t *new_pte, *init_pte; @@ -32,14 +31,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) if (!new_pgd) goto no_pgd; - memset(new_pgd, 0, FIRST_KERNEL_PGD_NR * sizeof(pgd_t)); + memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* * Copy over the kernel and IO PGD entries */ init_pgd = pgd_offset_k(0); - memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR, - (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t)); + memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); @@ -48,7 +47,11 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) * On ARM, first page must always be allocated since it * contains the machine vectors. */ - new_pmd = pmd_alloc(mm, new_pgd, 0); + new_pud = pud_alloc(mm, new_pgd, 0); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); if (!new_pmd) goto no_pmd; @@ -56,7 +59,8 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) if (!new_pte) goto no_pte; - init_pmd = pmd_offset(init_pgd, 0); + init_pud = pud_offset(init_pgd, 0); + init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); set_pte_ext(new_pte, *init_pte, 0); pte_unmap(init_pte); @@ -68,33 +72,44 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) no_pte: pmd_free(mm, new_pmd); no_pmd: + pud_free(mm, new_pud); +no_pud: free_pages((unsigned long)new_pgd, 2); no_pgd: return NULL; } -void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd) +void pgd_free(struct mm_struct *mm, pgd_t *pgd_base) { + pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pgtable_t pte; - if (!pgd) + if (!pgd_base) return; - /* pgd is always present and good */ - pmd = pmd_off(pgd, 0); - if (pmd_none(*pmd)) - goto free; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - goto free; - } + pgd = pgd_base + pgd_index(0); + if (pgd_none_or_clear_bad(pgd)) + goto no_pgd; + + pud = pud_offset(pgd, 0); + if (pud_none_or_clear_bad(pud)) + goto no_pud; + + pmd = pmd_offset(pud, 0); + if (pmd_none_or_clear_bad(pmd)) + goto no_pmd; pte = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free(mm, pte); +no_pmd: + pud_clear(pud); pmd_free(mm, pmd); -free: - free_pages((unsigned long) pgd, 2); +no_pud: + pgd_clear(pgd); + pud_free(mm, pud); +no_pgd: + free_pages((unsigned long) pgd_base, 2); } diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 7d63beaf9745..c28ecf42205c 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -81,7 +81,7 @@ #if L_PTE_SHARED != PTE_EXT_SHARED #error PTE shared bit mismatch #endif -#if (L_PTE_EXEC+L_PTE_USER+L_PTE_WRITE+L_PTE_DIRTY+L_PTE_YOUNG+\ +#if (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED #error Invalid Linux PTE bit settings #endif @@ -99,6 +99,10 @@ * 110x 0 1 0 r/w r/o * 11x0 0 1 0 r/w r/o * 1111 0 1 1 r/w r/w + * + * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed: + * 110x 1 1 1 r/o r/o + * 11x0 1 1 1 r/o r/o */ .macro armv6_mt_table pfx \pfx\()_mt_table: @@ -121,7 +125,7 @@ .endm .macro armv6_set_pte_ext pfx - str r1, [r0], #-2048 @ linux version + str r1, [r0], #2048 @ linux version bic r3, r1, #0x000003fc bic r3, r3, #PTE_TYPE_MASK @@ -132,17 +136,20 @@ and r2, r1, #L_PTE_MT_MASK ldr r2, [ip, r2] - tst r1, #L_PTE_WRITE - tstne r1, #L_PTE_DIRTY - orreq r3, r3, #PTE_EXT_APX + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_DIRTY|L_PTE_RDONLY + orrne r3, r3, #PTE_EXT_APX tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 +#ifdef CONFIG_CPU_USE_DOMAINS + @ allow kernel read/write access to read-only user pages tstne r3, #PTE_EXT_APX bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 +#endif - tst r1, #L_PTE_EXEC - orreq r3, r3, #PTE_EXT_XN + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN orr r3, r3, r2 @@ -170,9 +177,9 @@ * 1111 0xff r/w r/w */ .macro armv3_set_pte_ext wc_disable=1 - str r1, [r0], #-2048 @ linux version + str r1, [r0], #2048 @ linux version - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits bic r2, r2, #PTE_TYPE_MASK @@ -181,7 +188,7 @@ tst r3, #L_PTE_USER @ user? orrne r2, r2, #PTE_SMALL_AP_URO_SRW - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? orreq r2, r2, #PTE_SMALL_AP_UNO_SRW tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? @@ -193,7 +200,7 @@ bicne r2, r2, #PTE_BUFFERABLE #endif .endif - str r2, [r0] @ hardware version + str r2, [r0] @ hardware version .endm @@ -213,9 +220,9 @@ * 1111 11 r/w r/w */ .macro xscale_set_pte_ext_prologue - str r1, [r0], #-2048 @ linux version + str r1, [r0] @ linux version - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits orr r2, r2, #PTE_TYPE_EXT @ extended page @@ -223,7 +230,7 @@ tst r3, #L_PTE_USER @ user? orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w @ combined with user -> user r/w .endm @@ -232,7 +239,7 @@ tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? movne r2, #0 @ no -> fault - str r2, [r0] @ hardware version + str r2, [r0, #2048]! @ hardware version mov ip, #0 mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line mcr p15, 0, ip, c7, c10, 4 @ data write barrier diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 9b9ff5d949fd..b49fab21517c 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -124,15 +124,13 @@ ENDPROC(cpu_v7_switch_mm) * Set a level 2 translation table entry. * * - ptep - pointer to level 2 translation table entry - * (hardware version is stored at -1024 bytes) + * (hardware version is stored at +2048 bytes) * - pte - PTE value to store * - ext - value for extended PTE bits */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - ARM( str r1, [r0], #-2048 ) @ linux version - THUMB( str r1, [r0] ) @ linux version - THUMB( sub r0, r0, #2048 ) + str r1, [r0] @ linux version bic r3, r1, #0x000003f0 bic r3, r3, #PTE_TYPE_MASK @@ -142,23 +140,26 @@ ENTRY(cpu_v7_set_pte_ext) tst r1, #1 << 4 orrne r3, r3, #PTE_EXT_TEX(1) - tst r1, #L_PTE_WRITE - tstne r1, #L_PTE_DIRTY - orreq r3, r3, #PTE_EXT_APX + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_RDONLY | L_PTE_DIRTY + orrne r3, r3, #PTE_EXT_APX tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 +#ifdef CONFIG_CPU_USE_DOMAINS + @ allow kernel read/write access to read-only user pages tstne r3, #PTE_EXT_APX bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 +#endif - tst r1, #L_PTE_EXEC - orreq r3, r3, #PTE_EXT_XN + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 - str r3, [r0] + str r3, [r0, #2048]! mcr p15, 0, r0, c7, c10, 1 @ flush_pte #endif mov pc, lr @@ -273,8 +274,6 @@ __v7_setup: ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) ALT_UP(orr r4, r4, #TTB_FLAGS_UP) mcr p15, 0, r4, c2, c0, 1 @ load TTB1 - mov r10, #0x1f @ domains 0, 1 = manager - mcr p15, 0, r10, c3, c0, 0 @ load domain access register /* * Memory region attributes with SCTLR.TRE=1 * @@ -313,6 +312,10 @@ __v7_setup: #ifdef CONFIG_CPU_ENDIAN_BE8 orr r6, r6, #1 << 25 @ big-endian page tables #endif +#ifdef CONFIG_SWP_EMULATE + orr r5, r5, #(1 << 10) @ set SW bit in "clear" + bic r6, r6, #(1 << 10) @ clear it in "mmuset" +#endif mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 523408c0bb38..5a37c5e45c41 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -500,8 +500,8 @@ ENTRY(cpu_xscale_set_pte_ext) @ @ Erratum 40: must set memory to write-through for user read-only pages @ - and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_WRITE) & ~(4 << 2) - teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER + and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2) + teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY moveq r1, #L_PTE_MT_WRITETHROUGH and r1, r1, #L_PTE_MT_MASK diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index 92c5bb7909f5..c9408434a855 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -11,13 +11,13 @@ choice config ARCH_OMAP1 bool "TI OMAP1" - select COMMON_CLKDEV + select CLKDEV_LOOKUP help "Systems based on omap7xx, omap15xx or omap16xx" config ARCH_OMAP2PLUS bool "TI OMAP2/3/4" - select COMMON_CLKDEV + select CLKDEV_LOOKUP help "Systems based on OMAP2, OMAP3 or OMAP4" diff --git a/arch/arm/plat-omap/include/plat/clkdev_omap.h b/arch/arm/plat-omap/include/plat/clkdev_omap.h index bb937f3fabed..4b2028ab4d2b 100644 --- a/arch/arm/plat-omap/include/plat/clkdev_omap.h +++ b/arch/arm/plat-omap/include/plat/clkdev_omap.h @@ -8,7 +8,7 @@ #ifndef __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H #define __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H -#include <asm/clkdev.h> +#include <linux/clkdev.h> struct omap_clk { u16 cpu; diff --git a/arch/arm/plat-omap/include/plat/io.h b/arch/arm/plat-omap/include/plat/io.h index 128b549c2796..204865f91d93 100644 --- a/arch/arm/plat-omap/include/plat/io.h +++ b/arch/arm/plat-omap/include/plat/io.h @@ -294,8 +294,8 @@ static inline void omap44xx_map_common_io(void) extern void omap2_init_common_hw(struct omap_sdrc_params *sdrc_cs0, struct omap_sdrc_params *sdrc_cs1); -#define __arch_ioremap(p,s,t) omap_ioremap(p,s,t) -#define __arch_iounmap(v) omap_iounmap(v) +#define __arch_ioremap omap_ioremap +#define __arch_iounmap omap_iounmap void __iomem *omap_ioremap(unsigned long phys, size_t size, unsigned int type); void omap_iounmap(volatile void __iomem *addr); diff --git a/arch/arm/plat-omap/include/plat/smp.h b/arch/arm/plat-omap/include/plat/smp.h index ecd6a488c497..7a10257909ef 100644 --- a/arch/arm/plat-omap/include/plat/smp.h +++ b/arch/arm/plat-omap/include/plat/smp.h @@ -18,7 +18,6 @@ #define OMAP_ARCH_SMP_H #include <asm/hardware/gic.h> -#include <asm/smp_mpidr.h> /* Needed for secondary core boot */ extern void omap_secondary_startup(void); @@ -29,9 +28,9 @@ extern u32 omap_read_auxcoreboot0(void); /* * We use Soft IRQ1 as the IPI */ -static inline void smp_cross_call(const struct cpumask *mask) +static inline void smp_cross_call(const struct cpumask *mask, int ipi) { - gic_raise_softirq(mask, 1); + gic_raise_softirq(mask, ipi); } #endif diff --git a/arch/arm/plat-spear/include/plat/clock.h b/arch/arm/plat-spear/include/plat/clock.h index 298bafc0a52f..2572260f990f 100644 --- a/arch/arm/plat-spear/include/plat/clock.h +++ b/arch/arm/plat-spear/include/plat/clock.h @@ -15,7 +15,7 @@ #define __PLAT_CLOCK_H #include <linux/list.h> -#include <asm/clkdev.h> +#include <linux/clkdev.h> #include <linux/types.h> /* clk structure flags */ diff --git a/arch/arm/plat-stmp3xxx/clock.c b/arch/arm/plat-stmp3xxx/clock.c index e593a2a801c6..2e712e17ce72 100644 --- a/arch/arm/plat-stmp3xxx/clock.c +++ b/arch/arm/plat-stmp3xxx/clock.c @@ -25,9 +25,9 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/io.h> +#include <linux/clkdev.h> #include <asm/mach-types.h> -#include <asm/clkdev.h> #include <mach/platform.h> #include <mach/regs-clkctrl.h> diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile index 5cf88e8427b1..aaa571d17924 100644 --- a/arch/arm/plat-versatile/Makefile +++ b/arch/arm/plat-versatile/Makefile @@ -1,5 +1,4 @@ obj-y := clock.o -obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o obj-$(CONFIG_ARCH_REALVIEW) += sched-clock.o obj-$(CONFIG_ARCH_VERSATILE) += sched-clock.o ifeq ($(CONFIG_LEDS_CLASS),y) diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c index 9768cf7e83d7..9696ddc238c9 100644 --- a/arch/arm/plat-versatile/sched-clock.c +++ b/arch/arm/plat-versatile/sched-clock.c @@ -20,6 +20,7 @@ */ #include <linux/cnt32_to_63.h> #include <linux/io.h> +#include <linux/sched.h> #include <asm/div64.h> #include <mach/hardware.h> |