summaryrefslogtreecommitdiff
path: root/arch/sparc/kernel
diff options
context:
space:
mode:
authorSam Ravnborg <sam@ravnborg.org>2008-12-03 03:11:52 -0800
committerDavid S. Miller <davem@davemloft.net>2008-12-04 09:17:21 -0800
commita88b5ba8bd8ac18aad65ee6c6a254e2e74876db3 (patch)
treeeb3d0ffaf53c3f7ec6083752c2097cecd1cb892a /arch/sparc/kernel
parentd670bd4f803c8b646acd20f3ba21e65458293faf (diff)
sparc,sparc64: unify kernel/
o Move all files from sparc64/kernel/ to sparc/kernel - rename as appropriate o Update sparc/Makefile to the changes o Update sparc/kernel/Makefile to include the sparc64 files NOTE: This commit changes link order on sparc64! Link order had to change for either of sparc32 and sparc64. And assuming sparc64 see more testing than sparc32 change link order on sparc64 where issues will be caught faster. Signed-off-by: Sam Ravnborg <sam@ravnborg.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/kernel')
-rw-r--r--arch/sparc/kernel/Makefile39
-rw-r--r--arch/sparc/kernel/audit.c83
-rw-r--r--arch/sparc/kernel/auxio_64.c149
-rw-r--r--arch/sparc/kernel/central.c268
-rw-r--r--arch/sparc/kernel/cherrs.S579
-rw-r--r--arch/sparc/kernel/chmc.c863
-rw-r--r--arch/sparc/kernel/compat_audit.c43
-rw-r--r--arch/sparc/kernel/cpu_64.c166
-rw-r--r--arch/sparc/kernel/ds.c1244
-rw-r--r--arch/sparc/kernel/dtlb_miss.S39
-rw-r--r--arch/sparc/kernel/dtlb_prot.S54
-rw-r--r--arch/sparc/kernel/ebus.c257
-rw-r--r--arch/sparc/kernel/entry.h195
-rw-r--r--arch/sparc/kernel/etrap_64.S236
-rw-r--r--arch/sparc/kernel/fpu_traps.S384
-rw-r--r--arch/sparc/kernel/ftrace.c76
-rw-r--r--arch/sparc/kernel/getsetcc.S24
-rw-r--r--arch/sparc/kernel/head_64.S900
-rw-r--r--arch/sparc/kernel/helpers.S63
-rw-r--r--arch/sparc/kernel/hvapi.c193
-rw-r--r--arch/sparc/kernel/hvcalls.S800
-rw-r--r--arch/sparc/kernel/hvtramp.S140
-rw-r--r--arch/sparc/kernel/idprom_64.c49
-rw-r--r--arch/sparc/kernel/init_task_64.c35
-rw-r--r--arch/sparc/kernel/iommu.c866
-rw-r--r--arch/sparc/kernel/iommu_common.h59
-rw-r--r--arch/sparc/kernel/irq_64.c1101
-rw-r--r--arch/sparc/kernel/itlb_miss.S39
-rw-r--r--arch/sparc/kernel/ivec.S51
-rw-r--r--arch/sparc/kernel/kgdb_64.c186
-rw-r--r--arch/sparc/kernel/kprobes.c593
-rw-r--r--arch/sparc/kernel/kstack.h60
-rw-r--r--arch/sparc/kernel/ktlb.S304
-rw-r--r--arch/sparc/kernel/ldc.c2378
-rw-r--r--arch/sparc/kernel/mdesc.c916
-rw-r--r--arch/sparc/kernel/misctrap.S97
-rw-r--r--arch/sparc/kernel/module_64.c213
-rw-r--r--arch/sparc/kernel/of_device_64.c898
-rw-r--r--arch/sparc/kernel/pci.c1095
-rw-r--r--arch/sparc/kernel/pci_common.c545
-rw-r--r--arch/sparc/kernel/pci_fire.c521
-rw-r--r--arch/sparc/kernel/pci_impl.h185
-rw-r--r--arch/sparc/kernel/pci_msi.c447
-rw-r--r--arch/sparc/kernel/pci_psycho.c618
-rw-r--r--arch/sparc/kernel/pci_sabre.c609
-rw-r--r--arch/sparc/kernel/pci_schizo.c1504
-rw-r--r--arch/sparc/kernel/pci_sun4v.c1033
-rw-r--r--arch/sparc/kernel/pci_sun4v.h92
-rw-r--r--arch/sparc/kernel/pci_sun4v_asm.S362
-rw-r--r--arch/sparc/kernel/power.c75
-rw-r--r--arch/sparc/kernel/process_64.c812
-rw-r--r--arch/sparc/kernel/prom_64.c1684
-rw-r--r--arch/sparc/kernel/psycho_common.c470
-rw-r--r--arch/sparc/kernel/psycho_common.h48
-rw-r--r--arch/sparc/kernel/ptrace_64.c1090
-rw-r--r--arch/sparc/kernel/reboot.c53
-rw-r--r--arch/sparc/kernel/rtrap_64.S450
-rw-r--r--arch/sparc/kernel/sbus.c674
-rw-r--r--arch/sparc/kernel/setup_64.c428
-rw-r--r--arch/sparc/kernel/signal32.c899
-rw-r--r--arch/sparc/kernel/signal_64.c617
-rw-r--r--arch/sparc/kernel/smp_64.c1412
-rw-r--r--arch/sparc/kernel/sparc_ksyms_64.c289
-rw-r--r--arch/sparc/kernel/spiterrs.S245
-rw-r--r--arch/sparc/kernel/sstate.c127
-rw-r--r--arch/sparc/kernel/stacktrace.c64
-rw-r--r--arch/sparc/kernel/starfire.c116
-rw-r--r--arch/sparc/kernel/sun4v_ivec.S341
-rw-r--r--arch/sparc/kernel/sun4v_tlb_miss.S428
-rw-r--r--arch/sparc/kernel/sys32.S367
-rw-r--r--arch/sparc/kernel/sys_sparc32.c682
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c914
-rw-r--r--arch/sparc/kernel/syscalls.S279
-rw-r--r--arch/sparc/kernel/sysfs.c313
-rw-r--r--arch/sparc/kernel/systbls.h51
-rw-r--r--arch/sparc/kernel/systbls_64.S159
-rw-r--r--arch/sparc/kernel/time_64.c862
-rw-r--r--arch/sparc/kernel/trampoline_64.S417
-rw-r--r--arch/sparc/kernel/traps_64.c2600
-rw-r--r--arch/sparc/kernel/tsb.S552
-rw-r--r--arch/sparc/kernel/ttable.S266
-rw-r--r--arch/sparc/kernel/una_asm_64.S146
-rw-r--r--arch/sparc/kernel/unaligned_64.c690
-rw-r--r--arch/sparc/kernel/us2e_cpufreq.c413
-rw-r--r--arch/sparc/kernel/us3_cpufreq.c274
-rw-r--r--arch/sparc/kernel/utrap.S29
-rw-r--r--arch/sparc/kernel/vio.c451
-rw-r--r--arch/sparc/kernel/viohs.c822
-rw-r--r--arch/sparc/kernel/visemul.c890
-rw-r--r--arch/sparc/kernel/winfixup.S156
90 files changed, 43306 insertions, 0 deletions
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 6558eea5f0bc..46439465c3b2 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -38,6 +38,19 @@ obj-$(CONFIG_SPARC32) += muldiv.o
obj-y += prom_$(BITS).o
obj-y += of_device_$(BITS).o
+obj-$(CONFIG_SPARC64) += reboot.o
+obj-$(CONFIG_SPARC64) += sysfs.o
+obj-$(CONFIG_SPARC64) += iommu.o
+obj-$(CONFIG_SPARC64) += central.o
+obj-$(CONFIG_SPARC64) += starfire.o
+obj-$(CONFIG_SPARC64) += power.o
+obj-$(CONFIG_SPARC64) += sbus.o
+obj-$(CONFIG_SPARC64) += ebus.o
+obj-$(CONFIG_SPARC64) += visemul.o
+obj-$(CONFIG_SPARC64) += hvapi.o
+obj-$(CONFIG_SPARC64) += sstate.o
+obj-$(CONFIG_SPARC64) += mdesc.o
+
# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
obj-$(CONFIG_SPARC32) += devres.o
devres-y := ../../../kernel/irq/devres.o
@@ -48,6 +61,7 @@ obj-$(CONFIG_SPARC32_PCI) += pcic.o
obj-$(CONFIG_SMP) += trampoline_$(BITS).o smp_$(BITS).o
obj-$(CONFIG_SPARC32_SMP) += sun4m_smp.o sun4d_smp.o
+obj-$(CONFIG_SPARC64_SMP) += hvtramp.o
obj-y += auxio_$(BITS).o
obj-$(CONFIG_SUN_PM) += apc.o pmc.o
@@ -56,3 +70,28 @@ obj-$(CONFIG_MODULES) += module_$(BITS).o
obj-$(CONFIG_MODULES) += sparc_ksyms_$(BITS).o
obj-$(CONFIG_SPARC_LED) += led.o
obj-$(CONFIG_KGDB) += kgdb_$(BITS).o
+
+
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+CFLAGS_REMOVE_ftrace.o := -pg
+
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+# sparc64 PCI
+obj-$(CONFIG_SPARC64_PCI) += pci.o pci_common.o psycho_common.o
+obj-$(CONFIG_SPARC64_PCI) += pci_psycho.o pci_sabre.o pci_schizo.o
+obj-$(CONFIG_SPARC64_PCI) += pci_sun4v.o pci_sun4v_asm.o pci_fire.o
+obj-$(CONFIG_PCI_MSI) += pci_msi.o
+
+obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o
+
+# sparc64 cpufreq
+obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
+obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
+obj-$(CONFIG_US3_MC) += chmc.o
+
+obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
+
+obj-$(CONFIG_AUDIT) += audit.o
+# audit--y is only defined when CONFIG_AUDIT expands to y, so compat_audit.o
+# is added below only when both CONFIG_AUDIT and CONFIG_COMPAT are enabled.
+audit--$(CONFIG_AUDIT) := compat_audit.o
+obj-$(CONFIG_COMPAT) += $(audit--y)
diff --git a/arch/sparc/kernel/audit.c b/arch/sparc/kernel/audit.c
new file mode 100644
index 000000000000..8fff0ac63d56
--- /dev/null
+++ b/arch/sparc/kernel/audit.c
@@ -0,0 +1,83 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+/*
+ * Syscall-number tables for the native audit classes.  Each table is built
+ * from the matching asm-generic list and ends with a ~0U entry; they are
+ * registered with audit_register_class() in audit_classes_init().
+ */
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+/*
+ * Returns 1 when CONFIG_COMPAT is enabled and @arch is AUDIT_ARCH_SPARC,
+ * 0 in every other case.
+ */
+int audit_classify_arch(int arch)
+{
+	int compat_arch = 0;
+
+#ifdef CONFIG_COMPAT
+	compat_arch = (arch == AUDIT_ARCH_SPARC);
+#endif
+	return compat_arch;
+}
+
+/*
+ * Map a syscall number to an audit classification code.
+ *
+ * With CONFIG_COMPAT, an @abi of AUDIT_ARCH_SPARC is delegated to
+ * sparc32_classify_syscall().  Otherwise open, openat, socketcall and
+ * execve map to 2, 3, 4 and 5 respectively, and everything else to 0.
+ */
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+#ifdef CONFIG_COMPAT
+	extern int sparc32_classify_syscall(unsigned);
+
+	if (abi == AUDIT_ARCH_SPARC)
+		return sparc32_classify_syscall(syscall);
+#endif
+	if (syscall == __NR_open)
+		return 2;
+	if (syscall == __NR_openat)
+		return 3;
+	if (syscall == __NR_socketcall)
+		return 4;
+	if (syscall == __NR_execve)
+		return 5;
+
+	return 0;
+}
+
+/*
+ * Register the audit syscall class tables at boot.
+ *
+ * With CONFIG_COMPAT the sparc32 tables are registered first, followed by
+ * the native tables defined in this file.  Return values of
+ * audit_register_class() are ignored; the initcall always returns 0.
+ */
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_COMPAT
+	extern __u32 sparc32_dir_class[];
+	extern __u32 sparc32_write_class[];
+	extern __u32 sparc32_read_class[];
+	extern __u32 sparc32_chattr_class[];
+	extern __u32 sparc32_signal_class[];
+	struct { int id; __u32 *list; } compat[] = {
+		{ AUDIT_CLASS_WRITE_32,     sparc32_write_class  },
+		{ AUDIT_CLASS_READ_32,      sparc32_read_class   },
+		{ AUDIT_CLASS_DIR_WRITE_32, sparc32_dir_class    },
+		{ AUDIT_CLASS_CHATTR_32,    sparc32_chattr_class },
+		{ AUDIT_CLASS_SIGNAL_32,    sparc32_signal_class },
+	};
+#endif
+	struct { int id; unsigned *list; } native[] = {
+		{ AUDIT_CLASS_WRITE,     write_class  },
+		{ AUDIT_CLASS_READ,      read_class   },
+		{ AUDIT_CLASS_DIR_WRITE, dir_class    },
+		{ AUDIT_CLASS_CHATTR,    chattr_class },
+		{ AUDIT_CLASS_SIGNAL,    signal_class },
+	};
+	unsigned int i;
+
+#ifdef CONFIG_COMPAT
+	for (i = 0; i < sizeof(compat) / sizeof(compat[0]); i++)
+		audit_register_class(compat[i].id, compat[i].list);
+#endif
+	for (i = 0; i < sizeof(native) / sizeof(native[0]); i++)
+		audit_register_class(native[i].id, native[i].list);
+
+	return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c
new file mode 100644
index 000000000000..8b67347d4221
--- /dev/null
+++ b/arch/sparc/kernel/auxio_64.c
@@ -0,0 +1,149 @@
+/* auxio.c: Probing for the Sparc AUXIO register at boot time.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * Refactoring for unified NCR/PCIO support 2002 Eric Brower (ebrower@usa.net)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/auxio.h>
+
+/* Mapped AUXIO register; stays NULL until auxio_probe() maps it. */
+void __iomem *auxio_register = NULL;
+EXPORT_SYMBOL(auxio_register);
+
+/* Parent bus of the AUXIO device, as determined by auxio_probe(). */
+enum auxio_type {
+ AUXIO_TYPE_NODEV,
+ AUXIO_TYPE_SBUS,
+ AUXIO_TYPE_EBUS
+};
+
+static enum auxio_type auxio_devtype = AUXIO_TYPE_NODEV;
+/* Held across the read-modify-write sequence in __auxio_rmw(). */
+static DEFINE_SPINLOCK(auxio_lock);
+
+/*
+ * Read-modify-write the AUXIO register under auxio_lock.
+ *
+ * @bits_on:  bits to set
+ * @bits_off: bits to clear (applied after @bits_on)
+ * @ebus:     non-zero for 32-bit readl/writel access, zero for byte-wide
+ *            sbus access; in the sbus case AUXIO_AUX1_MASK is also cleared
+ *            from the value written back.
+ *
+ * Does nothing when the register has not been mapped.
+ */
+static void __auxio_rmw(u8 bits_on, u8 bits_off, int ebus)
+{
+	unsigned long irq_flags;
+	u8 cur, val;
+
+	if (!auxio_register)
+		return;
+
+	spin_lock_irqsave(&auxio_lock, irq_flags);
+
+	if (ebus)
+		cur = (u8) readl(auxio_register);
+	else
+		cur = sbus_readb(auxio_register);
+
+	val = (cur | bits_on) & ~bits_off;
+
+	if (ebus) {
+		writel((u32) val, auxio_register);
+	} else {
+		val &= ~AUXIO_AUX1_MASK;
+		sbus_writeb(val, auxio_register);
+	}
+
+	spin_unlock_irqrestore(&auxio_lock, irq_flags);
+}
+
+/*
+ * Set or clear one AUXIO register bit.
+ *
+ * @bit:  mask of the bit to change
+ * @on:   non-zero to set @bit, zero to clear it
+ * @ebus: access mode, passed through to __auxio_rmw()
+ *
+ * The mask must come from @bit.  Previously it was recomputed here as the
+ * LED bit regardless of the argument, so a request for any other bit
+ * (e.g. AUXIO_AUX1_LTE) ended up toggling the LED instead.
+ */
+static void __auxio_set_bit(u8 bit, int on, int ebus)
+{
+	u8 bits_on = on ? bit : 0;
+	u8 bits_off = on ? 0 : bit;
+
+	__auxio_rmw(bits_on, bits_off, ebus);
+}
+
+/*
+ * Switch the LED on (@on non-zero) or off, picking the LED bit and the
+ * access mode according to the bus type recorded by auxio_probe().
+ */
+void auxio_set_led(int on)
+{
+	if (auxio_devtype == AUXIO_TYPE_EBUS)
+		__auxio_set_bit(AUXIO_PCIO_LED, on, 1);
+	else
+		__auxio_set_bit(AUXIO_AUX1_LED, on, 0);
+}
+
+/* SBUS helper: hands AUXIO_AUX1_LTE and @on to __auxio_set_bit() with ebus = 0. */
+static void __auxio_sbus_set_lte(int on)
+{
+ __auxio_set_bit(AUXIO_AUX1_LTE, on, 0);
+}
+
+/*
+ * Change the LTE setting.  Only acted on for SBUS AUXIO devices; for EBUS
+ * or when no device was found this is a no-op.
+ */
+void auxio_set_lte(int on)
+{
+	if (auxio_devtype == AUXIO_TYPE_SBUS)
+		__auxio_sbus_set_lte(on);
+}
+
+/* Match table for auxio_driver: matches nodes named "auxio"; the empty entry terminates the table. */
+static struct of_device_id __initdata auxio_match[] = {
+ {
+ .name = "auxio",
+ },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, auxio_match);
+
+/*
+ * Probe an "auxio" node.
+ *
+ * The parent node's name selects the bus type and register width (32 bits
+ * on "ebus", one byte on "sbus"); any other parent is rejected.  The
+ * register is then mapped into auxio_register, and on EBUS the LED is
+ * switched on.
+ *
+ * Returns 0 on success, -ENODEV for an unknown parent bus or a failed
+ * mapping.
+ */
+static int __devinit auxio_probe(struct of_device *dev, const struct of_device_id *match)
+{
+	struct device_node *dp = dev->node;
+	const char *bus = dp->parent->name;
+	unsigned long reg_size;
+
+	if (strcmp(bus, "ebus") == 0) {
+		auxio_devtype = AUXIO_TYPE_EBUS;
+		reg_size = sizeof(u32);
+	} else if (strcmp(bus, "sbus") == 0) {
+		auxio_devtype = AUXIO_TYPE_SBUS;
+		reg_size = 1;
+	} else {
+		printk("auxio: Unknown parent bus type [%s]\n", bus);
+		return -ENODEV;
+	}
+
+	auxio_register = of_ioremap(&dev->resource[0], 0, reg_size, "auxio");
+	if (auxio_register == NULL)
+		return -ENODEV;
+
+	printk(KERN_INFO "AUXIO: Found device at %s\n", dp->full_name);
+
+	if (auxio_devtype == AUXIO_TYPE_EBUS)
+		auxio_set_led(AUXIO_LED_ON);
+
+	return 0;
+}
+
+/* OF platform driver binding auxio_match to auxio_probe(); no remove hook is set. */
+static struct of_platform_driver auxio_driver = {
+ .match_table = auxio_match,
+ .probe = auxio_probe,
+ .driver = {
+ .name = "auxio",
+ },
+};
+
+/* Registers auxio_driver on of_platform_bus_type and returns the result of of_register_driver(). */
+static int __init auxio_init(void)
+{
+ return of_register_driver(&auxio_driver, &of_platform_bus_type);
+}
+
+/* Must be after subsys_initcall() so that busses are probed. Must
+ * be before device_initcall() because things like the floppy driver
+ * need to use the AUXIO register.
+ */
+fs_initcall(auxio_init);
diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c
new file mode 100644
index 000000000000..05f1c916db06
--- /dev/null
+++ b/arch/sparc/kernel/central.c
@@ -0,0 +1,268 @@
+/* central.c: Central FHC driver for Sunfire/Starfire/Wildfire.
+ *
+ * Copyright (C) 1997, 1999, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <asm/fhc.h>
+#include <asm/upa.h>
+
+/* Per-device state for a "clock-board" node, filled in by clock_board_probe(). */
+struct clock_board {
+ /* Mapping of resource[0]. */
+ void __iomem *clock_freq_regs;
+ /* Mapping of resource[1]; CLOCK_STAT1 and CLOCK_CTRL are read relative to it. */
+ void __iomem *clock_regs;
+ /* Mapping of resource[2]; left NULL when that resource has no flags set. */
+ void __iomem *clock_ver_reg;
+ /* Slot count computed by clock_board_calc_nslots(). */
+ int num_slots;
+ /* Resource and platform device for the "sunfire-clockboard-leds" child. */
+ struct resource leds_resource;
+ struct platform_device leds_pdev;
+};
+
+/* Per-device state for an "fhc" node, filled in by fhc_probe(). */
+struct fhc {
+ /* Mapping of resource[0]. */
+ void __iomem *pregs;
+ /* True when the parent node is named "central". */
+ bool central;
+ /* True for a non-central FHC with FHC_JTAG_CTRL_MENAB set in FHC_PREGS_JCTRL. */
+ bool jtag_master;
+ /* From FHC_PREGS_BSR on a central FHC, otherwise from the "board#" property. */
+ int board_num;
+ /* Resource and platform device for the "sunfire-fhc-leds" child (non-central only). */
+ struct resource leds_resource;
+ struct platform_device leds_pdev;
+};
+
+/*
+ * Derive the slot count from the top two bits of CLOCK_STAT1.
+ *
+ * 0x40 -> 16 slots, 0xc0 -> 8 slots.  For 0x80 the version register (when
+ * mapped) decides: a non-zero value with bit 0x80 clear means 5 slots.
+ * Every remaining combination yields 4.
+ */
+static int __devinit clock_board_calc_nslots(struct clock_board *p)
+{
+	u8 stat = upa_readb(p->clock_regs + CLOCK_STAT1) & 0xc0;
+	u8 ver;
+
+	if (stat == 0x40)
+		return 16;
+	if (stat == 0xc0)
+		return 8;
+	if (stat != 0x80)
+		return 4;
+
+	ver = 0;
+	if (p->clock_ver_reg)
+		ver = upa_readb(p->clock_ver_reg);
+
+	if (ver && !(ver & 0x80))
+		return 5;
+
+	return 4;
+}
+
+/*
+ * Probe a "clock-board" node.
+ *
+ * Allocates a struct clock_board, maps resource[0] and resource[1] (and
+ * resource[2] when it has flags set), computes the slot count, and
+ * registers a "sunfire-clockboard-leds" platform device whose single
+ * resource points at the CLOCK_CTRL register.
+ *
+ * Returns 0 on success.  Allocation and mapping failures return -ENOMEM;
+ * a platform_device_register() failure returns its error code.  Every
+ * failure path unmaps what was mapped and frees the structure.
+ */
+static int __devinit clock_board_probe(struct of_device *op,
+				       const struct of_device_id *match)
+{
+	struct clock_board *p = kzalloc(sizeof(*p), GFP_KERNEL);
+	int err = -ENOMEM;
+
+	if (!p) {
+		printk(KERN_ERR "clock_board: Cannot allocate struct clock_board\n");
+		goto out;
+	}
+
+	p->clock_freq_regs = of_ioremap(&op->resource[0], 0,
+					resource_size(&op->resource[0]),
+					"clock_board_freq");
+	if (!p->clock_freq_regs) {
+		printk(KERN_ERR "clock_board: Cannot map clock_freq_regs\n");
+		goto out_free;
+	}
+
+	p->clock_regs = of_ioremap(&op->resource[1], 0,
+				   resource_size(&op->resource[1]),
+				   "clock_board_regs");
+	if (!p->clock_regs) {
+		printk(KERN_ERR "clock_board: Cannot map clock_regs\n");
+		goto out_unmap_clock_freq_regs;
+	}
+
+	/* The version register is optional. */
+	if (op->resource[2].flags) {
+		p->clock_ver_reg = of_ioremap(&op->resource[2], 0,
+					      resource_size(&op->resource[2]),
+					      "clock_ver_reg");
+		if (!p->clock_ver_reg) {
+			printk(KERN_ERR "clock_board: Cannot map clock_ver_reg\n");
+			goto out_unmap_clock_regs;
+		}
+	}
+
+	p->num_slots = clock_board_calc_nslots(p);
+
+	p->leds_resource.start = (unsigned long)
+		(p->clock_regs + CLOCK_CTRL);
+	/* Single-register resource: end must equal start.  The previous
+	 * self-assignment left end at 0 from kzalloc().
+	 */
+	p->leds_resource.end = p->leds_resource.start;
+	p->leds_resource.name = "leds";
+
+	p->leds_pdev.name = "sunfire-clockboard-leds";
+	p->leds_pdev.resource = &p->leds_resource;
+	p->leds_pdev.num_resources = 1;
+	p->leds_pdev.dev.parent = &op->dev;
+
+	err = platform_device_register(&p->leds_pdev);
+	if (err) {
+		printk(KERN_ERR "clock_board: Could not register LEDS "
+		       "platform device\n");
+		goto out_unmap_clock_ver_reg;
+	}
+
+	printk(KERN_INFO "clock_board: Detected %d slot Enterprise system.\n",
+	       p->num_slots);
+
+	err = 0;
+out:
+	return err;
+
+out_unmap_clock_ver_reg:
+	if (p->clock_ver_reg)
+		of_iounmap(&op->resource[2], p->clock_ver_reg,
+			   resource_size(&op->resource[2]));
+
+out_unmap_clock_regs:
+	of_iounmap(&op->resource[1], p->clock_regs,
+		   resource_size(&op->resource[1]));
+
+out_unmap_clock_freq_regs:
+	of_iounmap(&op->resource[0], p->clock_freq_regs,
+		   resource_size(&op->resource[0]));
+
+out_free:
+	kfree(p);
+	goto out;
+}
+
+/* Match table: nodes named "clock-board"; the empty entry terminates the table. */
+static struct of_device_id __initdata clock_board_match[] = {
+ {
+ .name = "clock-board",
+ },
+ {},
+};
+
+/* OF platform driver binding clock_board_match to clock_board_probe(). */
+static struct of_platform_driver clock_board_driver = {
+ .match_table = clock_board_match,
+ .probe = clock_board_probe,
+ .driver = {
+ .name = "clock_board",
+ },
+};
+
+/*
+ * Probe an "fhc" node.
+ *
+ * Allocates a struct fhc and maps resource[0].  A central FHC (parent node
+ * named "central") takes its board number from FHC_PREGS_BSR; any other
+ * FHC takes it from the "board#" property, is checked for JTAG master
+ * status, and gets a "sunfire-fhc-leds" platform device whose single
+ * resource points at FHC_PREGS_CTRL.  Finally the control register is
+ * updated (IXIST set on non-central boards; AOFF, BOFF and SLINE cleared)
+ * and the board ID is logged.
+ *
+ * Returns 0 on success.  Allocation or mapping failure and a missing
+ * "board#" property return -ENOMEM; a platform_device_register() failure
+ * returns its error code.
+ */
+static int __devinit fhc_probe(struct of_device *op,
+			       const struct of_device_id *match)
+{
+	struct fhc *p = kzalloc(sizeof(*p), GFP_KERNEL);
+	int err = -ENOMEM;
+	u32 reg;
+
+	if (!p) {
+		printk(KERN_ERR "fhc: Cannot allocate struct fhc\n");
+		goto out;
+	}
+
+	if (!strcmp(op->node->parent->name, "central"))
+		p->central = true;
+
+	p->pregs = of_ioremap(&op->resource[0], 0,
+			      resource_size(&op->resource[0]),
+			      "fhc_pregs");
+	if (!p->pregs) {
+		printk(KERN_ERR "fhc: Cannot map pregs\n");
+		goto out_free;
+	}
+
+	if (p->central) {
+		reg = upa_readl(p->pregs + FHC_PREGS_BSR);
+		p->board_num = ((reg >> 16) & 1) | ((reg >> 12) & 0x0e);
+	} else {
+		p->board_num = of_getintprop_default(op->node, "board#", -1);
+		if (p->board_num == -1) {
+			printk(KERN_ERR "fhc: No board# property\n");
+			goto out_unmap_pregs;
+		}
+		if (upa_readl(p->pregs + FHC_PREGS_JCTRL) & FHC_JTAG_CTRL_MENAB)
+			p->jtag_master = true;
+	}
+
+	if (!p->central) {
+		p->leds_resource.start = (unsigned long)
+			(p->pregs + FHC_PREGS_CTRL);
+		/* Single-register resource: end must equal start.  The
+		 * previous self-assignment left end at 0 from kzalloc().
+		 */
+		p->leds_resource.end = p->leds_resource.start;
+		p->leds_resource.name = "leds";
+
+		p->leds_pdev.name = "sunfire-fhc-leds";
+		p->leds_pdev.resource = &p->leds_resource;
+		p->leds_pdev.num_resources = 1;
+		p->leds_pdev.dev.parent = &op->dev;
+
+		err = platform_device_register(&p->leds_pdev);
+		if (err) {
+			printk(KERN_ERR "fhc: Could not register LEDS "
+			       "platform device\n");
+			goto out_unmap_pregs;
+		}
+	}
+	reg = upa_readl(p->pregs + FHC_PREGS_CTRL);
+
+	if (!p->central)
+		reg |= FHC_CONTROL_IXIST;
+
+	reg &= ~(FHC_CONTROL_AOFF |
+		 FHC_CONTROL_BOFF |
+		 FHC_CONTROL_SLINE);
+
+	upa_writel(reg, p->pregs + FHC_PREGS_CTRL);
+	/* Read the register back after the write; the value is discarded. */
+	upa_readl(p->pregs + FHC_PREGS_CTRL);
+
+	reg = upa_readl(p->pregs + FHC_PREGS_ID);
+	printk(KERN_INFO "fhc: Board #%d, Version[%x] PartID[%x] Manuf[%x] %s\n",
+	       p->board_num,
+	       (reg & FHC_ID_VERS) >> 28,
+	       (reg & FHC_ID_PARTID) >> 12,
+	       (reg & FHC_ID_MANUF) >> 1,
+	       (p->jtag_master ?
+		"(JTAG Master)" :
+		(p->central ? "(Central)" : "")));
+
+	err = 0;
+
+out:
+	return err;
+
+out_unmap_pregs:
+	of_iounmap(&op->resource[0], p->pregs, resource_size(&op->resource[0]));
+
+out_free:
+	kfree(p);
+	goto out;
+}
+
+/* Match table: nodes named "fhc"; the empty entry terminates the table. */
+static struct of_device_id __initdata fhc_match[] = {
+ {
+ .name = "fhc",
+ },
+ {},
+};
+
+/* OF platform driver binding fhc_match to fhc_probe(). */
+static struct of_platform_driver fhc_driver = {
+ .match_table = fhc_match,
+ .probe = fhc_probe,
+ .driver = {
+ .name = "fhc",
+ },
+};
+
+/*
+ * Registers the fhc driver and then the clock_board driver.  The results
+ * of of_register_driver() are deliberately discarded (void casts), so this
+ * initcall always returns 0.
+ */
+static int __init sunfire_init(void)
+{
+ (void) of_register_driver(&fhc_driver, &of_platform_bus_type);
+ (void) of_register_driver(&clock_board_driver, &of_platform_bus_type);
+ return 0;
+}
+
+subsys_initcall(sunfire_init);
diff --git a/arch/sparc/kernel/cherrs.S b/arch/sparc/kernel/cherrs.S
new file mode 100644
index 000000000000..4ee1ad420862
--- /dev/null
+++ b/arch/sparc/kernel/cherrs.S
@@ -0,0 +1,579 @@
+ /* These get patched into the trap table at boot time
+ * once we know we have a cheetah processor.
+ */
+ /* Fast-ECC vector: clears DCU_DC and DCU_IC in the DCU control
+ * register, then jumps to cheetah_fast_ecc with %g1 = 0 (the mov
+ * is the instruction following the jmpl).
+ */
+ .globl cheetah_fecc_trap_vector
+ .type cheetah_fecc_trap_vector,#function
+cheetah_fecc_trap_vector:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ andn %g1, DCU_DC | DCU_IC, %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ sethi %hi(cheetah_fast_ecc), %g2
+ jmpl %g2 + %lo(cheetah_fast_ecc), %g0
+ mov 0, %g1
+ .size cheetah_fecc_trap_vector,.-cheetah_fecc_trap_vector
+
+ /* Same as cheetah_fecc_trap_vector, but passes %g1 = 1 to
+ * cheetah_fast_ecc.
+ */
+ .globl cheetah_fecc_trap_vector_tl1
+ .type cheetah_fecc_trap_vector_tl1,#function
+cheetah_fecc_trap_vector_tl1:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ andn %g1, DCU_DC | DCU_IC, %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ sethi %hi(cheetah_fast_ecc), %g2
+ jmpl %g2 + %lo(cheetah_fast_ecc), %g0
+ mov 1, %g1
+ .size cheetah_fecc_trap_vector_tl1,.-cheetah_fecc_trap_vector_tl1
+
+ /* CEE vector: clears only DCU_IC in the DCU control register,
+ * then jumps to cheetah_cee with %g1 = 0.
+ */
+ .globl cheetah_cee_trap_vector
+ .type cheetah_cee_trap_vector,#function
+cheetah_cee_trap_vector:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ andn %g1, DCU_IC, %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ sethi %hi(cheetah_cee), %g2
+ jmpl %g2 + %lo(cheetah_cee), %g0
+ mov 0, %g1
+ .size cheetah_cee_trap_vector,.-cheetah_cee_trap_vector
+
+ /* Same as cheetah_cee_trap_vector, but passes %g1 = 1 to
+ * cheetah_cee.
+ */
+ .globl cheetah_cee_trap_vector_tl1
+ .type cheetah_cee_trap_vector_tl1,#function
+cheetah_cee_trap_vector_tl1:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ andn %g1, DCU_IC, %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ sethi %hi(cheetah_cee), %g2
+ jmpl %g2 + %lo(cheetah_cee), %g0
+ mov 1, %g1
+ .size cheetah_cee_trap_vector_tl1,.-cheetah_cee_trap_vector_tl1
+
+ /* Deferred-trap vector: clears DCU_DC and DCU_IC in the DCU
+ * control register, then jumps to cheetah_deferred_trap with
+ * %g1 = 0.
+ */
+ .globl cheetah_deferred_trap_vector
+ .type cheetah_deferred_trap_vector,#function
+cheetah_deferred_trap_vector:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1;
+ andn %g1, DCU_DC | DCU_IC, %g1;
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG;
+ membar #Sync;
+ sethi %hi(cheetah_deferred_trap), %g2
+ jmpl %g2 + %lo(cheetah_deferred_trap), %g0
+ mov 0, %g1
+ .size cheetah_deferred_trap_vector,.-cheetah_deferred_trap_vector
+
+ /* Same as cheetah_deferred_trap_vector, but passes %g1 = 1 to
+ * cheetah_deferred_trap.
+ */
+ .globl cheetah_deferred_trap_vector_tl1
+ .type cheetah_deferred_trap_vector_tl1,#function
+cheetah_deferred_trap_vector_tl1:
+ membar #Sync;
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1;
+ andn %g1, DCU_DC | DCU_IC, %g1;
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG;
+ membar #Sync;
+ sethi %hi(cheetah_deferred_trap), %g2
+ jmpl %g2 + %lo(cheetah_deferred_trap), %g0
+ mov 1, %g1
+ .size cheetah_deferred_trap_vector_tl1,.-cheetah_deferred_trap_vector_tl1
+
+ /* Cheetah+ specific traps. These are for the new I/D cache parity
+ * error traps. The first argument to cheetah_plus_parity_handler
+ * is encoded as follows:
+ *
+ * Bit0: 0=dcache,1=icache
+ * Bit1: 0=recoverable,1=unrecoverable
+ */
+ /* D-cache parity vector: jumps to do_cheetah_plus_data_parity.
+ * The trailing nops bring the stub to eight instructions.
+ */
+ .globl cheetah_plus_dcpe_trap_vector
+ .type cheetah_plus_dcpe_trap_vector,#function
+cheetah_plus_dcpe_trap_vector:
+ membar #Sync
+ sethi %hi(do_cheetah_plus_data_parity), %g7
+ jmpl %g7 + %lo(do_cheetah_plus_data_parity), %g0
+ nop
+ nop
+ nop
+ nop
+ nop
+ .size cheetah_plus_dcpe_trap_vector,.-cheetah_plus_dcpe_trap_vector
+
+ /* Saves the current %pil in %g2, raises %pil to PIL_NORMAL_MAX
+ * and enters via etrap_irq, then calls
+ * cheetah_plus_parity_error(0x0, %sp + PTREGS_OFF) and leaves
+ * through rtrap_irq. Type 0x0 = dcache, recoverable, per the
+ * encoding documented with the cheetah+ trap vectors.
+ */
+ .type do_cheetah_plus_data_parity,#function
+do_cheetah_plus_data_parity:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov 0x0, %o0
+ call cheetah_plus_parity_error
+ add %sp, PTREGS_OFF, %o1
+ ba,a,pt %xcc, rtrap_irq
+ .size do_cheetah_plus_data_parity,.-do_cheetah_plus_data_parity
+
+ /* D-cache parity vector for the tl1 case: writes
+ * PSTATE_IG | PSTATE_PEF | PSTATE_PRIV to %pstate and jumps to
+ * do_dcpe_tl1. The trailing nops bring the stub to eight
+ * instructions.
+ */
+ .globl cheetah_plus_dcpe_trap_vector_tl1
+ .type cheetah_plus_dcpe_trap_vector_tl1,#function
+cheetah_plus_dcpe_trap_vector_tl1:
+ membar #Sync
+ wrpr PSTATE_IG | PSTATE_PEF | PSTATE_PRIV, %pstate
+ sethi %hi(do_dcpe_tl1), %g3
+ jmpl %g3 + %lo(do_dcpe_tl1), %g0
+ nop
+ nop
+ nop
+ nop
+ .size cheetah_plus_dcpe_trap_vector_tl1,.-cheetah_plus_dcpe_trap_vector_tl1
+
+ /* I-cache parity vector: jumps to do_cheetah_plus_insn_parity.
+ * The trailing nops bring the stub to eight instructions.
+ */
+ .globl cheetah_plus_icpe_trap_vector
+ .type cheetah_plus_icpe_trap_vector,#function
+cheetah_plus_icpe_trap_vector:
+ membar #Sync
+ sethi %hi(do_cheetah_plus_insn_parity), %g7
+ jmpl %g7 + %lo(do_cheetah_plus_insn_parity), %g0
+ nop
+ nop
+ nop
+ nop
+ nop
+ .size cheetah_plus_icpe_trap_vector,.-cheetah_plus_icpe_trap_vector
+
+ /* Saves the current %pil in %g2, raises %pil to PIL_NORMAL_MAX
+ * and enters via etrap_irq, then calls
+ * cheetah_plus_parity_error(0x1, %sp + PTREGS_OFF) and leaves
+ * through rtrap_irq. Type 0x1 = icache, recoverable, per the
+ * encoding documented with the cheetah+ trap vectors.
+ */
+ .type do_cheetah_plus_insn_parity,#function
+do_cheetah_plus_insn_parity:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov 0x1, %o0
+ call cheetah_plus_parity_error
+ add %sp, PTREGS_OFF, %o1
+ ba,a,pt %xcc, rtrap_irq
+ .size do_cheetah_plus_insn_parity,.-do_cheetah_plus_insn_parity
+
+ /* I-cache parity vector for the tl1 case: writes
+ * PSTATE_IG | PSTATE_PEF | PSTATE_PRIV to %pstate and jumps to
+ * do_icpe_tl1. The trailing nops bring the stub to eight
+ * instructions.
+ */
+ .globl cheetah_plus_icpe_trap_vector_tl1
+ .type cheetah_plus_icpe_trap_vector_tl1,#function
+cheetah_plus_icpe_trap_vector_tl1:
+ membar #Sync
+ wrpr PSTATE_IG | PSTATE_PEF | PSTATE_PRIV, %pstate
+ sethi %hi(do_icpe_tl1), %g3
+ jmpl %g3 + %lo(do_icpe_tl1), %g0
+ nop
+ nop
+ nop
+ nop
+ .size cheetah_plus_icpe_trap_vector_tl1,.-cheetah_plus_icpe_trap_vector_tl1
+
+ /* If we take one of these traps when tl >= 1, then we
+ * jump to interrupt globals. If some trap level above us
+ * was also using interrupt globals, we cannot recover.
+ * We may use all interrupt global registers except %g6.
+ */
+ /* D-cache parity handler for the tl1 vector.
+ *
+ * Walks trap levels 1..%tl and tests TSTATE_IG in each saved
+ * %tstate. If any level has it set, control goes to
+ * do_dcpe_tl1_fatal, which enters via etraptl1 and calls
+ * cheetah_plus_parity_error(0x2, %sp + PTREGS_OFF). Otherwise
+ * dcache_parity_tl1_occurred is incremented, every D-cache line
+ * gets its UTAG rewritten and its data words zeroed, and control
+ * continues at dcpe_icpe_tl1_common.
+ */
+ .globl do_dcpe_tl1
+ .type do_dcpe_tl1,#function
+do_dcpe_tl1:
+ rdpr %tl, %g1 ! Save original trap level
+ mov 1, %g2 ! Setup TSTATE checking loop
+ sethi %hi(TSTATE_IG), %g3 ! TSTATE mask bit
+1: wrpr %g2, %tl ! Set trap level to check
+ rdpr %tstate, %g4 ! Read TSTATE for this level
+ andcc %g4, %g3, %g0 ! Interrupt globals in use?
+ bne,a,pn %xcc, do_dcpe_tl1_fatal ! Yep, irrecoverable
+ wrpr %g1, %tl ! Restore original trap level
+ add %g2, 1, %g2 ! Next trap level
+ cmp %g2, %g1 ! Hit them all yet?
+ ble,pt %icc, 1b ! Not yet
+ nop
+ wrpr %g1, %tl ! Restore original trap level
+do_dcpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */
+ sethi %hi(dcache_parity_tl1_occurred), %g2
+ lduw [%g2 + %lo(dcache_parity_tl1_occurred)], %g1
+ add %g1, 1, %g1
+ stw %g1, [%g2 + %lo(dcache_parity_tl1_occurred)]
+ /* Reset D-cache parity */
+ sethi %hi(1 << 16), %g1 ! D-cache size
+ mov (1 << 5), %g2 ! D-cache line size
+ sub %g1, %g2, %g1 ! Move down 1 cacheline
+1: srl %g1, 14, %g3 ! Compute UTAG
+ membar #Sync
+ stxa %g3, [%g1] ASI_DCACHE_UTAG
+ membar #Sync
+ sub %g2, 8, %g3 ! 64-bit data word within line
+2: membar #Sync
+ stxa %g0, [%g1 + %g3] ASI_DCACHE_DATA
+ membar #Sync
+ subcc %g3, 8, %g3 ! Next 64-bit data word
+ bge,pt %icc, 2b
+ nop
+ subcc %g1, %g2, %g1 ! Next cacheline
+ bge,pt %icc, 1b
+ nop
+ ba,pt %xcc, dcpe_icpe_tl1_common
+ nop
+
+do_dcpe_tl1_fatal:
+ sethi %hi(1f), %g7
+ ba,pt %xcc, etraptl1
+1: or %g7, %lo(1b), %g7
+ mov 0x2, %o0
+ call cheetah_plus_parity_error
+ add %sp, PTREGS_OFF, %o1
+ ba,pt %xcc, rtrap
+ nop
+ .size do_dcpe_tl1,.-do_dcpe_tl1
+
+ /* I-cache parity handler for the tl1 vector.
+ *
+ * Walks trap levels 1..%tl and tests TSTATE_IG in each saved
+ * %tstate. If any level has it set, control goes to
+ * do_icpe_tl1_fatal, which enters via etraptl1 and calls
+ * cheetah_plus_parity_error(0x3, %sp + PTREGS_OFF). Otherwise
+ * icache_parity_tl1_occurred is incremented, zero is stored to
+ * ASI_IC_TAG for every I-cache line, and control continues at
+ * dcpe_icpe_tl1_common.
+ */
+ .globl do_icpe_tl1
+ .type do_icpe_tl1,#function
+do_icpe_tl1:
+ rdpr %tl, %g1 ! Save original trap level
+ mov 1, %g2 ! Setup TSTATE checking loop
+ sethi %hi(TSTATE_IG), %g3 ! TSTATE mask bit
+1: wrpr %g2, %tl ! Set trap level to check
+ rdpr %tstate, %g4 ! Read TSTATE for this level
+ andcc %g4, %g3, %g0 ! Interrupt globals in use?
+ bne,a,pn %xcc, do_icpe_tl1_fatal ! Yep, irrecoverable
+ wrpr %g1, %tl ! Restore original trap level
+ add %g2, 1, %g2 ! Next trap level
+ cmp %g2, %g1 ! Hit them all yet?
+ ble,pt %icc, 1b ! Not yet
+ nop
+ wrpr %g1, %tl ! Restore original trap level
+do_icpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */
+ sethi %hi(icache_parity_tl1_occurred), %g2
+ lduw [%g2 + %lo(icache_parity_tl1_occurred)], %g1
+ add %g1, 1, %g1
+ stw %g1, [%g2 + %lo(icache_parity_tl1_occurred)]
+ /* Flush I-cache */
+ sethi %hi(1 << 15), %g1 ! I-cache size
+ mov (1 << 5), %g2 ! I-cache line size
+ sub %g1, %g2, %g1
+1: or %g1, (2 << 3), %g3
+ stxa %g0, [%g3] ASI_IC_TAG
+ membar #Sync
+ subcc %g1, %g2, %g1
+ bge,pt %icc, 1b
+ nop
+ ba,pt %xcc, dcpe_icpe_tl1_common
+ nop
+
+do_icpe_tl1_fatal:
+ sethi %hi(1f), %g7
+ ba,pt %xcc, etraptl1
+1: or %g7, %lo(1b), %g7
+ mov 0x3, %o0
+ call cheetah_plus_parity_error
+ add %sp, PTREGS_OFF, %o1
+ ba,pt %xcc, rtrap
+ nop
+ .size do_icpe_tl1,.-do_icpe_tl1
+
+ /* Shared tail of do_dcpe_tl1 and do_icpe_tl1 (non-fatal path). */
+ .type dcpe_icpe_tl1_common,#function
+dcpe_icpe_tl1_common:
+ /* Flush D-cache, re-enable D/I caches in DCU and finally
+ * retry the trapping instruction.
+ */
+ sethi %hi(1 << 16), %g1 ! D-cache size
+ mov (1 << 5), %g2 ! D-cache line size
+ sub %g1, %g2, %g1
+ /* Store zero to the tag of each line, last line first. */
+1: stxa %g0, [%g1] ASI_DCACHE_TAG
+ membar #Sync
+ subcc %g1, %g2, %g1
+ bge,pt %icc, 1b
+ nop
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ or %g1, (DCU_DC | DCU_IC), %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ retry
+ .size dcpe_icpe_tl1_common,.-dcpe_icpe_tl1_common
+
+ /* Capture I/D/E-cache state into per-cpu error scoreboard.
+ *
+ * %g1: (TL>=1) ? 1 : 0
+ * %g2: scratch
+ * %g3: scratch
+ * %g4: AFSR
+ * %g5: AFAR
+ * %g6: unused, will have current thread ptr after etrap
+ * %g7: scratch
+ *
+ * Logging is skipped when cheetah_error_log is NULL or when the
+ * first word of the selected entry is not -1. In every case the
+ * routine finishes at label 80 by dispatching on %tt: 0x70 goes
+ * to c_fast_ecc, 0x63 to c_cee, anything else to c_deferred.
+ */
+ .type __cheetah_log_error,#function
+__cheetah_log_error:
+ /* Put "TL1" software bit into AFSR. */
+ and %g1, 0x1, %g1
+ sllx %g1, 63, %g2
+ or %g4, %g2, %g4
+
+ /* Get log entry pointer for this cpu at this trap level. */
+ BRANCH_IF_JALAPENO(g2,g3,50f)
+ ldxa [%g0] ASI_SAFARI_CONFIG, %g2
+ srlx %g2, 17, %g2
+ ba,pt %xcc, 60f
+ and %g2, 0x3ff, %g2
+
+50: ldxa [%g0] ASI_JBUS_CONFIG, %g2
+ srlx %g2, 17, %g2
+ and %g2, 0x1f, %g2
+
+60: sllx %g2, 9, %g2
+ sethi %hi(cheetah_error_log), %g3
+ ldx [%g3 + %lo(cheetah_error_log)], %g3
+ brz,pn %g3, 80f
+ nop
+
+ add %g3, %g2, %g3
+ sllx %g1, 8, %g1
+ add %g3, %g1, %g1
+
+ /* %g1 holds pointer to the top of the logging scoreboard */
+ ldx [%g1 + 0x0], %g7
+ cmp %g7, -1
+ bne,pn %xcc, 80f
+ nop
+
+ stx %g4, [%g1 + 0x0]
+ stx %g5, [%g1 + 0x8]
+ add %g1, 0x10, %g1
+
+ /* %g1 now points to D-cache logging area */
+ set 0x3ff8, %g2 /* DC_addr mask */
+ and %g5, %g2, %g2 /* DC_addr bits of AFAR */
+ srlx %g5, 12, %g3
+ or %g3, 1, %g3 /* PHYS tag + valid */
+
+10: ldxa [%g2] ASI_DCACHE_TAG, %g7
+ cmp %g3, %g7 /* TAG match? */
+ bne,pt %xcc, 13f
+ nop
+
+ /* Yep, what we want, capture state. */
+ stx %g2, [%g1 + 0x20]
+ stx %g7, [%g1 + 0x28]
+
+ /* A membar Sync is required before and after utag access. */
+ membar #Sync
+ ldxa [%g2] ASI_DCACHE_UTAG, %g7
+ membar #Sync
+ stx %g7, [%g1 + 0x30]
+ ldxa [%g2] ASI_DCACHE_SNOOP_TAG, %g7
+ stx %g7, [%g1 + 0x38]
+ clr %g3
+
+12: ldxa [%g2 + %g3] ASI_DCACHE_DATA, %g7
+ stx %g7, [%g1]
+ add %g3, (1 << 5), %g3
+ cmp %g3, (4 << 5)
+ bl,pt %xcc, 12b
+ add %g1, 0x8, %g1
+
+ ba,pt %xcc, 20f
+ add %g1, 0x20, %g1
+
+13: sethi %hi(1 << 14), %g7
+ add %g2, %g7, %g2
+ srlx %g2, 14, %g7
+ cmp %g7, 4
+ bl,pt %xcc, 10b
+ nop
+
+ add %g1, 0x40, %g1
+
+ /* %g1 now points to I-cache logging area */
+20: set 0x1fe0, %g2 /* IC_addr mask */
+ and %g5, %g2, %g2 /* IC_addr bits of AFAR */
+ sllx %g2, 1, %g2 /* IC_addr[13:6]==VA[12:5] */
+ srlx %g5, (13 - 8), %g3 /* Make PTAG */
+ andn %g3, 0xff, %g3 /* Mask off undefined bits */
+
+21: ldxa [%g2] ASI_IC_TAG, %g7
+ andn %g7, 0xff, %g7
+ cmp %g3, %g7
+ bne,pt %xcc, 23f
+ nop
+
+ /* Yep, what we want, capture state. */
+ stx %g2, [%g1 + 0x40]
+ stx %g7, [%g1 + 0x48]
+ add %g2, (1 << 3), %g2
+ ldxa [%g2] ASI_IC_TAG, %g7
+ add %g2, (1 << 3), %g2
+ stx %g7, [%g1 + 0x50]
+ ldxa [%g2] ASI_IC_TAG, %g7
+ add %g2, (1 << 3), %g2
+ stx %g7, [%g1 + 0x60]
+ ldxa [%g2] ASI_IC_TAG, %g7
+ stx %g7, [%g1 + 0x68]
+ sub %g2, (3 << 3), %g2
+ ldxa [%g2] ASI_IC_STAG, %g7
+ stx %g7, [%g1 + 0x58]
+ clr %g3
+ srlx %g2, 2, %g2
+
+22: ldxa [%g2 + %g3] ASI_IC_INSTR, %g7
+ stx %g7, [%g1]
+ add %g3, (1 << 3), %g3
+ cmp %g3, (8 << 3)
+ bl,pt %xcc, 22b
+ add %g1, 0x8, %g1
+
+ ba,pt %xcc, 30f
+ add %g1, 0x30, %g1
+
+23: sethi %hi(1 << 14), %g7
+ add %g2, %g7, %g2
+ srlx %g2, 14, %g7
+ cmp %g7, 4
+ bl,pt %xcc, 21b
+ nop
+
+ add %g1, 0x70, %g1
+
+ /* %g1 now points to E-cache logging area */
+30: andn %g5, (32 - 1), %g2
+ stx %g2, [%g1 + 0x20]
+ ldxa [%g2] ASI_EC_TAG_DATA, %g7
+ stx %g7, [%g1 + 0x28]
+ ldxa [%g2] ASI_EC_R, %g0
+ clr %g3
+
+31: ldxa [%g3] ASI_EC_DATA, %g7
+ stx %g7, [%g1 + %g3]
+ add %g3, 0x8, %g3
+ cmp %g3, 0x20
+
+ bl,pt %xcc, 31b
+ nop
+80:
+ rdpr %tt, %g2
+ cmp %g2, 0x70
+ be c_fast_ecc
+ cmp %g2, 0x63
+ be c_cee
+ nop
+ ba,pt %xcc, c_deferred
+ /* Explicit delay slot for the branch above; without it the
+ * first instruction of whatever is placed after this routine
+ * would execute in the slot.
+ */
+ nop
+ .size __cheetah_log_error,.-__cheetah_log_error
+
+ /* Cheetah FECC trap handling, we get here from tl{0,1}_fecc
+ * in the trap table. That code has done a memory barrier
+ * and has disabled both the I-cache and D-cache in the DCU
+ * control register. The I-cache is disabled so that we may
+ * capture the corrupted cache line, and the D-cache is disabled
+ * because corrupt data may have been placed there and we don't
+ * want to reference it.
+ *
+ * %g1 is one if this trap occurred at %tl >= 1.
+ *
+ * Next, we turn off error reporting so that we don't recurse.
+ */
+ /* Clears ESTATE_ERROR_NCEEN and ESTATE_ERROR_CEEN, loads AFSR
+ * into %g4 and AFAR into %g5, writes the AFSR value back, and
+ * branches to __cheetah_log_error.
+ */
+ .globl cheetah_fast_ecc
+ .type cheetah_fast_ecc,#function
+cheetah_fast_ecc:
+ ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2
+ andn %g2, ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN, %g2
+ stxa %g2, [%g0] ASI_ESTATE_ERROR_EN
+ membar #Sync
+
+ /* Fetch and clear AFSR/AFAR */
+ ldxa [%g0] ASI_AFSR, %g4
+ ldxa [%g0] ASI_AFAR, %g5
+ stxa %g4, [%g0] ASI_AFSR
+ membar #Sync
+
+ ba,pt %xcc, __cheetah_log_error
+ nop
+ .size cheetah_fast_ecc,.-cheetah_fast_ecc
+
+ /* Reached from __cheetah_log_error when %tt is 0x70. Saves the
+ * current %pil in %g2, raises %pil to PIL_NORMAL_MAX, enters
+ * via etrap_irq, then calls
+ * cheetah_fecc_handler(%sp + PTREGS_OFF, %l4, %l5) and leaves
+ * through rtrap_irq.
+ * NOTE(review): %l4/%l5 presumably carry the AFSR/AFAR that
+ * were in %g4/%g5 before etrap - confirm against etrap.
+ */
+ .type c_fast_ecc,#function
+c_fast_ecc:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov %l4, %o1
+ mov %l5, %o2
+ call cheetah_fecc_handler
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_irq
+ .size c_fast_ecc,.-c_fast_ecc
+
+ /* Our caller has disabled I-cache and performed membar Sync. */
+ /* Clears only ESTATE_ERROR_CEEN, loads AFSR into %g4 and AFAR
+ * into %g5, writes the AFSR value back, and branches to
+ * __cheetah_log_error.
+ */
+ .globl cheetah_cee
+ .type cheetah_cee,#function
+cheetah_cee:
+ ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2
+ andn %g2, ESTATE_ERROR_CEEN, %g2
+ stxa %g2, [%g0] ASI_ESTATE_ERROR_EN
+ membar #Sync
+
+ /* Fetch and clear AFSR/AFAR */
+ ldxa [%g0] ASI_AFSR, %g4
+ ldxa [%g0] ASI_AFAR, %g5
+ stxa %g4, [%g0] ASI_AFSR
+ membar #Sync
+
+ ba,pt %xcc, __cheetah_log_error
+ nop
+ .size cheetah_cee,.-cheetah_cee
+
+ /* Reached from __cheetah_log_error when %tt is 0x63. Saves the
+ * current %pil in %g2, raises %pil to PIL_NORMAL_MAX, enters
+ * via etrap_irq, then calls
+ * cheetah_cee_handler(%sp + PTREGS_OFF, %l4, %l5) and leaves
+ * through rtrap_irq.
+ * NOTE(review): %l4/%l5 presumably carry the AFSR/AFAR that
+ * were in %g4/%g5 before etrap - confirm against etrap.
+ */
+ .type c_cee,#function
+c_cee:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov %l4, %o1
+ mov %l5, %o2
+ call cheetah_cee_handler
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_irq
+ .size c_cee,.-c_cee
+
+ /* Our caller has disabled I-cache+D-cache and performed membar Sync. */
+ /* Clears ESTATE_ERROR_NCEEN and ESTATE_ERROR_CEEN, loads AFSR
+ * into %g4 and AFAR into %g5, writes the AFSR value back, and
+ * branches to __cheetah_log_error.
+ */
+ .globl cheetah_deferred_trap
+ .type cheetah_deferred_trap,#function
+cheetah_deferred_trap:
+ ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2
+ andn %g2, ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN, %g2
+ stxa %g2, [%g0] ASI_ESTATE_ERROR_EN
+ membar #Sync
+
+ /* Fetch and clear AFSR/AFAR */
+ ldxa [%g0] ASI_AFSR, %g4
+ ldxa [%g0] ASI_AFAR, %g5
+ stxa %g4, [%g0] ASI_AFSR
+ membar #Sync
+
+ ba,pt %xcc, __cheetah_log_error
+ nop
+ .size cheetah_deferred_trap,.-cheetah_deferred_trap
+
+ /* Reached from __cheetah_log_error when %tt is neither 0x70 nor
+ * 0x63. Saves the current %pil in %g2, raises %pil to
+ * PIL_NORMAL_MAX, enters via etrap_irq, then calls
+ * cheetah_deferred_handler(%sp + PTREGS_OFF, %l4, %l5) and
+ * leaves through rtrap_irq.
+ * NOTE(review): %l4/%l5 presumably carry the AFSR/AFAR that
+ * were in %g4/%g5 before etrap - confirm against etrap.
+ */
+ .type c_deferred,#function
+c_deferred:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov %l4, %o1
+ mov %l5, %o2
+ call cheetah_deferred_handler
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_irq
+ .size c_deferred,.-c_deferred
diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c
new file mode 100644
index 000000000000..3b9f4d6e14a9
--- /dev/null
+++ b/arch/sparc/kernel/chmc.c
@@ -0,0 +1,863 @@
+/* chmc.c: Driver for UltraSPARC-III memory controller.
+ *
+ * Copyright (C) 2001, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <asm/spitfire.h>
+#include <asm/chmctrl.h>
+#include <asm/cpudata.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/head.h>
+#include <asm/io.h>
+#include <asm/memctrl.h>
+
+#define DRV_MODULE_NAME "chmc"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "0.2"
+
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("UltraSPARC-III memory controller driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+static int mc_type;
+#define MC_TYPE_SAFARI 1
+#define MC_TYPE_JBUS 2
+
+static dimm_printer_t us3mc_dimm_printer;
+
+#define CHMCTRL_NDGRPS 2
+#define CHMCTRL_NDIMMS 4
+
+#define CHMC_DIMMS_PER_MC (CHMCTRL_NDGRPS * CHMCTRL_NDIMMS)
+
+/* OBP memory-layout property format. */
+/* One Safari map: dimm_map is read by get_pin_and_dimm_str() as packed
+ * 2-bit entries (four per byte), pin_map as one byte per cache-line
+ * bit offset.
+ */
+struct chmc_obp_map {
+ unsigned char dimm_map[144];
+ unsigned char pin_map[576];
+};
+
+#define DIMM_LABEL_SZ 8
+
+/* In-memory copy of the Safari "memory-layout" property; chmc_probe()
+ * memcpy()s the raw property bytes over this structure.
+ */
+struct chmc_obp_mem_layout {
+ /* One max 8-byte string label per DIMM. Usually
+ * this matches the label on the motherboard where
+ * that DIMM resides.
+ */
+ char dimm_labels[CHMC_DIMMS_PER_MC][DIMM_LABEL_SZ];
+
+ /* If symmetric use map[0], else it is
+ * asymmetric and map[1] should be used.
+ */
+ char symmetric;
+
+ struct chmc_obp_map map[2];
+};
+
+#define CHMCTRL_NBANKS 4
+
+/* Decoded state of one logical bank, filled in by
+ * chmc_interpret_one_decode_reg() from a memory decode register.
+ */
+struct chmc_bank_info {
+ /* Owning controller. */
+ struct chmc *p;
+ /* (CHMCTRL_NBANKS * portid) + bank index within the controller. */
+ int bank_id;
+
+ /* Register value the fields below were extracted from. */
+ u64 raw_reg;
+ int valid;
+ /* Upper/lower address mask (uk/lk) and match (um/lm) fields. */
+ int uk;
+ int um;
+ int lk;
+ int lm;
+ /* Interleave factor derived from lk (1, 2, 4, 8 or 16). */
+ int interleave;
+ unsigned long base;
+ unsigned long size;
+};
+
+/* Per-controller state for a Safari memory controller; allocated in
+ * chmc_probe() and linked onto mctrl_list.
+ */
+struct chmc {
+ struct list_head list;
+ int portid;
+
+ /* Copy of the "memory-layout" property; layout_size is its length
+ * in bytes, or 0 when the property is absent.
+ */
+ struct chmc_obp_mem_layout layout_prop;
+ int layout_size;
+
+ /* Mapping of the controller registers (0x48 bytes). */
+ void __iomem *regs;
+
+ /* Register snapshots taken at probe time when a layout exists. */
+ u64 timing_control1;
+ u64 timing_control2;
+ u64 timing_control3;
+ u64 timing_control4;
+ u64 memaddr_control;
+
+ struct chmc_bank_info logical_banks[CHMCTRL_NBANKS];
+};
+
+#define JBUSMC_REGS_SIZE 8
+
+#define JB_MC_REG1_DIMM2_BANK3 0x8000000000000000UL
+#define JB_MC_REG1_DIMM1_BANK1 0x4000000000000000UL
+#define JB_MC_REG1_DIMM2_BANK2 0x2000000000000000UL
+#define JB_MC_REG1_DIMM1_BANK0 0x1000000000000000UL
+#define JB_MC_REG1_XOR 0x0000010000000000UL
+#define JB_MC_REG1_ADDR_GEN_2 0x000000e000000000UL
+#define JB_MC_REG1_ADDR_GEN_2_SHIFT 37
+#define JB_MC_REG1_ADDR_GEN_1 0x0000001c00000000UL
+#define JB_MC_REG1_ADDR_GEN_1_SHIFT 34
+#define JB_MC_REG1_INTERLEAVE 0x0000000001800000UL
+#define JB_MC_REG1_INTERLEAVE_SHIFT 23
+#define JB_MC_REG1_DIMM2_PTYPE 0x0000000000200000UL
+#define JB_MC_REG1_DIMM2_PTYPE_SHIFT 21
+#define JB_MC_REG1_DIMM1_PTYPE 0x0000000000100000UL
+#define JB_MC_REG1_DIMM1_PTYPE_SHIFT 20
+
+#define PART_TYPE_X8 0
+#define PART_TYPE_X4 1
+
+#define INTERLEAVE_NONE 0
+#define INTERLEAVE_SAME 1
+#define INTERLEAVE_INTERNAL 2
+#define INTERLEAVE_BOTH 3
+
+#define ADDR_GEN_128MB 0
+#define ADDR_GEN_256MB 1
+#define ADDR_GEN_512MB 2
+#define ADDR_GEN_1GB 3
+
+#define JB_NUM_DIMM_GROUPS 2
+#define JB_NUM_DIMMS_PER_GROUP 2
+#define JB_NUM_DIMMS (JB_NUM_DIMM_GROUPS * JB_NUM_DIMMS_PER_GROUP)
+
+/* JBUS map: dimm_map is read by get_pin_and_dimm_str() as packed
+ * single-bit entries (eight per byte), pin_map as one byte per bus
+ * bit offset.
+ */
+struct jbusmc_obp_map {
+ unsigned char dimm_map[18];
+ unsigned char pin_map[144];
+};
+
+/* In-memory copy of the JBUS "memory-layout" property; jbusmc_probe()
+ * memcpy()s the raw property bytes over this structure.
+ */
+struct jbusmc_obp_mem_layout {
+ /* One max 8-byte string label per DIMM. Usually
+ * this matches the label on the motherboard where
+ * that DIMM resides.
+ */
+ char dimm_labels[JB_NUM_DIMMS][DIMM_LABEL_SZ];
+
+ /* NOTE(review): unlike the Safari layout there is only a single
+ * map below, and the JBUS path of get_pin_and_dimm_str() never
+ * consults this flag.
+ */
+ char symmetric;
+
+ struct jbusmc_obp_map map;
+
+ char _pad;
+};
+
+/* One DIMM group of a JBUS controller, filled in by
+ * jbusmc_construct_one_dimm_group().
+ */
+struct jbusmc_dimm_group {
+ struct jbusmc *controller;
+ /* Slot number within controller->dimm_groups[]. */
+ int index;
+ /* Physical address window [base_addr, base_addr + size). */
+ u64 base_addr;
+ u64 size;
+};
+
+/* Per-controller state for a JBUS memory controller; allocated in
+ * jbusmc_probe() and linked onto mctrl_list.
+ */
+struct jbusmc {
+ void __iomem *regs;
+ /* Value of the "memory-control-register-1" property. */
+ u64 mc_reg_1;
+ u32 portid;
+ /* Copy of the "memory-layout" property and its length in bytes. */
+ struct jbusmc_obp_mem_layout layout;
+ int layout_len;
+ /* Count of groups constructed; slots are filled by group index. */
+ int num_dimm_groups;
+ struct jbusmc_dimm_group dimm_groups[JB_NUM_DIMM_GROUPS];
+ struct list_head list;
+};
+
+static DEFINE_SPINLOCK(mctrl_list_lock);
+static LIST_HEAD(mctrl_list);
+
+/* Link LIST onto the global mctrl_list while holding mctrl_list_lock. */
+static void mc_list_add(struct list_head *list)
+{
+ spin_lock(&mctrl_list_lock);
+ list_add(list, &mctrl_list);
+ spin_unlock(&mctrl_list_lock);
+}
+
+/* Unlink LIST (and reinitialize it) while holding mctrl_list_lock. */
+static void mc_list_del(struct list_head *list)
+{
+ spin_lock(&mctrl_list_lock);
+ list_del_init(list);
+ spin_unlock(&mctrl_list_lock);
+}
+
+#define SYNDROME_MIN -1
+#define SYNDROME_MAX 144
+
+/* Convert a syndrome code into the position at which the
+ * corresponding bit travels within a quadword on the bus.
+ */
+static int syndrome_to_qword_code(int syndrome_code)
+{
+	/* Codes below 128 are shifted up by 16. */
+	if (syndrome_code < 128)
+		return syndrome_code + 16;
+
+	/* The next nine codes (128..136) map to positions 7..15. */
+	if (syndrome_code < 128 + 9)
+		return syndrome_code - (128 - 7);
+
+	/* The following three codes (137..139) map to positions 4..6. */
+	if (syndrome_code < (128 + 9 + 3))
+		return syndrome_code - (128 + 9 - 4);
+
+	/* Everything from 140 upwards is counted from position 0. */
+	return syndrome_code - (128 + 9 + 3);
+}
+
+/* All this magic has to do with how a cache line comes over the wire
+ * on Safari and JBUS. A 64-byte line comes over in 1 or more quadword
+ * cycles, each of which transmits ECC/MTAG info as well as the actual
+ * data.
+ */
+#define L2_LINE_SIZE 64
+#define L2_LINE_ADDR_MSK (L2_LINE_SIZE - 1)
+#define QW_PER_LINE 4
+#define QW_BYTES (L2_LINE_SIZE / QW_PER_LINE)
+#define QW_BITS 144
+#define SAFARI_LAST_BIT (576 - 1)
+#define JBUS_LAST_BIT (144 - 1)
+
+/* Translate SYNDROME_CODE (and, on Safari, the quadword of PADDR within
+ * its cache line) into a DIMM label and a pin number using the OBP
+ * memory-layout property _PROP.
+ *
+ * _PROP is interpreted as a jbusmc_obp_mem_layout when mc_type is
+ * MC_TYPE_JBUS and as a chmc_obp_mem_layout otherwise.
+ * BASE_DIMM_OFFSET is added to the value looked up in dimm_map to
+ * index dimm_labels[].  Results are stored through PIN_P and
+ * DIMM_STR_P; *DIMM_STR_P points into _PROP.
+ */
+static void get_pin_and_dimm_str(int syndrome_code, unsigned long paddr,
+ int *pin_p, char **dimm_str_p, void *_prop,
+ int base_dimm_offset)
+{
+ int qword_code = syndrome_to_qword_code(syndrome_code);
+ int cache_line_offset;
+ int offset_inverse;
+ int dimm_map_index;
+ int map_val;
+
+ if (mc_type == MC_TYPE_JBUS) {
+ struct jbusmc_obp_mem_layout *p = _prop;
+
+ /* JBUS */
+ cache_line_offset = qword_code;
+ offset_inverse = (JBUS_LAST_BIT - cache_line_offset);
+ /* dimm_map holds one bit per entry, most significant bit first. */
+ dimm_map_index = offset_inverse / 8;
+ map_val = p->map.dimm_map[dimm_map_index];
+ map_val = ((map_val >> ((7 - (offset_inverse & 7)))) & 1);
+ *dimm_str_p = p->dimm_labels[base_dimm_offset + map_val];
+ *pin_p = p->map.pin_map[cache_line_offset];
+ } else {
+ struct chmc_obp_mem_layout *p = _prop;
+ struct chmc_obp_map *mp;
+ int qword;
+
+ /* Safari */
+ if (p->symmetric)
+ mp = &p->map[0];
+ else
+ mp = &p->map[1];
+
+ /* Which QW_BYTES-sized quadword of the line PADDR falls in. */
+ qword = (paddr & L2_LINE_ADDR_MSK) / QW_BYTES;
+ cache_line_offset = ((3 - qword) * QW_BITS) + qword_code;
+ offset_inverse = (SAFARI_LAST_BIT - cache_line_offset);
+ /* dimm_map holds 2-bit entries, four per byte, high pair first. */
+ dimm_map_index = offset_inverse >> 2;
+ map_val = mp->dimm_map[dimm_map_index];
+ map_val = ((map_val >> ((3 - (offset_inverse & 3)) << 1)) & 0x3);
+ *dimm_str_p = p->dimm_labels[base_dimm_offset + map_val];
+ *pin_p = mp->pin_map[cache_line_offset];
+ }
+}
+
+/* Find the DIMM group whose [base_addr, base_addr + size) window
+ * contains PHYS_ADDR, searching every controller on mctrl_list.
+ * Returns NULL when no group decodes the address.
+ *
+ * Every slot of dimm_groups[] is examined rather than only the first
+ * num_dimm_groups: slots are filled by group index while
+ * num_dimm_groups is a plain count, so a controller with only group 1
+ * populated would otherwise never have that group searched.  Slots
+ * that were never constructed have a NULL controller (the structure
+ * is kzalloc()ed) and are skipped.
+ */
+static struct jbusmc_dimm_group *jbusmc_find_dimm_group(unsigned long phys_addr)
+{
+	struct jbusmc *p;
+
+	list_for_each_entry(p, &mctrl_list, list) {
+		int i;
+
+		for (i = 0; i < JB_NUM_DIMM_GROUPS; i++) {
+			struct jbusmc_dimm_group *dp = &p->dimm_groups[i];
+
+			/* Slot not present on this controller. */
+			if (!dp->controller)
+				continue;
+
+			if (phys_addr < dp->base_addr ||
+			    (dp->base_addr + dp->size) <= phys_addr)
+				continue;
+
+			return dp;
+		}
+	}
+	return NULL;
+}
+
+/* DIMM printer for JBUS controllers.
+ *
+ * Formats into BUF either "<label>, pin <n>" for a single-bit error or,
+ * when SYNDROME_CODE is SYNDROME_MIN (multi-bit error), the labels of
+ * every DIMM in the group that decodes PHYS_ADDR.  If no group decodes
+ * the address or the syndrome is out of range, BUF is set to "???".
+ * BUFLEN is currently not consulted.  Always returns 0.
+ */
+static int jbusmc_print_dimm(int syndrome_code,
+			     unsigned long phys_addr,
+			     char *buf, int buflen)
+{
+	struct jbusmc_obp_mem_layout *prop;
+	struct jbusmc_dimm_group *dp;
+	struct jbusmc *p;
+	int first_dimm;
+
+	dp = jbusmc_find_dimm_group(phys_addr);
+	if (dp == NULL ||
+	    syndrome_code < SYNDROME_MIN ||
+	    syndrome_code > SYNDROME_MAX) {
+		buf[0] = '?';
+		buf[1] = '?';
+		buf[2] = '?';
+		buf[3] = '\0';
+		/* Must stop here: dp may be NULL and is dereferenced below. */
+		return 0;
+	}
+	p = dp->controller;
+	prop = &p->layout;
+
+	first_dimm = dp->index * JB_NUM_DIMMS_PER_GROUP;
+
+	if (syndrome_code != SYNDROME_MIN) {
+		char *dimm_str;
+		int pin;
+
+		get_pin_and_dimm_str(syndrome_code, phys_addr, &pin,
+				     &dimm_str, prop, first_dimm);
+		sprintf(buf, "%s, pin %3d", dimm_str, pin);
+	} else {
+		int dimm;
+
+		/* Multi-bit error, we just dump out all the
+		 * dimm labels associated with this dimm group.
+		 */
+		for (dimm = 0; dimm < JB_NUM_DIMMS_PER_GROUP; dimm++) {
+			sprintf(buf, "%s ",
+				prop->dimm_labels[first_dimm + dimm]);
+			buf += strlen(buf);
+		}
+	}
+
+	return 0;
+}
+
+/* Work out how much memory a DIMM group starting at BASE covers.
+ *
+ * Every entry of MEM_REGS whose range contains BASE contributes its end
+ * address, capped at BASE + 8GB; the result is the distance from BASE
+ * to the highest such end (0 when no entry contains BASE).
+ */
+static u64 __devinit jbusmc_dimm_group_size(u64 base,
+					    const struct linux_prom64_registers *mem_regs,
+					    int num_mem_regs)
+{
+	const u64 limit = base + (8UL * 1024 * 1024 * 1024);
+	u64 highest = base;
+	int n;
+
+	for (n = 0; n < num_mem_regs; n++) {
+		u64 start = mem_regs[n].phys_addr;
+		u64 end = start + mem_regs[n].reg_size;
+
+		/* Only ranges that contain the group's base count. */
+		if (base >= start && base < end) {
+			if (end > limit)
+				end = limit;
+			if (end > highest)
+				highest = end;
+		}
+	}
+
+	return highest - base;
+}
+
+/* Fill in slot INDEX of P->dimm_groups[]: the group's base address is
+ * portid * 64GB + index * 8GB and its size comes from
+ * jbusmc_dimm_group_size().
+ */
+static void __devinit jbusmc_construct_one_dimm_group(struct jbusmc *p,
+						       unsigned long index,
+						       const struct linux_prom64_registers *mem_regs,
+						       int num_mem_regs)
+{
+	struct jbusmc_dimm_group *grp = &p->dimm_groups[index];
+	u64 group_base;
+
+	group_base = (p->portid * (64UL * 1024 * 1024 * 1024));
+	group_base += (index * (8UL * 1024 * 1024 * 1024));
+
+	grp->controller = p;
+	grp->index = index;
+	grp->base_addr = group_base;
+	grp->size = jbusmc_dimm_group_size(group_base, mem_regs,
+					   num_mem_regs);
+}
+
+/* Construct a DIMM group for every group whose presence bit is set in
+ * P->mc_reg_1 (JB_MC_REG1_DIMM1_BANK0 for group 0,
+ * JB_MC_REG1_DIMM2_BANK2 for group 1), counting them in
+ * P->num_dimm_groups.
+ */
+static void __devinit jbusmc_construct_dimm_groups(struct jbusmc *p,
+						    const struct linux_prom64_registers *mem_regs,
+						    int num_mem_regs)
+{
+	const u64 present_bit[JB_NUM_DIMM_GROUPS] = {
+		JB_MC_REG1_DIMM1_BANK0,
+		JB_MC_REG1_DIMM2_BANK2,
+	};
+	unsigned long grp;
+
+	for (grp = 0; grp < JB_NUM_DIMM_GROUPS; grp++) {
+		if (!(p->mc_reg_1 & present_bit[grp]))
+			continue;
+
+		jbusmc_construct_one_dimm_group(p, grp, mem_regs,
+						num_mem_regs);
+		p->num_dimm_groups++;
+	}
+}
+
+/* Probe one JBUS memory controller node.
+ *
+ * Reads the "reg" property of /memory, allocates a struct jbusmc,
+ * fills it from the node's "portid", "memory-control-register-1" and
+ * "memory-layout" properties, maps the controller registers, builds
+ * the DIMM groups and links the controller onto mctrl_list.
+ *
+ * Returns 0 on success, -ENODEV when a node or property is missing or
+ * malformed, and -ENOMEM when allocation or register mapping fails.
+ */
+static int __devinit jbusmc_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ const struct linux_prom64_registers *mem_regs;
+ struct device_node *mem_node;
+ int err, len, num_mem_regs;
+ struct jbusmc *p;
+ const u32 *prop;
+ const void *ml;
+
+ err = -ENODEV;
+ mem_node = of_find_node_by_path("/memory");
+ if (!mem_node) {
+ printk(KERN_ERR PFX "Cannot find /memory node.\n");
+ goto out;
+ }
+ mem_regs = of_get_property(mem_node, "reg", &len);
+ if (!mem_regs) {
+ printk(KERN_ERR PFX "Cannot get reg property of /memory node.\n");
+ goto out;
+ }
+ num_mem_regs = len / sizeof(*mem_regs);
+
+ err = -ENOMEM;
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ printk(KERN_ERR PFX "Cannot allocate struct jbusmc.\n");
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&p->list);
+
+ err = -ENODEV;
+ prop = of_get_property(op->node, "portid", &len);
+ if (!prop || len != 4) {
+ printk(KERN_ERR PFX "Cannot find portid.\n");
+ goto out_free;
+ }
+
+ p->portid = *prop;
+
+ prop = of_get_property(op->node, "memory-control-register-1", &len);
+ if (!prop || len != 8) {
+ printk(KERN_ERR PFX "Cannot get memory control register 1.\n");
+ goto out_free;
+ }
+
+ /* The 8-byte property is two 32-bit cells, high word first. */
+ p->mc_reg_1 = ((u64)prop[0] << 32) | (u64) prop[1];
+
+ err = -ENOMEM;
+ p->regs = of_ioremap(&op->resource[0], 0, JBUSMC_REGS_SIZE, "jbusmc");
+ if (!p->regs) {
+ printk(KERN_ERR PFX "Cannot map jbusmc regs.\n");
+ goto out_free;
+ }
+
+ err = -ENODEV;
+ ml = of_get_property(op->node, "memory-layout", &p->layout_len);
+ if (!ml) {
+ printk(KERN_ERR PFX "Cannot get memory layout property.\n");
+ goto out_iounmap;
+ }
+ /* Reject a property that would not fit into p->layout. */
+ if (p->layout_len > sizeof(p->layout)) {
+ printk(KERN_ERR PFX "Unexpected memory-layout size %d\n",
+ p->layout_len);
+ goto out_iounmap;
+ }
+ memcpy(&p->layout, ml, p->layout_len);
+
+ jbusmc_construct_dimm_groups(p, mem_regs, num_mem_regs);
+
+ mc_list_add(&p->list);
+
+ printk(KERN_INFO PFX "UltraSPARC-IIIi memory controller at %s\n",
+ op->node->full_name);
+
+ dev_set_drvdata(&op->dev, p);
+
+ err = 0;
+
+out:
+ return err;
+
+ /* Error unwinding: the labels below fall through, then jump to out. */
+out_iounmap:
+ of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE);
+
+out_free:
+ kfree(p);
+ goto out;
+}
+
+/* Report whether bank BP decodes PHYS_ADDR (1) or not (0).
+ *
+ * A bank matches when it is valid and, in both the upper and the
+ * lower address fields, every bit that differs from the bank's match
+ * value (um/lm) is covered by the bank's mask (uk/lk).
+ */
+static int chmc_bank_match(struct chmc_bank_info *bp, unsigned long phys_addr)
+{
+	unsigned long upper, lower;
+
+	/* A disabled bank never matches. */
+	if (!bp->valid)
+		return 0;
+
+	/* Differing upper bits that the mask does not excuse. */
+	upper = (phys_addr & PA_UPPER_BITS) >> PA_UPPER_BITS_SHIFT;
+	if ((upper ^ bp->um) & ~(unsigned long) bp->uk)
+		return 0;
+
+	/* Same test for the lower bits. */
+	lower = (phys_addr & PA_LOWER_BITS) >> PA_LOWER_BITS_SHIFT;
+	if ((lower ^ bp->lm) & ~(unsigned long) bp->lk)
+		return 0;
+
+	return 1;
+}
+
+/* Walk every controller on mctrl_list and return the first logical
+ * bank that decodes PHYS_ADDR, or NULL when none does.
+ */
+static struct chmc_bank_info *chmc_find_bank(unsigned long phys_addr)
+{
+	struct chmc *mc;
+
+	list_for_each_entry(mc, &mctrl_list, list) {
+		struct chmc_bank_info *bank = &mc->logical_banks[0];
+		struct chmc_bank_info *end = bank + CHMCTRL_NBANKS;
+
+		for (; bank < end; bank++) {
+			if (chmc_bank_match(bank, phys_addr))
+				return bank;
+		}
+	}
+
+	return NULL;
+}
+
+/* DIMM printer for Safari controllers.
+ *
+ * Formats into BUF either "<label>, pin <n>" for a single-bit error or,
+ * when SYNDROME_CODE is SYNDROME_MIN (multi-bit error), the labels of
+ * every DIMM of the bank that decodes PHYS_ADDR.  If no bank decodes
+ * the address or the syndrome is out of range, BUF is set to "???".
+ * BUFLEN is not consulted.  Always returns 0.
+ */
+static int chmc_print_dimm(int syndrome_code,
+			   unsigned long phys_addr,
+			   char *buf, int buflen)
+{
+	struct chmc_bank_info *bank = chmc_find_bank(phys_addr);
+	struct chmc_obp_mem_layout *layout;
+	int first_dimm, i;
+	char *label;
+	int pin;
+
+	if (!bank ||
+	    syndrome_code < SYNDROME_MIN ||
+	    syndrome_code > SYNDROME_MAX) {
+		strcpy(buf, "???");
+		return 0;
+	}
+
+	layout = &bank->p->layout_prop;
+
+	/* Bank within its controller, then the DIMM group it uses. */
+	first_dimm = bank->bank_id & (CHMCTRL_NBANKS - 1);
+	first_dimm &= (CHMCTRL_NDGRPS - 1);
+	first_dimm *= CHMCTRL_NDIMMS;
+
+	if (syndrome_code == SYNDROME_MIN) {
+		/* Multi-bit error: list every DIMM label of the bank. */
+		for (i = 0; i < CHMCTRL_NDIMMS; i++)
+			buf += sprintf(buf, "%s ",
+				       layout->dimm_labels[first_dimm + i]);
+		return 0;
+	}
+
+	get_pin_and_dimm_str(syndrome_code, phys_addr, &pin,
+			     &label, layout, first_dimm);
+	sprintf(buf, "%s, pin %3d", label, pin);
+	return 0;
+}
+
+/* Accessing the registers is slightly complicated. If you want
+ * to get at the memory controller which is on the same processor
+ * the code is executing, you must use special ASI load/store else
+ * you go through the global mapping.
+ */
+/* Read the 64-bit controller register at OFFSET of controller P and
+ * return its value.
+ */
+static u64 chmc_read_mcreg(struct chmc *p, unsigned long offset)
+{
+ unsigned long ret, this_cpu;
+
+ /* Keep the CPU id comparison and the load on the same CPU. */
+ preempt_disable();
+
+ this_cpu = real_hard_smp_processor_id();
+
+ if (p->portid == this_cpu) {
+ /* Local controller: address is the bare register offset. */
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=r" (ret)
+ : "r" (offset), "i" (ASI_MCU_CTRL_REG));
+ } else {
+ /* Other controller: go through the mapped registers. */
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=r" (ret)
+ : "r" (p->regs + offset),
+ "i" (ASI_PHYS_BYPASS_EC_E));
+ }
+
+ preempt_enable();
+
+ return ret;
+}
+
+#if 0 /* currently unused */
+/* Store VAL into the 64-bit controller register at OFFSET of
+ * controller P.
+ *
+ * Mirrors chmc_read_mcreg(): the local controller is reached with
+ * ASI_MCU_CTRL_REG and the bare offset, any other controller through
+ * its mapped registers with ASI_PHYS_BYPASS_EC_E.  Both paths must be
+ * stores (stxa); preemption is disabled so the CPU id comparison and
+ * the store happen on the same CPU.
+ */
+static void chmc_write_mcreg(struct chmc *p, unsigned long offset, u64 val)
+{
+	preempt_disable();
+
+	if (p->portid == real_hard_smp_processor_id()) {
+		__asm__ __volatile__("stxa	%0, [%1] %2"
+				     : : "r" (val),
+				         "r" (offset), "i" (ASI_MCU_CTRL_REG));
+	} else {
+		__asm__ __volatile__("stxa	%0, [%1] %2"
+				     : : "r" (val),
+				         "r" (p->regs + offset),
+				         "i" (ASI_PHYS_BYPASS_EC_E));
+	}
+
+	preempt_enable();
+}
+#endif
+
+/* Decode memory decode register value VAL into logical bank
+ * WHICH_BANK of controller P: extract the valid/uk/um/lk/lm fields and
+ * derive the bank's base, interleave factor and size from them.
+ */
+static void chmc_interpret_one_decode_reg(struct chmc *p, int which_bank, u64 val)
+{
+ struct chmc_bank_info *bp = &p->logical_banks[which_bank];
+
+ bp->p = p;
+ bp->bank_id = (CHMCTRL_NBANKS * p->portid) + which_bank;
+ bp->raw_reg = val;
+ bp->valid = (val & MEM_DECODE_VALID) >> MEM_DECODE_VALID_SHIFT;
+ bp->uk = (val & MEM_DECODE_UK) >> MEM_DECODE_UK_SHIFT;
+ bp->um = (val & MEM_DECODE_UM) >> MEM_DECODE_UM_SHIFT;
+ bp->lk = (val & MEM_DECODE_LK) >> MEM_DECODE_LK_SHIFT;
+ bp->lm = (val & MEM_DECODE_LM) >> MEM_DECODE_LM_SHIFT;
+
+ /* Base: the upper match bits not covered by the mask, shifted
+ * back into address position.
+ */
+ bp->base = (bp->um);
+ bp->base &= ~(bp->uk);
+ bp->base <<= PA_UPPER_BITS_SHIFT;
+
+ /* Interleave factor from the lower mask; unlisted values count as 1. */
+ switch(bp->lk) {
+ case 0xf:
+ default:
+ bp->interleave = 1;
+ break;
+
+ case 0xe:
+ bp->interleave = 2;
+ break;
+
+ case 0xc:
+ bp->interleave = 4;
+ break;
+
+ case 0x8:
+ bp->interleave = 8;
+ break;
+
+ case 0x0:
+ bp->interleave = 16;
+ break;
+ };
+
+ /* UK[10] is reserved, and UK[11] is not set for the SDRAM
+ * bank size definition.
+ */
+ bp->size = (((unsigned long)bp->uk &
+ ((1UL << 10UL) - 1UL)) + 1UL) << PA_UPPER_BITS_SHIFT;
+ bp->size /= bp->interleave;
+}
+
+/* Read the four memory decode registers of P and decode each into the
+ * matching logical bank.  Does nothing when P has no memory layout
+ * (layout_size == 0).
+ */
+static void chmc_fetch_decode_regs(struct chmc *p)
+{
+	const unsigned long decode_reg[CHMCTRL_NBANKS] = {
+		CHMCTRL_DECODE1,
+		CHMCTRL_DECODE2,
+		CHMCTRL_DECODE3,
+		CHMCTRL_DECODE4,
+	};
+	int bank;
+
+	if (!p->layout_size)
+		return;
+
+	for (bank = 0; bank < CHMCTRL_NBANKS; bank++) {
+		u64 val = chmc_read_mcreg(p, decode_reg[bank]);
+
+		chmc_interpret_one_decode_reg(p, bank, val);
+	}
+}
+
+/* Probe one Safari memory controller node.
+ *
+ * Bails out with -ENODEV on Jalapeno/Serrano CPUs, when the node has no
+ * "portid", or when its "memory-layout" property is larger than
+ * struct chmc_obp_mem_layout.  Otherwise allocates a struct chmc,
+ * copies the layout property (if any), maps the registers, snapshots
+ * the timing/address control and decode registers when a layout is
+ * present, and links the controller onto mctrl_list.
+ *
+ * Returns 0 on success, -ENOMEM when allocation or register mapping
+ * fails.
+ */
+static int __devinit chmc_probe(struct of_device *op,
+				const struct of_device_id *match)
+{
+	struct device_node *dp = op->node;
+	unsigned long ver;
+	const void *pval;
+	int len = 0, portid;
+	struct chmc *p;
+	int err;
+
+	err = -ENODEV;
+	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
+	if ((ver >> 32UL) == __JALAPENO_ID ||
+	    (ver >> 32UL) == __SERRANO_ID)
+		goto out;
+
+	portid = of_getintprop_default(dp, "portid", -1);
+	if (portid == -1)
+		goto out;
+
+	pval = of_get_property(dp, "memory-layout", &len);
+	if (!pval) {
+		/* No layout: never rely on whatever len happens to hold. */
+		len = 0;
+	} else if (len > sizeof(p->layout_prop)) {
+		printk(KERN_ERR PFX "Unexpected memory-layout property "
+		       "size %d.\n", len);
+		goto out;
+	}
+
+	err = -ENOMEM;
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		printk(KERN_ERR PFX "Could not allocate struct chmc.\n");
+		goto out;
+	}
+
+	p->portid = portid;
+	p->layout_size = len;
+	if (pval)
+		memcpy(&p->layout_prop, pval, len);
+
+	p->regs = of_ioremap(&op->resource[0], 0, 0x48, "chmc");
+	if (!p->regs) {
+		printk(KERN_ERR PFX "Could not map registers.\n");
+		goto out_free;
+	}
+
+	/* Only controllers with a layout have their registers read. */
+	if (p->layout_size != 0UL) {
+		p->timing_control1 = chmc_read_mcreg(p, CHMCTRL_TCTRL1);
+		p->timing_control2 = chmc_read_mcreg(p, CHMCTRL_TCTRL2);
+		p->timing_control3 = chmc_read_mcreg(p, CHMCTRL_TCTRL3);
+		p->timing_control4 = chmc_read_mcreg(p, CHMCTRL_TCTRL4);
+		p->memaddr_control = chmc_read_mcreg(p, CHMCTRL_MACTRL);
+	}
+
+	chmc_fetch_decode_regs(p);
+
+	mc_list_add(&p->list);
+
+	printk(KERN_INFO PFX "UltraSPARC-III memory controller at %s [%s]\n",
+	       dp->full_name,
+	       (p->layout_size ? "ACTIVE" : "INACTIVE"));
+
+	dev_set_drvdata(&op->dev, p);
+
+	err = 0;
+
+out:
+	return err;
+
+out_free:
+	kfree(p);
+	goto out;
+}
+
+/* Dispatch the probe to the Safari or JBUS implementation according to
+ * mc_type; -ENODEV when mc_type is neither.
+ */
+static int __devinit us3mc_probe(struct of_device *op,
+				 const struct of_device_id *match)
+{
+	switch (mc_type) {
+	case MC_TYPE_SAFARI:
+		return chmc_probe(op, match);
+	case MC_TYPE_JBUS:
+		return jbusmc_probe(op, match);
+	default:
+		return -ENODEV;
+	}
+}
+
+/* Tear down Safari controller P: unlink it from mctrl_list, unmap its
+ * registers and free it.
+ *
+ * The unlink goes through mc_list_del() so that it takes
+ * mctrl_list_lock, matching mc_list_add() in the probe path and the
+ * JBUS teardown.
+ */
+static void __devexit chmc_destroy(struct of_device *op, struct chmc *p)
+{
+	mc_list_del(&p->list);
+	of_iounmap(&op->resource[0], p->regs, 0x48);
+	kfree(p);
+}
+
+/* Tear down JBUS controller P: unlink it from mctrl_list under the
+ * list lock, unmap its registers and free it.
+ */
+static void __devexit jbusmc_destroy(struct of_device *op, struct jbusmc *p)
+{
+ mc_list_del(&p->list);
+ of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE);
+ kfree(p);
+}
+
+/* Remove callback: destroy whatever controller state the probe stored
+ * as driver data, using the teardown that matches mc_type.  Always
+ * returns 0.
+ */
+static int __devexit us3mc_remove(struct of_device *op)
+{
+	void *ctrl = dev_get_drvdata(&op->dev);
+
+	if (!ctrl)
+		return 0;
+
+	switch (mc_type) {
+	case MC_TYPE_SAFARI:
+		chmc_destroy(op, ctrl);
+		break;
+	case MC_TYPE_JBUS:
+		jbusmc_destroy(op, ctrl);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/* Device nodes named "memory-controller" are handled by this driver. */
+static const struct of_device_id us3mc_match[] = {
+ {
+ .name = "memory-controller",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, us3mc_match);
+
+/* Driver glue registered by us3mc_init(). */
+static struct of_platform_driver us3mc_driver = {
+ .name = "us3mc",
+ .match_table = us3mc_match,
+ .probe = us3mc_probe,
+ .remove = __devexit_p(us3mc_remove),
+};
+
+/* True when tlb_type is cheetah or cheetah_plus. */
+static inline bool us3mc_platform(void)
+{
+	return tlb_type == cheetah || tlb_type == cheetah_plus;
+}
+
+/* Module init: on a supported platform pick the controller flavour
+ * from the %ver register (JBUS for Jalapeno/Serrano, Safari
+ * otherwise), register the matching DIMM printer and then the driver.
+ * The printer is unregistered again if driver registration fails.
+ */
+static int __init us3mc_init(void)
+{
+	unsigned long ver, impl;
+	int ret;
+
+	if (!us3mc_platform())
+		return -ENODEV;
+
+	__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+	impl = ver >> 32UL;
+
+	if (impl == __JALAPENO_ID || impl == __SERRANO_ID) {
+		mc_type = MC_TYPE_JBUS;
+		us3mc_dimm_printer = jbusmc_print_dimm;
+	} else {
+		mc_type = MC_TYPE_SAFARI;
+		us3mc_dimm_printer = chmc_print_dimm;
+	}
+
+	ret = register_dimm_printer(us3mc_dimm_printer);
+	if (ret)
+		return ret;
+
+	ret = of_register_driver(&us3mc_driver, &of_bus_type);
+	if (ret)
+		unregister_dimm_printer(us3mc_dimm_printer);
+
+	return ret;
+}
+
+/* Module exit: undo us3mc_init() on platforms where it registered
+ * anything.
+ */
+static void __exit us3mc_cleanup(void)
+{
+	if (!us3mc_platform())
+		return;
+
+	unregister_dimm_printer(us3mc_dimm_printer);
+	of_unregister_driver(&us3mc_driver);
+}
+
+module_init(us3mc_init);
+module_exit(us3mc_cleanup);
diff --git a/arch/sparc/kernel/compat_audit.c b/arch/sparc/kernel/compat_audit.c
new file mode 100644
index 000000000000..d865575b25bf
--- /dev/null
+++ b/arch/sparc/kernel/compat_audit.c
@@ -0,0 +1,43 @@
+#define __32bit_syscall_numbers__
+#include <asm/unistd.h>
+
+/* Entries come from <asm-generic/audit_dir_write.h>; ~0U ends the list. */
+unsigned sparc32_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+/* Entries come from <asm-generic/audit_change_attr.h>; ~0U ends the list. */
+unsigned sparc32_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+/* Entries come from <asm-generic/audit_write.h>; ~0U ends the list. */
+unsigned sparc32_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+/* Entries come from <asm-generic/audit_read.h>; ~0U ends the list. */
+unsigned sparc32_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+/* Entries come from <asm-generic/audit_signal.h>; ~0U ends the list. */
+unsigned sparc32_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+/* Classify a 32-bit syscall number: 2 for open, 3 for openat, 4 for
+ * socketcall, 5 for execve and 1 for everything else.
+ */
+int sparc32_classify_syscall(unsigned syscall)
+{
+	if (syscall == __NR_open)
+		return 2;
+	if (syscall == __NR_openat)
+		return 3;
+	if (syscall == __NR_socketcall)
+		return 4;
+	if (syscall == __NR_execve)
+		return 5;
+	return 1;
+}
diff --git a/arch/sparc/kernel/cpu_64.c b/arch/sparc/kernel/cpu_64.c
new file mode 100644
index 000000000000..0c9ac83ed0a8
--- /dev/null
+++ b/arch/sparc/kernel/cpu_64.c
@@ -0,0 +1,166 @@
+/* cpu.c: Dinky routines to look for the kind of Sparc cpu
+ * we are on.
+ *
+ * Copyright (C) 1996, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <asm/asi.h>
+#include <asm/system.h>
+#include <asm/fpumacro.h>
+#include <asm/cpudata.h>
+#include <asm/spitfire.h>
+#include <asm/oplib.h>
+
+#include "entry.h"
+
+DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
+
+/* One known CPU: the manufacturer/implementation pair to match and the
+ * CPU and FPU names reported for it.
+ */
+struct cpu_chip_info {
+ unsigned short manuf;
+ unsigned short impl;
+ const char *cpu_name;
+ const char *fp_name;
+};
+
+/* Table of known non-hypervisor CPUs, searched by find_cpu_chip() on
+ * the manufacturer and implementation fields of the %ver register.
+ */
+static const struct cpu_chip_info cpu_chips[] = {
+	{ .manuf = 0x17, .impl = 0x10,
+	  .cpu_name = "TI UltraSparc I   (SpitFire)",
+	  .fp_name = "UltraSparc I integrated FPU" },
+	{ .manuf = 0x22, .impl = 0x10,
+	  .cpu_name = "TI UltraSparc I   (SpitFire)",
+	  .fp_name = "UltraSparc I integrated FPU" },
+	{ .manuf = 0x17, .impl = 0x11,
+	  .cpu_name = "TI UltraSparc II  (BlackBird)",
+	  .fp_name = "UltraSparc II integrated FPU" },
+	{ .manuf = 0x17, .impl = 0x12,
+	  .cpu_name = "TI UltraSparc IIi (Sabre)",
+	  .fp_name = "UltraSparc IIi integrated FPU" },
+	{ .manuf = 0x17, .impl = 0x13,
+	  .cpu_name = "TI UltraSparc IIe (Hummingbird)",
+	  .fp_name = "UltraSparc IIe integrated FPU" },
+	{ .manuf = 0x3e, .impl = 0x14,
+	  .cpu_name = "TI UltraSparc III (Cheetah)",
+	  .fp_name = "UltraSparc III integrated FPU" },
+	{ .manuf = 0x3e, .impl = 0x15,
+	  .cpu_name = "TI UltraSparc III+ (Cheetah+)",
+	  .fp_name = "UltraSparc III+ integrated FPU" },
+	{ .manuf = 0x3e, .impl = 0x16,
+	  .cpu_name = "TI UltraSparc IIIi (Jalapeno)",
+	  .fp_name = "UltraSparc IIIi integrated FPU" },
+	{ .manuf = 0x3e, .impl = 0x18,
+	  .cpu_name = "TI UltraSparc IV (Jaguar)",
+	  .fp_name = "UltraSparc IV integrated FPU" },
+	{ .manuf = 0x3e, .impl = 0x19,
+	  .cpu_name = "TI UltraSparc IV+ (Panther)",
+	  .fp_name = "UltraSparc IV+ integrated FPU" },
+	{ .manuf = 0x3e, .impl = 0x22,
+	  .cpu_name = "TI UltraSparc IIIi+ (Serrano)",
+	  .fp_name = "UltraSparc IIIi+ integrated FPU" },
+};
+
+/* Number of entries in cpu_chips[].  The macro previously named a
+ * nonexistent array (linux_sparc_chips), so any use of it would have
+ * failed to compile.
+ */
+#define NSPARCCHIPS  ARRAY_SIZE(cpu_chips)
+
+const char *sparc_cpu_type;
+const char *sparc_fpu_type;
+
+/* Set sparc_cpu_type/sparc_fpu_type from sun4v_chip_type, warning (and
+ * using "Unknown" names) for a chip type that is not recognized.
+ */
+static void __init sun4v_cpu_probe(void)
+{
+	const char *cpu_name;
+	const char *fpu_name;
+
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_NIAGARA1:
+		cpu_name = "UltraSparc T1 (Niagara)";
+		fpu_name = "UltraSparc T1 integrated FPU";
+		break;
+
+	case SUN4V_CHIP_NIAGARA2:
+		cpu_name = "UltraSparc T2 (Niagara2)";
+		fpu_name = "UltraSparc T2 integrated FPU";
+		break;
+
+	default:
+		printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
+		       prom_cpu_compatible);
+		cpu_name = "Unknown SUN4V CPU";
+		fpu_name = "Unknown SUN4V FPU";
+		break;
+	}
+
+	sparc_cpu_type = cpu_name;
+	sparc_fpu_type = fpu_name;
+}
+
+/* Return the cpu_chips[] entry whose manufacturer and implementation
+ * both match, or NULL when there is none.
+ */
+static const struct cpu_chip_info * __init find_cpu_chip(unsigned short manuf,
+							 unsigned short impl)
+{
+	const struct cpu_chip_info *chip = cpu_chips;
+	const struct cpu_chip_info *end = cpu_chips + ARRAY_SIZE(cpu_chips);
+
+	for (; chip < end; chip++) {
+		if (chip->manuf != manuf || chip->impl != impl)
+			continue;
+		return chip;
+	}
+	return NULL;
+}
+
+/* Initcall that fills in sparc_cpu_type/sparc_fpu_type.
+ *
+ * Under the hypervisor the names come from sun4v_cpu_probe();
+ * otherwise the manufacturer (bits 63:48) and implementation
+ * (bits 47:32) fields of %ver are looked up in cpu_chips[], with
+ * "Unknown" names and an error message when there is no match.
+ * Always returns 0.
+ */
+static int __init cpu_type_probe(void)
+{
+	unsigned long ver, manuf, impl;
+	const struct cpu_chip_info *chip;
+
+	if (tlb_type == hypervisor) {
+		sun4v_cpu_probe();
+		return 0;
+	}
+
+	__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+
+	manuf = ((ver >> 48) & 0xffff);
+	impl = ((ver >> 32) & 0xffff);
+
+	chip = find_cpu_chip(manuf, impl);
+	if (!chip) {
+		printk(KERN_ERR "CPU: Unknown chip, manuf[%lx] impl[%lx]\n",
+		       manuf, impl);
+		sparc_cpu_type = "Unknown CPU";
+		sparc_fpu_type = "Unknown FPU";
+		return 0;
+	}
+
+	sparc_cpu_type = chip->cpu_name;
+	sparc_fpu_type = chip->fp_name;
+	return 0;
+}
+
+arch_initcall(cpu_type_probe);
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
new file mode 100644
index 000000000000..f52e0534d91d
--- /dev/null
+++ b/arch/sparc/kernel/ds.c
@@ -0,0 +1,1244 @@
+/* ds.c: Domain Services driver for Logical Domains
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/kthread.h>
+#include <linux/reboot.h>
+#include <linux/cpu.h>
+
+#include <asm/ldc.h>
+#include <asm/vio.h>
+#include <asm/mdesc.h>
+#include <asm/head.h>
+#include <asm/irq.h>
+
+#define DRV_MODULE_NAME "ds"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.0"
+#define DRV_MODULE_RELDATE "Jul 11, 2007"
+
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Sun LDOM domain services driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+/* Header that starts every message structure below: a message type
+ * (one of the DS_* values) and a length.  The data handlers in this
+ * file set len to the packet size minus sizeof(struct ds_msg_tag).
+ */
+struct ds_msg_tag {
+ __u32 type;
+#define DS_INIT_REQ 0x00
+#define DS_INIT_ACK 0x01
+#define DS_INIT_NACK 0x02
+#define DS_REG_REQ 0x03
+#define DS_REG_ACK 0x04
+#define DS_REG_NACK 0x05
+#define DS_UNREG_REQ 0x06
+#define DS_UNREG_ACK 0x07
+#define DS_UNREG_NACK 0x08
+#define DS_DATA 0x09
+#define DS_NACK 0x0a
+
+ __u32 len;
+};
+
+/* Result codes */
+#define DS_OK 0x00
+#define DS_REG_VER_NACK 0x01
+#define DS_REG_DUP 0x02
+#define DS_INV_HDL 0x03
+#define DS_TYPE_UNKNOWN 0x04
+
+/* A major/minor version pair. */
+struct ds_version {
+ __u16 major;
+ __u16 minor;
+};
+
+/* Message made of a tag followed by a full version pair. */
+struct ds_ver_req {
+ struct ds_msg_tag tag;
+ struct ds_version ver;
+};
+
+/* Message made of a tag followed by a minor version number. */
+struct ds_ver_ack {
+ struct ds_msg_tag tag;
+ __u16 minor;
+};
+
+/* Message made of a tag followed by a major version number. */
+struct ds_ver_nack {
+ struct ds_msg_tag tag;
+ __u16 major;
+};
+
+/* Message made of a tag, a 64-bit handle, a version pair and a
+ * variable-length service id that trails the fixed part.
+ */
+struct ds_reg_req {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 major;
+ __u16 minor;
+ char svc_id[0];
+};
+
+/* Message made of a tag, a 64-bit handle and a minor version number. */
+struct ds_reg_ack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 minor;
+};
+
+/* Message made of a tag, a 64-bit handle and a major version number. */
+struct ds_reg_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 major;
+};
+
+/* Message made of a tag and a 64-bit handle. */
+struct ds_unreg_req {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+/* Message made of a tag and a 64-bit handle. */
+struct ds_unreg_ack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+/* Message made of a tag and a 64-bit handle. */
+struct ds_unreg_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+/* Header of a data message: tag plus the 64-bit handle.  The data
+ * handlers in this file read the service-specific request from the
+ * bytes that immediately follow this header.
+ */
+struct ds_data {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+/* Message made of a tag, a 64-bit handle and a 64-bit result. */
+struct ds_data_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u64 result;
+};
+
+struct ds_info;
+/* State of one service ("capability"): its handle, the callback that
+ * receives data for it, its service id string and a CAP_STATE_* value.
+ */
+struct ds_cap_state {
+ __u64 handle;
+
+ /* Handler with the signature shared by the *_data functions below. */
+ void (*data)(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+
+ const char *service_id;
+
+ u8 state;
+#define CAP_STATE_UNKNOWN 0x00
+#define CAP_STATE_REG_SENT 0x01
+#define CAP_STATE_REGISTERED 0x02
+};
+
+static void md_update_data(struct ds_info *dp, struct ds_cap_state *cp,
+ void *buf, int len);
+static void domain_shutdown_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+static void domain_panic_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+#ifdef CONFIG_HOTPLUG_CPU
+static void dr_cpu_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+#endif
+static void ds_pri_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+static void ds_var_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+
+/* Service table: one entry per service id, pairing it with its data
+ * handler.  The "dr-cpu" entry exists only with CONFIG_HOTPLUG_CPU.
+ * NOTE(review): the name suggests this is copied into each
+ * ds_info::ds_states -- confirm at the point of use.
+ */
+static struct ds_cap_state ds_states_template[] = {
+ {
+ .service_id = "md-update",
+ .data = md_update_data,
+ },
+ {
+ .service_id = "domain-shutdown",
+ .data = domain_shutdown_data,
+ },
+ {
+ .service_id = "domain-panic",
+ .data = domain_panic_data,
+ },
+#ifdef CONFIG_HOTPLUG_CPU
+ {
+ .service_id = "dr-cpu",
+ .data = dr_cpu_data,
+ },
+#endif
+ {
+ .service_id = "pri",
+ .data = ds_pri_data,
+ },
+ {
+ .service_id = "var-config",
+ .data = ds_var_data,
+ },
+ {
+ .service_id = "var-config-backup",
+ .data = ds_var_data,
+ },
+};
+
+static DEFINE_SPINLOCK(ds_lock);
+
+/* Per-channel domain services state; instances are chained through
+ * the next pointer.
+ */
+struct ds_info {
+ /* LDC channel that replies are written to. */
+ struct ldc_channel *lp;
+ u8 hs_state;
+#define DS_HS_START 0x01
+#define DS_HS_DONE 0x02
+
+ /* Printed as "ds-<id>" in log messages. */
+ u64 id;
+
+ void *rcv_buf;
+ int rcv_buf_len;
+
+ /* Array of num_ds_states service entries. */
+ struct ds_cap_state *ds_states;
+ int num_ds_states;
+
+ struct ds_info *next;
+};
+
+static struct ds_info *ds_info_list;
+
+/* Look up a service by handle: the upper 32 bits of HANDLE index
+ * dp->ds_states[].  Returns NULL when the index is out of range.
+ */
+static struct ds_cap_state *find_cap(struct ds_info *dp, u64 handle)
+{
+	unsigned int slot = handle >> 32;
+
+	return (slot < dp->num_ds_states) ? &dp->ds_states[slot] : NULL;
+}
+
+/* Look up a service by its service id string; NULL when no entry of
+ * dp->ds_states[] carries NAME.
+ */
+static struct ds_cap_state *find_cap_by_string(struct ds_info *dp,
+					       const char *name)
+{
+	int idx;
+
+	for (idx = 0; idx < dp->num_ds_states; idx++) {
+		struct ds_cap_state *cap = &dp->ds_states[idx];
+
+		if (!strcmp(cap->service_id, name))
+			return cap;
+	}
+	return NULL;
+}
+
+/* Write LEN bytes of DATA to channel LP, retrying up to 1000 times
+ * with a 1us delay for as long as ldc_write() reports -EAGAIN.
+ * Returns the result of the last ldc_write() call.
+ */
+static int __ds_send(struct ldc_channel *lp, void *data, int len)
+{
+	int attempts_left;
+	int err = -EINVAL;
+
+	for (attempts_left = 1000; attempts_left > 0; attempts_left--) {
+		err = ldc_write(lp, data, len);
+		/* Anything other than "try again" ends the loop. */
+		if (err != -EAGAIN)
+			break;
+		udelay(1);
+	}
+
+	return err;
+}
+
+/* Send LEN bytes of DATA on LP via __ds_send() while holding ds_lock
+ * with interrupts disabled; returns __ds_send()'s result.
+ */
+static int ds_send(struct ldc_channel *lp, void *data, int len)
+{
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&ds_lock, flags);
+ err = __ds_send(lp, data, len);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ return err;
+}
+
+/* Request body read by md_update_data(); req_num is echoed in the reply. */
+struct ds_md_update_req {
+ __u64 req_num;
+};
+
+/* Reply body sent by md_update_data(). */
+struct ds_md_update_res {
+ __u64 req_num;
+ __u32 result;
+};
+
+/* Data handler for the "md-update" service: log the request, call
+ * mdesc_update(), and answer with a DS_DATA packet that echoes the
+ * request number with result DS_OK.  LEN is not consulted.
+ */
+static void md_update_data(struct ds_info *dp,
+			   struct ds_cap_state *cp,
+			   void *buf, int len)
+{
+	struct ldc_channel *chan = dp->lp;
+	struct ds_data *hdr = buf;
+	/* The request body sits directly behind the data header. */
+	struct ds_md_update_req *req = (struct ds_md_update_req *) (hdr + 1);
+	struct {
+		struct ds_data		data;
+		struct ds_md_update_res	res;
+	} reply;
+
+	printk(KERN_INFO "ds-%lu: Machine description update.\n", dp->id);
+
+	mdesc_update();
+
+	/* The whole structure goes on the wire, so clear it first. */
+	memset(&reply, 0, sizeof(reply));
+	reply.data.handle = cp->handle;
+	reply.data.tag.type = DS_DATA;
+	reply.data.tag.len = sizeof(reply) - sizeof(struct ds_msg_tag);
+	reply.res.result = DS_OK;
+	reply.res.req_num = req->req_num;
+
+	ds_send(chan, &reply, sizeof(reply));
+}
+
+/* Request body read by domain_shutdown_data(); only req_num is used
+ * there.
+ */
+struct ds_shutdown_req {
+ __u64 req_num;
+ __u32 ms_delay;
+};
+
+/* Reply body sent by domain_shutdown_data(); reason[0] is set to 0. */
+struct ds_shutdown_res {
+ __u64 req_num;
+ __u32 result;
+ char reason[1];
+};
+
+/* Data handler for the "domain-shutdown" service: log the request,
+ * answer with a DS_DATA packet that echoes the request number with
+ * result DS_OK and an empty reason, then start an orderly poweroff.
+ * LEN is not consulted.
+ */
+static void domain_shutdown_data(struct ds_info *dp,
+				 struct ds_cap_state *cp,
+				 void *buf, int len)
+{
+	struct ldc_channel *chan = dp->lp;
+	struct ds_data *hdr = buf;
+	/* The request body sits directly behind the data header. */
+	struct ds_shutdown_req *req = (struct ds_shutdown_req *) (hdr + 1);
+	struct {
+		struct ds_data		data;
+		struct ds_shutdown_res	res;
+	} reply;
+
+	printk(KERN_ALERT "ds-%lu: Shutdown request from "
+	       "LDOM manager received.\n", dp->id);
+
+	/* The whole structure goes on the wire, so clear it first. */
+	memset(&reply, 0, sizeof(reply));
+	reply.data.handle = cp->handle;
+	reply.data.tag.type = DS_DATA;
+	reply.data.tag.len = sizeof(reply) - sizeof(struct ds_msg_tag);
+	reply.res.reason[0] = 0;
+	reply.res.result = DS_OK;
+	reply.res.req_num = req->req_num;
+
+	ds_send(chan, &reply, sizeof(reply));
+
+	orderly_poweroff(true);
+}
+
+/* Payload of a domain panic request (follows the ds_data header). */
+struct ds_panic_req {
+ __u64 req_num;
+};
+
+/* Reply payload; domain_panic_data() sends DS_OK with reason[0] set
+ * to 0.
+ */
+struct ds_panic_res {
+ __u64 req_num;
+ __u32 result;
+ char reason[1];
+};
+
+/* Data handler for panic requests.
+ *
+ * Acknowledges the request (DS_OK, reason byte 0) and then calls
+ * panic().  @len is not used.
+ */
+static void domain_panic_data(struct ds_info *dp,
+			      struct ds_cap_state *cp,
+			      void *buf, int len)
+{
+	struct ldc_channel *chan = dp->lp;
+	struct ds_data *in = buf;
+	/* The request payload sits directly behind the ds_data header. */
+	struct ds_panic_req *req = (struct ds_panic_req *) (in + 1);
+	struct {
+		struct ds_data		data;
+		struct ds_panic_res	res;
+	} reply;
+
+	printk(KERN_ALERT "ds-%lu: Panic request from "
+	       "LDOM manager received.\n", dp->id);
+
+	memset(&reply, 0, sizeof(reply));
+	reply.data.tag.type = DS_DATA;
+	reply.data.tag.len = sizeof(reply) - sizeof(struct ds_msg_tag);
+	reply.data.handle = cp->handle;
+	reply.res.req_num = req->req_num;
+	reply.res.result = DS_OK;
+	reply.res.reason[0] = 0;
+
+	ds_send(chan, &reply, sizeof(reply));
+
+	panic("PANIC requested by LDOM manager.");
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* Header of a DR-CPU message; it follows the struct ds_data header.
+ * In requests it is followed by num_records u32 cpu ids (see
+ * dr_cpu_data()); in responses by num_records dr_cpu_resp_entry
+ * records (see dr_cpu_init_response()).
+ */
+struct dr_cpu_tag {
+ __u64 req_num;
+ __u32 type;
+#define DR_CPU_CONFIGURE 0x43
+#define DR_CPU_UNCONFIGURE 0x55
+#define DR_CPU_FORCE_UNCONFIGURE 0x46
+#define DR_CPU_STATUS 0x53
+
+/* Responses */
+#define DR_CPU_OK 0x6f
+#define DR_CPU_ERROR 0x65
+
+ __u32 num_records;
+};
+
+/* One per-cpu record of a DR-CPU response. */
+struct dr_cpu_resp_entry {
+ __u32 cpu;
+ /* Outcome of the operation for this cpu. */
+ __u32 result;
+#define DR_CPU_RES_OK 0x00
+#define DR_CPU_RES_FAILURE 0x01
+#define DR_CPU_RES_BLOCKED 0x02
+#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03
+#define DR_CPU_RES_NOT_IN_MD 0x04
+
+ /* State reported for the cpu in the response. */
+ __u32 stat;
+#define DR_CPU_STAT_NOT_PRESENT 0x00
+#define DR_CPU_STAT_UNCONFIGURED 0x01
+#define DR_CPU_STAT_CONFIGURED 0x02
+
+ /* Never written by the code in this file; responses are kzalloc'ed,
+  * so it is sent as 0.
+  */
+ __u32 str_off;
+};
+
+/* Send a DR_CPU_ERROR reply with zero records for the request in
+ * @data, echoing its req_num.  Uses __ds_send() directly, so the
+ * caller is expected to hold ds_lock (see dr_cpu_send_error()).
+ */
+static void __dr_cpu_send_error(struct ds_info *dp,
+				struct ds_cap_state *cp,
+				struct ds_data *data)
+{
+	struct dr_cpu_tag *req = (struct dr_cpu_tag *) (data + 1);
+	int reply_len = (sizeof(struct ds_data) +
+			 sizeof(struct dr_cpu_tag));
+	struct {
+		struct ds_data data;
+		struct dr_cpu_tag tag;
+	} reply;
+
+	memset(&reply, 0, sizeof(reply));
+
+	/* DS header. */
+	reply.data.tag.type = DS_DATA;
+	reply.data.tag.len = reply_len - sizeof(struct ds_msg_tag);
+	reply.data.handle = cp->handle;
+
+	/* DR-CPU header. */
+	reply.tag.req_num = req->req_num;
+	reply.tag.type = DR_CPU_ERROR;
+	reply.tag.num_records = 0;
+
+	__ds_send(dp->lp, &reply, reply_len);
+}
+
+/* Locked variant of __dr_cpu_send_error(): holds ds_lock (IRQ-safe)
+ * around the send.
+ */
+static void dr_cpu_send_error(struct ds_info *dp,
+			      struct ds_cap_state *cp,
+			      struct ds_data *data)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&ds_lock, irq_flags);
+	__dr_cpu_send_error(dp, cp, data);
+	spin_unlock_irqrestore(&ds_lock, irq_flags);
+}
+
+#define CPU_SENTINEL 0xffffffff
+
+/* Blank out repeated cpu ids in @list (of @num_ents entries).
+ *
+ * The first occurrence of each id is kept; every later occurrence is
+ * overwritten in place with CPU_SENTINEL.  O(n^2) in @num_ents.
+ */
+static void purge_dups(u32 *list, u32 num_ents)
+{
+	unsigned int keep, scan;
+
+	for (keep = 0; keep < num_ents; keep++) {
+		const u32 id = list[keep];
+
+		if (id != CPU_SENTINEL) {
+			for (scan = keep + 1; scan < num_ents; scan++) {
+				if (list[scan] == id)
+					list[scan] = CPU_SENTINEL;
+			}
+		}
+	}
+}
+
+/* Byte size of a DR-CPU response holding @ncpus records: the ds_data
+ * header, the dr_cpu_tag header and one dr_cpu_resp_entry per cpu.
+ */
+static int dr_cpu_size_response(int ncpus)
+{
+	size_t headers = sizeof(struct ds_data) + sizeof(struct dr_cpu_tag);
+	size_t records = sizeof(struct dr_cpu_resp_entry) * ncpus;
+
+	return headers + records;
+}
+
+/* Fill in a DR-CPU response buffer with an all-success result.
+ *
+ * @resp must have room for @ncpus records (see
+ * dr_cpu_size_response()).  One record is written per cpu set in
+ * @mask, each with result DR_CPU_RES_OK and stat @default_stat.
+ * BUGs if the number of cpus in @mask differs from @ncpus.
+ */
+static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
+				 u64 handle, int resp_len, int ncpus,
+				 cpumask_t *mask, u32 default_stat)
+{
+	struct dr_cpu_tag *hdr = (struct dr_cpu_tag *) (resp + 1);
+	struct dr_cpu_resp_entry *rec = (struct dr_cpu_resp_entry *) (hdr + 1);
+	int filled = 0;
+	int cpu;
+
+	resp->tag.type = DS_DATA;
+	resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
+	resp->handle = handle;
+
+	hdr->req_num = req_num;
+	hdr->type = DR_CPU_OK;
+	hdr->num_records = ncpus;
+
+	for_each_cpu_mask(cpu, *mask) {
+		rec->cpu = cpu;
+		rec->result = DR_CPU_RES_OK;
+		rec->stat = default_stat;
+		rec++;
+		filled++;
+	}
+	BUG_ON(filled != ncpus);
+}
+
+/* Overwrite result/stat of the record for @cpu in a response that was
+ * set up by dr_cpu_init_response().  Only the first matching record
+ * among the first @ncpus is updated; no match means no change.
+ */
+static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
+			u32 res, u32 stat)
+{
+	struct dr_cpu_tag *hdr = (struct dr_cpu_tag *) (resp + 1);
+	struct dr_cpu_resp_entry *records =
+		(struct dr_cpu_resp_entry *) (hdr + 1);
+	int n;
+
+	for (n = 0; n < ncpus; n++) {
+		struct dr_cpu_resp_entry *rec = &records[n];
+
+		if (rec->cpu == cpu) {
+			rec->result = res;
+			rec->stat = stat;
+			return;
+		}
+	}
+}
+
+/* Bring up every cpu in @mask and report the per-cpu outcome.
+ *
+ * A response is pre-filled as "all configured OK", each cpu is started
+ * with cpu_up(), and failing cpus are re-marked in the response before
+ * it is sent.  Returns 0 once the response has been handed to
+ * __ds_send() (whose result is not checked), or -ENOMEM if the
+ * response buffer could not be allocated.
+ */
+static int __cpuinit dr_cpu_configure(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ u64 req_num,
+ cpumask_t *mask)
+{
+ struct ds_data *resp;
+ int resp_len, ncpus, cpu;
+ unsigned long flags;
+
+ ncpus = cpus_weight(*mask);
+ resp_len = dr_cpu_size_response(ncpus);
+ resp = kzalloc(resp_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ /* Start from an all-success response; failures are patched below. */
+ dr_cpu_init_response(resp, req_num, cp->handle,
+ resp_len, ncpus, mask,
+ DR_CPU_STAT_CONFIGURED);
+
+ mdesc_fill_in_cpu_data(*mask);
+
+ for_each_cpu_mask(cpu, *mask) {
+ int err;
+
+ printk(KERN_INFO "ds-%lu: Starting cpu %d...\n",
+ dp->id, cpu);
+ err = cpu_up(cpu);
+ if (err) {
+ /* Generic failure unless a more specific cause applies. */
+ __u32 res = DR_CPU_RES_FAILURE;
+ __u32 stat = DR_CPU_STAT_UNCONFIGURED;
+
+ if (!cpu_present(cpu)) {
+ /* CPU not present in MD */
+ res = DR_CPU_RES_NOT_IN_MD;
+ stat = DR_CPU_STAT_NOT_PRESENT;
+ } else if (err == -ENODEV) {
+ /* CPU did not call in successfully */
+ res = DR_CPU_RES_CPU_NOT_RESPONDING;
+ }
+
+ printk(KERN_INFO "ds-%lu: CPU startup failed err=%d\n",
+ dp->id, err);
+ dr_cpu_mark(resp, cpu, ncpus, res, stat);
+ }
+ }
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __ds_send(dp->lp, resp, resp_len);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ kfree(resp);
+
+ /* Redistribute IRQs, taking into account the new cpus. */
+ fixup_irqs();
+
+ return 0;
+}
+
+/* Take down every cpu in @mask and report the per-cpu outcome.
+ *
+ * The response starts out as "all unconfigured OK"; any cpu for which
+ * cpu_down() fails is re-marked as DR_CPU_RES_FAILURE and still
+ * DR_CPU_STAT_CONFIGURED.  Returns 0 after the response was handed to
+ * __ds_send(), or -ENOMEM if the response could not be allocated.
+ */
+static int dr_cpu_unconfigure(struct ds_info *dp,
+			      struct ds_cap_state *cp,
+			      u64 req_num,
+			      cpumask_t *mask)
+{
+	const int ncpus = cpus_weight(*mask);
+	const int resp_len = dr_cpu_size_response(ncpus);
+	struct ds_data *resp;
+	unsigned long irq_flags;
+	int cpu;
+
+	resp = kzalloc(resp_len, GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	dr_cpu_init_response(resp, req_num, cp->handle,
+			     resp_len, ncpus, mask,
+			     DR_CPU_STAT_UNCONFIGURED);
+
+	for_each_cpu_mask(cpu, *mask) {
+		printk(KERN_INFO "ds-%lu: Shutting down cpu %d...\n",
+		       dp->id, cpu);
+		if (cpu_down(cpu))
+			dr_cpu_mark(resp, cpu, ncpus,
+				    DR_CPU_RES_FAILURE,
+				    DR_CPU_STAT_CONFIGURED);
+	}
+
+	spin_lock_irqsave(&ds_lock, irq_flags);
+	__ds_send(dp->lp, resp, resp_len);
+	spin_unlock_irqrestore(&ds_lock, irq_flags);
+
+	kfree(resp);
+
+	return 0;
+}
+
+/* Data handler for DR-CPU requests.
+ *
+ * The packet is laid out as ds_data header, dr_cpu_tag, then
+ * tag->num_records u32 cpu ids.  Request types other than
+ * CONFIGURE/UNCONFIGURE/FORCE_UNCONFIGURE are answered with an error
+ * reply.  Duplicate ids are purged in place, ids >= NR_CPUS are
+ * ignored, and the remaining cpus are configured or unconfigured.
+ *
+ * NOTE(review): tag->num_records comes from the message and is not
+ * checked against @len in this function -- confirm the sender is
+ * trusted or add validation.
+ */
+static void __cpuinit dr_cpu_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ds_data *data = buf;
+ struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
+ u32 *cpu_list = (u32 *) (tag + 1);
+ u64 req_num = tag->req_num;
+ cpumask_t mask;
+ unsigned int i;
+ int err;
+
+ switch (tag->type) {
+ case DR_CPU_CONFIGURE:
+ case DR_CPU_UNCONFIGURE:
+ case DR_CPU_FORCE_UNCONFIGURE:
+ break;
+
+ default:
+ dr_cpu_send_error(dp, cp, data);
+ return;
+ }
+
+ /* Duplicates become CPU_SENTINEL and are skipped below. */
+ purge_dups(cpu_list, tag->num_records);
+
+ cpus_clear(mask);
+ for (i = 0; i < tag->num_records; i++) {
+ if (cpu_list[i] == CPU_SENTINEL)
+ continue;
+
+ if (cpu_list[i] < NR_CPUS)
+ cpu_set(cpu_list[i], mask);
+ }
+
+ /* FORCE_UNCONFIGURE takes the same path as UNCONFIGURE. */
+ if (tag->type == DR_CPU_CONFIGURE)
+ err = dr_cpu_configure(dp, cp, req_num, &mask);
+ else
+ err = dr_cpu_unconfigure(dp, cp, req_num, &mask);
+
+ if (err)
+ dr_cpu_send_error(dp, cp, data);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/* Payload of a PRI message (follows the ds_data header); ds_pri_data()
+ * only logs req_num and type.
+ */
+struct ds_pri_msg {
+ __u64 req_num;
+ __u64 type;
+#define DS_PRI_REQUEST 0x00
+#define DS_PRI_DATA 0x01
+#define DS_PRI_UPDATE 0x02
+};
+
+/* Data handler for PRI messages: logs the request number, type and
+ * packet length.  No reply is sent.
+ */
+static void ds_pri_data(struct ds_info *dp,
+			struct ds_cap_state *cp,
+			void *buf, int len)
+{
+	struct ds_data *hdr = buf;
+	struct ds_pri_msg *msg = (struct ds_pri_msg *) (hdr + 1);
+
+	printk(KERN_INFO "ds-%lu: PRI REQ [%lx:%lx], len=%d\n",
+	       dp->id, msg->req_num, msg->type, len);
+}
+
+/* Common header of all var-config messages. */
+struct ds_var_hdr {
+ __u32 type;
+#define DS_VAR_SET_REQ 0x00
+#define DS_VAR_DELETE_REQ 0x01
+#define DS_VAR_SET_RESP 0x02
+#define DS_VAR_DELETE_RESP 0x03
+};
+
+/* Set request: ldom_set_var() stores the NUL-terminated name followed
+ * immediately by the NUL-terminated value in name_and_value.
+ */
+struct ds_var_set_msg {
+ struct ds_var_hdr hdr;
+ char name_and_value[0];
+};
+
+/* Delete request; not constructed anywhere in the visible code. */
+struct ds_var_delete_msg {
+ struct ds_var_hdr hdr;
+ char name[0];
+};
+
+/* Response to a set/delete request, consumed by ds_var_data(). */
+struct ds_var_resp {
+ struct ds_var_hdr hdr;
+ __u32 result;
+#define DS_VAR_SUCCESS 0x00
+#define DS_VAR_NO_SPACE 0x01
+#define DS_VAR_INVALID_VAR 0x02
+#define DS_VAR_INVALID_VAL 0x03
+#define DS_VAR_NOT_PRESENT 0x04
+};
+
+/* Serializes ldom_set_var() requests. */
+static DEFINE_MUTEX(ds_var_mutex);
+/* Set to 1 by ds_var_data() once ds_var_response holds a result;
+ * ldom_set_var() clears it before sending and polls it afterwards.
+ */
+static int ds_var_doorbell;
+/* Result code of the last response; reset to -1 before each request. */
+static int ds_var_response;
+
+/* Data handler for var-config responses.
+ *
+ * Set/delete responses publish their result in ds_var_response and
+ * then ring ds_var_doorbell; the write barrier orders the two stores
+ * for the poller in ldom_set_var().  Other message types are ignored.
+ */
+static void ds_var_data(struct ds_info *dp,
+			struct ds_cap_state *cp,
+			void *buf, int len)
+{
+	struct ds_data *hdr = buf;
+	struct ds_var_resp *resp = (struct ds_var_resp *) (hdr + 1);
+	int is_resp = (resp->hdr.type == DS_VAR_SET_RESP ||
+		       resp->hdr.type == DS_VAR_DELETE_RESP);
+
+	if (!is_resp)
+		return;
+
+	ds_var_response = resp->result;
+	wmb();
+	ds_var_doorbell = 1;
+}
+
+/* Ask the LDOM manager to set variable @var to @value.
+ *
+ * The request goes to the first ds_info with a registered "var-config"
+ * service, falling back to "var-config-backup".  After sending, the
+ * function busy-waits (about 1000 x 100us) for ds_var_data() to ring
+ * the doorbell and logs an error on timeout or a non-success result.
+ *
+ * The request is built in a fixed 512-byte on-stack packet.  A
+ * name/value pair that does not fit is rejected with an error message
+ * instead of being copied past the end of that buffer.
+ */
+void ldom_set_var(const char *var, const char *value)
+{
+	struct ds_cap_state *cp;
+	struct ds_info *dp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ds_lock, flags);
+	cp = NULL;
+	for (dp = ds_info_list; dp; dp = dp->next) {
+		struct ds_cap_state *tmp;
+
+		tmp = find_cap_by_string(dp, "var-config");
+		if (tmp && tmp->state == CAP_STATE_REGISTERED) {
+			cp = tmp;
+			break;
+		}
+	}
+	if (!cp) {
+		for (dp = ds_info_list; dp; dp = dp->next) {
+			struct ds_cap_state *tmp;
+
+			tmp = find_cap_by_string(dp, "var-config-backup");
+			if (tmp && tmp->state == CAP_STATE_REGISTERED) {
+				cp = tmp;
+				break;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&ds_lock, flags);
+
+	if (cp) {
+		union {
+			struct {
+				struct ds_data		data;
+				struct ds_var_set_msg	msg;
+			} header;
+			char			all[512];
+		} pkt;
+		char *base, *p;
+		size_t var_len, value_len, room;
+		int msg_len, loops, answered, response;
+
+		memset(&pkt, 0, sizeof(pkt));
+		pkt.header.data.tag.type = DS_DATA;
+		pkt.header.data.handle = cp->handle;
+		pkt.header.msg.hdr.type = DS_VAR_SET_REQ;
+		base = p = &pkt.header.msg.name_and_value[0];
+
+		/* Both strings, including their terminators, must fit in
+		 * what is left of the packet behind the headers.
+		 */
+		var_len = strlen(var) + 1;
+		value_len = strlen(value) + 1;
+		room = sizeof(pkt) - (base - pkt.all);
+		if (var_len > room || value_len > room - var_len) {
+			printk(KERN_ERR "ds-%lu: var-config [%s:%s] "
+			       "too long, not sent.\n",
+			       dp->id, var, value);
+			return;
+		}
+
+		strcpy(p, var);
+		p += var_len;
+		strcpy(p, value);
+		p += value_len;
+
+		msg_len = (sizeof(struct ds_data) +
+			   sizeof(struct ds_var_set_msg) +
+			   (p - base));
+		/* Round up to a multiple of 4; the padding bytes are
+		 * already zero from the memset above.
+		 */
+		msg_len = (msg_len + 3) & ~3;
+		pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
+
+		mutex_lock(&ds_var_mutex);
+
+		spin_lock_irqsave(&ds_lock, flags);
+		ds_var_doorbell = 0;
+		ds_var_response = -1;
+
+		__ds_send(dp->lp, &pkt, msg_len);
+		spin_unlock_irqrestore(&ds_lock, flags);
+
+		loops = 1000;
+		while (ds_var_doorbell == 0) {
+			if (loops-- < 0)
+				break;
+			barrier();
+			udelay(100);
+		}
+
+		/* Snapshot the outcome before dropping the mutex so that a
+		 * concurrent ldom_set_var() cannot reset it underneath us.
+		 */
+		answered = ds_var_doorbell;
+		response = ds_var_response;
+
+		mutex_unlock(&ds_var_mutex);
+
+		if (answered == 0 ||
+		    response != DS_VAR_SUCCESS)
+			printk(KERN_ERR "ds-%lu: var-config [%s:%s] "
+			       "failed, response(%d).\n",
+			       dp->id, var, value,
+			       response);
+	} else {
+		printk(KERN_ERR PFX "var-config not registered so "
+		       "could not set (%s) variable to (%s).\n",
+		       var, value);
+	}
+}
+
+/* Reboot the domain, optionally with a boot command.
+ *
+ * A non-empty @boot_command is stored as "boot <command>" in the
+ * "reboot-command" variable via ldom_set_var() before
+ * sun4v_mach_sir() is called.  The string is built with snprintf()
+ * so an over-long command is truncated to the 256-byte buffer rather
+ * than overflowing the stack.
+ */
+void ldom_reboot(const char *boot_command)
+{
+	/* Don't bother with any of this if the boot_command
+	 * is empty.
+	 */
+	if (boot_command && strlen(boot_command)) {
+		char full_boot_str[256];
+
+		snprintf(full_boot_str, sizeof(full_boot_str), "boot %s",
+			 boot_command);
+
+		ldom_set_var("reboot-command", full_boot_str);
+	}
+	sun4v_mach_sir();
+}
+
+/* Power off the domain by calling sun4v_mach_exit() with code 0. */
+void ldom_power_off(void)
+{
+ sun4v_mach_exit(0);
+}
+
+/* Log a connection reset together with the caller's return address.
+ * As written here it only prints; it does not change any state.
+ */
+static void ds_conn_reset(struct ds_info *dp)
+{
+ printk(KERN_ERR "ds-%lu: ds_conn_reset() from %p\n",
+ dp->id, __builtin_return_address(0));
+}
+
+/* Send a DS_REG_REQ (version 1.0) for every capability that is not
+ * yet registered.
+ *
+ * Each request gets a fresh handle: the table index in the upper 32
+ * bits and the low 32 bits of sched_clock() in the lower half.  A
+ * capability moves to CAP_STATE_REG_SENT only if the send reported a
+ * positive result.  Always returns 0.  Uses __ds_send(), so the caller
+ * is expected to hold ds_lock.
+ */
+static int register_services(struct ds_info *dp)
+{
+	struct ldc_channel *lp = dp->lp;
+	int idx;
+
+	for (idx = 0; idx < dp->num_ds_states; idx++) {
+		struct ds_cap_state *cp = &dp->ds_states[idx];
+		struct {
+			struct ds_reg_req req;
+			u8 id_buf[256];
+		} pbuf;
+		int msg_len;
+
+		if (cp->state == CAP_STATE_REGISTERED)
+			continue;
+
+		cp->handle = ((u64) idx << 32) |
+			     (sched_clock() & 0xffffffff);
+
+		msg_len = (sizeof(struct ds_reg_req) +
+			   strlen(cp->service_id));
+
+		memset(&pbuf, 0, sizeof(pbuf));
+		pbuf.req.tag.type = DS_REG_REQ;
+		pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag));
+		pbuf.req.handle = cp->handle;
+		pbuf.req.major = 1;
+		pbuf.req.minor = 0;
+		strcpy(pbuf.req.svc_id, cp->service_id);
+
+		if (__ds_send(lp, &pbuf, msg_len) > 0)
+			cp->state = CAP_STATE_REG_SENT;
+	}
+	return 0;
+}
+
+/* Process a handshake-phase packet (anything below DS_DATA).
+ *
+ * In DS_HS_START only DS_INIT_ACK is accepted; it completes the
+ * handshake and triggers service registration.  In DS_HS_DONE,
+ * REG_ACK/REG_NACK update the state of the capability named by the
+ * handle; other types are ignored.  Any other combination is reported
+ * through ds_conn_reset() and yields -ECONNRESET.
+ */
+static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt)
+{
+	struct ds_cap_state *cp;
+
+	if (dp->hs_state == DS_HS_START) {
+		if (pkt->type != DS_INIT_ACK)
+			goto conn_reset;
+
+		dp->hs_state = DS_HS_DONE;
+
+		return register_services(dp);
+	}
+
+	if (dp->hs_state != DS_HS_DONE)
+		goto conn_reset;
+
+	if (pkt->type == DS_REG_ACK) {
+		struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt;
+
+		cp = find_cap(dp, ap->handle);
+		if (cp) {
+			printk(KERN_INFO "ds-%lu: Registered %s service.\n",
+			       dp->id, cp->service_id);
+			cp->state = CAP_STATE_REGISTERED;
+		} else {
+			printk(KERN_ERR "ds-%lu: REG ACK for unknown "
+			       "handle %lx\n", dp->id, ap->handle);
+		}
+		return 0;
+	}
+
+	if (pkt->type == DS_REG_NACK) {
+		struct ds_reg_nack *np = (struct ds_reg_nack *) pkt;
+
+		cp = find_cap(dp, np->handle);
+		if (cp)
+			cp->state = CAP_STATE_UNKNOWN;
+		else
+			printk(KERN_ERR "ds-%lu: REG NACK for "
+			       "unknown handle %lx\n",
+			       dp->id, np->handle);
+	}
+
+	return 0;
+
+conn_reset:
+	ds_conn_reset(dp);
+	return -ECONNRESET;
+}
+
+/* Send a DS_NACK with result DS_INV_HDL for @handle.  Uses
+ * __ds_send(), so the caller is expected to hold ds_lock.
+ */
+static void __send_ds_nack(struct ds_info *dp, u64 handle)
+{
+	struct ds_data_nack reply = {
+		.tag.type	= DS_NACK,
+		.tag.len	= (sizeof(struct ds_data_nack) -
+				   sizeof(struct ds_msg_tag)),
+		.handle		= handle,
+		.result		= DS_INV_HDL,
+	};
+
+	__ds_send(dp->lp, &reply, sizeof(reply));
+}
+
+/* Received data packets waiting for the worker thread: ds_data()
+ * appends entries and process_ds_work() drains the list.
+ */
+static LIST_HEAD(ds_work_list);
+/* ds_thread() sleeps on this queue; ds_data() wakes it. */
+static DECLARE_WAIT_QUEUE_HEAD(ds_wait);
+
+/* One queued data packet.  The raw packet bytes are stored in req[],
+ * directly behind this header in the same allocation.
+ */
+struct ds_queue_entry {
+ struct list_head list;
+ struct ds_info *dp;
+ /* Passed to the capability's ->data handler by process_ds_work(). */
+ int req_len;
+ int __pad;
+ u64 req[0];
+};
+
+/* Drain ds_work_list and dispatch each queued packet.
+ *
+ * The whole list is moved to a private list under ds_lock, then
+ * processed without the lock: packets whose handle maps to a
+ * capability go to that capability's ->data handler, all others are
+ * answered with a NACK.  Every entry is unlinked and freed afterwards.
+ */
+static void process_ds_work(void)
+{
+	struct ds_queue_entry *ent, *next;
+	unsigned long flags;
+	LIST_HEAD(pending);
+
+	spin_lock_irqsave(&ds_lock, flags);
+	list_splice_init(&ds_work_list, &pending);
+	spin_unlock_irqrestore(&ds_lock, flags);
+
+	list_for_each_entry_safe(ent, next, &pending, list) {
+		struct ds_data *dpkt = (struct ds_data *) ent->req;
+		struct ds_info *dp = ent->dp;
+		struct ds_cap_state *cp = find_cap(dp, dpkt->handle);
+
+		if (cp) {
+			cp->data(dp, cp, dpkt, ent->req_len);
+		} else {
+			printk(KERN_ERR "ds-%lu: Data for unknown "
+			       "handle %lu\n",
+			       dp->id, dpkt->handle);
+
+			spin_lock_irqsave(&ds_lock, flags);
+			__send_ds_nack(dp, dpkt->handle);
+			spin_unlock_irqrestore(&ds_lock, flags);
+		}
+
+		list_del(&ent->list);
+		kfree(ent);
+	}
+}
+
+/* Worker thread body (started as "kldomd" by ds_init()).
+ *
+ * Sleeps on ds_wait until ds_work_list is non-empty, then processes
+ * the queued packets.  Exits with 0 once kthread_should_stop() is
+ * true.
+ */
+static int ds_thread(void *__unused)
+{
+ DEFINE_WAIT(wait);
+
+ while (1) {
+ /* Register on the wait queue before testing the list so a
+  * wake_up() between the test and schedule() is not missed.
+  */
+ prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE);
+ if (list_empty(&ds_work_list))
+ schedule();
+ finish_wait(&ds_wait, &wait);
+
+ if (kthread_should_stop())
+ break;
+
+ process_ds_work();
+ }
+
+ return 0;
+}
+
+/* Queue a received data packet for the worker thread.
+ *
+ * Copies the @len bytes at @pkt into a freshly allocated
+ * ds_queue_entry, appends it to ds_work_list and wakes ds_thread().
+ * If the allocation fails the sender gets a NACK instead.  Always
+ * returns 0.  Called from ds_event() with ds_lock held, hence
+ * GFP_ATOMIC and the unlocked __send_ds_nack().
+ */
+static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len)
+{
+	struct ds_data *dpkt = (struct ds_data *) pkt;
+	struct ds_queue_entry *qp;
+
+	qp = kmalloc(sizeof(struct ds_queue_entry) + len, GFP_ATOMIC);
+	if (!qp) {
+		__send_ds_nack(dp, dpkt->handle);
+	} else {
+		qp->dp = dp;
+		/* kmalloc() does not zero the entry and process_ds_work()
+		 * hands req_len to the data handler, so record it here.
+		 */
+		qp->req_len = len;
+		memcpy(&qp->req, pkt, len);
+		list_add_tail(&qp->list, &ds_work_list);
+		wake_up(&ds_wait);
+	}
+	return 0;
+}
+
+/* Start the handshake after the channel came up: send a DS_INIT_REQ
+ * for version 1.0 and, if the send reported a positive result, enter
+ * DS_HS_START.  Called from ds_event() with ds_lock held.
+ */
+static void ds_up(struct ds_info *dp)
+{
+	struct ds_ver_req init;
+
+	init.tag.type = DS_INIT_REQ;
+	init.tag.len = sizeof(init) - sizeof(struct ds_msg_tag);
+	init.ver.major = 1;
+	init.ver.minor = 0;
+
+	if (__ds_send(dp->lp, &init, sizeof(init)) > 0)
+		dp->hs_state = DS_HS_START;
+}
+
+/* Forget all handshake progress: hs_state goes back to 0 and every
+ * capability returns to CAP_STATE_UNKNOWN.
+ */
+static void ds_reset(struct ds_info *dp)
+{
+	int n;
+
+	dp->hs_state = 0;
+
+	for (n = 0; n < dp->num_ds_states; n++)
+		dp->ds_states[n].state = CAP_STATE_UNKNOWN;
+}
+
+/* LDC event callback for a domain-services channel.
+ *
+ * Runs entirely under ds_lock.  UP starts the handshake, RESET clears
+ * the handshake state, and DATA_READY reads messages in a loop: first
+ * the fixed-size tag, then tag->len payload bytes, which are placed
+ * right behind the tag in dp->rcv_buf.  Messages below DS_DATA go to
+ * ds_handshake(), everything else to ds_data().
+ *
+ * The payload length comes from the peer, so it is checked against
+ * the space left in the receive buffer before the second read; an
+ * oversized message is logged and reading stops.
+ * NOTE(review): after such a message the stream is no longer aligned
+ * on a message boundary -- confirm whether the channel should be
+ * torn down here.
+ */
+static void ds_event(void *arg, int event)
+{
+	struct ds_info *dp = arg;
+	struct ldc_channel *lp = dp->lp;
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&ds_lock, flags);
+
+	if (event == LDC_EVENT_UP) {
+		ds_up(dp);
+		goto out;
+	}
+
+	if (event == LDC_EVENT_RESET) {
+		ds_reset(dp);
+		goto out;
+	}
+
+	if (event != LDC_EVENT_DATA_READY) {
+		printk(KERN_WARNING "ds-%lu: Unexpected LDC event %d\n",
+		       dp->id, event);
+		goto out;
+	}
+
+	err = 0;
+	while (1) {
+		struct ds_msg_tag *tag;
+
+		err = ldc_read(lp, dp->rcv_buf, sizeof(*tag));
+
+		if (unlikely(err < 0)) {
+			if (err == -ECONNRESET)
+				ds_conn_reset(dp);
+			break;
+		}
+		if (err == 0)
+			break;
+
+		tag = dp->rcv_buf;
+
+		/* The payload is stored behind the tag; refuse lengths
+		 * that would run past the end of rcv_buf.
+		 */
+		if (tag->len > dp->rcv_buf_len - sizeof(*tag)) {
+			printk(KERN_ERR "ds-%lu: Message payload exceeds "
+			       "receive buffer.\n", dp->id);
+			ds_conn_reset(dp);
+			break;
+		}
+
+		err = ldc_read(lp, tag + 1, tag->len);
+
+		if (unlikely(err < 0)) {
+			if (err == -ECONNRESET)
+				ds_conn_reset(dp);
+			break;
+		}
+		/* Short read: stop processing this event. */
+		if (err < tag->len)
+			break;
+
+		if (tag->type < DS_DATA)
+			err = ds_handshake(dp, dp->rcv_buf);
+		else
+			err = ds_data(dp, dp->rcv_buf,
+				      sizeof(*tag) + err);
+		if (err == -ECONNRESET)
+			break;
+	}
+
+out:
+	spin_unlock_irqrestore(&ds_lock, flags);
+}
+
+/* Probe one "domain-services-port" device.
+ *
+ * Allocates the per-port ds_info, its 4096-byte receive buffer and a
+ * private copy of ds_states_template, creates and binds the LDC
+ * channel (stream mode, 4096-byte MTU) and finally links the port
+ * into ds_info_list.  Returns 0 on success or a negative errno; on
+ * failure everything allocated so far is released.
+ */
+static int __devinit ds_probe(struct vio_dev *vdev,
+ const struct vio_device_id *id)
+{
+ static int ds_version_printed;
+ struct ldc_channel_config ds_cfg = {
+ .event = ds_event,
+ .mtu = 4096,
+ .mode = LDC_MODE_STREAM,
+ };
+ struct mdesc_handle *hp;
+ struct ldc_channel *lp;
+ struct ds_info *dp;
+ const u64 *val;
+ int err, i;
+
+ /* Print the driver banner only for the first probed port. */
+ if (ds_version_printed++ == 0)
+ printk(KERN_INFO "%s", version);
+
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ /* err stays -ENOMEM for all allocation failures below. */
+ err = -ENOMEM;
+ if (!dp)
+ goto out_err;
+
+ /* dp->id remains 0 (from kzalloc) if the node has no "id". */
+ hp = mdesc_grab();
+ val = mdesc_get_property(hp, vdev->mp, "id", NULL);
+ if (val)
+ dp->id = *val;
+ mdesc_release(hp);
+
+ dp->rcv_buf = kzalloc(4096, GFP_KERNEL);
+ if (!dp->rcv_buf)
+ goto out_free_dp;
+
+ dp->rcv_buf_len = 4096;
+
+ dp->ds_states = kzalloc(sizeof(ds_states_template),
+ GFP_KERNEL);
+ if (!dp->ds_states)
+ goto out_free_rcv_buf;
+
+ memcpy(dp->ds_states, ds_states_template,
+ sizeof(ds_states_template));
+ dp->num_ds_states = ARRAY_SIZE(ds_states_template);
+
+ /* Initial handles carry only the table index in the upper half. */
+ for (i = 0; i < dp->num_ds_states; i++)
+ dp->ds_states[i].handle = ((u64)i << 32);
+
+ ds_cfg.tx_irq = vdev->tx_irq;
+ ds_cfg.rx_irq = vdev->rx_irq;
+
+ lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
+ if (IS_ERR(lp)) {
+ err = PTR_ERR(lp);
+ goto out_free_ds_states;
+ }
+ dp->lp = lp;
+
+ err = ldc_bind(lp, "DS");
+ if (err)
+ goto out_free_ldc;
+
+ spin_lock_irq(&ds_lock);
+ dp->next = ds_info_list;
+ ds_info_list = dp;
+ spin_unlock_irq(&ds_lock);
+
+ /* err is 0 here (ldc_bind() succeeded). */
+ return err;
+
+out_free_ldc:
+ ldc_free(dp->lp);
+
+out_free_ds_states:
+ kfree(dp->ds_states);
+
+out_free_rcv_buf:
+ kfree(dp->rcv_buf);
+
+out_free_dp:
+ kfree(dp);
+
+out_err:
+ return err;
+}
+
+/* Remove callback: does nothing and reports success.  Nothing that
+ * ds_probe() set up is released here.
+ */
+static int ds_remove(struct vio_dev *vdev)
+{
+ return 0;
+}
+
+/* Device types this driver binds to.
+ *
+ * The table is referenced from ds_driver.id_table, and ds_driver is
+ * not init-only data, so the table must not be placed in an init
+ * section that is discarded after boot.
+ */
+static struct vio_device_id ds_match[] = {
+	{
+		.type = "domain-services-port",
+	},
+	{},
+};
+
+/* VIO driver descriptor registered by ds_init(). */
+static struct vio_driver ds_driver = {
+ .id_table = ds_match,
+ .probe = ds_probe,
+ .remove = ds_remove,
+ .driver = {
+ .name = "ds",
+ .owner = THIS_MODULE,
+ }
+};
+
+/* Subsystem init: start the "kldomd" worker thread and register the
+ * VIO driver.
+ *
+ * Returns the error from kthread_run() if the worker could not be
+ * started (without it queued packets would never be processed),
+ * otherwise the result of vio_register_driver().
+ */
+static int __init ds_init(void)
+{
+	struct task_struct *worker;
+
+	worker = kthread_run(ds_thread, NULL, "kldomd");
+	if (IS_ERR(worker))
+		return PTR_ERR(worker);
+
+	return vio_register_driver(&ds_driver);
+}
+
+subsys_initcall(ds_init);
diff --git a/arch/sparc/kernel/dtlb_miss.S b/arch/sparc/kernel/dtlb_miss.S
new file mode 100644
index 000000000000..09a6a15a7105
--- /dev/null
+++ b/arch/sparc/kernel/dtlb_miss.S
@@ -0,0 +1,39 @@
+/* DTLB miss handler fragment.
+ *
+ * Laid out as four groups of eight instructions (labelled "ICACHE
+ * line 1..4" below); groups 3 and 4 are padding.  A zero context
+ * branches to kvmap_dtlb; a TSB tag mismatch branches to
+ * tsb_miss_dtlb with FAULT_CODE_DTLB in %g3; on a match the entry is
+ * loaded into the TLB and the trapping instruction is retried.
+ * NOTE(review): the instruction count per group looks significant --
+ * do not add or remove instructions without checking the includer.
+ */
+/* DTLB ** ICACHE line 1: Context 0 check and TSB load */
+ ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer
+ ldxa [%g0] ASI_DMMU, %g6 ! Get TAG TARGET
+ srlx %g6, 48, %g5 ! Get context
+ sllx %g6, 22, %g6 ! Zero out context
+ brz,pn %g5, kvmap_dtlb ! Context 0 processing
+ srlx %g6, 22, %g6 ! Delay slot
+ TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
+ cmp %g4, %g6 ! Compare TAG
+
+/* DTLB ** ICACHE line 2: TSB compare and TLB load */
+ bne,pn %xcc, tsb_miss_dtlb ! Miss
+ mov FAULT_CODE_DTLB, %g3 ! Follows the branch (delay slot)
+ stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load TLB
+ retry ! Trap done
+ nop
+ nop
+ nop
+ nop
+
+/* DTLB ** ICACHE line 3: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* DTLB ** ICACHE line 4: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S
new file mode 100644
index 000000000000..b2c2c5be281c
--- /dev/null
+++ b/arch/sparc/kernel/dtlb_prot.S
@@ -0,0 +1,54 @@
+/*
+ * dtlb_prot.S: DTLB protection trap strategy.
+ * This is included directly into the trap table.
+ *
+ * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+/* Ways we can get here:
+ *
+ * [TL == 0] 1) User stores to readonly pages.
+ * [TL == 0] 2) Nucleus stores to user readonly pages.
+ * [TL > 0] 3) Nucleus stores to user readonly stack frame.
+ */
+
+/* Layout: four groups of eight instructions; groups 3 and 4 are
+ * padding.  With trap level > 1 control goes to winfix_trampoline,
+ * otherwise to sparc64_realfault_common with the fault code
+ * FAULT_CODE_DTLB | FAULT_CODE_WRITE in %g4.
+ */
+
+/* PROT ** ICACHE line 1: User DTLB protection trap */
+ mov TLB_SFSR, %g1 ! Address used by the stxa below
+ stxa %g0, [%g1] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync ! Synchronize stores
+ rdpr %pstate, %g5 ! Move into alt-globals
+ wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate
+ rdpr %tl, %g1 ! Need a winfixup?
+ cmp %g1, 1 ! Trap level >1?
+ mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr
+
+/* PROT ** ICACHE line 2: More real fault processing */
+ bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup
+ ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5
+ ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault
+ mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 ! Fault code for the common path
+ nop
+ nop
+ nop
+ nop
+
+/* PROT ** ICACHE line 3: Unused... */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* PROT ** ICACHE line 4: Unused... */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
new file mode 100644
index 000000000000..77dbf6d45faf
--- /dev/null
+++ b/arch/sparc/kernel/ebus.c
@@ -0,0 +1,257 @@
+/* ebus.c: EBUS DMA library code.
+ *
+ * Copyright (C) 1997 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/ebus_dma.h>
+#include <asm/io.h>
+
+#define EBDMA_CSR 0x00UL /* Control/Status */
+#define EBDMA_ADDR 0x04UL /* DMA Address */
+#define EBDMA_COUNT 0x08UL /* DMA Count */
+
+#define EBDMA_CSR_INT_PEND 0x00000001
+#define EBDMA_CSR_ERR_PEND 0x00000002
+#define EBDMA_CSR_DRAIN 0x00000004
+#define EBDMA_CSR_INT_EN 0x00000010
+#define EBDMA_CSR_RESET 0x00000080
+#define EBDMA_CSR_WRITE 0x00000100
+#define EBDMA_CSR_EN_DMA 0x00000200
+#define EBDMA_CSR_CYC_PEND 0x00000400
+#define EBDMA_CSR_DIAG_RD_DONE 0x00000800
+#define EBDMA_CSR_DIAG_WR_DONE 0x00001000
+#define EBDMA_CSR_EN_CNT 0x00002000
+#define EBDMA_CSR_TC 0x00004000
+#define EBDMA_CSR_DIS_CSR_DRN 0x00010000
+#define EBDMA_CSR_BURST_SZ_MASK 0x000c0000
+#define EBDMA_CSR_BURST_SZ_1 0x00080000
+#define EBDMA_CSR_BURST_SZ_4 0x00000000
+#define EBDMA_CSR_BURST_SZ_8 0x00040000
+#define EBDMA_CSR_BURST_SZ_16 0x000c0000
+#define EBDMA_CSR_DIAG_EN 0x00100000
+#define EBDMA_CSR_DIS_ERR_PEND 0x00400000
+#define EBDMA_CSR_TCI_DIS 0x00800000
+#define EBDMA_CSR_EN_NEXT 0x01000000
+#define EBDMA_CSR_DMA_ON 0x02000000
+#define EBDMA_CSR_A_LOADED 0x04000000
+#define EBDMA_CSR_NA_LOADED 0x08000000
+#define EBDMA_CSR_DEV_ID_MASK 0xf0000000
+
+#define EBUS_DMA_RESET_TIMEOUT 10000
+
+/* Reset the EBUS DMA engine.
+ *
+ * Writes EBDMA_CSR_RESET and waits 1us.  Unless @no_drain is set, the
+ * CSR is then polled (up to EBUS_DMA_RESET_TIMEOUT times, 10us apart)
+ * until both the DRAIN and CYC_PEND bits are clear.  A timeout is not
+ * reported to the caller.
+ */
+static void __ebus_dma_reset(struct ebus_dma_info *p, int no_drain)
+{
+	int remaining = EBUS_DMA_RESET_TIMEOUT;
+
+	writel(EBDMA_CSR_RESET, p->regs + EBDMA_CSR);
+	udelay(1);
+
+	if (no_drain)
+		return;
+
+	while (remaining-- > 0) {
+		u32 csr = readl(p->regs + EBDMA_CSR);
+
+		if (!(csr & (EBDMA_CSR_DRAIN | EBDMA_CSR_CYC_PEND)))
+			break;
+		udelay(10);
+	}
+}
+
+/* Interrupt handler installed by ebus_dma_irq_enable().
+ *
+ * Reads the CSR and writes the same value back under p->lock, then
+ * reports the event to the client callback: ERROR if ERR_PEND is set,
+ * otherwise DMA or DEVICE (depending on the TC bit) if INT_PEND is
+ * set.  Returns IRQ_NONE when neither bit is set.
+ */
+static irqreturn_t ebus_dma_irq(int irq, void *dev_id)
+{
+	struct ebus_dma_info *p = dev_id;
+	unsigned long flags;
+	u32 status;
+	int event;
+
+	spin_lock_irqsave(&p->lock, flags);
+	status = readl(p->regs + EBDMA_CSR);
+	writel(status, p->regs + EBDMA_CSR);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (status & EBDMA_CSR_ERR_PEND) {
+		printk(KERN_CRIT "ebus_dma(%s): DMA error!\n", p->name);
+		event = EBUS_DMA_EVENT_ERROR;
+	} else if (status & EBDMA_CSR_INT_PEND) {
+		event = (status & EBDMA_CSR_TC) ?
+			EBUS_DMA_EVENT_DMA : EBUS_DMA_EVENT_DEVICE;
+	} else {
+		return IRQ_NONE;
+	}
+
+	p->callback(p, event, p->client_cookie);
+	return IRQ_HANDLED;
+}
+
+/* Validate @p and put the DMA engine into its initial state.
+ *
+ * Returns -EINVAL if regs is unset, unknown flag bits are present,
+ * the built-in handler is requested without a callback, or the name
+ * is empty.  Otherwise the engine is reset (without draining) and the
+ * CSR is programmed with 16-byte bursts and the count enabled, plus
+ * TCI_DIS when EBUS_DMA_FLAG_TCI_DISABLE is set; returns 0.
+ */
+int ebus_dma_register(struct ebus_dma_info *p)
+{
+	u32 csr = EBDMA_CSR_BURST_SZ_16 | EBDMA_CSR_EN_CNT;
+
+	if (!p->regs ||
+	    (p->flags & ~(EBUS_DMA_FLAG_USE_EBDMA_HANDLER |
+			  EBUS_DMA_FLAG_TCI_DISABLE)) ||
+	    ((p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) && !p->callback) ||
+	    p->name[0] == '\0')
+		return -EINVAL;
+
+	__ebus_dma_reset(p, 1);
+
+	if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE)
+		csr |= EBDMA_CSR_TCI_DIS;
+
+	writel(csr, p->regs + EBDMA_CSR);
+
+	return 0;
+}
+EXPORT_SYMBOL(ebus_dma_register);
+
+/* Enable (@on != 0) or disable the engine's interrupt.
+ *
+ * With EBUS_DMA_FLAG_USE_EBDMA_HANDLER set, enabling first installs
+ * ebus_dma_irq() as a shared handler (returning -EBUSY if that fails)
+ * and disabling frees it again after the INT_EN bit was cleared.
+ * The CSR update itself is done under p->lock.  Returns 0 on success.
+ */
+int ebus_dma_irq_enable(struct ebus_dma_info *p, int on)
+{
+	const int own_handler =
+		(p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) != 0;
+	unsigned long flags;
+	u32 csr;
+
+	if (on && own_handler &&
+	    request_irq(p->irq, ebus_dma_irq, IRQF_SHARED, p->name, p))
+		return -EBUSY;
+
+	spin_lock_irqsave(&p->lock, flags);
+	csr = readl(p->regs + EBDMA_CSR);
+	if (on)
+		csr |= EBDMA_CSR_INT_EN;
+	else
+		csr &= ~EBDMA_CSR_INT_EN;
+	writel(csr, p->regs + EBDMA_CSR);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (!on && own_handler)
+		free_irq(p->irq, p);
+
+	return 0;
+}
+EXPORT_SYMBOL(ebus_dma_irq_enable);
+
+/* Shut the engine's interrupt down.
+ *
+ * If INT_EN is currently set in the CSR it is cleared (under p->lock)
+ * and the IRQ is freed afterwards.  Nothing happens if INT_EN was
+ * already clear.
+ */
+void ebus_dma_unregister(struct ebus_dma_info *p)
+{
+	unsigned long flags;
+	int was_enabled;
+	u32 csr;
+
+	spin_lock_irqsave(&p->lock, flags);
+	csr = readl(p->regs + EBDMA_CSR);
+	was_enabled = (csr & EBDMA_CSR_INT_EN) != 0;
+	if (was_enabled)
+		writel(csr & ~EBDMA_CSR_INT_EN, p->regs + EBDMA_CSR);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (was_enabled)
+		free_irq(p->irq, p);
+}
+EXPORT_SYMBOL(ebus_dma_unregister);
+
+/* Program one DMA transfer of @len bytes at @bus_addr.
+ *
+ * Returns -EINVAL if @len is 2^24 or more, or if DMA is not enabled
+ * in the CSR; -EBUSY if the NA_LOADED bit is already set; otherwise
+ * writes the count and then the address register and returns 0.
+ */
+int ebus_dma_request(struct ebus_dma_info *p, dma_addr_t bus_addr, size_t len)
+{
+	unsigned long flags;
+	int err = 0;
+	u32 csr;
+
+	if (len >= (1 << 24))
+		return -EINVAL;
+
+	spin_lock_irqsave(&p->lock, flags);
+	csr = readl(p->regs + EBDMA_CSR);
+	if (!(csr & EBDMA_CSR_EN_DMA)) {
+		err = -EINVAL;
+	} else if (csr & EBDMA_CSR_NA_LOADED) {
+		err = -EBUSY;
+	} else {
+		writel(len, p->regs + EBDMA_COUNT);
+		writel(bus_addr, p->regs + EBDMA_ADDR);
+	}
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ebus_dma_request);
+
+/* Reset the engine (with drain) and program the CSR for a new series
+ * of transfers: interrupts, count and next-address enabled, 16-byte
+ * bursts, plus WRITE when @write is non-zero and TCI_DIS when
+ * EBUS_DMA_FLAG_TCI_DISABLE is set.  Reset and CSR write happen under
+ * p->lock.
+ */
+void ebus_dma_prepare(struct ebus_dma_info *p, int write)
+{
+	unsigned long flags;
+	u32 csr = EBDMA_CSR_INT_EN | EBDMA_CSR_EN_CNT |
+		  EBDMA_CSR_BURST_SZ_16 | EBDMA_CSR_EN_NEXT;
+
+	if (write)
+		csr |= EBDMA_CSR_WRITE;
+	if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE)
+		csr |= EBDMA_CSR_TCI_DIS;
+
+	spin_lock_irqsave(&p->lock, flags);
+	__ebus_dma_reset(p, 0);
+	writel(csr, p->regs + EBDMA_CSR);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+EXPORT_SYMBOL(ebus_dma_prepare);
+
+/* Return the current value of the EBDMA_COUNT register (read without
+ * taking p->lock).
+ */
+unsigned int ebus_dma_residue(struct ebus_dma_info *p)
+{
+ return readl(p->regs + EBDMA_COUNT);
+}
+EXPORT_SYMBOL(ebus_dma_residue);
+
+/* Return the current value of the EBDMA_ADDR register (read without
+ * taking p->lock).
+ */
+unsigned int ebus_dma_addr(struct ebus_dma_info *p)
+{
+ return readl(p->regs + EBDMA_ADDR);
+}
+EXPORT_SYMBOL(ebus_dma_addr);
+
+/* Set (@on != 0) or clear the EN_DMA bit in the CSR.  The register is
+ * written only if the bit actually changes; the read-modify-write is
+ * done under p->lock.
+ */
+void ebus_dma_enable(struct ebus_dma_info *p, int on)
+{
+	unsigned long flags;
+	u32 csr;
+	int enabled;
+
+	spin_lock_irqsave(&p->lock, flags);
+	csr = readl(p->regs + EBDMA_CSR);
+	enabled = (csr & EBDMA_CSR_EN_DMA) != 0;
+	if (on && !enabled)
+		writel(csr | EBDMA_CSR_EN_DMA, p->regs + EBDMA_CSR);
+	else if (!on && enabled)
+		writel(csr & ~EBDMA_CSR_EN_DMA, p->regs + EBDMA_CSR);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+EXPORT_SYMBOL(ebus_dma_enable);
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
new file mode 100644
index 000000000000..34d7ab5e10d2
--- /dev/null
+++ b/arch/sparc/kernel/entry.h
@@ -0,0 +1,195 @@
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+
+extern const char *sparc_cpu_type;
+extern const char *sparc_fpu_type;
+
+extern void __init per_cpu_patch(void);
+extern void __init sun4v_patch(void);
+extern void __init boot_cpu_id_too_large(int cpu);
+extern unsigned int dcache_parity_tl1_occurred;
+extern unsigned int icache_parity_tl1_occurred;
+
+extern asmlinkage void update_perfctrs(void);
+extern asmlinkage void sparc_breakpoint(struct pt_regs *regs);
+extern void timer_interrupt(int irq, struct pt_regs *regs);
+
+extern void do_notify_resume(struct pt_regs *regs,
+ unsigned long orig_i0,
+ unsigned long thread_info_flags);
+
+extern asmlinkage int syscall_trace_enter(struct pt_regs *regs);
+extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
+
+extern void bad_trap_tl1(struct pt_regs *regs, long lvl);
+
+extern void do_fpe_common(struct pt_regs *regs);
+extern void do_fpieee(struct pt_regs *regs);
+extern void do_fpother(struct pt_regs *regs);
+extern void do_tof(struct pt_regs *regs);
+extern void do_div0(struct pt_regs *regs);
+extern void do_illegal_instruction(struct pt_regs *regs);
+extern void mem_address_unaligned(struct pt_regs *regs,
+ unsigned long sfar,
+ unsigned long sfsr);
+extern void sun4v_do_mna(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void do_privop(struct pt_regs *regs);
+extern void do_privact(struct pt_regs *regs);
+extern void do_cee(struct pt_regs *regs);
+extern void do_cee_tl1(struct pt_regs *regs);
+extern void do_dae_tl1(struct pt_regs *regs);
+extern void do_iae_tl1(struct pt_regs *regs);
+extern void do_div0_tl1(struct pt_regs *regs);
+extern void do_fpdis_tl1(struct pt_regs *regs);
+extern void do_fpieee_tl1(struct pt_regs *regs);
+extern void do_fpother_tl1(struct pt_regs *regs);
+extern void do_ill_tl1(struct pt_regs *regs);
+extern void do_irq_tl1(struct pt_regs *regs);
+extern void do_lddfmna_tl1(struct pt_regs *regs);
+extern void do_stdfmna_tl1(struct pt_regs *regs);
+extern void do_paw(struct pt_regs *regs);
+extern void do_paw_tl1(struct pt_regs *regs);
+extern void do_vaw(struct pt_regs *regs);
+extern void do_vaw_tl1(struct pt_regs *regs);
+extern void do_tof_tl1(struct pt_regs *regs);
+extern void do_getpsr(struct pt_regs *regs);
+
+extern void spitfire_insn_access_exception(struct pt_regs *regs,
+ unsigned long sfsr,
+ unsigned long sfar);
+extern void spitfire_insn_access_exception_tl1(struct pt_regs *regs,
+ unsigned long sfsr,
+ unsigned long sfar);
+extern void spitfire_data_access_exception(struct pt_regs *regs,
+ unsigned long sfsr,
+ unsigned long sfar);
+extern void spitfire_data_access_exception_tl1(struct pt_regs *regs,
+ unsigned long sfsr,
+ unsigned long sfar);
+extern void spitfire_access_error(struct pt_regs *regs,
+ unsigned long status_encoded,
+ unsigned long afar);
+
+extern void cheetah_fecc_handler(struct pt_regs *regs,
+ unsigned long afsr,
+ unsigned long afar);
+extern void cheetah_cee_handler(struct pt_regs *regs,
+ unsigned long afsr,
+ unsigned long afar);
+extern void cheetah_deferred_handler(struct pt_regs *regs,
+ unsigned long afsr,
+ unsigned long afar);
+extern void cheetah_plus_parity_error(int type, struct pt_regs *regs);
+
+extern void sun4v_insn_access_exception(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void sun4v_insn_access_exception_tl1(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void sun4v_data_access_exception(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void sun4v_data_access_exception_tl1(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void sun4v_resum_error(struct pt_regs *regs,
+ unsigned long offset);
+extern void sun4v_resum_overflow(struct pt_regs *regs);
+extern void sun4v_nonresum_error(struct pt_regs *regs,
+ unsigned long offset);
+extern void sun4v_nonresum_overflow(struct pt_regs *regs);
+
+extern unsigned long sun4v_err_itlb_vaddr;
+extern unsigned long sun4v_err_itlb_ctx;
+extern unsigned long sun4v_err_itlb_pte;
+extern unsigned long sun4v_err_itlb_error;
+
+extern void sun4v_itlb_error_report(struct pt_regs *regs, int tl);
+
+extern unsigned long sun4v_err_dtlb_vaddr;
+extern unsigned long sun4v_err_dtlb_ctx;
+extern unsigned long sun4v_err_dtlb_pte;
+extern unsigned long sun4v_err_dtlb_error;
+
+extern void sun4v_dtlb_error_report(struct pt_regs *regs, int tl);
+extern void hypervisor_tlbop_error(unsigned long err,
+ unsigned long op);
+extern void hypervisor_tlbop_error_xcall(unsigned long err,
+ unsigned long op);
+
+/* WARNING: The error trap handlers in assembly know the precise
+ * layout of the following structure.
+ *
+ * C-level handlers in traps.c use this information to log the
+ * error and then determine how to recover (if possible).
+ *
+ * The leading comment on each member is its byte offset from the
+ * start of the structure. The named members occupy 30 u64 slots
+ * (0x00-0xef); __pad adds two more so the whole structure is
+ * 32 u64s, i.e. 0x100 bytes.
+ */
+struct cheetah_err_info {
+/*0x00*/u64 afsr;
+/*0x08*/u64 afar;
+
+ /* D-cache state */
+/*0x10*/u64 dcache_data[4]; /* The actual data */
+/*0x30*/u64 dcache_index; /* D-cache index */
+/*0x38*/u64 dcache_tag; /* D-cache tag/valid */
+/*0x40*/u64 dcache_utag; /* D-cache microtag */
+/*0x48*/u64 dcache_stag; /* D-cache snooptag */
+
+ /* I-cache state */
+/*0x50*/u64 icache_data[8]; /* The actual insns + predecode */
+/*0x90*/u64 icache_index; /* I-cache index */
+/*0x98*/u64 icache_tag; /* I-cache phys tag */
+/*0xa0*/u64 icache_utag; /* I-cache microtag */
+/*0xa8*/u64 icache_stag; /* I-cache snooptag */
+/*0xb0*/u64 icache_upper; /* I-cache upper-tag */
+/*0xb8*/u64 icache_lower; /* I-cache lower-tag */
+
+ /* E-cache state */
+/*0xc0*/u64 ecache_data[4]; /* 32 bytes from staging registers */
+/*0xe0*/u64 ecache_index; /* E-cache index */
+/*0xe8*/u64 ecache_tag; /* E-cache tag/state */
+
+ /* Pad the 30 slots above out to 32 (0x100 bytes). */
+/*0xf0*/u64 __pad[32 - 30];
+};
+#define CHAFSR_INVALID ((u64)-1L)
+
+/* This is allocated at boot time based upon the largest hardware
+ * cpu ID in the system. We allocate two entries per cpu, one for
+ * TL==0 logging and one for TL >= 1 logging.
+ */
+extern struct cheetah_err_info *cheetah_error_log;
+
+/* UPA nodes send interrupt packet to UltraSparc with first data reg
+ * value low 5 (7 on Starfire) bits holding the IRQ identifier being
+ * delivered. We must translate this into a non-vector IRQ so we can
+ * set the softint on this cpu.
+ *
+ * To make processing these packets efficient and race free we use
+ * an array of irq buckets below. The interrupt vector handler in
+ * entry.S feeds incoming packets into per-cpu pil-indexed lists.
+ *
+ * If you make changes to ino_bucket, please update hand coded assembler
+ * of the vectored interrupt trap handler(s) in entry.S and sun4v_ivec.S
+ *
+ * The leading comment on each member is its byte offset; the
+ * structure is 16 bytes with the explicit __pad word.
+ */
+struct ino_bucket {
+ /* NOTE(review): the _pa suffix suggests this is a physical
+ * address used to chain buckets - confirm against the users
+ * in the irq code and the assembler named above.
+ */
+/*0x00*/unsigned long __irq_chain_pa;
+
+ /* Virtual interrupt number assigned to this INO. */
+/*0x08*/unsigned int __virt_irq;
+ /* Explicit padding after the 32-bit __virt_irq. */
+/*0x0c*/unsigned int __pad;
+};
+
+extern struct ino_bucket *ivector_table;
+extern unsigned long ivector_table_pa;
+
+extern void handler_irq(int irq, struct pt_regs *regs);
+extern void init_irqwork_curcpu(void);
+extern void __cpuinit sun4v_register_mondo_queues(int this_cpu);
+
+#endif /* _ENTRY_H */
diff --git a/arch/sparc/kernel/etrap_64.S b/arch/sparc/kernel/etrap_64.S
new file mode 100644
index 000000000000..786b185e6e3f
--- /dev/null
+++ b/arch/sparc/kernel/etrap_64.S
@@ -0,0 +1,236 @@
+/*
+ * etrap.S: Preparing for entry into the kernel on Sparc V9.
+ *
+ * Copyright (C) 1996, 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+
+#include <asm/asi.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <asm/spitfire.h>
+#include <asm/head.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+
+#define TASK_REGOFF (THREAD_SIZE-TRACEREG_SZ-STACKFRAME_SZ)
+#define ETRAP_PSTATE1 (PSTATE_TSO | PSTATE_PRIV)
+#define ETRAP_PSTATE2 \
+ (PSTATE_TSO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE)
+
+/*
+ * On entry, %g7 is return address - 0x4.
+ * %g4 and %g5 will be preserved in %l4 and %l5 respectively.
+ *
+ * Three entry points fall through into one another:
+ *   etrap          reads %pil into %g2, then
+ *   etrap_irq      clears %g3, then
+ *   etrap_syscall  continues with %g2/%g3 as they stand.
+ * So code entering at etrap_irq supplies %g2 itself, and code
+ * entering at etrap_syscall supplies both %g2 and %g3. %g3 is
+ * OR-ed into the saved TSTATE value and %g2 is OR-ed in after
+ * being shifted left by 20.
+ *
+ * The routine leaves through "done" with %tnpc set to %g7 + 4.
+ */
+
+ .text
+ .align 64
+ .globl etrap_syscall, etrap, etrap_irq, etraptl1
+etrap: rdpr %pil, %g2
+etrap_irq: clr %g3
+etrap_syscall: TRAP_LOAD_THREAD_REG(%g6, %g1)
+ rdpr %tstate, %g1
+ or %g1, %g3, %g1
+ sllx %g2, 20, %g3
+ /* Condition codes set here are tested again further down
+ * (the be before etrap_user_spill and the bne in etrap_save);
+ * nothing in between modifies them.
+ */
+ andcc %g1, TSTATE_PRIV, %g0
+ or %g1, %g3, %g1
+ /* TSTATE_PRIV set: build the frame below the current %sp
+ * (computed in the delay slot) and skip the next section.
+ */
+ bne,pn %xcc, 1f
+ sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2
+ /* TSTATE_PRIV clear from here to label 1. */
+ wrpr %g0, 7, %cleanwin
+
+ /* %g2 = %g6 + TASK_REGOFF (the add sits in the brnz delay
+ * slot, so it runs either way); %fprs is zeroed only when
+ * TSTATE_PEF was clear.
+ */
+ sethi %hi(TASK_REGOFF), %g2
+ sethi %hi(TSTATE_PEF), %g3
+ or %g2, %lo(TASK_REGOFF), %g2
+ and %g1, %g3, %g3
+ brnz,pn %g3, 1f
+ add %g6, %g2, %g2
+ wr %g0, 0, %fprs
+ /* Store TSTATE, TPC, TNPC, %y and (PT_REGS_MAGIC | %tt) at
+ * their PT_V9_* offsets from %g2 + STACKFRAME_SZ.
+ */
+1: rdpr %tpc, %g3
+
+ stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TSTATE]
+ rdpr %tnpc, %g1
+ stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC]
+ rd %y, %g3
+ stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC]
+ rdpr %tt, %g1
+ st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y]
+ sethi %hi(PT_REGS_MAGIC), %g3
+ or %g3, %g1, %g1
+ st %g1, [%g2 + STACKFRAME_SZ + PT_V9_MAGIC]
+
+ /* %cansave non-zero: go straight to etrap_save. */
+ rdpr %cansave, %g1
+ brnz,pt %g1, etrap_save
+ nop
+
+ /* %cansave is zero: advance %cwp by two and branch to a
+ * spill routine. TSTATE_PRIV clear selects the user spill
+ * with ASI_AIUP; otherwise %otherwin == 0 selects
+ * etrap_kernel_spill, else the user spill with ASI_AIUS.
+ */
+ rdpr %cwp, %g1
+ add %g1, 2, %g1
+ wrpr %g1, %cwp
+ be,pt %xcc, etrap_user_spill
+ mov ASI_AIUP, %g3
+
+ rdpr %otherwin, %g3
+ brz %g3, etrap_kernel_spill
+ mov ASI_AIUS, %g3
+
+etrap_user_spill:
+
+ /* Select the spill ASI, then pick the 32-bit or 64-bit
+ * spill routine from _TIF_32BIT in the thread flags.
+ */
+ wr %g3, 0x0, %asi
+ ldx [%g6 + TI_FLAGS], %g3
+ and %g3, _TIF_32BIT, %g3
+ brnz,pt %g3, etrap_user_spill_32bit
+ nop
+ ba,a,pt %xcc, etrap_user_spill_64bit
+
+ /* New register window; %sp = %g2 - STACK_BIAS. */
+etrap_save: save %g2, -STACK_BIAS, %sp
+ mov %g6, %l6
+
+ /* TSTATE_PRIV set: continue at 3 below. */
+ bne,pn %xcc, 3f
+ mov PRIMARY_CONTEXT, %l4
+ /* TSTATE_PRIV clear: move %canrestore into %otherwin, zero
+ * %canrestore, shift %wstate left by 3, set TI_FPDEPTH to 1
+ * and load sparc64_kern_pri_context into the MMU register
+ * selected by PRIMARY_CONTEXT.
+ */
+ rdpr %canrestore, %g3
+ rdpr %wstate, %g2
+ wrpr %g0, 0, %canrestore
+ sll %g2, 3, %g2
+ mov 1, %l5
+ stb %l5, [%l6 + TI_FPDEPTH]
+
+ wrpr %g3, 0, %otherwin
+ wrpr %g2, 0, %wstate
+ sethi %hi(sparc64_kern_pri_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
+
+ /* The .sun4v_1insn_patch entry records the address of the
+ * instruction at 661 together with an alternative using
+ * ASI_MMU (presumably patched in at boot on sun4v - confirm
+ * in the patching code).
+ */
+661: stxa %g3, [%l4] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g3, [%l4] ASI_MMU
+ .previous
+
+ sethi %hi(KERNBASE), %l4
+ flush %l4
+ mov ASI_AIUS, %l7
+ /* Common tail: keep %g4/%g5 in %l4/%l5 and the return
+ * address (%g7 + 4) in %l2. %l7 holds the ASI value chosen
+ * above (ASI_AIUS here, ASI_P when arriving from 3).
+ */
+2: mov %g4, %l4
+ mov %g5, %l5
+ add %g7, 4, %l2
+
+ /* Go to trap time globals so we can save them. */
+661: wrpr %g0, ETRAP_PSTATE1, %pstate
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ SET_GL(0)
+ .previous
+
+ /* Save %g1-%g7 into pt_regs, interleaved with building the
+ * new TSTATE value in %l7: (%l7 << 24) | %cwp |
+ * TSTATE_TSO | TSTATE_PEF.
+ */
+ stx %g1, [%sp + PTREGS_OFF + PT_V9_G1]
+ stx %g2, [%sp + PTREGS_OFF + PT_V9_G2]
+ sllx %l7, 24, %l7
+ stx %g3, [%sp + PTREGS_OFF + PT_V9_G3]
+ rdpr %cwp, %l0
+ stx %g4, [%sp + PTREGS_OFF + PT_V9_G4]
+ stx %g5, [%sp + PTREGS_OFF + PT_V9_G5]
+ stx %g6, [%sp + PTREGS_OFF + PT_V9_G6]
+ stx %g7, [%sp + PTREGS_OFF + PT_V9_G7]
+ or %l7, %l0, %l7
+ sethi %hi(TSTATE_TSO | TSTATE_PEF), %l0
+ or %l7, %l0, %l7
+ /* "done" below resumes at %tnpc, i.e. at %g7 + 4. */
+ wrpr %l2, %tnpc
+ wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate
+ /* Save %i0-%i7 into pt_regs. */
+ stx %i0, [%sp + PTREGS_OFF + PT_V9_I0]
+ stx %i1, [%sp + PTREGS_OFF + PT_V9_I1]
+ stx %i2, [%sp + PTREGS_OFF + PT_V9_I2]
+ stx %i3, [%sp + PTREGS_OFF + PT_V9_I3]
+ stx %i4, [%sp + PTREGS_OFF + PT_V9_I4]
+ stx %i5, [%sp + PTREGS_OFF + PT_V9_I5]
+ stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
+ /* Re-establish %g6 from the copy kept in %l6, then %g5 via
+ * LOAD_PER_CPU_BASE and %g4 from TI_TASK.
+ */
+ mov %l6, %g6
+ stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
+ LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1)
+ ldx [%g6 + TI_TASK], %g4
+ done
+
+ /* TSTATE_PRIV was set: use ASI_P, add 2 to TI_FPDEPTH and
+ * zero the byte at TI_FPSAVED + 1 + (old depth >> 1), then
+ * rejoin the common tail at 2.
+ */
+3: mov ASI_P, %l7
+ ldub [%l6 + TI_FPDEPTH], %l5
+ add %l6, TI_FPSAVED + 1, %l4
+ srl %l5, 1, %l3
+ add %l5, 2, %l5
+ stb %l5, [%l6 + TI_FPDEPTH]
+ ba,pt %xcc, 2b
+ stb %g0, [%l4 + %l3]
+ nop
+
+etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
+ * We place this right after pt_regs on the trap stack.
+ * The layout is:
+ * 0x00 TL1's TSTATE
+ * 0x08 TL1's TPC
+ * 0x10 TL1's TNPC
+ * 0x18 TL1's TT
+ * ...
+ * 0x78 TL4's TT
+ * 0x80 TL
+ *
+ * That is four 8-byte slots per trap level (0x20 bytes
+ * each) plus one slot for TL, matching the
+ * ((4 * 8) * 4) + 8 bytes reserved below %sp.
+ *
+ * When is_sun4v is non-zero only TL1 and TL2 are
+ * captured: the branch to finish_tl1_capture skips the
+ * TL3/TL4 stores and also the store of TL at 0x80.
+ * NOTE(review): confirm that leaving the 0x80 slot
+ * unwritten on sun4v is intended.
+ */
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ sub %sp, ((4 * 8) * 4) + 8, %g2
+ /* %g1 keeps the trap level we arrived at. */
+ rdpr %tl, %g1
+
+ wrpr %g0, 1, %tl
+ rdpr %tstate, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x00]
+ rdpr %tpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x08]
+ rdpr %tnpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x10]
+ rdpr %tt, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x18]
+
+ wrpr %g0, 2, %tl
+ rdpr %tstate, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x20]
+ rdpr %tpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x28]
+ rdpr %tnpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x30]
+ rdpr %tt, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x38]
+
+ sethi %hi(is_sun4v), %g3
+ lduw [%g3 + %lo(is_sun4v)], %g3
+ brnz,pn %g3, finish_tl1_capture
+ nop
+
+ wrpr %g0, 3, %tl
+ rdpr %tstate, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x40]
+ rdpr %tpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x48]
+ rdpr %tnpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x50]
+ rdpr %tt, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x58]
+
+ wrpr %g0, 4, %tl
+ rdpr %tstate, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x60]
+ rdpr %tpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x68]
+ rdpr %tnpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x70]
+ rdpr %tt, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x78]
+
+ stx %g1, [%g2 + STACK_BIAS + 0x80]
+
+finish_tl1_capture:
+ /* Drop to TL 1; the nop at 661 has a SET_GL(1) alternative
+ * recorded in .sun4v_1insn_patch.
+ */
+ wrpr %g0, 1, %tl
+661: nop
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ SET_GL(1)
+ .previous
+
+ /* Join the etrap code above at its label 1 with %g1 = TSTATE,
+ * %g2 pointing a further STACKFRAME_SZ + TRACEREG_SZ -
+ * STACK_BIAS below the capture area, and the condition codes
+ * from the TSTATE_PRIV test (set in the delay slot).
+ */
+ rdpr %tstate, %g1
+ sub %g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2
+ ba,pt %xcc, 1b
+ andcc %g1, TSTATE_PRIV, %g0
+
+#undef TASK_REGOFF
+#undef ETRAP_PSTATE1
diff --git a/arch/sparc/kernel/fpu_traps.S b/arch/sparc/kernel/fpu_traps.S
new file mode 100644
index 000000000000..a6864826a4bd
--- /dev/null
+++ b/arch/sparc/kernel/fpu_traps.S
@@ -0,0 +1,384 @@
+ /* This is trivial with the new code... */
+ /* do_fpdis: handler that turns the FPU back on and refills the
+ * FP registers from the thread_info save area.
+ *
+ * If TSTATE_PEF and FPRS_FEF are both already set, the trap is
+ * passed through etrap/rtrap without touching the registers.
+ * Otherwise the TI_FPSAVED byte decides, per half of the
+ * register file, between reloading from TI_FPREGS and zeroing:
+ *   FPRS_DL set -> %f0-%f31 loaded from TI_FPREGS + 0x00/0x40
+ *   FPRS_DU set -> %f32-%f62 loaded from TI_FPREGS + 0x80/0xc0
+ * A half whose bit is clear is zeroed with faddd/fmuld of the
+ * zeroed seed pair. Loads use ASI_BLK_S, so the secondary
+ * context register is temporarily set to
+ * sparc64_kern_sec_context and restored at fpdis_exit (old
+ * value kept in %g5). %g4 holds TSTATE_PEF and %g7 the %gsr
+ * value to install throughout.
+ */
+ .globl do_fpdis
+ .type do_fpdis,#function
+do_fpdis:
+ sethi %hi(TSTATE_PEF), %g4
+ rdpr %tstate, %g5
+ andcc %g5, %g4, %g0
+ be,pt %xcc, 1f
+ nop
+ rd %fprs, %g5
+ andcc %g5, FPRS_FEF, %g0
+ be,pt %xcc, 1f
+ nop
+
+ /* Legal state when DCR_IFPOE is set in Cheetah %dcr. */
+ /* %g7 = address of label 109; etrap returns to %g7 + 4,
+ * which is the "add %g0, %g0, %g0" below.
+ */
+ sethi %hi(109f), %g7
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ add %g0, %g0, %g0
+ ba,a,pt %xcc, rtrap
+
+ /* %g5 = TI_FPSAVED byte. %g7 = 0 when FPRS_FEF is clear in
+ * it (annulled delay slot runs only on the taken branch),
+ * otherwise %g7 = TI_GSR.
+ */
+1: TRAP_LOAD_THREAD_REG(%g6, %g1)
+ ldub [%g6 + TI_FPSAVED], %g5
+ wr %g0, FPRS_FEF, %fprs
+ andcc %g5, FPRS_FEF, %g0
+ be,a,pt %icc, 1f
+ clr %g7
+ ldx [%g6 + TI_GSR], %g7
+1: andcc %g5, FPRS_DL, %g0
+ bne,pn %icc, 2f
+ fzero %f0
+ andcc %g5, FPRS_DU, %g0
+ bne,pn %icc, 1f
+ fzero %f2
+ /* Neither FPRS_DL nor FPRS_DU: zero the registers and leave
+ * through fpdis_exit2 (no context switch was done).
+ * NOTE(review): this path writes %f0 through %f60 but no
+ * instruction here writes %f62 - confirm whether that is
+ * intentional.
+ */
+ faddd %f0, %f2, %f4
+ fmuld %f0, %f2, %f6
+ faddd %f0, %f2, %f8
+ fmuld %f0, %f2, %f10
+ faddd %f0, %f2, %f12
+ fmuld %f0, %f2, %f14
+ faddd %f0, %f2, %f16
+ fmuld %f0, %f2, %f18
+ faddd %f0, %f2, %f20
+ fmuld %f0, %f2, %f22
+ faddd %f0, %f2, %f24
+ fmuld %f0, %f2, %f26
+ faddd %f0, %f2, %f28
+ fmuld %f0, %f2, %f30
+ faddd %f0, %f2, %f32
+ fmuld %f0, %f2, %f34
+ faddd %f0, %f2, %f36
+ fmuld %f0, %f2, %f38
+ faddd %f0, %f2, %f40
+ fmuld %f0, %f2, %f42
+ faddd %f0, %f2, %f44
+ fmuld %f0, %f2, %f46
+ faddd %f0, %f2, %f48
+ fmuld %f0, %f2, %f50
+ faddd %f0, %f2, %f52
+ fmuld %f0, %f2, %f54
+ faddd %f0, %f2, %f56
+ fmuld %f0, %f2, %f58
+ b,pt %xcc, fpdis_exit2
+ faddd %f0, %f2, %f60
+ /* FPRS_DU only: zero %f0-%f30, load %f32-%f62 from
+ * TI_FPREGS + 0x80 and + 0xc0.
+ */
+1: mov SECONDARY_CONTEXT, %g3
+ add %g6, TI_FPREGS + 0x80, %g1
+ faddd %f0, %f2, %f4
+ fmuld %f0, %f2, %f6
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
+ sethi %hi(sparc64_kern_sec_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ add %g6, TI_FPREGS + 0xc0, %g2
+ faddd %f0, %f2, %f8
+ fmuld %f0, %f2, %f10
+ membar #Sync
+ ldda [%g1] ASI_BLK_S, %f32
+ ldda [%g2] ASI_BLK_S, %f48
+ membar #Sync
+ faddd %f0, %f2, %f12
+ fmuld %f0, %f2, %f14
+ faddd %f0, %f2, %f16
+ fmuld %f0, %f2, %f18
+ faddd %f0, %f2, %f20
+ fmuld %f0, %f2, %f22
+ faddd %f0, %f2, %f24
+ fmuld %f0, %f2, %f26
+ faddd %f0, %f2, %f28
+ fmuld %f0, %f2, %f30
+ b,pt %xcc, fpdis_exit
+ nop
+ /* FPRS_DL set. With FPRS_DU also set go to 3; otherwise
+ * load %f0-%f31 from TI_FPREGS + 0x00 and + 0x40 and zero
+ * %f32-%f62 (seeded by the fzero of %f32 and %f34).
+ */
+2: andcc %g5, FPRS_DU, %g0
+ bne,pt %icc, 3f
+ fzero %f32
+ mov SECONDARY_CONTEXT, %g3
+ fzero %f34
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
+ add %g6, TI_FPREGS, %g1
+ sethi %hi(sparc64_kern_sec_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ add %g6, TI_FPREGS + 0x40, %g2
+ faddd %f32, %f34, %f36
+ fmuld %f32, %f34, %f38
+ membar #Sync
+ ldda [%g1] ASI_BLK_S, %f0
+ ldda [%g2] ASI_BLK_S, %f16
+ membar #Sync
+ faddd %f32, %f34, %f40
+ fmuld %f32, %f34, %f42
+ faddd %f32, %f34, %f44
+ fmuld %f32, %f34, %f46
+ faddd %f32, %f34, %f48
+ fmuld %f32, %f34, %f50
+ faddd %f32, %f34, %f52
+ fmuld %f32, %f34, %f54
+ faddd %f32, %f34, %f56
+ fmuld %f32, %f34, %f58
+ faddd %f32, %f34, %f60
+ fmuld %f32, %f34, %f62
+ ba,pt %xcc, fpdis_exit
+ nop
+ /* Both FPRS_DL and FPRS_DU: load all four 0x40-byte blocks
+ * starting at TI_FPREGS.
+ */
+3: mov SECONDARY_CONTEXT, %g3
+ add %g6, TI_FPREGS, %g1
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
+ sethi %hi(sparc64_kern_sec_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ mov 0x40, %g2
+ membar #Sync
+ ldda [%g1] ASI_BLK_S, %f0
+ ldda [%g1 + %g2] ASI_BLK_S, %f16
+ add %g1, 0x80, %g1
+ ldda [%g1] ASI_BLK_S, %f32
+ ldda [%g1 + %g2] ASI_BLK_S, %f48
+ membar #Sync
+ /* Put back the secondary context value saved in %g5. */
+fpdis_exit:
+
+661: stxa %g5, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g5, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ /* Install %gsr and %fsr, set TSTATE_PEF in the saved TSTATE
+ * and retry the trapping instruction.
+ */
+fpdis_exit2:
+ wr %g7, 0, %gsr
+ ldx [%g6 + TI_XFSR], %fsr
+ rdpr %tstate, %g3
+ or %g3, %g4, %g3 ! make sure TSTATE_PEF is set
+ wrpr %g3, %tstate
+ wr %g0, FPRS_FEF, %fprs ! clean DU/DL bits
+ retry
+ .size do_fpdis,.-do_fpdis
+
+ /* fp_other_bounce: call do_fpother() with %o0 pointing at the
+ * pt_regs (%sp + PTREGS_OFF, set in the call delay slot), then
+ * leave through rtrap. do_fpother_check_fitos loads
+ * fp_other_bounce - 4 into %g7 before falling back to the
+ * do_fptrap path.
+ */
+ .align 32
+ .type fp_other_bounce,#function
+fp_other_bounce:
+ call do_fpother
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size fp_other_bounce,.-fp_other_bounce
+
+ /* do_fpother_check_fitos: try to emulate a trapping "fitos"
+ * in place; anything else is handed to do_fptrap_after_fsr
+ * with %g7 = fp_other_bounce - 4.
+ *
+ * The emulation is attempted only when all of these hold:
+ *   - TSTATE_PRIV is clear,
+ *   - bits 16:14 of the saved %fsr equal 2,
+ *   - bit 23 of the saved %fsr is clear,
+ *   - the instruction word at %tpc, masked with FITOS_MASK,
+ *     equals FITOS_COMPARE.
+ * It then runs "fitod %fN, %f62" (N = low 5 bits of the
+ * instruction) followed by "fdtos %f62, %fM" (M = bits 29:25),
+ * using %f62 as scratch; %f62 is saved to and restored from
+ * its slot in TI_FPREGS around the two steps.
+ */
+ .align 32
+ .globl do_fpother_check_fitos
+ .type do_fpother_check_fitos,#function
+do_fpother_check_fitos:
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ sethi %hi(fp_other_bounce - 4), %g7
+ or %g7, %lo(fp_other_bounce - 4), %g7
+
+ /* NOTE: Need to preserve %g7 until we fully commit
+ * to the fitos fixup.
+ */
+ stx %fsr, [%g6 + TI_XFSR]
+ rdpr %tstate, %g3
+ andcc %g3, TSTATE_PRIV, %g0
+ bne,pn %xcc, do_fptrap_after_fsr
+ nop
+ ldx [%g6 + TI_XFSR], %g3
+ srlx %g3, 14, %g1
+ and %g1, 7, %g1
+ cmp %g1, 2 ! Unfinished FP-OP
+ bne,pn %xcc, do_fptrap_after_fsr
+ sethi %hi(1 << 23), %g1 ! Inexact
+ andcc %g3, %g1, %g0
+ bne,pn %xcc, do_fptrap_after_fsr
+ rdpr %tpc, %g1
+ lduwa [%g1] ASI_AIUP, %g3 ! This cannot ever fail
+#define FITOS_MASK 0xc1f83fe0
+#define FITOS_COMPARE 0x81a01880
+ sethi %hi(FITOS_MASK), %g1
+ or %g1, %lo(FITOS_MASK), %g1
+ and %g3, %g1, %g1
+ sethi %hi(FITOS_COMPARE), %g2
+ or %g2, %lo(FITOS_COMPARE), %g2
+ cmp %g1, %g2
+ bne,pn %xcc, do_fptrap_after_fsr
+ nop
+ /* Committed to the fixup: save %f62 (and %f63) and index
+ * fitos_table_1 by the low 5 bits of the instruction, 4
+ * bytes per entry. With a branch in the jmpl delay slot,
+ * one table entry executes and control then continues at
+ * fitos_emul_continue.
+ */
+ std %f62, [%g6 + TI_FPREGS + (62 * 4)]
+ sethi %hi(fitos_table_1), %g1
+ and %g3, 0x1f, %g2
+ or %g1, %lo(fitos_table_1), %g1
+ sllx %g2, 2, %g2
+ jmpl %g1 + %g2, %g0
+ ba,pt %xcc, fitos_emul_continue
+
+fitos_table_1:
+ fitod %f0, %f62
+ fitod %f1, %f62
+ fitod %f2, %f62
+ fitod %f3, %f62
+ fitod %f4, %f62
+ fitod %f5, %f62
+ fitod %f6, %f62
+ fitod %f7, %f62
+ fitod %f8, %f62
+ fitod %f9, %f62
+ fitod %f10, %f62
+ fitod %f11, %f62
+ fitod %f12, %f62
+ fitod %f13, %f62
+ fitod %f14, %f62
+ fitod %f15, %f62
+ fitod %f16, %f62
+ fitod %f17, %f62
+ fitod %f18, %f62
+ fitod %f19, %f62
+ fitod %f20, %f62
+ fitod %f21, %f62
+ fitod %f22, %f62
+ fitod %f23, %f62
+ fitod %f24, %f62
+ fitod %f25, %f62
+ fitod %f26, %f62
+ fitod %f27, %f62
+ fitod %f28, %f62
+ fitod %f29, %f62
+ fitod %f30, %f62
+ fitod %f31, %f62
+
+ /* Second step: index fitos_table_2 by bits 29:25 of the
+ * instruction, same one-entry-then-branch scheme.
+ */
+fitos_emul_continue:
+ sethi %hi(fitos_table_2), %g1
+ srl %g3, 25, %g2
+ or %g1, %lo(fitos_table_2), %g1
+ and %g2, 0x1f, %g2
+ sllx %g2, 2, %g2
+ jmpl %g1 + %g2, %g0
+ ba,pt %xcc, fitos_emul_fini
+
+fitos_table_2:
+ fdtos %f62, %f0
+ fdtos %f62, %f1
+ fdtos %f62, %f2
+ fdtos %f62, %f3
+ fdtos %f62, %f4
+ fdtos %f62, %f5
+ fdtos %f62, %f6
+ fdtos %f62, %f7
+ fdtos %f62, %f8
+ fdtos %f62, %f9
+ fdtos %f62, %f10
+ fdtos %f62, %f11
+ fdtos %f62, %f12
+ fdtos %f62, %f13
+ fdtos %f62, %f14
+ fdtos %f62, %f15
+ fdtos %f62, %f16
+ fdtos %f62, %f17
+ fdtos %f62, %f18
+ fdtos %f62, %f19
+ fdtos %f62, %f20
+ fdtos %f62, %f21
+ fdtos %f62, %f22
+ fdtos %f62, %f23
+ fdtos %f62, %f24
+ fdtos %f62, %f25
+ fdtos %f62, %f26
+ fdtos %f62, %f27
+ fdtos %f62, %f28
+ fdtos %f62, %f29
+ fdtos %f62, %f30
+ fdtos %f62, %f31
+
+ /* Restore the scratch register and skip the emulated
+ * instruction ("done" resumes at %tnpc).
+ */
+fitos_emul_fini:
+ ldd [%g6 + TI_FPREGS + (62 * 4)], %f62
+ done
+ .size do_fpother_check_fitos,.-do_fpother_check_fitos
+
+ /* do_fptrap: save the FPU state into thread_info, then enter
+ * etrap with %fprs cleared (written in the branch delay slot).
+ *
+ * do_fptrap stores %fsr to TI_XFSR first; the
+ * do_fptrap_after_fsr entry is for callers that have already
+ * done so. From there: TI_FPSAVED |= %fprs, %gsr goes to
+ * TI_GSR, and the register file is written to TI_FPREGS with
+ * ASI_BLK_S while the secondary context register temporarily
+ * holds sparc64_kern_sec_context (old value kept in %g5).
+ *
+ * %g7 is not set here; it is whatever the code that branched
+ * to this routine left in it.
+ */
+ .align 32
+ .globl do_fptrap
+ .type do_fptrap,#function
+do_fptrap:
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ stx %fsr, [%g6 + TI_XFSR]
+do_fptrap_after_fsr:
+ /* %g1 keeps the %fprs value for the DL/DU tests below. */
+ ldub [%g6 + TI_FPSAVED], %g3
+ rd %fprs, %g1
+ or %g3, %g1, %g3
+ stb %g3, [%g6 + TI_FPSAVED]
+ rd %gsr, %g3
+ stx %g3, [%g6 + TI_GSR]
+ mov SECONDARY_CONTEXT, %g3
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
+ sethi %hi(sparc64_kern_sec_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ add %g6, TI_FPREGS, %g2
+ /* FPRS_DL clear: skip the lower-half stores and go to 4,
+ * which stores the upper half without testing FPRS_DU.
+ * FPRS_DL set: store %f0-%f31, then store the upper half
+ * only if FPRS_DU is set. The "add" at label 4 doubles as
+ * the delay slot of the branch to 5.
+ */
+ andcc %g1, FPRS_DL, %g0
+ be,pn %icc, 4f
+ mov 0x40, %g3
+ stda %f0, [%g2] ASI_BLK_S
+ stda %f16, [%g2 + %g3] ASI_BLK_S
+ andcc %g1, FPRS_DU, %g0
+ be,pn %icc, 5f
+4: add %g2, 128, %g2
+ stda %f32, [%g2] ASI_BLK_S
+ stda %f48, [%g2 + %g3] ASI_BLK_S
+ /* Restore the secondary context value saved in %g5. */
+5: mov SECONDARY_CONTEXT, %g1
+ membar #Sync
+
+661: stxa %g5, [%g1] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g5, [%g1] ASI_MMU
+ .previous
+
+ membar #Sync
+ ba,pt %xcc, etrap
+ wr %g0, 0, %fprs
+ .size do_fptrap,.-do_fptrap
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
new file mode 100644
index 000000000000..d0218e73f982
--- /dev/null
+++ b/arch/sparc/kernel/ftrace.c
@@ -0,0 +1,76 @@
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/ftrace.h>
+
+/* Instruction word handed out as the "nop" replacement. */
+static const u32 ftrace_nop = 0x01000000;
+
+/*
+ * Return a pointer to the bytes of the nop instruction word.
+ *
+ * The object is const, so callers must only read through the
+ * returned pointer. The cast matches the declared return type
+ * (unsigned char *) instead of the mismatched plain char *.
+ */
+unsigned char *ftrace_nop_replace(void)
+{
+	return (unsigned char *)&ftrace_nop;
+}
+
+/*
+ * Build the instruction word 0x40000000 | ((addr - ip) >> 2) for a
+ * call placed at @ip that targets @addr, and return a pointer to it.
+ *
+ * The word lives in a function-local static, so each call overwrites
+ * the result of the previous one.
+ */
+unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	static u32 insn;
+	s32 disp = (s32)addr - (s32)ip;
+
+	insn = 0x40000000 | ((u32)disp >> 2);
+
+	return (unsigned char *)&insn;
+}
+
+/*
+ * Replace the 32-bit instruction word at @ip with the word at
+ * @new_code, provided memory currently holds the word at @old_code.
+ *
+ * The swap is done with "cas" followed by "flush" on @ip. An
+ * exception-table entry covers the cas: a fault there resumes in the
+ * fixup code, which sets the result to 1.
+ *
+ * Returns:
+ *   0  the cas executed and memory held either the old word (now
+ *      replaced) or already the new word;
+ *   1  the access to @ip faulted;
+ *   2  memory held something other than the old or the new word.
+ */
+int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code)
+{
+	u32 old, new;
+	u32 replaced;
+	int faulted;
+
+	/*
+	 * The arguments are plain byte pointers with no alignment
+	 * guarantee, so copy the words out rather than dereferencing
+	 * a cast pointer.
+	 */
+	memcpy(&old, old_code, sizeof(old));
+	memcpy(&new, new_code, sizeof(new));
+
+	/*
+	 * Operand 0 (replaced) shares its register with [new]: cas
+	 * leaves the previous memory contents there.
+	 */
+	__asm__ __volatile__(
+	"1: cas [%[ip]], %[old], %[new]\n"
+	" flush %[ip]\n"
+	" mov 0, %[faulted]\n"
+	"2:\n"
+	" .section .fixup,#alloc,#execinstr\n"
+	" .align 4\n"
+	"3: sethi %%hi(2b), %[faulted]\n"
+	" jmpl %[faulted] + %%lo(2b), %%g0\n"
+	" mov 1, %[faulted]\n"
+	" .previous\n"
+	" .section __ex_table,\"a\"\n"
+	" .align 4\n"
+	" .word 1b, 3b\n"
+	" .previous\n"
+	: "=r" (replaced), [faulted] "=r" (faulted)
+	: [new] "0" (new), [old] "r" (old), [ip] "r" (ip)
+	: "memory");
+
+	if (replaced != old && replaced != new)
+		faulted = 2;
+
+	return faulted;
+}
+
+/*
+ * Rewrite the instruction at ftrace_call into a call to @func.
+ *
+ * The current instruction bytes are snapshotted first and passed as
+ * the expected old value; the result of ftrace_modify_code() is
+ * returned unchanged.
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned char current_insn[MCOUNT_INSN_SIZE];
+	unsigned long site = (unsigned long)(&ftrace_call);
+	unsigned char *wanted_insn;
+
+	memcpy(current_insn, &ftrace_call, MCOUNT_INSN_SIZE);
+	wanted_insn = ftrace_call_replace(site, (unsigned long)func);
+
+	return ftrace_modify_code(site, current_insn, wanted_insn);
+}
+
+/*
+ * Init-time arch hook: passes @data straight to ftrace_mcount_set()
+ * and always returns 0.
+ */
+int __init ftrace_dyn_arch_init(void *data)
+{
+ ftrace_mcount_set(data);
+ return 0;
+}
diff --git a/arch/sparc/kernel/getsetcc.S b/arch/sparc/kernel/getsetcc.S
new file mode 100644
index 000000000000..a14d272d2061
--- /dev/null
+++ b/arch/sparc/kernel/getsetcc.S
@@ -0,0 +1,24 @@
+ /* getcc: %o0 is used as the base for the PT_V9_* offsets.
+ * Takes bits 35:32 of the saved TSTATE and stores them,
+ * right-justified, into the saved %g1 slot (the store sits in
+ * the retl delay slot).
+ */
+ .globl getcc
+ .type getcc,#function
+getcc:
+ ldx [%o0 + PT_V9_TSTATE], %o1
+ srlx %o1, 32, %o1
+ and %o1, 0xf, %o1
+ retl
+ stx %o1, [%o0 + PT_V9_G1]
+ .size getcc,.-getcc
+
+ /* setcc: %o0 is used as the base for the PT_V9_* offsets.
+ * Builds a mask in %o3 as %ulo(TSTATE_ICC) << 32, clears those
+ * bits in the saved TSTATE and replaces them with
+ * (saved %g1 << 32) & mask. The updated TSTATE is written back
+ * in the retl delay slot.
+ */
+ .globl setcc
+ .type setcc,#function
+setcc:
+ ldx [%o0 + PT_V9_TSTATE], %o1
+ ldx [%o0 + PT_V9_G1], %o2
+ or %g0, %ulo(TSTATE_ICC), %o3
+ sllx %o3, 32, %o3
+ andn %o1, %o3, %o1
+ sllx %o2, 32, %o2
+ and %o2, %o3, %o2
+ or %o1, %o2, %o1
+ retl
+ stx %o1, [%o0 + PT_V9_TSTATE]
+ .size setcc,.-setcc
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
new file mode 100644
index 000000000000..8ffee714f932
--- /dev/null
+++ b/arch/sparc/kernel/head_64.S
@@ -0,0 +1,900 @@
+/* head.S: Initial boot code for the Sparc64 port of Linux.
+ *
+ * Copyright (C) 1996, 1997, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 David Sitsky (David.Sitsky@anu.edu.au)
+ * Copyright (C) 1997, 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ */
+
+#include <linux/version.h>
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/asi.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/spitfire.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/errno.h>
+#include <asm/signal.h>
+#include <asm/processor.h>
+#include <asm/lsu.h>
+#include <asm/dcr.h>
+#include <asm/dcu.h>
+#include <asm/head.h>
+#include <asm/ttable.h>
+#include <asm/mmu.h>
+#include <asm/cpudata.h>
+#include <asm/pil.h>
+#include <asm/estate.h>
+#include <asm/sfafsr.h>
+#include <asm/unistd.h>
+
+/* This section from _start to sparc64_boot_end should fit into
+ * 0x0000000000404000 to 0x0000000000408000.
+ */
+ .text
+ .globl start, _start, stext, _stext
+_start:
+start:
+_stext:
+stext:
+! 0x0000000000404000
+ b sparc64_boot
+ flushw /* Flush register file. */
+
+/* This stuff has to be in sync with SILO and other potential boot loaders
+ * Fields should be kept upward compatible and whenever any change is made,
+ * HdrS version should be incremented.
+ */
+ .global root_flags, ram_flags, root_dev
+ .global sparc_ramdisk_image, sparc_ramdisk_size
+ .global sparc_ramdisk_image64
+
+ .ascii "HdrS"
+ .word LINUX_VERSION_CODE
+
+ /* History:
+ *
+ * 0x0300 : Supports being located at other than 0x4000
+ * 0x0202 : Supports kernel params string
+ * 0x0201 : Supports reboot_command
+ */
+ .half 0x0301 /* HdrS version */
+
+root_flags:
+ .half 1
+root_dev:
+ .half 0
+ram_flags:
+ .half 0
+sparc_ramdisk_image:
+ .word 0
+sparc_ramdisk_size:
+ .word 0
+ .xword reboot_command
+ .xword bootstr_info
+sparc_ramdisk_image64:
+ .xword 0
+ .word _end
+
+ /* PROM cif handler code address is in %o4. */
+sparc64_boot:
+ mov %o4, %l7
+
+ /* We need to remap the kernel. Use position independent
+ * code to remap us to KERNBASE.
+ *
+ * SILO can invoke us with 32-bit address masking enabled,
+ * so make sure that's clear.
+ */
+ rdpr %pstate, %g1
+ andn %g1, PSTATE_AM, %g1
+ wrpr %g1, 0x0, %pstate
+ ba,a,pt %xcc, 1f
+
+ .globl prom_finddev_name, prom_chosen_path, prom_root_node
+ .globl prom_getprop_name, prom_mmu_name, prom_peer_name
+ .globl prom_callmethod_name, prom_translate_name, prom_root_compatible
+ .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
+ .globl prom_boot_mapped_pc, prom_boot_mapping_mode
+ .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
+ .globl prom_compatible_name, prom_cpu_path, prom_cpu_compatible
+ .globl is_sun4v, sun4v_chip_type, prom_set_trap_table_name
+prom_peer_name:
+ .asciz "peer"
+prom_compatible_name:
+ .asciz "compatible"
+prom_finddev_name:
+ .asciz "finddevice"
+prom_chosen_path:
+ .asciz "/chosen"
+prom_cpu_path:
+ .asciz "/cpu"
+prom_getprop_name:
+ .asciz "getprop"
+prom_mmu_name:
+ .asciz "mmu"
+prom_callmethod_name:
+ .asciz "call-method"
+prom_translate_name:
+ .asciz "translate"
+prom_map_name:
+ .asciz "map"
+prom_unmap_name:
+ .asciz "unmap"
+prom_set_trap_table_name:
+ .asciz "SUNW,set-trap-table"
+prom_sun4v_name:
+ .asciz "sun4v"
+prom_niagara_prefix:
+ .asciz "SUNW,UltraSPARC-T"
+ .align 4
+prom_root_compatible:
+ .skip 64
+prom_cpu_compatible:
+ .skip 64
+prom_root_node:
+ .word 0
+prom_mmu_ihandle_cache:
+ .word 0
+prom_boot_mapped_pc:
+ .word 0
+prom_boot_mapping_mode:
+ .word 0
+ .align 8
+prom_boot_mapping_phys_high:
+ .xword 0
+prom_boot_mapping_phys_low:
+ .xword 0
+is_sun4v:
+ .word 0
+sun4v_chip_type:
+ .word SUN4V_CHIP_INVALID
+1:
+ rd %pc, %l0
+
+ mov (1b - prom_peer_name), %l1
+ sub %l0, %l1, %l1
+ mov 0, %l2
+
+ /* prom_root_node = prom_peer(0) */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "peer"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, 0
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! prom root node
+ mov (1b - prom_root_node), %l1
+ sub %l0, %l1, %l1
+ stw %l4, [%l1]
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_compatible_name), %l2
+ mov (1b - prom_root_compatible), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_getproperty(prom_root_node, "compatible",
+ * &prom_root_compatible, 64)
+ */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, prom_root_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_root_compatible
+ mov 64, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ mov (1b - prom_finddev_name), %l1
+ mov (1b - prom_chosen_path), %l2
+ mov (1b - prom_boot_mapped_pc), %l3
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l3, %l3
+ stw %l0, [%l3]
+ sub %sp, (192 + 128), %sp
+
+ /* chosen_node = prom_finddevice("/chosen") */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/chosen"
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! chosen device node
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_mmu_name), %l2
+ mov (1b - prom_mmu_ihandle_cache), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_mmu_ihandle_cache = prom_getint(chosen_node, "mmu") */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, chosen_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "mmu"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_mmu_ihandle_cache
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, sizeof(arg3)
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ mov (1b - prom_callmethod_name), %l1
+ mov (1b - prom_translate_name), %l2
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ lduw [%l5], %l5 ! prom_mmu_ihandle_cache
+
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "call-method"
+ mov 3, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 3
+ mov 5, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate"
+ stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache
+ /* PAGE align */
+ srlx %l0, 13, %l3
+ sllx %l3, 13, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC
+ stx %g0, [%sp + 2047 + 128 + 0x30] ! res1
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! res2
+ stx %g0, [%sp + 2047 + 128 + 0x40] ! res3
+ stx %g0, [%sp + 2047 + 128 + 0x48] ! res4
+ stx %g0, [%sp + 2047 + 128 + 0x50] ! res5
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x40], %l1 ! translation mode
+ mov (1b - prom_boot_mapping_mode), %l4
+ sub %l0, %l4, %l4
+ stw %l1, [%l4]
+ mov (1b - prom_boot_mapping_phys_high), %l4
+ sub %l0, %l4, %l4
+ ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high
+ stx %l2, [%l4 + 0x0]
+ ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low
+ /* 4MB align */
+ srlx %l3, 22, %l3
+ sllx %l3, 22, %l3
+ stx %l3, [%l4 + 0x8]
+
+ /* Leave service as-is, "call-method" */
+ mov 7, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 7
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ mov (1b - prom_map_name), %l3
+ sub %l0, %l3, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x18] ! arg1: "map"
+ /* Leave arg2 as-is, prom_mmu_ihandle_cache */
+ mov -1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: mode (-1 default)
+ /* 4MB align the kernel image size. */
+ set (_end - KERNBASE), %l3
+ set ((4 * 1024 * 1024) - 1), %l4
+ add %l3, %l4, %l3
+ andn %l3, %l4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4: roundup(ksize, 4MB)
+ sethi %hi(KERNBASE), %l3
+ stx %l3, [%sp + 2047 + 128 + 0x38] ! arg5: vaddr (KERNBASE)
+ stx %g0, [%sp + 2047 + 128 + 0x40] ! arg6: empty
+ mov (1b - prom_boot_mapping_phys_low), %l3
+ sub %l0, %l3, %l3
+ ldx [%l3], %l3
+ stx %l3, [%sp + 2047 + 128 + 0x48] ! arg7: phys addr
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ add %sp, (192 + 128), %sp
+
+ sethi %hi(prom_root_compatible), %g1
+ or %g1, %lo(prom_root_compatible), %g1
+ sethi %hi(prom_sun4v_name), %g7
+ or %g7, %lo(prom_sun4v_name), %g7
+ mov 5, %g3
+90: ldub [%g7], %g2
+ ldub [%g1], %g4
+ cmp %g2, %g4
+ bne,pn %icc, 80f
+ add %g7, 1, %g7
+ subcc %g3, 1, %g3
+ bne,pt %xcc, 90b
+ add %g1, 1, %g1
+
+ sethi %hi(is_sun4v), %g1
+ or %g1, %lo(is_sun4v), %g1
+ mov 1, %g7
+ stw %g7, [%g1]
+
+ /* cpu_node = prom_finddevice("/cpu") */
+ mov (1b - prom_finddev_name), %l1
+ mov (1b - prom_cpu_path), %l2
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %sp, (192 + 128), %sp
+
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/cpu"
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! cpu device node
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_compatible_name), %l2
+ mov (1b - prom_cpu_compatible), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_getproperty(cpu_node, "compatible",
+ * &prom_cpu_compatible, 64)
+ */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, cpu_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_cpu_compatible
+ mov 64, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ add %sp, (192 + 128), %sp
+
+ sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ sethi %hi(prom_niagara_prefix), %g7
+ or %g7, %lo(prom_niagara_prefix), %g7
+ mov 17, %g3
+90: ldub [%g7], %g2
+ ldub [%g1], %g4
+ cmp %g2, %g4
+ bne,pn %icc, 4f
+ add %g7, 1, %g7
+ subcc %g3, 1, %g3
+ bne,pt %xcc, 90b
+ add %g1, 1, %g1
+
+ sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ ldub [%g1 + 17], %g2
+ cmp %g2, '1'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA1, %g4
+ cmp %g2, '2'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA2, %g4
+4:
+ mov SUN4V_CHIP_UNKNOWN, %g4
+5: sethi %hi(sun4v_chip_type), %g2
+ or %g2, %lo(sun4v_chip_type), %g2
+ stw %g4, [%g2]
+
+80:
+ BRANCH_IF_SUN4V(g1, jump_to_sun4u_init)
+ BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
+ ba,pt %xcc, spitfire_boot
+ nop
+
+cheetah_plus_boot:
+ /* Preserve OBP chosen DCU and DCR register settings: unlike
+ * cheetah_boot, this path writes neither %asr18 nor
+ * ASI_DCU_CONTROL_REG and branches straight to
+ * cheetah_generic_boot.
+ */
+ ba,pt %xcc, cheetah_generic_boot
+ nop
+
+cheetah_boot: /* Write a fixed DCR_* flag set to %asr18 and a fixed DCU_*
+ * value to ASI_DCU_CONTROL_REG, then fall through into
+ * cheetah_generic_boot.
+ */
+ mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
+ wr %g1, %asr18
+
+ sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+ or %g7, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+ sllx %g7, 32, %g7 /* move the %uhi/%ulo part into bits 63:32 */
+ or %g7, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g7 /* merge in the low-word enable bits */
+ stxa %g7, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+
+cheetah_generic_boot: /* Shared by cheetah_boot and cheetah_plus_boot:
+ * store zero (%g0) to the TSB_EXTENSION_{P,S,N} registers, then
+ * branch to jump_to_sun4u_init.
+ */
+ mov TSB_EXTENSION_P, %g3
+ stxa %g0, [%g3] ASI_DMMU
+ stxa %g0, [%g3] ASI_IMMU
+ membar #Sync
+
+ mov TSB_EXTENSION_S, %g3
+ stxa %g0, [%g3] ASI_DMMU /* _S is written through ASI_DMMU only */
+ membar #Sync
+
+ mov TSB_EXTENSION_N, %g3
+ stxa %g0, [%g3] ASI_DMMU
+ stxa %g0, [%g3] ASI_IMMU
+ membar #Sync
+
+ ba,a,pt %xcc, jump_to_sun4u_init /* annul bit set; note that no delay-slot nop follows */
+
+spitfire_boot:
+ /* Typically PROM has already enabled both MMU's and both on-chip
+ * caches, but we do it here anyway just to be paranoid.
+ *
+ * A single store writes the IC, DC, IM and DM bits to the
+ * register at ASI_LSU_CONTROL; execution then falls through
+ * into jump_to_sun4u_init.
+ */
+ mov (LSU_CONTROL_IC|LSU_CONTROL_DC|LSU_CONTROL_IM|LSU_CONTROL_DM), %g1
+ stxa %g1, [%g0] ASI_LSU_CONTROL
+ membar #Sync
+
+jump_to_sun4u_init:
+ /*
+ * Make sure we are in privileged mode, have address masking,
+ * using the ordinary globals and have enabled floating
+ * point.
+ *
+ * Again, typically PROM has left %pil at 13 or similar, and
+ * (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE) in %pstate.
+ *
+ * %pstate is written with exactly PRIV|PEF|IE (no other bits
+ * are named in the value) and %fprs is zeroed.  Control then
+ * leaves through an indirect jump to the address of sun4u_init
+ * materialised by "set"; the return address is discarded
+ * (%g0 destination).
+ */
+ wrpr %g0, (PSTATE_PRIV|PSTATE_PEF|PSTATE_IE), %pstate
+ wr %g0, 0, %fprs
+
+ set sun4u_init, %g2
+ jmpl %g2 + %g0, %g0
+ nop
+
+ .section .text.init.refok
+sun4u_init: /* Zero the primary and secondary MMU context registers.
+ * BRANCH_IF_SUN4V (defined elsewhere) diverts to sun4v_init,
+ * which does the same through ASI_MMU; this path uses ASI_DMMU
+ * and then continues at sun4u_continue.
+ */
+ BRANCH_IF_SUN4V(g1, sun4v_init)
+
+ /* Set ctx 0 */
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_DMMU
+ membar #Sync
+
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_DMMU
+ membar #Sync
+
+ ba,pt %xcc, sun4u_continue
+ nop
+
+sun4v_init:
+ /* Set ctx 0: store zero to PRIMARY_CONTEXT and SECONDARY_CONTEXT
+ * through ASI_MMU, then branch to niagara_tlb_fixup.
+ */
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+ ba,pt %xcc, niagara_tlb_fixup
+ nop
+
+sun4u_continue: /* Pick the TLB fixup path: BRANCH_IF_ANY_CHEETAH (defined
+ * elsewhere) goes to cheetah_tlb_fixup, anything else to
+ * spitfire_tlb_fixup.
+ */
+ BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup)
+
+ ba,pt %xcc, spitfire_tlb_fixup
+ nop
+
+niagara_tlb_fixup:
+ mov 3, %g2 /* Set TLB type to hypervisor. */
+ sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
+
+ /* Patch copy/clear ops.  sun4v_chip_type selects one of three
+ * sets of patch routines: NIAGARA1 -> niagara_patch,
+ * NIAGARA2 -> niagara2_patch, anything else -> the generic_*
+ * routines.  All three paths meet again at label 80.
+ */
+ sethi %hi(sun4v_chip_type), %g1
+ lduw [%g1 + %lo(sun4v_chip_type)], %g1
+ cmp %g1, SUN4V_CHIP_NIAGARA1
+ be,pt %xcc, niagara_patch
+ cmp %g1, SUN4V_CHIP_NIAGARA2 /* sits in the delay slot of the branch above */
+ be,pt %xcc, niagara2_patch
+ nop
+
+ call generic_patch_copyops
+ nop
+ call generic_patch_bzero
+ nop
+ call generic_patch_pageops
+ nop
+
+ ba,a,pt %xcc, 80f
+niagara2_patch:
+ call niagara2_patch_copyops
+ nop
+ call niagara_patch_bzero /* the Niagara-2 path calls the niagara_ (not niagara2_) bzero patcher */
+ nop
+ call niagara2_patch_pageops
+ nop
+
+ ba,a,pt %xcc, 80f
+
+niagara_patch:
+ call niagara_patch_copyops
+ nop
+ call niagara_patch_bzero
+ nop
+ call niagara_patch_pageops
+ nop
+
+80:
+ /* Patch TLB/cache ops. */
+ call hypervisor_patch_cachetlbops
+ nop
+
+ ba,pt %xcc, tlb_fixup_done
+ nop
+
+cheetah_tlb_fixup: /* Store 2 (cheetah+) or 1 (cheetah) in tlb_type, call
+ * the three cheetah_patch_* routines, then join tlb_fixup_done.
+ */
+ mov 2, %g2 /* Set TLB type to cheetah+. */
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
+
+ mov 1, %g2 /* Set TLB type to cheetah. */
+
+1: sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
+
+ /* Patch copy/page operations to cheetah optimized versions. */
+ call cheetah_patch_copyops
+ nop
+ call cheetah_patch_copy_page
+ nop
+ call cheetah_patch_cachetlbops
+ nop
+
+ ba,pt %xcc, tlb_fixup_done
+ nop
+
+spitfire_tlb_fixup:
+ /* Set TLB type to spitfire (0).  No patch routines are called on
+ * this path; execution falls through into tlb_fixup_done.
+ */
+ mov 0, %g2
+ sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
+
+tlb_fixup_done: /* Common tail of the boot path: switch to a stack inside
+ * init_thread_union, clear the bss, call prom_init and the early
+ * patch routines, record the boot cpu id in the thread info and
+ * finally call start_kernel.
+ */
+ sethi %hi(init_thread_union), %g6
+ or %g6, %lo(init_thread_union), %g6
+ ldx [%g6 + TI_TASK], %g4
+ mov %sp, %l6 /* keep the incoming stack pointer; handed to prom_init below */
+
+ wr %g0, ASI_P, %asi
+ mov 1, %g1
+ sllx %g1, THREAD_SHIFT, %g1
+ sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+ add %g6, %g1, %sp /* %sp = init_thread_union + (1 << THREAD_SHIFT) - (STACKFRAME_SZ + STACK_BIAS) */
+ mov 0, %fp
+
+ /* Set per-cpu pointer initially to zero, this makes
+ * the boot-cpu use the in-kernel-image per-cpu areas
+ * before setup_per_cpu_area() is invoked.
+ */
+ clr %g5
+
+ wrpr %g0, 0, %wstate
+ wrpr %g0, 0x0, %tl
+
+ /* Clear the bss */
+ sethi %hi(__bss_start), %o0
+ or %o0, %lo(__bss_start), %o0
+ sethi %hi(_end), %o1
+ or %o1, %lo(_end), %o1
+ call __bzero
+ sub %o1, %o0, %o1 /* delay slot: length = _end - __bss_start */
+
+#ifdef CONFIG_LOCKDEP
+ /* We have to call this super early, as even prom_init can grab
+ * spinlocks and thus call into the lockdep code.
+ */
+ call lockdep_init
+ nop
+#endif
+
+ mov %l6, %o1 ! OpenPROM stack
+ call prom_init
+ mov %l7, %o0 ! OpenPROM cif handler
+
+ /* Initialize current_thread_info()->cpu as early as possible.
+ * In order to do that accurately we have to patch up the get_cpuid()
+ * assembler sequences. And that, in turn, requires that we know
+ * if we are on a Starfire box or not. While we're here, patch up
+ * the sun4v sequences as well.
+ */
+ call check_if_starfire
+ nop
+ call per_cpu_patch
+ nop
+ call sun4v_patch
+ nop
+
+#ifdef CONFIG_SMP
+ call hard_smp_processor_id
+ nop
+ cmp %o0, NR_CPUS
+ blu,pt %xcc, 1f /* unsigned id < NR_CPUS: carry on */
+ nop
+ call boot_cpu_id_too_large
+ nop
+ /* Not reached... */
+
+1:
+ /* If we boot on a non-zero cpu, all of the per-cpu
+ * variable references we make before setting up the
+ * per-cpu areas will use a bogus offset. Put a
+ * compensating factor into __per_cpu_base to handle
+ * this cleanly.
+ *
+ * What the per-cpu code calculates is:
+ *
+ * __per_cpu_base + (cpu << __per_cpu_shift)
+ *
+ * These two variables are zero initially, so to
+ * make it all cancel out to zero we need to put
+ * "0 - (cpu << 0)" into __per_cpu_base so that the
+ * above formula evaluates to zero.
+ *
+ * We cannot even perform a printk() until this stuff
+ * is setup as that calls cpu_clock() which uses
+ * per-cpu variables.
+ */
+ sub %g0, %o0, %o1
+ sethi %hi(__per_cpu_base), %o2
+ stx %o1, [%o2 + %lo(__per_cpu_base)]
+#else
+ mov 0, %o0
+#endif
+ sth %o0, [%g6 + TI_CPU] /* 16-bit store of the cpu id (0 when !CONFIG_SMP) */
+
+ call prom_init_report
+ nop
+
+ /* Off we go.... */
+ call start_kernel
+ nop
+ /* Not reached... */
+
+ .previous
+
+ /* This is meant to allow the sharing of this code between
+ * boot processor invocation (via setup_tba() below) and
+ * secondary processor startup (via trampoline.S). The
+ * former does use this code, the latter does not yet due
+ * to some complexities. That should be fixed up at some
+ * point.
+ *
+ * There used to be enormous complexity wrt. transferring
+ * over from the firmware's trap table to the Linux kernel's.
+ * For example, there was a chicken & egg problem wrt. building
+ * the OBP page tables, yet needing to be on the Linux kernel
+ * trap table (to translate PAGE_OFFSET addresses) in order to
+ * do that.
+ *
+ * We now handle OBP tlb misses differently, via linear lookups
+ * into the prom_trans[] array. So that specific problem no
+ * longer exists. Yet, unfortunately there are still some issues
+ * preventing trampoline.S from using this code... ho hum.
+ */
+ .globl setup_trap_table
+setup_trap_table: /* Ask the firmware to switch to sparc64_ttable_tl0,
+ * then load the kernel primary context, write the PROM timer /
+ * STICK compare registers as described below and call
+ * init_irqwork_curcpu.
+ *
+ * Runs in its own register window.  %l0/%l1 hold the caller's
+ * %pstate/%pil and are written back just before returning.  The
+ * firmware call goes through the pointer loaded from
+ * p1275buf + 0x08, with an argument array built at
+ * %sp + 2047 + 128: service name, num_args, num_rets, args.
+ */
+ save %sp, -192, %sp
+
+ /* Force interrupts to be disabled. */
+ rdpr %pstate, %l0
+ andn %l0, PSTATE_IE, %o1
+ wrpr %o1, 0x0, %pstate
+ rdpr %pil, %l1
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+
+ /* Make the firmware call to jump over to the Linux trap table. */
+ sethi %hi(is_sun4v), %o0
+ lduw [%o0 + %lo(is_sun4v)], %o0
+ brz,pt %o0, 1f /* is_sun4v == 0: use the one-argument call at 1: */
+ nop
+
+ TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+ add %g2, TRAP_PER_CPU_FAULT_INFO, %g2
+ stxa %g2, [%g0] ASI_SCRATCHPAD
+
+ /* Compute physical address:
+ *
+ * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
+ */
+ sethi %hi(KERNBASE), %g3
+ sub %g2, %g3, %g2
+ sethi %hi(kern_base), %g3
+ ldx [%g3 + %lo(kern_base)], %g3
+ add %g2, %g3, %o1
+ sethi %hi(sparc64_ttable_tl0), %o0
+
+ set prom_set_trap_table_name, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 2, %g2 /* num_args: trap table address and the paddr computed above */
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 0, %g2 /* num_rets */
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ stx %o0, [%sp + 2047 + 128 + 0x18]
+ stx %o1, [%sp + 2047 + 128 + 0x20]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
+
+ ba,pt %xcc, 2f
+ nop
+
+1: sethi %hi(sparc64_ttable_tl0), %o0
+ set prom_set_trap_table_name, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 1, %g2 /* num_args: trap table address only */
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 0, %g2 /* num_rets */
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ stx %o0, [%sp + 2047 + 128 + 0x18]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
+
+ /* Start using proper page size encodings in ctx register. */
+2: sethi %hi(sparc64_kern_pri_context), %g3
+ ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
+
+ mov PRIMARY_CONTEXT, %g1
+
+661: stxa %g2, [%g1] ASI_DMMU /* the .sun4v_1insn_patch entry below pairs this address with an ASI_MMU form */
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g1] ASI_MMU
+ .previous
+
+ membar #Sync
+
+ BRANCH_IF_SUN4V(o2, 1f)
+
+ /* Kill PROM timer */
+ sethi %hi(0x80000000), %o2
+ sllx %o2, 32, %o2 /* %o2 = 1 << 63 */
+ wr %o2, 0, %tick_cmpr
+
+ BRANCH_IF_ANY_CHEETAH(o2, o3, 1f)
+
+ ba,pt %xcc, 2f
+ nop
+
+ /* Disable STICK_INT interrupts. */
+1:
+ sethi %hi(0x80000000), %o2
+ sllx %o2, 32, %o2 /* %o2 = 1 << 63 */
+ wr %o2, %asr25
+
+2:
+ wrpr %g0, %g0, %wstate
+
+ call init_irqwork_curcpu
+ nop
+
+ /* Now we can restore interrupt state. */
+ wrpr %l0, 0, %pstate
+ wrpr %l1, 0x0, %pil
+
+ ret
+ restore
+
+ .globl setup_tba
+setup_tba:
+ save %sp, -192, %sp
+
+ /* The boot processor is the only cpu which invokes this
+ * routine, the other cpus set things up via trampoline.S.
+ * So save the OBP trap table address here.
+ *
+ * The current %tba is stored in prom_tba before
+ * setup_trap_table is called.
+ */
+ rdpr %tba, %g7
+ sethi %hi(prom_tba), %o1
+ or %o1, %lo(prom_tba), %o1
+ stx %g7, [%o1]
+
+ call setup_trap_table
+ nop
+
+ ret
+ restore
+sparc64_boot_end:
+
+#include "etrap_64.S"
+#include "rtrap_64.S"
+#include "winfixup.S"
+#include "fpu_traps.S"
+#include "ivec.S"
+#include "getsetcc.S"
+#include "utrap.S"
+#include "spiterrs.S"
+#include "cherrs.S"
+#include "misctrap.S"
+#include "syscalls.S"
+#include "helpers.S"
+#include "hvcalls.S"
+#include "sun4v_tlb_miss.S"
+#include "sun4v_ivec.S"
+#include "ktlb.S"
+#include "tsb.S"
+
+/*
+ * The following skip makes sure the trap table in ttable.S is aligned
+ * on a 32K boundary as required by the v9 specs for TBA register.
+ *
+ * We align to a 32K boundary, then we have the 32K kernel TSB,
+ * the 64K kernel 4MB TSB, and then the 32K aligned trap table.
+ */
+1:
+ .skip 0x4000 + _start - 1b
+
+! 0x0000000000408000
+
+ .globl swapper_tsb
+swapper_tsb: /* 32K reserved here */
+ .skip (32 * 1024)
+
+ .globl swapper_4m_tsb
+swapper_4m_tsb: /* 64K reserved here; the trap table include follows */
+ .skip (64 * 1024)
+
+! 0x0000000000420000
+
+ /* Some care needs to be exercised if you try to move the
+ * location of the trap table relative to other things. For
+ * one thing there are br* instructions in some of the
+ * trap table entries which branch back to code in ktlb.S
+ * Those instructions can only handle a signed 16-bit
+ * displacement.
+ *
+ * There is a binutils bug (bugzilla #4558) which causes
+ * the relocation overflow checks for such instructions to
+ * not be done correctly. So binutils will not notice the
+ * error and will instead write junk into the relocation and
+ * you'll have an unbootable kernel.
+ */
+#include "ttable.S"
+
+! 0x0000000000428000
+
+#include "systbls_64.S"
+
+ .data
+ .align 8
+ .globl prom_tba, tlb_type
+prom_tba: .xword 0 /* filled in by setup_tba with the %tba value found at boot */
+tlb_type: .word 0 /* Must NOT end up in BSS: the *_tlb_fixup code stores 0 (spitfire), 1 (cheetah), 2 (cheetah+) or 3 (hypervisor) here before the bss is cleared */
+ .section ".fixup",#alloc,#execinstr
+
+ .globl __ret_efault, __retl_efault
+__ret_efault: /* return -EFAULT in the caller's %o0 via ret/restore */
+ ret
+ restore %g0, -EFAULT, %o0
+__retl_efault: /* leaf form: return -EFAULT in %o0 via retl, no restore */
+ retl
+ mov -EFAULT, %o0
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
new file mode 100644
index 000000000000..314dd0c9fc5b
--- /dev/null
+++ b/arch/sparc/kernel/helpers.S
@@ -0,0 +1,63 @@
+ .align 32
+ .globl __flushw_user
+ .type __flushw_user,#function
+__flushw_user: /* If %otherwin is non-zero, execute "save" repeatedly
+ * until %otherwin reads zero, counting the saves in %g2, then
+ * execute the same number of "restore"s to get back to the
+ * caller's window.  Returns immediately when %otherwin is
+ * already zero.  Uses %g1 and %g2 as scratch.
+ */
+ rdpr %otherwin, %g1
+ brz,pn %g1, 2f
+ clr %g2 /* delay slot: save counter starts at zero */
+1: save %sp, -128, %sp
+ rdpr %otherwin, %g1
+ brnz,pt %g1, 1b
+ add %g2, 1, %g2 /* delay slot: one more save done */
+1: sub %g2, 1, %g2
+ brnz,pt %g2, 1b
+ restore %g0, %g0, %g0 /* delay slot: undo one save per pass */
+2: retl
+ nop
+ .size __flushw_user,.-__flushw_user
+
+ /* Flush %fp and %i7 to the stack for all register
+ * windows active inside of the cpu. This allows
+ * show_stack_trace() to avoid using an expensive
+ * 'flushw'.
+ *
+ * Walks %canrestore windows downwards from %cwp - 1, storing
+ * each window's %fp and %i7 into the RW_V9_I6/RW_V9_I7 slots
+ * at that window's %sp + STACK_BIAS, then restores %cwp and
+ * %pstate.  %o0, %g1, %g2 and %g3 are used as scratch.
+ *
+ * NOTE(review): the first wrpr presumably turns PSTATE_IE off
+ * (wrpr combining its two source operands with XOR), which
+ * assumes interrupts are enabled on entry - confirm.
+ */
+ .globl stack_trace_flush
+ .type stack_trace_flush,#function
+stack_trace_flush:
+ rdpr %pstate, %o0
+ wrpr %o0, PSTATE_IE, %pstate
+
+ rdpr %cwp, %g1
+ rdpr %canrestore, %g2
+ sub %g1, 1, %g3
+
+1: brz,pn %g2, 2f
+ sub %g2, 1, %g2 /* delay slot: one fewer window left */
+ wrpr %g3, %cwp
+ stx %fp, [%sp + STACK_BIAS + RW_V9_I6]
+ stx %i7, [%sp + STACK_BIAS + RW_V9_I7]
+ ba,pt %xcc, 1b
+ sub %g3, 1, %g3 /* delay slot: next lower window */
+
+2: wrpr %g1, %cwp
+ wrpr %o0, %pstate
+
+ retl
+ nop
+ .size stack_trace_flush,.-stack_trace_flush
+
+#ifdef CONFIG_SMP
+ .globl hard_smp_processor_id
+ .type hard_smp_processor_id,#function
+hard_smp_processor_id:
+#endif
+ .globl real_hard_smp_processor_id
+ .type real_hard_smp_processor_id,#function
+real_hard_smp_processor_id: /* Returns in %o0 whatever __GET_CPUID
+ * (defined elsewhere) produces.  With CONFIG_SMP the same code
+ * is also exported under the name hard_smp_processor_id.
+ */
+ __GET_CPUID(%o0)
+ retl
+ nop
+#ifdef CONFIG_SMP
+ .size hard_smp_processor_id,.-hard_smp_processor_id
+#endif
+ .size real_hard_smp_processor_id,.-real_hard_smp_processor_id
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
new file mode 100644
index 000000000000..1d272c3b5740
--- /dev/null
+++ b/arch/sparc/kernel/hvapi.c
@@ -0,0 +1,193 @@
+/* hvapi.c: Hypervisor API management.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include <asm/hypervisor.h>
+#include <asm/oplib.h>
+
+/* If the hypervisor indicates that the API setting
+ * calls are unsupported, by returning HV_EBADTRAP or
+ * HV_ENOTSUPPORTED, we assume that API groups with the
+ * PRE_API flag set are major 1 minor 0.
+ *
+ * One api_info describes one API group; the instances live in
+ * api_table[].
+ */
+struct api_info {
+ unsigned long group; /* HV_GRP_* identifier of this entry */
+ unsigned long major; /* major recorded at registration; zeroed when refcnt drops to 0 */
+ unsigned long minor; /* minor recorded at registration; zeroed when refcnt drops to 0 */
+ unsigned int refcnt; /* successful registrations not yet unregistered */
+ unsigned int flags; /* FLAG_* bits */
+#define FLAG_PRE_API 0x00000001 /* accept major 1 as 1.0 when version negotiation is unavailable */
+};
+
+static struct api_info api_table[] = { /* searched linearly by __get_info(); major, minor and refcnt start out zero */
+ { .group = HV_GRP_SUN4V, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_CORE, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_INTR, },
+ { .group = HV_GRP_SOFT_STATE, },
+ { .group = HV_GRP_PCI, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_LDOM, },
+ { .group = HV_GRP_SVC_CHAN, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_NCS, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_RNG, },
+ { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_FIRE_PERF, },
+ { .group = HV_GRP_N2_CPU, },
+ { .group = HV_GRP_NIU, },
+ { .group = HV_GRP_VF_CPU, },
+ { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
+};
+
+static DEFINE_SPINLOCK(hvapi_lock); /* held around every api_table[] lookup and update in this file */
+
+/* Find the api_table[] entry describing @group.
+ *
+ * Returns a pointer to the matching entry, or NULL when @group is
+ * not listed in api_table[].  Every caller in this file holds
+ * hvapi_lock.
+ */
+static struct api_info *__get_info(unsigned long group)
+{
+	struct api_info *entry = api_table;
+	struct api_info *const end = api_table + ARRAY_SIZE(api_table);
+
+	while (entry != end) {
+		if (entry->group == group)
+			return entry;
+		entry++;
+	}
+	return NULL;
+}
+
+/* Take one more reference on @p.  Called with hvapi_lock held. */
+static void __get_ref(struct api_info *p)
+{
+	p->refcnt += 1;
+}
+
+/* Drop one reference on @p.  Called with hvapi_lock held.
+ *
+ * When the last reference goes away the group is reset with
+ * sun4v_set_version(group, 0, 0) and the cached major/minor are
+ * cleared.  The result of that call is deliberately ignored.
+ */
+static void __put_ref(struct api_info *p)
+{
+	unsigned long ignore;
+
+	/* An unbalanced unregister must not wrap the unsigned count
+	 * from 0 to UINT_MAX: the group would then look registered
+	 * forever with major 0, and every later registration of a
+	 * real major would fail with -EINVAL.
+	 */
+	if (p->refcnt == 0)
+		return;
+
+	if (--p->refcnt != 0)
+		return;
+
+	sun4v_set_version(p->group, 0, 0, &ignore);
+	p->major = p->minor = 0;
+}
+
+/* Register interest in a hypervisor API group.
+ *
+ * @group: HV_GRP_* identifier; must be listed in api_table[].
+ * @major: major version the caller requires.
+ * @minor: in: requested minor version; out: the minor version in
+ *         effect (only written on success).
+ *
+ * If the group already has users, the call succeeds only when
+ * @major equals the major already recorded, and *@minor is set to
+ * the recorded minor.  Otherwise the version is negotiated through
+ * sun4v_set_version().  If that reports HV_EBADTRAP or
+ * HV_ENOTSUPPORTED, groups flagged FLAG_PRE_API are accepted as
+ * version 1.0 provided @major is 1.
+ *
+ * Returns 0 on success (a reference is taken) or -EINVAL.
+ */
+int sun4v_hvapi_register(unsigned long group, unsigned long major,
+			 unsigned long *minor)
+{
+	unsigned long actual_minor, hv_ret, irq_flags;
+	struct api_info *info;
+	int err = -EINVAL;
+
+	spin_lock_irqsave(&hvapi_lock, irq_flags);
+
+	info = __get_info(group);
+	if (!info)
+		goto out;
+
+	if (info->refcnt) {
+		/* Already negotiated: only the same major is compatible. */
+		if (info->major != major)
+			goto out;
+		*minor = info->minor;
+		err = 0;
+	} else {
+		hv_ret = sun4v_set_version(group, major, *minor,
+					   &actual_minor);
+		if (hv_ret == HV_EOK) {
+			*minor = actual_minor;
+			info->major = major;
+			info->minor = actual_minor;
+			err = 0;
+		} else if ((hv_ret == HV_EBADTRAP ||
+			    hv_ret == HV_ENOTSUPPORTED) &&
+			   (info->flags & FLAG_PRE_API) &&
+			   major == 1) {
+			/* No version negotiation available: assume 1.0. */
+			info->major = 1;
+			info->minor = 0;
+			*minor = 0;
+			err = 0;
+		}
+	}
+
+	if (err == 0)
+		__get_ref(info);
+out:
+	spin_unlock_irqrestore(&hvapi_lock, irq_flags);
+
+	return err;
+}
+EXPORT_SYMBOL(sun4v_hvapi_register);
+
+/* Drop one reference on API group @group.
+ *
+ * Groups that are not listed in api_table[] are silently ignored.
+ */
+void sun4v_hvapi_unregister(unsigned long group)
+{
+	unsigned long irq_flags;
+	struct api_info *info;
+
+	spin_lock_irqsave(&hvapi_lock, irq_flags);
+	info = __get_info(group);
+	if (info != NULL)
+		__put_ref(info);
+	spin_unlock_irqrestore(&hvapi_lock, irq_flags);
+}
+EXPORT_SYMBOL(sun4v_hvapi_unregister);
+
+/* Report the version currently recorded for API group @group.
+ *
+ * @major, @minor: filled in on success only.
+ *
+ * Returns 0 when the group is known and has at least one
+ * reference, -EINVAL otherwise.
+ */
+int sun4v_hvapi_get(unsigned long group,
+		    unsigned long *major,
+		    unsigned long *minor)
+{
+	unsigned long irq_flags;
+	struct api_info *info;
+	int err = -EINVAL;
+
+	spin_lock_irqsave(&hvapi_lock, irq_flags);
+	info = __get_info(group);
+	if (info != NULL && info->refcnt != 0) {
+		*major = info->major;
+		*minor = info->minor;
+		err = 0;
+	}
+	spin_unlock_irqrestore(&hvapi_lock, irq_flags);
+
+	return err;
+}
+EXPORT_SYMBOL(sun4v_hvapi_get);
+
+/* Register HV_GRP_SUN4V (major 1, minor 0) and HV_GRP_CORE
+ * (major 1, minor 1).
+ *
+ * If either registration fails, the group and the version being
+ * requested are printed through prom_printf() and prom_halt() is
+ * called.
+ */
+void __init sun4v_hvapi_init(void)
+{
+	unsigned long group, major, minor;
+
+	group = HV_GRP_SUN4V;
+	major = 1;
+	minor = 0;
+	if (sun4v_hvapi_register(group, major, &minor))
+		goto bad;
+
+	group = HV_GRP_CORE;
+	major = 1;
+	minor = 1;
+	if (sun4v_hvapi_register(group, major, &minor))
+		goto bad;
+
+	return;
+
+bad:
+	/* major and minor are unsigned long, so they need %lu, not %u. */
+	prom_printf("HVAPI: Cannot register API group "
+		    "%lx with major(%lu) minor(%lu)\n",
+		    group, major, minor);
+	prom_halt();
+}
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
new file mode 100644
index 000000000000..8a5f35ffb15e
--- /dev/null
+++ b/arch/sparc/kernel/hvcalls.S
@@ -0,0 +1,800 @@
+ /* %o0: devhandle
+ * %o1: devino
+ *
+ * returns %o0: sysino
+ *
+ * The trap's %o1 is copied over %o0 in the retl delay slot, so
+ * whatever the trap left in %o0 is not visible to the caller.
+ */
+ENTRY(sun4v_devino_to_sysino)
+ mov HV_FAST_INTR_DEVINO2SYSINO, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+ENDPROC(sun4v_devino_to_sysino)
+
+ /* %o0: sysino
+ *
+ * returns %o0: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ *
+ * The value comes from the trap's %o1 (moved in the retl delay
+ * slot); whatever the trap left in %o0 is discarded.
+ */
+ENTRY(sun4v_intr_getenabled)
+ mov HV_FAST_INTR_GETENABLED, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+ENDPROC(sun4v_intr_getenabled)
+
+ /* %o0: sysino
+ * %o1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ *
+ * Nothing is post-processed: %o0 comes back exactly as the trap
+ * left it.
+ */
+ENTRY(sun4v_intr_setenabled)
+ mov HV_FAST_INTR_SETENABLED, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_intr_setenabled)
+
+ /* %o0: sysino
+ *
+ * returns %o0: intr_state (HV_INTR_STATE_*)
+ *
+ * The value comes from the trap's %o1 (moved in the retl delay
+ * slot); whatever the trap left in %o0 is discarded.
+ */
+ENTRY(sun4v_intr_getstate)
+ mov HV_FAST_INTR_GETSTATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+ENDPROC(sun4v_intr_getstate)
+
+ /* %o0: sysino
+ * %o1: intr_state (HV_INTR_STATE_*)
+ *
+ * Nothing is post-processed: %o0 comes back exactly as the trap
+ * left it.
+ */
+ENTRY(sun4v_intr_setstate)
+ mov HV_FAST_INTR_SETSTATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_intr_setstate)
+
+ /* %o0: sysino
+ *
+ * returns %o0: cpuid
+ *
+ * The value comes from the trap's %o1 (moved in the retl delay
+ * slot); whatever the trap left in %o0 is discarded.
+ */
+ENTRY(sun4v_intr_gettarget)
+ mov HV_FAST_INTR_GETTARGET, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+ENDPROC(sun4v_intr_gettarget)
+
+ /* %o0: sysino
+ * %o1: cpuid
+ *
+ * Nothing is post-processed: %o0 comes back exactly as the trap
+ * left it.
+ */
+ENTRY(sun4v_intr_settarget)
+ mov HV_FAST_INTR_SETTARGET, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_intr_settarget)
+
+ /* %o0: cpuid
+ * %o1: pc
+ * %o2: rtba
+ * %o3: arg0
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_cpu_start)
+ mov HV_FAST_CPU_START, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_start)
+
+ /* %o0: cpuid
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the argument and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_cpu_stop)
+ mov HV_FAST_CPU_STOP, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_stop)
+
+ /* returns %o0: status
+ *
+ * No arguments are set up; only the function number goes in %o5.
+ */
+ENTRY(sun4v_cpu_yield)
+ mov HV_FAST_CPU_YIELD, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_yield)
+
+ /* %o0: type
+ * %o1: queue paddr
+ * %o2: num queue entries
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_cpu_qconf)
+ mov HV_FAST_CPU_QCONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_qconf)
+
+ /* %o0: num cpus in cpu list
+ * %o1: cpu list paddr
+ * %o2: mondo block paddr
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_cpu_mondo_send)
+ mov HV_FAST_CPU_MONDO_SEND, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_mondo_send)
+
+ /* %o0: CPU ID
+ *
+ * returns %o0: -status if status non-zero, else
+ * %o0: cpu state as HV_CPU_STATE_*
+ *
+ * The negation sits in the (non-annulled) delay slot of the
+ * brnz, so it runs on both paths; on the zero-status path %o0 is
+ * then overwritten with the trap's %o1.
+ */
+ENTRY(sun4v_cpu_state)
+ mov HV_FAST_CPU_STATE, %o5
+ ta HV_FAST_TRAP
+ brnz,pn %o0, 1f
+ sub %g0, %o0, %o0
+ mov %o1, %o0
+1: retl
+ nop
+ENDPROC(sun4v_cpu_state)
+
+ /* %o0: virtual address
+ * %o1: must be zero
+ * %o2: TTE
+ * %o3: HV_MMU_* flags
+ *
+ * returns %o0: status
+ *
+ * %o1 is not forced to zero here; the caller has to pass it that
+ * way.  Only %o5 (function number) is set up by this wrapper.
+ */
+ENTRY(sun4v_mmu_map_perm_addr)
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mmu_map_perm_addr)
+
+ /* %o0: number of TSB descriptions
+ * %o1: TSB descriptions real address
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_mmu_tsb_ctx0)
+ mov HV_FAST_MMU_TSB_CTX0, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mmu_tsb_ctx0)
+
+ /* %o0: API group number
+ * %o1: pointer to unsigned long major number storage
+ * %o2: pointer to unsigned long minor number storage
+ *
+ * returns %o0: status
+ *
+ * Uses HV_CORE_TRAP rather than HV_FAST_TRAP.  The two pointers
+ * are parked in %o3/%o4 before the trap; afterwards the trap's
+ * %o1 and %o2 are stored through them unconditionally, whatever
+ * the status in %o0.
+ */
+ENTRY(sun4v_get_version)
+ mov HV_CORE_GET_VER, %o5
+ mov %o1, %o3
+ mov %o2, %o4
+ ta HV_CORE_TRAP
+ stx %o1, [%o3]
+ retl
+ stx %o2, [%o4]
+ENDPROC(sun4v_get_version)
+
+ /* %o0: API group number
+ * %o1: desired major number
+ * %o2: desired minor number
+ * %o3: pointer to unsigned long actual minor number storage
+ *
+ * returns %o0: status
+ *
+ * Uses HV_CORE_TRAP rather than HV_FAST_TRAP.  The pointer is
+ * parked in %o4 before the trap; the trap's %o1 is stored through
+ * it in the retl delay slot, whatever the status in %o0.
+ */
+ENTRY(sun4v_set_version)
+ mov HV_CORE_SET_VER, %o5
+ mov %o3, %o4
+ ta HV_CORE_TRAP
+ retl
+ stx %o1, [%o4]
+ENDPROC(sun4v_set_version)
+
+ /* %o0: pointer to unsigned long time
+ *
+ * returns %o0: status
+ *
+ * The pointer is parked in %o4 before the trap; the trap's %o1 is
+ * stored through it afterwards, whatever the status in %o0.
+ */
+ENTRY(sun4v_tod_get)
+ mov %o0, %o4
+ mov HV_FAST_TOD_GET, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_tod_get)
+
+ /* %o0: time
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the argument and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_tod_set)
+ mov HV_FAST_TOD_SET, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_tod_set)
+
+ /* %o0: pointer to unsigned long status
+ *
+ * returns %o0: signed character
+ *
+ * %o0 and %o1 are cleared before the trap.  Afterwards the trap's
+ * %o0 is stored to *status and the low 32 bits of the trap's %o1
+ * are sign-extended (sra by 0) into the return value.
+ */
+ENTRY(sun4v_con_getchar)
+ mov %o0, %o4
+ mov HV_FAST_CONS_GETCHAR, %o5
+ clr %o0
+ clr %o1
+ ta HV_FAST_TRAP
+ stx %o0, [%o4]
+ retl
+ sra %o1, 0, %o0
+ENDPROC(sun4v_con_getchar)
+
+ /* %o0: signed long character
+ *
+ * returns %o0: status
+ *
+ * The low 32 bits of the trap's %o0 are sign-extended (sra by 0)
+ * in the retl delay slot before being returned.
+ */
+ENTRY(sun4v_con_putchar)
+ mov HV_FAST_CONS_PUTCHAR, %o5
+ ta HV_FAST_TRAP
+ retl
+ sra %o0, 0, %o0
+ENDPROC(sun4v_con_putchar)
+
+ /* %o0: buffer real address
+ * %o1: buffer size
+ * %o2: pointer to unsigned long bytes_read
+ *
+ * returns %o0: status
+ *
+ * Post-processing of the trap results:
+ * - non-zero %o0: returned as is, *bytes_read is not written;
+ * - %o1 compares equal (on %icc) to -1 (break) or -2 (hup): %o1
+ *   is returned in %o0, *bytes_read is not written;
+ * - otherwise %o1 is stored to *bytes_read and %o0 is returned.
+ *
+ * The "mov %o1, %o0" instructions sit in annulled delay slots,
+ * so each runs only when its branch is taken.
+ */
+ENTRY(sun4v_con_read)
+ mov %o2, %o4
+ mov HV_FAST_CONS_READ, %o5
+ ta HV_FAST_TRAP
+ brnz %o0, 1f
+ cmp %o1, -1 /* break */
+ be,a,pn %icc, 1f
+ mov %o1, %o0
+ cmp %o1, -2 /* hup */
+ be,a,pn %icc, 1f
+ mov %o1, %o0
+ stx %o1, [%o4]
+1: retl
+ nop
+ENDPROC(sun4v_con_read)
+
+ /* %o0: buffer real address
+ * %o1: buffer size
+ * %o2: pointer to unsigned long bytes_written
+ *
+ * returns %o0: status
+ *
+ * The pointer is parked in %o4 before the trap; the trap's %o1 is
+ * stored through it afterwards, whatever the status in %o0.
+ */
+ENTRY(sun4v_con_write)
+ mov %o2, %o4
+ mov HV_FAST_CONS_WRITE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_con_write)
+
+ /* %o0: soft state
+ * %o1: address of description string
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_mach_set_soft_state)
+ mov HV_FAST_MACH_SET_SOFT_STATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mach_set_soft_state)
+
+ /* %o0: exit code
+ *
+ * Does not return.
+ *
+ * The retl/nop after the trap would only execute if the trap
+ * came back.
+ */
+ENTRY(sun4v_mach_exit)
+ mov HV_FAST_MACH_EXIT, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mach_exit)
+
+ /* %o0: buffer real address
+ * %o1: buffer length
+ * %o2: pointer to unsigned long real_buf_len
+ *
+ * returns %o0: status
+ *
+ * The pointer is parked in %o4 before the trap; the trap's %o1 is
+ * stored through it afterwards, whatever the status in %o0.
+ */
+ENTRY(sun4v_mach_desc)
+ mov %o2, %o4
+ mov HV_FAST_MACH_DESC, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_mach_desc)
+
+ /* %o0: new timeout in milliseconds
+ * %o1: pointer to unsigned long orig_timeout
+ *
+ * returns %o0: status
+ *
+ * The pointer is parked in %o4 before the trap; the trap's %o1 is
+ * stored through it afterwards, whatever the status in %o0.
+ */
+ENTRY(sun4v_mach_set_watchdog)
+ mov %o1, %o4
+ mov HV_FAST_MACH_SET_WATCHDOG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_mach_set_watchdog)
+
+ /* No inputs and does not return.
+ *
+ * NOTE(review): although no inputs are documented, the body
+ * copies the caller's %o1 into %o4 and stores through it after
+ * the trap, exactly like sun4v_mach_set_watchdog.  That store is
+ * only reached if the trap returns; it looks like a copy-and-paste
+ * leftover - confirm before relying on or removing it.
+ */
+ENTRY(sun4v_mach_sir)
+ mov %o1, %o4
+ mov HV_FAST_MACH_SIR, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_mach_sir)
+
+ /* %o0: channel
+ * %o1: ra
+ * %o2: num_entries
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_ldc_tx_qconf)
+ mov HV_FAST_LDC_TX_QCONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_tx_qconf)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long ra
+ * %o2: pointer to unsigned long num_entries
+ *
+ * returns %o0: status
+ *
+ * The pointers are parked in %g1/%g2 before the trap; the trap's
+ * %o1 and %o2 are stored through them afterwards, whatever the
+ * status in %o0.
+ */
+ENTRY(sun4v_ldc_tx_qinfo)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov HV_FAST_LDC_TX_QINFO, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ nop
+ENDPROC(sun4v_ldc_tx_qinfo)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long head_off
+ * %o2: pointer to unsigned long tail_off
+ * %o3: pointer to unsigned long chan_state
+ *
+ * returns %o0: status
+ *
+ * The pointers are parked in %g1/%g2/%g3 before the trap; the
+ * trap's %o1, %o2 and %o3 are stored through them afterwards,
+ * whatever the status in %o0.
+ */
+ENTRY(sun4v_ldc_tx_get_state)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov %o3, %g3
+ mov HV_FAST_LDC_TX_GET_STATE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ stx %o3, [%g3]
+ retl
+ nop
+ENDPROC(sun4v_ldc_tx_get_state)
+
+ /* %o0: channel
+ * %o1: tail_off
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_ldc_tx_set_qtail)
+ mov HV_FAST_LDC_TX_SET_QTAIL, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_tx_set_qtail)
+
+ /* %o0: channel
+ * %o1: ra
+ * %o2: num_entries
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_ldc_rx_qconf)
+ mov HV_FAST_LDC_RX_QCONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_rx_qconf)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long ra
+ * %o2: pointer to unsigned long num_entries
+ *
+ * returns %o0: status
+ *
+ * The pointers are parked in %g1/%g2 before the trap; the trap's
+ * %o1 and %o2 are stored through them afterwards, whatever the
+ * status in %o0.
+ */
+ENTRY(sun4v_ldc_rx_qinfo)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov HV_FAST_LDC_RX_QINFO, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ nop
+ENDPROC(sun4v_ldc_rx_qinfo)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long head_off
+ * %o2: pointer to unsigned long tail_off
+ * %o3: pointer to unsigned long chan_state
+ *
+ * returns %o0: status
+ *
+ * The pointers are parked in %g1/%g2/%g3 before the trap; the
+ * trap's %o1, %o2 and %o3 are stored through them afterwards,
+ * whatever the status in %o0.
+ */
+ENTRY(sun4v_ldc_rx_get_state)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov %o3, %g3
+ mov HV_FAST_LDC_RX_GET_STATE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ stx %o3, [%g3]
+ retl
+ nop
+ENDPROC(sun4v_ldc_rx_get_state)
+
+ /* %o0: channel
+ * %o1: head_off
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_ldc_rx_set_qhead)
+ mov HV_FAST_LDC_RX_SET_QHEAD, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_rx_set_qhead)
+
+ /* %o0: channel
+ * %o1: ra
+ * %o2: num_entries
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_ldc_set_map_table)
+ mov HV_FAST_LDC_SET_MAP_TABLE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_set_map_table)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long ra
+ * %o2: pointer to unsigned long num_entries
+ *
+ * returns %o0: status
+ *
+ * The pointers are parked in %g1/%g2 before the trap; the trap's
+ * %o1 and %o2 are stored through them afterwards, whatever the
+ * status in %o0.
+ */
+ENTRY(sun4v_ldc_get_map_table)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov HV_FAST_LDC_GET_MAP_TABLE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ nop
+ENDPROC(sun4v_ldc_get_map_table)
+
+ /* %o0: channel
+ * %o1: dir_code
+ * %o2: tgt_raddr
+ * %o3: lcl_raddr
+ * %o4: len
+ * %o5: pointer to unsigned long actual_len
+ *
+ * returns %o0: status
+ *
+ * The pointer arrives in %o5, which is also where the function
+ * number has to go, so it is moved to %g1 first.  The trap's %o1
+ * is stored through it afterwards, whatever the status in %o0.
+ */
+ENTRY(sun4v_ldc_copy)
+ mov %o5, %g1
+ mov HV_FAST_LDC_COPY, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ nop
+ENDPROC(sun4v_ldc_copy)
+
+ /* %o0: channel
+ * %o1: cookie
+ * %o2: pointer to unsigned long ra
+ * %o3: pointer to unsigned long perm
+ *
+ * returns %o0: status
+ *
+ * The pointers are parked in %g1/%g2 before the trap; the trap's
+ * %o1 is stored to *ra and its %o2 to *perm afterwards, whatever
+ * the status in %o0.
+ */
+ENTRY(sun4v_ldc_mapin)
+ mov %o2, %g1
+ mov %o3, %g2
+ mov HV_FAST_LDC_MAPIN, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ nop
+ENDPROC(sun4v_ldc_mapin)
+
+ /* %o0: ra
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the argument and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_ldc_unmap)
+ mov HV_FAST_LDC_UNMAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_unmap)
+
+ /* %o0: channel
+ * %o1: cookie
+ * %o2: mte_cookie
+ *
+ * returns %o0: status
+ *
+ * Only %o5 (function number) is set up here; the arguments and
+ * the result pass straight through.
+ */
+ENTRY(sun4v_ldc_revoke)
+ mov HV_FAST_LDC_REVOKE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_revoke)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: pointer to unsigned long cookie
+ *
+ * returns %o0: status
+ *
+ * The pointer is parked in %g1 before the trap; the trap's %o1 is
+ * stored through it afterwards, whatever the status in %o0.
+ */
+ENTRY(sun4v_vintr_get_cookie)
+ mov %o2, %g1
+ mov HV_FAST_VINTR_GET_COOKIE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ nop
+ENDPROC(sun4v_vintr_get_cookie)
+
+ /* Wrapper for the HV_FAST_VINTR_SET_COOKIE hypervisor fast trap.
+ *
+ * %o0: device handle
+ * %o1: device INO
+ * %o2: cookie
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_set_cookie)
+ mov HV_FAST_VINTR_SET_COOKIE, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP /* %o0-%o2 are passed through untouched */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_vintr_set_cookie)
+
+ /* Wrapper for the HV_FAST_VINTR_GET_VALID hypervisor fast trap.
+ * The result is stored whatever the status in %o0 is.
+ *
+ * %o0: device handle
+ * %o1: device INO
+ * %o2: pointer to unsigned long valid_state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_get_valid)
+ mov %o2, %g1 /* keep the result pointer across the trap */
+ mov HV_FAST_VINTR_GET_VALID, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%g1] /* *valid_state = %o1 as left by the trap */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_vintr_get_valid)
+
+ /* Wrapper for the HV_FAST_VINTR_SET_VALID hypervisor fast trap.
+ *
+ * %o0: device handle
+ * %o1: device INO
+ * %o2: valid_state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_set_valid)
+ mov HV_FAST_VINTR_SET_VALID, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP /* %o0-%o2 are passed through untouched */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_vintr_set_valid)
+
+ /* Wrapper for the HV_FAST_VINTR_GET_STATE hypervisor fast trap.
+ * The result is stored whatever the status in %o0 is.
+ *
+ * %o0: device handle
+ * %o1: device INO
+ * %o2: pointer to unsigned long state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_get_state)
+ mov %o2, %g1 /* keep the result pointer across the trap */
+ mov HV_FAST_VINTR_GET_STATE, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%g1] /* *state = %o1 as left by the trap */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_vintr_get_state)
+
+ /* Wrapper for the HV_FAST_VINTR_SET_STATE hypervisor fast trap.
+ *
+ * %o0: device handle
+ * %o1: device INO
+ * %o2: state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_set_state)
+ mov HV_FAST_VINTR_SET_STATE, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP /* %o0-%o2 are passed through untouched */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_vintr_set_state)
+
+ /* Wrapper for the HV_FAST_VINTR_GET_TARGET hypervisor fast trap.
+ * The result is stored whatever the status in %o0 is.
+ *
+ * %o0: device handle
+ * %o1: device INO
+ * %o2: pointer to unsigned long cpuid
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_get_target)
+ mov %o2, %g1 /* keep the result pointer across the trap */
+ mov HV_FAST_VINTR_GET_TARGET, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%g1] /* *cpuid = %o1 as left by the trap */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_vintr_get_target)
+
+ /* Wrapper for the HV_FAST_VINTR_SET_TARGET hypervisor fast trap.
+ *
+ * %o0: device handle
+ * %o1: device INO
+ * %o2: cpuid
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_set_target)
+ mov HV_FAST_VINTR_SET_TARGET, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP /* %o0-%o2 are passed through untouched */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_vintr_set_target)
+
+ /* Wrapper for the HV_FAST_NCS_REQUEST hypervisor fast trap.
+ *
+ * %o0: NCS sub-function
+ * %o1: sub-function arg real-address
+ * %o2: sub-function arg size
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ncs_request)
+ mov HV_FAST_NCS_REQUEST, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP /* %o0-%o2 are passed through untouched */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_ncs_request)
+
+ /* Wrapper for the HV_FAST_SVC_SEND hypervisor fast trap, run in its
+ * own register window.
+ *
+ * %i0-%i2 (the caller's %o0-%o2): copied to %o0-%o2 for the trap
+ * %i3 (the caller's %o3): address that receives the trap's %o1
+ *
+ * NOTE(review): the plain "restore" does not copy this window's %o0
+ * back, so the caller's %o0 ends up being %i0, which this code never
+ * writes -- confirm that dropping the trap's %o0 is intended.
+ */
+ENTRY(sun4v_svc_send)
+ save %sp, -192, %sp /* new window with a 192-byte frame */
+ mov %i0, %o0
+ mov %i1, %o1
+ mov %i2, %o2
+ mov HV_FAST_SVC_SEND, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%i3] /* stored whatever %o0 came back as */
+ ret
+ restore /* delay slot of ret: pop the window */
+ENDPROC(sun4v_svc_send)
+
+ /* Wrapper for the HV_FAST_SVC_RECV hypervisor fast trap, run in its
+ * own register window.
+ *
+ * %i0-%i2 (the caller's %o0-%o2): copied to %o0-%o2 for the trap
+ * %i3 (the caller's %o3): address that receives the trap's %o1
+ *
+ * NOTE(review): the plain "restore" does not copy this window's %o0
+ * back, so the caller's %o0 ends up being %i0, which this code never
+ * writes -- confirm that dropping the trap's %o0 is intended.
+ */
+ENTRY(sun4v_svc_recv)
+ save %sp, -192, %sp /* new window with a 192-byte frame */
+ mov %i0, %o0
+ mov %i1, %o1
+ mov %i2, %o2
+ mov HV_FAST_SVC_RECV, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%i3] /* stored whatever %o0 came back as */
+ ret
+ restore /* delay slot of ret: pop the window */
+ENDPROC(sun4v_svc_recv)
+
+ /* Wrapper for the HV_FAST_SVC_GETSTATUS hypervisor fast trap.
+ *
+ * %o0: passed to the trap untouched
+ * %o1: address that receives the trap's %o1 (stored unconditionally)
+ *
+ * %o0 is returned as left by the trap.
+ */
+ENTRY(sun4v_svc_getstatus)
+ mov HV_FAST_SVC_GETSTATUS, %o5 /* function number goes in %o5 */
+ mov %o1, %o4 /* keep the result pointer across the trap */
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_svc_getstatus)
+
+ /* Wrapper for the HV_FAST_SVC_SETSTATUS hypervisor fast trap.
+ * The argument registers are passed through untouched and %o0 is
+ * returned as left by the trap.
+ */
+ENTRY(sun4v_svc_setstatus)
+ mov HV_FAST_SVC_SETSTATUS, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_svc_setstatus)
+
+ /* Wrapper for the HV_FAST_SVC_CLRSTATUS hypervisor fast trap.
+ * The argument registers are passed through untouched and %o0 is
+ * returned as left by the trap.
+ */
+ENTRY(sun4v_svc_clrstatus)
+ mov HV_FAST_SVC_CLRSTATUS, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_svc_clrstatus)
+
+ /* Wrapper for the HV_FAST_MMUSTAT_CONF hypervisor fast trap.
+ *
+ * %o0: passed to the trap untouched
+ * %o1: address that receives the trap's %o1 (stored unconditionally)
+ *
+ * %o0 is returned as left by the trap.
+ */
+ENTRY(sun4v_mmustat_conf)
+ mov %o1, %o4 /* keep the result pointer across the trap */
+ mov HV_FAST_MMUSTAT_CONF, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_mmustat_conf)
+
+ /* Wrapper for the HV_FAST_MMUSTAT_INFO hypervisor fast trap.
+ *
+ * %o0: address that receives the trap's %o1 (stored unconditionally);
+ * the same value is still in %o0 when the trap is taken
+ *
+ * %o0 is returned as left by the trap.
+ */
+ENTRY(sun4v_mmustat_info)
+ mov %o0, %o4 /* keep the result pointer across the trap */
+ mov HV_FAST_MMUSTAT_INFO, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_mmustat_info)
+
+ /* Wrapper for the HV_FAST_MMU_DEMAP_ALL hypervisor fast trap.
+ * Takes no arguments from the caller: %o0 and %o1 are zeroed and
+ * %o2 is set to HV_MMU_ALL before trapping. %o0 is returned as left
+ * by the trap.
+ */
+ENTRY(sun4v_mmu_demap_all)
+ clr %o0
+ clr %o1
+ mov HV_MMU_ALL, %o2
+ mov HV_FAST_MMU_DEMAP_ALL, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_mmu_demap_all)
+
+ /* Wrapper for the HV_FAST_GET_PERFREG hypervisor fast trap.
+ *
+ * %o0: passed to the trap untouched; its entry value is also used as
+ * the address at which the trap's %o1 is stored
+ *
+ * %o0 is returned as left by the trap.
+ *
+ * NOTE(review): the store target is taken from %o0, not from a
+ * separate pointer argument -- confirm against the C prototype.
+ */
+ENTRY(sun4v_niagara_getperf)
+ mov %o0, %o4 /* store address for the result below */
+ mov HV_FAST_GET_PERFREG, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%o4] /* stored whatever %o0 came back as */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_niagara_getperf)
+
+ /* Wrapper for the HV_FAST_SET_PERFREG hypervisor fast trap.
+ * The argument registers are passed through untouched and %o0 is
+ * returned as left by the trap.
+ */
+ENTRY(sun4v_niagara_setperf)
+ mov HV_FAST_SET_PERFREG, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_niagara_setperf)
+
+ /* Wrapper for the HV_FAST_N2_GET_PERFREG hypervisor fast trap.
+ *
+ * %o0: passed to the trap untouched; its entry value is also used as
+ * the address at which the trap's %o1 is stored
+ *
+ * %o0 is returned as left by the trap.
+ *
+ * NOTE(review): the store target is taken from %o0, not from a
+ * separate pointer argument -- confirm against the C prototype.
+ */
+ENTRY(sun4v_niagara2_getperf)
+ mov %o0, %o4 /* store address for the result below */
+ mov HV_FAST_N2_GET_PERFREG, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ stx %o1, [%o4] /* stored whatever %o0 came back as */
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_niagara2_getperf)
+
+ /* Wrapper for the HV_FAST_N2_SET_PERFREG hypervisor fast trap.
+ * The argument registers are passed through untouched and %o0 is
+ * returned as left by the trap.
+ */
+ENTRY(sun4v_niagara2_setperf)
+ mov HV_FAST_N2_SET_PERFREG, %o5 /* function number goes in %o5 */
+ ta HV_FAST_TRAP
+ retl
+ nop /* delay slot of retl */
+ENDPROC(sun4v_niagara2_setperf)
diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S
new file mode 100644
index 000000000000..9365432904d6
--- /dev/null
+++ b/arch/sparc/kernel/hvtramp.S
@@ -0,0 +1,140 @@
+/* hvtramp.S: Hypervisor start-cpu trampoline code.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/init.h>
+
+#include <asm/thread_info.h>
+#include <asm/hypervisor.h>
+#include <asm/scratchpad.h>
+#include <asm/spitfire.h>
+#include <asm/hvtramp.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/pil.h>
+
+ __CPUINIT
+ .align 8
+ .globl hv_cpu_startup, hv_cpu_startup_end
+
+ /* This code executes directly out of the hypervisor
+ * with physical addressing (va==pa). %o0 contains
+ * our client argument which for Linux points to
+ * a descriptor data structure which defines the
+ * MMU entries we need to load up.
+ *
+ * After we set things up we enable the MMU and call
+ * into the kernel.
+ *
+ * Any hypervisor call below that returns a non-zero %o0
+ * sends the cpu to the endless loop at label 80.
+ *
+ * First setup basic privileged cpu state.
+ */
+hv_cpu_startup:
+ SET_GL(0)
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ wrpr %g0, 0, %canrestore
+ wrpr %g0, 0, %otherwin
+ wrpr %g0, 6, %cansave
+ wrpr %g0, 6, %cleanwin
+ wrpr %g0, 0, %cwp
+ wrpr %g0, 0, %wstate
+ wrpr %g0, 0, %tl
+
+ sethi %hi(sparc64_ttable_tl0), %g1
+ wrpr %g1, %tba /* trap base = sparc64_ttable_tl0 */
+
+ mov %o0, %l0 /* %l0 = descriptor; %o0 is reused for hypervisor calls */
+
+ lduw [%l0 + HVTRAMP_DESCR_CPU], %g1
+ mov SCRATCHPAD_CPUID, %g2
+ stxa %g1, [%g2] ASI_SCRATCHPAD /* descriptor cpu -> scratchpad[SCRATCHPAD_CPUID] */
+
+ ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
+ stxa %g2, [%g0] ASI_SCRATCHPAD /* fault info VA -> scratchpad offset 0 */
+
+ mov 0, %l1 /* %l1 = index of the current mapping */
+ lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2 /* %l2 = number of mappings */
+ add %l0, HVTRAMP_DESCR_MAPS, %l3 /* %l3 = current mapping entry */
+
+ /* Install each descriptor mapping for both IMMU and DMMU. */
+1: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0
+ clr %o1
+ ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2
+ mov HV_MMU_IMMU | HV_MMU_DMMU, %o3
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ ta HV_FAST_TRAP
+
+ brnz,pn %o0, 80f
+ nop
+
+ add %l1, 1, %l1
+ cmp %l1, %l2
+ blt,a,pt %xcc, 1b
+ add %l3, HVTRAMP_MAPPING_SIZE, %l3 /* annulled delay slot: only runs when looping */
+
+ ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
+ mov HV_FAST_MMU_FAULT_AREA_CONF, %o5
+ ta HV_FAST_TRAP
+
+ brnz,pn %o0, 80f
+ nop
+
+ wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
+
+ ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6 /* becomes %g6 after label 1 below */
+
+ /* Enable the MMU, handing the hypervisor the address of the
+ * second label 1 below; presumably execution resumes there on
+ * success.
+ */
+ mov 1, %o0
+ set 1f, %o1
+ mov HV_FAST_MMU_ENABLE, %o5
+ ta HV_FAST_TRAP
+
+ ba,pt %xcc, 80f /* only reached if the trap returns here */
+ nop
+
+1:
+ wr %g0, 0, %fprs
+ wr %g0, ASI_P, %asi
+
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU /* primary context = 0 */
+ membar #Sync
+
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU /* secondary context = 0 */
+ membar #Sync
+
+ mov %l6, %g6
+ ldx [%g6 + TI_TASK], %g4
+
+ /* %sp = %g6 + (1 << THREAD_SHIFT) - (STACKFRAME_SZ + STACK_BIAS) */
+ mov 1, %g5
+ sllx %g5, THREAD_SHIFT, %g5
+ sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
+ add %g6, %g5, %sp
+ mov 0, %fp
+
+ call init_irqwork_curcpu
+ nop
+ call hard_smp_processor_id
+ nop
+
+ call sun4v_register_mondo_queues
+ nop
+
+ call init_cur_cpu_trap
+ mov %g6, %o0 /* delay slot: argument is %g6 */
+
+ wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
+
+ call smp_callin
+ nop
+ call cpu_idle
+ mov 0, %o0
+ call cpu_panic /* reached only if cpu_idle returns */
+ nop
+
+80: ba,pt %xcc, 80b /* spin forever */
+ nop
+
+ .align 8
+hv_cpu_startup_end:
diff --git a/arch/sparc/kernel/idprom_64.c b/arch/sparc/kernel/idprom_64.c
new file mode 100644
index 000000000000..5b45a808c621
--- /dev/null
+++ b/arch/sparc/kernel/idprom_64.c
@@ -0,0 +1,49 @@
+/*
+ * idprom.c: Routines to load the idprom into kernel addresses and
+ * interpret the data contained within.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+
+#include <asm/oplib.h>
+#include <asm/idprom.h>
+
+struct idprom *idprom; /* set by idprom_init() to point at idprom_buffer */
+static struct idprom idprom_buffer; /* filled by prom_get_idprom() in idprom_init() */
+
+/* XOR together bytes 0x00 through 0x0E of the IDPROM image and return
+ * the result.
+ */
+static unsigned char __init calc_idprom_cksum(struct idprom *idprom)
+{
+	unsigned char *bytes = (unsigned char *)idprom;
+	unsigned char sum = 0;
+	unsigned char off;
+
+	for (off = 0; off <= 0x0E; off++)
+		sum ^= bytes[off];
+
+	return sum;
+}
+
+/* Copy the IDPROM into idprom_buffer, publish it through the global
+ * 'idprom' pointer, warn through the PROM console about an unexpected
+ * format byte or a checksum mismatch, and log the Ethernet address.
+ */
+void __init idprom_init(void)
+{
+	unsigned char calc;
+
+	prom_get_idprom((char *) &idprom_buffer, sizeof(idprom_buffer));
+	idprom = &idprom_buffer;
+
+	if (idprom->id_format != 0x01)
+		prom_printf("IDPROM: Warning, unknown format type!\n");
+
+	/* The checksum only depends on the buffer, so compute it once. */
+	calc = calc_idprom_cksum(idprom);
+	if (idprom->id_cksum != calc)
+		prom_printf("IDPROM: Warning, checksum failure (nvram=%x, calc=%x)!\n",
+			    idprom->id_cksum, calc);
+
+	printk("Ethernet address: %02x:%02x:%02x:%02x:%02x:%02x\n",
+	       idprom->id_ethaddr[0], idprom->id_ethaddr[1],
+	       idprom->id_ethaddr[2], idprom->id_ethaddr[3],
+	       idprom->id_ethaddr[4], idprom->id_ethaddr[5]);
+}
diff --git a/arch/sparc/kernel/init_task_64.c b/arch/sparc/kernel/init_task_64.c
new file mode 100644
index 000000000000..d2b312381c19
--- /dev/null
+++ b/arch/sparc/kernel/init_task_64.c
@@ -0,0 +1,35 @@
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+/* Statically initialized objects for the initial task. */
+static struct fs_struct init_fs = INIT_FS;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/* The .text section in head.S is aligned on a 2-page boundary and this gets
+ * linked right after it, so that init_thread_union is aligned properly as well.
+ * We don't really need this special alignment the way Intel does, but
+ * I do it anyway for completeness.
+ */
+__asm__ (".text"); /* emit the following object into .text */
+union thread_union init_thread_union = { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+EXPORT_SYMBOL(init_task);
+
+__asm__(".data"); /* switch back to .data for init_task itself */
+struct task_struct init_task = INIT_TASK(init_task);
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
new file mode 100644
index 000000000000..1cc1995531e2
--- /dev/null
+++ b/arch/sparc/kernel/iommu.c
@@ -0,0 +1,866 @@
+/* iommu.c: Generic sparc64 IOMMU support.
+ *
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/iommu-helper.h>
+
+#ifdef CONFIG_PCI
+#include <linux/pci.h>
+#endif
+
+#include <asm/iommu.h>
+
+#include "iommu_common.h"
+
+/* Address of the context-match register for context CTX:
+ * strbuf_ctxmatch_base plus 8 bytes per context.
+ */
+#define STC_CTXMATCH_ADDR(STC, CTX) \
+ ((STC)->strbuf_ctxmatch_base + ((CTX) << 3))
+/* Zero the word that strbuf_flushflag points at. */
+#define STC_FLUSHFLAG_INIT(STC) \
+ (*((STC)->strbuf_flushflag) = 0UL)
+/* True once the word that strbuf_flushflag points at is non-zero. */
+#define STC_FLUSHFLAG_SET(STC) \
+ (*((STC)->strbuf_flushflag) != 0UL)
+
+/* 64-bit load (ldxa) from address __reg using ASI_PHYS_BYPASS_EC_E;
+ * evaluates to the value read.
+ */
+#define iommu_read(__reg) \
+({ u64 __ret; \
+ __asm__ __volatile__("ldxa [%1] %2, %0" \
+ : "=r" (__ret) \
+ : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \
+ : "memory"); \
+ __ret; \
+})
+/* 64-bit store (stxa) of __val to address __reg using
+ * ASI_PHYS_BYPASS_EC_E. Unlike iommu_read() there is no "memory" clobber.
+ */
+#define iommu_write(__reg, __val) \
+ __asm__ __volatile__("stxa %0, [%1] %2" \
+ : /* no outputs */ \
+ : "r" (__val), "r" (__reg), \
+ "i" (ASI_PHYS_BYPASS_EC_E))
+
+/* Flush the whole IOMMU, either with a single write to the
+ * flush-invalidate register or, when there is none, by zeroing the 16
+ * tag registers.  Must be invoked under the IOMMU lock.
+ */
+static void iommu_flushall(struct iommu *iommu)
+{
+	unsigned long tag_reg;
+	int left;
+
+	if (iommu->iommu_flushinv) {
+		iommu_write(iommu->iommu_flushinv, ~(u64)0);
+		return;
+	}
+
+	/* Tag registers are laid out 8 bytes apart from iommu_tags. */
+	tag_reg = iommu->iommu_tags;
+	for (left = 16; left > 0; left--, tag_reg += 8)
+		iommu_write(tag_reg, 0);
+
+	/* Ensure completion of previous PIO writes. */
+	(void) iommu_read(iommu->write_complete_reg);
+}
+
+/* IOPTE control bits for a valid, cacheable mapping tagged with context
+ * CTX (the context field starts at bit 47).
+ */
+#define IOPTE_CONSISTENT(CTX) \
+ (IOPTE_VALID | IOPTE_CACHE | \
+ (((CTX) << 47) & IOPTE_CONTEXT))
+
+/* Same as IOPTE_CONSISTENT() with IOPTE_STBUF added. */
+#define IOPTE_STREAMING(CTX) \
+ (IOPTE_CONSISTENT(CTX) | IOPTE_STBUF)
+
+/* Existing mappings are never marked invalid, instead they
+ * are pointed to a dummy page.
+ *
+ * Both arguments are parenthesized so that an expression such as
+ * "base + i" can be passed as the iopte pointer.
+ */
+#define IOPTE_IS_DUMMY(iommu, iopte) \
+ ((iopte_val(*(iopte)) & IOPTE_PAGE) == (iommu)->dummy_page_pa)
+
+/* Point *iopte at the IOMMU's dummy page.  Only the IOPTE_PAGE field is
+ * replaced; every other bit of the entry is left as it was.
+ */
+static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
+{
+	unsigned long kept_bits = iopte_val(*iopte) & ~IOPTE_PAGE;
+
+	iopte_val(*iopte) = kept_bits | iommu->dummy_page_pa;
+}
+
+/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
+ * facility it must all be done in one pass while under the iommu lock.
+ *
+ * On sun4u platforms, we only flush the IOMMU once every time we've passed
+ * over the entire page table doing allocations. Therefore we only ever advance
+ * the hint and cannot backtrack it.
+ *
+ * dev: used for the DMA segment boundary; when NULL a 4GB boundary is used
+ * iommu: IOMMU whose arena is searched
+ * npages: number of contiguous IO pages wanted, must be non-zero
+ * handle: optional; a non-zero *handle is the index to start searching
+ * from, and on success *handle is set to one past the allocation
+ *
+ * Returns the index of the first allocated entry, or DMA_ERROR_CODE.
+ */
+unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu *iommu,
+ unsigned long npages,
+ unsigned long *handle)
+{
+ unsigned long n, end, start, limit, boundary_size;
+ struct iommu_arena *arena = &iommu->arena;
+ int pass = 0; /* number of full rescans done so far */
+
+ /* This allocator was derived from x86_64's bit string search */
+
+ /* Sanity check */
+ if (unlikely(npages == 0)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (handle && *handle)
+ start = *handle;
+ else
+ start = arena->hint;
+
+ limit = arena->limit;
+
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the beginning and flush.
+ */
+ if (start >= limit) {
+ start = 0;
+ if (iommu->flush_all)
+ iommu->flush_all(iommu);
+ }
+
+ again:
+
+ /* Boundary in bytes, rounded up to a whole number of IO pages. */
+ if (dev)
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ 1 << IO_PAGE_SHIFT);
+ else
+ boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
+
+ n = iommu_area_alloc(arena->map, limit, start, npages,
+ iommu->page_table_map_base >> IO_PAGE_SHIFT,
+ boundary_size >> IO_PAGE_SHIFT, 0);
+ if (n == -1) { /* -1 converts to the all-ones unsigned long */
+ if (likely(pass < 1)) {
+ /* First failure, rescan from the beginning. */
+ start = 0;
+ if (iommu->flush_all)
+ iommu->flush_all(iommu);
+ pass++;
+ goto again;
+ } else {
+ /* Second failure, give up */
+ return DMA_ERROR_CODE;
+ }
+ }
+
+ end = n + npages;
+
+ /* The next search without a handle starts after this allocation. */
+ arena->hint = end;
+
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+
+ return n;
+}
+
+/* Release npages arena entries, starting with the one that maps the DMA
+ * address dma_addr.  Only the allocation map is updated here; the page
+ * table entries themselves are not touched.
+ */
+void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
+{
+	unsigned long first;
+
+	first = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+	iommu_area_free(iommu->arena.map, first, npages);
+}
+
+/* Set up the software state of an IOMMU: the lock, the allocation
+ * arena, the dummy page and the IO page table.
+ *
+ * iommu: structure to initialize
+ * tsbsize: size of the IO page table in bytes
+ * dma_offset: DMA address that table entry 0 maps
+ * dma_addr_mask: stored in iommu->dma_addr_mask
+ * numa_node: node the allocations are requested from
+ *
+ * Returns 0 on success or -ENOMEM, in which case everything allocated
+ * here has been released again.
+ */
+int iommu_table_init(struct iommu *iommu, int tsbsize,
+		     u32 dma_offset, u32 dma_addr_mask,
+		     int numa_node)
+{
+	unsigned long i, order, sz, num_tsb_entries;
+	struct page *page;
+
+	num_tsb_entries = tsbsize / sizeof(iopte_t);
+
+	/* Setup initial software IOMMU state. */
+	spin_lock_init(&iommu->lock);
+	iommu->ctx_lowest_free = 1;
+	iommu->page_table_map_base = dma_offset;
+	iommu->dma_addr_mask = dma_addr_mask;
+
+	/* Allocate the zeroed free area map: one bit per table entry,
+	 * with the byte count rounded up to a multiple of 8.
+	 */
+	sz = num_tsb_entries / 8;
+	sz = (sz + 7UL) & ~7UL;
+	iommu->arena.map = kzalloc_node(sz, GFP_KERNEL, numa_node);
+	if (!iommu->arena.map) {
+		printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
+		return -ENOMEM;
+	}
+	iommu->arena.limit = num_tsb_entries;
+
+	if (tlb_type != hypervisor)
+		iommu->flush_all = iommu_flushall;
+
+	/* Allocate and initialize the dummy page which we
+	 * set inactive IO PTEs to point to.
+	 */
+	page = alloc_pages_node(numa_node, GFP_KERNEL, 0);
+	if (!page) {
+		printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n");
+		goto out_free_map;
+	}
+	iommu->dummy_page = (unsigned long) page_address(page);
+	memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
+	iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
+
+	/* Now allocate and setup the IOMMU page table itself. */
+	order = get_order(tsbsize);
+	page = alloc_pages_node(numa_node, GFP_KERNEL, order);
+	if (!page) {
+		printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n");
+		goto out_free_dummy_page;
+	}
+	iommu->page_table = (iopte_t *)page_address(page);
+
+	/* NOTE(review): the table pages are not zeroed, and
+	 * iopte_make_dummy() keeps every bit outside IOPTE_PAGE, so the
+	 * control bits of the initial entries are whatever the pages
+	 * contained -- confirm that this is acceptable.
+	 */
+	for (i = 0; i < num_tsb_entries; i++)
+		iopte_make_dummy(iommu, &iommu->page_table[i]);
+
+	return 0;
+
+out_free_dummy_page:
+	free_page(iommu->dummy_page);
+	iommu->dummy_page = 0UL;
+
+out_free_map:
+	kfree(iommu->arena.map);
+	iommu->arena.map = NULL;
+
+	return -ENOMEM;
+}
+
+/* Allocate npages contiguous IOMMU entries and return a pointer to the
+ * first one in the page table, or NULL when the arena is exhausted.
+ */
+static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
+				    unsigned long npages)
+{
+	unsigned long idx = iommu_range_alloc(dev, iommu, npages, NULL);
+
+	if (likely(idx != DMA_ERROR_CODE))
+		return iommu->page_table + idx;
+
+	return NULL;
+}
+
+/* Allocate a free, non-zero IOMMU context number.
+ *
+ * The bitmap is searched from ctx_lowest_free up to IOMMU_NUM_CTXS and,
+ * when nothing is free there, from 1 up to ctx_lowest_free.  Returns the
+ * context, now marked busy in ctx_bitmap, or 0 after logging a warning
+ * when every context is taken.  The bitmap is updated with the
+ * non-atomic __set_bit(), so the caller must hold iommu->lock.
+ */
+static int iommu_alloc_ctx(struct iommu *iommu)
+{
+	int lowest = iommu->ctx_lowest_free;
+	int n;
+
+	/* The second argument of find_next_zero_bit() is the size of the
+	 * bitmap in bits, not a count relative to the start offset.
+	 * Passing "IOMMU_NUM_CTXS - lowest" would leave the topmost
+	 * 'lowest' contexts out of the search.
+	 */
+	n = find_next_zero_bit(iommu->ctx_bitmap, IOMMU_NUM_CTXS, lowest);
+	if (unlikely(n == IOMMU_NUM_CTXS)) {
+		/* Nothing free at or above 'lowest', wrap around. */
+		n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
+		if (unlikely(n == lowest)) {
+			printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
+			n = 0;
+		}
+	}
+	if (n)
+		__set_bit(n, iommu->ctx_bitmap);
+
+	return n;
+}
+
+/* Give context ctx back to the bitmap and lower ctx_lowest_free when
+ * ctx is below it.  Context 0 means "no context" and is ignored.
+ */
+static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
+{
+	if (unlikely(!ctx))
+		return;
+
+	__clear_bit(ctx, iommu->ctx_bitmap);
+	if (iommu->ctx_lowest_free > ctx)
+		iommu->ctx_lowest_free = ctx;
+}
+
+/* Allocate zeroed pages for a coherent DMA buffer and map them through
+ * the IOMMU with consistent, writable entries in context 0.
+ *
+ * size is rounded up to whole IO pages; requests of order 10 or more
+ * are refused.  On success the CPU address is returned and *dma_addrp
+ * holds the bus address; on failure NULL is returned.
+ */
+static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
+				   dma_addr_t *dma_addrp, gfp_t gfp)
+{
+	unsigned long irq_flags, order, cpu_addr, paddr;
+	struct iommu *iommu;
+	struct page *page;
+	iopte_t *iopte;
+	int left;
+
+	size = IO_PAGE_ALIGN(size);
+	order = get_order(size);
+	if (order >= 10)
+		return NULL;
+
+	page = alloc_pages_node(dev->archdata.numa_node, gfp, order);
+	if (unlikely(!page))
+		return NULL;
+
+	cpu_addr = (unsigned long) page_address(page);
+	memset((char *)cpu_addr, 0, PAGE_SIZE << order);
+
+	iommu = dev->archdata.iommu;
+
+	/* Only the arena allocation is done under the lock. */
+	spin_lock_irqsave(&iommu->lock, irq_flags);
+	iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
+	spin_unlock_irqrestore(&iommu->lock, irq_flags);
+
+	if (unlikely(iopte == NULL)) {
+		free_pages(cpu_addr, order);
+		return NULL;
+	}
+
+	*dma_addrp = (iommu->page_table_map_base +
+		      ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
+
+	/* Fill one entry per IO page of the buffer. */
+	paddr = __pa(cpu_addr);
+	for (left = size >> IO_PAGE_SHIFT; left != 0; left--) {
+		iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
+				     IOPTE_WRITE |
+				     (paddr & IOPTE_PAGE));
+		iopte++;
+		paddr += IO_PAGE_SIZE;
+	}
+
+	return (void *) cpu_addr;
+}
+
+/* Release a buffer obtained from dma_4u_alloc_coherent(): give the
+ * IOMMU arena entries for dvma back and free the pages at cpu.
+ *
+ * NOTE(review): the page table entries of the range are not redirected
+ * to the dummy page here, unlike in the unmap paths -- confirm.
+ */
+static void dma_4u_free_coherent(struct device *dev, size_t size,
+				 void *cpu, dma_addr_t dvma)
+{
+	struct iommu *iommu;
+	unsigned long flags, order, npages;
+
+	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+	iommu = dev->archdata.iommu;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	iommu_range_free(iommu, dvma, npages);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	/* Mirrors the order limit enforced at allocation time. */
+	order = get_order(size);
+	if (order < 10)
+		free_pages((unsigned long)cpu, order);
+}
+
+/* Map the sz bytes at kernel address ptr for DMA.
+ *
+ * Enough IOMMU entries to cover every IO page touched by the buffer
+ * are allocated and filled; they are streaming entries when the
+ * streaming buffer is enabled and consistent entries otherwise, and
+ * writable unless direction is DMA_TO_DEVICE.
+ *
+ * Returns the bus address matching ptr, or DMA_ERROR_CODE (after a
+ * rate-limited warning) when direction is DMA_NONE or the arena is
+ * exhausted.
+ */
+static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz,
+				    enum dma_data_direction direction)
+{
+	struct iommu *iommu;
+	struct strbuf *strbuf;
+	iopte_t *base;
+	unsigned long flags, npages, oaddr;
+	unsigned long i, base_paddr, ctx;
+	u32 bus_addr, ret;
+	unsigned long iopte_protection;
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+
+	if (unlikely(direction == DMA_NONE))
+		goto bad;
+
+	oaddr = (unsigned long)ptr;
+	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
+	npages >>= IO_PAGE_SHIFT;
+
+	/* Take a context only once the range allocation has succeeded.
+	 * The context bitmap is updated with non-atomic bit operations,
+	 * so it must never be touched after the lock is dropped, which
+	 * is what releasing a context on the error path would require.
+	 */
+	spin_lock_irqsave(&iommu->lock, flags);
+	base = alloc_npages(dev, iommu, npages);
+	ctx = 0;
+	if (likely(base) && iommu->iommu_ctxflush)
+		ctx = iommu_alloc_ctx(iommu);
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	if (unlikely(!base))
+		goto bad;
+
+	bus_addr = (iommu->page_table_map_base +
+		    ((base - iommu->page_table) << IO_PAGE_SHIFT));
+	/* Carry the offset within the first IO page into the result. */
+	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
+	base_paddr = __pa(oaddr & IO_PAGE_MASK);
+	if (strbuf->strbuf_enabled)
+		iopte_protection = IOPTE_STREAMING(ctx);
+	else
+		iopte_protection = IOPTE_CONSISTENT(ctx);
+	if (direction != DMA_TO_DEVICE)
+		iopte_protection |= IOPTE_WRITE;
+
+	for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
+		iopte_val(*base) = iopte_protection | base_paddr;
+
+	return ret;
+
+bad:
+	if (printk_ratelimit())
+		WARN_ON(1);
+	return DMA_ERROR_CODE;
+}
+
+/* Flush the streaming buffer for npages IO pages starting at bus address
+ * vaddr, then wait for the flush to complete unless direction is
+ * DMA_TO_DEVICE.
+ *
+ * When both the strbuf and the IOMMU have a context-flush register the
+ * flush is done by context ctx, otherwise (or if the context flush does
+ * not clear the match register) page by page. Timeouts are only logged.
+ */
+static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu,
+ u32 vaddr, unsigned long ctx, unsigned long npages,
+ enum dma_data_direction direction)
+{
+ int limit;
+
+ if (strbuf->strbuf_ctxflush &&
+ iommu->iommu_ctxflush) {
+ unsigned long matchreg, flushreg;
+ u64 val;
+
+ flushreg = strbuf->strbuf_ctxflush;
+ matchreg = STC_CTXMATCH_ADDR(strbuf, ctx);
+
+ iommu_write(flushreg, ctx);
+ val = iommu_read(matchreg);
+ val &= 0xffff; /* only the low 16 bits are examined */
+ if (!val)
+ goto do_flush_sync;
+
+ /* Issue one more context flush per bit still set. */
+ while (val) {
+ if (val & 0x1)
+ iommu_write(flushreg, ctx);
+ val >>= 1;
+ }
+ val = iommu_read(matchreg);
+ if (unlikely(val)) {
+ printk(KERN_WARNING "strbuf_flush: ctx flush "
+ "timeout matchreg[%lx] ctx[%lx]\n",
+ val, ctx);
+ goto do_page_flush; /* fall back to flushing page by page */
+ }
+ } else {
+ unsigned long i;
+
+ do_page_flush:
+ for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
+ iommu_write(strbuf->strbuf_pflush, vaddr);
+ }
+
+do_flush_sync:
+ /* If the device could not have possibly put dirty data into
+ * the streaming cache, no flush-flag synchronization needs
+ * to be performed.
+ */
+ if (direction == DMA_TO_DEVICE)
+ return;
+
+ /* Clear the flag, request the sync, then poll for the flag. */
+ STC_FLUSHFLAG_INIT(strbuf);
+ iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
+ (void) iommu_read(iommu->write_complete_reg);
+
+ limit = 100000; /* polls, with a udelay(1) between them */
+ while (!STC_FLUSHFLAG_SET(strbuf)) {
+ limit--;
+ if (!limit)
+ break;
+ udelay(1);
+ rmb();
+ }
+ /* Note: after a page-by-page flush vaddr has been advanced past
+ * the range, so that is the value printed here.
+ */
+ if (!limit)
+ printk(KERN_WARNING "strbuf_flush: flushflag timeout "
+ "vaddr[%08x] ctx[%lx] npages[%ld]\n",
+ vaddr, ctx, npages);
+}
+
+/* Undo a dma_4u_map_single() mapping of sz bytes at bus_addr: flush the
+ * streaming buffer if it is enabled, point the entries at the dummy page,
+ * and release the arena range and the context. A direction of DMA_NONE
+ * only produces a rate-limited warning.
+ */
+static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
+ size_t sz, enum dma_data_direction direction)
+{
+ struct iommu *iommu;
+ struct strbuf *strbuf;
+ iopte_t *base;
+ unsigned long flags, npages, ctx, i;
+
+ if (unlikely(direction == DMA_NONE)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return;
+ }
+
+ iommu = dev->archdata.iommu;
+ strbuf = dev->archdata.stc;
+
+ /* Number of IO pages touched by [bus_addr, bus_addr + sz). */
+ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+ npages >>= IO_PAGE_SHIFT;
+ base = iommu->page_table +
+ ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+ bus_addr &= IO_PAGE_MASK;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ /* Record the context, if any. */
+ ctx = 0;
+ if (iommu->iommu_ctxflush)
+ ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+
+ /* Step 1: Kick data out of streaming buffers if necessary. */
+ if (strbuf->strbuf_enabled)
+ strbuf_flush(strbuf, iommu, bus_addr, ctx,
+ npages, direction);
+
+ /* Step 2: Clear out TSB entries. */
+ for (i = 0; i < npages; i++)
+ iopte_make_dummy(iommu, base + i);
+
+ /* Step 3: Release the arena range and the context. */
+ iommu_range_free(iommu, bus_addr, npages);
+
+ iommu_free_ctx(iommu, ctx);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+/* Map a scatterlist for DMA.
+ *
+ * Every non-empty element gets its own range of IOMMU entries.  An
+ * element is then merged into the currently open output segment when
+ * its bus address directly follows it, the merged length stays within
+ * the device's maximum segment size and the merged segment does not
+ * cross the device's segment boundary.  Output segments are written
+ * to the dma_address/dma_length fields from the start of sglist.
+ *
+ * Returns the number of output segments, or 0 when nelems is 0, there
+ * is no IOMMU, or an allocation failed (everything mapped so far is
+ * undone in that case).
+ */
+static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
+			 int nelems, enum dma_data_direction direction)
+{
+	struct scatterlist *s, *outs, *segstart;
+	unsigned long flags, handle, prot, ctx;
+	dma_addr_t dma_next = 0, dma_addr;
+	unsigned int max_seg_size;
+	unsigned long seg_boundary_size;
+	int outcount, incount, i;
+	struct strbuf *strbuf;
+	struct iommu *iommu;
+	unsigned long base_shift;
+	/* First IOMMU entry of the currently open output segment.  It is
+	 * set when a segment is opened and read on later iterations, so
+	 * it has to live outside the loop body.
+	 */
+	unsigned long out_entry = 0;
+
+	BUG_ON(direction == DMA_NONE);
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+	if (nelems == 0 || !iommu)
+		return 0;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	ctx = 0;
+	if (iommu->iommu_ctxflush)
+		ctx = iommu_alloc_ctx(iommu);
+
+	if (strbuf->strbuf_enabled)
+		prot = IOPTE_STREAMING(ctx);
+	else
+		prot = IOPTE_CONSISTENT(ctx);
+	if (direction != DMA_TO_DEVICE)
+		prot |= IOPTE_WRITE;
+
+	outs = s = segstart = &sglist[0];
+	outcount = 1;
+	incount = nelems;
+	handle = 0;
+
+	/* Init first segment length for backout at failure */
+	outs->dma_length = 0;
+
+	max_seg_size = dma_get_max_seg_size(dev);
+	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
+	base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
+	for_each_sg(sglist, s, nelems, i) {
+		unsigned long paddr, npages, entry, slen;
+		iopte_t *base;
+
+		slen = s->length;
+		/* Sanity check */
+		if (slen == 0) {
+			dma_next = 0;
+			continue;
+		}
+		/* Allocate iommu entries for that segment */
+		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
+		entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+		/* Handle failure */
+		if (unlikely(entry == DMA_ERROR_CODE)) {
+			if (printk_ratelimit())
+				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+				       " npages %lx\n", iommu, paddr, npages);
+			goto iommu_map_failed;
+		}
+
+		base = iommu->page_table + entry;
+
+		/* Convert entry to a dma_addr_t */
+		dma_addr = iommu->page_table_map_base +
+			(entry << IO_PAGE_SHIFT);
+		dma_addr |= (s->offset & ~IO_PAGE_MASK);
+
+		/* Insert into HW table */
+		paddr &= IO_PAGE_MASK;
+		while (npages--) {
+			iopte_val(*base) = prot | paddr;
+			base++;
+			paddr += IO_PAGE_SIZE;
+		}
+
+		/* If we are in an open segment, try merging */
+		if (segstart != s) {
+			/* We cannot merge if:
+			 * - allocated dma_addr isn't contiguous to previous allocation
+			 * - the merged length would exceed max_seg_size
+			 * - the merged segment would cross a segment boundary
+			 */
+			if ((dma_addr != dma_next) ||
+			    (outs->dma_length + s->length > max_seg_size) ||
+			    (is_span_boundary(out_entry, base_shift,
+					      seg_boundary_size, outs, s))) {
+				/* Can't merge: create a new segment */
+				segstart = s;
+				outcount++;
+				outs = sg_next(outs);
+			} else {
+				outs->dma_length += s->length;
+			}
+		}
+
+		if (segstart == s) {
+			/* This is a new segment, fill entries */
+			outs->dma_address = dma_addr;
+			outs->dma_length = slen;
+			out_entry = entry;
+		}
+
+		/* Calculate next page pointer for contiguous check */
+		dma_next = dma_addr + slen;
+	}
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	/* Terminate the output list when it is shorter than the input. */
+	if (outcount < incount) {
+		outs = sg_next(outs);
+		outs->dma_address = DMA_ERROR_CODE;
+		outs->dma_length = 0;
+	}
+
+	return outcount;
+
+iommu_map_failed:
+	/* Undo every output segment written so far, up to 'outs'. */
+	for_each_sg(sglist, s, nelems, i) {
+		if (s->dma_length != 0) {
+			unsigned long vaddr, npages, entry, j;
+			iopte_t *base;
+
+			vaddr = s->dma_address & IO_PAGE_MASK;
+			npages = iommu_num_pages(s->dma_address, s->dma_length,
+						 IO_PAGE_SIZE);
+			iommu_range_free(iommu, vaddr, npages);
+
+			entry = (vaddr - iommu->page_table_map_base)
+				>> IO_PAGE_SHIFT;
+			base = iommu->page_table + entry;
+
+			for (j = 0; j < npages; j++)
+				iopte_make_dummy(iommu, base + j);
+
+			s->dma_address = DMA_ERROR_CODE;
+			s->dma_length = 0;
+		}
+		if (s == outs)
+			break;
+	}
+
+	/* Nothing stays mapped, so the context taken above is not needed
+	 * any more; without this it would never be released.
+	 */
+	iommu_free_ctx(iommu, ctx);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	return 0;
+}
+
+/* Return the context recorded in the IOMMU entry that maps the DMA
+ * address of sg, or 0 when the IOMMU has no context-flush register.
+ * If contexts are being used, they are the same in all of the mappings
+ * we make for a particular SG, so looking at one entry is enough.
+ */
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+{
+	iopte_t *first;
+	u32 page_addr;
+
+	if (!iommu->iommu_ctxflush)
+		return 0;
+
+	page_addr = sg->dma_address & IO_PAGE_MASK;
+	first = iommu->page_table +
+		((page_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+	return (iopte_val(*first) & IOPTE_CONTEXT) >> 47UL;
+}
+
+/* Undo a dma_4u_map_sg() mapping: for each mapped segment release its
+ * arena range, flush the streaming buffer if it is enabled and point the
+ * entries at the dummy page; finally release the context.
+ */
+static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ unsigned long flags, ctx;
+ struct scatterlist *sg;
+ struct strbuf *strbuf;
+ struct iommu *iommu;
+
+ BUG_ON(direction == DMA_NONE);
+
+ iommu = dev->archdata.iommu;
+ strbuf = dev->archdata.stc;
+
+ /* Read from the first segment's entry, before the lock is taken. */
+ ctx = fetch_sg_ctx(iommu, sglist);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ sg = sglist;
+ while (nelems--) {
+ dma_addr_t dma_handle = sg->dma_address;
+ unsigned int len = sg->dma_length;
+ unsigned long npages, entry;
+ iopte_t *base;
+ int i;
+
+ /* A zero dma_length ends the list of mapped segments. */
+ if (!len)
+ break;
+ npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+ iommu_range_free(iommu, dma_handle, npages);
+
+ entry = ((dma_handle - iommu->page_table_map_base)
+ >> IO_PAGE_SHIFT);
+ base = iommu->page_table + entry;
+
+ dma_handle &= IO_PAGE_MASK;
+ if (strbuf->strbuf_enabled)
+ strbuf_flush(strbuf, iommu, dma_handle, ctx,
+ npages, direction);
+
+ for (i = 0; i < npages; i++)
+ iopte_make_dummy(iommu, base + i);
+
+ sg = sg_next(sg);
+ }
+
+ iommu_free_ctx(iommu, ctx);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+/* Flush the streaming buffer for the sz bytes mapped at bus_addr.
+ * Nothing is done when the streaming buffer is not enabled.
+ */
+static void dma_4u_sync_single_for_cpu(struct device *dev,
+				       dma_addr_t bus_addr, size_t sz,
+				       enum dma_data_direction direction)
+{
+	struct iommu *iommu = dev->archdata.iommu;
+	struct strbuf *strbuf = dev->archdata.stc;
+	unsigned long irq_flags, nr_io_pages;
+	unsigned long ctx = 0;
+
+	if (!strbuf->strbuf_enabled)
+		return;
+
+	spin_lock_irqsave(&iommu->lock, irq_flags);
+
+	/* Work on whole IO pages from here on. */
+	nr_io_pages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+	nr_io_pages >>= IO_PAGE_SHIFT;
+	bus_addr &= IO_PAGE_MASK;
+
+	/* The context is only looked up when a context flush is possible. */
+	if (iommu->iommu_ctxflush && strbuf->strbuf_ctxflush) {
+		iopte_t *entry = iommu->page_table +
+			((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT);
+
+		ctx = (iopte_val(*entry) & IOPTE_CONTEXT) >> 47UL;
+	}
+
+	strbuf_flush(strbuf, iommu, bus_addr, ctx, nr_io_pages, direction);
+
+	spin_unlock_irqrestore(&iommu->lock, irq_flags);
+}
+
+/* Flush the streaming buffer for a mapped scatterlist.
+ *
+ * One flush covers the range from the first segment's IO page to the
+ * end of the last mapped segment (the one before the first segment
+ * with a zero dma_length).  Nothing is done when the streaming buffer
+ * is not enabled or when no segment is mapped.
+ */
+static void dma_4u_sync_sg_for_cpu(struct device *dev,
+				   struct scatterlist *sglist, int nelems,
+				   enum dma_data_direction direction)
+{
+	struct iommu *iommu;
+	struct strbuf *strbuf;
+	unsigned long flags, ctx, npages;
+	struct scatterlist *sg, *sgprv;
+	u32 bus_addr;
+	int i;
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+
+	if (!strbuf->strbuf_enabled)
+		return;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	/* Find the last mapped segment. */
+	sgprv = NULL;
+	for_each_sg(sglist, sg, nelems, i) {
+		if (sg->dma_length == 0)
+			break;
+		sgprv = sg;
+	}
+
+	/* With nelems == 0 or an unmapped first segment there is nothing
+	 * to flush, and sgprv must not be dereferenced.
+	 */
+	if (unlikely(!sgprv))
+		goto out;
+
+	/* Step 1: Record the context, if any. */
+	ctx = 0;
+	if (iommu->iommu_ctxflush &&
+	    strbuf->strbuf_ctxflush) {
+		iopte_t *iopte;
+
+		iopte = iommu->page_table +
+			((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
+	}
+
+	/* Step 2: Kick data out of streaming buffers. */
+	bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
+	npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length)
+		  - bus_addr) >> IO_PAGE_SHIFT;
+	strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+
+out:
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+/* DMA operations implemented by the dma_4u_* functions in this file. */
+static const struct dma_ops sun4u_dma_ops = {
+ .alloc_coherent = dma_4u_alloc_coherent,
+ .free_coherent = dma_4u_free_coherent,
+ .map_single = dma_4u_map_single,
+ .unmap_single = dma_4u_unmap_single,
+ .map_sg = dma_4u_map_sg,
+ .unmap_sg = dma_4u_unmap_sg,
+ .sync_single_for_cpu = dma_4u_sync_single_for_cpu,
+ .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu,
+};
+
+/* Exported, non-const pointer; it initially selects the sun4u operations. */
+const struct dma_ops *dma_ops = &sun4u_dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+/* Report whether the device can use DMA mask device_mask.
+ *
+ * Masks that do not fit in 32 bits are refused.  A mask that contains
+ * every bit of the IOMMU's dma_addr_mask is accepted; any other mask
+ * is handed to pci_dma_supported() for PCI devices and refused for
+ * everything else.  Returns non-zero when supported.
+ */
+int dma_supported(struct device *dev, u64 device_mask)
+{
+	struct iommu *iommu = dev->archdata.iommu;
+	u64 required = iommu->dma_addr_mask;
+
+	if (device_mask > 0xffffffffUL)
+		return 0;
+
+	/* No required bit may be missing from the device's mask. */
+	if (!(required & ~device_mask))
+		return 1;
+
+#ifdef CONFIG_PCI
+	if (dev->bus == &pci_bus_type)
+		return pci_dma_supported(to_pci_dev(dev), device_mask);
+#endif
+
+	return 0;
+}
+EXPORT_SYMBOL(dma_supported);
+
+/* Set the DMA mask of a device.  Only PCI devices are handled, through
+ * pci_set_dma_mask(); for any other device -EINVAL is returned.
+ */
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	int err = -EINVAL;
+
+#ifdef CONFIG_PCI
+	if (dev->bus == &pci_bus_type)
+		err = pci_set_dma_mask(to_pci_dev(dev), dma_mask);
+#endif
+	return err;
+}
+EXPORT_SYMBOL(dma_set_mask);
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
new file mode 100644
index 000000000000..591f5879039c
--- /dev/null
+++ b/arch/sparc/kernel/iommu_common.h
@@ -0,0 +1,59 @@
+/* iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
+ *
+ * Copyright (C) 1999, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _IOMMU_COMMON_H
+#define _IOMMU_COMMON_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/iommu-helper.h>
+
+#include <asm/iommu.h>
+#include <asm/scatterlist.h>
+
+/*
+ * These give mapping size of each iommu pte/tlb.
+ * IO_PAGE_MASK clears the offset-within-page bits; IO_PAGE_ALIGN rounds
+ * an address up to the next IO_PAGE_SIZE boundary.
+ */
+#define IO_PAGE_SHIFT 13
+#define IO_PAGE_SIZE (1UL << IO_PAGE_SHIFT)
+#define IO_PAGE_MASK (~(IO_PAGE_SIZE-1))
+#define IO_PAGE_ALIGN(addr) ALIGN(addr, IO_PAGE_SIZE)
+
+/* Number of TSB entries and the resulting table size (entries * 8;
+ * presumably 8 bytes per IOPTE -- TODO confirm).
+ */
+#define IO_TSB_ENTRIES (128*1024)
+#define IO_TSB_SIZE (IO_TSB_ENTRIES * 8)
+
+/*
+ * This is the hardwired shift in the iotlb tag/data parts.
+ */
+#define IOMMU_PAGE_SHIFT 13
+
+/* Physical address of the memory described by scatterlist entry SG. */
+#define SG_ENT_PHYS_ADDRESS(SG) (__pa(sg_virt((SG))))
+
+/* Ask whether extending the segment @outs by @sg, with the mapping
+ * starting at IOMMU entry @entry, would span a boundary of
+ * @boundary_size.
+ *
+ * The page count is computed from the physical address of @outs and the
+ * combined length outs->dma_length + sg->length, in IO_PAGE_SIZE units;
+ * the decision itself is delegated to iommu_is_span_boundary().
+ */
+static inline int is_span_boundary(unsigned long entry,
+				   unsigned long shift,
+				   unsigned long boundary_size,
+				   struct scatterlist *outs,
+				   struct scatterlist *sg)
+{
+	int npages;
+
+	npages = iommu_num_pages(SG_ENT_PHYS_ADDRESS(outs),
+				 outs->dma_length + sg->length,
+				 IO_PAGE_SIZE);
+
+	return iommu_is_span_boundary(entry, npages, shift, boundary_size);
+}
+
+/* IOMMU range allocator entry points, defined outside this header.
+ * NOTE(review): judging by the names, iommu_range_alloc() reserves
+ * @npages entries and iommu_range_free() releases @npages entries
+ * starting at @dma_addr -- confirm against the implementation.
+ */
+extern unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu *iommu,
+ unsigned long npages,
+ unsigned long *handle);
+extern void iommu_range_free(struct iommu *iommu,
+ dma_addr_t dma_addr,
+ unsigned long npages);
+
+#endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
new file mode 100644
index 000000000000..a3ea2bcb95de
--- /dev/null
+++ b/arch/sparc/kernel/irq_64.c
@@ -0,0 +1,1101 @@
+/* irq.c: UltraSparc IRQ handling/init/registry.
+ *
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/linkage.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/upa.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/timer.h>
+#include <asm/smp.h>
+#include <asm/starfire.h>
+#include <asm/uaccess.h>
+#include <asm/cache.h>
+#include <asm/cpudata.h>
+#include <asm/auxio.h>
+#include <asm/head.h>
+#include <asm/hypervisor.h>
+#include <asm/cacheflush.h>
+
+#include "entry.h"
+
+/* Number of ino_bucket slots allocated for ivector_table in init_IRQ(). */
+#define NUM_IVECS (IMAP_INR + 1)
+
+/* INO bucket table and its physical address; both are set up in init_IRQ(). */
+struct ino_bucket *ivector_table;
+unsigned long ivector_table_pa;
+
+/* On several sun4u processors, it is illegal to mix bypass and
+ * non-bypass accesses. Therefore we access all INO buckets
+ * using bypass accesses only.
+ *
+ * bucket_get_chain_pa() loads the 64-bit __irq_chain_pa field of the
+ * bucket at physical address @bucket_pa through ASI_PHYS_USE_EC and
+ * returns it.
+ */
+static unsigned long bucket_get_chain_pa(unsigned long bucket_pa)
+{
+ unsigned long ret;
+
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=&r" (ret)
+ : "r" (bucket_pa +
+ offsetof(struct ino_bucket,
+ __irq_chain_pa)),
+ "i" (ASI_PHYS_USE_EC));
+
+ return ret;
+}
+
+/* Store zero (%g0) into the __irq_chain_pa field of the bucket at
+ * physical address @bucket_pa, using an ASI_PHYS_USE_EC store.
+ */
+static void bucket_clear_chain_pa(unsigned long bucket_pa)
+{
+ __asm__ __volatile__("stxa %%g0, [%0] %1"
+ : /* no outputs */
+ : "r" (bucket_pa +
+ offsetof(struct ino_bucket,
+ __irq_chain_pa)),
+ "i" (ASI_PHYS_USE_EC));
+}
+
+/* Load the 32-bit __virt_irq field of the bucket at physical address
+ * @bucket_pa through ASI_PHYS_USE_EC and return it.
+ */
+static unsigned int bucket_get_virt_irq(unsigned long bucket_pa)
+{
+ unsigned int ret;
+
+ __asm__ __volatile__("lduwa [%1] %2, %0"
+ : "=&r" (ret)
+ : "r" (bucket_pa +
+ offsetof(struct ino_bucket,
+ __virt_irq)),
+ "i" (ASI_PHYS_USE_EC));
+
+ return ret;
+}
+
+/* Store @virt_irq into the 32-bit __virt_irq field of the bucket at
+ * physical address @bucket_pa, using an ASI_PHYS_USE_EC store.
+ */
+static void bucket_set_virt_irq(unsigned long bucket_pa,
+ unsigned int virt_irq)
+{
+ __asm__ __volatile__("stwa %0, [%1] %2"
+ : /* no outputs */
+ : "r" (virt_irq),
+ "r" (bucket_pa +
+ offsetof(struct ino_bucket,
+ __virt_irq)),
+ "i" (ASI_PHYS_USE_EC));
+}
+
+/* Address of the irq_worklist_pa slot in the trap_block entry of CPU
+ * @__cpu.  The whole expansion is parenthesized so the macro behaves as
+ * a single operand wherever it is used.
+ */
+#define irq_work_pa(__cpu)	(&(trap_block[(__cpu)].irq_worklist_pa))
+
+/* Virtual IRQ table, indexed by virtual IRQ number.  Each slot records
+ * the device handle and device INO it was allocated for, plus an in_use
+ * flag.  Allocation and release are serialised by virt_irq_alloc_lock.
+ */
+static struct {
+ unsigned int dev_handle;
+ unsigned int dev_ino;
+ unsigned int in_use;
+} virt_irq_table[NR_IRQS];
+static DEFINE_SPINLOCK(virt_irq_alloc_lock);
+
+/* Claim the lowest free virtual IRQ (slot 0 is never handed out) and
+ * record @dev_handle / @dev_ino in it.
+ *
+ * Returns the virtual IRQ number, or 0 after logging an error when every
+ * slot is in use.  The return type is a byte, hence the build-time check
+ * that NR_IRQS is below 256.
+ */
+unsigned char virt_irq_alloc(unsigned int dev_handle,
+			     unsigned int dev_ino)
+{
+	unsigned char slot = 1;
+	unsigned long flags;
+
+	BUILD_BUG_ON(NR_IRQS >= 256);
+
+	spin_lock_irqsave(&virt_irq_alloc_lock, flags);
+
+	while (slot < NR_IRQS && virt_irq_table[slot].in_use)
+		slot++;
+
+	if (slot < NR_IRQS) {
+		virt_irq_table[slot].dev_handle = dev_handle;
+		virt_irq_table[slot].dev_ino = dev_ino;
+		virt_irq_table[slot].in_use = 1;
+	} else {
+		printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+		slot = 0;
+	}
+
+	spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
+
+	return slot;
+}
+
+#ifdef CONFIG_PCI_MSI
+/* Mark @virt_irq as free again.  Out-of-range numbers are ignored. */
+void virt_irq_free(unsigned int virt_irq)
+{
+	unsigned long flags;
+
+	if (virt_irq < NR_IRQS) {
+		spin_lock_irqsave(&virt_irq_alloc_lock, flags);
+		virt_irq_table[virt_irq].in_use = 0;
+		spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
+	}
+}
+#endif
+
+/*
+ * /proc/interrupts printing:
+ */
+
+/* seq_file show callback for /proc/interrupts.
+ *
+ * @v points at the IRQ index to print.  Index 0 also emits the per-CPU
+ * header line.  An IRQ with no registered action prints nothing;
+ * otherwise the line holds the IRQ number, its count(s), the chip
+ * typename and the names of all chained actions.  Always returns 0.
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int irq = *(loff_t *) v;
+	struct irqaction *act;
+	unsigned long flags;
+	int cpu;
+
+	if (irq == 0) {
+		seq_printf(p, " ");
+		for_each_online_cpu(cpu)
+			seq_printf(p, "CPU%d ", cpu);
+		seq_putc(p, '\n');
+	}
+
+	if (irq >= NR_IRQS)
+		return 0;
+
+	spin_lock_irqsave(&irq_desc[irq].lock, flags);
+	act = irq_desc[irq].action;
+	if (act) {
+		seq_printf(p, "%3d: ", irq);
+#ifndef CONFIG_SMP
+		seq_printf(p, "%10u ", kstat_irqs(irq));
+#else
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]);
+#endif
+		seq_printf(p, " %9s", irq_desc[irq].chip->typename);
+		seq_printf(p, " %s", act->name);
+
+		/* Remaining handlers sharing this IRQ. */
+		while ((act = act->next) != NULL)
+			seq_printf(p, ", %s", act->name);
+
+		seq_putc(p, '\n');
+	}
+	spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+
+	return 0;
+}
+
+/* Build the target-ID bits to place in an IMAP register so that the
+ * interrupt is delivered to @cpuid.
+ *
+ * Three encodings are produced, depending on the platform:
+ *  - Starfire: the ID comes from starfire_translate() and is masked
+ *    with IMAP_TID_UPA;
+ *  - cheetah / cheetah_plus: the upper half of %ver selects either the
+ *    IMAP_TID_JBUS layout (Jalapeno, Serrano) or the split
+ *    IMAP_AID_SAFARI / IMAP_NID_SAFARI layout built from two 5-bit
+ *    fields of @cpuid;
+ *  - everything else: @cpuid shifted and masked with IMAP_TID_UPA.
+ */
+static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
+{
+	unsigned long ver;
+	unsigned int tid;
+
+	if (this_is_starfire) {
+		tid = starfire_translate(imap, cpuid);
+		tid <<= IMAP_TID_SHIFT;
+		tid &= IMAP_TID_UPA;
+		return tid;
+	}
+
+	if (tlb_type != cheetah && tlb_type != cheetah_plus) {
+		tid = cpuid << IMAP_TID_SHIFT;
+		tid &= IMAP_TID_UPA;
+		return tid;
+	}
+
+	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
+	ver >>= 32UL;
+
+	if (ver == __JALAPENO_ID || ver == __SERRANO_ID) {
+		tid = cpuid << IMAP_TID_SHIFT;
+		tid &= IMAP_TID_JBUS;
+	} else {
+		unsigned int aid = cpuid & 0x1f;
+		unsigned int nid = (cpuid >> 5) & 0x1f;
+
+		tid = (aid << IMAP_AID_SHIFT) | (nid << IMAP_NID_SHIFT);
+		tid &= (IMAP_AID_SAFARI | IMAP_NID_SAFARI);
+	}
+
+	return tid;
+}
+
+/* Per-virtual-IRQ chip data.
+ *
+ * iclr/imap hold the register addresses used by the sun4u enable,
+ * disable and eoi paths (sun4v fills them with ~0UL).  pre_handler and
+ * its two arguments are installed by irq_install_pre_handler() and
+ * invoked from pre_flow_handler().
+ */
+struct irq_handler_data {
+ unsigned long iclr;
+ unsigned long imap;
+
+ void (*pre_handler)(unsigned int, void *, void *);
+ void *arg1;
+ void *arg2;
+};
+
+/* Pick the CPU that should receive @virt_irq.
+ *
+ * SMP: when the IRQ's affinity mask equals CPU_MASK_ALL, or when it has
+ * no online CPU in it, online CPUs are handed out round-robin using the
+ * static irq_rover cursor (protected by irq_rover_lock).  Otherwise the
+ * first online CPU in the affinity mask is used.
+ *
+ * UP: always the current hardware CPU id.
+ */
+#ifdef CONFIG_SMP
+static int irq_choose_cpu(unsigned int virt_irq)
+{
+ cpumask_t mask = irq_desc[virt_irq].affinity;
+ int cpuid;
+
+ if (cpus_equal(mask, CPU_MASK_ALL)) {
+ static int irq_rover;
+ static DEFINE_SPINLOCK(irq_rover_lock);
+ unsigned long flags;
+
+ /* Round-robin distribution... */
+ do_round_robin:
+ spin_lock_irqsave(&irq_rover_lock, flags);
+
+ /* Make sure the cursor currently rests on an online CPU. */
+ while (!cpu_online(irq_rover)) {
+ if (++irq_rover >= NR_CPUS)
+ irq_rover = 0;
+ }
+ cpuid = irq_rover;
+ /* Advance the cursor to the next online CPU for the next caller. */
+ do {
+ if (++irq_rover >= NR_CPUS)
+ irq_rover = 0;
+ } while (!cpu_online(irq_rover));
+
+ spin_unlock_irqrestore(&irq_rover_lock, flags);
+ } else {
+ cpumask_t tmp;
+
+ cpus_and(tmp, cpu_online_map, mask);
+
+ /* Affinity names no online CPU: fall back to round-robin above. */
+ if (cpus_empty(tmp))
+ goto do_round_robin;
+
+ cpuid = first_cpu(tmp);
+ }
+
+ return cpuid;
+}
+#else
+static int irq_choose_cpu(unsigned int virt_irq)
+{
+ return real_hard_smp_processor_id();
+}
+#endif
+
+/* irq_chip enable hook for sun4u.
+ *
+ * Picks a target CPU, rewrites the target-ID bits of the IMAP register
+ * with IMAP_VALID set, then writes ICLR_IDLE to the ICLR register.
+ * Does nothing when the IRQ has no chip data.
+ */
+static void sun4u_irq_enable(unsigned int virt_irq)
+{
+	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+	unsigned long imap_reg, imap_val;
+	unsigned int tid;
+
+	if (unlikely(!data))
+		return;
+
+	imap_reg = data->imap;
+	tid = sun4u_compute_tid(imap_reg, irq_choose_cpu(virt_irq));
+
+	imap_val = upa_readq(imap_reg);
+	imap_val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS |
+		      IMAP_AID_SAFARI | IMAP_NID_SAFARI);
+	imap_val |= tid | IMAP_VALID;
+	upa_writeq(imap_val, imap_reg);
+
+	upa_writeq(ICLR_IDLE, data->iclr);
+}
+
+/* irq_chip set_affinity hook for sun4u.  @mask itself is not used here:
+ * the IRQ is simply re-enabled, and sun4u_irq_enable() re-selects the
+ * target CPU via irq_choose_cpu().
+ */
+static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+ sun4u_irq_enable(virt_irq);
+}
+
+/* irq_chip disable hook for sun4u: clear IMAP_VALID in the IMAP
+ * register.  Does nothing when the IRQ has no chip data.
+ */
+static void sun4u_irq_disable(unsigned int virt_irq)
+{
+	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+	unsigned long imap_reg;
+
+	if (unlikely(!data))
+		return;
+
+	imap_reg = data->imap;
+	upa_writeq(upa_readq(imap_reg) & ~IMAP_VALID, imap_reg);
+}
+
+/* irq_chip eoi hook for sun4u: write ICLR_IDLE to the ICLR register,
+ * unless the descriptor is flagged IRQ_DISABLED or IRQ_INPROGRESS or
+ * the IRQ has no chip data.
+ */
+static void sun4u_irq_eoi(unsigned int virt_irq)
+{
+	struct irq_desc *desc = irq_desc + virt_irq;
+	struct irq_handler_data *data;
+
+	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+		return;
+
+	data = get_irq_chip_data(virt_irq);
+	if (likely(data))
+		upa_writeq(ICLR_IDLE, data->iclr);
+}
+
+/* irq_chip enable hook for sun4v.
+ *
+ * Issues three hypervisor calls for the IRQ's INO, in order: set the
+ * target CPU, set the state to HV_INTR_STATE_IDLE, set it enabled.
+ * A failing call is logged and the sequence continues regardless.
+ */
+static void sun4v_irq_enable(unsigned int virt_irq)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+ unsigned long cpuid = irq_choose_cpu(virt_irq);
+ int err;
+
+ err = sun4v_intr_settarget(ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+ "err(%d)\n", ino, cpuid, err);
+ err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_setstate(%x): "
+ "err(%d)\n", ino, err);
+ err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n",
+ ino, err);
+}
+
+/* irq_chip set_affinity hook for sun4v: retarget the IRQ's INO to the
+ * CPU chosen by irq_choose_cpu().  @mask is not read directly here.
+ * A hypervisor error is only logged.
+ */
+static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+ unsigned long cpuid = irq_choose_cpu(virt_irq);
+ int err;
+
+ err = sun4v_intr_settarget(ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+ "err(%d)\n", ino, cpuid, err);
+}
+
+/* irq_chip disable hook for sun4v: ask the hypervisor to set the IRQ's
+ * INO to HV_INTR_DISABLED.  A hypervisor error is only logged.
+ */
+static void sun4v_irq_disable(unsigned int virt_irq)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+ int err;
+
+ err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_setenabled(%x): "
+ "err(%d)\n", ino, err);
+}
+
+/* irq_chip eoi hook for sun4v: set the INO's state back to
+ * HV_INTR_STATE_IDLE, unless the descriptor is flagged IRQ_DISABLED or
+ * IRQ_INPROGRESS.  A hypervisor error is only logged.
+ */
+static void sun4v_irq_eoi(unsigned int virt_irq)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+ struct irq_desc *desc = irq_desc + virt_irq;
+ int err;
+
+ if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ return;
+
+ err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_setstate(%x): "
+ "err(%d)\n", ino, err);
+}
+
+/* irq_chip enable hook for sun4v virtual (cookie based) interrupts.
+ *
+ * Issues three hypervisor calls for the (dev_handle, dev_ino) pair
+ * recorded in virt_irq_table, in order: set the target CPU, set the
+ * state to HV_INTR_STATE_IDLE, mark the interrupt valid.  A failing
+ * call is logged and the sequence continues regardless.
+ *
+ * Each log message names the hypervisor call that actually failed, so
+ * a set_valid failure is no longer reported as a set_state failure.
+ */
+static void sun4v_virq_enable(unsigned int virt_irq)
+{
+	unsigned long cpuid, dev_handle, dev_ino;
+	int err;
+
+	cpuid = irq_choose_cpu(virt_irq);
+
+	dev_handle = virt_irq_table[virt_irq].dev_handle;
+	dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+	err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+		       "err(%d)\n",
+		       dev_handle, dev_ino, cpuid, err);
+	err = sun4v_vintr_set_state(dev_handle, dev_ino,
+				    HV_INTR_STATE_IDLE);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+		       "HV_INTR_STATE_IDLE): err(%d)\n",
+		       dev_handle, dev_ino, err);
+	err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+				    HV_INTR_ENABLED);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_valid(%lx,%lx,"
+		       "HV_INTR_ENABLED): err(%d)\n",
+		       dev_handle, dev_ino, err);
+}
+
+/* irq_chip set_affinity hook for sun4v virtual interrupts: retarget the
+ * (dev_handle, dev_ino) pair to the CPU chosen by irq_choose_cpu().
+ * @mask is not read directly here.  A hypervisor error is only logged.
+ */
+static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+ unsigned long cpuid, dev_handle, dev_ino;
+ int err;
+
+ cpuid = irq_choose_cpu(virt_irq);
+
+ dev_handle = virt_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+ err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+ "err(%d)\n",
+ dev_handle, dev_ino, cpuid, err);
+}
+
+/* irq_chip disable hook for sun4v virtual interrupts: mark the
+ * (dev_handle, dev_ino) pair as HV_INTR_DISABLED.
+ *
+ * A hypervisor error is only logged; the message names
+ * sun4v_vintr_set_valid(), the call that is actually made here.
+ */
+static void sun4v_virq_disable(unsigned int virt_irq)
+{
+	unsigned long dev_handle, dev_ino;
+	int err;
+
+	dev_handle = virt_irq_table[virt_irq].dev_handle;
+	dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+	err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+				    HV_INTR_DISABLED);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_valid(%lx,%lx,"
+		       "HV_INTR_DISABLED): err(%d)\n",
+		       dev_handle, dev_ino, err);
+}
+
+/* irq_chip eoi hook for sun4v virtual interrupts: set the
+ * (dev_handle, dev_ino) pair back to HV_INTR_STATE_IDLE, unless the
+ * descriptor is flagged IRQ_DISABLED or IRQ_INPROGRESS.  A hypervisor
+ * error is only logged.
+ */
+static void sun4v_virq_eoi(unsigned int virt_irq)
+{
+ struct irq_desc *desc = irq_desc + virt_irq;
+ unsigned long dev_handle, dev_ino;
+ int err;
+
+ if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ return;
+
+ dev_handle = virt_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+ err = sun4v_vintr_set_state(dev_handle, dev_ino,
+ HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+ "HV_INTR_STATE_IDLE): err(%d)\n",
+ dev_handle, dev_ino, err);
+}
+
+/* irq_chip for sun4u: IMAP/ICLR register based hooks. */
+static struct irq_chip sun4u_irq = {
+ .typename = "sun4u",
+ .enable = sun4u_irq_enable,
+ .disable = sun4u_irq_disable,
+ .eoi = sun4u_irq_eoi,
+ .set_affinity = sun4u_set_affinity,
+};
+
+/* irq_chip for sun4v INO based interrupts (sun4v_intr_* hypervisor calls). */
+static struct irq_chip sun4v_irq = {
+ .typename = "sun4v",
+ .enable = sun4v_irq_enable,
+ .disable = sun4v_irq_disable,
+ .eoi = sun4v_irq_eoi,
+ .set_affinity = sun4v_set_affinity,
+};
+
+/* irq_chip for sun4v virtual interrupts (sun4v_vintr_* hypervisor calls). */
+static struct irq_chip sun4v_virq = {
+ .typename = "vsun4v",
+ .enable = sun4v_virq_enable,
+ .disable = sun4v_virq_disable,
+ .eoi = sun4v_virq_eoi,
+ .set_affinity = sun4v_virt_set_affinity,
+};
+
+/* Flow handler installed by irq_install_pre_handler(): runs the
+ * registered pre_handler with the IRQ's INO and its two saved
+ * arguments, then continues with handle_fasteoi_irq().
+ */
+static void pre_flow_handler(unsigned int virt_irq,
+ struct irq_desc *desc)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+
+ data->pre_handler(ino, data->arg1, data->arg2);
+
+ handle_fasteoi_irq(virt_irq, desc);
+}
+
+/* Register @func, to be called with (ino, @arg1, @arg2) before the
+ * normal flow handling of @virt_irq, and switch the descriptor's
+ * handle_irq to pre_flow_handler.
+ *
+ * The chip data of @virt_irq is dereferenced without a NULL check, so
+ * it must already have been set (build_irq() and friends do that).
+ */
+void irq_install_pre_handler(int virt_irq,
+ void (*func)(unsigned int, void *, void *),
+ void *arg1, void *arg2)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+ struct irq_desc *desc = irq_desc + virt_irq;
+
+ data->pre_handler = func;
+ data->arg1 = arg1;
+ data->arg2 = arg2;
+
+ desc->handle_irq = pre_flow_handler;
+}
+
+/* Create (or look up) the virtual IRQ for a sun4u interrupt source.
+ *
+ * The INO is read from the IMAP register at @imap (IGN and INO bits)
+ * and offset by @inofixup.  If the matching ivector_table bucket has no
+ * virtual IRQ yet, one is allocated and bound to the sun4u irq_chip.
+ * Chip data holding @imap and @iclr is attached the first time only.
+ *
+ * Must not be called on hypervisor (sun4v) systems.  Halts through the
+ * PROM if the chip data cannot be allocated.
+ *
+ * NOTE(review): virt_irq_alloc() returns 0 when the table is full; that
+ * value is not checked here.
+ */
+unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
+{
+	struct irq_handler_data *data;
+	struct ino_bucket *bucket;
+	unsigned int virt_irq;
+	int ino;
+
+	BUG_ON(tlb_type == hypervisor);
+
+	ino = (upa_readq(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
+	bucket = &ivector_table[ino];
+
+	virt_irq = bucket_get_virt_irq(__pa(bucket));
+	if (virt_irq == 0) {
+		virt_irq = virt_irq_alloc(0, ino);
+		bucket_set_virt_irq(__pa(bucket), virt_irq);
+		set_irq_chip_and_handler_name(virt_irq,
+					      &sun4u_irq,
+					      handle_fasteoi_irq,
+					      "IVEC");
+	}
+
+	/* Already fully set up by an earlier call. */
+	if (unlikely(get_irq_chip_data(virt_irq)))
+		return virt_irq;
+
+	data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+	if (unlikely(!data)) {
+		prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
+		prom_halt();
+	}
+	set_irq_chip_data(virt_irq, data);
+
+	data->imap = imap;
+	data->iclr = iclr;
+
+	return virt_irq;
+}
+
+/* Create (or look up) the virtual IRQ for sun4v system INO @sysino and
+ * bind it to @chip.
+ *
+ * If the ivector_table bucket for @sysino has no virtual IRQ yet, one
+ * is allocated.  Chip data is attached the first time only.  Must only
+ * be called on hypervisor systems.  Halts through the PROM if the chip
+ * data cannot be allocated.
+ *
+ * NOTE(review): virt_irq_alloc() returns 0 when the table is full; that
+ * value is not checked here.
+ */
+static unsigned int sun4v_build_common(unsigned long sysino,
+				       struct irq_chip *chip)
+{
+	struct irq_handler_data *data;
+	struct ino_bucket *bucket;
+	unsigned int virt_irq;
+
+	BUG_ON(tlb_type != hypervisor);
+
+	bucket = &ivector_table[sysino];
+
+	virt_irq = bucket_get_virt_irq(__pa(bucket));
+	if (virt_irq == 0) {
+		virt_irq = virt_irq_alloc(0, sysino);
+		bucket_set_virt_irq(__pa(bucket), virt_irq);
+		set_irq_chip_and_handler_name(virt_irq, chip,
+					      handle_fasteoi_irq,
+					      "IVEC");
+	}
+
+	/* Already fully set up by an earlier call. */
+	if (unlikely(get_irq_chip_data(virt_irq)))
+		return virt_irq;
+
+	data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+	if (unlikely(!data)) {
+		prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
+		prom_halt();
+	}
+	set_irq_chip_data(virt_irq, data);
+
+	/* Poison values: IMAP/ICLR are handled by hypervisor calls on
+	 * sun4v, never by direct register access, so any use of these
+	 * fields is a bug that should be easy to spot.
+	 */
+	data->imap = ~0UL;
+	data->iclr = ~0UL;
+
+	return virt_irq;
+}
+
+/* Build a virtual IRQ for a sun4v device interrupt: translate
+ * (@devhandle, @devino) to a system INO and hand it to
+ * sun4v_build_common() with the sun4v irq_chip.
+ */
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
+{
+ unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
+
+ return sun4v_build_common(sysino, &sun4v_irq);
+}
+
+/* Build a virtual IRQ for a sun4v cookie-based interrupt identified by
+ * (@devhandle, @devino).
+ *
+ * A private ino_bucket is allocated for the interrupt and its inverted
+ * physical address is registered with the hypervisor as the cookie.
+ * The IRQ is flagged IRQ_NOAUTOEN so request_irq() will not enable it.
+ *
+ * Returns the virtual IRQ number, or 0 when memory or virtual IRQ
+ * numbers are exhausted.  Halts through the PROM if the hypervisor
+ * refuses the cookie.
+ *
+ * Everything that can fail is acquired before any IRQ state is
+ * touched, and each failure path releases what was already allocated.
+ * In particular a failed virt_irq_alloc() (which returns 0) no longer
+ * causes IRQ 0 to be reconfigured, and a failed chip-data allocation
+ * no longer leaks the bucket.
+ */
+unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
+{
+	struct irq_handler_data *data;
+	unsigned long hv_err, cookie;
+	struct ino_bucket *bucket;
+	struct irq_desc *desc;
+	unsigned int virt_irq;
+
+	bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
+	if (unlikely(!bucket))
+		return 0;
+	/* The bucket is accessed with bypass loads/stores from here on,
+	 * so push the zeroing done by kzalloc() out of the D-cache first.
+	 */
+	__flush_dcache_range((unsigned long) bucket,
+			     ((unsigned long) bucket +
+			      sizeof(struct ino_bucket)));
+
+	data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+	if (unlikely(!data))
+		goto out_free_bucket;
+
+	virt_irq = virt_irq_alloc(devhandle, devino);
+	if (unlikely(!virt_irq))
+		goto out_free_data;
+
+	bucket_set_virt_irq(__pa(bucket), virt_irq);
+
+	set_irq_chip_and_handler_name(virt_irq, &sun4v_virq,
+				      handle_fasteoi_irq,
+				      "IVEC");
+
+	/* In order to make the LDC channel startup sequence easier,
+	 * especially wrt. locking, we do not let request_irq() enable
+	 * the interrupt.
+	 */
+	desc = irq_desc + virt_irq;
+	desc->status |= IRQ_NOAUTOEN;
+
+	set_irq_chip_data(virt_irq, data);
+
+	/* Catch accidental accesses to these things.  IMAP/ICLR handling
+	 * is done by hypervisor calls on sun4v platforms, not by direct
+	 * register accesses.
+	 */
+	data->imap = ~0UL;
+	data->iclr = ~0UL;
+
+	cookie = ~__pa(bucket);
+	hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
+	if (hv_err) {
+		prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
+			    "err=%lu\n", devhandle, devino, hv_err);
+		prom_halt();
+	}
+
+	return virt_irq;
+
+out_free_data:
+	kfree(data);
+out_free_bucket:
+	kfree(bucket);
+	return 0;
+}
+
+/* Log an unexpected interrupt.  The INO recorded for @virt_irq is
+ * printed, with 0xdeadbeef standing in when none is recorded.
+ */
+void ack_bad_irq(unsigned int virt_irq)
+{
+	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+
+	printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n",
+	       ino ? ino : 0xdeadbeef, virt_irq);
+}
+
+/* Per-CPU base addresses of the dedicated stacks that handler_irq()
+ * and do_softirq() switch to.
+ */
+void *hardirq_stack[NR_CPUS];
+void *softirq_stack[NR_CPUS];
+
+/* Switch %sp to this CPU's hardirq stack unless it already points
+ * inside it, and return the previous %sp so the caller can restore it
+ * with restore_hardirq_stack().
+ */
+static __attribute__((always_inline)) void *set_hardirq_stack(void)
+{
+ void *orig_sp, *sp = hardirq_stack[smp_processor_id()];
+
+ __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp));
+ /* Only switch when the current %sp lies outside [sp, sp + THREAD_SIZE]. */
+ if (orig_sp < sp ||
+ orig_sp > (sp + THREAD_SIZE)) {
+ /* Start 192 bytes below the top of the stack, adjusted by STACK_BIAS. */
+ sp += THREAD_SIZE - 192 - STACK_BIAS;
+ __asm__ __volatile__("mov %0, %%sp" : : "r" (sp));
+ }
+
+ return orig_sp;
+}
+/* Put %sp back to @orig_sp, the value returned by set_hardirq_stack(). */
+static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp)
+{
+ __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp));
+}
+
+/* Device interrupt entry point.
+ *
+ * Clears softint bit @irq, detaches this CPU's pending bucket list and
+ * runs each bucket's flow handler on the hardirq stack.
+ */
+void handler_irq(int irq, struct pt_regs *regs)
+{
+ unsigned long pstate, bucket_pa;
+ struct pt_regs *old_regs;
+ void *orig_sp;
+
+ clear_softint(1 << irq);
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+
+ /* Grab an atomic snapshot of the pending IVECs. */
+ /* The sequence saves %pstate, rewrites it with PSTATE_IE applied,
+ * loads the worklist head into bucket_pa, stores zero in its place,
+ * then restores the saved %pstate.
+ */
+ __asm__ __volatile__("rdpr %%pstate, %0\n\t"
+ "wrpr %0, %3, %%pstate\n\t"
+ "ldx [%2], %1\n\t"
+ "stx %%g0, [%2]\n\t"
+ "wrpr %0, 0x0, %%pstate\n\t"
+ : "=&r" (pstate), "=&r" (bucket_pa)
+ : "r" (irq_work_pa(smp_processor_id())),
+ "i" (PSTATE_IE)
+ : "memory");
+
+ orig_sp = set_hardirq_stack();
+
+ /* Walk the detached chain of buckets by physical address. */
+ while (bucket_pa) {
+ struct irq_desc *desc;
+ unsigned long next_pa;
+ unsigned int virt_irq;
+
+ /* Read the link and IRQ number before clearing the link. */
+ next_pa = bucket_get_chain_pa(bucket_pa);
+ virt_irq = bucket_get_virt_irq(bucket_pa);
+ bucket_clear_chain_pa(bucket_pa);
+
+ desc = irq_desc + virt_irq;
+
+ desc->handle_irq(virt_irq, desc);
+
+ bucket_pa = next_pa;
+ }
+
+ restore_hardirq_stack(orig_sp);
+
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+/* Run pending softirqs on this CPU's dedicated softirq stack.
+ *
+ * Returns immediately when called in interrupt context.  Local
+ * interrupts are disabled around the pending check and the stack
+ * switch.
+ */
+void do_softirq(void)
+{
+ unsigned long flags;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ void *orig_sp, *sp = softirq_stack[smp_processor_id()];
+
+ /* Start 192 bytes below the top of the stack, adjusted by STACK_BIAS. */
+ sp += THREAD_SIZE - 192 - STACK_BIAS;
+
+ /* Save the current %sp and switch to the softirq stack. */
+ __asm__ __volatile__("mov %%sp, %0\n\t"
+ "mov %1, %%sp"
+ : "=&r" (orig_sp)
+ : "r" (sp));
+ __do_softirq();
+ /* Switch back to the original stack. */
+ __asm__ __volatile__("mov %0, %%sp"
+ : : "r" (orig_sp));
+ }
+
+ local_irq_restore(flags);
+}
+
+/* Default perf counter interrupt handler, used while nobody has
+ * registered one: snapshot PCR and PIC, write 0 to PCR, and log the
+ * snapshot at KERN_EMERG.  @regs is unused.
+ */
+static void unhandled_perf_irq(struct pt_regs *regs)
+{
+ unsigned long pcr, pic;
+
+ read_pcr(pcr);
+ read_pic(pic);
+
+ write_pcr(0);
+
+ printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
+ smp_processor_id());
+ printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
+ smp_processor_id(), pcr, pic);
+}
+
+/* Almost a direct copy of the powerpc PMC code. */
+/* perf_irq is the currently installed perf counter handler;
+ * perf_irq_lock serialises registration and release.
+ */
+static DEFINE_SPINLOCK(perf_irq_lock);
+static void *perf_irq_owner_caller; /* mostly for debugging */
+static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
+
+/* Invoked from level 15 PIL handler in trap table. */
+/* Clears softint bit @irq, then dispatches to the installed handler. */
+void perfctr_irq(int irq, struct pt_regs *regs)
+{
+ clear_softint(1 << irq);
+ perf_irq(regs);
+}
+
+/* Install @handler as the perf counter interrupt handler.
+ *
+ * Returns 0 on success, -EINVAL when @handler is NULL, or -EBUSY (with
+ * a warning naming the current owner's call site) when a handler is
+ * already installed.
+ */
+int register_perfctr_intr(void (*handler)(struct pt_regs *))
+{
+	int ret = 0;
+
+	if (!handler)
+		return -EINVAL;
+
+	spin_lock(&perf_irq_lock);
+	if (perf_irq == unhandled_perf_irq) {
+		/* Remember who claimed it, for the busy warning below. */
+		perf_irq_owner_caller = __builtin_return_address(0);
+		perf_irq = handler;
+	} else {
+		printk(KERN_WARNING "register_perfctr_intr: "
+		       "perf IRQ busy (reserved by caller %p)\n",
+		       perf_irq_owner_caller);
+		ret = -EBUSY;
+	}
+	spin_unlock(&perf_irq_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(register_perfctr_intr);
+
+/* Revert to the default perf counter handler and forget the owner.
+ * NOTE(review): @handler is not compared against the installed handler,
+ * so any caller can release a registration made by someone else.
+ */
+void release_perfctr_intr(void (*handler)(struct pt_regs *))
+{
+ spin_lock(&perf_irq_lock);
+ perf_irq_owner_caller = NULL;
+ perf_irq = unhandled_perf_irq;
+ spin_unlock(&perf_irq_lock);
+}
+EXPORT_SYMBOL_GPL(release_perfctr_intr);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* Re-apply the affinity of every IRQ that has an action and is not
+ * flagged IRQ_PER_CPU, by calling its chip's set_affinity hook (when it
+ * has one) under the descriptor lock.  Finally disable the tick
+ * interrupt through tick_ops.
+ */
+void fixup_irqs(void)
+{
+	unsigned int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		struct irq_desc *desc = irq_desc + irq;
+		unsigned long flags;
+
+		spin_lock_irqsave(&desc->lock, flags);
+		if (desc->action &&
+		    !(desc->status & IRQ_PER_CPU) &&
+		    desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, desc->affinity);
+		spin_unlock_irqrestore(&desc->lock, flags);
+	}
+
+	tick_ops->disable_irq();
+}
+#endif
+
+/* Register layout of the PROM counter-timer: two count/limit pairs. */
+struct sun5_timer {
+ u64 count0;
+ u64 limit0;
+ u64 count1;
+ u64 limit1;
+};
+
+/* PROM timer registers (NULL when not found or not mapped) and the
+ * limit values saved by kill_prom_timer().
+ */
+static struct sun5_timer *prom_timers;
+static u64 prom_limit0, prom_limit1;
+
+/* Locate the PROM's "counter-timer" device node among the children of
+ * the root node and point prom_timers at the address given by its
+ * "address" property.  prom_timers is set to NULL when the node or the
+ * property is missing.
+ */
+static void map_prom_timers(void)
+{
+	const unsigned int *addr;
+	struct device_node *dp;
+
+	/* PROM timer node hangs out in the top level of device siblings... */
+	for (dp = of_find_node_by_path("/")->child; dp; dp = dp->sibling) {
+		if (strcmp(dp->name, "counter-timer") == 0)
+			break;
+	}
+
+	/* No such node: assume the PROM uses some other tick mechanism
+	 * that we need not care about.
+	 */
+	if (dp == NULL) {
+		prom_timers = NULL;
+		return;
+	}
+
+	/* If the PROM is really using this timer, it must have mapped it. */
+	addr = of_get_property(dp, "address", NULL);
+	if (addr == NULL) {
+		prom_printf("PROM does not have timer mapped, trying to continue.\n");
+		prom_timers = NULL;
+		return;
+	}
+
+	prom_timers = (struct sun5_timer *) ((unsigned long)addr[0]);
+}
+
+/* Stop the PROM timer found by map_prom_timers(): save both limit
+ * registers, zero them, then drain the interrupt receive registers.
+ * Does nothing when prom_timers is NULL.
+ */
+static void kill_prom_timer(void)
+{
+ if (!prom_timers)
+ return;
+
+ /* Save them away for later. */
+ prom_limit0 = prom_timers->limit0;
+ prom_limit1 = prom_timers->limit1;
+
+ /* Just as in sun4c/sun4m PROM uses timer which ticks at IRQ 14.
+ * We turn both off here just to be paranoid.
+ */
+ prom_timers->limit0 = 0;
+ prom_timers->limit1 = 0;
+
+ /* Wheee, eat the interrupt packet too... */
+ /* Reads ASI_INTR_RECEIVE and offset 0x40 of ASI_INTR_R (results
+ * discarded), then writes zero to ASI_INTR_RECEIVE and syncs.
+ */
+ __asm__ __volatile__(
+" mov 0x40, %%g2\n"
+" ldxa [%%g0] %0, %%g1\n"
+" ldxa [%%g2] %1, %%g1\n"
+" stxa %%g0, [%%g0] %0\n"
+" membar #Sync\n"
+ : /* no outputs */
+ : "i" (ASI_INTR_RECEIVE), "i" (ASI_INTR_R)
+ : "g1", "g2");
+}
+
+/* Reset the current CPU's pending-interrupt worklist head to empty. */
+void notrace init_irqwork_curcpu(void)
+{
+ int cpu = hard_smp_processor_id();
+
+ trap_block[cpu].irq_worklist_pa = 0UL;
+}
+
+/* Please be very careful with register_one_mondo() and
+ * sun4v_register_mondo_queues().
+ *
+ * On SMP this gets invoked from the CPU trampoline before
+ * the cpu has fully taken over the trap table from OBP,
+ * and its kernel stack + %g6 thread register state is
+ * not fully cooked yet.
+ *
+ * Therefore you cannot make any OBP calls, not even prom_printf,
+ * from these two routines.
+ */
+/* Register one queue of kind @type, located at physical address
+ * @paddr, with the hypervisor.  The entry count passed down is
+ * (@qmask + 1) / 64.
+ *
+ * NOTE(review): the failure path calls prom_printf()/prom_halt(), which
+ * the comment preceding this function says must not be done from here
+ * -- confirm whether that is acceptable for a fatal error.
+ */
+static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask)
+{
+ unsigned long num_entries = (qmask + 1) / 64;
+ unsigned long status;
+
+ status = sun4v_cpu_qconf(type, paddr, num_entries);
+ if (status != HV_EOK) {
+ prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, "
+ "err %lu\n", type, paddr, num_entries, status);
+ prom_halt();
+ }
+}
+
+/* Register the four per-CPU queues of @this_cpu with the hypervisor:
+ * CPU mondo, device mondo, resumable error and non-resumable error.
+ * Addresses and masks come from the CPU's trap_block entry.
+ */
+void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu)
+{
+ struct trap_per_cpu *tb = &trap_block[this_cpu];
+
+ register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO,
+ tb->cpu_mondo_qmask);
+ register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO,
+ tb->dev_mondo_qmask);
+ register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR,
+ tb->resum_qmask);
+ register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR,
+ tb->nonresum_qmask);
+}
+
+/* Allocate boot memory for one mondo queue and store its physical
+ * address in *@pa_ptr.  The size is @qmask + 1 rounded up to a page,
+ * and the allocation is aligned to that size.  Halts through the PROM
+ * on failure.
+ */
+static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
+{
+ unsigned long size = PAGE_ALIGN(qmask + 1);
+ void *p = __alloc_bootmem(size, size, 0);
+ if (!p) {
+ prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
+ prom_halt();
+ }
+
+ *pa_ptr = __pa(p);
+}
+
+/* Allocate boot memory for one kernel buffer and store its physical
+ * address in *@pa_ptr.  Sizing and alignment match alloc_one_mondo();
+ * only the failure message differs.  Halts through the PROM on failure.
+ */
+static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
+{
+ unsigned long size = PAGE_ALIGN(qmask + 1);
+ void *p = __alloc_bootmem(size, size, 0);
+
+ if (!p) {
+ prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
+ prom_halt();
+ }
+
+ *pa_ptr = __pa(p);
+}
+
+/* On SMP, allocate one page per CPU for sending mondos: the first 64
+ * bytes are the mondo block, the rest holds the CPU list.  The
+ * build-time check ensures a u16 per possible CPU fits after the
+ * 64-byte block.  No-op on UP.
+ */
+static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
+{
+#ifdef CONFIG_SMP
+ void *page;
+
+ BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
+
+ page = alloc_bootmem_pages(PAGE_SIZE);
+ if (!page) {
+ prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
+ prom_halt();
+ }
+
+ tb->cpu_mondo_block_pa = __pa(page);
+ tb->cpu_list_pa = __pa(page + 64);
+#endif
+}
+
+/* Allocate mondo and error queues for all possible cpus.
+ * Each CPU gets four queues (CPU mondo, device mondo, resumable and
+ * non-resumable error) plus one kernel buffer for each of the two
+ * error queues.
+ */
+static void __init sun4v_init_mondo_queues(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct trap_per_cpu *tb = &trap_block[cpu];
+
+ alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
+ alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
+ alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
+ alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
+ alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
+ alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
+ tb->nonresum_qmask);
+ }
+}
+
+/* Set up the mondo send page for every possible CPU. */
+static void __init init_send_mondo_info(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		init_cpu_send_mondo_info(&trap_block[cpu]);
+}
+
+/* Name-only irqaction that init_IRQ() attaches to irq_desc[0], so IRQ 0
+ * shows up as "timer" in show_interrupts().
+ */
+static struct irqaction timer_irq_action = {
+ .name = "timer",
+};
+
+/* Only invoked on boot processor.
+ *
+ * Silences the PROM timer, allocates and flushes the ivector table,
+ * sets up the mondo queues and send pages, clears stale soft
+ * interrupts, enables interrupts in %pstate and labels IRQ 0 as the
+ * timer.
+ */
+void __init init_IRQ(void)
+{
+ unsigned long size;
+
+ map_prom_timers();
+ kill_prom_timer();
+
+ size = sizeof(struct ino_bucket) * NUM_IVECS;
+ ivector_table = alloc_bootmem(size);
+ if (!ivector_table) {
+ prom_printf("Fatal error, cannot allocate ivector_table\n");
+ prom_halt();
+ }
+ /* Buckets are only accessed with bypass loads/stores afterwards. */
+ __flush_dcache_range((unsigned long) ivector_table,
+ ((unsigned long) ivector_table) + size);
+
+ ivector_table_pa = __pa(ivector_table);
+
+ if (tlb_type == hypervisor)
+ sun4v_init_mondo_queues();
+
+ init_send_mondo_info();
+
+ if (tlb_type == hypervisor) {
+ /* Load up the boot cpu's entries. */
+ sun4v_register_mondo_queues(hard_smp_processor_id());
+ }
+
+ /* We need to clear any IRQ's pending in the soft interrupt
+ * registers, a spurious one could be left around from the
+ * PROM timer which we just disabled.
+ */
+ clear_softint(get_softint());
+
+ /* Now that ivector table is initialized, it is safe
+ * to receive IRQ vector traps. We will normally take
+ * one or two right now, in case some device PROM used
+ * to boot us wants to speak to us. We just ignore them.
+ */
+ __asm__ __volatile__("rdpr %%pstate, %%g1\n\t"
+ "or %%g1, %0, %%g1\n\t"
+ "wrpr %%g1, 0x0, %%pstate"
+ : /* No outputs */
+ : "i" (PSTATE_IE)
+ : "g1");
+
+ irq_desc[0].action = &timer_irq_action;
+}
diff --git a/arch/sparc/kernel/itlb_miss.S b/arch/sparc/kernel/itlb_miss.S
new file mode 100644
index 000000000000..5a8377b54955
--- /dev/null
+++ b/arch/sparc/kernel/itlb_miss.S
@@ -0,0 +1,39 @@
+/* ITLB miss handler fragment, laid out as four groups of eight
+ * instructions (one per "ICACHE line" below).  Only the first two
+ * groups do work; the last two are nop padding.
+ */
+/* ITLB ** ICACHE line 1: Context 0 check and TSB load */
+ ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer
+ ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET
+ srlx %g6, 48, %g5 ! Get context
+ sllx %g6, 22, %g6 ! Zero out context
+ brz,pn %g5, kvmap_itlb ! Context 0 processing
+ srlx %g6, 22, %g6 ! Delay slot
+ TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
+ cmp %g4, %g6 ! Compare TAG
+
+/* ITLB ** ICACHE line 2: TSB compare and TLB load */
+ bne,pn %xcc, tsb_miss_itlb ! Miss
+ mov FAULT_CODE_ITLB, %g3
+ sethi %hi(_PAGE_EXEC_4U), %g4
+ andcc %g5, %g4, %g0 ! Executable?
+ be,pn %xcc, tsb_do_fault
+ nop ! Delay slot, fill me
+ stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB
+ retry ! Trap done
+
+/* ITLB ** ICACHE line 3: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* ITLB ** ICACHE line 4: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc/kernel/ivec.S b/arch/sparc/kernel/ivec.S
new file mode 100644
index 000000000000..d29f92ebca5e
--- /dev/null
+++ b/arch/sparc/kernel/ivec.S
@@ -0,0 +1,51 @@
+ /* The registers for cross calls will be:
+ *
+ * DATA 0: [low 32-bits] Address of function to call, jmp to this
+ * [high 32-bits] MMU Context Argument 0, place in %g5
+ * DATA 1: Address Argument 1, place in %g1
+ * DATA 2: Address Argument 2, place in %g7
+ *
+ * With this method we can do most of the cross-call tlb/cache
+ * flushing very quickly.
+ */
+ .align 32
+ .globl do_ivec
+ .type do_ivec,#function
+do_ivec:
+ /* Fetch DATA 0 (offset 0x40 of ASI_INTR_R).  A value at or above
+ * KERNBASE is treated as a cross call; the delay slot already
+ * places the high 32 bits in %g5 for that path.
+ */
+ mov 0x40, %g3
+ ldxa [%g3 + %g0] ASI_INTR_R, %g3
+ sethi %hi(KERNBASE), %g4
+ cmp %g3, %g4
+ bgeu,pn %xcc, do_ivec_xcall
+ srlx %g3, 32, %g5
+ /* Device interrupt: clear the receive register first. */
+ stxa %g0, [%g0] ASI_INTR_RECEIVE
+ membar #Sync
+
+ /* %g3 = ivector_table_pa + (DATA 0 << 4): the bucket's physical address. */
+ sethi %hi(ivector_table_pa), %g2
+ ldx [%g2 + %lo(ivector_table_pa)], %g2
+ sllx %g3, 4, %g3
+ add %g2, %g3, %g3
+
+ TRAP_LOAD_IRQ_WORK_PA(%g6, %g1)
+
+ /* Push the bucket onto the worklist at [%g6]: the old head is
+ * stored into the bucket's first word (bypass store), the bucket
+ * becomes the new head, and the PIL_DEVICE_IRQ softint is raised.
+ */
+ ldx [%g6], %g5
+ stxa %g5, [%g3] ASI_PHYS_USE_EC
+ stx %g3, [%g6]
+ wr %g0, 1 << PIL_DEVICE_IRQ, %set_softint
+ retry
+do_ivec_xcall:
+ /* Cross call: load DATA 1 into %g1, truncate the function address
+ * in %g3 to 32 bits, load DATA 2 into %g7, clear the receive
+ * register and jump to the function.
+ */
+ mov 0x50, %g1
+ ldxa [%g1 + %g0] ASI_INTR_R, %g1
+ srl %g3, 0, %g3
+
+ mov 0x60, %g7
+ ldxa [%g7 + %g0] ASI_INTR_R, %g7
+ stxa %g0, [%g0] ASI_INTR_RECEIVE
+ membar #Sync
+ ba,pt %xcc, 1f
+ nop
+
+ .align 32
+1: jmpl %g3, %g0
+ nop
+ .size do_ivec,.-do_ivec
diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c
new file mode 100644
index 000000000000..fefbe6dc51be
--- /dev/null
+++ b/arch/sparc/kernel/kgdb_64.c
@@ -0,0 +1,186 @@
+/* kgdb.c: KGDB support for 64-bit sparc.
+ *
+ * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kgdb.h>
+#include <linux/kdebug.h>
+
+#include <asm/kdebug.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+
+/* Build a gdb register image from the trap-time state in @regs.
+ *
+ * %g0 is reported as zero and the following fifteen slots are copied
+ * from regs->u_regs[].  Locals and ins come from the register window
+ * found at the STACK_BIAS adjusted frame pointer.  All floating point
+ * slots, FSR and FPRS are reported as zero.
+ */
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	struct reg_window *frame;
+	int n;
+
+	gdb_regs[GDB_G0] = 0;
+	n = 0;
+	while (n < 15) {
+		gdb_regs[GDB_G1 + n] = regs->u_regs[UREG_G1 + n];
+		n++;
+	}
+
+	frame = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS);
+	n = 0;
+	while (n < 8) {
+		gdb_regs[GDB_L0 + n] = frame->locals[n];
+		n++;
+	}
+	n = 0;
+	while (n < 8) {
+		gdb_regs[GDB_I0 + n] = frame->ins[n];
+		n++;
+	}
+
+	/* No floating point state is exported. */
+	n = GDB_F0;
+	while (n <= GDB_F62) {
+		gdb_regs[n] = 0;
+		n++;
+	}
+	gdb_regs[GDB_FSR] = 0;
+	gdb_regs[GDB_FPRS] = 0;
+
+	gdb_regs[GDB_PC] = regs->tpc;
+	gdb_regs[GDB_NPC] = regs->tnpc;
+	gdb_regs[GDB_STATE] = regs->tstate;
+	gdb_regs[GDB_Y] = regs->y;
+}
+
+/* Build a gdb register image for task @p, which is not running.
+ *
+ * Only state recoverable from the thread_info is filled in: %g6 and
+ * %g7 (thread_info and task pointers), %sp (saved kernel stack
+ * pointer) and the locals/ins of the window saved at that stack
+ * pointer.  The PC is reported as ret_from_syscall when new_child is
+ * set and as switch_to_pc otherwise.  Everything else is zero.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+	extern unsigned int switch_to_pc;
+	extern unsigned int ret_from_syscall;
+	struct thread_info *ti = task_thread_info(p);
+	struct reg_window *frame;
+	unsigned long resume_pc, cwp;
+	int n;
+
+	/* Globals: only %g6/%g7 are known. */
+	n = GDB_G0;
+	while (n < GDB_G6)
+		gdb_regs[n++] = 0;
+	gdb_regs[GDB_G6] = (unsigned long) ti;
+	gdb_regs[GDB_G7] = (unsigned long) p;
+
+	/* Outs: only the stack pointer is known. */
+	n = GDB_O0;
+	while (n < GDB_SP)
+		gdb_regs[n++] = 0;
+	gdb_regs[GDB_SP] = ti->ksp;
+	gdb_regs[GDB_O7] = 0;
+
+	frame = (struct reg_window *) (ti->ksp + STACK_BIAS);
+	for (n = 0; n < 8; n++)
+		gdb_regs[GDB_L0 + n] = frame->locals[n];
+	for (n = 0; n < 8; n++)
+		gdb_regs[GDB_I0 + n] = frame->ins[n];
+
+	n = GDB_F0;
+	while (n <= GDB_F62)
+		gdb_regs[n++] = 0;
+
+	resume_pc = t_new_child_pc:
+		(unsigned long) (ti->new_child ? &ret_from_syscall
+					       : &switch_to_pc);
+
+	gdb_regs[GDB_PC] = resume_pc;
+	gdb_regs[GDB_NPC] = resume_pc + 4;
+
+	cwp = __thread_flag_byte_ptr(ti)[TI_FLAG_BYTE_CWP];
+
+	gdb_regs[GDB_STATE] = (TSTATE_PRIV | TSTATE_IE | cwp);
+	gdb_regs[GDB_FSR] = 0;
+	gdb_regs[GDB_FPRS] = 0;
+	gdb_regs[GDB_Y] = 0;
+}
+
+/* Write a gdb register image back into the trap-time state @regs.
+ *
+ * The fifteen slots starting at GDB_G1 go to regs->u_regs[], PC/NPC/Y
+ * are copied, and the locals/ins are stored into the register window
+ * at the (already updated) frame pointer.
+ */
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	unsigned long new_state = gdb_regs[GDB_STATE];
+	struct reg_window *frame;
+	int n;
+
+	n = 0;
+	while (n < 15) {
+		regs->u_regs[UREG_G1 + n] = gdb_regs[GDB_G1 + n];
+		n++;
+	}
+
+	/* If the TSTATE register is changing, we have to preserve
+	 * the CWP field, otherwise window save/restore explodes.
+	 */
+	if (regs->tstate != new_state) {
+		unsigned long keep_cwp = regs->tstate & TSTATE_CWP;
+
+		regs->tstate = (new_state & ~TSTATE_CWP) | keep_cwp;
+	}
+
+	regs->y = gdb_regs[GDB_Y];
+	regs->tpc = gdb_regs[GDB_PC];
+	regs->tnpc = gdb_regs[GDB_NPC];
+
+	/* Uses the frame pointer value written by the loop above. */
+	frame = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS);
+	n = 0;
+	while (n < 8) {
+		frame->locals[n] = gdb_regs[GDB_L0 + n];
+		n++;
+	}
+	n = 0;
+	while (n < 8) {
+		frame->ins[n] = gdb_regs[GDB_I0 + n];
+		n++;
+	}
+}
+
+#ifdef CONFIG_SMP
+void smp_kgdb_capture_client(struct pt_regs *regs)
+{
+ unsigned long flags; /* %pstate value on entry, written back on exit */
+
+ __asm__ __volatile__("rdpr %%pstate, %0\n\t" /* flags = %pstate */
+ "wrpr %0, %1, %%pstate" /* write %pstate from flags and PSTATE_IE; NOTE(review): wrpr XORs its operands per SPARC V9, i.e. this flips the IE bit -- confirm */
+ : "=r" (flags)
+ : "i" (PSTATE_IE));
+
+ flushw_all();
+
+ if (atomic_read(&kgdb_active) != -1) /* only report in when kgdb_active is not -1 */
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+
+ __asm__ __volatile__("wrpr %0, 0, %%pstate" /* put the saved %pstate back */
+ : : "r" (flags));
+}
+#endif
+
+/* Handle the architecture specific part of a gdb remote packet.
+ *
+ * Only 'c' (continue, optional new PC), 'D' and 'k' are handled; they
+ * return 0.  Any other packet returns -1.  When the trap PC is the
+ * arch_kgdb_breakpoint instruction itself, PC/NPC are advanced past it
+ * so it is not hit again on resume.
+ */
+int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
+			       char *remcomInBuffer, char *remcomOutBuffer,
+			       struct pt_regs *linux_regs)
+{
+	char cmd = remcomInBuffer[0];
+	unsigned long addr;
+	char *ptr;
+
+	if (cmd != 'c' && cmd != 'D' && cmd != 'k')
+		return -1;
+
+	if (cmd == 'c') {
+		/* try to read optional parameter, pc unchanged if no parm */
+		ptr = &remcomInBuffer[1];
+		if (kgdb_hex2long(&ptr, &addr)) {
+			linux_regs->tpc = addr;
+			linux_regs->tnpc = addr + 4;
+		}
+	}
+
+	if (linux_regs->tpc == (unsigned long) arch_kgdb_breakpoint) {
+		linux_regs->tpc = linux_regs->tnpc;
+		linux_regs->tnpc += 4;
+	}
+	return 0;
+}
+
+/* Entry point for the kgdb breakpoint trap.
+ *
+ * A trap taken from user mode is passed to bad_trap().  A kernel mode
+ * trap flushes the register windows and enters the kgdb core with
+ * local interrupts disabled.
+ */
+asmlinkage void kgdb_trap(unsigned long trap_level, struct pt_regs *regs)
+{
+	unsigned long flags;
+
+	if (!user_mode(regs)) {
+		flushw_all();
+
+		local_irq_save(flags);
+		kgdb_handle_exception(0x172, SIGTRAP, 0, regs);
+		local_irq_restore(flags);
+	} else {
+		bad_trap(regs, trap_level);
+	}
+}
+
+int kgdb_arch_init(void)
+{
+ return 0; /* nothing to set up; always reports success */
+}
+
+void kgdb_arch_exit(void)
+{ /* intentionally empty: kgdb_arch_init() set nothing up */
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+ /* Breakpoint instruction: ta 0x72 (the four bytes below, in memory order) */
+ .gdb_bpt_instr = { 0x91, 0xd0, 0x20, 0x72 },
+};
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
new file mode 100644
index 000000000000..201a6e547e4a
--- /dev/null
+++ b/arch/sparc/kernel/kprobes.c
@@ -0,0 +1,593 @@
+/* arch/sparc64/kernel/kprobes.c
+ *
+ * Copyright (C) 2004 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <asm/signal.h>
+#include <asm/cacheflush.h>
+#include <asm/uaccess.h>
+
+/* We do not have hardware single-stepping on sparc64.
+ * So we implement software single-stepping with breakpoint
+ * traps. The top-level scheme is similar to that used
+ * in the x86 kprobes implementation.
+ *
+ * In the kprobe->ainsn.insn[] array we store the original
+ * instruction at index zero and a break instruction at
+ * index one.
+ *
+ * When we hit a kprobe we:
+ * - Run the pre-handler
+ * - Remember "regs->tnpc" and interrupt level stored in
+ * "regs->tstate" so we can restore them later
+ * - Disable PIL interrupts
+ * - Set regs->tpc to point to kprobe->ainsn.insn[0]
+ * - Set regs->tnpc to point to kprobe->ainsn.insn[1]
+ * - Mark that we are actively in a kprobe
+ *
+ * At this point we wait for the second breakpoint at
+ * kprobe->ainsn.insn[1] to hit. When it does we:
+ * - Run the post-handler
+ * - Set regs->tpc to "remembered" regs->tnpc stored above,
+ * restore the PIL interrupt level in "regs->tstate" as well
+ * - Make any adjustments necessary to regs->tnpc in order
+ * to handle relative branches correctly. See below.
+ * - Mark that we are no longer actively in a kprobe.
+ */
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; /* probe currently being handled on this cpu, NULL when none */
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* per-cpu status, saved tnpc/PIL and jprobe register copy */
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; /* holds only an all-NULL entry; presumably the list terminator */
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{ /* Build the two-instruction single-step buffer for @p; always returns 0. */
+ p->ainsn.insn[0] = *p->addr; /* copy of the probed instruction */
+ flushi(&p->ainsn.insn[0]);
+
+ p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2; /* second breakpoint, hit right after the copy executes */
+ flushi(&p->ainsn.insn[1]);
+
+ p->opcode = *p->addr; /* remembered so arch_disarm_kprobe() can put it back */
+ return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{ /* Arm the probe: overwrite the probed instruction with the breakpoint. */
+ *p->addr = BREAKPOINT_INSTRUCTION;
+ flushi(p->addr); /* flushi() on the patched address so the new instruction is seen */
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{ /* Disarm the probe: write the saved original opcode back. */
+ *p->addr = p->opcode;
+ flushi(p->addr); /* flushi() on the patched address so the restored instruction is seen */
+}
+
+/* Stash the state of the probe currently being handled into
+ * kcb->prev_kprobe so that a nested probe hit can be processed.
+ */
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.orig_tstate_pil = kcb->kprobe_orig_tstate_pil;
+	kcb->prev_kprobe.orig_tnpc = kcb->kprobe_orig_tnpc;
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	kcb->prev_kprobe.kp = kprobe_running();
+}
+
+/* Undo save_previous_kprobe(): make the stashed probe current again
+ * and bring back its status, saved tnpc and saved PIL.
+ */
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->kprobe_orig_tstate_pil = kcb->prev_kprobe.orig_tstate_pil;
+	kcb->kprobe_orig_tnpc = kcb->prev_kprobe.orig_tnpc;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+}
+
+/* Make @p the probe being handled on this cpu and remember the
+ * trap-time tnpc and PIL bits of tstate for the later resume.
+ */
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+					 struct kprobe_ctlblk *kcb)
+{
+	kcb->kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL);
+	kcb->kprobe_orig_tnpc = regs->tnpc;
+	__get_cpu_var(current_kprobe) = p;
+}
+
+/* Point the trap return at the instruction to be single-stepped.
+ *
+ * All PIL bits are set in tstate first.  Normally execution resumes
+ * in the out-of-line copy (ainsn.insn[0] followed by the breakpoint
+ * in ainsn.insn[1]).  If the probed instruction is itself a
+ * breakpoint it is stepped in place instead.
+ */
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs,
+					 struct kprobe_ctlblk *kcb)
+{
+	regs->tstate |= TSTATE_PIL;
+
+	if (p->opcode != BREAKPOINT_INSTRUCTION) {
+		regs->tpc = (unsigned long) &p->ainsn.insn[0];
+		regs->tnpc = (unsigned long) &p->ainsn.insn[1];
+		return;
+	}
+
+	/* Single step inline, since it is a breakpoint instruction. */
+	regs->tpc = (unsigned long) p->addr;
+	regs->tnpc = kcb->kprobe_orig_tnpc;
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{ /* First-breakpoint handler; returns nonzero when the trap was consumed. */
+ struct kprobe *p;
+ void *addr = (void *) regs->tpc; /* address of the trapping instruction */
+ int ret = 0;
+ struct kprobe_ctlblk *kcb;
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
+ if (kprobe_running()) { /* a probe is already being handled on this cpu */
+ p = get_kprobe(addr);
+ if (p) {
+ if (kcb->kprobe_status == KPROBE_HIT_SS) {
+ regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
+ kcb->kprobe_orig_tstate_pil); /* put the saved PIL bits back */
+ goto no_kprobe;
+ }
+ /* We have reentered the kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ prepare_singlestep(p, regs, kcb);
+ return 1; /* preemption stays disabled on this path */
+ } else {
+ if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) {
+ /* The breakpoint instruction was removed by
+ * another cpu right after we hit, no further
+ * handling of this interrupt is appropriate
+ */
+ ret = 1;
+ goto no_kprobe;
+ }
+ p = __get_cpu_var(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs))
+ goto ss_probe; /* break handler asked for the single step to proceed */
+ }
+ goto no_kprobe;
+ }
+
+ p = get_kprobe(addr);
+ if (!p) {
+ if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) {
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ }
+ /* Not one of ours: let kernel handle it */
+ goto no_kprobe;
+ }
+
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (p->pre_handler && p->pre_handler(p, regs))
+ return 1; /* nonzero pre_handler return: skip the single step */
+
+ss_probe:
+ prepare_singlestep(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ return 1; /* preemption stays disabled on this path */
+
+no_kprobe:
+ preempt_enable_no_resched(); /* balances the preempt_disable() at the top */
+ return ret;
+}
+
+/* Compute the NPC to resume with after single-stepping INSN out of
+ * line.
+ *
+ * regs->tpc and regs->tnpc still hold the values of the program
+ * counters at the time of the trap caused by executing the
+ * BREAKPOINT_INSTRUCTION_2 at p->ainsn.insn[1].
+ *
+ * For a PC-relative control transfer (call, branch w/prediction,
+ * traditional branch) the displacement that was applied relative to
+ * the copy is re-applied relative to the real probe address.
+ */
+static unsigned long __kprobes relbranch_fixup(u32 insn, struct kprobe *p,
+					       struct pt_regs *regs)
+{
+	unsigned long probe_pc = (unsigned long) p->addr;
+	unsigned long copy_pc;
+	int is_call, is_bpcc, is_bicc;
+
+	/* Branch not taken, no mods necessary. */
+	if (regs->tnpc == regs->tpc + 0x4UL)
+		return probe_pc + 0x8UL;
+
+	is_call = ((insn & 0xc0000000) == 0x40000000);
+	is_bpcc = ((insn & 0xc1c00000) == 0x00400000);
+	is_bicc = ((insn & 0xc1c00000) == 0x00800000);
+
+	/* It is jmpl or some other absolute PC modification
+	 * instruction, leave NPC as-is.
+	 */
+	if (!(is_call || is_bpcc || is_bicc))
+		return regs->tnpc;
+
+	/* The instruction did all the work for us already, just
+	 * apply the offset to the correct instruction location.
+	 */
+	copy_pc = (unsigned long) &p->ainsn.insn[0];
+	return (probe_pc + (regs->tnpc - copy_pc));
+}
+
+/* If INSN is an instruction which writes its PC location into a
+ * destination register, store REAL_PC there instead, because the
+ * instruction was executed from the out-of-line copy.
+ */
+static void __kprobes retpc_fixup(struct pt_regs *regs, u32 insn,
+				  unsigned long real_pc)
+{
+	unsigned long *dest;
+
+	if ((insn & 0xc0000000) == 0x40000000) {
+		/* Simplest case is 'call', which always uses %o7 */
+		dest = &regs->u_regs[UREG_I7];
+	} else if ((insn & 0xc1f80000) == 0x81c00000) {
+		/* 'jmpl' encodes the register inside of the opcode */
+		unsigned long rd = ((insn >> 25) & 0x1f);
+
+		if (rd > 15) {
+			/* Hard case, it goes onto the stack. */
+			flushw_all();
+
+			dest = (unsigned long *)
+				(regs->u_regs[UREG_FP] + STACK_BIAS);
+			dest += rd - 16;
+		} else {
+			dest = &regs->u_regs[rd];
+		}
+	} else {
+		/* Does not record its PC anywhere. */
+		return;
+	}
+
+	*dest = real_pc;
+}
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction which has been replaced by the breakpoint
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is &p->ainsn.insn[0].
+ *
+ * This function fixes up PC, NPC, any register that recorded the
+ * PC, and the PIL bits of tstate before returning from the
+ * post-single-step breakpoint trap.
+ */
+static void __kprobes resume_execution(struct kprobe *p,
+		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+{
+	u32 stepped = p->ainsn.insn[0];
+	unsigned long next_npc;
+
+	/* Must be computed while tpc/tnpc still hold trap-time values. */
+	next_npc = relbranch_fixup(stepped, p, regs);
+	regs->tnpc = next_npc;
+
+	/* This assignment must occur after relbranch_fixup() */
+	regs->tpc = kcb->kprobe_orig_tnpc;
+
+	retpc_fixup(regs, stepped, (unsigned long) p->addr);
+
+	regs->tstate &= ~TSTATE_PIL;
+	regs->tstate |= kcb->kprobe_orig_tstate_pil;
+}
+
+/* Second-breakpoint handler, run after the single step.
+ *
+ * Returns 0 when no probe is being handled on this cpu, otherwise 1
+ * after calling the post handler (not for re-entered probes),
+ * fixing up the registers and re-enabling preemption.
+ */
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
+		return 0;
+
+	if (cur->post_handler && (kcb->kprobe_status != KPROBE_REENTER)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	resume_execution(cur, regs, kcb);
+
+	/* Restore the originally saved kprobe variables when this was
+	 * a nested hit, otherwise there is no current probe anymore.
+	 */
+	if (kcb->kprobe_status == KPROBE_REENTER)
+		restore_previous_kprobe(kcb);
+	else
+		reset_current_kprobe();
+
+	preempt_enable_no_resched();
+
+	return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{ /* Returns 1 when the fault was fully handled here, 0 to let normal fault handling continue. */
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ const struct exception_table_entry *entry;
+
+ switch(kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the tpc points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ regs->tpc = (unsigned long)cur->addr;
+ regs->tnpc = kcb->kprobe_orig_tnpc;
+ regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
+ kcb->kprobe_orig_tstate_pil); /* put the saved PIL bits back */
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting,
+ * we can also use npre/npostfault count for accounting
+ * these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page_fault, this could happen
+ * if handler tries to access user space by
+ * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+
+ entry = search_exception_tables(regs->tpc);
+ if (entry) {
+ regs->tpc = entry->fixup; /* resume at the exception table fixup address */
+ regs->tnpc = regs->tpc + 4;
+ return 1;
+ }
+
+ /*
+ * fixup_exception() could not handle it,
+ * Let do_page_fault() fix it.
+ */
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Die-notifier wrapper for the kprobe breakpoint traps.
+ *
+ * DIE_DEBUG is routed to kprobe_handler() and DIE_DEBUG_2 to
+ * post_kprobe_handler().  NOTIFY_STOP is returned when the handler
+ * consumed the trap, NOTIFY_DONE otherwise (including for any trap
+ * taken from user mode).
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *info = (struct die_args *)data;
+	int consumed = 0;
+
+	if (info->regs && user_mode(info->regs))
+		return NOTIFY_DONE;
+
+	if (val == DIE_DEBUG)
+		consumed = kprobe_handler(info->regs);
+	else if (val == DIE_DEBUG_2)
+		consumed = post_kprobe_handler(info->regs);
+
+	return consumed ? NOTIFY_STOP : NOTIFY_DONE;
+}
+
+/* Entry point for the two kprobe software traps.
+ *
+ * trap_level == 0x170 --> ta 0x70, reported as DIE_DEBUG / "debug"
+ * trap_level == 0x171 --> ta 0x71, reported as DIE_DEBUG_2 / "debug_2"
+ *
+ * Traps from user mode, and traps that no notifier stops, end up in
+ * bad_trap().
+ */
+asmlinkage void __kprobes kprobe_trap(unsigned long trap_level,
+				      struct pt_regs *regs)
+{
+	const char *label;
+	int first_trap;
+
+	BUG_ON(!(trap_level == 0x170 || trap_level == 0x171));
+
+	if (user_mode(regs)) {
+		local_irq_enable();
+		bad_trap(regs, trap_level);
+		return;
+	}
+
+	first_trap = (trap_level == 0x170);
+	label = first_trap ? "debug" : "debug_2";
+
+	if (notify_die(first_trap ? DIE_DEBUG : DIE_DEBUG_2, label,
+		       regs, 0, trap_level, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	bad_trap(regs, trap_level);
+}
+
+/* Jprobes support.
+ *
+ * Pre-handler of a jprobe: keep a copy of the trap-time registers in
+ * the per-cpu control block, then redirect the trap return to the
+ * jprobe entry function with all PIL bits set.  Always returns 1.
+ */
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	unsigned long entry_pc;
+
+	memcpy(&(kcb->jprobe_saved_regs), regs, sizeof(*regs));
+
+	entry_pc = (unsigned long) jp->entry;
+	regs->tpc = entry_pc;
+	regs->tnpc = entry_pc + 0x4UL;
+	regs->tstate |= TSTATE_PIL;
+
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{ /* Unwind to the frame saved by setjmp_pre_handler(), then trap with "ta 0x70". */
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ register unsigned long orig_fp asm("g1"); /* pinned to %g1 */
+
+ orig_fp = kcb->jprobe_saved_regs.u_regs[UREG_FP]; /* frame pointer saved when the jprobe was entered */
+ __asm__ __volatile__("\n"
+"1: cmp %%sp, %0\n\t" /* compare %sp against the saved frame pointer */
+ "blu,a,pt %%xcc, 1b\n\t" /* loop while %sp is below it ... */
+ " restore\n\t" /* ... popping one register window per iteration (annulled when the loop exits) */
+ ".globl jprobe_return_trap_instruction\n"
+"jprobe_return_trap_instruction:\n\t" /* address compared against tpc in longjmp_break_handler() */
+ "ta 0x70"
+ : /* no outputs */
+ : "r" (orig_fp));
+}
+
+extern void jprobe_return_trap_instruction(void);
+
+/* Break handler of a jprobe.
+ *
+ * When the trap came from the "ta 0x70" inside jprobe_return(), the
+ * registers saved by setjmp_pre_handler() are copied back, preemption
+ * is re-enabled and 1 is returned.  Any other trap address returns 0.
+ */
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	u32 *trap_pc = (u32 *) regs->tpc;
+
+	if (trap_pc != (u32 *) jprobe_return_trap_instruction)
+		return 0;
+
+	memcpy(regs, &(kcb->jprobe_saved_regs), sizeof(*regs));
+	preempt_enable_no_resched();
+	return 1;
+}
+
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
+ *
+ *		call	some_function	<--- return register points here
+ *		 nop			<--- call delay slot
+ *		whatever		<--- where callee returns to
+ *
+ * To keep trampoline_probe_handler logic simpler, we normalize the
+ * value kept in ri->ret_addr (it already includes the +8) so we
+ * don't need to keep adjusting it back and forth.  The trampoline
+ * address written into the return register is biased by -8 for the
+ * same reason.
+ */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+				      struct pt_regs *regs)
+{
+	unsigned long call_site = regs->u_regs[UREG_RETPC];
+	unsigned long trampoline = (unsigned long)kretprobe_trampoline;
+
+	ri->ret_addr = (kprobe_opcode_t *)(call_site + 8);
+
+	/* Replace the return addr with trampoline addr */
+	regs->u_regs[UREG_RETPC] = trampoline - 8;
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp; /* empty_rp collects instances to free after unlocking */
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path
+ * have a return probe installed on them, and/or more than one
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+ regs->tpc = orig_ret_address; /* resume at the real return address */
+ regs->tnpc = orig_ret_address + 4;
+
+ reset_current_kprobe();
+ kretprobe_hash_unlock(current, &flags);
+ preempt_enable_no_resched();
+
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+void kretprobe_trampoline_holder(void)
+{ /* Exists only to contain the global kretprobe_trampoline label. */
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n" /* address installed as return address by arch_prepare_kretprobe() */
+ "\tnop\n"
+ "\tnop\n");
+}
+static struct kprobe trampoline_p = { /* probe placed on the trampoline label itself */
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p); /* returns register_kprobe()'s result unchanged */
+}
+
+/* Return 1 if @p is the probe sitting on kretprobe_trampoline,
+ * 0 for every other probe.
+ */
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	return p->addr == (kprobe_opcode_t *)&kretprobe_trampoline;
+}
diff --git a/arch/sparc/kernel/kstack.h b/arch/sparc/kernel/kstack.h
new file mode 100644
index 000000000000..4248d969272f
--- /dev/null
+++ b/arch/sparc/kernel/kstack.h
@@ -0,0 +1,60 @@
+#ifndef _KSTACK_H
+#define _KSTACK_H
+
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+
+/* Is SP a plausible stack frame address for thread TP?
+ *
+ * SP must be STACK_BIAS adjusted already.  It is accepted when a
+ * struct sparc_stackf starting there fits inside the thread's own
+ * stack (above the thread_info), or inside this cpu's hardirq or
+ * softirq stack when those have been set up.
+ */
+static inline bool kstack_valid(struct thread_info *tp, unsigned long sp)
+{
+	const unsigned long span = THREAD_SIZE - sizeof(struct sparc_stackf);
+	unsigned long lo = (unsigned long) tp;
+
+	if (sp >= (lo + sizeof(struct thread_info)) && sp <= (lo + span))
+		return true;
+
+	if (!hardirq_stack[tp->cpu])
+		return false;
+
+	lo = (unsigned long) hardirq_stack[tp->cpu];
+	if (sp >= lo && sp <= (lo + span))
+		return true;
+
+	lo = (unsigned long) softirq_stack[tp->cpu];
+	if (sp >= lo && sp <= (lo + span))
+		return true;
+
+	return false;
+}
+
+/* Does "regs" point to a valid pt_regs trap frame?
+ *
+ * The pointer must leave room for a whole pt_regs inside the thread
+ * stack, or inside this cpu's hardirq/softirq stack when those have
+ * been set up, and the magic field (low 9 bits ignored) must equal
+ * PT_REGS_MAGIC.
+ */
+static inline bool kstack_is_trap_frame(struct thread_info *tp, struct pt_regs *regs)
+{
+	const unsigned long span = THREAD_SIZE - sizeof(*regs);
+	unsigned long addr = (unsigned long) regs;
+	unsigned long lo = (unsigned long) tp;
+	bool on_stack;
+
+	on_stack = (addr >= lo && addr <= (lo + span));
+
+	if (!on_stack && hardirq_stack[tp->cpu]) {
+		lo = (unsigned long) hardirq_stack[tp->cpu];
+		on_stack = (addr >= lo && addr <= (lo + span));
+
+		if (!on_stack) {
+			lo = (unsigned long) softirq_stack[tp->cpu];
+			on_stack = (addr >= lo && addr <= (lo + span));
+		}
+	}
+
+	if (!on_stack)
+		return false;
+
+	return (regs->magic & ~0x1ff) == PT_REGS_MAGIC;
+}
+
+#endif /* _KSTACK_H */
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
new file mode 100644
index 000000000000..cef8defcd7a9
--- /dev/null
+++ b/arch/sparc/kernel/ktlb.S
@@ -0,0 +1,304 @@
+/* arch/sparc64/kernel/ktlb.S: Kernel mapping TLB miss handling.
+ *
+ * Copyright (C) 1995, 1997, 2005, 2008 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 1996 Eddie C. Dost (ecd@brainaid.de)
+ * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ * Copyright (C) 1996,98,99 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tsb.h>
+
+ .text
+ .align 32
+
+kvmap_itlb:
+	/* Kernel ITLB miss entry.
+	 * g6: TAG TARGET
+	 * Fetch the missing virtual address from the I-MMU tag access
+	 * register into %g4.
+	 */
+	mov		TLB_TAG_ACCESS, %g4
+	ldxa		[%g4] ASI_IMMU, %g4
+
+	/* sun4v_itlb_miss branches here with the missing virtual
+	 * address already loaded into %g4
+	 */
+kvmap_itlb_4v:
+
+kvmap_itlb_nonlinear:
+	/* Catch kernel NULL pointer calls.
+	 *
+	 * This is an instruction miss, so the fault must be raised
+	 * through kvmap_itlb_longpath, which reports %tpc with
+	 * FAULT_CODE_ITLB.  Going through kvmap_dtlb_longpath would
+	 * report FAULT_CODE_DTLB together with whatever address the
+	 * D-MMU tag access register happens to hold.  Only addresses
+	 * strictly below PAGE_SIZE (page zero) are rejected.
+	 */
+	sethi		%hi(PAGE_SIZE), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_itlb_longpath
+	 nop
+
+	/* Probe the kernel TSB; a hit continues at kvmap_itlb_load. */
+	KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_itlb_load)
+
+kvmap_itlb_tsb_miss:
+ sethi %hi(LOW_OBP_ADDRESS), %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_itlb_vmalloc_addr /* vaddr below LOW_OBP_ADDRESS: go walk the page tables */
+ mov 0x1, %g5 /* delay slot, executed either way */
+ sllx %g5, 32, %g5 /* %g5 = 1 << 32 */
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_itlb_obp /* LOW_OBP_ADDRESS <= vaddr < 1 << 32: OBP translation lookup */
+ nop
+
+kvmap_itlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) /* NOTE(review): presumably leaves the PTE's physical address in %g5, branching to the longpath on failure -- confirm against the macro */
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5 /* %g5 = PTE, read through a physical address */
+ mov 1, %g7
+ sllx %g7, TSB_TAG_INVALID_BIT, %g7 /* %g7 = tag value with only the invalid bit set */
+ brgez,a,pn %g5, kvmap_itlb_longpath /* PTE top bit clear: take the fault path */
+ KTSB_STORE(%g1, %g7) /* annulled delay slot: runs only when the branch is taken, storing the invalid tag at %g1 */
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+
+
+kvmap_itlb_load:
+
+661: stxa %g5, [%g0] ASI_ITLB_DATA_IN /* load the PTE in %g5 into the ITLB */
+ retry
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b /* patch record: address of the two instructions at 661 ... */
+ nop /* ... and their two replacements */
+ nop
+ .previous
+
+ /* For sun4v the ASI_ITLB_DATA_IN store and the retry
+ * instruction get nop'd out and we get here to branch
+ * to the sun4v tlb load code. The registers are setup
+ * as follows:
+ *
+ * %g4: vaddr
+ * %g5: PTE
+ * %g6: TAG
+ *
+ * The sun4v TLB load wants the PTE in %g3 so we fix that
+ * up here.
+ */
+ ba,pt %xcc, sun4v_itlb_load
+ mov %g5, %g3 /* delay slot */
+
+kvmap_itlb_longpath:
+
+661: rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate /* NOTE(review): wrpr XORs its operands per SPARC V9, so this toggles AG and MG -- confirm */
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ SET_GL(1) /* sun4v replacement for the two instructions at 661 */
+ nop
+ .previous
+
+ rdpr %tpc, %g5 /* %g5 = trap PC, used as the fault address */
+ ba,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_ITLB, %g4 /* delay slot: %g4 = fault code */
+
+kvmap_itlb_obp:
+ OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath) /* NOTE(review): presumably yields the PTE for vaddr %g4 in %g5, or branches to the longpath -- confirm against the macro */
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ ba,pt %xcc, kvmap_itlb_load /* then load the PTE into the ITLB */
+ nop
+
+kvmap_dtlb_obp:
+ OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath) /* NOTE(review): presumably yields the PTE for vaddr %g4 in %g5, or branches to the longpath -- confirm against the macro */
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ ba,pt %xcc, kvmap_dtlb_load /* then load the PTE into the DTLB */
+ nop
+
+ .align 32
+kvmap_dtlb_tsb4m_load: /* reached from kvmap_linear_patch with the PTE in %g5 */
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+ KTSB_WRITE(%g1, %g5, %g6)
+ ba,pt %xcc, kvmap_dtlb_load
+ nop
+
+kvmap_dtlb:
+ /* %g6: TAG TARGET */
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g4 /* %g4 = missing virtual address from the D-MMU */
+
+ /* sun4v_dtlb_miss branches here with the missing virtual
+ * address already loaded into %g4
+ */
+kvmap_dtlb_4v:
+ brgez,pn %g4, kvmap_dtlb_nonlinear /* top address bit clear: not handled by the code below */
+ nop
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /* Index through the base page size TSB even for linear
+ * mappings when using page allocation debugging.
+ */
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+#else
+ /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */
+ KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+#endif
+ /* TSB entry address left in %g1, lookup linear PTE.
+ * Must preserve %g1 and %g6 (TAG).
+ */
+kvmap_dtlb_tsb4m_miss:
+ sethi %hi(kpte_linear_bitmap), %g2
+ or %g2, %lo(kpte_linear_bitmap), %g2 /* %g2 = &kpte_linear_bitmap */
+
+ /* Clear the PAGE_OFFSET top virtual bits, then shift
+ * down to get a 256MB physical address index.
+ */
+ sllx %g4, 21, %g5
+ mov 1, %g7
+ srlx %g5, 21 + 28, %g5
+
+ /* Don't try this at home kids... this depends upon sllx
+ * only taking the low 6 bits of the shift count in %g5.
+ */
+ sllx %g7, %g5, %g7 /* %g7 = 1 << (index mod 64): bit within the bitmap word */
+
+ /* Divide by 64 to get the offset into the bitmask. */
+ srlx %g5, 6, %g5
+ sllx %g5, 3, %g5 /* word index * 8 = byte offset */
+
+ /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */
+ ldx [%g2 + %g5], %g2 /* %g2 = bitmap word */
+ andcc %g2, %g7, %g0 /* test the bit, result discarded */
+ sethi %hi(kern_linear_pte_xor), %g5
+ or %g5, %lo(kern_linear_pte_xor), %g5 /* %g5 = &kern_linear_pte_xor[0] */
+ bne,a,pt %xcc, 1f
+ add %g5, 8, %g5 /* annulled delay slot: only when the bit is set, select element [1] */
+
+1: ldx [%g5], %g2 /* %g2 = selected xor value */
+
+ .globl kvmap_linear_patch
+kvmap_linear_patch:
+ ba,pt %xcc, kvmap_dtlb_tsb4m_load
+ xor %g2, %g4, %g5 /* delay slot: PTE = xor value ^ vaddr */
+
+kvmap_dtlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) /* NOTE(review): presumably leaves the PTE's physical address in %g5, branching to the longpath on failure -- confirm against the macro */
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5 /* %g5 = PTE, read through a physical address */
+ mov 1, %g7
+ sllx %g7, TSB_TAG_INVALID_BIT, %g7 /* %g7 = tag value with only the invalid bit set */
+ brgez,a,pn %g5, kvmap_dtlb_longpath /* PTE top bit clear: take the fault path */
+ KTSB_STORE(%g1, %g7) /* annulled delay slot: runs only when the branch is taken, storing the invalid tag at %g1 */
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+
+kvmap_dtlb_load:
+
+661: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
+ retry
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b /* patch record: address of the two instructions at 661 ... */
+ nop /* ... and their two replacements */
+ nop
+ .previous
+
+ /* For sun4v the ASI_DTLB_DATA_IN store and the retry
+ * instruction get nop'd out and we get here to branch
+ * to the sun4v tlb load code. The registers are setup
+ * as follows:
+ *
+ * %g4: vaddr
+ * %g5: PTE
+ * %g6: TAG
+ *
+ * The sun4v TLB load wants the PTE in %g3 so we fix that
+ * up here.
+ */
+ ba,pt %xcc, sun4v_dtlb_load
+ mov %g5, %g3 /* delay slot */
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+kvmap_vmemmap: /* entered from kvmap_dtlb_nonlinear with %g5 = VMEMMAP_BASE */
+ sub %g4, %g5, %g5 /* offset of vaddr from VMEMMAP_BASE */
+ srlx %g5, 22, %g5 /* index in 4MB units */
+ sethi %hi(vmemmap_table), %g1
+ sllx %g5, 3, %g5 /* index * 8 = byte offset into the table */
+ or %g1, %lo(vmemmap_table), %g1 /* %g1 = &vmemmap_table */
+ ba,pt %xcc, kvmap_dtlb_load
+ ldx [%g1 + %g5], %g5 /* delay slot: %g5 = table entry, used as the PTE */
+#endif
+
+kvmap_dtlb_nonlinear:
+ /* Catch kernel NULL pointer derefs. */
+ sethi %hi(PAGE_SIZE), %g5
+ cmp %g4, %g5
+ bleu,pn %xcc, kvmap_dtlb_longpath /* vaddr <= PAGE_SIZE: raise a fault */
+ nop
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ /* Do not use the TSB for vmemmap. */
+ mov (VMEMMAP_BASE >> 24), %g5
+ sllx %g5, 24, %g5 /* %g5 = VMEMMAP_BASE, built with a shift */
+ cmp %g4,%g5
+ bgeu,pn %xcc, kvmap_vmemmap /* vaddr >= VMEMMAP_BASE; %g5 is still live there */
+ nop
+#endif
+
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+
+kvmap_dtlb_tsbmiss:
+ sethi %hi(MODULES_VADDR), %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_dtlb_longpath /* vaddr below MODULES_VADDR: fault */
+ mov (VMALLOC_END >> 24), %g5 /* delay slot, executed either way */
+ sllx %g5, 24, %g5 /* %g5 = VMALLOC_END, built with a shift */
+ cmp %g4, %g5
+ bgeu,pn %xcc, kvmap_dtlb_longpath /* vaddr >= VMALLOC_END: fault */
+ nop
+
+kvmap_check_obp:
+ sethi %hi(LOW_OBP_ADDRESS), %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_dtlb_vmalloc_addr /* vaddr below LOW_OBP_ADDRESS: walk the page tables */
+ mov 0x1, %g5 /* delay slot, executed either way */
+ sllx %g5, 32, %g5 /* %g5 = 1 << 32 */
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_dtlb_obp /* LOW_OBP_ADDRESS <= vaddr < 1 << 32: OBP translation lookup */
+ nop
+ ba,pt %xcc, kvmap_dtlb_vmalloc_addr /* everything else: walk the page tables */
+ nop
+
+kvmap_dtlb_longpath:
+
+661: rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate /* NOTE(review): wrpr XORs its operands per SPARC V9, so this toggles AG and MG -- confirm */
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ SET_GL(1) /* sun4v replacements for the two instructions at 661 */
+ ldxa [%g0] ASI_SCRATCHPAD, %g5 /* %g5 = scratchpad value, used as base address by the sun4v patch below */
+ .previous
+
+ rdpr %tl, %g3
+ cmp %g3, 1 /* condition codes are consumed by the "be" below */
+
+661: mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g5 /* %g5 = D-MMU tag access value (fault address) */
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 /* sun4v: fault address read at HV_FAULT_D_ADDR_OFFSET from %g5 */
+ nop
+ .previous
+
+ be,pt %xcc, sparc64_realfault_common /* trap level 1: ordinary fault path */
+ mov FAULT_CODE_DTLB, %g4 /* delay slot, executed either way */
+ ba,pt %xcc, winfix_trampoline /* any other trap level */
+ nop
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
new file mode 100644
index 000000000000..d68982330f66
--- /dev/null
+++ b/arch/sparc/kernel/ldc.c
@@ -0,0 +1,2378 @@
+/* ldc.c: Logical Domain Channel link-layer protocol driver.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/init.h>
+
+#include <asm/hypervisor.h>
+#include <asm/iommu.h>
+#include <asm/page.h>
+#include <asm/ldc.h>
+#include <asm/mdesc.h>
+
+#define DRV_MODULE_NAME "ldc"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.1"
+#define DRV_MODULE_RELDATE "July 22, 2008"
+
+/* Driver banner string assembled from the DRV_MODULE_* macros. */
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+#define LDC_PACKET_SIZE 64
+
+/* Packet header layout for unreliable and reliable mode frames.
+ * When in RAW mode, packets are simply straight 64-byte payloads
+ * with no headers.
+ *
+ * The first 8 bytes (type, stype, ctrl, env, seqid) are common to
+ * both layouts; the union holds what follows them.
+ */
+struct ldc_packet {
+ /* Frame class; ldc_rx() dispatches on this value. */
+ u8 type;
+#define LDC_CTRL 0x01
+#define LDC_DATA 0x02
+#define LDC_ERR 0x10
+
+ /* Frame subtype; tested as a bit mask in data_ack_nack(). */
+ u8 stype;
+#define LDC_INFO 0x01
+#define LDC_ACK 0x02
+#define LDC_NACK 0x04
+
+ /* Control message code; process_control_frame() dispatches on it. */
+ u8 ctrl;
+#define LDC_VERS 0x01 /* Link Version */
+#define LDC_RTS 0x02 /* Request To Send */
+#define LDC_RTR 0x03 /* Ready To Receive */
+#define LDC_RDX 0x04 /* Ready for Data eXchange */
+#define LDC_CTRL_MSK 0x0f
+
+ /* RTS/RTR frames carry the channel mode here; data frames carry the
+ * payload length (LDC_LEN bits) or'ed with the START/STOP fragment
+ * flags.
+ */
+ u8 env;
+#define LDC_LEN 0x3f
+#define LDC_FRAG_MASK 0xc0
+#define LDC_START 0x40
+#define LDC_STOP 0x80
+
+ /* Sequence number of this frame. */
+ u32 seqid;
+
+ union {
+ /* Payload directly after the 8-byte header: control frame data
+ * and LDC_MODE_UNRELIABLE data.
+ */
+ u8 u_data[LDC_PACKET_SIZE - 8];
+ /* Layout with an extra 8 bytes (pad + ackid) before the payload;
+ * write_nonraw() uses r_data for every mode other than
+ * LDC_MODE_UNRELIABLE.
+ */
+ struct {
+ u32 pad;
+ u32 ackid;
+ u8 r_data[LDC_PACKET_SIZE - 8 - 8];
+ } r;
+ } u;
+};
+
+/* Protocol version pair exchanged in LDC_VERS control frames. */
+struct ldc_version {
+ u16 major;
+ u16 minor;
+};
+
+/* Versions this driver can negotiate; find_by_major() returns the first
+ * entry whose major does not exceed the requested one, which relies on
+ * this ordering.
+ *
+ * Ordered from largest major to lowest.
+ */
+static struct ldc_version ver_arr[] = {
+ { .major = 1, .minor = 0 },
+};
+
+#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
+#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
+
+struct ldc_channel;
+
+/* Per-mode read/write entry points; ldc_alloc() selects one of
+ * raw_ops, nonraw_ops or stream_ops according to the configured mode.
+ * Both callbacks take the channel, a buffer and a byte count.
+ */
+struct ldc_mode_ops {
+ int (*write)(struct ldc_channel *, const void *, unsigned int);
+ int (*read)(struct ldc_channel *, void *, unsigned int);
+};
+
+static const struct ldc_mode_ops raw_ops;
+static const struct ldc_mode_ops nonraw_ops;
+static const struct ldc_mode_ops stream_ops;
+
+int ldom_domaining_enabled;
+
+/* Per-channel map table state, set up by ldc_iommu_init(). */
+struct ldc_iommu {
+ /* Protects arena alloc/free. */
+ spinlock_t lock;
+ /* Allocation map and entry limit for the table. */
+ struct iommu_arena arena;
+ /* Table registered with sun4v_ldc_set_map_table(). */
+ struct ldc_mtable_entry *page_table;
+};
+
+/* Software state for one logical domain channel. */
+struct ldc_channel {
+ /* Protects all operations that depend upon channel state. */
+ spinlock_t lock;
+
+ /* Channel number handed to every sun4v_ldc_*() hypervisor call. */
+ unsigned long id;
+
+ /* Buffer of cfg.mtu bytes, allocated only for LDC_MODE_STREAM. */
+ u8 *mssbuf;
+ u32 mssbuf_len;
+ u32 mssbuf_off;
+
+ /* TX queue: tx_base is the kernel address, tx_ra is __pa(tx_base).
+ * tx_head and tx_tail are byte offsets into the queue (always a
+ * multiple of LDC_PACKET_SIZE).
+ */
+ struct ldc_packet *tx_base;
+ unsigned long tx_head;
+ unsigned long tx_tail;
+ unsigned long tx_num_entries;
+ unsigned long tx_ra;
+
+ /* Byte offset of the next TX packet not yet ACKed by the peer. */
+ unsigned long tx_acked;
+
+ /* RX queue, same conventions as the TX queue. */
+ struct ldc_packet *rx_base;
+ unsigned long rx_head;
+ unsigned long rx_tail;
+ unsigned long rx_num_entries;
+ unsigned long rx_ra;
+
+ /* Last sequence ID received / sent; the next one is value + 1. */
+ u32 rcv_nxt;
+ u32 snd_nxt;
+
+ /* Channel state as last reported by the hypervisor. */
+ unsigned long chan_state;
+
+ /* Copy of the caller's configuration and its event cookie. */
+ struct ldc_channel_config cfg;
+ void *event_arg;
+
+ /* Read/write implementation chosen from cfg.mode. */
+ const struct ldc_mode_ops *mops;
+
+ struct ldc_iommu iommu;
+
+ /* Version agreed (or being negotiated) during the handshake. */
+ struct ldc_version ver;
+
+ /* Handshake progress. */
+ u8 hs_state;
+#define LDC_HS_CLOSED 0x00
+#define LDC_HS_OPEN 0x01
+#define LDC_HS_GOTVERS 0x02
+#define LDC_HS_SENTRTR 0x03
+#define LDC_HS_GOTRTR 0x04
+#define LDC_HS_COMPLETE 0x10
+
+ /* Resource bookkeeping bits plus the reset marker. */
+ u8 flags;
+#define LDC_FLAG_ALLOCED_QUEUES 0x01
+#define LDC_FLAG_REGISTERED_QUEUES 0x02
+#define LDC_FLAG_REGISTERED_IRQS 0x04
+#define LDC_FLAG_RESET 0x10
+
+ /* Payload bytes available per packet in the configured mode. */
+ u8 mss;
+ /* Driver-level state, one of LDC_STATE_*. */
+ u8 state;
+
+#define LDC_IRQ_NAME_MAX 32
+ /* Names passed to request_irq(), filled in by ldc_bind(). */
+ char rx_irq_name[LDC_IRQ_NAME_MAX];
+ char tx_irq_name[LDC_IRQ_NAME_MAX];
+
+ struct hlist_head mh_list;
+
+ /* Linkage on ldc_channel_list. */
+ struct hlist_node list;
+};
+
+/* Conditional debug print, enabled per category by the LDC_DEBUG_<TYPE>
+ * bit in lp->cfg.debug. The macro picks up a variable named "lp" from
+ * the calling scope, so it can only be used where one is visible.
+ */
+#define ldcdbg(TYPE, f, a...) \
+do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
+ printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
+} while (0)
+
+/* Map an LDC_STATE_* value to a printable name for debug output.
+ * Values outside the known set yield "<UNKNOWN>".
+ */
+static const char *state_to_str(u8 state)
+{
+	const char *name = "<UNKNOWN>";
+
+	switch (state) {
+	case LDC_STATE_INVALID:
+		name = "INVALID";
+		break;
+	case LDC_STATE_INIT:
+		name = "INIT";
+		break;
+	case LDC_STATE_BOUND:
+		name = "BOUND";
+		break;
+	case LDC_STATE_READY:
+		name = "READY";
+		break;
+	case LDC_STATE_CONNECTED:
+		name = "CONNECTED";
+		break;
+	}
+
+	return name;
+}
+
+/* Record a new LDC_STATE_* value in the channel, logging the
+ * transition when STATE debugging is enabled.
+ */
+static void ldc_set_state(struct ldc_channel *lp, u8 state)
+{
+	const u8 prev = lp->state;
+
+	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
+	       state_to_str(prev), state_to_str(state));
+
+	lp->state = state;
+}
+
+/* Step a queue byte offset forward by one packet, wrapping to zero
+ * when it reaches the end of a queue of @num_entries packets.
+ */
+static unsigned long __advance(unsigned long off, unsigned long num_entries)
+{
+	const unsigned long next = off + LDC_PACKET_SIZE;
+
+	return (next == num_entries * LDC_PACKET_SIZE) ? 0 : next;
+}
+
+/* Advance @off by one packet within the channel's RX queue. */
+static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
+{
+	const unsigned long ring_entries = lp->rx_num_entries;
+
+	return __advance(off, ring_entries);
+}
+
+/* Advance @off by one packet within the channel's TX queue. */
+static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
+{
+	const unsigned long ring_entries = lp->tx_num_entries;
+
+	return __advance(off, ring_entries);
+}
+
+/* Reserve the TX slot at the current tail for a handshake frame.
+ *
+ * Returns the slot and stores the tail offset to commit in *new_tail,
+ * or returns NULL (leaving *new_tail untouched) when advancing the
+ * tail would hit tx_head.
+ */
+static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
+						  unsigned long *new_tail)
+{
+	const unsigned long next = tx_advance(lp, lp->tx_tail);
+
+	if (next == lp->tx_head)
+		return NULL;
+
+	*new_tail = next;
+
+	return &lp->tx_base[lp->tx_tail / LDC_PACKET_SIZE];
+}
+
+/* Return the TX offset that data writers must not run into.
+ *
+ * In stream mode the queue may not be overwritten past the oldest
+ * packet that has not been ACKed yet, so tx_acked is the limit; in
+ * every other mode the hardware head is. The handshake never has
+ * data packets pending, which is why handshake_get_tx_packet() can
+ * ignore lp->tx_acked.
+ */
+static unsigned long head_for_data(struct ldc_channel *lp)
+{
+	return (lp->cfg.mode == LDC_MODE_STREAM) ? lp->tx_acked : lp->tx_head;
+}
+
+/* Report whether @size payload bytes fit in the free part of the TX
+ * queue, counting lp->mss payload bytes per free packet slot.
+ *
+ * Returns 1 when there is room, 0 otherwise.
+ */
+static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
+{
+	const unsigned long ring_bytes = lp->tx_num_entries * LDC_PACKET_SIZE;
+	const unsigned long limit = head_for_data(lp);
+	const unsigned long next = tx_advance(lp, lp->tx_tail);
+	unsigned long free_pkts;
+
+	if (next == limit)
+		return 0;
+
+	/* Distance from the advanced tail to the limit, with wrap-around. */
+	if (limit > next)
+		free_pkts = limit - next;
+	else
+		free_pkts = limit + (ring_bytes - next);
+	free_pkts /= LDC_PACKET_SIZE;
+
+	return free_pkts * lp->mss >= size;
+}
+
+/* Reserve the TX slot at the current tail for a data-path frame.
+ *
+ * Same contract as handshake_get_tx_packet(), except that the limit
+ * is head_for_data() so unacknowledged stream packets are preserved.
+ */
+static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
+					     unsigned long *new_tail)
+{
+	const unsigned long limit = head_for_data(lp);
+	const unsigned long next = tx_advance(lp, lp->tx_tail);
+
+	if (next == limit)
+		return NULL;
+
+	*new_tail = next;
+
+	return &lp->tx_base[lp->tx_tail / LDC_PACKET_SIZE];
+}
+
+/* Publish a new TX tail to the hypervisor, retrying up to 1000 times
+ * (1us apart) while it answers HV_EWOULDBLOCK.
+ *
+ * Returns 0 on success with lp->tx_tail updated. On failure the
+ * previous lp->tx_tail is restored and -EINVAL (hard hypervisor
+ * error) or -EBUSY (retries exhausted) is returned.
+ */
+static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
+{
+	const unsigned long saved_tail = lp->tx_tail;
+	int ret = -EBUSY;
+	int tries;
+
+	lp->tx_tail = tail;
+
+	for (tries = 0; tries < 1000; tries++) {
+		unsigned long hv_err = sun4v_ldc_tx_set_qtail(lp->id, tail);
+
+		if (!hv_err)
+			return 0;
+
+		if (hv_err != HV_EWOULDBLOCK) {
+			ret = -EINVAL;
+			break;
+		}
+		udelay(1);
+	}
+
+	lp->tx_tail = saved_tail;
+	return ret;
+}
+
+/* Push a new RX head to the hypervisor, polling up to 1000 times
+ * (1us apart) while it answers HV_EWOULDBLOCK. Only the hypervisor
+ * copy is touched; the caller is responsible for updating the
+ * software state that mirrors the head, if needed.
+ *
+ * Returns 0 on success, -EINVAL on a hard hypervisor error and
+ * -EBUSY when the retries run out.
+ */
+static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
+{
+	int tries;
+
+	for (tries = 0; tries < 1000; tries++) {
+		unsigned long hv_err = sun4v_ldc_rx_set_qhead(lp->id, head);
+
+		if (!hv_err)
+			return 0;
+
+		if (hv_err != HV_EWOULDBLOCK)
+			return -EINVAL;
+
+		udelay(1);
+	}
+
+	return -EBUSY;
+}
+
+/* Commit a packet previously reserved at the current TX tail by
+ * moving the tail to @new_tail. BUGs if @p is not the tail slot.
+ */
+static int send_tx_packet(struct ldc_channel *lp,
+			  struct ldc_packet *p,
+			  unsigned long new_tail)
+{
+	struct ldc_packet *tail_slot;
+
+	tail_slot = &lp->tx_base[lp->tx_tail / LDC_PACKET_SIZE];
+	BUG_ON(p != tail_slot);
+
+	return set_tx_tail(lp, new_tail);
+}
+
+/* Reserve a TX slot and fill it in as a zeroed LDC_CTRL frame with
+ * the given subtype and control code. When @data is non-NULL, @dlen
+ * bytes of it are copied into the payload area.
+ *
+ * Returns the packet (not yet sent) or NULL when the queue is full.
+ */
+static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
+						 u8 stype, u8 ctrl,
+						 void *data, int dlen,
+						 unsigned long *new_tail)
+{
+	struct ldc_packet *pkt = handshake_get_tx_packet(lp, new_tail);
+
+	if (!pkt)
+		return NULL;
+
+	memset(pkt, 0, sizeof(*pkt));
+	pkt->type = LDC_CTRL;
+	pkt->stype = stype;
+	pkt->ctrl = ctrl;
+	if (data)
+		memcpy(pkt->u.u_data, data, dlen);
+
+	return pkt;
+}
+
+/* Kick off the handshake by sending a version INFO frame for the
+ * first entry of ver_arr. LDC_FLAG_RESET is cleared once the frame
+ * has been queued successfully.
+ *
+ * Returns 0 on success, -EBUSY when no TX slot is free, or the error
+ * from send_tx_packet().
+ */
+static int start_handshake(struct ldc_channel *lp)
+{
+	struct ldc_version *ver = &ver_arr[0];
+	unsigned long new_tail;
+	struct ldc_packet *pkt;
+	int err;
+
+	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
+	       ver->major, ver->minor);
+
+	pkt = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
+				     ver, sizeof(*ver), &new_tail);
+	if (!pkt)
+		return -EBUSY;
+
+	err = send_tx_packet(lp, pkt, new_tail);
+	if (err)
+		return err;
+
+	lp->flags &= ~LDC_FLAG_RESET;
+	return 0;
+}
+
+/* Send a version NACK carrying @major/@minor as the payload.
+ *
+ * Returns -EBUSY when no TX slot is free, otherwise the result of
+ * send_tx_packet().
+ */
+static int send_version_nack(struct ldc_channel *lp,
+			     u16 major, u16 minor)
+{
+	struct ldc_version ver = { .major = major, .minor = minor };
+	unsigned long new_tail;
+	struct ldc_packet *pkt;
+
+	pkt = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
+				     &ver, sizeof(ver), &new_tail);
+	if (!pkt)
+		return -EBUSY;
+
+	ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
+	       ver.major, ver.minor);
+
+	return send_tx_packet(lp, pkt, new_tail);
+}
+
+/* Send a version ACK carrying *@vp as the payload.
+ *
+ * Returns -EBUSY when no TX slot is free, otherwise the result of
+ * send_tx_packet().
+ */
+static int send_version_ack(struct ldc_channel *lp,
+			    struct ldc_version *vp)
+{
+	unsigned long new_tail;
+	struct ldc_packet *pkt;
+
+	pkt = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
+				     vp, sizeof(*vp), &new_tail);
+	if (!pkt)
+		return -EBUSY;
+
+	ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
+	       vp->major, vp->minor);
+
+	return send_tx_packet(lp, pkt, new_tail);
+}
+
+/* Send a Request-To-Send frame advertising the channel mode in env,
+ * with sequence ID 0, and reset lp->rcv_nxt to 0.
+ *
+ * Returns -EBUSY when no TX slot is free, otherwise the result of
+ * send_tx_packet().
+ */
+static int send_rts(struct ldc_channel *lp)
+{
+	unsigned long new_tail;
+	struct ldc_packet *pkt;
+
+	pkt = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
+				     &new_tail);
+	if (!pkt)
+		return -EBUSY;
+
+	pkt->env = lp->cfg.mode;
+	pkt->seqid = 0;
+	lp->rcv_nxt = 0;
+
+	ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
+	       pkt->env, pkt->seqid);
+
+	return send_tx_packet(lp, pkt, new_tail);
+}
+
+/* Send a Ready-To-Receive frame advertising the channel mode in env,
+ * with sequence ID 0.
+ *
+ * Returns -EBUSY when no TX slot is free, otherwise the result of
+ * send_tx_packet().
+ */
+static int send_rtr(struct ldc_channel *lp)
+{
+	unsigned long new_tail;
+	struct ldc_packet *pkt;
+
+	pkt = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
+				     &new_tail);
+	if (!pkt)
+		return -EBUSY;
+
+	pkt->env = lp->cfg.mode;
+	pkt->seqid = 0;
+
+	ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
+	       pkt->env, pkt->seqid);
+
+	return send_tx_packet(lp, pkt, new_tail);
+}
+
+/* Send a Ready-for-Data-eXchange frame. This consumes the next send
+ * sequence ID (lp->snd_nxt is pre-incremented) and acknowledges
+ * lp->rcv_nxt.
+ *
+ * Returns -EBUSY when no TX slot is free (snd_nxt is then left
+ * alone), otherwise the result of send_tx_packet().
+ */
+static int send_rdx(struct ldc_channel *lp)
+{
+	unsigned long new_tail;
+	struct ldc_packet *pkt;
+
+	pkt = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
+				     &new_tail);
+	if (!pkt)
+		return -EBUSY;
+
+	pkt->env = 0;
+	pkt->seqid = ++lp->snd_nxt;
+	pkt->u.r.ackid = lp->rcv_nxt;
+
+	ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
+	       pkt->env, pkt->seqid, pkt->u.r.ackid);
+
+	return send_tx_packet(lp, pkt, new_tail);
+}
+
+/* Send a NACK in response to @data_pkt. The reply mirrors the
+ * offending packet's type and (masked) ctrl field, uses the next send
+ * sequence ID and acknowledges lp->rcv_nxt. lp->snd_nxt only moves
+ * forward once the frame has actually been queued.
+ *
+ * Returns 0 on success, -EBUSY when no TX slot is free, or the error
+ * from send_tx_packet().
+ */
+static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
+{
+	unsigned long new_tail;
+	struct ldc_packet *nack;
+	int err;
+
+	nack = data_get_tx_packet(lp, &new_tail);
+	if (!nack)
+		return -EBUSY;
+
+	memset(nack, 0, sizeof(*nack));
+	nack->type = data_pkt->type;
+	nack->stype = LDC_NACK;
+	nack->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
+	nack->seqid = lp->snd_nxt + 1;
+	nack->u.r.ackid = lp->rcv_nxt;
+
+	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
+	       nack->type, nack->ctrl, nack->seqid, nack->u.r.ackid);
+
+	err = send_tx_packet(lp, nack, new_tail);
+	if (err)
+		return err;
+
+	lp->snd_nxt++;
+	return 0;
+}
+
+/* Abort the current connection: re-register both queues with the
+ * hypervisor and reload the cached head/tail/channel state for each.
+ * Hypervisor failures are logged but otherwise ignored.
+ *
+ * Always returns -ECONNRESET so callers can "return ldc_abort(lp)".
+ */
+static int ldc_abort(struct ldc_channel *lp)
+{
+ unsigned long hv_err;
+
+ ldcdbg(STATE, "ABORT\n");
+
+ /* We report but do not act upon the hypervisor errors because
+ * there really isn't much we can do if they fail at this point.
+ */
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
+ lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
+
+ /* Refresh the software copy of the TX queue pointers. */
+ hv_err = sun4v_ldc_tx_get_state(lp->id,
+ &lp->tx_head,
+ &lp->tx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
+ lp->id, hv_err);
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
+ lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
+
+ /* Refetch the RX queue state as well, because we could be invoked
+ * here in the queue processing context.
+ */
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
+ lp->id, hv_err);
+
+ return -ECONNRESET;
+}
+
+/* Return the first ver_arr entry whose major number does not exceed
+ * @major, or NULL when every supported major is larger. Because the
+ * array is sorted from largest major down, this is the best match.
+ */
+static struct ldc_version *find_by_major(u16 major)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ver_arr); idx++) {
+		if (ver_arr[idx].major <= major)
+			return &ver_arr[idx];
+	}
+
+	return NULL;
+}
+
+/* Handle a version INFO frame from the peer.
+ *
+ * A repeated INFO while a version is already recorded restarts the
+ * negotiation. The reply is a NACK of 0.0 when nothing we support
+ * is low enough, a NACK with our best match when the majors differ,
+ * or an ACK (minor clamped to ours) when the majors agree; only the
+ * ACK case records the version and moves to LDC_HS_GOTVERS.
+ *
+ * Returns 0 on success, or the result of ldc_abort() when the reply
+ * could not be sent.
+ */
+static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
+{
+	struct ldc_version *ours;
+	int err;
+
+	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
+	       vp->major, vp->minor);
+
+	if (lp->hs_state == LDC_HS_GOTVERS) {
+		lp->hs_state = LDC_HS_OPEN;
+		memset(&lp->ver, 0, sizeof(lp->ver));
+	}
+
+	ours = find_by_major(vp->major);
+	if (!ours) {
+		err = send_version_nack(lp, 0, 0);
+	} else if (ours->major != vp->major) {
+		err = send_version_nack(lp, ours->major, ours->minor);
+	} else {
+		struct ldc_version agreed = *vp;
+
+		if (agreed.minor > ours->minor)
+			agreed.minor = ours->minor;
+
+		err = send_version_ack(lp, &agreed);
+		if (!err) {
+			lp->ver = agreed;
+			lp->hs_state = LDC_HS_GOTVERS;
+		}
+	}
+
+	return err ? ldc_abort(lp) : 0;
+}
+
+/* Handle a version ACK from the peer.
+ *
+ * If a version is already recorded the ACK must match it exactly;
+ * otherwise the ACKed version is adopted. An RTS is then sent.
+ *
+ * Returns 0 on success, or the result of ldc_abort() on a version
+ * mismatch or when the RTS could not be sent.
+ */
+static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
+{
+	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
+	       vp->major, vp->minor);
+
+	if (lp->hs_state != LDC_HS_GOTVERS) {
+		lp->ver = *vp;
+		lp->hs_state = LDC_HS_GOTVERS;
+	} else if (lp->ver.major != vp->major ||
+		   lp->ver.minor != vp->minor) {
+		return ldc_abort(lp);
+	}
+
+	return send_rts(lp) ? ldc_abort(lp) : 0;
+}
+
+/* Handle a version NACK from the peer.
+ *
+ * A NACK of 0.0, or one naming a major below everything we support,
+ * aborts the connection. Otherwise a new version INFO is sent
+ * offering our best match for the peer's suggestion.
+ *
+ * Returns the result of send_tx_packet() for the new INFO frame, or
+ * the result of ldc_abort().
+ */
+static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
+{
+	struct ldc_version *ours;
+	unsigned long new_tail;
+	struct ldc_packet *pkt;
+
+	if (vp->major == 0 && vp->minor == 0)
+		return ldc_abort(lp);
+
+	ours = find_by_major(vp->major);
+	if (!ours)
+		return ldc_abort(lp);
+
+	pkt = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
+				     ours, sizeof(*ours),
+				     &new_tail);
+	if (!pkt)
+		return ldc_abort(lp);
+
+	return send_tx_packet(lp, pkt, new_tail);
+}
+
+/* Dispatch an LDC_VERS control frame on its subtype. The version
+ * payload is read from the start of the packet's data area. Any
+ * subtype other than INFO, ACK or NACK aborts the connection.
+ */
+static int process_version(struct ldc_channel *lp,
+			   struct ldc_packet *p)
+{
+	struct ldc_version *vp = (struct ldc_version *) p->u.u_data;
+
+	if (p->stype == LDC_INFO)
+		return process_ver_info(lp, vp);
+
+	if (p->stype == LDC_ACK)
+		return process_ver_ack(lp, vp);
+
+	if (p->stype == LDC_NACK)
+		return process_ver_nack(lp, vp);
+
+	return ldc_abort(lp);
+}
+
+/* Handle a Request-To-Send frame.
+ *
+ * The frame must be an INFO, arrive after version negotiation and
+ * advertise our own mode. Both sequence counters are seeded from
+ * the peer's seqid and an RTR is sent in reply.
+ *
+ * Returns 0 on success, or the result of ldc_abort().
+ */
+static int process_rts(struct ldc_channel *lp,
+		       struct ldc_packet *p)
+{
+	int acceptable;
+
+	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
+	       p->stype, p->seqid, p->env);
+
+	acceptable = (p->stype == LDC_INFO &&
+		      lp->hs_state == LDC_HS_GOTVERS &&
+		      p->env == lp->cfg.mode);
+	if (!acceptable)
+		return ldc_abort(lp);
+
+	lp->snd_nxt = p->seqid;
+	lp->rcv_nxt = p->seqid;
+	lp->hs_state = LDC_HS_SENTRTR;
+
+	return send_rtr(lp) ? ldc_abort(lp) : 0;
+}
+
+/* Handle a Ready-To-Receive frame.
+ *
+ * The frame must be an INFO advertising our own mode. The send
+ * sequence is seeded from the peer's seqid, the handshake is marked
+ * complete and an RDX is sent; the RDX result is not checked.
+ *
+ * Returns LDC_EVENT_UP on success, or the result of ldc_abort().
+ */
+static int process_rtr(struct ldc_channel *lp,
+		       struct ldc_packet *p)
+{
+	int acceptable;
+
+	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
+	       p->stype, p->seqid, p->env);
+
+	acceptable = (p->stype == LDC_INFO && p->env == lp->cfg.mode);
+	if (!acceptable)
+		return ldc_abort(lp);
+
+	lp->snd_nxt = p->seqid;
+	lp->hs_state = LDC_HS_COMPLETE;
+	ldc_set_state(lp, LDC_STATE_CONNECTED);
+	(void) send_rdx(lp);
+
+	return LDC_EVENT_UP;
+}
+
+/* True when @seqid is the one directly following the last sequence
+ * ID we received (32-bit wrap-around included).
+ */
+static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
+{
+	const u32 expected = lp->rcv_nxt + 1;
+
+	return seqid == expected;
+}
+
+/* Handle a Ready-for-Data-eXchange frame.
+ *
+ * The frame must be an INFO carrying the next expected sequence ID.
+ * On success the receive sequence is advanced and the channel is
+ * marked connected.
+ *
+ * Returns LDC_EVENT_UP on success, or the result of ldc_abort().
+ */
+static int process_rdx(struct ldc_channel *lp,
+		       struct ldc_packet *p)
+{
+	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
+	       p->stype, p->seqid, p->env, p->u.r.ackid);
+
+	if (p->stype != LDC_INFO)
+		return ldc_abort(lp);
+
+	if (!rx_seq_ok(lp, p->seqid))
+		return ldc_abort(lp);
+
+	lp->rcv_nxt = p->seqid;
+	lp->hs_state = LDC_HS_COMPLETE;
+	ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+	return LDC_EVENT_UP;
+}
+
+/* Dispatch an LDC_CTRL frame on its control code. Unknown codes
+ * abort the connection.
+ *
+ * Returns what the selected handler returns: a negative errno, 0, or
+ * a positive LDC_EVENT_* mask.
+ */
+static int process_control_frame(struct ldc_channel *lp,
+				 struct ldc_packet *p)
+{
+	int ret;
+
+	switch (p->ctrl) {
+	case LDC_VERS:
+		ret = process_version(lp, p);
+		break;
+	case LDC_RTS:
+		ret = process_rts(lp, p);
+		break;
+	case LDC_RTR:
+		ret = process_rtr(lp, p);
+		break;
+	case LDC_RDX:
+		ret = process_rdx(lp, p);
+		break;
+	default:
+		ret = ldc_abort(lp);
+		break;
+	}
+
+	return ret;
+}
+
+/* Handle an LDC_ERR frame: the packet contents are not examined, the
+ * connection is simply aborted. Returns the result of ldc_abort().
+ */
+static int process_error_frame(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ return ldc_abort(lp);
+}
+
+/* Process the ackid carried by @ack.
+ *
+ * Walks the TX queue forward from lp->tx_acked looking for the packet
+ * whose seqid equals the ackid; on a match lp->tx_acked is moved just
+ * past that packet. Reaching lp->tx_tail without a match aborts the
+ * connection.
+ *
+ * Returns 0 on success, or the result of ldc_abort().
+ */
+static int process_data_ack(struct ldc_channel *lp,
+			    struct ldc_packet *ack)
+{
+	const u32 ackid = ack->u.r.ackid;
+	unsigned long pos = lp->tx_acked;
+
+	do {
+		struct ldc_packet *pkt = &lp->tx_base[pos / LDC_PACKET_SIZE];
+
+		pos = tx_advance(lp, pos);
+
+		if (pkt->seqid == ackid) {
+			lp->tx_acked = pos;
+			return 0;
+		}
+	} while (pos != lp->tx_tail);
+
+	return ldc_abort(lp);
+}
+
+/* Invoke the channel's event callback once for every event set in
+ * @event_mask, always in the order RESET, UP, DATA_READY.
+ */
+static void send_events(struct ldc_channel *lp, unsigned int event_mask)
+{
+	static const unsigned int delivery_order[] = {
+		LDC_EVENT_RESET,
+		LDC_EVENT_UP,
+		LDC_EVENT_DATA_READY,
+	};
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(delivery_order); idx++) {
+		if (event_mask & delivery_order[idx])
+			lp->cfg.event(lp->event_arg, delivery_order[idx]);
+	}
+}
+
+/* RX interrupt handler; @dev_id is the struct ldc_channel.
+ *
+ * Under lp->lock it refreshes the RX head/tail and channel state from
+ * the hypervisor and then either:
+ * - discards everything queued while LDC_FLAG_RESET is set,
+ * - reports state changes and pending data once the handshake is
+ * complete (ldc_read() paths do the real work), or
+ * - consumes handshake frames from the RX queue.
+ * Events accumulated in event_mask are delivered through send_events()
+ * after the lock has been dropped. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ldc_rx(int irq, void *dev_id)
+{
+ struct ldc_channel *lp = dev_id;
+ unsigned long orig_state, hv_err, flags;
+ unsigned int event_mask;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ /* hv_err is recorded here but not examined afterwards. */
+ orig_state = lp->chan_state;
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+
+ ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
+ orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
+
+ event_mask = 0;
+
+ /* RAW mode has no handshake: an UP channel is connected at once.
+ * Resyncing orig_state keeps the state-change test below from
+ * reporting the same transition a second time.
+ */
+ if (lp->cfg.mode == LDC_MODE_RAW &&
+ lp->chan_state == LDC_CHANNEL_UP) {
+ lp->hs_state = LDC_HS_COMPLETE;
+ ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+ event_mask |= LDC_EVENT_UP;
+
+ orig_state = lp->chan_state;
+ }
+
+ /* If we are in reset state, flush the RX queue and ignore
+ * everything.
+ */
+ if (lp->flags & LDC_FLAG_RESET) {
+ (void) __set_rx_head(lp, lp->rx_tail);
+ goto out;
+ }
+
+ /* Once we finish the handshake, we let the ldc_read()
+ * paths do all of the control frame and state management.
+ * Just trigger the callback.
+ */
+ if (lp->hs_state == LDC_HS_COMPLETE) {
+ /* Also entered from the packet loop below when a frame completes
+ * the handshake.
+ */
+handshake_complete:
+ if (lp->chan_state != orig_state) {
+ unsigned int event = LDC_EVENT_RESET;
+
+ if (lp->chan_state == LDC_CHANNEL_UP)
+ event = LDC_EVENT_UP;
+
+ event_mask |= event;
+ }
+ if (lp->rx_head != lp->rx_tail)
+ event_mask |= LDC_EVENT_DATA_READY;
+
+ goto out;
+ }
+
+ /* Handshake still in progress: a channel state change ends this
+ * pass without looking at the queue.
+ */
+ if (lp->chan_state != orig_state)
+ goto out;
+
+ /* Consume handshake-phase packets one at a time. */
+ while (lp->rx_head != lp->rx_tail) {
+ struct ldc_packet *p;
+ unsigned long new;
+ int err;
+
+ p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
+
+ switch (p->type) {
+ case LDC_CTRL:
+ /* Positive return values are LDC_EVENT_* bits. */
+ err = process_control_frame(lp, p);
+ if (err > 0)
+ event_mask |= err;
+ break;
+
+ case LDC_DATA:
+ event_mask |= LDC_EVENT_DATA_READY;
+ err = 0;
+ break;
+
+ case LDC_ERR:
+ err = process_error_frame(lp, p);
+ break;
+
+ default:
+ err = ldc_abort(lp);
+ break;
+ }
+
+ if (err < 0)
+ break;
+
+ /* Open-coded equivalent of rx_advance(lp, lp->rx_head). */
+ new = lp->rx_head;
+ new += LDC_PACKET_SIZE;
+ if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
+ new = 0;
+ lp->rx_head = new;
+
+ err = __set_rx_head(lp, new);
+ if (err < 0) {
+ (void) ldc_abort(lp);
+ break;
+ }
+ if (lp->hs_state == LDC_HS_COMPLETE)
+ goto handshake_complete;
+ }
+
+out:
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ send_events(lp, event_mask);
+
+ return IRQ_HANDLED;
+}
+
+/* TX interrupt handler; @dev_id is the struct ldc_channel.
+ *
+ * Refreshes the cached TX head/tail and channel state under lp->lock.
+ * For a RAW-mode channel that is UP, the channel is marked connected
+ * and an UP event is delivered after the lock is released. The
+ * hypervisor status is stored but not examined. Always returns
+ * IRQ_HANDLED.
+ */
+static irqreturn_t ldc_tx(int irq, void *dev_id)
+{
+	struct ldc_channel *lp = dev_id;
+	unsigned long irq_flags, hv_err, prev_state;
+	unsigned int pending = 0;
+
+	spin_lock_irqsave(&lp->lock, irq_flags);
+
+	prev_state = lp->chan_state;
+	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head,
+					&lp->tx_tail, &lp->chan_state);
+
+	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
+	       prev_state, lp->chan_state, lp->tx_head, lp->tx_tail);
+
+	if (lp->chan_state == LDC_CHANNEL_UP &&
+	    lp->cfg.mode == LDC_MODE_RAW) {
+		lp->hs_state = LDC_HS_COMPLETE;
+		ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+		pending |= LDC_EVENT_UP;
+	}
+
+	spin_unlock_irqrestore(&lp->lock, irq_flags);
+
+	send_events(lp, pending);
+
+	return IRQ_HANDLED;
+}
+
+/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
+ * XXX that addition to and removal from the ldc_channel_list are
+ * XXX atomic. Otherwise the __ldc_channel_exists() check is
+ * XXX totally pointless, as another thread can slip into ldc_alloc()
+ * XXX and add a channel with the same ID. There also needs to be
+ * XXX a spinlock for ldc_channel_list.
+ */
+static HLIST_HEAD(ldc_channel_list);
+
+/* Return 1 when a channel with this @id is already on
+ * ldc_channel_list, 0 otherwise. No locking is done here.
+ */
+static int __ldc_channel_exists(unsigned long id)
+{
+	struct ldc_channel *chan;
+	struct hlist_node *pos;
+	int found = 0;
+
+	hlist_for_each_entry(chan, pos, &ldc_channel_list, list) {
+		if (chan->id == id) {
+			found = 1;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/* Allocate a zeroed, page-backed queue of @num_entries packets.
+ *
+ * On success *base receives the kernel address and *ra its __pa()
+ * translation, and 0 is returned. On failure an error naming the
+ * queue (@name) is logged and -ENOMEM is returned.
+ */
+static int alloc_queue(const char *name, unsigned long num_entries,
+		       struct ldc_packet **base, unsigned long *ra)
+{
+	const unsigned long bytes = num_entries * LDC_PACKET_SIZE;
+	const unsigned long order = get_order(bytes);
+	void *queue = (void *) __get_free_pages(GFP_KERNEL, order);
+
+	if (!queue) {
+		printk(KERN_ERR PFX "Alloc of %s queue failed with "
+		       "size=%lu order=%lu\n", name, bytes, order);
+		return -ENOMEM;
+	}
+
+	/* Clear the whole allocation, not just the requested bytes. */
+	memset(queue, 0, PAGE_SIZE << order);
+
+	*base = queue;
+	*ra = __pa(queue);
+
+	return 0;
+}
+
+/* Release a queue obtained from alloc_queue() with the same
+ * @num_entries. A NULL @q is ignored.
+ */
+static void free_queue(unsigned long num_entries, struct ldc_packet *q)
+{
+	unsigned long bytes;
+
+	if (q) {
+		bytes = num_entries * LDC_PACKET_SIZE;
+		free_pages((unsigned long) q, get_order(bytes));
+	}
+}
+
+/* XXX Make this configurable... XXX */
+#define LDC_IOTABLE_SIZE (8 * 1024)
+
+/* Set up the channel's map table: allocate the arena map and a
+ * zeroed table of LDC_IOTABLE_SIZE entries, then register the table
+ * with the hypervisor.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, or -EINVAL
+ * when the hypervisor rejects the table. On failure everything
+ * allocated here is released again and the pointers are cleared.
+ */
+static int ldc_iommu_init(struct ldc_channel *lp)
+{
+	const unsigned long entries = LDC_IOTABLE_SIZE;
+	const unsigned long table_bytes =
+		entries * sizeof(struct ldc_mtable_entry);
+	struct ldc_iommu *iommu = &lp->iommu;
+	struct ldc_mtable_entry *table;
+	unsigned long map_bytes, order;
+	int err;
+
+	spin_lock_init(&iommu->lock);
+
+	/* entries / 8 bytes, rounded up to a multiple of 8. */
+	map_bytes = ((entries / 8) + 7UL) & ~7UL;
+	iommu->arena.map = kzalloc(map_bytes, GFP_KERNEL);
+	if (!iommu->arena.map) {
+		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", map_bytes);
+		return -ENOMEM;
+	}
+
+	iommu->arena.limit = entries;
+
+	order = get_order(table_bytes);
+	table = (struct ldc_mtable_entry *)
+		__get_free_pages(GFP_KERNEL, order);
+	if (!table) {
+		printk(KERN_ERR PFX "Alloc of MTE table failed, "
+		       "size=%lu order=%lu\n", table_bytes, order);
+		err = -ENOMEM;
+		goto release_map;
+	}
+
+	memset(table, 0, PAGE_SIZE << order);
+	iommu->page_table = table;
+
+	if (sun4v_ldc_set_map_table(lp->id, __pa(table), entries)) {
+		err = -EINVAL;
+		goto release_table;
+	}
+
+	return 0;
+
+release_table:
+	free_pages((unsigned long) table, order);
+	iommu->page_table = NULL;
+
+release_map:
+	kfree(iommu->arena.map);
+	iommu->arena.map = NULL;
+
+	return err;
+}
+
+/* Undo ldc_iommu_init(): unregister the map table from the
+ * hypervisor (result ignored), then free the table and the arena map
+ * and clear both pointers. The table size is recomputed from
+ * arena.limit.
+ */
+static void ldc_iommu_release(struct ldc_channel *lp)
+{
+	struct ldc_iommu *iommu = &lp->iommu;
+	unsigned long table_bytes;
+
+	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
+
+	table_bytes = iommu->arena.limit * sizeof(struct ldc_mtable_entry);
+	free_pages((unsigned long) iommu->page_table, get_order(table_bytes));
+	iommu->page_table = NULL;
+
+	kfree(iommu->arena.map);
+	iommu->arena.map = NULL;
+}
+
+/* Allocate and initialise the software state for LDC channel @id.
+ *
+ * @id:        channel number, used for all hypervisor calls.
+ * @cfgp:      caller configuration, copied into the channel. Its mode
+ *             selects the raw, unreliable or stream ops; event, rx_irq
+ *             and tx_irq must all be non-zero. An mtu of zero is
+ *             replaced by LDC_DEFAULT_MTU.
+ * @event_arg: non-NULL cookie handed back to cfgp->event.
+ *
+ * On success the channel is returned in LDC_STATE_INIT with its
+ * queues allocated (but not yet registered with the hypervisor) and
+ * has been added to ldc_channel_list.
+ *
+ * On failure an ERR_PTR() is returned:
+ *   -ENODEV  domaining is disabled or the hypervisor does not know @id
+ *   -EINVAL  bad or incomplete configuration
+ *   -EEXIST  a channel with this @id was already allocated
+ *   -ENOMEM  an allocation failed
+ * or whatever ldc_iommu_init() reported.
+ */
+struct ldc_channel *ldc_alloc(unsigned long id,
+			      const struct ldc_channel_config *cfgp,
+			      void *event_arg)
+{
+	struct ldc_channel *lp;
+	const struct ldc_mode_ops *mops;
+	unsigned long dummy1, dummy2, hv_err;
+	u8 mss, *mssbuf;
+	int err;
+
+	err = -ENODEV;
+	if (!ldom_domaining_enabled)
+		goto out_err;
+
+	err = -EINVAL;
+	if (!cfgp)
+		goto out_err;
+
+	/* mss is the payload space left in a packet after the mode's
+	 * header: none for raw, 8 bytes for unreliable, 16 for stream.
+	 */
+	switch (cfgp->mode) {
+	case LDC_MODE_RAW:
+		mops = &raw_ops;
+		mss = LDC_PACKET_SIZE;
+		break;
+
+	case LDC_MODE_UNRELIABLE:
+		mops = &nonraw_ops;
+		mss = LDC_PACKET_SIZE - 8;
+		break;
+
+	case LDC_MODE_STREAM:
+		mops = &stream_ops;
+		mss = LDC_PACKET_SIZE - 8 - 8;
+		break;
+
+	default:
+		goto out_err;
+	}
+
+	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
+		goto out_err;
+
+	/* Probe the hypervisor only to learn whether the channel exists. */
+	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
+	err = -ENODEV;
+	if (hv_err == HV_ECHANNEL)
+		goto out_err;
+
+	err = -EEXIST;
+	if (__ldc_channel_exists(id))
+		goto out_err;
+
+	mssbuf = NULL;
+
+	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!lp)
+		goto out_err;
+
+	spin_lock_init(&lp->lock);
+
+	lp->id = id;
+
+	err = ldc_iommu_init(lp);
+	if (err)
+		goto out_free_ldc;
+
+	lp->mops = mops;
+	lp->mss = mss;
+
+	lp->cfg = *cfgp;
+	if (!lp->cfg.mtu)
+		lp->cfg.mtu = LDC_DEFAULT_MTU;
+
+	/* Only stream mode gets an mtu-sized buffer. */
+	if (lp->cfg.mode == LDC_MODE_STREAM) {
+		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
+		if (!mssbuf) {
+			err = -ENOMEM;
+			goto out_free_iommu;
+		}
+		lp->mssbuf = mssbuf;
+	}
+
+	lp->event_arg = event_arg;
+
+	/* XXX allow setting via ldc_channel_config to override defaults
+	 * XXX or use some formula based upon mtu
+	 */
+	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
+	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
+
+	err = alloc_queue("TX", lp->tx_num_entries,
+			  &lp->tx_base, &lp->tx_ra);
+	if (err)
+		goto out_free_mssbuf;
+
+	err = alloc_queue("RX", lp->rx_num_entries,
+			  &lp->rx_base, &lp->rx_ra);
+	if (err)
+		goto out_free_txq;
+
+	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
+
+	lp->hs_state = LDC_HS_CLOSED;
+	ldc_set_state(lp, LDC_STATE_INIT);
+
+	INIT_HLIST_NODE(&lp->list);
+	hlist_add_head(&lp->list, &ldc_channel_list);
+
+	INIT_HLIST_HEAD(&lp->mh_list);
+
+	return lp;
+
+out_free_txq:
+	free_queue(lp->tx_num_entries, lp->tx_base);
+
+out_free_mssbuf:
+	/* kfree(NULL) is a no-op, so non-stream channels need no guard. */
+	kfree(mssbuf);
+
+out_free_iommu:
+	ldc_iommu_release(lp);
+
+out_free_ldc:
+	kfree(lp);
+
+out_err:
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ldc_alloc);
+
+/* Destroy a channel created by ldc_alloc().
+ *
+ * Releases, in order and only where the corresponding flag says they
+ * are held: the RX/TX interrupts, the hypervisor queue registrations
+ * and the queue memory. The channel is then unlinked from
+ * ldc_channel_list and its buffer, map table and the channel
+ * structure itself are freed. @lp must not be used afterwards.
+ */
+void ldc_free(struct ldc_channel *lp)
+{
+	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
+		free_irq(lp->cfg.rx_irq, lp);
+		free_irq(lp->cfg.tx_irq, lp);
+	}
+
+	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
+		sun4v_ldc_tx_qconf(lp->id, 0, 0);
+		sun4v_ldc_rx_qconf(lp->id, 0, 0);
+		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
+	}
+	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
+		free_queue(lp->tx_num_entries, lp->tx_base);
+		free_queue(lp->rx_num_entries, lp->rx_base);
+		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
+	}
+
+	hlist_del(&lp->list);
+
+	/* mssbuf is NULL outside stream mode; kfree(NULL) is a no-op. */
+	kfree(lp->mssbuf);
+
+	ldc_iommu_release(lp);
+
+	kfree(lp);
+}
+EXPORT_SYMBOL(ldc_free);
+
+/* Bind the channel. This registers the LDC queues with
+ * the hypervisor and puts the channel into a pseudo-listening
+ * state. This does not initiate a handshake, ldc_connect() does
+ * that.
+ *
+ * @name is used to build the RX/TX interrupt names. The channel must
+ * be in LDC_STATE_INIT.
+ *
+ * Returns 0 on success with the channel in LDC_STATE_BOUND. Errors:
+ * -EINVAL for a NULL name or wrong state, the request_irq() error,
+ * -ENODEV when queue registration fails, -EBUSY when the TX state
+ * cannot be read back. On failure the IRQs and any queue
+ * registrations made here are released again.
+ */
+int ldc_bind(struct ldc_channel *lp, const char *name)
+{
+ unsigned long hv_err, flags;
+ int err = -EINVAL;
+
+ if (!name ||
+ (lp->state != LDC_STATE_INIT))
+ return -EINVAL;
+
+ snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
+ snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
+
+ err = request_irq(lp->cfg.rx_irq, ldc_rx,
+ IRQF_SAMPLE_RANDOM | IRQF_SHARED,
+ lp->rx_irq_name, lp);
+ if (err)
+ return err;
+
+ err = request_irq(lp->cfg.tx_irq, ldc_tx,
+ IRQF_SAMPLE_RANDOM | IRQF_SHARED,
+ lp->tx_irq_name, lp);
+ if (err) {
+ free_irq(lp->cfg.rx_irq, lp);
+ return err;
+ }
+
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ enable_irq(lp->cfg.rx_irq);
+ enable_irq(lp->cfg.tx_irq);
+
+ lp->flags |= LDC_FLAG_REGISTERED_IRQS;
+
+ /* Each queue is first unconfigured (0, 0) and then registered with
+ * its real address and size.
+ */
+ err = -ENODEV;
+ hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_free_irqs;
+
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ goto out_free_irqs;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_unmap_tx;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ goto out_unmap_tx;
+
+ lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
+
+ hv_err = sun4v_ldc_tx_get_state(lp->id,
+ &lp->tx_head,
+ &lp->tx_tail,
+ &lp->chan_state);
+ err = -EBUSY;
+ if (hv_err)
+ goto out_unmap_rx;
+
+ /* Nothing has been sent yet, so nothing is awaiting an ACK. */
+ lp->tx_acked = lp->tx_head;
+
+ lp->hs_state = LDC_HS_OPEN;
+ ldc_set_state(lp, LDC_STATE_BOUND);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return 0;
+
+out_unmap_rx:
+ lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
+ sun4v_ldc_rx_qconf(lp->id, 0, 0);
+
+out_unmap_tx:
+ sun4v_ldc_tx_qconf(lp->id, 0, 0);
+
+ /* NOTE(review): free_irq() runs here with lp->lock held and local
+ * interrupts disabled -- confirm this is safe on this platform.
+ */
+out_free_irqs:
+ lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
+ free_irq(lp->cfg.tx_irq, lp);
+ free_irq(lp->cfg.rx_irq, lp);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_bind);
+
+/* Start the handshake on a bound channel.
+ *
+ * Not valid for RAW mode, which has no handshake. The channel must
+ * have its queues allocated and registered and be in LDC_HS_OPEN.
+ *
+ * Returns 0 when the first handshake frame was queued, -EINVAL when
+ * the preconditions are not met, or the error from start_handshake().
+ */
+int ldc_connect(struct ldc_channel *lp)
+{
+	const u8 required = LDC_FLAG_ALLOCED_QUEUES |
+			    LDC_FLAG_REGISTERED_QUEUES;
+	unsigned long flags;
+	int err = -EINVAL;
+
+	if (lp->cfg.mode == LDC_MODE_RAW)
+		return -EINVAL;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	if ((lp->flags & required) == required &&
+	    lp->hs_state == LDC_HS_OPEN)
+		err = start_handshake(lp);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_connect);
+
+/* Drop the current connection and return the channel to the bound
+ * state by unconfiguring and re-registering both queues.
+ *
+ * Not valid for RAW mode or for channels whose queues are not both
+ * allocated and registered (-EINVAL). Returns 0 on success, with
+ * LDC_FLAG_RESET set so that ldc_rx() discards incoming packets until
+ * the next start_handshake(). If any hypervisor call fails, -ENODEV
+ * is returned and the channel is taken all the way back to
+ * LDC_STATE_INIT: queues unregistered and interrupts freed.
+ */
+int ldc_disconnect(struct ldc_channel *lp)
+{
+ unsigned long hv_err, flags;
+ int err;
+
+ if (lp->cfg.mode == LDC_MODE_RAW)
+ return -EINVAL;
+
+ /* Note: this flag check is made before lp->lock is taken. */
+ if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
+ !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
+ return -EINVAL;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ err = -ENODEV;
+ hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ goto out_err;
+
+ ldc_set_state(lp, LDC_STATE_BOUND);
+ lp->hs_state = LDC_HS_OPEN;
+ lp->flags |= LDC_FLAG_RESET;
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return 0;
+
+ /* NOTE(review): free_irq() runs here with lp->lock held and local
+ * interrupts disabled -- confirm this is safe on this platform.
+ */
+out_err:
+ sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ free_irq(lp->cfg.tx_irq, lp);
+ free_irq(lp->cfg.rx_irq, lp);
+ lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
+ LDC_FLAG_REGISTERED_QUEUES);
+ ldc_set_state(lp, LDC_STATE_INIT);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_disconnect);
+
+/* Return the channel's current LDC_STATE_* value. The field is read
+ * without taking lp->lock.
+ */
+int ldc_state(struct ldc_channel *lp)
+{
+ return lp->state;
+}
+EXPORT_SYMBOL(ldc_state);
+
+/* RAW-mode write: copy up to one packet's worth of bytes from @buf
+ * into the next TX slot and hand it to the hypervisor.
+ *
+ * Returns @size on success, -EMSGSIZE when @size exceeds
+ * LDC_PACKET_SIZE, -EAGAIN when the TX queue is full, or the error
+ * from send_tx_packet().
+ */
+static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
+{
+	unsigned long new_tail;
+	struct ldc_packet *slot;
+	int err;
+
+	if (size > LDC_PACKET_SIZE)
+		return -EMSGSIZE;
+
+	slot = data_get_tx_packet(lp, &new_tail);
+	if (!slot)
+		return -EAGAIN;
+
+	memcpy(slot, buf, size);
+
+	err = send_tx_packet(lp, slot, new_tail);
+	if (err)
+		return err;
+
+	return size;
+}
+
+/* RAW-mode read: copy one whole packet from the RX queue into @buf
+ * and advance the RX head.
+ *
+ * Returns LDC_PACKET_SIZE when a packet was read and 0 when the queue
+ * is empty. Errors: -EINVAL when @size is smaller than a packet,
+ * -ECONNRESET when the channel is down or resetting, when the queue
+ * state cannot be fetched (via ldc_abort()), or when the new head
+ * cannot be set.
+ */
+static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	unsigned long hv_err, next;
+	struct ldc_packet *pkt;
+
+	if (size < LDC_PACKET_SIZE)
+		return -EINVAL;
+
+	hv_err = sun4v_ldc_rx_get_state(lp->id, &lp->rx_head,
+					&lp->rx_tail, &lp->chan_state);
+	if (hv_err)
+		return ldc_abort(lp);
+
+	if (lp->chan_state == LDC_CHANNEL_DOWN ||
+	    lp->chan_state == LDC_CHANNEL_RESETTING)
+		return -ECONNRESET;
+
+	if (lp->rx_head == lp->rx_tail)
+		return 0;
+
+	pkt = &lp->rx_base[lp->rx_head / LDC_PACKET_SIZE];
+	memcpy(buf, pkt, LDC_PACKET_SIZE);
+
+	next = rx_advance(lp, lp->rx_head);
+	lp->rx_head = next;
+
+	if (__set_rx_head(lp, next) < 0)
+		return -ECONNRESET;
+
+	return LDC_PACKET_SIZE;
+}
+
+/* Read/write operations installed by ldc_alloc() for LDC_MODE_RAW. */
+static const struct ldc_mode_ops raw_ops = {
+ .write = write_raw,
+ .read = read_raw,
+};
+
+/* Write @size bytes from @buf as a sequence of LDC_DATA/LDC_INFO
+ * packets, each carrying at most lp->mss payload bytes.
+ *
+ * The first fragment is flagged LDC_START and the last LDC_STOP; a
+ * message that fits in one packet carries both. All fragments are
+ * composed in the queue first and become visible to the peer with a
+ * single tail update; lp->snd_nxt only advances when that succeeds.
+ *
+ * Returns @size on success, -EBUSY when the TX state cannot be read,
+ * -EAGAIN when the queue lacks room for the whole message, the
+ * result of ldc_abort() when the channel is not up, or the error
+ * from set_tx_tail().
+ */
+static int write_nonraw(struct ldc_channel *lp, const void *buf,
+			unsigned int size)
+{
+	const u8 *src = buf;
+	unsigned long hv_err, tail;
+	unsigned int copied = 0;
+	u32 seq;
+	int err;
+
+	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
+					&lp->chan_state);
+	if (unlikely(hv_err))
+		return -EBUSY;
+
+	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
+		return ldc_abort(lp);
+
+	if (!tx_has_space_for(lp, size))
+		return -EAGAIN;
+
+	seq = lp->snd_nxt;
+
+	for (tail = lp->tx_tail; copied < size; tail = tx_advance(lp, tail)) {
+		struct ldc_packet *pkt = &lp->tx_base[tail / LDC_PACKET_SIZE];
+		u8 *payload;
+		int chunk;
+
+		/* Unreliable mode has the short header, the rest the long one. */
+		if (lp->cfg.mode == LDC_MODE_UNRELIABLE)
+			payload = pkt->u.u_data;
+		else
+			payload = pkt->u.r.r_data;
+
+		pkt->type = LDC_DATA;
+		pkt->stype = LDC_INFO;
+		pkt->ctrl = 0;
+
+		chunk = size - copied;
+		if (chunk > lp->mss)
+			chunk = lp->mss;
+
+		BUG_ON(chunk > LDC_LEN);
+
+		pkt->env = (chunk |
+			    (copied == 0 ? LDC_START : 0) |
+			    (chunk == size - copied ? LDC_STOP : 0));
+
+		pkt->seqid = ++seq;
+
+		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
+		       pkt->type,
+		       pkt->stype,
+		       pkt->ctrl,
+		       pkt->env,
+		       pkt->seqid);
+
+		memcpy(payload, src, chunk);
+		src += chunk;
+		copied += chunk;
+	}
+
+	err = set_tx_tail(lp, tail);
+	if (err)
+		return err;
+
+	lp->snd_nxt = seq;
+	return size;
+}
+
+static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
+ struct ldc_packet *first_frag)
+{ /* Recover from a packet that failed the sequence check: rewind
+   * rcv_nxt to just before the first accepted fragment (if any), send
+   * a NACK for p, and drop everything queued by moving the RX head
+   * up to the current tail.
+   *
+   * Returns 0 on success, the non-zero result of send_data_nack(),
+   * or the result of ldc_abort() if the head update fails.
+   */
+ int err;
+
+ if (first_frag)
+ lp->rcv_nxt = first_frag->seqid - 1;
+
+ err = send_data_nack(lp, p);
+ if (err)
+ return err;
+
+ err = __set_rx_head(lp, lp->rx_tail);
+ if (err < 0)
+ return ldc_abort(lp);
+
+ return 0;
+}
+
+/* Act on the ACK/NACK bits carried in the stype of a received packet.
+ *
+ * An ACK is handed to process_data_ack(); a non-zero result from it is
+ * returned unchanged.  A NACK aborts the channel through ldc_abort().
+ * Returns 0 when neither step reports a problem.
+ */
+static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
+{
+	int rc = 0;
+
+	if (p->stype & LDC_ACK)
+		rc = process_data_ack(lp, p);
+	if (rc)
+		return rc;
+
+	if (!(p->stype & LDC_NACK))
+		return 0;
+
+	return ldc_abort(lp);
+}
+
+static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
+{ /* Poll the RX queue state until the tail differs from cur_head,
+   * i.e. until at least one more packet is queued.  At most 1000
+   * polls are made, separated by udelay(1).
+   *
+   * Returns 0 once more data is present, -EAGAIN if the poll limit
+   * is exhausted, -ECONNRESET if the channel is down/resetting, or
+   * the result of ldc_abort() if the state query fails.
+   */
+ unsigned long dummy; /* receives the head value; only used in the debug message */
+ int limit = 1000;
+
+ ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
+ cur_head, lp->rx_head, lp->rx_tail);
+ while (limit-- > 0) {
+ unsigned long hv_err;
+
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &dummy,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ return ldc_abort(lp);
+
+ if (lp->chan_state == LDC_CHANNEL_DOWN ||
+ lp->chan_state == LDC_CHANNEL_RESETTING)
+ return -ECONNRESET;
+
+ if (cur_head != lp->rx_tail) {
+ ldcdbg(DATA, "DATA WAIT DONE "
+ "head[%lx] tail[%lx] chan_state[%lx]\n",
+ dummy, lp->rx_tail, lp->chan_state);
+ return 0;
+ }
+
+ udelay(1);
+ }
+ return -EAGAIN;
+}
+
+static int rx_set_head(struct ldc_channel *lp, unsigned long head)
+{ /* Push a new RX head with __set_rx_head() and, only if that
+   * succeeds, mirror it in lp->rx_head.  Returns 0 on success or the
+   * result of ldc_abort() when the update fails.
+   */
+ int err = __set_rx_head(lp, head);
+
+ if (err < 0)
+ return ldc_abort(lp);
+
+ lp->rx_head = head;
+ return 0;
+}
+
+static void send_data_ack(struct ldc_channel *lp)
+{ /* Queue a DATA/ACK packet whose ackid is lp->rcv_nxt.  This is best
+   * effort: nothing is reported to the caller when no TX slot is
+   * available or the send fails, and snd_nxt only advances when
+   * send_tx_packet() returns 0.
+   */
+ unsigned long new_tail;
+ struct ldc_packet *p;
+
+ p = data_get_tx_packet(lp, &new_tail);
+ if (likely(p)) {
+ int err;
+
+ memset(p, 0, sizeof(*p));
+ p->type = LDC_DATA;
+ p->stype = LDC_ACK;
+ p->ctrl = 0;
+ p->seqid = lp->snd_nxt + 1;
+ p->u.r.ackid = lp->rcv_nxt;
+
+ err = send_tx_packet(lp, p, new_tail);
+ if (!err)
+ lp->snd_nxt++;
+ }
+}
+
+static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
+{ /* Non-RAW read: walk the RX queue from the current head, handling
+   * control frames and ACK/NACK packets on the way, and reassemble one
+   * (possibly fragmented) data message into buf.
+   *
+   * Returns the number of payload bytes copied (0 when the queue is
+   * empty on entry), or a negative errno / ldc_abort() result on
+   * failure.  -EMSGSIZE means the next fragment did not fit in the
+   * remaining space of buf; the final head update is skipped then.
+   * In every mode except LDC_MODE_UNRELIABLE an ACK is sent after a
+   * successful read that returned data.
+   */
+ struct ldc_packet *first_frag;
+ unsigned long hv_err, new;
+ int err, copied;
+
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ return ldc_abort(lp);
+
+ if (lp->chan_state == LDC_CHANNEL_DOWN ||
+ lp->chan_state == LDC_CHANNEL_RESETTING)
+ return -ECONNRESET;
+
+ if (lp->rx_head == lp->rx_tail)
+ return 0;
+
+ first_frag = NULL;
+ copied = err = 0;
+ new = lp->rx_head; /* 'new' is the scan position; lp->rx_head trails it until committed */
+ while (1) {
+ struct ldc_packet *p;
+ int pkt_len;
+
+ BUG_ON(new == lp->rx_tail);
+ p = lp->rx_base + (new / LDC_PACKET_SIZE);
+
+ ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
+ "rcv_nxt[%08x]\n",
+ p->type,
+ p->stype,
+ p->ctrl,
+ p->env,
+ p->seqid,
+ p->u.r.ackid,
+ lp->rcv_nxt);
+
+ if (unlikely(!rx_seq_ok(lp, p->seqid))) {
+ err = rx_bad_seq(lp, p, first_frag);
+ copied = 0; /* anything gathered so far is discarded */
+ break;
+ }
+
+ if (p->type & LDC_CTRL) {
+ err = process_control_frame(lp, p);
+ if (err < 0)
+ break;
+ err = 0; /* non-negative results are not treated as errors */
+ }
+
+ lp->rcv_nxt = p->seqid;
+
+ if (!(p->type & LDC_DATA)) {
+ new = rx_advance(lp, new); /* not a data packet: step over it */
+ goto no_data;
+ }
+ if (p->stype & (LDC_ACK | LDC_NACK)) {
+ err = data_ack_nack(lp, p);
+ if (err)
+ break;
+ }
+ if (!(p->stype & LDC_INFO)) {
+ new = rx_advance(lp, new); /* data packet without payload: consume it immediately */
+ err = rx_set_head(lp, new);
+ if (err)
+ break;
+ goto no_data;
+ }
+
+ pkt_len = p->env & LDC_LEN;
+
+ /* Every initial packet starts with the START bit set.
+ *
+ * Singleton packets will have both START+STOP set.
+ *
+ * Fragments will have START set in the first frame, STOP
+ * set in the last frame, and neither bit set in middle
+ * frames of the packet.
+ *
+ * Therefore if we are at the beginning of a packet and
+ * we don't see START, or we are in the middle of a fragmented
+ * packet and do see START, we are unsynchronized and should
+ * flush the RX queue.
+ */
+ if ((first_frag == NULL && !(p->env & LDC_START)) ||
+ (first_frag != NULL && (p->env & LDC_START))) {
+ if (!first_frag)
+ new = rx_advance(lp, new);
+
+ err = rx_set_head(lp, new);
+ if (err)
+ break;
+
+ if (!first_frag)
+ goto no_data;
+ }
+ if (!first_frag)
+ first_frag = p;
+
+ if (pkt_len > size - copied) {
+ /* User didn't give us a big enough buffer,
+ * what to do? This is a pretty serious error.
+ *
+ * Since we haven't updated the RX ring head to
+ * consume any of the packets, signal the error
+ * to the user and just leave the RX ring alone.
+ *
+ * This seems the best behavior because this allows
+ * a user of the LDC layer to start with a small
+ * RX buffer for ldc_read() calls and use -EMSGSIZE
+ * as a cue to enlarge its read buffer.
+ */
+ err = -EMSGSIZE;
+ break;
+ }
+
+ /* Ok, we are gonna eat this one. */
+ new = rx_advance(lp, new);
+
+ memcpy(buf,
+ (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
+ p->u.u_data : p->u.r.r_data), pkt_len);
+ buf += pkt_len;
+ copied += pkt_len;
+
+ if (p->env & LDC_STOP)
+ break;
+
+no_data:
+ if (new == lp->rx_tail) {
+ err = rx_data_wait(lp, new); /* queue drained mid-message: wait for more packets */
+ if (err)
+ break;
+ }
+ }
+
+ if (!err)
+ err = rx_set_head(lp, new);
+
+ if (err && first_frag)
+ lp->rcv_nxt = first_frag->seqid - 1; /* on failure, rewind so the message's first fragment is expected again */
+
+ if (!err) {
+ err = copied;
+ if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
+ send_data_ack(lp);
+ }
+
+ return err;
+}
+
+static const struct ldc_mode_ops nonraw_ops = { /* read/write handlers that fragment and reassemble messages */
+ .write = write_nonraw,
+ .read = read_nonraw,
+};
+
+/* Stream-mode write: hand at most lp->cfg.mtu bytes of buf to
+ * write_nonraw() and return whatever it returns.  A larger request is
+ * silently shortened, so the caller may see a short write.
+ */
+static int write_stream(struct ldc_channel *lp, const void *buf,
+			unsigned int size)
+{
+	unsigned int chunk = size;
+
+	if (chunk > lp->cfg.mtu)
+		chunk = lp->cfg.mtu;
+
+	return write_nonraw(lp, buf, chunk);
+}
+
+static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
+{ /* Stream-mode read: serve bytes out of lp->mssbuf, refilling it
+   * with one read_nonraw() call (up to lp->cfg.mtu bytes) whenever it
+   * is empty.
+   *
+   * Returns the number of bytes copied into buf (at most size, and
+   * at most what is buffered), or the negative result of
+   * read_nonraw().
+   */
+ if (!lp->mssbuf_len) {
+ int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
+ if (err < 0)
+ return err;
+
+ lp->mssbuf_len = err;
+ lp->mssbuf_off = 0;
+ }
+
+ if (size > lp->mssbuf_len)
+ size = lp->mssbuf_len;
+ memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
+
+ lp->mssbuf_off += size; /* remember how far the buffered message has been consumed */
+ lp->mssbuf_len -= size;
+
+ return size;
+}
+
+static const struct ldc_mode_ops stream_ops = { /* read/write handlers layered on the non-RAW ones, with MTU clamping and read buffering */
+ .write = write_stream,
+ .read = read_stream,
+};
+
+/* Write size bytes from buf to the channel using the mode-specific
+ * write handler, with lp->lock held and interrupts disabled.
+ *
+ * Returns -EINVAL for a NULL buf, 0 for a zero-length request,
+ * -ENOTCONN unless the handshake state is LDC_HS_COMPLETE, and
+ * otherwise whatever lp->mops->write() returns.
+ */
+int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
+{
+	unsigned long irq_flags;
+	int ret = -ENOTCONN;
+
+	if (!buf)
+		return -EINVAL;
+	if (!size)
+		return 0;
+
+	spin_lock_irqsave(&lp->lock, irq_flags);
+	if (lp->hs_state == LDC_HS_COMPLETE)
+		ret = lp->mops->write(lp, buf, size);
+	spin_unlock_irqrestore(&lp->lock, irq_flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ldc_write);
+
+/* Read up to size bytes from the channel into buf using the
+ * mode-specific read handler, with lp->lock held and interrupts
+ * disabled.
+ *
+ * Returns -EINVAL for a NULL buf, 0 for a zero-length request,
+ * -ENOTCONN unless the handshake state is LDC_HS_COMPLETE, and
+ * otherwise whatever lp->mops->read() returns.
+ */
+int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	unsigned long irq_flags;
+	int ret = -ENOTCONN;
+
+	if (!buf)
+		return -EINVAL;
+	if (!size)
+		return 0;
+
+	spin_lock_irqsave(&lp->lock, irq_flags);
+	if (lp->hs_state == LDC_HS_COMPLETE)
+		ret = lp->mops->read(lp, buf, size);
+	spin_unlock_irqrestore(&lp->lock, irq_flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ldc_read);
+
+static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
+{ /* Find npages consecutive clear bits in the arena bitmap, set them,
+   * and return the index of the first one.  The scan starts at
+   * arena->hint; if it runs past the limit it is retried once from
+   * index 0 with the previous start as the new limit.  Returns -1
+   * when no suitable run is found.  The map callers in this file
+   * invoke this (via alloc_npages) with iommu->lock held.
+   */
+ struct iommu_arena *arena = &iommu->arena;
+ unsigned long n, i, start, end, limit;
+ int pass;
+
+ limit = arena->limit;
+ start = arena->hint;
+ pass = 0;
+
+again:
+ n = find_next_zero_bit(arena->map, limit, start);
+ end = n + npages;
+ if (unlikely(end >= limit)) { /* note: a run ending exactly at limit is rejected too */
+ if (likely(pass < 1)) {
+ limit = start;
+ start = 0;
+ pass++;
+ goto again;
+ } else {
+ /* Scanned the whole thing, give up. */
+ return -1;
+ }
+ }
+
+ for (i = n; i < end; i++) {
+ if (test_bit(i, arena->map)) {
+ start = i + 1; /* run is interrupted here; resume the search just past the used bit */
+ goto again;
+ }
+ }
+
+ for (i = n; i < end; i++)
+ __set_bit(i, arena->map);
+
+ arena->hint = end; /* next search starts right after this allocation */
+
+ return n;
+}
+
+#define COOKIE_PGSZ_CODE 0xf000000000000000ULL /* mask of the top four cookie bits, which hold the page-size code */
+#define COOKIE_PGSZ_CODE_SHIFT 60ULL /* shift that moves the page-size code to/from bit 0 */
+
+/* Translate PAGE_SIZE into the page-size code stored in cookies and
+ * table entries: 8K -> 0, 64K -> 1, 512K -> 2, 4M -> 3, 32M -> 4,
+ * 256M -> 5.  Any other page size also yields 0.
+ */
+static u64 pagesize_code(void)
+{
+	u64 code;
+
+	if (PAGE_SIZE == (64ULL * 1024ULL))
+		code = 1;
+	else if (PAGE_SIZE == (512ULL * 1024ULL))
+		code = 2;
+	else if (PAGE_SIZE == (4ULL * 1024ULL * 1024ULL))
+		code = 3;
+	else if (PAGE_SIZE == (32ULL * 1024ULL * 1024ULL))
+		code = 4;
+	else if (PAGE_SIZE == (256ULL * 1024ULL * 1024ULL))
+		code = 5;
+	else
+		code = 0;	/* 8K, and every size not listed above */
+
+	return code;
+}
+
+/* Assemble a transfer cookie from its three fields: the page-size code
+ * in the top bits, the page-table index shifted up by PAGE_SHIFT, and
+ * the byte offset within the page in the low bits.
+ */
+static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
+{
+	u64 cookie = page_offset;
+
+	cookie |= index << PAGE_SHIFT;
+	cookie |= pgsz_code << COOKIE_PGSZ_CODE_SHIFT;
+
+	return cookie;
+}
+
+/* Recover the page-table index encoded in a cookie.
+ *
+ * The page-size code is taken from the top bits; each code step adds
+ * three bits to the base shift of 13.  *shift receives only the extra
+ * part (code * 3); the return value is the cookie, with the code bits
+ * masked off, shifted right by 13 plus that extra amount.
+ */
+static u64 cookie_to_index(u64 cookie, unsigned long *shift)
+{
+	const u64 code = cookie >> COOKIE_PGSZ_CODE_SHIFT;
+	const u64 extra = code * 3ULL;
+	const u64 addr_bits = cookie & ~COOKIE_PGSZ_CODE;
+
+	*shift = extra;
+
+	return addr_bits >> (13ULL + extra);
+}
+
+/* Reserve npages consecutive page-table slots through arena_alloc()
+ * and return a pointer to the first entry, or NULL when the arena has
+ * no room.
+ */
+static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
+					     unsigned long npages)
+{
+	long slot = arena_alloc(iommu, npages);
+
+	return unlikely(slot < 0) ? NULL : iommu->page_table + slot;
+}
+
+/* Build the permission part of a map-table entry from LDC_MAP_* flags.
+ *
+ * The result starts with the page-size code and then gains, for every
+ * mapping class requested (SHADOW, DIRECT, IO), the LDC_MTE_* bits
+ * matching the R/W (and, for DIRECT only, X) permissions.
+ */
+static u64 perm_to_mte(unsigned int map_perm)
+{
+	const unsigned int want_r = map_perm & LDC_MAP_R;
+	const unsigned int want_w = map_perm & LDC_MAP_W;
+	const unsigned int want_x = map_perm & LDC_MAP_X;
+	u64 mte = pagesize_code();
+
+	if (map_perm & LDC_MAP_SHADOW) {
+		if (want_r)
+			mte |= LDC_MTE_COPY_R;
+		if (want_w)
+			mte |= LDC_MTE_COPY_W;
+	}
+
+	if (map_perm & LDC_MAP_DIRECT) {
+		if (want_r)
+			mte |= LDC_MTE_READ;
+		if (want_w)
+			mte |= LDC_MTE_WRITE;
+		if (want_x)
+			mte |= LDC_MTE_EXEC;
+	}
+
+	if (map_perm & LDC_MAP_IO) {
+		if (want_r)
+			mte |= LDC_MTE_IOMMU_R;
+		if (want_w)
+			mte |= LDC_MTE_IOMMU_W;
+	}
+
+	return mte;
+}
+
+/* Count the pages touched by the len bytes starting at address base.
+ * The first page is always counted, so the result is at least 1 even
+ * when len is zero or negative.
+ */
+static int pages_in_region(unsigned long base, long len)
+{
+	int count = 0;
+
+	for (;;) {
+		/* Start of the page following the one that holds base. */
+		unsigned long next_page = (base + PAGE_SIZE) & PAGE_MASK;
+
+		count++;
+		len -= (next_page - base);
+		if (len <= 0)
+			break;
+		base = next_page;
+	}
+
+	return count;
+}
+
+struct cookie_state { /* Cursor shared by the map routines and fill_cookies() while building cookies */
+ struct ldc_mtable_entry *page_table; /* base of the table whose entries are written */
+ struct ldc_trans_cookie *cookies; /* caller-supplied output array */
+ u64 mte_base; /* permission/page-size bits OR-ed into every entry */
+ u64 prev_cookie; /* address just past the last cookie emitted; used to merge adjacent pages */
+ u32 pte_idx; /* next page-table index to fill */
+ u32 nc; /* number of cookies emitted so far */
+};
+
+static void fill_cookies(struct cookie_state *sp, unsigned long pa,
+ unsigned long off, unsigned long len)
+{ /* Write one page-table entry per page of the region that begins off
+   * bytes into the page at physical address pa and is len bytes long,
+   * and record matching transfer cookies in sp->cookies.  A page whose
+   * cookie address equals the end of the previous cookie is merged
+   * into that cookie by growing its cookie_size instead of emitting a
+   * new one.  The body always runs at least once, even for len == 0.
+   */
+ do {
+ unsigned long tlen, new = pa + PAGE_SIZE;
+ u64 this_cookie;
+
+ sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
+
+ tlen = PAGE_SIZE; /* bytes of this page that belong to the region */
+ if (off)
+ tlen = PAGE_SIZE - off;
+ if (tlen > len)
+ tlen = len;
+
+ this_cookie = make_cookie(sp->pte_idx,
+ pagesize_code(), off);
+
+ off = 0; /* only the first page starts at an offset */
+
+ if (this_cookie == sp->prev_cookie) {
+ sp->cookies[sp->nc - 1].cookie_size += tlen;
+ } else {
+ sp->cookies[sp->nc].cookie_addr = this_cookie;
+ sp->cookies[sp->nc].cookie_size = tlen;
+ sp->nc++;
+ }
+ sp->prev_cookie = this_cookie + tlen;
+
+ sp->pte_idx++;
+
+ len -= tlen;
+ pa = new;
+ } while (len > 0);
+}
+
+static int sg_count_one(struct scatterlist *sg)
+{ /* Return the number of pages spanned by one scatterlist element,
+   * or -EFAULT if its offset or length is not a multiple of 8.
+   */
+ unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
+ long len = sg->length;
+
+ if ((sg->offset | len) & (8UL - 1))
+ return -EFAULT;
+
+ return pages_in_region(base + sg->offset, len);
+}
+
+/* Sum the page counts of the first num_sg scatterlist elements.
+ * Stops at, and returns, the first negative value produced by
+ * sg_count_one(); otherwise returns the total page count.
+ */
+static int sg_count_pages(struct scatterlist *sg, int num_sg)
+{
+	int total = 0;
+	int idx = 0;
+
+	while (idx < num_sg) {
+		int pages = sg_count_one(&sg[idx]);
+
+		if (pages < 0)
+			return pages;
+
+		total += pages;
+		idx++;
+	}
+
+	return total;
+}
+
+int ldc_map_sg(struct ldc_channel *lp,
+ struct scatterlist *sg, int num_sg,
+ struct ldc_trans_cookie *cookies, int ncookies,
+ unsigned int map_perm)
+{ /* Map a scatterlist into the channel's page table and describe it
+   * with transfer cookies written to cookies[].
+   *
+   * Returns the number of cookies produced, -EINVAL if map_perm has
+   * bits outside LDC_MAP_ALL, the negative result of
+   * sg_count_pages() (-EFAULT for unaligned elements), -EMSGSIZE if
+   * the page count exceeds ncookies, or -ENOMEM if no page-table
+   * slots are available.
+   */
+ unsigned long i, npages, flags;
+ struct ldc_mtable_entry *base;
+ struct cookie_state state;
+ struct ldc_iommu *iommu;
+ int err;
+
+ if (map_perm & ~LDC_MAP_ALL)
+ return -EINVAL;
+
+ err = sg_count_pages(sg, num_sg);
+ if (err < 0)
+ return err;
+
+ npages = err;
+ if (err > ncookies) /* worst case is one cookie per page, so require room for that */
+ return -EMSGSIZE;
+
+ iommu = &lp->iommu;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ base = alloc_npages(iommu, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (!base)
+ return -ENOMEM;
+
+ state.page_table = iommu->page_table;
+ state.cookies = cookies;
+ state.mte_base = perm_to_mte(map_perm);
+ state.prev_cookie = ~(u64)0; /* all-ones start value so the first page does not merge with a previous cookie */
+ state.pte_idx = (base - iommu->page_table);
+ state.nc = 0;
+
+ for (i = 0; i < num_sg; i++)
+ fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
+ sg[i].offset, sg[i].length);
+
+ return state.nc;
+}
+EXPORT_SYMBOL(ldc_map_sg);
+
+int ldc_map_single(struct ldc_channel *lp,
+ void *buf, unsigned int len,
+ struct ldc_trans_cookie *cookies, int ncookies,
+ unsigned int map_perm)
+{ /* Map one virtually contiguous kernel buffer into the channel's page
+   * table and describe it with a single transfer cookie.
+   *
+   * Returns 1 (the cookie count) on success, -EINVAL if map_perm has
+   * bits outside LDC_MAP_ALL or ncookies < 1, -EFAULT if the
+   * physical address or len is not a multiple of 8, or -ENOMEM if no
+   * page-table slots are available.
+   */
+ unsigned long npages, pa, flags;
+ struct ldc_mtable_entry *base;
+ struct cookie_state state;
+ struct ldc_iommu *iommu;
+
+ if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
+ return -EINVAL;
+
+ pa = __pa(buf);
+ if ((pa | len) & (8UL - 1))
+ return -EFAULT;
+
+ npages = pages_in_region(pa, len);
+
+ iommu = &lp->iommu;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ base = alloc_npages(iommu, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (!base)
+ return -ENOMEM;
+
+ state.page_table = iommu->page_table;
+ state.cookies = cookies;
+ state.mte_base = perm_to_mte(map_perm);
+ state.prev_cookie = ~(u64)0;
+ state.pte_idx = (base - iommu->page_table);
+ state.nc = 0;
+ fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
+ BUG_ON(state.nc != 1); /* consecutive table slots must have merged into exactly one cookie */
+
+ return state.nc;
+}
+EXPORT_SYMBOL(ldc_map_single);
+
+/* Release the page-table slots described by one transfer cookie.
+ *
+ * @id:     channel id passed through to sun4v_ldc_revoke()
+ * @iommu:  per-channel table and allocation bitmap
+ * @cookie: cookie address as produced by make_cookie()
+ * @size:   cookie size in bytes
+ *
+ * For every page covered by the cookie, the matching table entry is
+ * revoked (when its cookie field is set), its mte is zeroed and its bit
+ * in the arena bitmap is cleared.  The caller in this file (ldc_unmap)
+ * holds iommu->lock.
+ */
+static void free_npages(unsigned long id, struct ldc_iommu *iommu,
+			u64 cookie, u64 size)
+{
+	struct iommu_arena *arena = &iommu->arena;
+	unsigned long i, shift, index, npages;
+	struct ldc_mtable_entry *base;
+
+	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
+	index = cookie_to_index(cookie, &shift);
+	base = iommu->page_table + index;
+
+	BUG_ON(index > arena->limit ||
+	       (index + npages) > arena->limit);
+
+	for (i = 0; i < npages; i++) {
+		/* Operate on the i-th entry of the run.  Previously only
+		 * the first entry was ever inspected and cleared, which
+		 * left stale mte values in slots the bitmap marked free.
+		 */
+		struct ldc_mtable_entry *ent = base + i;
+
+		if (ent->cookie)
+			sun4v_ldc_revoke(id, cookie + (i << shift),
+					 ent->cookie);
+		ent->mte = 0;
+		__clear_bit(index + i, arena->map);
+	}
+}
+
+void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
+ int ncookies)
+{ /* Undo a mapping: release the page-table slots behind each of the
+   * ncookies cookies, holding iommu->lock for the whole loop.
+   */
+ struct ldc_iommu *iommu = &lp->iommu;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ for (i = 0; i < ncookies; i++) {
+ u64 addr = cookies[i].cookie_addr;
+ u64 size = cookies[i].cookie_size;
+
+ free_npages(lp->id, iommu, addr, size);
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+EXPORT_SYMBOL(ldc_unmap);
+
+int ldc_copy(struct ldc_channel *lp, int copy_dir,
+ void *buf, unsigned int len, unsigned long offset,
+ struct ldc_trans_cookie *cookies, int ncookies)
+{ /* Copy len bytes between the local buffer buf and the region
+   * described by cookies[], starting offset bytes into that region,
+   * in direction copy_dir (LDC_COPY_IN or LDC_COPY_OUT).
+   *
+   * Returns the number of bytes actually copied, which is less than
+   * len when the cookies run out first.  Errors: -EINVAL for a bad
+   * copy_dir, -EFAULT when the buffer address, len or offset is not
+   * a multiple of 8 or the hypervisor copy fails while the link is
+   * up, -ECONNRESET when the handshake is not complete or the
+   * channel is flagged as reset.
+   */
+ unsigned int orig_len;
+ unsigned long ra;
+ int i;
+
+ if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
+ lp->id, copy_dir);
+ return -EINVAL;
+ }
+
+ ra = __pa(buf);
+ if ((ra | len | offset) & (8UL - 1)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
+ "ra[%lx] len[%x] offset[%lx]\n",
+ lp->id, ra, len, offset);
+ return -EFAULT;
+ }
+
+ if (lp->hs_state != LDC_HS_COMPLETE ||
+ (lp->flags & LDC_FLAG_RESET)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
+ "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
+ return -ECONNRESET;
+ }
+
+ orig_len = len;
+ for (i = 0; i < ncookies; i++) {
+ unsigned long cookie_raddr = cookies[i].cookie_addr;
+ unsigned long this_len = cookies[i].cookie_size;
+ unsigned long actual_len;
+
+ if (unlikely(offset)) { /* still skipping the leading offset bytes of the region */
+ unsigned long this_off = offset;
+
+ if (this_off > this_len)
+ this_off = this_len;
+
+ offset -= this_off;
+ this_len -= this_off;
+ if (!this_len)
+ continue;
+ cookie_raddr += this_off;
+ }
+
+ if (this_len > len)
+ this_len = len;
+
+ while (1) { /* repeat until this cookie's share is fully copied; a call may copy less than asked */
+ unsigned long hv_err;
+
+ hv_err = sun4v_ldc_copy(lp->id, copy_dir,
+ cookie_raddr, ra,
+ this_len, &actual_len);
+ if (unlikely(hv_err)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
+ "HV error %lu\n",
+ lp->id, hv_err);
+ if (lp->hs_state != LDC_HS_COMPLETE ||
+ (lp->flags & LDC_FLAG_RESET))
+ return -ECONNRESET;
+ else
+ return -EFAULT;
+ }
+
+ cookie_raddr += actual_len;
+ ra += actual_len;
+ len -= actual_len;
+ if (actual_len == this_len)
+ break;
+
+ this_len -= actual_len;
+ }
+
+ if (!len)
+ break;
+ }
+
+ /* It is caller policy what to do about short copies.
+ * For example, a networking driver can declare the
+ * packet a runt and drop it.
+ */
+
+ return orig_len - len;
+}
+EXPORT_SYMBOL(ldc_copy);
+
+void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
+ struct ldc_trans_cookie *cookies, int *ncookies,
+ unsigned int map_perm)
+{ /* Allocate a zeroed buffer of len bytes and map it with
+   * ldc_map_single().  On entry *ncookies is the capacity of
+   * cookies[]; on success it is replaced by the number of cookies
+   * used and the buffer is returned.
+   *
+   * On failure an ERR_PTR is returned: -EINVAL if len is not a
+   * multiple of 8, -ENOMEM if the allocation fails, or the negative
+   * result of ldc_map_single() (the buffer is freed first).
+   */
+ void *buf;
+ int err;
+
+ if (len & (8UL - 1))
+ return ERR_PTR(-EINVAL);
+
+ buf = kzalloc(len, GFP_KERNEL);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
+ if (err < 0) {
+ kfree(buf);
+ return ERR_PTR(err);
+ }
+ *ncookies = err;
+
+ return buf;
+}
+EXPORT_SYMBOL(ldc_alloc_exp_dring);
+
+void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
+ struct ldc_trans_cookie *cookies, int ncookies)
+{ /* Counterpart of ldc_alloc_exp_dring(): unmap the cookies, then free
+   * the buffer.  The len argument is not used by this function.
+   */
+ ldc_unmap(lp, cookies, ncookies);
+ kfree(buf);
+}
+EXPORT_SYMBOL(ldc_free_exp_dring);
+
+static int __init ldc_init(void)
+{ /* Boot-time setup: check the machine description's "platform" node
+   * for the "domaining-enabled" property, register the LDOM hypervisor
+   * API group at version 1.0, and set ldom_domaining_enabled when the
+   * property value is non-zero.
+   *
+   * Returns 0 when domaining is enabled and -ENODEV in every other
+   * case (no machine description, no platform node, no property,
+   * registration failure, or property value of zero).
+   */
+ unsigned long major, minor;
+ struct mdesc_handle *hp;
+ const u64 *v;
+ int err;
+ u64 mp;
+
+ hp = mdesc_grab();
+ if (!hp)
+ return -ENODEV;
+
+ mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
+ err = -ENODEV; /* default result for every early exit below */
+ if (mp == MDESC_NODE_NULL)
+ goto out;
+
+ v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
+ if (!v)
+ goto out;
+
+ major = 1;
+ minor = 0;
+ if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
+ printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
+ goto out;
+ }
+
+ printk(KERN_INFO "%s", version);
+
+ if (!*v) {
+ printk(KERN_INFO PFX "Domaining disabled.\n");
+ goto out;
+ }
+ ldom_domaining_enabled = 1;
+ err = 0;
+
+out:
+ mdesc_release(hp); /* drop the reference taken by mdesc_grab() */
+ return err;
+}
+
+core_initcall(ldc_init);
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
new file mode 100644
index 000000000000..dde52bcf5c64
--- /dev/null
+++ b/arch/sparc/kernel/mdesc.c
@@ -0,0 +1,916 @@
+/* mdesc.c: Sun4V machine description handling.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/lmb.h>
+#include <linux/log2.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+#include <asm/prom.h>
+#include <asm/oplib.h>
+#include <asm/smp.h>
+
+/* Unlike the OBP device tree, the machine description is a full-on
+ * DAG. An arbitrary number of ARCs are possible from one
+ * node to other nodes and thus we can't use the OBP device_node
+ * data structure to represent these nodes inside of the kernel.
+ *
+ * Actually, it isn't even a DAG, because there are back pointers
+ * which create cycles in the graph.
+ *
+ * mdesc_hdr and mdesc_elem describe the layout of the data structure
+ * we get from the Hypervisor.
+ */
+struct mdesc_hdr { /* Header at the start of the machine description; the
+   * node, name and data blocks follow it in that order (see
+   * node_block(), name_block() and data_block()).
+   */
+ u32 version; /* Transport version */
+ u32 node_sz; /* node block size */
+ u32 name_sz; /* name block size */
+ u32 data_sz; /* data block size */
+} __attribute__((aligned(16)));
+
+struct mdesc_elem { /* One element of the node block; the code in this file treats elements as 16 bytes each (node_sz / 16) */
+ u8 tag; /* element kind, one of the MD_* values below */
+#define MD_LIST_END 0x00
+#define MD_NODE 0x4e /* ASCII 'N': start of a node */
+#define MD_NODE_END 0x45 /* ASCII 'E': end of a node's element list */
+#define MD_NOOP 0x20 /* ASCII ' ' */
+#define MD_PROP_ARC 0x61 /* ASCII 'a': arc to another node */
+#define MD_PROP_VAL 0x76 /* ASCII 'v': 64-bit value property */
+#define MD_PROP_STR 0x73 /* ASCII 's': string property stored in the data block */
+#define MD_PROP_DATA 0x64 /* ASCII 'd': opaque property stored in the data block */
+ u8 name_len;
+ u16 resv;
+ u32 name_offset; /* offset of the element's name within the name block */
+ union {
+ struct {
+ u32 data_len;
+ u32 data_offset; /* offset of the payload within the data block */
+ } data;
+ u64 val; /* property value; for MD_NODE the index of the next node, for arcs the target node index */
+ } d;
+};
+
+struct mdesc_mem_ops { /* Allocator pair used to create and destroy a machine-description handle */
+ struct mdesc_handle *(*alloc)(unsigned int mdesc_size); /* returns an initialized handle with room for mdesc_size bytes, or NULL */
+ void (*free)(struct mdesc_handle *handle);
+};
+
+struct mdesc_handle { /* Reference-counted wrapper around one copy of the machine description */
+ struct list_head list; /* links a superseded handle onto mdesc_zombie_list */
+ struct mdesc_mem_ops *mops; /* allocator that created this handle; its free hook destroys it */
+ void *self_base; /* original allocation address, which may differ from the aligned handle address */
+ atomic_t refcnt;
+ unsigned int handle_size; /* bytes covered by the handle including the description payload */
+ struct mdesc_hdr mdesc; /* header of the description; the rest of the blob follows in the same allocation */
+};
+
+static void mdesc_handle_init(struct mdesc_handle *hp,
+ unsigned int handle_size,
+ void *base)
+{ /* Zero the whole handle (handle_size bytes) and set up its list
+   * head, allocation base, size, and an initial reference count of 1.
+   * The embedded mdesc header must sit on a 16-byte boundary.
+   */
+ BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1));
+
+ memset(hp, 0, handle_size);
+ INIT_LIST_HEAD(&hp->list);
+ hp->self_base = base;
+ atomic_set(&hp->refcnt, 1);
+ hp->handle_size = handle_size;
+}
+
+static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size)
+{ /* Allocate a handle for an mdesc_size-byte description from the LMB
+   * allocator, rounded up to whole pages and page aligned.  Returns
+   * the initialized handle, or NULL if lmb_alloc() returns 0.
+   */
+ unsigned int handle_size, alloc_size;
+ struct mdesc_handle *hp;
+ unsigned long paddr;
+
+ handle_size = (sizeof(struct mdesc_handle) -
+ sizeof(struct mdesc_hdr) +
+ mdesc_size); /* the header is counted inside mdesc_size, so subtract the embedded copy */
+ alloc_size = PAGE_ALIGN(handle_size);
+
+ paddr = lmb_alloc(alloc_size, PAGE_SIZE);
+
+ hp = NULL;
+ if (paddr) {
+ hp = __va(paddr);
+ mdesc_handle_init(hp, handle_size, hp);
+ }
+ return hp;
+}
+
+static void mdesc_lmb_free(struct mdesc_handle *hp)
+{ /* Release a handle created by mdesc_lmb_alloc(): every page of the
+   * page-aligned allocation has its reserved flag cleared and is
+   * passed to __free_page().  The handle must be unreferenced and not
+   * on any list.
+   */
+ unsigned int alloc_size, handle_size = hp->handle_size;
+ unsigned long start, end;
+
+ BUG_ON(atomic_read(&hp->refcnt) != 0);
+ BUG_ON(!list_empty(&hp->list));
+
+ alloc_size = PAGE_ALIGN(handle_size);
+
+ start = (unsigned long) hp;
+ end = start + alloc_size;
+
+ while (start < end) {
+ struct page *p;
+
+ p = virt_to_page(start);
+ ClearPageReserved(p);
+ __free_page(p);
+ start += PAGE_SIZE;
+ }
+}
+
+static struct mdesc_mem_ops lmb_mdesc_ops = { /* LMB-backed allocator pair; the alloc hook is __init code */
+ .alloc = mdesc_lmb_alloc,
+ .free = mdesc_lmb_free,
+};
+
+static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
+{ /* Allocate a handle for an mdesc_size-byte description with
+   * kmalloc().  Fifteen extra bytes are requested so the handle can be
+   * rounded up to a 16-byte boundary; the unrounded pointer is kept in
+   * self_base for the later kfree().  Returns the initialized handle,
+   * or NULL if the allocation returns NULL.
+   */
+ unsigned int handle_size;
+ void *base;
+
+ handle_size = (sizeof(struct mdesc_handle) -
+ sizeof(struct mdesc_hdr) +
+ mdesc_size);
+
+ base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL);
+ if (base) {
+ struct mdesc_handle *hp;
+ unsigned long addr;
+
+ addr = (unsigned long)base;
+ addr = (addr + 15UL) & ~15UL; /* round up to the next 16-byte boundary */
+ hp = (struct mdesc_handle *) addr;
+
+ mdesc_handle_init(hp, handle_size, base);
+ return hp;
+ }
+
+ return NULL;
+}
+
+static void mdesc_kfree(struct mdesc_handle *hp)
+{ /* Release a handle created by mdesc_kmalloc().  The original
+   * allocation address (self_base) is freed, not hp itself, because hp
+   * may have been rounded up for alignment.
+   */
+ BUG_ON(atomic_read(&hp->refcnt) != 0);
+ BUG_ON(!list_empty(&hp->list));
+
+ kfree(hp->self_base);
+}
+
+static struct mdesc_mem_ops kmalloc_mdesc_memops = { /* kmalloc-backed allocator pair, used by mdesc_update() */
+ .alloc = mdesc_kmalloc,
+ .free = mdesc_kfree,
+};
+
+/* Create a handle for an mdesc_size-byte description with the given
+ * allocator and remember that allocator in the handle so it can be
+ * freed the same way.  Returns NULL when the allocator fails.
+ */
+static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size,
+					struct mdesc_mem_ops *mops)
+{
+	struct mdesc_handle *handle;
+
+	handle = mops->alloc(mdesc_size);
+	if (!handle)
+		return NULL;
+
+	handle->mops = mops;
+	return handle;
+}
+
+static void mdesc_free(struct mdesc_handle *hp)
+{ /* Destroy a handle through the free hook of the allocator that created it. */
+ hp->mops->free(hp);
+}
+
+static struct mdesc_handle *cur_mdesc; /* current machine description; mdesc_grab() and mdesc_update() access it under mdesc_lock */
+static LIST_HEAD(mdesc_zombie_list); /* superseded descriptions that still had references when replaced */
+static DEFINE_SPINLOCK(mdesc_lock); /* taken around cur_mdesc swaps and reference drops */
+
+/* Take a reference on the current machine description and return it.
+ * Returns NULL, without touching any count, when no description is
+ * installed.  Pair every successful call with mdesc_release().
+ */
+struct mdesc_handle *mdesc_grab(void)
+{
+	unsigned long irq_flags;
+	struct mdesc_handle *cur;
+
+	spin_lock_irqsave(&mdesc_lock, irq_flags);
+
+	cur = cur_mdesc;
+	if (cur)
+		atomic_inc(&cur->refcnt);
+
+	spin_unlock_irqrestore(&mdesc_lock, irq_flags);
+
+	return cur;
+}
+EXPORT_SYMBOL(mdesc_grab);
+
+void mdesc_release(struct mdesc_handle *hp)
+{ /* Drop one reference on hp.  When the count reaches zero the handle
+   * is unlinked from whatever list it is on and freed through its
+   * allocator, all while mdesc_lock is held with interrupts disabled.
+   */
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ if (atomic_dec_and_test(&hp->refcnt)) {
+ list_del_init(&hp->list); /* the free hooks BUG if the handle is still on a list */
+ hp->mops->free(hp);
+ }
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+}
+EXPORT_SYMBOL(mdesc_release);
+
+static DEFINE_MUTEX(mdesc_mutex); /* held by mdesc_register_notifier() and mdesc_update() for their whole bodies */
+static struct mdesc_notifier_client *client_list; /* singly linked list of registered clients, newest first */
+
+void mdesc_register_notifier(struct mdesc_notifier_client *client)
+{ /* Add client to the head of the notifier list and immediately call
+   * its add hook for every node in the current machine description
+   * whose name matches client->node_name.  Runs under mdesc_mutex.
+   */
+ u64 node;
+
+ mutex_lock(&mdesc_mutex);
+ client->next = client_list;
+ client_list = client;
+
+ mdesc_for_each_node_by_name(cur_mdesc, node, client->node_name)
+ client->add(cur_mdesc, node);
+
+ mutex_unlock(&mdesc_mutex);
+}
+
+static const u64 *parent_cfg_handle(struct mdesc_handle *hp, u64 node)
+{ /* Follow the back arcs of node and return a pointer to the
+   * "cfg-handle" property of the first arc target that has one, or
+   * NULL if none of them does.
+   */
+ const u64 *id;
+ u64 a;
+
+ id = NULL;
+ mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+ u64 target;
+
+ target = mdesc_arc_target(hp, a);
+ id = mdesc_get_property(hp, target,
+ "cfg-handle", NULL);
+ if (id)
+ break;
+ }
+
+ return id;
+}
+
+/* Run 'func' on nodes which are in A but not in B.
+ *
+ * @name: node name to iterate over in both descriptions
+ * @a:    description whose nodes are candidates for 'func'
+ * @b:    description searched for a node with the same identifier
+ * @func: called as func(a, node) for each node of A with no match in B
+ *
+ * Nodes are matched by the value of their "id" property, except for
+ * nodes whose "name" property is "vdc-port", which are matched by the
+ * "cfg-handle" of a parent (see parent_cfg_handle()).  A node of A
+ * without an identifier is reported and skipped.  A node of B without
+ * an identifier can never match and is skipped as well.
+ */
+static void invoke_on_missing(const char *name,
+			      struct mdesc_handle *a,
+			      struct mdesc_handle *b,
+			      void (*func)(struct mdesc_handle *, u64))
+{
+	u64 node;
+
+	mdesc_for_each_node_by_name(a, node, name) {
+		int found = 0, is_vdc_port = 0;
+		const char *name_prop;
+		const u64 *id;
+		u64 fnode;
+
+		name_prop = mdesc_get_property(a, node, "name", NULL);
+		if (name_prop && !strcmp(name_prop, "vdc-port")) {
+			is_vdc_port = 1;
+			id = parent_cfg_handle(a, node);
+		} else
+			id = mdesc_get_property(a, node, "id", NULL);
+
+		if (!id) {
+			printk(KERN_ERR "MD: Cannot find ID for %s node.\n",
+			       (name_prop ? name_prop : name));
+			continue;
+		}
+
+		mdesc_for_each_node_by_name(b, fnode, name) {
+			const u64 *fid;
+
+			if (is_vdc_port) {
+				const char *fname;
+
+				fname = mdesc_get_property(b, fnode,
+							   "name", NULL);
+				if (!fname ||
+				    strcmp(fname, "vdc-port"))
+					continue;
+				fid = parent_cfg_handle(b, fnode);
+				if (!fid) {
+					printk(KERN_ERR "MD: Cannot find ID "
+					       "for vdc-port node.\n");
+					continue;
+				}
+			} else {
+				fid = mdesc_get_property(b, fnode,
+							 "id", NULL);
+				/* mdesc_get_property() returns NULL when the
+				 * property is absent; such a node cannot be
+				 * compared, so it is not a match.
+				 */
+				if (!fid)
+					continue;
+			}
+
+			if (*id == *fid) {
+				found = 1;
+				break;
+			}
+		}
+		if (!found)
+			func(a, node);
+	}
+}
+
+static void notify_one(struct mdesc_notifier_client *p,
+ struct mdesc_handle *old_hp,
+ struct mdesc_handle *new_hp)
+{ /* Tell one client about the difference between two descriptions:
+   * remove is called for its nodes present only in old_hp, then add
+   * for its nodes present only in new_hp.
+   */
+ invoke_on_missing(p->node_name, old_hp, new_hp, p->remove);
+ invoke_on_missing(p->node_name, new_hp, old_hp, p->add);
+}
+
+/* Walk the registered notifier clients in list order and let each one
+ * process the difference between the old and the new description.
+ */
+static void mdesc_notify_clients(struct mdesc_handle *old_hp,
+				 struct mdesc_handle *new_hp)
+{
+	struct mdesc_notifier_client *client;
+
+	for (client = client_list; client; client = client->next)
+		notify_one(client, old_hp, new_hp);
+}
+
+void mdesc_update(void)
+{ /* Re-read the machine description from the hypervisor, install it
+   * as the current one, notify registered clients of added and removed
+   * nodes, and retire the previous description.  Runs under
+   * mdesc_mutex.  On allocation or read failure the current
+   * description is left in place and an error is logged.
+   */
+ unsigned long len, real_len, status;
+ struct mdesc_handle *hp, *orig_hp;
+ unsigned long flags;
+
+ mutex_lock(&mdesc_mutex);
+
+ (void) sun4v_mach_desc(0UL, 0UL, &len); /* zero-length call whose only use here is the size stored in len */
+
+ hp = mdesc_alloc(len, &kmalloc_mdesc_memops);
+ if (!hp) {
+ printk(KERN_ERR "MD: mdesc alloc fails\n");
+ goto out;
+ }
+
+ status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
+ if (status != HV_EOK || real_len > len) {
+ printk(KERN_ERR "MD: mdesc reread fails with %lu\n",
+ status);
+ atomic_dec(&hp->refcnt); /* the free hook requires a zero reference count */
+ mdesc_free(hp);
+ goto out;
+ }
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ orig_hp = cur_mdesc;
+ cur_mdesc = hp;
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+
+ mdesc_notify_clients(orig_hp, hp);
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ if (atomic_dec_and_test(&orig_hp->refcnt))
+ mdesc_free(orig_hp);
+ else
+ list_add(&orig_hp->list, &mdesc_zombie_list); /* still referenced: park it until the last mdesc_release() */
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+
+out:
+ mutex_unlock(&mdesc_mutex);
+}
+
+static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
+{ /* The node block starts immediately after the header. */
+ return (struct mdesc_elem *) (mdesc + 1);
+}
+
+static void *name_block(struct mdesc_hdr *mdesc)
+{ /* The name block starts node_sz bytes after the start of the node block. */
+ return ((void *) node_block(mdesc)) + mdesc->node_sz;
+}
+
+static void *data_block(struct mdesc_hdr *mdesc)
+{ /* The data block starts name_sz bytes after the start of the name block. */
+ return ((void *) name_block(mdesc)) + mdesc->name_sz;
+}
+
+u64 mdesc_node_by_name(struct mdesc_handle *hp,
+ u64 from_node, const char *name)
+{ /* Find the next node called name.  With from_node ==
+   * MDESC_NODE_NULL the search starts at element 0 and includes it;
+   * otherwise it starts at the node that follows from_node.  Nodes are
+   * chained through the d.val field of their MD_NODE element.
+   *
+   * Returns the element index of the matching node, or
+   * MDESC_NODE_NULL if from_node is out of range, a non-node element
+   * is reached, or the chain runs past the end of the node block.
+   */
+ struct mdesc_elem *ep = node_block(&hp->mdesc);
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16; /* element count: node block size divided by 16 bytes per element */
+ u64 ret;
+
+ if (from_node == MDESC_NODE_NULL) {
+ ret = from_node = 0;
+ } else if (from_node >= last_node) {
+ return MDESC_NODE_NULL;
+ } else {
+ ret = ep[from_node].d.val;
+ }
+
+ while (ret < last_node) {
+ if (ep[ret].tag != MD_NODE)
+ return MDESC_NODE_NULL;
+ if (!strcmp(names + ep[ret].name_offset, name))
+ break;
+ ret = ep[ret].d.val;
+ }
+ if (ret >= last_node)
+ ret = MDESC_NODE_NULL;
+ return ret;
+}
+EXPORT_SYMBOL(mdesc_node_by_name);
+
+const void *mdesc_get_property(struct mdesc_handle *hp, u64 node,
+ const char *name, int *lenp)
+{ /* Look up property name among the elements of node.
+   *
+   * Returns a pointer to the property value inside the description
+   * (the 8-byte d.val for MD_PROP_VAL, or the payload in the data
+   * block for MD_PROP_STR and MD_PROP_DATA), or NULL if node is
+   * invalid or no such property exists.  When lenp is non-NULL and
+   * the property is found, *lenp receives the value length in bytes.
+   */
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
+ void *data = data_block(&hp->mdesc);
+ struct mdesc_elem *ep;
+
+ if (node == MDESC_NODE_NULL || node >= last_node)
+ return NULL;
+
+ ep = node_block(&hp->mdesc) + node;
+ ep++; /* skip the MD_NODE element itself */
+ for (; ep->tag != MD_NODE_END; ep++) {
+ void *val = NULL;
+ int len = 0;
+
+ switch (ep->tag) {
+ case MD_PROP_VAL:
+ val = &ep->d.val;
+ len = 8;
+ break;
+
+ case MD_PROP_STR:
+ case MD_PROP_DATA:
+ val = data + ep->d.data.data_offset;
+ len = ep->d.data.data_len;
+ break;
+
+ default:
+ break;
+ }
+ if (!val) /* arcs and other element kinds are not properties */
+ continue;
+
+ if (!strcmp(names + ep->name_offset, name)) {
+ if (lenp)
+ *lenp = len;
+ return val;
+ }
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(mdesc_get_property);
+
+u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type)
+{ /* Scan forward from the element after from, up to the enclosing
+   * node's MD_NODE_END, for an MD_PROP_ARC element named arc_type.
+   * from may be a node index (to get the first arc) or the index of a
+   * previously returned arc (to get the next one).
+   *
+   * Returns the element index of the arc, or MDESC_NODE_NULL if
+   * from is invalid or no further arc of that type exists.
+   */
+ struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
+
+ if (from == MDESC_NODE_NULL || from >= last_node)
+ return MDESC_NODE_NULL;
+
+ ep = base + from;
+
+ ep++;
+ for (; ep->tag != MD_NODE_END; ep++) {
+ if (ep->tag != MD_PROP_ARC)
+ continue;
+
+ if (strcmp(names + ep->name_offset, arc_type))
+ continue;
+
+ return ep - base;
+ }
+
+ return MDESC_NODE_NULL;
+}
+EXPORT_SYMBOL(mdesc_next_arc);
+
+u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc)
+{ /* Return the d.val field of element arc, which for an arc element
+   * is the index of the node it points to.  The index is not range
+   * checked here.
+   */
+ struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+
+ ep = base + arc;
+
+ return ep->d.val;
+}
+EXPORT_SYMBOL(mdesc_arc_target);
+
+/* Return the name of node as a string inside the description's name
+ * block, or NULL when node is MDESC_NODE_NULL, lies beyond the node
+ * block, or does not refer to an MD_NODE element.
+ */
+const char *mdesc_node_name(struct mdesc_handle *hp, u64 node)
+{
+	struct mdesc_elem *elem;
+	const char *name_base;
+
+	if (node == MDESC_NODE_NULL)
+		return NULL;
+	if (node >= hp->mdesc.node_sz / 16)
+		return NULL;
+
+	elem = node_block(&hp->mdesc) + node;
+	if (elem->tag != MD_NODE)
+		return NULL;
+
+	name_base = name_block(&hp->mdesc);
+	return name_base + elem->name_offset;
+}
+EXPORT_SYMBOL(mdesc_node_name);
+
+/* Log the properties of the machine description's "platform" node and,
+ * on SMP kernels, mark CPUs 0 .. max-cpus-1 (capped at NR_CPUS) as
+ * possible.  Halts through the PROM if there is no platform node.
+ *
+ * Every numeric property is printed only when it is present, because
+ * mdesc_get_property() returns NULL for a missing property.
+ */
+static void __init report_platform_properties(void)
+{
+	struct mdesc_handle *hp = mdesc_grab();
+	u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
+	const char *s;
+	const u64 *v;
+
+	if (pn == MDESC_NODE_NULL) {
+		prom_printf("No platform node in machine-description.\n");
+		prom_halt();
+	}
+
+	s = mdesc_get_property(hp, pn, "banner-name", NULL);
+	printk("PLATFORM: banner-name [%s]\n", s);
+	s = mdesc_get_property(hp, pn, "name", NULL);
+	printk("PLATFORM: name [%s]\n", s);
+
+	v = mdesc_get_property(hp, pn, "hostid", NULL);
+	if (v)
+		printk("PLATFORM: hostid [%08lx]\n", *v);
+	v = mdesc_get_property(hp, pn, "serial#", NULL);
+	if (v)
+		printk("PLATFORM: serial# [%08lx]\n", *v);
+	v = mdesc_get_property(hp, pn, "stick-frequency", NULL);
+	/* Guarded like its siblings; it used to be dereferenced blindly. */
+	if (v)
+		printk("PLATFORM: stick-frequency [%08lx]\n", *v);
+	v = mdesc_get_property(hp, pn, "mac-address", NULL);
+	if (v)
+		printk("PLATFORM: mac-address [%lx]\n", *v);
+	v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL);
+	if (v)
+		printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v);
+	v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL);
+	if (v)
+		printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v);
+	v = mdesc_get_property(hp, pn, "max-cpus", NULL);
+	if (v)
+		printk("PLATFORM: max-cpus [%lu]\n", *v);
+
+#ifdef CONFIG_SMP
+	{
+		int max_cpu, i;
+
+		/* v still refers to the "max-cpus" lookup above. */
+		if (v) {
+			max_cpu = *v;
+			if (max_cpu > NR_CPUS)
+				max_cpu = NR_CPUS;
+		} else {
+			max_cpu = NR_CPUS;
+		}
+		for (i = 0; i < max_cpu; i++)
+			cpu_set(i, cpu_possible_map);
+	}
+#endif
+
+	mdesc_release(hp);
+}
+
+static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
+ struct mdesc_handle *hp,
+ u64 mp)
+{ /* Record the size and line size of the cache node mp in c: a level-1
+   * cache goes to the icache or dcache fields depending on whether its
+   * "type" property list contains "instn" or "data", a level-2 cache
+   * goes to the ecache fields.  For a level-1 cache the function then
+   * recurses into every forward-arc target that is itself a "cache"
+   * node.
+   *
+   * NOTE(review): the "level", "size", "line-size" and "type"
+   * lookups and the target node name are used without NULL checks
+   * -- confirm the description always provides them.
+   */
+ const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
+ const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
+ const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL);
+ const char *type;
+ int type_len;
+
+ type = mdesc_get_property(hp, mp, "type", &type_len);
+
+ switch (*level) {
+ case 1:
+ if (of_find_in_proplist(type, "instn", type_len)) {
+ c->icache_size = *size;
+ c->icache_line_size = *line_size;
+ } else if (of_find_in_proplist(type, "data", type_len)) {
+ c->dcache_size = *size;
+ c->dcache_line_size = *line_size;
+ }
+ break;
+
+ case 2:
+ c->ecache_size = *size;
+ c->ecache_line_size = *line_size;
+ break;
+
+ default:
+ break;
+ }
+
+ if (*level == 1) {
+ u64 a;
+
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ u64 target = mdesc_arc_target(hp, a);
+ const char *name = mdesc_node_name(hp, target);
+
+ if (!strcmp(name, "cache"))
+ fill_in_one_cache(c, hp, target);
+ }
+ }
+}
+
+/* Stamp @core_id on every cpu reachable from node @mp over back arcs.
+ *
+ * A "cpu" node directly behind @mp is marked straight away; for any
+ * other node the search goes exactly one level further back and marks
+ * the "cpu" nodes found there.  Cpu ids >= NR_CPUS are ignored.
+ */
+static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
+				    int core_id)
+{
+	u64 arc;
+
+	mdesc_for_each_arc(arc, hp, mp, MDESC_ARC_TYPE_BACK) {
+		u64 node = mdesc_arc_target(hp, arc);
+		const u64 *cpu_id;
+		u64 up;
+
+		if (strcmp(mdesc_node_name(hp, node), "cpu") == 0) {
+			cpu_id = mdesc_get_property(hp, node, "id", NULL);
+			if (*cpu_id < NR_CPUS)
+				cpu_data(*cpu_id).core_id = core_id;
+			continue;
+		}
+
+		/* Intermediate node: look one more level back for cpus. */
+		mdesc_for_each_arc(up, hp, node, MDESC_ARC_TYPE_BACK) {
+			u64 parent = mdesc_arc_target(hp, up);
+
+			if (strcmp(mdesc_node_name(hp, parent), "cpu") != 0)
+				continue;
+
+			cpu_id = mdesc_get_property(hp, parent, "id", NULL);
+			if (*cpu_id < NR_CPUS)
+				cpu_data(*cpu_id).core_id = core_id;
+		}
+	}
+}
+
+/* Hand out core ids, starting at 1, one per level-1 "cache" node whose
+ * "type" property matches "instn"; the cpus behind each such cache
+ * receive that id via mark_core_ids().
+ */
+static void __devinit set_core_ids(struct mdesc_handle *hp)
+{
+	int core = 1;
+	u64 node;
+
+	mdesc_for_each_node_by_name(hp, node, "cache") {
+		const u64 *level = mdesc_get_property(hp, node, "level", NULL);
+		const char *type;
+		int type_len;
+
+		if (*level != 1)
+			continue;
+
+		type = mdesc_get_property(hp, node, "type", &type_len);
+		if (of_find_in_proplist(type, "instn", type_len))
+			mark_core_ids(hp, node, core++);
+	}
+}
+
+/* Stamp @proc_id on every "cpu" node directly behind @mp (back arcs).
+ * Cpu ids >= NR_CPUS are ignored.
+ */
+static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
+				    int proc_id)
+{
+	u64 arc;
+
+	mdesc_for_each_arc(arc, hp, mp, MDESC_ARC_TYPE_BACK) {
+		u64 node = mdesc_arc_target(hp, arc);
+
+		if (strcmp(mdesc_node_name(hp, node), "cpu") == 0) {
+			const u64 *cpu_id;
+
+			cpu_id = mdesc_get_property(hp, node, "id", NULL);
+			if (*cpu_id < NR_CPUS)
+				cpu_data(*cpu_id).proc_id = proc_id;
+		}
+	}
+}
+
+/* Hand out processor ids, starting at 0, one per node named
+ * @exec_unit_name whose "type" property matches "int" or "integer".
+ */
+static void __devinit __set_proc_ids(struct mdesc_handle *hp,
+				     const char *exec_unit_name)
+{
+	int next_id = 0;
+	u64 node;
+
+	mdesc_for_each_node_by_name(hp, node, exec_unit_name) {
+		int type_len;
+		const char *type;
+
+		type = mdesc_get_property(hp, node, "type", &type_len);
+		if (of_find_in_proplist(type, "int", type_len) ||
+		    of_find_in_proplist(type, "integer", type_len))
+			mark_proc_ids(hp, node, next_id++);
+	}
+}
+
+/* Assign processor ids for both spellings of the execution-unit node
+ * name.  Each call numbers its matches from 0 independently.
+ */
+static void __devinit set_proc_ids(struct mdesc_handle *hp)
+{
+ __set_proc_ids(hp, "exec_unit");
+ __set_proc_ids(hp, "exec-unit");
+}
+
+/* Convert a "q-*-#bits" machine-description property into a queue mask.
+ *
+ * @p:    property value (a bit count), or NULL if the property is absent
+ * @mask: receives ((1 << bits) * 64) - 1, truncated to 32 bits
+ * @def:  bit count used when @p is NULL, zero, or >= 64
+ *
+ * The arithmetic is done in 64 bits and truncated on the final store.
+ * The range check admits counts up to 63, but shifting the 32-bit
+ * constant 1U by 32 or more is undefined behaviour in C.  For every
+ * count below 32 the result is identical to the 32-bit expression.
+ */
+static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
+					 unsigned char def)
+{
+	u64 bits = def;
+
+	if (p && *p && *p < 64)
+		bits = *p;
+
+	*mask = (unsigned int) (((1ULL << bits) * 64ULL) - 1ULL);
+}
+
+static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
+ struct trap_per_cpu *tb)
+{
+ const u64 *val;
+
+ val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL);
+ get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7);
+
+ val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL);
+ get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7);
+
+ val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL);
+ get_one_mondo_bits(val, &tb->resum_qmask, 6);
+
+ val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL);
+ get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
+}
+
+/* Populate per-cpu data from the "cpu" nodes of the machine description.
+ *
+ * For every "cpu" node whose id is set in @mask (SMP) or equals the boot
+ * cpu (UP) this records the clock frequency, the mondo queue masks and
+ * the cache geometry, marks the cpu present (SMP only) and resets
+ * core_id/proc_id.  Afterwards core and processor ids are assigned and
+ * smp_fill_in_sib_core_maps() is called.
+ *
+ * ncpus_probed counts every "cpu" node seen, including skipped ones.
+ *
+ * NOTE(review): the handle from mdesc_grab() and the "id" and
+ * "clock-frequency" properties are used without NULL checks.
+ */
+void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask)
+{
+ struct mdesc_handle *hp = mdesc_grab();
+ u64 mp;
+
+ ncpus_probed = 0;
+ mdesc_for_each_node_by_name(hp, mp, "cpu") {
+ const u64 *id = mdesc_get_property(hp, mp, "id", NULL);
+ const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
+ struct trap_per_cpu *tb;
+ cpuinfo_sparc *c;
+ int cpuid;
+ u64 a;
+
+ ncpus_probed++;
+
+ cpuid = *id;
+
+#ifdef CONFIG_SMP
+ if (cpuid >= NR_CPUS) {
+ printk(KERN_WARNING "Ignoring CPU %d which is "
+ ">= NR_CPUS (%d)\n",
+ cpuid, NR_CPUS);
+ continue;
+ }
+ if (!cpu_isset(cpuid, mask))
+ continue;
+#else
+ /* On uniprocessor we only want the values for the
+ * real physical cpu the kernel booted onto, however
+ * cpu_data() only has one entry at index 0.
+ */
+ if (cpuid != real_hard_smp_processor_id())
+ continue;
+ cpuid = 0;
+#endif
+
+ c = &cpu_data(cpuid);
+ c->clock_tick = *cfreq;
+
+ tb = &trap_block[cpuid];
+ get_mondo_data(hp, mp, tb);
+
+ /* Caches hang off the cpu node directly, or one node further
+ * down the forward arcs; both levels are searched.
+ */
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ u64 j, t = mdesc_arc_target(hp, a);
+ const char *t_name;
+
+ t_name = mdesc_node_name(hp, t);
+ if (!strcmp(t_name, "cache")) {
+ fill_in_one_cache(c, hp, t);
+ continue;
+ }
+
+ mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
+ u64 n = mdesc_arc_target(hp, j);
+ const char *n_name;
+
+ n_name = mdesc_node_name(hp, n);
+ if (!strcmp(n_name, "cache"))
+ fill_in_one_cache(c, hp, n);
+ }
+ }
+
+#ifdef CONFIG_SMP
+ cpu_set(cpuid, cpu_present_map);
+#endif
+
+ /* Defaults; set_core_ids()/set_proc_ids() below overwrite them
+ * for the cpus they reach.
+ */
+ c->core_id = 0;
+ c->proc_id = -1;
+ }
+
+#ifdef CONFIG_SMP
+ sparc64_multi_core = 1;
+#endif
+
+ set_core_ids(hp);
+ set_proc_ids(hp);
+
+ smp_fill_in_sib_core_maps();
+
+ mdesc_release(hp);
+}
+
+/* read() handler for the "mdesc" misc device.
+ *
+ * Copies hp->handle_size bytes starting at &hp->mdesc to @buf in one
+ * go.  Returns that size on success, -ENODEV when no machine
+ * description is available, -EMSGSIZE when @len is too small for the
+ * whole copy and -EFAULT when the user copy fails.  @offp is not
+ * consulted.
+ */
+static ssize_t mdesc_read(struct file *file, char __user *buf,
+			  size_t len, loff_t *offp)
+{
+	struct mdesc_handle *hp = mdesc_grab();
+	int result;
+
+	if (!hp)
+		return -ENODEV;
+
+	if (len < hp->handle_size)
+		result = -EMSGSIZE;
+	else if (copy_to_user(buf, &hp->mdesc, hp->handle_size))
+		result = -EFAULT;
+	else
+		result = hp->handle_size;
+
+	mdesc_release(hp);
+
+	return result;
+}
+
+/* File operations of the "mdesc" misc device: read only. */
+static const struct file_operations mdesc_fops = {
+ .read = mdesc_read,
+ .owner = THIS_MODULE,
+};
+
+/* Misc device "mdesc" with a dynamically assigned minor number. */
+static struct miscdevice mdesc_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "mdesc",
+ .fops = &mdesc_fops,
+};
+
+/* Register the "mdesc" misc device; returns misc_register()'s result. */
+static int __init mdesc_misc_init(void)
+{
+ return misc_register(&mdesc_misc);
+}
+
+__initcall(mdesc_misc_init);
+
+/* Fetch the machine description at boot and install it as cur_mdesc.
+ *
+ * Halts via prom_halt() if the buffer cannot be allocated or the
+ * description cannot be fetched.  On success the platform properties are
+ * reported and the cpu data is filled in for all cpus.
+ */
+void __init sun4v_mdesc_init(void)
+{
+ struct mdesc_handle *hp;
+ unsigned long len, real_len, status;
+ cpumask_t mask;
+
+ /* Call with a zero address/length; the status is ignored and only
+ * 'len' is used -- presumably this is the size query, confirm
+ * against the hypervisor API.
+ */
+ (void) sun4v_mach_desc(0UL, 0UL, &len);
+
+ printk("MDESC: Size is %lu bytes.\n", len);
+
+ hp = mdesc_alloc(len, &lmb_mdesc_ops);
+ if (hp == NULL) {
+ prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
+ prom_halt();
+ }
+
+ /* Second call fetches the data into the freshly allocated buffer. */
+ status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
+ if (status != HV_EOK || real_len > len) {
+ prom_printf("sun4v_mach_desc fails, err(%lu), "
+ "len(%lu), real_len(%lu)\n",
+ status, len, real_len);
+ mdesc_free(hp);
+ prom_halt();
+ }
+
+ cur_mdesc = hp;
+
+ report_platform_properties();
+
+ cpus_setall(mask);
+ mdesc_fill_in_cpu_data(mask);
+}
diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S
new file mode 100644
index 000000000000..753b4f031bfb
--- /dev/null
+++ b/arch/sparc/kernel/misctrap.S
@@ -0,0 +1,97 @@
+#ifdef CONFIG_KGDB
+ /* arch_kgdb_breakpoint: issue software trap 0x72, then return to
+ * the caller as a leaf routine (nop in the delay slot).
+ */
+ .globl arch_kgdb_breakpoint
+ .type arch_kgdb_breakpoint,#function
+arch_kgdb_breakpoint:
+ ta 0x72
+ retl
+ nop
+ .size arch_kgdb_breakpoint,.-arch_kgdb_breakpoint
+#endif
+
+ /* __do_privact: clear the D-MMU fault status register, enter the
+ * kernel through etrap and call do_privact() with a pointer to the
+ * saved registers (%sp + PTREGS_OFF) in %o0, then leave via rtrap.
+ *
+ * %g7 is loaded with the address of local label 109 (the 'or' sits
+ * in the delay slot of the branch to etrap); presumably etrap uses
+ * %g7 to resume just after it -- confirm in etrap.
+ */
+ .type __do_privact,#function
+__do_privact:
+ mov TLB_SFSR, %g3
+ stxa %g0, [%g3] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync
+ sethi %hi(109f), %g7
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ call do_privact
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size __do_privact,.-__do_privact
+
+ /* do_mna: memory-address-not-aligned handling.
+ *
+ * The trap level is compared with 1 first; the condition codes
+ * survive the loads and stores below and are consumed by the 'bgu'.
+ * %g4 receives the D-MMU fault address (DMMU_SFAR) and %g5 the fault
+ * status (TLB_SFSR), which is then cleared.  If %tl > 1 control goes
+ * to winfix_mna with %tpc in %g3 (read in the delay slot).
+ * Otherwise etrap is entered and mem_address_unaligned() is called
+ * with %o0 = %sp + PTREGS_OFF, %o1 = %l4 and %o2 = %l5.
+ *
+ * NOTE(review): %l4/%l5 presumably hold the %g4/%g5 values after
+ * etrap -- confirm in etrap.
+ */
+ .type do_mna,#function
+do_mna:
+ rdpr %tl, %g3
+ cmp %g3, 1
+
+ /* Setup %g4/%g5 now as they are used in the
+ * winfixup code.
+ */
+ mov TLB_SFSR, %g3
+ mov DMMU_SFAR, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+ ldxa [%g3] ASI_DMMU, %g5
+ stxa %g0, [%g3] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync
+ bgu,pn %icc, winfix_mna
+ rdpr %tpc, %g3
+
+1: sethi %hi(109f), %g7
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call mem_address_unaligned
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size do_mna,.-do_mna
+
+ /* do_lddfmna: read the D-MMU fault status into %g5 (clearing the
+ * register afterwards) and the fault address into %g4, enter the
+ * kernel through etrap, then call handle_lddfmna() with
+ * %o0 = %sp + PTREGS_OFF, %o1 = %l4 and %o2 = %l5 and leave via
+ * rtrap.
+ *
+ * NOTE(review): %l4/%l5 presumably hold the %g4/%g5 values after
+ * etrap -- confirm in etrap.
+ */
+ .type do_lddfmna,#function
+do_lddfmna:
+ sethi %hi(109f), %g7
+ mov TLB_SFSR, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ stxa %g0, [%g4] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync
+ mov DMMU_SFAR, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call handle_lddfmna
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size do_lddfmna,.-do_lddfmna
+
+ /* do_stdfmna: same sequence as do_lddfmna, but the C handler called
+ * after etrap is handle_stdfmna().
+ *
+ * NOTE(review): %l4/%l5 presumably hold the %g4/%g5 values after
+ * etrap -- confirm in etrap.
+ */
+ .type do_stdfmna,#function
+do_stdfmna:
+ sethi %hi(109f), %g7
+ mov TLB_SFSR, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ stxa %g0, [%g4] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync
+ mov DMMU_SFAR, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call handle_stdfmna
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size do_stdfmna,.-do_stdfmna
+
+ /* breakpoint_trap: call sparc_breakpoint() with %o0 = %sp +
+ * PTREGS_OFF (set up in the call's delay slot), then leave via
+ * rtrap.  No etrap is issued here.
+ */
+ .type breakpoint_trap,#function
+breakpoint_trap:
+ call sparc_breakpoint
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size breakpoint_trap,.-breakpoint_trap
diff --git a/arch/sparc/kernel/module_64.c b/arch/sparc/kernel/module_64.c
new file mode 100644
index 000000000000..158484bf5999
--- /dev/null
+++ b/arch/sparc/kernel/module_64.c
@@ -0,0 +1,213 @@
+/* Kernel module help for sparc64.
+ *
+ * Copyright (C) 2001 Rusty Russell.
+ * Copyright (C) 2002 David S. Miller.
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/kernel.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <asm/processor.h>
+#include <asm/spitfire.h>
+
+/* Allocate @size bytes, rounded up to whole pages, inside the module
+ * address window [MODULES_VADDR, MODULES_END).  Returns NULL when the
+ * rounded size is zero or exceeds MODULES_LEN, or when no area could
+ * be obtained.
+ */
+static void *module_map(unsigned long size)
+{
+	unsigned long len = PAGE_ALIGN(size);
+	struct vm_struct *vm;
+
+	if (len == 0 || len > MODULES_LEN)
+		return NULL;
+
+	vm = __get_vm_area(len, VM_ALLOC, MODULES_VADDR, MODULES_END);
+
+	return vm ? __vmalloc_area(vm, GFP_KERNEL, PAGE_KERNEL) : NULL;
+}
+
+/* Allocate zero-filled memory for a module.
+ *
+ * Returns NULL for a zero-sized request, ERR_PTR(-ENOMEM) when the
+ * mapping fails, and the zeroed region otherwise.
+ */
+void *module_alloc(unsigned long size)
+{
+	void *region;
+
+	/* A zero-sized request is answered here rather than passed on. */
+	if (!size)
+		return NULL;
+
+	region = module_map(size);
+	if (!region)
+		return ERR_PTR(-ENOMEM);
+
+	memset(region, 0, size);
+
+	return region;
+}
+
+/* Free memory returned from module_core_alloc/module_init_alloc.
+ * @mod is currently unused; the region is simply handed to vfree().
+ */
+void module_free(struct module *mod, void *module_region)
+{
+ vfree(module_region);
+ /* FIXME: If module_region == mod->init_region, trim exception
+ table entries. */
+}
+
+/* Make generic code ignore STT_REGISTER dummy undefined symbols. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+ Elf_Shdr *sechdrs,
+ char *secstrings,
+ struct module *mod)
+{
+ unsigned int symidx;
+ Elf64_Sym *sym;
+ const char *strtab;
+ int i;
+
+ for (symidx = 0; sechdrs[symidx].sh_type != SHT_SYMTAB; symidx++) {
+ if (symidx == hdr->e_shnum-1) {
+ printk("%s: no symtab found.\n", mod->name);
+ return -ENOEXEC;
+ }
+ }
+ sym = (Elf64_Sym *)sechdrs[symidx].sh_addr;
+ strtab = (char *)sechdrs[sechdrs[symidx].sh_link].sh_addr;
+
+ for (i = 1; i < sechdrs[symidx].sh_size / sizeof(Elf_Sym); i++) {
+ if (sym[i].st_shndx == SHN_UNDEF &&
+ ELF64_ST_TYPE(sym[i].st_info) == STT_REGISTER)
+ sym[i].st_shndx = SHN_ABS;
+ }
+ return 0;
+}
+
+/* Relocation sections without explicit addends are not handled here:
+ * log an error naming the module and fail with -ENOEXEC.
+ */
+int apply_relocate(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ printk(KERN_ERR "module %s: non-ADD RELOCATION unsupported\n",
+ me->name);
+ return -ENOEXEC;
+}
+
+/* Apply every Elf64_Rela entry of section @relsec to the section it
+ * targets (sechdrs[relsec].sh_info), resolving symbols through the
+ * symbol table at @symindex.
+ *
+ * Returns 0 on success, or -ENOEXEC on the first relocation type that is
+ * not handled below.  @strtab is unused.
+ */
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ unsigned int i;
+ Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+ Elf64_Sym *sym;
+ u8 *location;
+ u32 *loc32;
+
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+ Elf64_Addr v;
+
+ /* This is where to make the change */
+ location = (u8 *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ + rel[i].r_offset;
+ loc32 = (u32 *) location;
+
+ /* The patched address must fit in 32 bits. */
+ BUG_ON(((u64)location >> (u64)32) != (u64)0);
+
+ /* This is the symbol it is referring to. Note that all
+ undefined symbols have been resolved. */
+ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rel[i].r_info);
+ v = sym->st_value + rel[i].r_addend;
+
+ /* Only the low 8 bits of the type field select the relocation. */
+ switch (ELF64_R_TYPE(rel[i].r_info) & 0xff) {
+ case R_SPARC_64:
+ /* Stored byte by byte, most significant byte first. */
+ location[0] = v >> 56;
+ location[1] = v >> 48;
+ location[2] = v >> 40;
+ location[3] = v >> 32;
+ location[4] = v >> 24;
+ location[5] = v >> 16;
+ location[6] = v >> 8;
+ location[7] = v >> 0;
+ break;
+
+ case R_SPARC_32:
+ location[0] = v >> 24;
+ location[1] = v >> 16;
+ location[2] = v >> 8;
+ location[3] = v >> 0;
+ break;
+
+ case R_SPARC_DISP32:
+ /* PC-relative: value minus the patched address. */
+ v -= (Elf64_Addr) location;
+ *loc32 = v;
+ break;
+
+ case R_SPARC_WDISP30:
+ /* Word displacement in the low 30 bits. */
+ v -= (Elf64_Addr) location;
+ *loc32 = (*loc32 & ~0x3fffffff) |
+ ((v >> 2) & 0x3fffffff);
+ break;
+
+ case R_SPARC_WDISP22:
+ v -= (Elf64_Addr) location;
+ *loc32 = (*loc32 & ~0x3fffff) |
+ ((v >> 2) & 0x3fffff);
+ break;
+
+ case R_SPARC_WDISP19:
+ v -= (Elf64_Addr) location;
+ *loc32 = (*loc32 & ~0x7ffff) |
+ ((v >> 2) & 0x7ffff);
+ break;
+
+ case R_SPARC_LO10:
+ /* Low 10 bits of the value. */
+ *loc32 = (*loc32 & ~0x3ff) | (v & 0x3ff);
+ break;
+
+ case R_SPARC_HI22:
+ /* Bits 10..31 of the value. */
+ *loc32 = (*loc32 & ~0x3fffff) |
+ ((v >> 10) & 0x3fffff);
+ break;
+
+ case R_SPARC_OLO10:
+ /* Low 10 bits of the value plus the extra addend carried
+ * in the type field above bit 7, as a 13-bit field.
+ */
+ *loc32 = (*loc32 & ~0x1fff) |
+ (((v & 0x3ff) +
+ (ELF64_R_TYPE(rel[i].r_info) >> 8))
+ & 0x1fff);
+ break;
+
+ default:
+ printk(KERN_ERR "module %s: Unknown relocation: %x\n",
+ me->name,
+ (int) (ELF64_R_TYPE(rel[i].r_info) & 0xff));
+ return -ENOEXEC;
+ };
+ }
+ return 0;
+}
+
+/* Final per-module fixup.  On spitfire the register windows are flushed
+ * and I-cache tags are written as zero in 32-byte steps over
+ * 2 * PAGE_SIZE, followed by a 'flush'.  Other tlb types need nothing.
+ * Always returns 0; none of the arguments is used.
+ */
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ /* Cheetah's I-cache is fully coherent. */
+ if (tlb_type == spitfire) {
+ unsigned long va;
+
+ flushw_all();
+ for (va = 0; va < (PAGE_SIZE << 1); va += 32)
+ spitfire_put_icache_tag(va, 0x0);
+ __asm__ __volatile__("flush %g6");
+ }
+
+ return 0;
+}
+
+/* Nothing architecture specific to undo when a module goes away. */
+void module_arch_cleanup(struct module *mod)
+{
+}
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
new file mode 100644
index 000000000000..0f616ae3246c
--- /dev/null
+++ b/arch/sparc/kernel/of_device_64.c
@@ -0,0 +1,898 @@
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+/* Claim @size bytes at @res->start + @offset under @name and return
+ * that address as an I/O cookie.  The memory or I/O-port region API is
+ * chosen by IORESOURCE_MEM in @res->flags.  Returns NULL (0) when the
+ * region cannot be claimed.
+ */
+void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name)
+{
+	unsigned long base = res->start + offset;
+	struct resource *claimed;
+
+	claimed = (res->flags & IORESOURCE_MEM) ?
+		request_mem_region(base, size, name) :
+		request_region(base, size, name);
+
+	return (void __iomem *) (claimed ? base : 0UL);
+}
+EXPORT_SYMBOL(of_ioremap);
+
+/* Release a region claimed by of_ioremap(); @res->flags selects the
+ * memory or I/O-port release routine.
+ */
+void of_iounmap(struct resource *res, void __iomem *base, unsigned long size)
+{
+	unsigned long start = (unsigned long) base;
+
+	if (!(res->flags & IORESOURCE_MEM)) {
+		release_region(start, size);
+		return;
+	}
+
+	release_mem_region(start, size);
+}
+EXPORT_SYMBOL(of_iounmap);
+
+/* bus_find_device() callback: nonzero when @dev's of_device wraps the
+ * device_node passed in @data.
+ */
+static int node_match(struct device *dev, void *data)
+{
+	struct device_node *wanted = data;
+
+	return to_of_device(dev)->node == wanted;
+}
+
+/* Look up the of_device on of_platform_bus_type whose node is @dp.
+ * Returns NULL when no such device is registered.
+ */
+struct of_device *of_find_device_by_node(struct device_node *dp)
+{
+	struct device *found;
+
+	found = bus_find_device(&of_platform_bus_type, NULL, dp, node_match);
+
+	return found ? to_of_device(found) : NULL;
+}
+EXPORT_SYMBOL(of_find_device_by_node);
+
+/* Return interrupt number @index of the of_device that wraps @node.
+ *
+ * Returns 0 when no of_device exists for @node or when @index is
+ * outside [0, num_irqs).  A negative index is rejected explicitly;
+ * it would otherwise pass the upper-bound test and read before the
+ * start of op->irqs[].
+ */
+unsigned int irq_of_parse_and_map(struct device_node *node, int index)
+{
+	struct of_device *op = of_find_device_by_node(node);
+
+	if (!op || index < 0 || index >= op->num_irqs)
+		return 0;
+
+	return op->irqs[index];
+}
+EXPORT_SYMBOL(irq_of_parse_and_map);
+
+/* Take the archdata values for IOMMU, STC, and HOSTDATA found in
+ * BUS and propagate to all child of_device objects.
+ *
+ * The NUMA node is copied as well, and the walk recurses into every
+ * child that has children of its own.  A child node without a
+ * registered of_device is skipped together with its subtree, since
+ * of_find_device_by_node() returns NULL for it.
+ */
+void of_propagate_archdata(struct of_device *bus)
+{
+	struct dev_archdata *bus_sd = &bus->dev.archdata;
+	struct device_node *bus_dp = bus->node;
+	struct device_node *dp;
+
+	for (dp = bus_dp->child; dp; dp = dp->sibling) {
+		struct of_device *op = of_find_device_by_node(dp);
+
+		if (!op)
+			continue;
+
+		op->dev.archdata.iommu = bus_sd->iommu;
+		op->dev.archdata.stc = bus_sd->stc;
+		op->dev.archdata.host_controller = bus_sd->host_controller;
+		op->dev.archdata.numa_node = bus_sd->numa_node;
+
+		if (dp->child)
+			of_propagate_archdata(op);
+	}
+}
+
+struct bus_type of_platform_bus_type;
+EXPORT_SYMBOL(of_platform_bus_type);
+
+/* Fold @size consecutive 32-bit cells, most significant first, into
+ * one 64-bit value.  With more than two cells only the last two
+ * survive, because earlier ones are shifted out.
+ */
+static inline u64 of_read_addr(const u32 *cell, int size)
+{
+	u64 value = 0;
+
+	for (; size; size--)
+		value = (value << 32) | *cell++;
+
+	return value;
+}
+
+/* Store the address and size cell counts that apply to @dp, as reported
+ * by of_n_addr_cells()/of_n_size_cells().  Either output pointer may be
+ * NULL when the caller does not need that count.
+ */
+static void __init get_cells(struct device_node *dp,
+ int *addrc, int *sizec)
+{
+ if (addrc)
+ *addrc = of_n_addr_cells(dp);
+ if (sizec)
+ *sizec = of_n_size_cells(dp);
+}
+
+/* Max address size we deal with, in 32-bit cells */
+#define OF_MAX_ADDR_CELLS 4
+
+/* One bus-specific address translator. */
+struct of_bus {
+ /* Name of the bus type, e.g. "pci". */
+ const char *name;
+ /* Property holding the device addresses on this bus. */
+ const char *addr_prop_name;
+ /* Nonzero when the given node is a bus of this type; a NULL
+ * pointer makes the entry match every node.
+ */
+ int (*match)(struct device_node *parent);
+ /* Report the address/size cell counts; outputs may be NULL. */
+ void (*count_cells)(struct device_node *child,
+ int *addrc, int *sizec);
+ /* Translate addr in place through one "ranges" entry; returns 0
+ * on success and nonzero when the entry does not apply.
+ */
+ int (*map)(u32 *addr, const u32 *range,
+ int na, int ns, int pna);
+ /* Derive IORESOURCE_* flags from an address and the flags so far. */
+ unsigned long (*get_flags)(const u32 *addr, unsigned long);
+};
+
+/*
+ * Default translator (generic bus)
+ */
+
+/* Cell counts for the generic bus come straight from get_cells(). */
+static void of_bus_default_count_cells(struct device_node *dev,
+ int *addrc, int *sizec)
+{
+ get_cells(dev, addrc, sizec);
+}
+
+/* Return 1 when @addr lies outside [@base, @base + @size), else 0.
+ *
+ * Only the least significant 64 bits take part in the comparison;
+ * for 3 or 4 cell values that is a good enough approximation.
+ */
+static int of_out_of_range(const u32 *addr, const u32 *base,
+			   const u32 *size, int na, int ns)
+{
+	u64 value = of_read_addr(addr, na);
+	u64 start = of_read_addr(base, na);
+
+	if (value < start)
+		return 1;
+
+	return value >= start + of_read_addr(size, ns);
+}
+
+/* Translate @addr in place through one "ranges" entry of a generic bus.
+ *
+ * @addr:  child address (@na cells) in, parent address (@pna cells) out
+ * @range: one entry laid out as child base (@na cells), parent base
+ *         (@pna cells), size (@ns cells)
+ *
+ * Returns 0 on success, or -EINVAL when @ns exceeds 2 or @addr is not
+ * inside the entry.  @addr is left untouched on failure.
+ */
+static int of_bus_default_map(u32 *addr, const u32 *range,
+			      int na, int ns, int pna)
+{
+	u32 result[OF_MAX_ADDR_CELLS];
+	int i;
+
+	if (ns > 2) {
+		/* Newline-terminated so that the message is not glued to
+		 * whatever is printed next.
+		 */
+		printk("of_device: Cannot handle size cells (%d) > 2.\n", ns);
+		return -EINVAL;
+	}
+
+	if (of_out_of_range(addr, range, range + na + pna, na, ns))
+		return -EINVAL;
+
+	/* Start with the parent range base. */
+	memcpy(result, range + na, pna * 4);
+
+	/* Add in the child address offset, cell by cell and aligned at
+	 * the least significant cell.
+	 */
+	for (i = 0; i < na; i++)
+		result[pna - 1 - i] +=
+			(addr[na - 1 - i] -
+			 range[na - 1 - i]);
+
+	memcpy(addr, result, pna * 4);
+
+	return 0;
+}
+
+/* Keep flags chosen by a child bus; otherwise default to memory. */
+static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags)
+{
+	return flags ? flags : IORESOURCE_MEM;
+}
+
+/*
+ * PCI bus specific translator
+ */
+
+/* Match nodes named "pci", except for SUNW,simba bridges and bridges
+ * without a "ranges" property.
+ */
+static int of_bus_pci_match(struct device_node *np)
+{
+	const char *model;
+
+	if (strcmp(np->name, "pci") != 0)
+		return 0;
+
+	model = of_get_property(np, "model", NULL);
+	if (model && strcmp(model, "SUNW,simba") == 0)
+		return 0;
+
+	/* Do not do PCI specific frobbing if the
+	 * PCI bridge lacks a ranges property.  We
+	 * want to pass it through up to the next
+	 * parent as-is, not with the PCI translate
+	 * method which chops off the top address cell.
+	 */
+	return of_find_property(np, "ranges", NULL) != NULL;
+}
+
+/* Match SUNW,simba bridges.  PCI busses lacking a "ranges" property
+ * are treated just like simba.
+ */
+static int of_bus_simba_match(struct device_node *np)
+{
+	const char *model = of_get_property(np, "model", NULL);
+
+	if (model && strcmp(model, "SUNW,simba") == 0)
+		return 1;
+
+	return strcmp(np->name, "pci") == 0 &&
+	       !of_find_property(np, "ranges", NULL);
+}
+
+/* Simba translation is the identity: @addr is left untouched and every
+ * range entry is reported as a successful match.
+ */
+static int of_bus_simba_map(u32 *addr, const u32 *range,
+ int na, int ns, int pna)
+{
+ return 0;
+}
+
+/* PCI uses fixed counts: 3 address cells and 2 size cells.  Either
+ * output pointer may be NULL.
+ */
+static void of_bus_pci_count_cells(struct device_node *np,
+ int *addrc, int *sizec)
+{
+ if (addrc)
+ *addrc = 3;
+ if (sizec)
+ *sizec = 2;
+}
+
+/* Translate @addr in place through one PCI "ranges" entry.
+ *
+ * Bits 24-25 of the first cell must agree between @addr and @range, and
+ * the remaining cells must fall inside the entry.  Returns 0 on success
+ * and -EINVAL otherwise; @addr is left untouched on failure.
+ */
+static int of_bus_pci_map(u32 *addr, const u32 *range,
+ int na, int ns, int pna)
+{
+ u32 result[OF_MAX_ADDR_CELLS];
+ int i;
+
+ /* Check address type match */
+ if ((addr[0] ^ range[0]) & 0x03000000)
+ return -EINVAL;
+
+ if (of_out_of_range(addr + 1, range + 1, range + na + pna,
+ na - 1, ns))
+ return -EINVAL;
+
+ /* Start with the parent range base. */
+ memcpy(result, range + na, pna * 4);
+
+ /* Add in the child address offset, skipping high cell. */
+ for (i = 0; i < na - 1; i++)
+ result[pna - 1 - i] +=
+ (addr[na - 1 - i] -
+ range[na - 1 - i]);
+
+ memcpy(addr, result, pna * 4);
+
+ return 0;
+}
+
+/* Derive resource flags from the first cell of a PCI address.
+ *
+ * For PCI, whatever child busses may have chosen is overridden:
+ * space code 1 (bits 24-25) yields IORESOURCE_IO, codes 2 (32 bits)
+ * and 3 (64 bits) yield IORESOURCE_MEM, code 0 yields no type flag.
+ * Bit 30 adds IORESOURCE_PREFETCH.
+ */
+static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
+{
+	u32 hi = addr[0];
+	u32 space = (hi >> 24) & 0x03;
+	unsigned long res = 0;
+
+	if (space == 0x01)
+		res = IORESOURCE_IO;
+	else if (space == 0x02 || space == 0x03)
+		res = IORESOURCE_MEM;
+
+	if (hi & 0x40000000)
+		res |= IORESOURCE_PREFETCH;
+
+	return res;
+}
+
+/*
+ * SBUS bus specific translator
+ */
+
+/* Match nodes named "sbus" or "sbi". */
+static int of_bus_sbus_match(struct device_node *np)
+{
+ return !strcmp(np->name, "sbus") ||
+ !strcmp(np->name, "sbi");
+}
+
+/* SBUS uses fixed counts: 2 address cells and 1 size cell.  Either
+ * output pointer may be NULL.
+ */
+static void of_bus_sbus_count_cells(struct device_node *child,
+ int *addrc, int *sizec)
+{
+ if (addrc)
+ *addrc = 2;
+ if (sizec)
+ *sizec = 1;
+}
+
+/*
+ * FHC/Central bus specific translator.
+ *
+ * This is just needed to hard-code the address and size cell
+ * counts. 'fhc' and 'central' nodes lack the #address-cells and
+ * #size-cells properties, and if you walk to the root on such
+ * Enterprise boxes all you'll get is a #size-cells of 2 which is
+ * not what we want to use.
+ */
+/* Match nodes named "fhc" or "central". */
+static int of_bus_fhc_match(struct device_node *np)
+{
+ return !strcmp(np->name, "fhc") ||
+ !strcmp(np->name, "central");
+}
+
+/* Same fixed counts as SBUS: 2 address cells, 1 size cell. */
+#define of_bus_fhc_count_cells of_bus_sbus_count_cells
+
+/*
+ * Array of bus specific translators
+ *
+ * of_match_bus() returns the first entry that matches, so order matters.
+ * The final "default" entry has a NULL match hook and therefore catches
+ * every node the earlier entries rejected.
+ */
+
+static struct of_bus of_busses[] = {
+ /* PCI */
+ {
+ .name = "pci",
+ .addr_prop_name = "assigned-addresses",
+ .match = of_bus_pci_match,
+ .count_cells = of_bus_pci_count_cells,
+ .map = of_bus_pci_map,
+ .get_flags = of_bus_pci_get_flags,
+ },
+ /* SIMBA */
+ {
+ .name = "simba",
+ .addr_prop_name = "assigned-addresses",
+ .match = of_bus_simba_match,
+ .count_cells = of_bus_pci_count_cells,
+ .map = of_bus_simba_map,
+ .get_flags = of_bus_pci_get_flags,
+ },
+ /* SBUS */
+ {
+ .name = "sbus",
+ .addr_prop_name = "reg",
+ .match = of_bus_sbus_match,
+ .count_cells = of_bus_sbus_count_cells,
+ .map = of_bus_default_map,
+ .get_flags = of_bus_default_get_flags,
+ },
+ /* FHC */
+ {
+ .name = "fhc",
+ .addr_prop_name = "reg",
+ .match = of_bus_fhc_match,
+ .count_cells = of_bus_fhc_count_cells,
+ .map = of_bus_default_map,
+ .get_flags = of_bus_default_get_flags,
+ },
+ /* Default */
+ {
+ .name = "default",
+ .addr_prop_name = "reg",
+ .match = NULL,
+ .count_cells = of_bus_default_count_cells,
+ .map = of_bus_default_map,
+ .get_flags = of_bus_default_get_flags,
+ },
+};
+
+/* Return the first of_busses[] entry that accepts @np.  An entry
+ * without a match hook accepts everything, so falling off the end of
+ * the table is a BUG().
+ */
+static struct of_bus *of_match_bus(struct device_node *np)
+{
+	struct of_bus *bus = of_busses;
+	struct of_bus *end = of_busses + ARRAY_SIZE(of_busses);
+
+	for (; bus != end; bus++) {
+		if (!bus->match || bus->match(np))
+			return bus;
+	}
+
+	BUG();
+	return NULL;
+}
+
+/* Translate @addr one level up, through @parent's "ranges" property.
+ *
+ * @addr holds @na cells on entry and @pna cells on success.  Returns 0
+ * when the address was translated or may be passed up unchanged, and 1
+ * when no range entry applies.  @pbus is currently unused.
+ */
+static int __init build_one_resource(struct device_node *parent,
+ struct of_bus *bus,
+ struct of_bus *pbus,
+ u32 *addr,
+ int na, int ns, int pna)
+{
+ const u32 *ranges;
+ int rone, rlen;
+
+ ranges = of_get_property(parent, "ranges", &rlen);
+ if (ranges == NULL || rlen == 0) {
+ u32 result[OF_MAX_ADDR_CELLS];
+ int i;
+
+ /* No ranges: identity mapping, widened to pna cells with the
+ * value aligned at the least significant cell.
+ */
+ memset(result, 0, pna * 4);
+ for (i = 0; i < na; i++)
+ result[pna - 1 - i] =
+ addr[na - 1 - i];
+
+ memcpy(addr, result, pna * 4);
+ return 0;
+ }
+
+ /* Now walk through the ranges */
+ rlen /= 4;
+ rone = na + pna + ns;
+ for (; rlen >= rone; rlen -= rone, ranges += rone) {
+ if (!bus->map(addr, ranges, na, ns, pna))
+ return 0;
+ }
+
+ /* When we miss an I/O space match on PCI, just pass it up
+ * to the next PCI bridge and/or controller.
+ */
+ if (!strcmp(bus->name, "pci") &&
+ (addr[0] & 0x03000000) == 0x01000000)
+ return 0;
+
+ return 1;
+}
+
+/* Decide whether a child address of @pp can be used as-is (returns 1)
+ * or has to be translated further up the tree (returns 0).
+ */
+static int __init use_1to1_mapping(struct device_node *pp)
+{
+	/* Parents that lack a 'ranges' property but must still pass the
+	 * translation work upwards:
+	 *
+	 * - the dma node of an ISA bus, which passes the translation up
+	 *   to the root;
+	 * - intermediate nodes that some SBUS devices use to express
+	 *   hierarchy within the device itself.  These aren't real bus
+	 *   nodes, but the SBUS itself has to do the translation;
+	 * - all PCI bridges: if we get as far as checking the name the
+	 *   bridge lacks a ranges property, which includes cases like
+	 *   Simba.
+	 */
+	static const char *pass_up[] = {
+		"dma", "espdma", "ledma", "lebuffer", "pci",
+	};
+	int i;
+
+	/* If we have a ranges property in the parent, use it. */
+	if (of_find_property(pp, "ranges", NULL) != NULL)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(pass_up); i++) {
+		if (strcmp(pp->name, pass_up[i]) == 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+/* Nonzero: log every computed resource (set by "of_debug=", bit 0). */
+static int of_resource_verbose;
+
+/* Fill op->resource[] from the device's address property.
+ *
+ * Each entry of the property named by the parent bus type is translated
+ * up the device tree, one bus level at a time, until the root is reached
+ * or a level fails to translate.  An entry that cannot be translated
+ * keeps a zeroed resource with only the name set.  Does nothing for the
+ * root device (@parent == NULL) or when the property is missing/empty.
+ */
+static void __init build_device_resources(struct of_device *op,
+ struct device *parent)
+{
+ struct of_device *p_op;
+ struct of_bus *bus;
+ int na, ns;
+ int index, num_reg;
+ const void *preg;
+
+ if (!parent)
+ return;
+
+ p_op = to_of_device(parent);
+ bus = of_match_bus(p_op->node);
+ bus->count_cells(op->node, &na, &ns);
+
+ preg = of_get_property(op->node, bus->addr_prop_name, &num_reg);
+ if (!preg || num_reg == 0)
+ return;
+
+ /* Convert to num-cells. */
+ num_reg /= 4;
+
+ /* Convert to num-entries. */
+ num_reg /= na + ns;
+
+ /* Prevent overrunning the op->resource[] array. */
+ if (num_reg > PROMREG_MAX) {
+ printk(KERN_WARNING "%s: Too many regs (%d), "
+ "limiting to %d.\n",
+ op->node->full_name, num_reg, PROMREG_MAX);
+ num_reg = PROMREG_MAX;
+ }
+
+ for (index = 0; index < num_reg; index++) {
+ struct resource *r = &op->resource[index];
+ u32 addr[OF_MAX_ADDR_CELLS];
+ const u32 *reg = (preg + (index * ((na + ns) * 4)));
+ struct device_node *dp = op->node;
+ struct device_node *pp = p_op->node;
+ struct of_bus *pbus, *dbus;
+ u64 size, result = OF_BAD_ADDR;
+ unsigned long flags;
+ int dna, dns;
+ int pna, pns;
+
+ size = of_read_addr(reg + na, ns);
+ memcpy(addr, reg, na * 4);
+
+ flags = bus->get_flags(addr, 0);
+
+ if (use_1to1_mapping(pp)) {
+ result = of_read_addr(addr, na);
+ goto build_res;
+ }
+
+ dna = na;
+ dns = ns;
+ dbus = bus;
+
+ /* Climb one level per iteration; 'result' stays OF_BAD_ADDR if
+ * any level fails before the root is reached.
+ */
+ while (1) {
+ dp = pp;
+ pp = dp->parent;
+ if (!pp) {
+ result = of_read_addr(addr, dna);
+ break;
+ }
+
+ pbus = of_match_bus(pp);
+ pbus->count_cells(dp, &pna, &pns);
+
+ if (build_one_resource(dp, dbus, pbus, addr,
+ dna, dns, pna))
+ break;
+
+ flags = pbus->get_flags(addr, flags);
+
+ dna = pna;
+ dns = pns;
+ dbus = pbus;
+ }
+
+ build_res:
+ memset(r, 0, sizeof(*r));
+
+ if (of_resource_verbose)
+ printk("%s reg[%d] -> %lx\n",
+ op->node->full_name, index,
+ result);
+
+ if (result != OF_BAD_ADDR) {
+ /* Under the hypervisor the top four address bits are masked off. */
+ if (tlb_type == hypervisor)
+ result &= 0x0fffffffffffffffUL;
+
+ r->start = result;
+ r->end = result + size - 1;
+ r->flags = flags;
+ }
+ r->name = op->node->name;
+ }
+}
+
+/* Translate *@irq_p through @pp's "interrupt-map".
+ *
+ * Each map entry is (@na address cells, interrupt, controller phandle,
+ * translated interrupt).  The first @na cells of @dp's "reg" property,
+ * masked with @imask, are compared with the entry's address cells, and
+ * the entry's interrupt with *@irq_p.
+ *
+ * On a match *@irq_p is updated and the node found for the entry's
+ * phandle is returned.  Without a match *@irq_p is left alone and @pp is
+ * returned if it has an IRQ translator, NULL otherwise.  NULL is also
+ * returned when @dp has no usable "reg" property.
+ */
+static struct device_node * __init
+apply_interrupt_map(struct device_node *dp, struct device_node *pp,
+ const u32 *imap, int imlen, const u32 *imask,
+ unsigned int *irq_p)
+{
+ struct device_node *cp;
+ unsigned int irq = *irq_p;
+ struct of_bus *bus;
+ phandle handle;
+ const u32 *reg;
+ int na, num_reg, i;
+
+ bus = of_match_bus(pp);
+ bus->count_cells(dp, &na, NULL);
+
+ reg = of_get_property(dp, "reg", &num_reg);
+ if (!reg || !num_reg)
+ return NULL;
+
+ /* Convert the byte length into a count of (na + 3)-cell entries. */
+ imlen /= ((na + 3) * 4);
+ handle = 0;
+ for (i = 0; i < imlen; i++) {
+ int j;
+
+ for (j = 0; j < na; j++) {
+ if ((reg[j] & imask[j]) != imap[j])
+ goto next;
+ }
+ if (imap[na] == irq) {
+ handle = imap[na + 1];
+ irq = imap[na + 2];
+ break;
+ }
+
+ next:
+ imap += (na + 3);
+ }
+ if (i == imlen) {
+ /* Psycho and Sabre PCI controllers can have 'interrupt-map'
+ * properties that do not include the on-board device
+ * interrupts. Instead, the device's 'interrupts' property
+ * is already a fully specified INO value.
+ *
+ * Handle this by deciding that, if we didn't get a
+ * match in the parent's 'interrupt-map', and the
+ * parent is an IRQ translator, then use the parent as
+ * our IRQ controller.
+ */
+ if (pp->irq_trans)
+ return pp;
+
+ return NULL;
+ }
+
+ *irq_p = irq;
+ cp = of_find_node_by_phandle(handle);
+
+ return cp;
+}
+
+/* Translate PCI interrupt line @irq (1..4) of @dp across parent @pp when
+ * @pp has no interrupt-map.  Values outside 1..4, and devices without a
+ * "reg" property, are returned unchanged.
+ */
+static unsigned int __init pci_irq_swizzle(struct device_node *dp,
+ struct device_node *pp,
+ unsigned int irq)
+{
+ const struct linux_prom_pci_registers *regs;
+ unsigned int bus, devfn, slot, ret;
+
+ if (irq < 1 || irq > 4)
+ return irq;
+
+ regs = of_get_property(dp, "reg", NULL);
+ if (!regs)
+ return irq;
+
+ /* Bus number and device/function come from the first "reg" cell. */
+ bus = (regs->phys_hi >> 16) & 0xff;
+ devfn = (regs->phys_hi >> 8) & 0xff;
+ slot = (devfn >> 3) & 0x1f;
+
+ if (pp->irq_trans) {
+ /* Derived from Table 8-3, U2P User's Manual. This branch
+ * is handling a PCI controller that lacks a proper set of
+ * interrupt-map and interrupt-map-mask properties. The
+ * Ultra-E450 is one example.
+ *
+ * The bit layout is BSSLL, where:
+ * B: 0 on bus A, 1 on bus B
+ * S: 2-bit slot number, derived from PCI device number as
+ * (dev - 1) for bus A, or (dev - 2) for bus B
+ * L: 2-bit line number
+ */
+ if (bus & 0x80) {
+ /* PBM-A */
+ bus = 0x00;
+ slot = (slot - 1) << 2;
+ } else {
+ /* PBM-B */
+ bus = 0x10;
+ slot = (slot - 2) << 2;
+ }
+ irq -= 1;
+
+ ret = (bus | slot | irq);
+ } else {
+ /* Going through a PCI-PCI bridge that lacks a set of
+ * interrupt-map and interrupt-map-mask properties.
+ */
+ ret = ((irq - 1 + (slot & 3)) & 3) + 1;
+ }
+
+ return ret;
+}
+
+/* Nonzero: log every IRQ translation step (set by "of_debug=", bit 1). */
+static int of_irq_verbose;
+
+/* Turn one raw "interrupts" value of @op into a usable IRQ number.
+ *
+ * 0xffffffff is returned unchanged.  If the device's own node has an IRQ
+ * translator it is used directly.  Otherwise the tree is walked upwards,
+ * applying interrupt-map or PCI swizzle steps, until a node with a
+ * translator is found; if none is found the original value is returned.
+ * After a successful translation the IRQ affinity is set from the NUMA
+ * node returned by of_node_to_nid(), when there is one.  @parent is
+ * unused.
+ */
+static unsigned int __init build_one_device_irq(struct of_device *op,
+ struct device *parent,
+ unsigned int irq)
+{
+ struct device_node *dp = op->node;
+ struct device_node *pp, *ip;
+ unsigned int orig_irq = irq;
+ int nid;
+
+ if (irq == 0xffffffff)
+ return irq;
+
+ if (dp->irq_trans) {
+ irq = dp->irq_trans->irq_build(dp, irq,
+ dp->irq_trans->data);
+
+ if (of_irq_verbose)
+ printk("%s: direct translate %x --> %x\n",
+ dp->full_name, orig_irq, irq);
+
+ goto out;
+ }
+
+ /* Something more complicated. Walk up to the root, applying
+ * interrupt-map or bus specific translations, until we hit
+ * an IRQ translator.
+ *
+ * If we hit a bus type or situation we cannot handle, we
+ * stop and assume that the original IRQ number was in a
+ * format which has special meaning to its immediate parent.
+ */
+ pp = dp->parent;
+ ip = NULL;
+ while (pp) {
+ const void *imap, *imsk;
+ int imlen;
+
+ imap = of_get_property(pp, "interrupt-map", &imlen);
+ imsk = of_get_property(pp, "interrupt-map-mask", NULL);
+ if (imap && imsk) {
+ struct device_node *iret;
+ int this_orig_irq = irq;
+
+ iret = apply_interrupt_map(dp, pp,
+ imap, imlen, imsk,
+ &irq);
+
+ if (of_irq_verbose)
+ printk("%s: Apply [%s:%x] imap --> [%s:%x]\n",
+ op->node->full_name,
+ pp->full_name, this_orig_irq,
+ (iret ? iret->full_name : "NULL"), irq);
+
+ if (!iret)
+ break;
+
+ if (iret->irq_trans) {
+ ip = iret;
+ break;
+ }
+ } else {
+ if (!strcmp(pp->name, "pci")) {
+ unsigned int this_orig_irq = irq;
+
+ irq = pci_irq_swizzle(dp, pp, irq);
+ if (of_irq_verbose)
+ printk("%s: PCI swizzle [%s] "
+ "%x --> %x\n",
+ op->node->full_name,
+ pp->full_name, this_orig_irq,
+ irq);
+
+ }
+
+ if (pp->irq_trans) {
+ ip = pp;
+ break;
+ }
+ }
+ dp = pp;
+ pp = pp->parent;
+ }
+ if (!ip)
+ return orig_irq;
+
+ irq = ip->irq_trans->irq_build(op->node, irq,
+ ip->irq_trans->data);
+ if (of_irq_verbose)
+ printk("%s: Apply IRQ trans [%s] %x --> %x\n",
+ op->node->full_name, ip->full_name, orig_irq, irq);
+
+out:
+ /* NOTE(review): dp is the device's own node on the direct path, but
+ * the last node visited by the walk above otherwise.
+ */
+ nid = of_node_to_nid(dp);
+ if (nid != -1) {
+ cpumask_t numa_mask = node_to_cpumask(nid);
+
+ irq_set_affinity(irq, numa_mask);
+ }
+
+ return irq;
+}
+
+/* Create and register the of_device for device-tree node @dp.
+ *
+ * Fills in the clock frequency, port id, interrupts and resources from
+ * the node's properties and registers the device on
+ * of_platform_bus_type, as a child of @parent (NULL for the root).
+ *
+ * Returns the new device, or NULL when the allocation or the
+ * registration fails.
+ */
+static struct of_device * __init scan_one_device(struct device_node *dp,
+						 struct device *parent)
+{
+	struct of_device *op = kzalloc(sizeof(*op), GFP_KERNEL);
+	const unsigned int *irq;
+	struct dev_archdata *sd;
+	int len, i;
+
+	if (!op)
+		return NULL;
+
+	sd = &op->dev.archdata;
+	sd->prom_node = dp;
+	sd->op = op;
+
+	op->node = dp;
+
+	op->clock_freq = of_getintprop_default(dp, "clock-frequency",
+					       (25*1000*1000));
+	op->portid = of_getintprop_default(dp, "upa-portid", -1);
+	if (op->portid == -1)
+		op->portid = of_getintprop_default(dp, "portid", -1);
+
+	irq = of_get_property(dp, "interrupts", &len);
+	if (irq) {
+		op->num_irqs = len / 4;
+
+		/* Prevent overrunning the op->irqs[] array.  The count
+		 * has to be clamped before anything is copied; copying
+		 * the whole property first would write past the array.
+		 */
+		if (op->num_irqs > PROMINTR_MAX) {
+			printk(KERN_WARNING "%s: Too many irqs (%d), "
+			       "limiting to %d.\n",
+			       dp->full_name, op->num_irqs, PROMINTR_MAX);
+			op->num_irqs = PROMINTR_MAX;
+		}
+		memcpy(op->irqs, irq, op->num_irqs * 4);
+	} else {
+		op->num_irqs = 0;
+	}
+
+	build_device_resources(op, parent);
+	for (i = 0; i < op->num_irqs; i++)
+		op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]);
+
+	op->dev.parent = parent;
+	op->dev.bus = &of_platform_bus_type;
+	if (!parent)
+		dev_set_name(&op->dev, "root");
+	else
+		dev_set_name(&op->dev, "%08x", dp->node);
+
+	if (of_device_register(op)) {
+		printk("%s: Could not register of device.\n",
+		       dp->full_name);
+		kfree(op);
+		op = NULL;
+	}
+
+	return op;
+}
+
+/* Create of_devices for @dp and all of its siblings, recursing into
+ * the children of every node whose device could be created.
+ */
+static void __init scan_tree(struct device_node *dp, struct device *parent)
+{
+	for (; dp; dp = dp->sibling) {
+		struct of_device *op = scan_one_device(dp, parent);
+
+		if (!op)
+			continue;
+
+		scan_tree(dp->child, &op->dev);
+	}
+}
+
+/* Create the of_device for the root node "/" and, if that works, for
+ * the whole tree below it.
+ */
+static void __init scan_of_devices(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct of_device *root_op = scan_one_device(root, NULL);
+
+	if (root_op)
+		scan_tree(root->child, &root_op->dev);
+}
+
+/* Set up of_platform_bus_type under the name "of" and, on success,
+ * populate it from the device tree.  Returns the bus init status.
+ */
+static int __init of_bus_driver_init(void)
+{
+	int err = of_bus_type_init(&of_platform_bus_type, "of");
+
+	if (err)
+		return err;
+
+	scan_of_devices();
+
+	return 0;
+}
+
+postcore_initcall(of_bus_driver_init);
+
+/* Parse the "of_debug=" boot option: bit 0 turns on resource
+ * logging, bit 1 turns on IRQ translation logging.  Always returns 1.
+ */
+static int __init of_debug(char *str)
+{
+	int bits = 0;
+
+	get_option(&str, &bits);
+
+	if ((bits & 1) != 0)
+		of_resource_verbose = 1;
+	if ((bits & 2) != 0)
+		of_irq_verbose = 1;
+
+	return 1;
+}
+
+__setup("of_debug=", of_debug);
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
new file mode 100644
index 000000000000..bdb7c0a6d83d
--- /dev/null
+++ b/arch/sparc/kernel/pci.c
@@ -0,0 +1,1095 @@
+/* pci.c: UltraSparc PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ *
+ * OF tree based PCI bus probing taken from the PowerPC port
+ * with minor modifications, see there for credits.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/apb.h>
+
+#include "pci_impl.h"
+
+/* List of all PCI controllers found in the system. */
+struct pci_pbm_info *pci_pbm_root = NULL;
+
+/* Each PBM found gets a unique index. */
+int pci_num_pbms = 0;
+
+volatile int pci_poke_in_progress;
+volatile int pci_poke_cpu = -1;
+volatile int pci_poke_faulted;
+
+static DEFINE_SPINLOCK(pci_poke_lock);
+
+/* Read one byte from ADDR and store it in *RET.
+ *
+ * The access is done under pci_poke_lock with the saved irq flags.
+ * pci_poke_cpu and pci_poke_in_progress are set around the load and
+ * pci_poke_faulted is cleared beforehand.  *RET is written only if
+ * pci_poke_faulted is still zero afterwards; otherwise the caller's
+ * value is left untouched.
+ * NOTE(review): pci_poke_faulted is presumably set by fault handling
+ * code outside this section -- confirm.
+ */
+void pci_config_read8(u8 *addr, u8 *ret)
+{
+ unsigned long flags;
+ u8 byte;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ /* Byte load through ASI_PHYS_BYPASS_EC_E_L, fenced by membar #Sync. */
+ __asm__ __volatile__("membar #Sync\n\t"
+ "lduba [%1] %2, %0\n\t"
+ "membar #Sync"
+ : "=r" (byte)
+ : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ if (!pci_poke_faulted)
+ *ret = byte;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+/* Read a 16-bit value from ADDR and store it in *RET.
+ *
+ * Runs under pci_poke_lock.  The pci_poke_* state variables are set
+ * around the load; *RET is only updated when pci_poke_faulted is still
+ * zero after the access.
+ * NOTE(review): pci_poke_faulted is presumably set by fault handling
+ * code outside this section -- confirm.
+ */
+void pci_config_read16(u16 *addr, u16 *ret)
+{
+ unsigned long flags;
+ u16 word;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ /* Halfword load through ASI_PHYS_BYPASS_EC_E_L, fenced by membar #Sync. */
+ __asm__ __volatile__("membar #Sync\n\t"
+ "lduha [%1] %2, %0\n\t"
+ "membar #Sync"
+ : "=r" (word)
+ : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ if (!pci_poke_faulted)
+ *ret = word;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+/* Read a 32-bit value from ADDR and store it in *RET.
+ *
+ * Runs under pci_poke_lock.  The pci_poke_* state variables are set
+ * around the load; *RET is only updated when pci_poke_faulted is still
+ * zero after the access.
+ * NOTE(review): pci_poke_faulted is presumably set by fault handling
+ * code outside this section -- confirm.
+ */
+void pci_config_read32(u32 *addr, u32 *ret)
+{
+ unsigned long flags;
+ u32 dword;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ /* Word load through ASI_PHYS_BYPASS_EC_E_L, fenced by membar #Sync. */
+ __asm__ __volatile__("membar #Sync\n\t"
+ "lduwa [%1] %2, %0\n\t"
+ "membar #Sync"
+ : "=r" (dword)
+ : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ if (!pci_poke_faulted)
+ *ret = dword;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+/* Write the byte VAL to ADDR.
+ *
+ * Runs under pci_poke_lock with the pci_poke_* state variables set
+ * around the store.  pci_poke_faulted is cleared before the store but
+ * is not examined here, so no failure indication is returned.
+ */
+void pci_config_write8(u8 *addr, u8 val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ /* Byte store through ASI_PHYS_BYPASS_EC_E_L, fenced by membar #Sync. */
+ __asm__ __volatile__("membar #Sync\n\t"
+ "stba %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+/* Write the 16-bit value VAL to ADDR.
+ *
+ * Runs under pci_poke_lock with the pci_poke_* state variables set
+ * around the store.  pci_poke_faulted is cleared before the store but
+ * is not examined here, so no failure indication is returned.
+ */
+void pci_config_write16(u16 *addr, u16 val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ /* Halfword store through ASI_PHYS_BYPASS_EC_E_L, fenced by membar #Sync. */
+ __asm__ __volatile__("membar #Sync\n\t"
+ "stha %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+/* Write the 32-bit value VAL to ADDR.
+ *
+ * Runs under pci_poke_lock with the pci_poke_* state variables set
+ * around the store.  pci_poke_faulted is cleared before the store but
+ * is not examined here, so no failure indication is returned.
+ */
+void pci_config_write32(u32 *addr, u32 val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ /* Word store through ASI_PHYS_BYPASS_EC_E_L, fenced by membar #Sync. */
+ __asm__ __volatile__("membar #Sync\n\t"
+ "stwa %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+static int ofpci_verbose;
+
+/* Handler for the "ofpci_debug=" boot option: any non-zero value turns
+ * on ofpci_verbose.  Always returns 1.
+ */
+static int __init ofpci_debug(char *str)
+{
+	int enable = 0;
+
+	get_option(&str, &enable);
+	if (enable != 0)
+		ofpci_verbose = 1;
+
+	return 1;
+}
+
+__setup("ofpci_debug=", ofpci_debug);
+
+/* Translate the first cell of an OF PCI address (ADDR0) into resource
+ * flags.  Bit 0x02000000 selects a memory resource, otherwise bit
+ * 0x01000000 selects an I/O resource; if neither is set 0 is returned.
+ * For memory, the 64-bit/below-1M type bits are derived from ADDR0 and
+ * bit 0x40000000 adds the prefetch flags.
+ */
+static unsigned long pci_parse_of_flags(u32 addr0)
+{
+	unsigned long flags;
+
+	if (!(addr0 & 0x02000000)) {
+		if (addr0 & 0x01000000)
+			return IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+		return 0;
+	}
+
+	flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+	flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+	flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+	if (addr0 & 0x40000000)
+		flags |= IORESOURCE_PREFETCH | PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+	return flags;
+}
+
+/* The of_device layer has already turned every "assigned-addresses"
+ * entry into a physical address resource in OP.  All that is left is to
+ * work out which PCI config register each entry belongs to and copy the
+ * resource into the matching slot of DEV.
+ *
+ * Entries are 5 cells (20 bytes) each and are walked in step with
+ * op->resource[].  Entries whose flags decode to 0 or whose register
+ * number is neither a BAR nor dev->rom_base_reg are skipped.
+ */
+static void pci_parse_of_addrs(struct of_device *op,
+			       struct device_node *node,
+			       struct pci_dev *dev)
+{
+	struct resource *op_res;
+	const u32 *entry;
+	int bytes_left;
+
+	entry = of_get_property(node, "assigned-addresses", &bytes_left);
+	if (!entry)
+		return;
+
+	if (ofpci_verbose)
+		printk(" parse addresses (%d bytes) @ %p\n",
+		       bytes_left, entry);
+
+	for (op_res = &op->resource[0]; bytes_left >= 20;
+	     bytes_left -= 20, entry += 5, op_res++) {
+		unsigned long flags = pci_parse_of_flags(entry[0]);
+		struct resource *res;
+		int cfg_reg;
+
+		if (!flags)
+			continue;
+
+		/* Low byte of the first cell is the config register number. */
+		cfg_reg = entry[0] & 0xff;
+		if (ofpci_verbose)
+			printk(" start: %lx, end: %lx, i: %x\n",
+			       op_res->start, op_res->end, cfg_reg);
+
+		if (PCI_BASE_ADDRESS_0 <= cfg_reg &&
+		    cfg_reg <= PCI_BASE_ADDRESS_5) {
+			res = &dev->resource[(cfg_reg - PCI_BASE_ADDRESS_0) >> 2];
+		} else if (cfg_reg == dev->rom_base_reg) {
+			res = &dev->resource[PCI_ROM_RESOURCE];
+			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+		} else {
+			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", cfg_reg);
+			continue;
+		}
+
+		res->name = pci_name(dev);
+		res->flags = flags;
+		res->start = op_res->start;
+		res->end = op_res->end;
+	}
+}
+
+/* Build a struct pci_dev for the OF node NODE at DEVFN on BUS and add
+ * it to the bus.
+ *
+ * The archdata of both the new pci_dev and the of_device found for NODE
+ * is filled in from PBM.  Vendor/device/subsystem IDs come from OF
+ * properties, class and revision are read from config space, and the
+ * header type is chosen from the node name / "device_type" property.
+ *
+ * Returns the new device, or NULL if alloc_pci_dev() fails.
+ */
+static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
+ struct device_node *node,
+ struct pci_bus *bus, int devfn)
+{
+ struct dev_archdata *sd;
+ struct of_device *op;
+ struct pci_dev *dev;
+ const char *type;
+ u32 class;
+
+ dev = alloc_pci_dev();
+ if (!dev)
+ return NULL;
+
+ /* First set up the pci_dev's own archdata ... */
+ sd = &dev->dev.archdata;
+ sd->iommu = pbm->iommu;
+ sd->stc = &pbm->stc;
+ sd->host_controller = pbm;
+ sd->prom_node = node;
+ sd->op = op = of_find_device_by_node(node);
+ sd->numa_node = pbm->numa_node;
+
+ /* ... then the archdata of the matching of_device.  From here on
+  * 'sd' refers to the of_device's archdata, not the pci_dev's.
+  * NOTE(review): 'op' is dereferenced without a NULL check -- confirm
+  * of_find_device_by_node() cannot fail for nodes reaching this point.
+  */
+ sd = &op->dev.archdata;
+ sd->iommu = pbm->iommu;
+ sd->stc = &pbm->stc;
+ sd->numa_node = pbm->numa_node;
+
+ if (!strcmp(node->name, "ebus"))
+ of_propagate_archdata(op);
+
+ type = of_get_property(node, "device_type", NULL);
+ if (type == NULL)
+ type = "";
+
+ if (ofpci_verbose)
+ printk(" create device, devfn: %x, type: %s\n",
+ devfn, type);
+
+ dev->bus = bus;
+ dev->sysdata = node;
+ dev->dev.parent = bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->devfn = devfn;
+ dev->multifunction = 0; /* maybe a lie? */
+
+ dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff);
+ dev->device = of_getintprop_default(node, "device-id", 0xffff);
+ dev->subsystem_vendor =
+ of_getintprop_default(node, "subsystem-vendor-id", 0);
+ dev->subsystem_device =
+ of_getintprop_default(node, "subsystem-id", 0);
+
+ dev->cfg_size = pci_cfg_space_size(dev);
+
+ /* We can't actually use the firmware value, we have
+ * to read what is in the register right now. One
+ * reason is that in the case of IDE interfaces the
+ * firmware can sample the value before the IDE
+ * interface is programmed into native mode.
+ */
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+ dev->class = class >> 8;
+ dev->revision = class & 0xff;
+
+ dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+ dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if (ofpci_verbose)
+ printk(" class: 0x%x device name: %s\n",
+ dev->class, pci_name(dev));
+
+ /* I have seen IDE devices which will not respond to
+ * the bmdma simplex check reads if bus mastering is
+ * disabled.
+ */
+ if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)
+ pci_set_master(dev);
+
+ dev->current_state = 4; /* unknown power state */
+ dev->error_state = pci_channel_io_normal;
+
+ if (!strcmp(node->name, "pci")) {
+ /* a PCI-PCI bridge */
+ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+ dev->rom_base_reg = PCI_ROM_ADDRESS1;
+ } else if (!strcmp(type, "cardbus")) {
+ dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+ } else {
+ dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+ dev->rom_base_reg = PCI_ROM_ADDRESS;
+
+ /* 'sd' is the of_device's archdata here, so this reads
+  * through its own 'op' member.
+  * NOTE(review): presumably that member points back at 'op'
+  * itself -- confirm against where the of_device is created.
+  */
+ dev->irq = sd->op->irqs[0];
+ if (dev->irq == 0xffffffff)
+ dev->irq = PCI_IRQ_NONE;
+ }
+
+ pci_parse_of_addrs(sd->op, node, dev);
+
+ if (ofpci_verbose)
+ printk(" adding to system ...\n");
+
+ pci_device_add(dev, bus);
+
+ return dev;
+}
+
+/* Find the lowest and highest set bit positions in the 8-bit MAP and
+ * store them in *FIRST_P and *LAST_P.  If no bit is set the results are
+ * 8 and 0 respectively.
+ */
+static void __devinit apb_calc_first_last(u8 map, u32 *first_p, u32 *last_p)
+{
+	u32 lowest = 8;
+	u32 highest = 0;
+	u32 bit;
+
+	for (bit = 0; bit < 8; bit++) {
+		if (!(map & (1 << bit)))
+			continue;
+
+		if (bit < lowest)
+			lowest = bit;
+		if (bit > highest)
+			highest = bit;
+	}
+
+	*first_p = lowest;
+	*last_p = highest;
+}
+
+/* Shift RES upward by the start address of ROOT: both res->start and
+ * res->end have root->start added to them.
+ */
+static void pci_resource_adjust(struct resource *res,
+				struct resource *root)
+{
+	res->start = res->start + root->start;
+	res->end = res->end + root->start;
+}
+
+/* For PCI bus devices which lack a 'ranges' property we interrogate
+ * the config space values to set the resources, just like the generic
+ * Linux PCI probing code does.
+ *
+ * DEV is the bridge whose config space is read, BUS is the bus whose
+ * resource[0..2] (I/O, memory, prefetchable memory) are filled in, and
+ * PBM supplies the io_space/mem_space roots the windows are offset by.
+ * A window is only written when its decoded base is <= its limit.
+ */
+static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev,
+ struct pci_bus *bus,
+ struct pci_pbm_info *pbm)
+{
+ struct resource *res;
+ u8 io_base_lo, io_limit_lo;
+ u16 mem_base_lo, mem_limit_lo;
+ unsigned long base, limit;
+
+ /* I/O window. */
+ pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
+ pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
+ base = (io_base_lo & PCI_IO_RANGE_MASK) << 8;
+ limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8;
+
+ if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
+ u16 io_base_hi, io_limit_hi;
+
+ pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
+ pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
+ base |= (io_base_hi << 16);
+ limit |= (io_limit_hi << 16);
+ }
+
+ res = bus->resource[0];
+ if (base <= limit) {
+ res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+ /* Unlike the memory windows below, existing non-zero
+  * start/end values are kept here.
+  */
+ if (!res->start)
+ res->start = base;
+ if (!res->end)
+ res->end = limit + 0xfff;
+ pci_resource_adjust(res, &pbm->io_space);
+ }
+
+ /* Non-prefetchable memory window. */
+ pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
+ pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
+ base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
+ limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
+
+ res = bus->resource[1];
+ if (base <= limit) {
+ res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) |
+ IORESOURCE_MEM);
+ res->start = base;
+ res->end = limit + 0xfffff;
+ pci_resource_adjust(res, &pbm->mem_space);
+ }
+
+ /* Prefetchable memory window. */
+ pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
+ pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
+ base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
+ limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
+
+ if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+ u32 mem_base_hi, mem_limit_hi;
+
+ pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
+ pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
+
+ /*
+ * Some bridges set the base > limit by default, and some
+ * (broken) BIOSes do not initialize them. If we find
+ * this, just assume they are not being used.
+ */
+ if (mem_base_hi <= mem_limit_hi) {
+ base |= ((long) mem_base_hi) << 32;
+ limit |= ((long) mem_limit_hi) << 32;
+ }
+ }
+
+ res = bus->resource[2];
+ if (base <= limit) {
+ /* NOTE(review): the type bits are masked with
+  * PCI_MEMORY_RANGE_TYPE_MASK rather than a PCI_PREF_* mask --
+  * confirm the two masks are interchangeable here.
+  */
+ res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) |
+ IORESOURCE_MEM | IORESOURCE_PREFETCH);
+ res->start = base;
+ res->end = limit + 0xfffff;
+ pci_resource_adjust(res, &pbm->mem_space);
+ }
+}
+
+/* SUNW,simba PCI bridges have no usable 'ranges' property, so build
+ * the bus I/O and memory windows from the bridge's APB address map
+ * registers instead.  Each map bit stands for a 2MB (1 << 21) slice;
+ * the window spans from the first to the last slice whose bit is set.
+ */
+static void __devinit apb_fake_ranges(struct pci_dev *dev,
+				      struct pci_bus *bus,
+				      struct pci_pbm_info *pbm)
+{
+	struct resource *window;
+	u32 lo_slice, hi_slice;
+	u8 addr_map;
+
+	/* I/O window goes into bus->resource[0]. */
+	pci_read_config_byte(dev, APB_IO_ADDRESS_MAP, &addr_map);
+	apb_calc_first_last(addr_map, &lo_slice, &hi_slice);
+	window = bus->resource[0];
+	window->start = (lo_slice << 21);
+	window->end = (hi_slice << 21) + ((1 << 21) - 1);
+	window->flags = IORESOURCE_IO;
+	pci_resource_adjust(window, &pbm->io_space);
+
+	/* Memory window goes into bus->resource[1]. */
+	pci_read_config_byte(dev, APB_MEM_ADDRESS_MAP, &addr_map);
+	apb_calc_first_last(addr_map, &lo_slice, &hi_slice);
+	window = bus->resource[1];
+	window->start = (lo_slice << 21);
+	window->end = (hi_slice << 21) + ((1 << 21) - 1);
+	window->flags = IORESOURCE_MEM;
+	pci_resource_adjust(window, &pbm->mem_space);
+}
+
+static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm,
+ struct device_node *node,
+ struct pci_bus *bus);
+
+#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
+
+/* Set up the secondary bus behind the PCI-PCI bridge DEV (OF node NODE)
+ * and scan it.
+ *
+ * The bus number range comes from the "bus-range" property.  Bus
+ * resources come from the "ranges" property when present; otherwise
+ * they are derived from APB registers (model "SUNW,simba") or from the
+ * standard bridge config registers.  Returns without scanning if
+ * "bus-range" is missing/malformed or the bus cannot be created.
+ */
+static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm,
+ struct device_node *node,
+ struct pci_dev *dev)
+{
+ struct pci_bus *bus;
+ const u32 *busrange, *ranges;
+ int len, i, simba;
+ struct resource *res;
+ unsigned int flags;
+ u64 size;
+
+ if (ofpci_verbose)
+ printk("of_scan_pci_bridge(%s)\n", node->full_name);
+
+ /* parse bus-range property */
+ busrange = of_get_property(node, "bus-range", &len);
+ if (busrange == NULL || len != 8) {
+ printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+ /* 'len' is reused: from here on it is the byte length of "ranges". */
+ ranges = of_get_property(node, "ranges", &len);
+ simba = 0;
+ if (ranges == NULL) {
+ const char *model = of_get_property(node, "model", NULL);
+ if (model && !strcmp(model, "SUNW,simba"))
+ simba = 1;
+ }
+
+ bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+ if (!bus) {
+ printk(KERN_ERR "Failed to create pci bus for %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus->primary = dev->bus->number;
+ bus->subordinate = busrange[1];
+ bus->bridge_ctl = 0;
+
+ /* parse ranges property, or cook one up by hand for Simba */
+ /* PCI #address-cells == 3 and #size-cells == 2 always */
+ /* Point every bus resource slot at the bridge device's bridge
+  * resources and clear their flags.
+  */
+ res = &dev->resource[PCI_BRIDGE_RESOURCES];
+ for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+ res->flags = 0;
+ bus->resource[i] = res;
+ ++res;
+ }
+ if (simba) {
+ apb_fake_ranges(dev, bus, pbm);
+ goto after_ranges;
+ } else if (ranges == NULL) {
+ pci_cfg_fake_ranges(dev, bus, pbm);
+ goto after_ranges;
+ }
+ /* Slot 0 is reserved for I/O; memory ranges fill slots 1 and up.
+  * Each "ranges" entry is 8 cells (32 bytes).
+  */
+ i = 1;
+ for (; len >= 32; len -= 32, ranges += 8) {
+ struct resource *root;
+
+ flags = pci_parse_of_flags(ranges[0]);
+ size = GET_64BIT(ranges, 6);
+ if (flags == 0 || size == 0)
+ continue;
+ if (flags & IORESOURCE_IO) {
+ res = bus->resource[0];
+ if (res->flags) {
+ printk(KERN_ERR "PCI: ignoring extra I/O range"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ root = &pbm->io_space;
+ } else {
+ if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+ printk(KERN_ERR "PCI: too many memory ranges"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ res = bus->resource[i];
+ ++i;
+ root = &pbm->mem_space;
+ }
+
+ res->start = GET_64BIT(ranges, 1);
+ res->end = res->start + size - 1;
+ res->flags = flags;
+
+ /* Another way to implement this would be to add an of_device
+ * layer routine that can calculate a resource for a given
+ * range property value in a PCI device.
+ */
+ pci_resource_adjust(res, root);
+ }
+after_ranges:
+ sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+ bus->number);
+ if (ofpci_verbose)
+ printk(" bus name: %s\n", bus->name);
+
+ pci_of_scan_bus(pbm, node, bus);
+}
+
+/* Create a pci_dev for each child of the OF node NODE that carries a
+ * "reg" property of at least 20 bytes, attaching it to BUS.  Children
+ * that turn out to be PCI-PCI or CardBus bridges are handed to
+ * of_scan_pci_bridge() so the buses behind them get scanned as well.
+ */
+static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm,
+				      struct device_node *node,
+				      struct pci_bus *bus)
+{
+	struct device_node *child;
+	int last_devfn = -1;
+
+	if (ofpci_verbose)
+		printk("PCI: scan_bus[%s] bus no %d\n",
+		       node->full_name, bus->number);
+
+	for (child = of_get_next_child(node, NULL); child != NULL;
+	     child = of_get_next_child(node, child)) {
+		struct pci_dev *pdev;
+		const u32 *reg;
+		int reglen, devfn;
+
+		if (ofpci_verbose)
+			printk(" * %s\n", child->full_name);
+
+		reg = of_get_property(child, "reg", &reglen);
+		if (!reg || reglen < 20)
+			continue;
+
+		devfn = (reg[0] >> 8) & 0xff;
+
+		/* Some device trees list the same PCI device twice in a
+		 * row (the V100 lists device number 3 once as "pm" and
+		 * again as "lomp"), so skip a node whose devfn repeats
+		 * the previous one.
+		 */
+		if (devfn == last_devfn)
+			continue;
+		last_devfn = devfn;
+
+		pdev = of_create_pci_dev(pbm, child, bus, devfn);
+		if (!pdev)
+			continue;
+
+		if (ofpci_verbose)
+			printk("PCI: dev header type: %x\n",
+			       pdev->hdr_type);
+
+		if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+		    pdev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+			of_scan_pci_bridge(pbm, child, pdev);
+	}
+}
+
+/* sysfs "show" callback: print the full OF path of the PCI device's
+ * prom node, followed by a newline, into BUF.
+ */
+static ssize_t
+show_pciobppath_attr(struct device * dev, struct device_attribute * attr, char * buf)
+{
+	struct device_node *dp = to_pci_dev(dev)->dev.archdata.prom_node;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", dp->full_name);
+}
+
+static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH, show_pciobppath_attr, NULL);
+
+/* Add the "obppath" sysfs attribute to every device on BUS, then do the
+ * same for each child bus.
+ */
+static void __devinit pci_bus_register_of_sysfs(struct pci_bus *bus)
+{
+	struct pci_bus *child_bus;
+	struct pci_dev *pdev;
+	int err;
+
+	/* Failure to create the file is deliberately ignored; the result
+	 * is stored in 'err' only so that the call's return value is
+	 * consumed.
+	 */
+	list_for_each_entry(pdev, &bus->devices, bus_list)
+		err = sysfs_create_file(&pdev->dev.kobj, &dev_attr_obppath.attr);
+
+	list_for_each_entry(child_bus, &bus->children, node)
+		pci_bus_register_of_sysfs(child_bus);
+}
+
+/* Create the root PCI bus for PBM under PARENT, scan the OF tree below
+ * the PBM's node for devices, add them and register their sysfs
+ * attributes.  Returns the new bus, or NULL if it could not be created.
+ */
+struct pci_bus * __devinit pci_scan_one_pbm(struct pci_pbm_info *pbm,
+					    struct device *parent)
+{
+	struct device_node *pbm_node = pbm->op->node;
+	struct pci_bus *root_bus;
+
+	printk("PCI: Scanning PBM %s\n", pbm_node->full_name);
+
+	root_bus = pci_create_bus(parent, pbm->pci_first_busno,
+				  pbm->pci_ops, pbm);
+	if (root_bus == NULL) {
+		printk(KERN_ERR "Failed to create bus for %s\n",
+		       pbm_node->full_name);
+		return NULL;
+	}
+
+	root_bus->resource[0] = &pbm->io_space;
+	root_bus->resource[1] = &pbm->mem_space;
+	root_bus->secondary = pbm->pci_first_busno;
+	root_bus->subordinate = pbm->pci_last_busno;
+
+	pci_of_scan_bus(pbm, pbm_node, root_bus);
+	pci_bus_add_devices(root_bus);
+	pci_bus_register_of_sysfs(root_bus);
+
+	return root_bus;
+}
+
+/* Point the bus I/O and memory resources at the owning PBM's spaces.
+ * Generic PCI bus probing sets these to &io{port,mem}_resource, which
+ * is wrong for us.
+ */
+void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
+{
+	struct pci_pbm_info *owner = pbus->sysdata;
+
+	pbus->resource[1] = &owner->mem_space;
+	pbus->resource[0] = &owner->io_space;
+}
+
+/* Pick the root resource that R belongs under: the PBM's mem_space for
+ * memory resources, its io_space for I/O resources, NULL for anything
+ * else.  If both flag bits are set, memory wins.
+ */
+struct resource *pcibios_select_root(struct pci_dev *pdev, struct resource *r)
+{
+	struct pci_pbm_info *pbm = pdev->bus->sysdata;
+
+	if (r->flags & IORESOURCE_MEM)
+		return &pbm->mem_space;
+	if (r->flags & IORESOURCE_IO)
+		return &pbm->io_space;
+
+	return NULL;
+}
+
+/* No-op in this file: the body is empty, so PDEV and IRQ are ignored. */
+void pcibios_update_irq(struct pci_dev *pdev, int irq)
+{
+}
+
+/* No-op in this file: the body is empty, so RES is left unmodified. */
+void pcibios_align_resource(void *data, struct resource *res,
+ resource_size_t size, resource_size_t align)
+{
+}
+
+/* Turn on I/O and/or memory decoding in DEV's PCI command register for
+ * the resources selected by MASK (bit N selects dev->resource[N]).  The
+ * register is only written when its value actually changes.  Always
+ * returns 0.
+ */
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 orig_cmd, new_cmd;
+	int idx;
+
+	pci_read_config_word(dev, PCI_COMMAND, &new_cmd);
+	orig_cmd = new_cmd;
+
+	for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
+		struct resource *res;
+
+		/* Only set up the requested stuff */
+		if ((mask & (1<<idx)) == 0)
+			continue;
+
+		res = &dev->resource[idx];
+		if (res->flags & IORESOURCE_IO)
+			new_cmd |= PCI_COMMAND_IO;
+		if (res->flags & IORESOURCE_MEM)
+			new_cmd |= PCI_COMMAND_MEMORY;
+	}
+
+	if (new_cmd == orig_cmd)
+		return 0;
+
+	printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
+	       pci_name(dev), new_cmd);
+	/* Enable the appropriate bits in the PCI command register. */
+	pci_write_config_word(dev, PCI_COMMAND, new_cmd);
+
+	return 0;
+}
+
+/* Convert the CPU-side resource RES into a bus-relative REGION by
+ * subtracting the start of the PBM space (I/O or memory, chosen by
+ * res->flags) that the resource lives in.
+ */
+void pcibios_resource_to_bus(struct pci_dev *pdev, struct pci_bus_region *region,
+			     struct resource *res)
+{
+	struct pci_pbm_info *pbm = pdev->bus->sysdata;
+	struct resource *root;
+	resource_size_t base;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &pbm->io_space;
+	else
+		root = &pbm->mem_space;
+
+	/* Offset of bus address zero within the chosen space. */
+	base = root->start;
+
+	region->start = res->start - base;
+	region->end = res->end - base;
+}
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+
+/* Convert the bus-relative REGION into the CPU-side resource RES by
+ * copying its bounds and offsetting them by the start of the PBM space
+ * (I/O or memory) selected by res->flags.
+ */
+void pcibios_bus_to_resource(struct pci_dev *pdev, struct resource *res,
+			     struct pci_bus_region *region)
+{
+	struct pci_pbm_info *pbm = pdev->bus->sysdata;
+	struct resource *root = (res->flags & IORESOURCE_IO) ?
+		&pbm->io_space : &pbm->mem_space;
+
+	res->start = region->start;
+	res->end = region->end;
+
+	pci_resource_adjust(res, root);
+}
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+
+/* Returns STR unchanged; no option is parsed or consumed here. */
+char * __devinit pcibios_setup(char *str)
+{
+ return str;
+}
+
+/* Platform support for /proc/bus/pci/X/Y mmap()s. */
+
+/* If the user uses a host-bridge as the PCI device, he may use
+ * this to perform a raw mmap() of the I/O or MEM space behind
+ * that controller.
+ *
+ * This can be useful for execution of x86 PCI bios initialization code
+ * on a PCI card, like the xfree86 int10 stuff does.
+ *
+ * Checks that the byte range requested through VMA lies entirely
+ * inside the PBM's I/O or memory space (selected by MMAP_STATE) and, if
+ * so, rewrites vma->vm_pgoff to the corresponding physical page offset.
+ *
+ * Returns 0 on success, -EINVAL if the request is out of range.
+ */
+static int __pci_mmap_make_offset_bus(struct pci_dev *pdev, struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state)
+{
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+	unsigned long space_size, user_offset, user_size;
+	struct resource *space;
+
+	if (mmap_state == pci_mmap_io)
+		space = &pbm->io_space;
+	else
+		space = &pbm->mem_space;
+
+	space_size = (space->end - space->start) + 1;
+
+	/* Make sure the request is in range. */
+	user_offset = vma->vm_pgoff << PAGE_SHIFT;
+	user_size = vma->vm_end - vma->vm_start;
+
+	/* Compare the size against the room left after the offset
+	 * instead of forming offset + size, so that a sum which wraps
+	 * around cannot slip past the check.
+	 */
+	if (user_offset >= space_size ||
+	    user_size > space_size - user_offset)
+		return -EINVAL;
+
+	vma->vm_pgoff = (space->start + user_offset) >> PAGE_SHIFT;
+
+	return 0;
+}
+
+/* Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for his device which he wishes
+ * to mmap. They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * For non-host-bridge devices the resulting physical range must also
+ * fall inside one of the device's active resources of the matching type
+ * (the ROM resource only counts for memory mappings).
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static int __pci_mmap_make_offset(struct pci_dev *pdev,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ unsigned long user_paddr, user_size;
+ int i, err;
+
+ /* First compute the physical address in vma->vm_pgoff,
+ * making sure the user offset is within range in the
+ * appropriate PCI space.
+ */
+ err = __pci_mmap_make_offset_bus(pdev, vma, mmap_state);
+ if (err)
+ return err;
+
+ /* If this is a mapping on a host bridge, any address
+ * is OK.
+ */
+ if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+ return err;
+
+ /* Otherwise make sure it's in the range for one of the
+ * device's resources.
+ */
+ user_paddr = vma->vm_pgoff << PAGE_SHIFT;
+ user_size = vma->vm_end - vma->vm_start;
+
+ /* The loop breaks on the first resource that contains the request;
+  * running off the end (i > PCI_ROM_RESOURCE) means none matched.
+  */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &pdev->resource[i];
+ resource_size_t aligned_end;
+
+ /* Active? */
+ if (!rp->flags)
+ continue;
+
+ /* Same type? */
+ if (i == PCI_ROM_RESOURCE) {
+ if (mmap_state != pci_mmap_mem)
+ continue;
+ } else {
+ if ((mmap_state == pci_mmap_io &&
+ (rp->flags & IORESOURCE_IO) == 0) ||
+ (mmap_state == pci_mmap_mem &&
+ (rp->flags & IORESOURCE_MEM) == 0))
+ continue;
+ }
+
+ /* Align the resource end to the next page address.
+ * PAGE_SIZE intentionally added instead of (PAGE_SIZE - 1),
+ * because actually we need the address of the next byte
+ * after rp->end.
+ */
+ aligned_end = (rp->end + PAGE_SIZE) & PAGE_MASK;
+
+ if ((rp->start <= user_paddr) &&
+ (user_paddr + user_size) <= aligned_end)
+ break;
+ }
+
+ if (i > PCI_ROM_RESOURCE)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Set vm_flags of VMA, as appropriate for this architecture, for a pci device
+ * mapping.  Here that means OR-ing in VM_IO and VM_RESERVED; DEV and
+ * MMAP_STATE are not consulted.
+ */
+static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ vma->vm_flags |= (VM_IO | VM_RESERVED);
+}
+
+/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.  The body is empty, so VMA is left unchanged.
+ */
+static void __pci_mmap_set_pgprot(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ /* Our io_remap_pfn_range takes care of this, do nothing. */
+}
+
+/* Map a PCI device region into a process.  The target range in the
+ * process is [vma->vm_start, vma->vm_end); on entry vma->vm_pgoff holds
+ * the user-supplied offset, which is validated and converted to a
+ * physical page offset before the pages are remapped non-cached.  The
+ * pci device is passed in so mapping decisions can be made per device
+ * or per bus.  WRITE_COMBINE is not used.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state,
+			int write_combine)
+{
+	int err = __pci_mmap_make_offset(dev, vma, mmap_state);
+
+	if (err < 0)
+		return err;
+
+	__pci_mmap_set_flags(dev, vma, mmap_state);
+	__pci_mmap_set_pgprot(dev, vma, mmap_state);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return io_remap_pfn_range(vma, vma->vm_start,
+				  vma->vm_pgoff,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+}
+
+#ifdef CONFIG_NUMA
+/* Return the numa_node recorded in the PBM that PBUS->sysdata points to. */
+int pcibus_to_node(struct pci_bus *pbus)
+{
+ struct pci_pbm_info *pbm = pbus->sysdata;
+
+ return pbm->numa_node;
+}
+EXPORT_SYMBOL(pcibus_to_node);
+#endif
+
+/* Return the domain number for this pci bus, which is the index of the
+ * PBM stored in pbus->sysdata, or -ENXIO when the bus has no PBM.
+ */
+int pci_domain_nr(struct pci_bus *pbus)
+{
+	struct pci_pbm_info *pbm = pbus->sysdata;
+
+	if (pbm)
+		return pbm->index;
+
+	return -ENXIO;
+}
+EXPORT_SYMBOL(pci_domain_nr);
+
+#ifdef CONFIG_PCI_MSI
+/* Set up an MSI for PDEV by delegating to the host controller's
+ * setup_msi_irq hook.  Returns -EINVAL if the controller has no such
+ * hook, otherwise whatever the hook returns.
+ */
+int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+	unsigned int virt_irq;
+
+	if (pbm->setup_msi_irq)
+		return pbm->setup_msi_irq(&virt_irq, pdev, desc);
+
+	return -EINVAL;
+}
+
+/* Tear down the MSI behind VIRT_IRQ by delegating to the
+ * teardown_msi_irq hook of the owning device's host controller, if the
+ * controller provides one.
+ */
+void arch_teardown_msi_irq(unsigned int virt_irq)
+{
+	struct msi_desc *desc = get_irq_msi(virt_irq);
+	struct pci_dev *pdev = desc->dev;
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+
+	if (!pbm->teardown_msi_irq)
+		return;
+
+	pbm->teardown_msi_irq(virt_irq, pdev);
+}
+#endif /* CONFIG_PCI_MSI */
+
+/* Return the OF device node stored in PDEV's archdata (prom_node). */
+struct device_node *pci_device_to_OF_node(struct pci_dev *pdev)
+{
+ return pdev->dev.archdata.prom_node;
+}
+EXPORT_SYMBOL(pci_device_to_OF_node);
+
+/* ALI sound chips generate 31-bits of DMA, a special register in the
+ * ALI M1533 ISA bridge determines what bit 31 is emitted as.
+ *
+ * Sets (SET_BIT != 0) or clears (SET_BIT == 0) bit 0 of config register
+ * 0x7e on the M1533 bridge.  PDEV itself is not accessed.  If no M1533
+ * bridge is present nothing is done.
+ */
+static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit)
+{
+	struct pci_dev *ali_isa_bridge;
+	u8 val;
+
+	ali_isa_bridge = pci_get_device(PCI_VENDOR_ID_AL,
+					PCI_DEVICE_ID_AL_M1533,
+					NULL);
+
+	/* pci_get_device() yields NULL when no device matches; do not
+	 * hand a NULL device to the config space accessors.
+	 */
+	if (!ali_isa_bridge)
+		return;
+
+	pci_read_config_byte(ali_isa_bridge, 0x7e, &val);
+	if (set_bit)
+		val |= 0x01;
+	else
+		val &= ~0x01;
+	pci_write_config_byte(ali_isa_bridge, 0x7e, val);
+
+	/* Drop the reference taken by pci_get_device(). */
+	pci_dev_put(ali_isa_bridge);
+}
+
+/* Report whether DMA with DEVICE_MASK is usable for PDEV.
+ *
+ * The required address mask is 0xffffffff when PDEV is NULL, otherwise
+ * the dma_addr_mask of the device's IOMMU.  The ALI M5451 asking for a
+ * 31-bit mask is special-cased: the ALI bridge is programmed to supply
+ * bit 31 and 1 is returned.  Masks of 2^32 or above are rejected.
+ * Otherwise returns non-zero iff DEVICE_MASK covers every required bit.
+ */
+int pci_dma_supported(struct pci_dev *pdev, u64 device_mask)
+{
+	u64 dma_addr_mask = 0xffffffff;
+
+	if (pdev != NULL) {
+		struct iommu *iommu = pdev->dev.archdata.iommu;
+
+		dma_addr_mask = iommu->dma_addr_mask;
+
+		if (pdev->vendor == PCI_VENDOR_ID_AL &&
+		    pdev->device == PCI_DEVICE_ID_AL_M5451 &&
+		    device_mask == 0x7fffffff) {
+			ali_sound_dma_hack(pdev,
+					   (dma_addr_mask & 0x80000000) != 0);
+			return 1;
+		}
+	}
+
+	if (device_mask >= (1UL << 32UL))
+		return 0;
+
+	return (device_mask & dma_addr_mask) == dma_addr_mask;
+}
+
+/* Convert the bounds of resource RP into user-visible values by
+ * subtracting the start of the host controller's I/O or memory space
+ * (chosen by rp->flags).  Results go to *START and *END; BAR is unused.
+ */
+void pci_resource_to_user(const struct pci_dev *pdev, int bar,
+			  const struct resource *rp, resource_size_t *start,
+			  resource_size_t *end)
+{
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+	unsigned long base;
+
+	base = (rp->flags & IORESOURCE_IO) ? pbm->io_space.start
+					   : pbm->mem_space.start;
+
+	*start = rp->start - base;
+	*end = rp->end - base;
+}
diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c
new file mode 100644
index 000000000000..23b88082d0b2
--- /dev/null
+++ b/arch/sparc/kernel/pci_common.c
@@ -0,0 +1,545 @@
+/* pci_common.c: PCI controller common support.
+ *
+ * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/oplib.h>
+
+#include "pci_impl.h"
+#include "pci_sun4v.h"
+
+/* Return 1 when @bus lies outside the PBM's [first, last] bus number
+ * window, 0 when it is inside.  @devfn and @reg are not examined.
+ */
+static int config_out_of_range(struct pci_pbm_info *pbm,
+			       unsigned long bus,
+			       unsigned long devfn,
+			       unsigned long reg)
+{
+	int in_window = (bus >= pbm->pci_first_busno &&
+			 bus <= pbm->pci_last_busno);
+
+	return in_window ? 0 : 1;
+}
+
+/* Build the config space address for (bus, devfn, reg) on this PBM.
+ *
+ * The register number occupies the low config_space_reg_bits bits,
+ * devfn sits directly above it and the bus number 8 bits above that.
+ * Returns NULL when the bus number is out of range for the PBM.
+ */
+static void *sun4u_config_mkaddr(struct pci_pbm_info *pbm,
+				 unsigned long bus,
+				 unsigned long devfn,
+				 unsigned long reg)
+{
+	unsigned long rbits = pbm->config_space_reg_bits;
+	unsigned long reg_field, devfn_field, bus_field;
+
+	if (config_out_of_range(pbm, bus, devfn, reg))
+		return NULL;
+
+	reg_field = reg & ((1 << rbits) - 1);
+	devfn_field = devfn << rbits;
+	bus_field = bus << (rbits + 8);
+
+	return (void *) (pbm->config_space | bus_field | devfn_field | reg_field);
+}
+
+/* At least on Sabre, it is necessary to access all PCI host controller
+ * registers at their natural size, otherwise zeros are returned.
+ * Strange but true, and I see no language in the UltraSPARC-IIi
+ * programmer's manual that mentions this even indirectly.
+ */
+static int sun4u_read_pci_cfg_host(struct pci_pbm_info *pbm,
+ unsigned char bus, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ u32 tmp32, *addr;
+ u16 tmp16;
+ u8 tmp8;
+
+ /* No address means the bus is out of range for this PBM; *value is
+ * left exactly as the caller passed it in.
+ */
+ addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+ if (!addr)
+ return PCIBIOS_SUCCESSFUL;
+
+ switch (size) {
+ case 1:
+ if (where < 8) {
+ /* Offsets below 8 are fetched as a 16-bit word from the
+ * even address and the requested byte is extracted.
+ */
+ unsigned long align = (unsigned long) addr;
+
+ align &= ~1;
+ pci_config_read16((u16 *)align, &tmp16);
+ if (where & 1)
+ *value = tmp16 >> 8;
+ else
+ *value = tmp16 & 0xff;
+ } else {
+ pci_config_read8((u8 *)addr, &tmp8);
+ *value = (u32) tmp8;
+ }
+ break;
+
+ case 2:
+ if (where < 8) {
+ pci_config_read16((u16 *)addr, &tmp16);
+ *value = (u32) tmp16;
+ } else {
+ /* At offset 8 and above a 16-bit value is assembled
+ * from two byte reads, low byte first.
+ */
+ pci_config_read8((u8 *)addr, &tmp8);
+ *value = (u32) tmp8;
+ pci_config_read8(((u8 *)addr) + 1, &tmp8);
+ *value |= ((u32) tmp8) << 8;
+ }
+ break;
+
+ case 4:
+ /* A 32-bit read is two recursive 16-bit reads; tmp32 is
+ * preset to all-ones before each of them.
+ */
+ tmp32 = 0xffffffff;
+ sun4u_read_pci_cfg_host(pbm, bus, devfn,
+ where, 2, &tmp32);
+ *value = tmp32;
+
+ tmp32 = 0xffffffff;
+ sun4u_read_pci_cfg_host(pbm, bus, devfn,
+ where + 2, 2, &tmp32);
+ *value |= tmp32 << 16;
+ break;
+ }
+ /* Every path, including unsupported sizes, reports success. */
+ return PCIBIOS_SUCCESSFUL;
+}
+
+/* pci_ops read hook for sun4u controllers.
+ *
+ * *value is preloaded with all-ones for the requested width so that a
+ * skipped access reads back as all-ones.  Bus 0 / slot 0 is routed to
+ * the host controller accessor.  Misaligned 16/32-bit accesses are
+ * logged and skipped.  Always returns PCIBIOS_SUCCESSFUL.
+ */
+static int sun4u_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+			      int where, int size, u32 *value)
+{
+	struct pci_pbm_info *pbm = bus_dev->sysdata;
+	unsigned char busno = bus_dev->number;
+	u32 *cfg;
+	u16 half;
+	u8 byte;
+
+	if (size == 1)
+		*value = 0xff;
+	else if (size == 2)
+		*value = 0xffff;
+	else if (size == 4)
+		*value = 0xffffffff;
+
+	if (bus_dev->number == 0 && PCI_SLOT(devfn) == 0)
+		return sun4u_read_pci_cfg_host(pbm, busno, devfn, where,
+					       size, value);
+
+	cfg = sun4u_config_mkaddr(pbm, busno, devfn, where);
+	if (cfg == NULL)
+		return PCIBIOS_SUCCESSFUL;
+
+	if (size == 1) {
+		pci_config_read8((u8 *)cfg, &byte);
+		*value = (u32) byte;
+	} else if (size == 2) {
+		if (where & 0x01) {
+			printk("pci_read_config_word: misaligned reg [%x]\n",
+			       where);
+		} else {
+			pci_config_read16((u16 *)cfg, &half);
+			*value = (u32) half;
+		}
+	} else if (size == 4) {
+		if (where & 0x03)
+			printk("pci_read_config_dword: misaligned reg [%x]\n",
+			       where);
+		else
+			pci_config_read32(cfg, value);
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Write to the host controller's own config registers.
+ *
+ * Offsets below 8 are only touched with 16-bit accesses (a byte write
+ * becomes a read-modify-write of the containing word); offsets at 8
+ * and above are only touched with byte accesses.  A 32-bit write is
+ * split into two recursive 16-bit writes, low half first.  Always
+ * returns PCIBIOS_SUCCESSFUL.
+ */
+static int sun4u_write_pci_cfg_host(struct pci_pbm_info *pbm,
+				    unsigned char bus, unsigned int devfn,
+				    int where, int size, u32 value)
+{
+	u32 *cfg = sun4u_config_mkaddr(pbm, bus, devfn, where);
+	int word_only = (where < 8);
+
+	if (cfg == NULL)
+		return PCIBIOS_SUCCESSFUL;
+
+	if (size == 1) {
+		if (word_only) {
+			u16 *word = (u16 *) ((unsigned long) cfg & ~1);
+			u16 cur;
+
+			pci_config_read16(word, &cur);
+			if (where & 1)
+				cur = (cur & 0x00ff) | (value << 8);
+			else
+				cur = (cur & 0xff00) | value;
+			pci_config_write16(word, cur);
+		} else {
+			pci_config_write8((u8 *)cfg, value);
+		}
+	} else if (size == 2) {
+		if (word_only) {
+			pci_config_write16((u16 *)cfg, value);
+		} else {
+			pci_config_write8((u8 *)cfg, value & 0xff);
+			pci_config_write8(((u8 *)cfg) + 1, value >> 8);
+		}
+	} else if (size == 4) {
+		sun4u_write_pci_cfg_host(pbm, bus, devfn,
+					 where, 2, value & 0xffff);
+		sun4u_write_pci_cfg_host(pbm, bus, devfn,
+					 where + 2, 2, value >> 16);
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* pci_ops write hook for sun4u controllers.
+ *
+ * Bus 0 / slot 0 is routed to the host controller accessor.  Writes to
+ * an out-of-range bus are dropped; misaligned 16/32-bit writes are
+ * logged and dropped.  Always returns PCIBIOS_SUCCESSFUL.
+ */
+static int sun4u_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+			       int where, int size, u32 value)
+{
+	struct pci_pbm_info *pbm = bus_dev->sysdata;
+	unsigned char busno = bus_dev->number;
+	u32 *cfg;
+
+	if (bus_dev->number == 0 && PCI_SLOT(devfn) == 0)
+		return sun4u_write_pci_cfg_host(pbm, busno, devfn, where,
+						size, value);
+
+	cfg = sun4u_config_mkaddr(pbm, busno, devfn, where);
+	if (cfg == NULL)
+		return PCIBIOS_SUCCESSFUL;
+
+	if (size == 1) {
+		pci_config_write8((u8 *)cfg, value);
+	} else if (size == 2) {
+		if (where & 0x01)
+			printk("pci_write_config_word: misaligned reg [%x]\n",
+			       where);
+		else
+			pci_config_write16((u16 *)cfg, value);
+	} else if (size == 4) {
+		if (where & 0x03)
+			printk("pci_write_config_dword: misaligned reg [%x]\n",
+			       where);
+		else
+			pci_config_write32(cfg, value);
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops sun4u_pci_ops = {
+ .read = sun4u_read_pci_cfg,
+ .write = sun4u_write_pci_cfg,
+};
+
+/* pci_ops read hook for sun4v: fetch the value through
+ * pci_sun4v_config_get() and truncate it to the requested width.
+ * An out-of-range bus reads back as all-ones.  Always returns
+ * PCIBIOS_SUCCESSFUL.
+ */
+static int sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+			      int where, int size, u32 *value)
+{
+	struct pci_pbm_info *pbm = bus_dev->sysdata;
+	u32 devhandle = pbm->devhandle;
+	unsigned int busno = bus_dev->number;
+	unsigned int slot = PCI_SLOT(devfn);
+	unsigned int fn = PCI_FUNC(devfn);
+	unsigned long raw = ~0UL;
+
+	if (!config_out_of_range(pbm, busno, devfn, where))
+		raw = pci_sun4v_config_get(devhandle,
+					   HV_PCI_DEVICE_BUILD(busno, slot, fn),
+					   where, size);
+
+	if (size == 1)
+		*value = raw & 0xff;
+	else if (size == 2)
+		*value = raw & 0xffff;
+	else if (size == 4)
+		*value = raw & 0xffffffff;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* pci_ops write hook for sun4v: forward the write through
+ * pci_sun4v_config_put().  Writes to a bus outside the PBM's range are
+ * silently dropped.  The result of the put call is not inspected, so
+ * this always returns PCIBIOS_SUCCESSFUL.
+ */
+static int sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+			       int where, int size, u32 value)
+{
+	struct pci_pbm_info *pbm = bus_dev->sysdata;
+	u32 devhandle = pbm->devhandle;
+	unsigned int bus = bus_dev->number;
+	unsigned int device = PCI_SLOT(devfn);
+	unsigned int func = PCI_FUNC(devfn);
+
+	if (!config_out_of_range(pbm, bus, devfn, where))
+		pci_sun4v_config_put(devhandle,
+				     HV_PCI_DEVICE_BUILD(bus, device, func),
+				     where, size, value);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops sun4v_pci_ops = {
+ .read = sun4v_read_pci_cfg,
+ .write = sun4v_write_pci_cfg,
+};
+
+/* Read per-PBM OpenFirmware properties.
+ *
+ * "bus-range" supplies the first and last PCI bus number and is
+ * required: a missing property halts via the PROM instead of
+ * dereferencing a NULL pointer.  "ino-bitmap" is optional; when it is
+ * present its two 32-bit cells are combined into pbm->ino_bitmap with
+ * cell 1 as the high word.
+ */
+void pci_get_pbm_props(struct pci_pbm_info *pbm)
+{
+	const u32 *val = of_get_property(pbm->op->node, "bus-range", NULL);
+
+	if (!val) {
+		prom_printf("PCI: Fatal error, missing bus-range property "
+			    "for %s\n",
+			    pbm->op->node->full_name);
+		prom_halt();
+	}
+
+	pbm->pci_first_busno = val[0];
+	pbm->pci_last_busno = val[1];
+
+	val = of_get_property(pbm->op->node, "ino-bitmap", NULL);
+	if (val) {
+		pbm->ino_bitmap = (((u64)val[1] << 32UL) |
+				   ((u64)val[0] <<  0UL));
+	}
+}
+
+/* Allocate one busy resource named @name covering @size bytes at
+ * @offset from the start of @mem_res and request it under @mem_res.
+ *
+ * Returns -ENOMEM when the resource cannot be allocated, 0 otherwise.
+ * If the request is refused the resource is freed rather than leaked.
+ */
+static int pci_claim_legacy_region(struct resource *mem_res,
+				   const char *name,
+				   unsigned long offset,
+				   unsigned long size)
+{
+	struct resource *p = kzalloc(sizeof(*p), GFP_KERNEL);
+
+	if (!p)
+		return -ENOMEM;
+
+	p->name = name;
+	p->start = mem_res->start + offset;
+	p->end = p->start + size - 1UL;
+	p->flags = IORESOURCE_BUSY;
+	if (request_resource(mem_res, p))
+		kfree(p);
+
+	return 0;
+}
+
+/* Mark the legacy VGA RAM, system ROM and video ROM windows of the
+ * PBM memory space as busy.  Registration stops at the first
+ * allocation failure.  @io_res is currently unused.
+ */
+static void pci_register_legacy_regions(struct resource *io_res,
+					struct resource *mem_res)
+{
+	/* VGA Video RAM. */
+	if (pci_claim_legacy_region(mem_res, "Video RAM area",
+				    0xa0000UL, 0x20000UL))
+		return;
+
+	if (pci_claim_legacy_region(mem_res, "System ROM",
+				    0xf0000UL, 0x10000UL))
+		return;
+
+	pci_claim_legacy_region(mem_res, "Video ROM",
+				0xc0000UL, 0x8000UL);
+}
+
+/* Reserve the IOMMU's virtual DMA window inside the PBM memory space.
+ *
+ * The "virtual-dma" property, when present, holds the window base and
+ * size.  Nothing is done when the property is absent.
+ */
+static void pci_register_iommu_region(struct pci_pbm_info *pbm)
+{
+	const u32 *vdma = of_get_property(pbm->op->node, "virtual-dma", NULL);
+
+	if (vdma) {
+		/* Zeroed allocation: the resource is linked into the
+		 * resource tree, so its parent/sibling/child pointers
+		 * must not start out as garbage.
+		 */
+		struct resource *rp = kzalloc(sizeof(*rp), GFP_KERNEL);
+
+		if (!rp) {
+			prom_printf("Cannot allocate IOMMU resource.\n");
+			prom_halt();
+		}
+		rp->name = "IOMMU";
+		rp->start = pbm->mem_space.start + (unsigned long) vdma[0];
+		rp->end = rp->start + (unsigned long) vdma[1] - 1UL;
+		rp->flags = IORESOURCE_BUSY;
+
+		/* A refused request leaves the resource unlinked; free it. */
+		if (request_resource(&pbm->mem_space, rp))
+			kfree(rp);
+	}
+}
+
+/* Work out the PBM's config, I/O and memory windows from the
+ * OpenFirmware "ranges" property and register them as resources.
+ *
+ * Each range entry's type is taken from bits 24-25 of child_phys_hi:
+ * 0 = config space, 1 = I/O, 2 = 32-bit memory, 3 = 64-bit memory
+ * (not handled yet).  A missing "ranges" property, or the absence of
+ * either an I/O or a memory range, is fatal and halts via the PROM.
+ */
+void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
+{
+	const struct linux_prom_pci_ranges *pbm_ranges;
+	int i, saw_mem, saw_io;
+	int num_pbm_ranges;
+
+	saw_mem = saw_io = 0;
+	pbm_ranges = of_get_property(pbm->op->node, "ranges", &i);
+	if (!pbm_ranges) {
+		prom_printf("PCI: Fatal error, missing PBM ranges property "
+			    "for %s\n",
+			    pbm->name);
+		prom_halt();
+	}
+
+	/* of_get_property() stored the property length in bytes in i. */
+	num_pbm_ranges = i / sizeof(*pbm_ranges);
+
+	for (i = 0; i < num_pbm_ranges; i++) {
+		const struct linux_prom_pci_ranges *pr = &pbm_ranges[i];
+		unsigned long a, size;
+		u32 parent_phys_hi, parent_phys_lo;
+		u32 size_hi, size_lo;
+		int type;
+
+		parent_phys_hi = pr->parent_phys_hi;
+		parent_phys_lo = pr->parent_phys_lo;
+		/* Under the hypervisor the top 4 bits of the parent
+		 * address are masked off before it is used.
+		 */
+		if (tlb_type == hypervisor)
+			parent_phys_hi &= 0x0fffffff;
+
+		size_hi = pr->size_hi;
+		size_lo = pr->size_lo;
+
+		type = (pr->child_phys_hi >> 24) & 0x3;
+		a = (((unsigned long)parent_phys_hi << 32UL) |
+		     ((unsigned long)parent_phys_lo  <<  0UL));
+		size = (((unsigned long)size_hi << 32UL) |
+			((unsigned long)size_lo  <<  0UL));
+
+		switch (type) {
+		case 0:
+			/* PCI config space, 16MB */
+			pbm->config_space = a;
+			break;
+
+		case 1:
+			/* 16-bit IO space, 16MB */
+			pbm->io_space.start = a;
+			pbm->io_space.end = a + size - 1UL;
+			pbm->io_space.flags = IORESOURCE_IO;
+			saw_io = 1;
+			break;
+
+		case 2:
+			/* 32-bit MEM space, 2GB */
+			pbm->mem_space.start = a;
+			pbm->mem_space.end = a + size - 1UL;
+			pbm->mem_space.flags = IORESOURCE_MEM;
+			saw_mem = 1;
+			break;
+
+		case 3:
+			/* XXX 64-bit MEM handling XXX */
+			/* fallthrough */
+
+		default:
+			break;
+		}
+	}
+
+	if (!saw_io || !saw_mem) {
+		prom_printf("%s: Fatal error, missing %s PBM range.\n",
+			    pbm->name,
+			    (!saw_io ? "IO" : "MEM"));
+		prom_halt();
+	}
+
+	printk("%s: PCI IO[%lx] MEM[%lx]\n",
+	       pbm->name,
+	       pbm->io_space.start,
+	       pbm->mem_space.start);
+
+	pbm->io_space.name = pbm->mem_space.name = pbm->name;
+
+	request_resource(&ioport_resource, &pbm->io_space);
+	request_resource(&iomem_resource, &pbm->mem_space);
+
+	pci_register_legacy_regions(&pbm->io_space,
+				    &pbm->mem_space);
+	pci_register_iommu_region(pbm);
+}
+
+/* Generic helper routines for PCI error reporting. */
+/* Walk @pbus and all of its child busses, reporting every device
+ * whose PCI_STATUS has a signalled or received target abort bit set.
+ * The set bits are written back to PCI_STATUS (presumably to clear
+ * them) before the report is printed.
+ */
+void pci_scan_for_target_abort(struct pci_pbm_info *pbm,
+			       struct pci_bus *pbus)
+{
+	struct pci_bus *child;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &pbus->devices, bus_list) {
+		u16 status, hits;
+
+		pci_read_config_word(dev, PCI_STATUS, &status);
+		hits = status & (PCI_STATUS_SIG_TARGET_ABORT |
+				 PCI_STATUS_REC_TARGET_ABORT);
+		if (!hits)
+			continue;
+
+		pci_write_config_word(dev, PCI_STATUS, hits);
+		printk("%s: Device %s saw Target Abort [%016x]\n",
+		       pbm->name, pci_name(dev), status);
+	}
+
+	list_for_each_entry(child, &pbus->children, node)
+		pci_scan_for_target_abort(pbm, child);
+}
+
+/* Walk @pbus and all of its child busses, reporting every device
+ * whose PCI_STATUS has the received master abort bit set.  The set
+ * bit is written back to PCI_STATUS (presumably to clear it) before
+ * the report is printed.
+ */
+void pci_scan_for_master_abort(struct pci_pbm_info *pbm,
+			       struct pci_bus *pbus)
+{
+	struct pci_bus *child;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &pbus->devices, bus_list) {
+		u16 status, hits;
+
+		pci_read_config_word(dev, PCI_STATUS, &status);
+		hits = status & PCI_STATUS_REC_MASTER_ABORT;
+		if (!hits)
+			continue;
+
+		pci_write_config_word(dev, PCI_STATUS, hits);
+		printk("%s: Device %s received Master Abort [%016x]\n",
+		       pbm->name, pci_name(dev), status);
+	}
+
+	list_for_each_entry(child, &pbus->children, node)
+		pci_scan_for_master_abort(pbm, child);
+}
+
+/* Walk @pbus and all of its child busses, reporting every device
+ * whose PCI_STATUS has a parity error bit set.  The set bits are
+ * written back to PCI_STATUS (presumably to clear them) before the
+ * report is printed.
+ */
+void pci_scan_for_parity_error(struct pci_pbm_info *pbm,
+			       struct pci_bus *pbus)
+{
+	struct pci_bus *child;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &pbus->devices, bus_list) {
+		u16 status, hits;
+
+		pci_read_config_word(dev, PCI_STATUS, &status);
+		hits = status & (PCI_STATUS_PARITY |
+				 PCI_STATUS_DETECTED_PARITY);
+		if (!hits)
+			continue;
+
+		pci_write_config_word(dev, PCI_STATUS, hits);
+		printk("%s: Device %s saw Parity Error [%016x]\n",
+		       pbm->name, pci_name(dev), status);
+	}
+
+	list_for_each_entry(child, &pbus->children, node)
+		pci_scan_for_parity_error(pbm, child);
+}
diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c
new file mode 100644
index 000000000000..9462b68f4894
--- /dev/null
+++ b/arch/sparc/kernel/pci_fire.c
@@ -0,0 +1,521 @@
+/* pci_fire.c: Sun4u platform PCI-E controller support.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/msi.h>
+#include <linux/irq.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/irq.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+
+#define DRIVER_NAME "fire"
+#define PFX DRIVER_NAME ": "
+
+#define FIRE_IOMMU_CONTROL 0x40000UL
+#define FIRE_IOMMU_TSBBASE 0x40008UL
+#define FIRE_IOMMU_FLUSH 0x40100UL
+#define FIRE_IOMMU_FLUSHINV 0x40108UL
+
+/* Set up the Fire IOMMU for this PBM: record the register addresses,
+ * invalidate the TLB, allocate the translation table and switch
+ * translation on.  Returns 0 or the error from iommu_table_init().
+ */
+static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm)
+{
+	/* No virtual-dma property on these guys, use largest size:
+	 * base 0xc0000000, size 0x40000000.
+	 */
+	const u32 dma_base = 0xc0000000;
+	const u32 dma_mask = 0xffffffff;
+	const int tsbsize = 128;
+	struct iommu *iommu = pbm->iommu;
+	unsigned long regs = pbm->pbm_regs;
+	u64 control;
+	int err;
+
+	/* Register addresses. */
+	iommu->iommu_control = regs + FIRE_IOMMU_CONTROL;
+	iommu->iommu_tsbbase = regs + FIRE_IOMMU_TSBBASE;
+	iommu->iommu_flush = regs + FIRE_IOMMU_FLUSH;
+	iommu->iommu_flushinv = regs + FIRE_IOMMU_FLUSHINV;
+
+	/* We use the main control/status register of FIRE as the write
+	 * completion register.
+	 */
+	iommu->write_complete_reg = pbm->controller_regs + 0x410000UL;
+
+	/* Invalidate TLB Entries. */
+	upa_writeq(~(u64)0, iommu->iommu_flushinv);
+
+	err = iommu_table_init(iommu, tsbsize * 8 * 1024, dma_base, dma_mask,
+			       pbm->numa_node);
+	if (err)
+		return err;
+
+	upa_writeq(__pa(iommu->page_table) | 0x7UL, iommu->iommu_tsbbase);
+
+	control = upa_readq(iommu->iommu_control);
+	control |= 0x00000400;	/* TSB cache snoop enable */
+	control |= 0x00000300;	/* Cache mode */
+	control |= 0x00000002;	/* Bypass enable */
+	control |= 0x00000001;	/* Translation enable */
+	upa_writeq(control, iommu->iommu_control);
+
+	return 0;
+}
+
+#ifdef CONFIG_PCI_MSI
+struct pci_msiq_entry {
+ u64 word0;
+#define MSIQ_WORD0_RESV 0x8000000000000000UL
+#define MSIQ_WORD0_FMT_TYPE 0x7f00000000000000UL
+#define MSIQ_WORD0_FMT_TYPE_SHIFT 56
+#define MSIQ_WORD0_LEN 0x00ffc00000000000UL
+#define MSIQ_WORD0_LEN_SHIFT 46
+#define MSIQ_WORD0_ADDR0 0x00003fff00000000UL
+#define MSIQ_WORD0_ADDR0_SHIFT 32
+#define MSIQ_WORD0_RID 0x00000000ffff0000UL
+#define MSIQ_WORD0_RID_SHIFT 16
+#define MSIQ_WORD0_DATA0 0x000000000000ffffUL
+#define MSIQ_WORD0_DATA0_SHIFT 0
+
+#define MSIQ_TYPE_MSG 0x6
+#define MSIQ_TYPE_MSI32 0xb
+#define MSIQ_TYPE_MSI64 0xf
+
+ u64 word1;
+#define MSIQ_WORD1_ADDR1 0xffffffffffff0000UL
+#define MSIQ_WORD1_ADDR1_SHIFT 16
+#define MSIQ_WORD1_DATA1 0x000000000000ffffUL
+#define MSIQ_WORD1_DATA1_SHIFT 0
+
+ u64 resv[6];
+};
+
+/* All MSI registers are offset from pbm->pbm_regs */
+#define EVENT_QUEUE_BASE_ADDR_REG 0x010000UL
+#define EVENT_QUEUE_BASE_ADDR_ALL_ONES 0xfffc000000000000UL
+
+#define EVENT_QUEUE_CONTROL_SET(EQ) (0x011000UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_CONTROL_SET_OFLOW 0x0200000000000000UL
+#define EVENT_QUEUE_CONTROL_SET_EN 0x0000100000000000UL
+
+#define EVENT_QUEUE_CONTROL_CLEAR(EQ) (0x011200UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_CONTROL_CLEAR_OF 0x0200000000000000UL
+#define EVENT_QUEUE_CONTROL_CLEAR_E2I 0x0000800000000000UL
+#define EVENT_QUEUE_CONTROL_CLEAR_DIS 0x0000100000000000UL
+
+#define EVENT_QUEUE_STATE(EQ) (0x011400UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_STATE_MASK 0x0000000000000007UL
+#define EVENT_QUEUE_STATE_IDLE 0x0000000000000001UL
+#define EVENT_QUEUE_STATE_ACTIVE 0x0000000000000002UL
+#define EVENT_QUEUE_STATE_ERROR 0x0000000000000004UL
+
+#define EVENT_QUEUE_TAIL(EQ) (0x011600UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_TAIL_OFLOW 0x0200000000000000UL
+#define EVENT_QUEUE_TAIL_VAL 0x000000000000007fUL
+
+#define EVENT_QUEUE_HEAD(EQ) (0x011800UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_HEAD_VAL 0x000000000000007fUL
+
+#define MSI_MAP(MSI) (0x020000UL + (MSI) * 0x8UL)
+#define MSI_MAP_VALID 0x8000000000000000UL
+#define MSI_MAP_EQWR_N 0x4000000000000000UL
+#define MSI_MAP_EQNUM 0x000000000000003fUL
+
+#define MSI_CLEAR(MSI) (0x028000UL + (MSI) * 0x8UL)
+#define MSI_CLEAR_EQWR_N 0x4000000000000000UL
+
+#define IMONDO_DATA0 0x02C000UL
+#define IMONDO_DATA0_DATA 0xffffffffffffffc0UL
+
+#define IMONDO_DATA1 0x02C008UL
+#define IMONDO_DATA1_DATA 0xffffffffffffffffUL
+
+#define MSI_32BIT_ADDR 0x034000UL
+#define MSI_32BIT_ADDR_VAL 0x00000000ffff0000UL
+
+#define MSI_64BIT_ADDR 0x034008UL
+#define MSI_64BIT_ADDR_VAL 0xffffffffffff0000UL
+
+/* Read the head register of event queue @msiqid into *head.
+ * Always returns 0.
+ */
+static int pci_fire_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+			     unsigned long *head)
+{
+	unsigned long head_reg = pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid);
+
+	*head = upa_readq(head_reg);
+
+	return 0;
+}
+
+/* Take one MSI entry off event queue @msiqid at index *head.
+ *
+ * Returns 0 when the entry at *head has no format/type set (nothing to
+ * dequeue), -EINVAL when the entry is not an MSI32/MSI64 record, and 1
+ * after storing the MSI number in *msi and advancing *head.
+ */
+static int pci_fire_dequeue_msi(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head, unsigned long *msi)
+{
+ unsigned long type_fmt, type, msi_num;
+ struct pci_msiq_entry *base, *ep;
+
+ /* Each queue occupies 8192 bytes of the msi_queues area. */
+ base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * 8192));
+ ep = &base[*head];
+
+ if ((ep->word0 & MSIQ_WORD0_FMT_TYPE) == 0)
+ return 0;
+
+ /* The type is what remains of the fmt/type field above its low 3 bits. */
+ type_fmt = ((ep->word0 & MSIQ_WORD0_FMT_TYPE) >>
+ MSIQ_WORD0_FMT_TYPE_SHIFT);
+ type = (type_fmt >> 3);
+ if (unlikely(type != MSIQ_TYPE_MSI32 &&
+ type != MSIQ_TYPE_MSI64))
+ return -EINVAL;
+
+ *msi = msi_num = ((ep->word0 & MSIQ_WORD0_DATA0) >>
+ MSIQ_WORD0_DATA0_SHIFT);
+
+ /* Write EQWR_N to this MSI's clear register. */
+ upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi_num));
+
+ /* Clear the entry. */
+ ep->word0 &= ~MSIQ_WORD0_FMT_TYPE;
+
+ /* Go to next entry in ring. */
+ (*head)++;
+ if (*head >= pbm->msiq_ent_count)
+ *head = 0;
+
+ return 1;
+}
+
+/* Write @head to the head register of event queue @msiqid.
+ * Always returns 0.
+ */
+static int pci_fire_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+			     unsigned long head)
+{
+	unsigned long head_reg = pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid);
+
+	upa_writeq(head, head_reg);
+
+	return 0;
+}
+
+/* Bind MSI @msi to event queue @msiqid and mark the mapping valid.
+ *
+ * The queue number is written into the MSI map register first, then
+ * EQWR_N is written to the MSI clear register, and finally the valid
+ * bit is set in the map register.  @is_msi64 is not consulted.
+ * Always returns 0.
+ */
+static int pci_fire_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
+			      unsigned long msi, int is_msi64)
+{
+	unsigned long map_reg = pbm->pbm_regs + MSI_MAP(msi);
+	unsigned long clear_reg = pbm->pbm_regs + MSI_CLEAR(msi);
+	u64 map;
+
+	map = upa_readq(map_reg);
+	map = (map & ~(MSI_MAP_EQNUM)) | msiqid;
+	upa_writeq(map, map_reg);
+
+	upa_writeq(MSI_CLEAR_EQWR_N, clear_reg);
+
+	map = upa_readq(map_reg);
+	upa_writeq(map | MSI_MAP_VALID, map_reg);
+
+	return 0;
+}
+
+/* Invalidate the mapping for MSI @msi by clearing the valid bit in
+ * its MSI map register.  Always returns 0.
+ */
+static int pci_fire_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
+{
+	u64 val;
+
+	val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+
+	val &= ~MSI_MAP_VALID;
+
+	upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+	return 0;
+}
+
+/* Allocate and zero 512KB for the MSI event queues and program the
+ * controller with the queue base, the interrupt mondo data and the
+ * 32/64-bit MSI address windows.  The head and tail registers of each
+ * of the pbm->msiq_num queues are reset to zero.
+ *
+ * Returns 0 on success or -ENOMEM when the pages cannot be allocated.
+ */
+static int pci_fire_msiq_alloc(struct pci_pbm_info *pbm)
+{
+	unsigned long order = get_order(512 * 1024);
+	unsigned long regs = pbm->pbm_regs;
+	unsigned long pages, q;
+
+	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
+	if (!pages) {
+		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
+		       order);
+		return -ENOMEM;
+	}
+	memset((char *)pages, 0, PAGE_SIZE << order);
+	pbm->msi_queues = (void *) pages;
+
+	upa_writeq((EVENT_QUEUE_BASE_ADDR_ALL_ONES |
+		    __pa(pbm->msi_queues)),
+		   regs + EVENT_QUEUE_BASE_ADDR_REG);
+
+	upa_writeq(pbm->portid << 6, regs + IMONDO_DATA0);
+	upa_writeq(0, regs + IMONDO_DATA1);
+
+	upa_writeq(pbm->msi32_start, regs + MSI_32BIT_ADDR);
+	upa_writeq(pbm->msi64_start, regs + MSI_64BIT_ADDR);
+
+	for (q = 0; q < pbm->msiq_num; q++) {
+		upa_writeq(0, regs + EVENT_QUEUE_HEAD(q));
+		upa_writeq(0, regs + EVENT_QUEUE_TAIL(q));
+	}
+
+	return 0;
+}
+
+/* Release the 512KB MSI queue area and forget the pointer to it. */
+static void pci_fire_msiq_free(struct pci_pbm_info *pbm)
+{
+	unsigned long queue_pages = (unsigned long) pbm->msi_queues;
+
+	free_pages(queue_pages, get_order(512 * 1024));
+	pbm->msi_queues = NULL;
+}
+
+/* Create the virtual IRQ for event queue @msiqid, which interrupts
+ * through @devino, and enable the queue.
+ *
+ * Bit 63 and the interrupt controller selection are OR-ed into the
+ * devino's interrupt map register before build_irq() is called.
+ * Returns the virtual IRQ number, or -ENOMEM when build_irq()
+ * returns 0.
+ */
+static int pci_fire_msiq_build_irq(struct pci_pbm_info *pbm,
+				   unsigned long msiqid,
+				   unsigned long devino)
+{
+	/* XXX iterate amongst the 4 IRQ controllers XXX */
+	const unsigned long int_ctrlr = (1UL << 6);
+	unsigned long regs = (unsigned long) pbm->pbm_regs;
+	unsigned long imap_reg = regs + (0x001000UL + (devino * 0x08UL));
+	unsigned long iclr_reg = regs + (0x001400UL + (devino * 0x08UL));
+	unsigned int virt_irq;
+	int ino_fixup;
+	u64 imap;
+
+	imap = upa_readq(imap_reg);
+	imap |= (1UL << 63) | int_ctrlr;
+	upa_writeq(imap, imap_reg);
+
+	ino_fixup = ((pbm->portid << 6) | devino) - int_ctrlr;
+
+	virt_irq = build_irq(ino_fixup, iclr_reg, imap_reg);
+	if (virt_irq == 0)
+		return -ENOMEM;
+
+	upa_writeq(EVENT_QUEUE_CONTROL_SET_EN,
+		   pbm->pbm_regs + EVENT_QUEUE_CONTROL_SET(msiqid));
+
+	return virt_irq;
+}
+
+static const struct sparc64_msiq_ops pci_fire_msiq_ops = {
+ .get_head = pci_fire_get_head,
+ .dequeue_msi = pci_fire_dequeue_msi,
+ .set_head = pci_fire_set_head,
+ .msi_setup = pci_fire_msi_setup,
+ .msi_teardown = pci_fire_msi_teardown,
+ .msiq_alloc = pci_fire_msiq_alloc,
+ .msiq_free = pci_fire_msiq_free,
+ .msiq_build_irq = pci_fire_msiq_build_irq,
+};
+
+static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+{
+ sparc64_pbm_msi_init(pbm, &pci_fire_msiq_ops);
+}
+#else /* CONFIG_PCI_MSI */
+static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+{
+}
+#endif /* !(CONFIG_PCI_MSI) */
+
+/* Based at pbm->controller_regs */
+#define FIRE_PARITY_CONTROL 0x470010UL
+#define FIRE_PARITY_ENAB 0x8000000000000000UL
+#define FIRE_FATAL_RESET_CTL 0x471028UL
+#define FIRE_FATAL_RESET_SPARE 0x0000000004000000UL
+#define FIRE_FATAL_RESET_MB 0x0000000002000000UL
+#define FIRE_FATAL_RESET_CPE 0x0000000000008000UL
+#define FIRE_FATAL_RESET_APE 0x0000000000004000UL
+#define FIRE_FATAL_RESET_PIO 0x0000000000000040UL
+#define FIRE_FATAL_RESET_JW 0x0000000000000004UL
+#define FIRE_FATAL_RESET_JI 0x0000000000000002UL
+#define FIRE_FATAL_RESET_JR 0x0000000000000001UL
+#define FIRE_CORE_INTR_ENABLE 0x471800UL
+
+/* Based at pbm->pbm_regs */
+#define FIRE_TLU_CTRL 0x80000UL
+#define FIRE_TLU_CTRL_TIM 0x00000000da000000UL
+#define FIRE_TLU_CTRL_QDET 0x0000000000000100UL
+#define FIRE_TLU_CTRL_CFG 0x0000000000000001UL
+#define FIRE_TLU_DEV_CTRL 0x90008UL
+#define FIRE_TLU_LINK_CTRL 0x90020UL
+#define FIRE_TLU_LINK_CTRL_CLK 0x0000000000000040UL
+#define FIRE_LPU_RESET 0xe2008UL
+#define FIRE_LPU_LLCFG 0xe2200UL
+#define FIRE_LPU_LLCFG_VC0 0x0000000000000100UL
+#define FIRE_LPU_FCTRL_UCTRL 0xe2240UL
+#define FIRE_LPU_FCTRL_UCTRL_N 0x0000000000000002UL
+#define FIRE_LPU_FCTRL_UCTRL_P 0x0000000000000001UL
+#define FIRE_LPU_TXL_FIFOP 0xe2430UL
+#define FIRE_LPU_LTSSM_CFG2 0xe2788UL
+#define FIRE_LPU_LTSSM_CFG3 0xe2790UL
+#define FIRE_LPU_LTSSM_CFG4 0xe2798UL
+#define FIRE_LPU_LTSSM_CFG5 0xe27a0UL
+#define FIRE_DMC_IENAB 0x31800UL
+#define FIRE_DMC_DBG_SEL_A 0x53000UL
+#define FIRE_DMC_DBG_SEL_B 0x53008UL
+#define FIRE_PEC_IENAB 0x51800UL
+
+/* Program the Fire controller and PBM registers to their initial
+ * values: parity checking, fatal reset sources, core interrupt
+ * enables, TLU/LPU link configuration and the DMC/PEC interrupt
+ * enables.
+ */
+static void pci_fire_hw_init(struct pci_pbm_info *pbm)
+{
+	u64 val;
+
+	upa_writeq(FIRE_PARITY_ENAB,
+		   pbm->controller_regs + FIRE_PARITY_CONTROL);
+
+	upa_writeq((FIRE_FATAL_RESET_SPARE |
+		    FIRE_FATAL_RESET_MB |
+		    FIRE_FATAL_RESET_CPE |
+		    FIRE_FATAL_RESET_APE |
+		    FIRE_FATAL_RESET_PIO |
+		    FIRE_FATAL_RESET_JW |
+		    FIRE_FATAL_RESET_JI |
+		    FIRE_FATAL_RESET_JR),
+		   pbm->controller_regs + FIRE_FATAL_RESET_CTL);
+
+	upa_writeq(~(u64)0, pbm->controller_regs + FIRE_CORE_INTR_ENABLE);
+
+	val = upa_readq(pbm->pbm_regs + FIRE_TLU_CTRL);
+	val |= (FIRE_TLU_CTRL_TIM |
+		FIRE_TLU_CTRL_QDET |
+		FIRE_TLU_CTRL_CFG);
+	upa_writeq(val, pbm->pbm_regs + FIRE_TLU_CTRL);
+	upa_writeq(0, pbm->pbm_regs + FIRE_TLU_DEV_CTRL);
+	upa_writeq(FIRE_TLU_LINK_CTRL_CLK,
+		   pbm->pbm_regs + FIRE_TLU_LINK_CTRL);
+
+	upa_writeq(0, pbm->pbm_regs + FIRE_LPU_RESET);
+	upa_writeq(FIRE_LPU_LLCFG_VC0, pbm->pbm_regs + FIRE_LPU_LLCFG);
+	upa_writeq((FIRE_LPU_FCTRL_UCTRL_N | FIRE_LPU_FCTRL_UCTRL_P),
+		   pbm->pbm_regs + FIRE_LPU_FCTRL_UCTRL);
+	/* Use unsigned long constants: 0xffff << 16 does not fit in a
+	 * signed int and would be sign-extended into the upper 32 bits
+	 * of the 64-bit register value.
+	 */
+	upa_writeq(((0xffffUL << 16) | (0x0000UL << 0)),
+		   pbm->pbm_regs + FIRE_LPU_TXL_FIFOP);
+	upa_writeq(3000000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG2);
+	upa_writeq(500000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG3);
+	upa_writeq((2 << 16) | (140 << 8),
+		   pbm->pbm_regs + FIRE_LPU_LTSSM_CFG4);
+	upa_writeq(0, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG5);
+
+	upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_DMC_IENAB);
+	upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_A);
+	upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_B);
+
+	upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_PEC_IENAB);
+}
+
+/* Initialise one Fire PBM: record its identity and register bases,
+ * discover its address spaces and properties, program the hardware,
+ * bring up the IOMMU and MSI support, scan the bus and link the PBM
+ * onto the global pci_pbm_root list.
+ *
+ * Returns 0 on success, -ENODEV when the "reg" property is missing or
+ * holds fewer than two entries, or the error from the IOMMU setup.
+ *
+ * Marked __devinit rather than __init because it is called from the
+ * __devinit probe routine.
+ */
+static int __devinit pci_fire_pbm_init(struct pci_pbm_info *pbm,
+				       struct of_device *op, u32 portid)
+{
+	const struct linux_prom64_registers *regs;
+	struct device_node *dp = op->node;
+	int err, len;
+
+	/* Both regs[0] and regs[1] are read below, so validate the
+	 * property before any global state (pci_num_pbms) is touched.
+	 */
+	regs = of_get_property(dp, "reg", &len);
+	if (!regs || len < (int) (2 * sizeof(*regs))) {
+		printk(KERN_ERR PFX "%s: missing or short reg property.\n",
+		       dp->full_name);
+		return -ENODEV;
+	}
+
+	pbm->numa_node = -1;
+
+	pbm->pci_ops = &sun4u_pci_ops;
+	pbm->config_space_reg_bits = 12;
+
+	pbm->index = pci_num_pbms++;
+
+	pbm->portid = portid;
+	pbm->op = op;
+	pbm->name = dp->full_name;
+
+	pbm->pbm_regs = regs[0].phys_addr;
+	pbm->controller_regs = regs[1].phys_addr - 0x410000UL;
+
+	printk("%s: SUN4U PCIE Bus Module\n", pbm->name);
+
+	pci_determine_mem_io_space(pbm);
+
+	pci_get_pbm_props(pbm);
+
+	pci_fire_hw_init(pbm);
+
+	err = pci_fire_pbm_iommu_init(pbm);
+	if (err)
+		return err;
+
+	pci_fire_msi_init(pbm);
+
+	pbm->pci_bus = pci_scan_one_pbm(pbm, &op->dev);
+
+	/* XXX register error interrupt handlers XXX */
+
+	pbm->next = pci_pbm_root;
+	pci_pbm_root = pbm;
+
+	return 0;
+}
+
+/* Probe routine for a Fire PCI-E host bridge node: allocate the PBM
+ * and IOMMU state, initialise the PBM and store it as driver data.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, or the error
+ * from pci_fire_pbm_init().  @match is not used.
+ */
+static int __devinit fire_probe(struct of_device *op,
+				const struct of_device_id *match)
+{
+	struct pci_pbm_info *pbm;
+	u32 portid;
+	int err = -ENOMEM;
+
+	portid = of_getintprop_default(op->node, "portid", 0xff);
+
+	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+	if (pbm == NULL) {
+		printk(KERN_ERR PFX "Cannot allocate pci_pbminfo.\n");
+		return err;
+	}
+
+	pbm->iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+	if (pbm->iommu == NULL) {
+		printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+		goto free_pbm;
+	}
+
+	err = pci_fire_pbm_init(pbm, op, portid);
+	if (err != 0)
+		goto free_iommu;
+
+	dev_set_drvdata(&op->dev, pbm);
+	return 0;
+
+free_iommu:
+	kfree(pbm->iommu);
+free_pbm:
+	kfree(pbm);
+	return err;
+}
+
+/* Device nodes handled by this driver.
+ *
+ * Not __initdata: fire_driver keeps pointing at this table for as long
+ * as the driver stays registered, so it must survive the freeing of
+ * init memory.
+ */
+static struct of_device_id fire_match[] = {
+	{
+		.name = "pci",
+		.compatible = "pciex108e,80f0",
+	},
+	{},
+};
+
+static struct of_platform_driver fire_driver = {
+ .name = DRIVER_NAME,
+ .match_table = fire_match,
+ .probe = fire_probe,
+};
+
+static int __init fire_init(void)
+{
+ return of_register_driver(&fire_driver, &of_bus_type);
+}
+
+subsys_initcall(fire_init);
diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h
new file mode 100644
index 000000000000..03186824327e
--- /dev/null
+++ b/arch/sparc/kernel/pci_impl.h
@@ -0,0 +1,185 @@
+/* pci_impl.h: Helper definitions for PCI controller support.
+ *
+ * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef PCI_IMPL_H
+#define PCI_IMPL_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/of_device.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+
+/* The abstraction used here is that there are PCI controllers,
+ * each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules
+ * underneath. Each PCI bus module uses an IOMMU (shared by both
+ * PBMs of a controller, or per-PBM), and if a streaming buffer
+ * is present, each PCI bus module has its own. (i.e. the IOMMU
+ * might be shared between PBMs, the STC is never shared)
+ * Furthermore, each PCI bus module controls its own autonomous
+ * PCI bus.
+ */
+
+#define PCI_STC_FLUSHFLAG_INIT(STC) \
+ (*((STC)->strbuf_flushflag) = 0UL)
+#define PCI_STC_FLUSHFLAG_SET(STC) \
+ (*((STC)->strbuf_flushflag) != 0UL)
+
+#ifdef CONFIG_PCI_MSI
+struct pci_pbm_info;
+struct sparc64_msiq_ops {
+ int (*get_head)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head);
+ int (*dequeue_msi)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head, unsigned long *msi);
+ int (*set_head)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long head);
+ int (*msi_setup)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long msi, int is_msi64);
+ int (*msi_teardown)(struct pci_pbm_info *pbm, unsigned long msi);
+ int (*msiq_alloc)(struct pci_pbm_info *pbm);
+ void (*msiq_free)(struct pci_pbm_info *pbm);
+ int (*msiq_build_irq)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long devino);
+};
+
+extern void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
+ const struct sparc64_msiq_ops *ops);
+
+struct sparc64_msiq_cookie {
+ struct pci_pbm_info *pbm;
+ unsigned long msiqid;
+};
+#endif
+
+struct pci_pbm_info {
+ struct pci_pbm_info *next;
+ struct pci_pbm_info *sibling;
+ int index;
+
+ /* Physical address base of controller registers. */
+ unsigned long controller_regs;
+
+ /* Physical address base of PBM registers. */
+ unsigned long pbm_regs;
+
+ /* Physical address of DMA sync register, if any. */
+ unsigned long sync_reg;
+
+ /* Opaque 32-bit system bus Port ID. */
+ u32 portid;
+
+ /* Opaque 32-bit handle used for hypervisor calls. */
+ u32 devhandle;
+
+ /* Chipset version information. */
+ int chip_type;
+#define PBM_CHIP_TYPE_SABRE 1
+#define PBM_CHIP_TYPE_PSYCHO 2
+#define PBM_CHIP_TYPE_SCHIZO 3
+#define PBM_CHIP_TYPE_SCHIZO_PLUS 4
+#define PBM_CHIP_TYPE_TOMATILLO 5
+ int chip_version;
+ int chip_revision;
+
+ /* Name used for top-level resources. */
+ char *name;
+
+ /* OBP specific information. */
+ struct of_device *op;
+ u64 ino_bitmap;
+
+ /* PBM I/O and Memory space resources. */
+ struct resource io_space;
+ struct resource mem_space;
+
+ /* Base of PCI Config space, can be per-PBM or shared. */
+ unsigned long config_space;
+
+ /* This will be 12 on PCI-E controllers, 8 elsewhere. */
+ unsigned long config_space_reg_bits;
+
+ unsigned long pci_afsr;
+ unsigned long pci_afar;
+ unsigned long pci_csr;
+
+ /* State of 66MHz capabilities on this PBM. */
+ int is_66mhz_capable;
+ int all_devs_66mhz;
+
+#ifdef CONFIG_PCI_MSI
+ /* MSI info. */
+ u32 msiq_num;
+ u32 msiq_ent_count;
+ u32 msiq_first;
+ u32 msiq_first_devino;
+ u32 msiq_rotor;
+ struct sparc64_msiq_cookie *msiq_irq_cookies;
+ u32 msi_num;
+ u32 msi_first;
+ u32 msi_data_mask;
+ u32 msix_data_width;
+ u64 msi32_start;
+ u64 msi64_start;
+ u32 msi32_len;
+ u32 msi64_len;
+ void *msi_queues;
+ unsigned long *msi_bitmap;
+ unsigned int *msi_irq_table;
+ int (*setup_msi_irq)(unsigned int *virt_irq_p, struct pci_dev *pdev,
+ struct msi_desc *entry);
+ void (*teardown_msi_irq)(unsigned int virt_irq, struct pci_dev *pdev);
+ const struct sparc64_msiq_ops *msi_ops;
+#endif /* !(CONFIG_PCI_MSI) */
+
+ /* This PBM's streaming buffer. */
+ struct strbuf stc;
+
+ /* IOMMU state, potentially shared by both PBM segments. */
+ struct iommu *iommu;
+
+ /* Now things for the actual PCI bus probes. */
+ unsigned int pci_first_busno;
+ unsigned int pci_last_busno;
+ struct pci_bus *pci_bus;
+ struct pci_ops *pci_ops;
+
+ int numa_node;
+};
+
+extern struct pci_pbm_info *pci_pbm_root;
+
+extern int pci_num_pbms;
+
+/* PCI bus scanning and fixup support. */
+extern void pci_get_pbm_props(struct pci_pbm_info *pbm);
+extern struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
+ struct device *parent);
+extern void pci_determine_mem_io_space(struct pci_pbm_info *pbm);
+
+/* Error reporting support. */
+extern void pci_scan_for_target_abort(struct pci_pbm_info *, struct pci_bus *);
+extern void pci_scan_for_master_abort(struct pci_pbm_info *, struct pci_bus *);
+extern void pci_scan_for_parity_error(struct pci_pbm_info *, struct pci_bus *);
+
+/* Configuration space access. */
+extern void pci_config_read8(u8 *addr, u8 *ret);
+extern void pci_config_read16(u16 *addr, u16 *ret);
+extern void pci_config_read32(u32 *addr, u32 *ret);
+extern void pci_config_write8(u8 *addr, u8 val);
+extern void pci_config_write16(u16 *addr, u16 val);
+extern void pci_config_write32(u32 *addr, u32 val);
+
+extern struct pci_ops sun4u_pci_ops;
+extern struct pci_ops sun4v_pci_ops;
+
+extern volatile int pci_poke_in_progress;
+extern volatile int pci_poke_cpu;
+extern volatile int pci_poke_faulted;
+
+#endif /* !(PCI_IMPL_H) */
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
new file mode 100644
index 000000000000..2e680f34f727
--- /dev/null
+++ b/arch/sparc/kernel/pci_msi.c
@@ -0,0 +1,447 @@
+/* pci_msi.c: Sparc64 MSI support common layer.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include "pci_impl.h"
+/* Interrupt handler for a single MSI queue.
+ *
+ * 'cookie' is the struct sparc64_msiq_cookie handed to request_irq();
+ * it supplies the PBM and the queue id.  The handler reads the queue
+ * head, then dequeues entries until ops->dequeue_msi() returns 0,
+ * passing each MSI to the handle_irq routine of the virtual IRQ
+ * recorded in pbm->msi_irq_table (indexed relative to msi_first).
+ * If the head moved it is written back with ops->set_head().
+ *
+ * Returns IRQ_HANDLED on success.  A negative result from get_head,
+ * dequeue_msi or set_head is logged and IRQ_NONE is returned.
+ */
+static irqreturn_t sparc64_msiq_interrupt(int irq, void *cookie)
+{
+ struct sparc64_msiq_cookie *msiq_cookie = cookie;
+ struct pci_pbm_info *pbm = msiq_cookie->pbm;
+ unsigned long msiqid = msiq_cookie->msiqid;
+ const struct sparc64_msiq_ops *ops;
+ unsigned long orig_head, head;
+ int err;
+
+ ops = pbm->msi_ops;
+
+ err = ops->get_head(pbm, msiqid, &head);
+ if (unlikely(err < 0))
+ goto err_get_head;
+
+ orig_head = head;
+ for (;;) {
+ unsigned long msi;
+
+ err = ops->dequeue_msi(pbm, msiqid, &head, &msi);
+ if (likely(err > 0)) {
+ struct irq_desc *desc;
+ unsigned int virt_irq;
+
+ virt_irq = pbm->msi_irq_table[msi - pbm->msi_first];
+ desc = irq_desc + virt_irq;
+
+ desc->handle_irq(virt_irq, desc);
+ }
+
+ if (unlikely(err < 0))
+ goto err_dequeue;
+
+ if (err == 0)
+ break;
+ }
+ if (likely(head != orig_head)) {
+ err = ops->set_head(pbm, msiqid, head);
+ if (unlikely(err < 0))
+ goto err_set_head;
+ }
+ return IRQ_HANDLED;
+
+err_get_head:
+ printk(KERN_EMERG "MSI: Get head on msiqid[%lu] gives error %d\n",
+ msiqid, err);
+ goto err_out;
+
+err_dequeue:
+ printk(KERN_EMERG "MSI: Dequeue head[%lu] from msiqid[%lu] "
+ "gives error %d\n",
+ head, msiqid, err);
+ goto err_out;
+
+err_set_head:
+ printk(KERN_EMERG "MSI: Set head[%lu] on msiqid[%lu] "
+ "gives error %d\n",
+ head, msiqid, err);
+ goto err_out;
+
+err_out:
+ return IRQ_NONE;
+}
+
+/* Choose the MSI queue for the next MSI, round-robin over the PBM's
+ * queues.  Returns the absolute queue id (msiq_first + rotor) and
+ * advances the rotor, wrapping to 0 once it reaches msiq_num.  The
+ * rotor update is serialized by a function-local spinlock.
+ */
+static u32 pick_msiq(struct pci_pbm_info *pbm)
+{
+	static DEFINE_SPINLOCK(rotor_lock);
+	unsigned long irq_flags;
+	u32 cur, next, msiqid;
+
+	spin_lock_irqsave(&rotor_lock, irq_flags);
+
+	cur = pbm->msiq_rotor;
+	msiqid = pbm->msiq_first + cur;
+
+	next = cur + 1;
+	if (next >= pbm->msiq_num)
+		next = 0;
+	pbm->msiq_rotor = next;
+
+	spin_unlock_irqrestore(&rotor_lock, irq_flags);
+
+	return msiqid;
+}
+
+
+/* Reserve the lowest free MSI in the PBM's bitmap.
+ *
+ * Returns the absolute MSI number (bit index + msi_first), or
+ * -ENOENT when all msi_num bits are already set.
+ */
+static int alloc_msi(struct pci_pbm_info *pbm)
+{
+	int slot = 0;
+
+	while (slot < pbm->msi_num) {
+		/* test_and_set_bit() yields the old value: 0 means we got it. */
+		if (!test_and_set_bit(slot, pbm->msi_bitmap))
+			return slot + pbm->msi_first;
+		slot++;
+	}
+
+	return -ENOENT;
+}
+
+/* Release an MSI obtained from alloc_msi(); 'msi_num' is the absolute
+ * MSI number, which is converted back to a bitmap index here.
+ */
+static void free_msi(struct pci_pbm_info *pbm, int msi_num)
+{
+	int bit = msi_num - pbm->msi_first;
+
+	clear_bit(bit, pbm->msi_bitmap);
+}
+/* irq_chip attached to each MSI virtual IRQ by sparc64_setup_msi_irq().
+ * Enable/unmask map to unmask_msi_irq() and disable/mask map to
+ * mask_msi_irq(); no affinity hook is provided yet.
+ */
+static struct irq_chip msi_irq = {
+ .typename = "PCI-MSI",
+ .mask = mask_msi_irq,
+ .unmask = unmask_msi_irq,
+ .enable = unmask_msi_irq,
+ .disable = mask_msi_irq,
+ /* XXX affinity XXX */
+};
+/* Allocate and program one MSI for 'pdev'.
+ *
+ * Steps: allocate a virtual IRQ, attach the msi_irq chip with
+ * handle_simple_irq, reserve an MSI number from the PBM bitmap, pick
+ * a queue with pick_msiq(), call ops->msi_setup(), record the
+ * MSI -> virtual IRQ mapping in msi_irq_table, then write an msi_msg
+ * whose address is msi64_start or msi32_start (chosen by
+ * entry->msi_attrib.is_64) and whose data is the MSI number.
+ *
+ * Returns 0 with *virt_irq_p set on success.  On failure returns
+ * -ENOMEM when no virtual IRQ could be allocated, otherwise the error
+ * from alloc_msi() or ops->msi_setup(); whatever was allocated is
+ * released and *virt_irq_p is left as 0.
+ */
+static int sparc64_setup_msi_irq(unsigned int *virt_irq_p,
+ struct pci_dev *pdev,
+ struct msi_desc *entry)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ const struct sparc64_msiq_ops *ops = pbm->msi_ops;
+ struct msi_msg msg;
+ int msi, err;
+ u32 msiqid;
+
+ *virt_irq_p = virt_irq_alloc(0, 0);
+ err = -ENOMEM;
+ if (!*virt_irq_p)
+ goto out_err;
+
+ set_irq_chip_and_handler_name(*virt_irq_p, &msi_irq,
+ handle_simple_irq, "MSI");
+
+ err = alloc_msi(pbm);
+ if (unlikely(err < 0))
+ goto out_virt_irq_free;
+
+ msi = err;
+
+ msiqid = pick_msiq(pbm);
+
+ err = ops->msi_setup(pbm, msiqid, msi,
+ (entry->msi_attrib.is_64 ? 1 : 0));
+ if (err)
+ goto out_msi_free;
+
+ pbm->msi_irq_table[msi - pbm->msi_first] = *virt_irq_p;
+
+ if (entry->msi_attrib.is_64) {
+ msg.address_hi = pbm->msi64_start >> 32;
+ msg.address_lo = pbm->msi64_start & 0xffffffff;
+ } else {
+ msg.address_hi = 0;
+ msg.address_lo = pbm->msi32_start;
+ }
+ msg.data = msi;
+
+ set_irq_msi(*virt_irq_p, entry);
+ write_msi_msg(*virt_irq_p, &msg);
+
+ return 0;
+
+out_msi_free:
+ free_msi(pbm, msi);
+
+out_virt_irq_free:
+ set_irq_chip(*virt_irq_p, NULL);
+ virt_irq_free(*virt_irq_p);
+ *virt_irq_p = 0;
+
+out_err:
+ return err;
+}
+/* Undo sparc64_setup_msi_irq() for 'virt_irq'.
+ *
+ * The MSI is found by scanning msi_irq_table for 'virt_irq'; its slot
+ * is overwritten with ~0U, ops->msi_teardown() is called, and then
+ * the MSI number and the virtual IRQ are released.
+ *
+ * If no slot matches, or ops->msi_teardown() returns non-zero, an
+ * error is logged and the function returns early; in the latter case
+ * the MSI number and the virtual IRQ are not released.
+ */
+static void sparc64_teardown_msi_irq(unsigned int virt_irq,
+ struct pci_dev *pdev)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ const struct sparc64_msiq_ops *ops = pbm->msi_ops;
+ unsigned int msi_num;
+ int i, err;
+
+ for (i = 0; i < pbm->msi_num; i++) {
+ if (pbm->msi_irq_table[i] == virt_irq)
+ break;
+ }
+ if (i >= pbm->msi_num) {
+ printk(KERN_ERR "%s: teardown: No MSI for irq %u\n",
+ pbm->name, virt_irq);
+ return;
+ }
+
+ msi_num = pbm->msi_first + i;
+ pbm->msi_irq_table[i] = ~0U;
+
+ err = ops->msi_teardown(pbm, msi_num);
+ if (err) {
+ printk(KERN_ERR "%s: teardown: ops->teardown() on MSI %u, "
+ "irq %u, gives error %d\n",
+ pbm->name, msi_num, virt_irq, err);
+ return;
+ }
+
+ free_msi(pbm, msi_num);
+
+ set_irq_chip(virt_irq, NULL);
+ virt_irq_free(virt_irq);
+}
+
+/* Allocate the zeroed MSI allocation bitmap, one bit per MSI, with
+ * the bit count rounded up to a whole number of unsigned longs.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
+{
+	const unsigned long word_bits = sizeof(unsigned long) * 8;
+	unsigned long nbits, nbytes;
+
+	/* Round msi_num up to the next multiple of the word size. */
+	nbits = pbm->msi_num + (word_bits - 1);
+	nbits &= ~(word_bits - 1);
+
+	nbytes = nbits / 8;
+	BUG_ON(nbytes % sizeof(unsigned long));
+
+	pbm->msi_bitmap = kzalloc(nbytes, GFP_KERNEL);
+
+	return pbm->msi_bitmap ? 0 : -ENOMEM;
+}
+
+/* Free the MSI allocation bitmap and clear the PBM's pointer to it. */
+static void msi_bitmap_free(struct pci_pbm_info *pbm)
+{
+	unsigned long *bitmap = pbm->msi_bitmap;
+
+	pbm->msi_bitmap = NULL;
+	kfree(bitmap);
+}
+
+/* Allocate the per-queue interrupt cookies and the MSI -> virtual IRQ
+ * table.  Each cookie is filled in with its PBM and absolute queue id.
+ *
+ * Returns 0 on success.  On -ENOMEM nothing stays allocated and
+ * msiq_irq_cookies is reset to NULL.
+ */
+static int msi_table_alloc(struct pci_pbm_info *pbm)
+{
+	struct sparc64_msiq_cookie *cookie;
+	int nbytes, q;
+
+	nbytes = pbm->msiq_num * sizeof(struct sparc64_msiq_cookie);
+	pbm->msiq_irq_cookies = kzalloc(nbytes, GFP_KERNEL);
+	if (!pbm->msiq_irq_cookies)
+		return -ENOMEM;
+
+	cookie = pbm->msiq_irq_cookies;
+	for (q = 0; q < pbm->msiq_num; q++, cookie++) {
+		cookie->pbm = pbm;
+		cookie->msiqid = pbm->msiq_first + q;
+	}
+
+	nbytes = pbm->msi_num * sizeof(unsigned int);
+	pbm->msi_irq_table = kzalloc(nbytes, GFP_KERNEL);
+	if (pbm->msi_irq_table)
+		return 0;
+
+	/* Second allocation failed: give back the cookies. */
+	kfree(pbm->msiq_irq_cookies);
+	pbm->msiq_irq_cookies = NULL;
+
+	return -ENOMEM;
+}
+
+/* Free both tables set up by msi_table_alloc() and clear the PBM's
+ * pointers to them.
+ */
+static void msi_table_free(struct pci_pbm_info *pbm)
+{
+	void *cookies = pbm->msiq_irq_cookies;
+	void *irq_table = pbm->msi_irq_table;
+
+	pbm->msiq_irq_cookies = NULL;
+	pbm->msi_irq_table = NULL;
+
+	kfree(cookies);
+	kfree(irq_table);
+}
+
+/* Build the IRQ for one MSI queue and attach sparc64_msiq_interrupt()
+ * to it, using the queue's cookie as the handler argument.  When the
+ * PBM has a NUMA node (numa_node != -1) the IRQ affinity is set to
+ * that node's CPUs first.
+ *
+ * Returns 0 on success, the negative value from ops->msiq_build_irq(),
+ * or the error from request_irq().
+ */
+static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
+				 const struct sparc64_msiq_ops *ops,
+				 unsigned long msiqid,
+				 unsigned long devino)
+{
+	struct sparc64_msiq_cookie *cookie;
+	int virq;
+
+	virq = ops->msiq_build_irq(pbm, msiqid, devino);
+	if (virq < 0)
+		return virq;
+
+	if (pbm->numa_node != -1) {
+		cpumask_t node_cpus = node_to_cpumask(pbm->numa_node);
+
+		irq_set_affinity(virq, node_cpus);
+	}
+
+	cookie = &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first];
+
+	return request_irq(virq, sparc64_msiq_interrupt, 0, "MSIQ", cookie);
+}
+
+/* Bring up every MSI queue of the PBM in order, stopping at the first
+ * failure.  Returns 0 if all queues came up, else that first error.
+ */
+static int sparc64_bringup_msi_queues(struct pci_pbm_info *pbm,
+				      const struct sparc64_msiq_ops *ops)
+{
+	int idx, rc = 0;
+
+	for (idx = 0; !rc && idx < pbm->msiq_num; idx++)
+		rc = bringup_one_msi_queue(pbm, ops,
+					   idx + pbm->msiq_first,
+					   idx + pbm->msiq_first_devino);
+
+	return rc;
+}
+/* Probe the PBM's OF node for MSI properties and bring up MSI support.
+ *
+ * "#msi-eqs" gives the number of MSI queues; if it is present and
+ * zero the function returns without enabling anything.  Otherwise the
+ * queue size, queue/devino mapping, MSI count and range, data mask,
+ * MSI-X data width and the 32-bit/64-bit MSI address ranges are read,
+ * the bitmap and tables are allocated, ops->msiq_alloc() is called and
+ * the queue interrupts are requested.  On success the parameters are
+ * logged and pbm->msi_ops plus the setup/teardown hooks are installed.
+ *
+ * Any missing or wrongly sized property, or a failure in one of the
+ * allocation/bringup steps (after undoing the earlier steps), jumps to
+ * no_msi, which zeroes pbm->msiq_num and logs "No MSI support".
+ */
+void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
+ const struct sparc64_msiq_ops *ops)
+{
+ const u32 *val;
+ int len;
+
+ val = of_get_property(pbm->op->node, "#msi-eqs", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msiq_num = *val;
+ if (pbm->msiq_num) {
+ const struct msiq_prop {
+ u32 first_msiq;
+ u32 num_msiq;
+ u32 first_devino;
+ } *mqp;
+ const struct msi_range_prop {
+ u32 first_msi;
+ u32 num_msi;
+ } *mrng;
+ const struct addr_range_prop {
+ u32 msi32_high;
+ u32 msi32_low;
+ u32 msi32_len;
+ u32 msi64_high;
+ u32 msi64_low;
+ u32 msi64_len;
+ } *arng;
+
+ val = of_get_property(pbm->op->node, "msi-eq-size", &len);
+ if (!val || len != 4)
+ goto no_msi;
+
+ pbm->msiq_ent_count = *val;
+
+ mqp = of_get_property(pbm->op->node,
+ "msi-eq-to-devino", &len);
+ if (!mqp)
+ mqp = of_get_property(pbm->op->node,
+ "msi-eq-devino", &len);
+ if (!mqp || len != sizeof(struct msiq_prop))
+ goto no_msi;
+
+ pbm->msiq_first = mqp->first_msiq;
+ pbm->msiq_first_devino = mqp->first_devino;
+
+ val = of_get_property(pbm->op->node, "#msi", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msi_num = *val;
+
+ mrng = of_get_property(pbm->op->node, "msi-ranges", &len);
+ if (!mrng || len != sizeof(struct msi_range_prop))
+ goto no_msi;
+ pbm->msi_first = mrng->first_msi;
+
+ val = of_get_property(pbm->op->node, "msi-data-mask", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msi_data_mask = *val;
+
+ val = of_get_property(pbm->op->node, "msix-data-width", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msix_data_width = *val;
+
+ arng = of_get_property(pbm->op->node, "msi-address-ranges",
+ &len);
+ if (!arng || len != sizeof(struct addr_range_prop))
+ goto no_msi;
+ pbm->msi32_start = ((u64)arng->msi32_high << 32) |
+ (u64) arng->msi32_low;
+ pbm->msi64_start = ((u64)arng->msi64_high << 32) |
+ (u64) arng->msi64_low;
+ pbm->msi32_len = arng->msi32_len;
+ pbm->msi64_len = arng->msi64_len;
+
+ if (msi_bitmap_alloc(pbm))
+ goto no_msi;
+
+ if (msi_table_alloc(pbm)) {
+ msi_bitmap_free(pbm);
+ goto no_msi;
+ }
+
+ if (ops->msiq_alloc(pbm)) {
+ msi_table_free(pbm);
+ msi_bitmap_free(pbm);
+ goto no_msi;
+ }
+
+ if (sparc64_bringup_msi_queues(pbm, ops)) {
+ ops->msiq_free(pbm);
+ msi_table_free(pbm);
+ msi_bitmap_free(pbm);
+ goto no_msi;
+ }
+
+ printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
+ "devino[0x%x]\n",
+ pbm->name,
+ pbm->msiq_first, pbm->msiq_num,
+ pbm->msiq_ent_count,
+ pbm->msiq_first_devino);
+ printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
+ "width[%u]\n",
+ pbm->name,
+ pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
+ pbm->msix_data_width);
+ printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
+ "addr64[0x%lx:0x%x]\n",
+ pbm->name,
+ pbm->msi32_start, pbm->msi32_len,
+ pbm->msi64_start, pbm->msi64_len);
+ printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n",
+ pbm->name,
+ __pa(pbm->msi_queues));
+
+ pbm->msi_ops = ops;
+ pbm->setup_msi_irq = sparc64_setup_msi_irq;
+ pbm->teardown_msi_irq = sparc64_teardown_msi_irq;
+ }
+ return;
+
+no_msi:
+ pbm->msiq_num = 0;
+ printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
+}
diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c
new file mode 100644
index 000000000000..dfb3ec892987
--- /dev/null
+++ b/arch/sparc/kernel/pci_psycho.c
@@ -0,0 +1,618 @@
+/* pci_psycho.c: PSYCHO/U2P specific PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/starfire.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+#include "psycho_common.h"
+
+#define DRIVER_NAME "psycho"
+#define PFX DRIVER_NAME ": "
+
+/* Misc. PSYCHO PCI controller register offsets and definitions. */
+#define PSYCHO_CONTROL 0x0010UL
+#define PSYCHO_CONTROL_IMPL 0xf000000000000000UL /* Implementation of this PSYCHO*/
+#define PSYCHO_CONTROL_VER 0x0f00000000000000UL /* Version of this PSYCHO */
+#define PSYCHO_CONTROL_MID 0x00f8000000000000UL /* UPA Module ID of PSYCHO */
+#define PSYCHO_CONTROL_IGN 0x0007c00000000000UL /* Interrupt Group Number */
+#define PSYCHO_CONTROL_RESV 0x00003ffffffffff0UL /* Reserved */
+#define PSYCHO_CONTROL_APCKEN 0x0000000000000008UL /* Address Parity Check Enable */
+#define PSYCHO_CONTROL_APERR 0x0000000000000004UL /* Incoming System Addr Parerr */
+#define PSYCHO_CONTROL_IAP 0x0000000000000002UL /* Invert UPA Parity */
+#define PSYCHO_CONTROL_MODE 0x0000000000000001UL /* PSYCHO clock mode */
+#define PSYCHO_PCIA_CTRL 0x2000UL
+#define PSYCHO_PCIB_CTRL 0x4000UL
+#define PSYCHO_PCICTRL_RESV1 0xfffffff000000000UL /* Reserved */
+#define PSYCHO_PCICTRL_SBH_ERR 0x0000000800000000UL /* Streaming byte hole error */
+#define PSYCHO_PCICTRL_SERR 0x0000000400000000UL /* SERR signal asserted */
+#define PSYCHO_PCICTRL_SPEED 0x0000000200000000UL /* PCI speed (1 is U2P clock) */
+#define PSYCHO_PCICTRL_RESV2 0x00000001ffc00000UL /* Reserved */
+#define PSYCHO_PCICTRL_ARB_PARK 0x0000000000200000UL /* PCI arbitration parking */
+#define PSYCHO_PCICTRL_RESV3 0x00000000001ff800UL /* Reserved */
+#define PSYCHO_PCICTRL_SBH_INT 0x0000000000000400UL /* Streaming byte hole int enab */
+#define PSYCHO_PCICTRL_WEN 0x0000000000000200UL /* Power Mgmt Wake Enable */
+#define PSYCHO_PCICTRL_EEN 0x0000000000000100UL /* PCI Error Interrupt Enable */
+#define PSYCHO_PCICTRL_RESV4 0x00000000000000c0UL /* Reserved */
+#define PSYCHO_PCICTRL_AEN 0x000000000000003fUL /* PCI DVMA Arbitration Enable */
+
+/* PSYCHO error handling support. */
+
+/* Helper function of IOMMU error checking, which checks out
+ * the state of the streaming buffers. The IOMMU lock is
+ * held when this is called.
+ *
+ * For the PCI error case we know which PBM (and thus which
+ * streaming buffer) caused the error, but for the uncorrectable
+ * error case we do not. So we always check both streaming caches.
+ */
+#define PSYCHO_STRBUF_CONTROL_A 0x2800UL
+#define PSYCHO_STRBUF_CONTROL_B 0x4800UL
+#define PSYCHO_STRBUF_CTRL_LPTR 0x00000000000000f0UL /* LRU Lock Pointer */
+#define PSYCHO_STRBUF_CTRL_LENAB 0x0000000000000008UL /* LRU Lock Enable */
+#define PSYCHO_STRBUF_CTRL_RRDIS 0x0000000000000004UL /* Rerun Disable */
+#define PSYCHO_STRBUF_CTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */
+#define PSYCHO_STRBUF_CTRL_ENAB 0x0000000000000001UL /* Streaming Buffer Enable */
+#define PSYCHO_STRBUF_FLUSH_A 0x2808UL
+#define PSYCHO_STRBUF_FLUSH_B 0x4808UL
+#define PSYCHO_STRBUF_FSYNC_A 0x2810UL
+#define PSYCHO_STRBUF_FSYNC_B 0x4810UL
+#define PSYCHO_STC_DATA_A 0xb000UL
+#define PSYCHO_STC_DATA_B 0xc000UL
+#define PSYCHO_STC_ERR_A 0xb400UL
+#define PSYCHO_STC_ERR_B 0xc400UL
+#define PSYCHO_STC_TAG_A 0xb800UL
+#define PSYCHO_STC_TAG_B 0xc800UL
+#define PSYCHO_STC_LINE_A 0xb900UL
+#define PSYCHO_STC_LINE_B 0xc900UL
+
+/* When an Uncorrectable Error or a PCI Error happens, we
+ * interrogate the IOMMU state to see if it is the cause.
+ */
+#define PSYCHO_IOMMU_CONTROL 0x0200UL
+#define PSYCHO_IOMMU_CTRL_RESV 0xfffffffff9000000UL /* Reserved */
+#define PSYCHO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status */
+#define PSYCHO_IOMMU_CTRL_XLTEERR 0x0000000001000000UL /* Translation Error encountered */
+#define PSYCHO_IOMMU_CTRL_LCKEN 0x0000000000800000UL /* Enable translation locking */
+#define PSYCHO_IOMMU_CTRL_LCKPTR 0x0000000000780000UL /* Translation lock pointer */
+#define PSYCHO_IOMMU_CTRL_TSBSZ 0x0000000000070000UL /* TSB Size */
+#define PSYCHO_IOMMU_TSBSZ_1K 0x0000000000000000UL /* TSB Table 1024 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_2K 0x0000000000010000UL /* TSB Table 2048 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_4K 0x0000000000020000UL /* TSB Table 4096 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_8K 0x0000000000030000UL /* TSB Table 8192 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_16K 0x0000000000040000UL /* TSB Table 16k 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_32K 0x0000000000050000UL /* TSB Table 32k 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_64K 0x0000000000060000UL /* TSB Table 64k 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_128K 0x0000000000070000UL /* TSB Table 128k 8-byte entries */
+#define PSYCHO_IOMMU_CTRL_RESV2 0x000000000000fff8UL /* Reserved */
+#define PSYCHO_IOMMU_CTRL_TBWSZ 0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */
+#define PSYCHO_IOMMU_CTRL_DENAB 0x0000000000000002UL /* Diagnostic mode enable */
+#define PSYCHO_IOMMU_CTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */
+#define PSYCHO_IOMMU_TSBBASE 0x0208UL
+#define PSYCHO_IOMMU_FLUSH 0x0210UL
+#define PSYCHO_IOMMU_TAG 0xa580UL
+#define PSYCHO_IOMMU_DATA 0xa600UL
+
+/* Uncorrectable Errors. Cause of the error and the address are
+ * recorded in the UE_AFSR and UE_AFAR of PSYCHO. They are errors
+ * relating to UPA interface transactions.
+ */
+#define PSYCHO_UE_AFSR 0x0030UL
+#define PSYCHO_UEAFSR_PPIO 0x8000000000000000UL /* Primary PIO is cause */
+#define PSYCHO_UEAFSR_PDRD 0x4000000000000000UL /* Primary DVMA read is cause */
+#define PSYCHO_UEAFSR_PDWR 0x2000000000000000UL /* Primary DVMA write is cause */
+#define PSYCHO_UEAFSR_SPIO 0x1000000000000000UL /* Secondary PIO is cause */
+#define PSYCHO_UEAFSR_SDRD 0x0800000000000000UL /* Secondary DVMA read is cause */
+#define PSYCHO_UEAFSR_SDWR 0x0400000000000000UL /* Secondary DVMA write is cause*/
+#define PSYCHO_UEAFSR_RESV1 0x03ff000000000000UL /* Reserved */
+#define PSYCHO_UEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask of failed transfer */
+#define PSYCHO_UEAFSR_DOFF 0x00000000e0000000UL /* Doubleword Offset */
+#define PSYCHO_UEAFSR_MID 0x000000001f000000UL /* UPA MID causing the fault */
+#define PSYCHO_UEAFSR_BLK 0x0000000000800000UL /* Trans was block operation */
+#define PSYCHO_UEAFSR_RESV2 0x00000000007fffffUL /* Reserved */
+#define PSYCHO_UE_AFAR 0x0038UL
+/* Handler for PSYCHO uncorrectable error interrupts; 'dev_id' is the
+ * struct pci_pbm_info passed to request_irq().
+ *
+ * Reads the UE AFAR and AFSR.  If no primary or secondary error bit
+ * is set in the AFSR, returns IRQ_NONE.  Otherwise the set error bits
+ * are written back to the AFSR, the decoded status and AFAR are
+ * logged, psycho_check_iommu_error() is run for this PBM and for its
+ * sibling (if any), and IRQ_HANDLED is returned.
+ */
+static irqreturn_t psycho_ue_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + PSYCHO_UE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + PSYCHO_UE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch uncorrectable error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear the primary/secondary error status bits. */
+ error_bits = afsr &
+ (PSYCHO_UEAFSR_PPIO | PSYCHO_UEAFSR_PDRD | PSYCHO_UEAFSR_PDWR |
+ PSYCHO_UEAFSR_SPIO | PSYCHO_UEAFSR_SDRD | PSYCHO_UEAFSR_SDWR);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Uncorrectable Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & PSYCHO_UEAFSR_PPIO) ?
+ "PIO" :
+ ((error_bits & PSYCHO_UEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & PSYCHO_UEAFSR_PDWR) ?
+ "DMA Write" : "???")))));
+ printk("%s: bytemask[%04lx] dword_offset[%lx] UPA_MID[%02lx] was_block(%d)\n",
+ pbm->name,
+ (afsr & PSYCHO_UEAFSR_BMSK) >> 32UL,
+ (afsr & PSYCHO_UEAFSR_DOFF) >> 29UL,
+ (afsr & PSYCHO_UEAFSR_MID) >> 24UL,
+ ((afsr & PSYCHO_UEAFSR_BLK) ? 1 : 0));
+ printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: UE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & PSYCHO_UEAFSR_SPIO) {
+ reported++;
+ printk("(PIO)");
+ }
+ if (afsr & PSYCHO_UEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & PSYCHO_UEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* Interrogate both IOMMUs for error status. */
+ psycho_check_iommu_error(pbm, afsr, afar, UE_ERR);
+ if (pbm->sibling)
+ psycho_check_iommu_error(pbm->sibling, afsr, afar, UE_ERR);
+
+ return IRQ_HANDLED;
+}
+
+/* Correctable Errors. */
+#define PSYCHO_CE_AFSR 0x0040UL
+#define PSYCHO_CEAFSR_PPIO 0x8000000000000000UL /* Primary PIO is cause */
+#define PSYCHO_CEAFSR_PDRD 0x4000000000000000UL /* Primary DVMA read is cause */
+#define PSYCHO_CEAFSR_PDWR 0x2000000000000000UL /* Primary DVMA write is cause */
+#define PSYCHO_CEAFSR_SPIO 0x1000000000000000UL /* Secondary PIO is cause */
+#define PSYCHO_CEAFSR_SDRD 0x0800000000000000UL /* Secondary DVMA read is cause */
+#define PSYCHO_CEAFSR_SDWR 0x0400000000000000UL /* Secondary DVMA write is cause*/
+#define PSYCHO_CEAFSR_RESV1 0x0300000000000000UL /* Reserved */
+#define PSYCHO_CEAFSR_ESYND 0x00ff000000000000UL /* Syndrome Bits */
+#define PSYCHO_CEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask of failed transfer */
+#define PSYCHO_CEAFSR_DOFF 0x00000000e0000000UL /* Double Offset */
+#define PSYCHO_CEAFSR_MID 0x000000001f000000UL /* UPA MID causing the fault */
+#define PSYCHO_CEAFSR_BLK 0x0000000000800000UL /* Trans was block operation */
+#define PSYCHO_CEAFSR_RESV2 0x00000000007fffffUL /* Reserved */
+#define PSYCHO_CE_AFAR 0x0048UL /* CE fault address; distinct from CE_AFSR at 0x0040 */
+/* Handler for PSYCHO correctable error interrupts; 'dev_id' is the
+ * struct pci_pbm_info passed to request_irq().
+ *
+ * Reads the registers at PSYCHO_CE_AFAR and PSYCHO_CE_AFSR.  If no
+ * primary or secondary error bit is set in the AFSR, returns IRQ_NONE.
+ * Otherwise the set error bits are written back to the AFSR, the
+ * decoded status (including the ECC syndrome) and AFAR are logged,
+ * and IRQ_HANDLED is returned.  No IOMMU check is done here.
+ */
+static irqreturn_t psycho_ce_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + PSYCHO_CE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + PSYCHO_CE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (PSYCHO_CEAFSR_PPIO | PSYCHO_CEAFSR_PDRD | PSYCHO_CEAFSR_PDWR |
+ PSYCHO_CEAFSR_SPIO | PSYCHO_CEAFSR_SDRD | PSYCHO_CEAFSR_SDWR);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Correctable Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & PSYCHO_CEAFSR_PPIO) ?
+ "PIO" :
+ ((error_bits & PSYCHO_CEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & PSYCHO_CEAFSR_PDWR) ?
+ "DMA Write" : "???")))));
+
+ /* XXX Use syndrome and afar to print out module string just like
+ * XXX UDB CE trap handler does... -DaveM
+ */
+ printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] "
+ "UPA_MID[%02lx] was_block(%d)\n",
+ pbm->name,
+ (afsr & PSYCHO_CEAFSR_ESYND) >> 48UL,
+ (afsr & PSYCHO_CEAFSR_BMSK) >> 32UL,
+ (afsr & PSYCHO_CEAFSR_DOFF) >> 29UL,
+ (afsr & PSYCHO_CEAFSR_MID) >> 24UL,
+ ((afsr & PSYCHO_CEAFSR_BLK) ? 1 : 0));
+ printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: CE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & PSYCHO_CEAFSR_SPIO) {
+ reported++;
+ printk("(PIO)");
+ }
+ if (afsr & PSYCHO_CEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & PSYCHO_CEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ return IRQ_HANDLED;
+}
+
+/* PCI Errors. They are signalled by the PCI bus module since they
+ * are associated with a specific bus segment.
+ */
+#define PSYCHO_PCI_AFSR_A 0x2010UL
+#define PSYCHO_PCI_AFSR_B 0x4010UL
+#define PSYCHO_PCI_AFAR_A 0x2018UL
+#define PSYCHO_PCI_AFAR_B 0x4018UL
+
+/* XXX What about PowerFail/PowerManagement??? -DaveM */
+#define PSYCHO_ECC_CTRL 0x0020
+#define PSYCHO_ECCCTRL_EE 0x8000000000000000UL /* Enable ECC Checking */
+#define PSYCHO_ECCCTRL_UE 0x4000000000000000UL /* Enable UE Interrupts */
+#define PSYCHO_ECCCTRL_CE 0x2000000000000000UL /* Enable CE INterrupts */
+static void psycho_register_error_handlers(struct pci_pbm_info *pbm)
+{
+ struct of_device *op = of_find_device_by_node(pbm->op->node);
+ unsigned long base = pbm->controller_regs;
+ u64 tmp;
+ int err;
+
+ if (!op)
+ return;
+
+ /* Register the UE, CE and PCI error interrupt handlers for
+ * this PBM, then enable error reporting in the controller.
+ * Nothing is done unless all six interrupts are present.
+ *
+ * Psycho interrupt property order is:
+ * 0: PCIERR INO for this PBM
+ * 1: UE ERR
+ * 2: CE ERR
+ * 3: POWER FAIL
+ * 4: SPARE HARDWARE
+ * 5: POWER MANAGEMENT
+ */
+
+ if (op->num_irqs < 6)
+ return;
+
+ /* We really mean to ignore the return result here. Two
+ * PCI controllers share the same interrupt numbers and
+ * drive the same front-end hardware, so the UE and CE
+ * handlers are requested with IRQF_SHARED and the result
+ * stored in 'err' is overwritten without being checked.
+ */
+ err = request_irq(op->irqs[1], psycho_ue_intr, IRQF_SHARED,
+ "PSYCHO_UE", pbm);
+ err = request_irq(op->irqs[2], psycho_ce_intr, IRQF_SHARED,
+ "PSYCHO_CE", pbm);
+
+ /* This one, however, ought not to fail. We can just warn
+ * about it since the system can still operate properly even
+ * if this fails.
+ */
+ err = request_irq(op->irqs[0], psycho_pcierr_intr, IRQF_SHARED,
+ "PSYCHO_PCIERR", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Could not register PCIERR, "
+ "err=%d\n", pbm->name, err);
+
+ /* Enable UE and CE interrupts for controller. */
+ upa_writeq((PSYCHO_ECCCTRL_EE |
+ PSYCHO_ECCCTRL_UE |
+ PSYCHO_ECCCTRL_CE), base + PSYCHO_ECC_CTRL);
+
+ /* Enable PCI Error interrupts and clear error
+ * bits for each PBM.
+ */
+ tmp = upa_readq(base + PSYCHO_PCIA_CTRL);
+ tmp |= (PSYCHO_PCICTRL_SERR |
+ PSYCHO_PCICTRL_SBH_ERR |
+ PSYCHO_PCICTRL_EEN);
+ tmp &= ~(PSYCHO_PCICTRL_SBH_INT);
+ upa_writeq(tmp, base + PSYCHO_PCIA_CTRL);
+
+ tmp = upa_readq(base + PSYCHO_PCIB_CTRL);
+ tmp |= (PSYCHO_PCICTRL_SERR |
+ PSYCHO_PCICTRL_SBH_ERR |
+ PSYCHO_PCICTRL_EEN);
+ tmp &= ~(PSYCHO_PCICTRL_SBH_INT);
+ upa_writeq(tmp, base + PSYCHO_PCIB_CTRL);
+}
+
+/* PSYCHO boot time probing and initialization. */
+/* Write the PBM's bus-mastering parameters through the config space
+ * address built for the first bus number with a devfn of 0.
+ */
+static void pbm_config_busmastering(struct pci_pbm_info *pbm)
+{
+	u8 *cache_line_reg, *latency_reg;
+
+	/* 64-byte cache line, expressed in 32-bit words.  This is
+	 * actually a nop, done only for completeness.
+	 */
+	cache_line_reg = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+						  0, PCI_CACHE_LINE_SIZE);
+	pci_config_write8(cache_line_reg, 64 / sizeof(u32));
+
+	/* PBM latency timer: 64 PCI clocks. */
+	latency_reg = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+					       0, PCI_LATENCY_TIMER);
+	pci_config_write8(latency_reg, 64);
+}
+/* Configure bus mastering on the PBM, mark it as not 66MHz capable,
+ * scan its PCI bus (result stored in pbm->pci_bus) and then register
+ * the error interrupt handlers.
+ */
+static void __init psycho_scan_bus(struct pci_pbm_info *pbm,
+ struct device *parent)
+{
+ pbm_config_busmastering(pbm);
+ pbm->is_66mhz_capable = 0;
+ pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+
+ /* After the PCI bus scan is complete, we can register
+ * the error interrupt handlers.
+ */
+ psycho_register_error_handlers(pbm);
+}
+
+#define PSYCHO_IRQ_RETRY 0x1a00UL
+#define PSYCHO_PCIA_DIAG 0x2020UL
+#define PSYCHO_PCIB_DIAG 0x4020UL
+#define PSYCHO_PCIDIAG_RESV 0xffffffffffffff80UL /* Reserved */
+#define PSYCHO_PCIDIAG_DRETRY 0x0000000000000040UL /* Disable retry limit */
+#define PSYCHO_PCIDIAG_DISYNC 0x0000000000000020UL /* Disable DMA wr / irq sync */
+#define PSYCHO_PCIDIAG_DDWSYNC 0x0000000000000010UL /* Disable DMA wr / PIO rd sync */
+#define PSYCHO_PCIDIAG_IDDPAR 0x0000000000000008UL /* Invert DMA data parity */
+#define PSYCHO_PCIDIAG_IPDPAR 0x0000000000000004UL /* Invert PIO data parity */
+#define PSYCHO_PCIDIAG_IPAPAR 0x0000000000000002UL /* Invert PIO address parity */
+#define PSYCHO_PCIDIAG_LPBACK 0x0000000000000001UL /* Enable loopback mode */
+
+/* Basic hardware setup of the PSYCHO controller behind 'pbm': program
+ * the IRQ retry register, set the arbitration enable bits and apply
+ * the DMA write / PIO read sync workaround on both PCI bus segments.
+ */
+static void psycho_controller_hwinit(struct pci_pbm_info *pbm)
+{
+	const unsigned long regs = pbm->controller_regs;
+	u64 val;
+
+	upa_writeq(5, regs + PSYCHO_IRQ_RETRY);
+
+	/* Enable arbiter for all PCI slots, segment A then B. */
+	val = upa_readq(regs + PSYCHO_PCIA_CTRL);
+	upa_writeq(val | PSYCHO_PCICTRL_AEN, regs + PSYCHO_PCIA_CTRL);
+
+	val = upa_readq(regs + PSYCHO_PCIB_CTRL);
+	upa_writeq(val | PSYCHO_PCICTRL_AEN, regs + PSYCHO_PCIB_CTRL);
+
+	/* Disable DMA write / PIO read synchronization on both
+	 * PCI bus segments.
+	 * [ U2P Erratum 1243770, STP2223BGA data sheet ]
+	 */
+	val = upa_readq(regs + PSYCHO_PCIA_DIAG);
+	upa_writeq(val | PSYCHO_PCIDIAG_DDWSYNC, regs + PSYCHO_PCIA_DIAG);
+
+	val = upa_readq(regs + PSYCHO_PCIB_DIAG);
+	upa_writeq(val | PSYCHO_PCIDIAG_DDWSYNC, regs + PSYCHO_PCIB_DIAG);
+}
+/* Set up the PBM's streaming buffer state and enable the buffer.
+ *
+ * 'is_pbm_a' selects the A or B set of streaming buffer register
+ * offsets from the controller register base.  The context flush
+ * fields are zeroed, strbuf_flushflag is pointed at the first 64-byte
+ * aligned address inside __flushflag_buf (with its physical address
+ * saved alongside), and the control register is rewritten with the
+ * enable bit set and the LRU lock bits cleared.
+ */
+static void psycho_pbm_strbuf_init(struct pci_pbm_info *pbm,
+ int is_pbm_a)
+{
+ unsigned long base = pbm->controller_regs;
+ u64 control;
+
+ if (is_pbm_a) {
+ pbm->stc.strbuf_control = base + PSYCHO_STRBUF_CONTROL_A;
+ pbm->stc.strbuf_pflush = base + PSYCHO_STRBUF_FLUSH_A;
+ pbm->stc.strbuf_fsync = base + PSYCHO_STRBUF_FSYNC_A;
+ pbm->stc.strbuf_err_stat = base + PSYCHO_STC_ERR_A;
+ pbm->stc.strbuf_tag_diag = base + PSYCHO_STC_TAG_A;
+ pbm->stc.strbuf_line_diag= base + PSYCHO_STC_LINE_A;
+ } else {
+ pbm->stc.strbuf_control = base + PSYCHO_STRBUF_CONTROL_B;
+ pbm->stc.strbuf_pflush = base + PSYCHO_STRBUF_FLUSH_B;
+ pbm->stc.strbuf_fsync = base + PSYCHO_STRBUF_FSYNC_B;
+ pbm->stc.strbuf_err_stat = base + PSYCHO_STC_ERR_B;
+ pbm->stc.strbuf_tag_diag = base + PSYCHO_STC_TAG_B;
+ pbm->stc.strbuf_line_diag= base + PSYCHO_STC_LINE_B;
+ }
+ /* PSYCHO's streaming buffer lacks ctx flushing. */
+ pbm->stc.strbuf_ctxflush = 0;
+ pbm->stc.strbuf_ctxmatch_base = 0;
+
+ pbm->stc.strbuf_flushflag = (volatile unsigned long *)
+ ((((unsigned long)&pbm->stc.__flushflag_buf[0])
+ + 63UL)
+ & ~63UL);
+ pbm->stc.strbuf_flushflag_pa = (unsigned long)
+ __pa(pbm->stc.strbuf_flushflag);
+
+ /* Enable the streaming buffer. We have to be careful
+ * just in case OBP left it with LRU locking enabled.
+ *
+ * It is possible to control if PBM will be rerun on
+ * line misses. Currently I just retain whatever setting
+ * OBP left us with. All checks so far show it having
+ * a value of zero.
+ *
+ * Both RERUN macros are #undef'd just below, so neither
+ * preprocessor branch is compiled and the RRDIS bit is
+ * written back exactly as it was read.
+ */
+#undef PSYCHO_STRBUF_RERUN_ENABLE
+#undef PSYCHO_STRBUF_RERUN_DISABLE
+ control = upa_readq(pbm->stc.strbuf_control);
+ control |= PSYCHO_STRBUF_CTRL_ENAB;
+ control &= ~(PSYCHO_STRBUF_CTRL_LENAB | PSYCHO_STRBUF_CTRL_LPTR);
+#ifdef PSYCHO_STRBUF_RERUN_ENABLE
+ control &= ~(PSYCHO_STRBUF_CTRL_RRDIS);
+#else
+#ifdef PSYCHO_STRBUF_RERUN_DISABLE
+ control |= PSYCHO_STRBUF_CTRL_RRDIS;
+#endif
+#endif
+ upa_writeq(control, pbm->stc.strbuf_control);
+
+ pbm->stc.strbuf_enabled = 1;
+}
+
+#define PSYCHO_IOSPACE_A 0x002000000UL
+#define PSYCHO_IOSPACE_B 0x002010000UL
+#define PSYCHO_IOSPACE_SIZE 0x00000ffffUL
+#define PSYCHO_MEMSPACE_A 0x100000000UL
+#define PSYCHO_MEMSPACE_B 0x180000000UL
+#define PSYCHO_MEMSPACE_SIZE 0x07fffffffUL
+/* Per-PBM initialization: run the common PSYCHO PBM setup (name
+ * "PSYCHO", chip type PBM_CHIP_TYPE_PSYCHO), initialize the streaming
+ * buffer for segment A or B, then scan the PCI bus below it.
+ */
+static void __init psycho_pbm_init(struct pci_pbm_info *pbm,
+ struct of_device *op, int is_pbm_a)
+{
+ psycho_pbm_init_common(pbm, op, "PSYCHO", PBM_CHIP_TYPE_PSYCHO);
+ psycho_pbm_strbuf_init(pbm, is_pbm_a);
+ psycho_scan_bus(pbm, &op->dev);
+}
+
+/* Walk the pci_pbm_root list and return the first PBM whose portid
+ * equals 'upa_portid', or NULL if none has been registered yet.
+ */
+static struct pci_pbm_info * __devinit psycho_find_sibling(u32 upa_portid)
+{
+	struct pci_pbm_info *cur = pci_pbm_root;
+
+	while (cur && cur->portid != upa_portid)
+		cur = cur->next;
+
+	return cur;
+}
+
+#define PSYCHO_CONFIGSPACE 0x001000000UL
+/* Probe one PSYCHO PCI bus module.
+ *
+ * Allocates a pci_pbm_info.  If an already registered PBM has the
+ * same "upa-portid" it becomes the sibling and its iommu is shared;
+ * otherwise a new iommu is allocated and later initialized with
+ * psycho_iommu_init().  Register addresses come from the "reg"
+ * property: bits 0x6000 of entry 0 select segment A vs. B, entry 2
+ * gives the controller register base and, offset by
+ * PSYCHO_CONFIGSPACE, the config space.  After hardware init and PBM
+ * init the PBM is linked at the head of pci_pbm_root and stored as
+ * the device's driver data.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, -ENODEV if
+ * there is no "reg" property, or the error from psycho_iommu_init().
+ * On failure the pbm, and the iommu when it is not shared with a
+ * sibling, are freed.
+ */
+static int __devinit psycho_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ const struct linux_prom64_registers *pr_regs;
+ struct device_node *dp = op->node;
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ int is_pbm_a, err;
+ u32 upa_portid;
+
+ upa_portid = of_getintprop_default(dp, "upa-portid", 0xff);
+
+ err = -ENOMEM;
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+ goto out_err;
+ }
+
+ pbm->sibling = psycho_find_sibling(upa_portid);
+ if (pbm->sibling) {
+ iommu = pbm->sibling->iommu;
+ } else {
+ iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+ goto out_free_controller;
+ }
+ }
+
+ pbm->iommu = iommu;
+ pbm->portid = upa_portid;
+
+ pr_regs = of_get_property(dp, "reg", NULL);
+ err = -ENODEV;
+ if (!pr_regs) {
+ printk(KERN_ERR PFX "No reg property.\n");
+ goto out_free_iommu;
+ }
+
+ is_pbm_a = ((pr_regs[0].phys_addr & 0x6000) == 0x2000);
+
+ pbm->controller_regs = pr_regs[2].phys_addr;
+ pbm->config_space = (pr_regs[2].phys_addr + PSYCHO_CONFIGSPACE);
+
+ if (is_pbm_a) {
+ pbm->pci_afsr = pbm->controller_regs + PSYCHO_PCI_AFSR_A;
+ pbm->pci_afar = pbm->controller_regs + PSYCHO_PCI_AFAR_A;
+ pbm->pci_csr = pbm->controller_regs + PSYCHO_PCIA_CTRL;
+ } else {
+ pbm->pci_afsr = pbm->controller_regs + PSYCHO_PCI_AFSR_B;
+ pbm->pci_afar = pbm->controller_regs + PSYCHO_PCI_AFAR_B;
+ pbm->pci_csr = pbm->controller_regs + PSYCHO_PCIB_CTRL;
+ }
+
+ psycho_controller_hwinit(pbm);
+ if (!pbm->sibling) {
+ err = psycho_iommu_init(pbm, 128, 0xc0000000,
+ 0xffffffff, PSYCHO_CONTROL);
+ if (err)
+ goto out_free_iommu;
+
+ /* If necessary, hook us up for starfire IRQ translations. */
+ if (this_is_starfire)
+ starfire_hookup(pbm->portid);
+ }
+
+ psycho_pbm_init(pbm, op, is_pbm_a);
+
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ if (pbm->sibling)
+ pbm->sibling->sibling = pbm;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+
+out_free_iommu:
+ if (!pbm->sibling)
+ kfree(pbm->iommu);
+
+out_free_controller:
+ kfree(pbm);
+
+out_err:
+ return err;
+}
+/* OF match table: nodes named "pci" compatible with "pci108e,8000". */
+static struct of_device_id __initdata psycho_match[] = {
+ {
+ .name = "pci",
+ .compatible = "pci108e,8000",
+ },
+ {},
+};
+/* OF platform driver binding psycho_probe() to the psycho_match table. */
+static struct of_platform_driver psycho_driver = {
+ .name = DRIVER_NAME,
+ .match_table = psycho_match,
+ .probe = psycho_probe,
+};
+/* Register the PSYCHO driver on the OF bus; returns of_register_driver()'s result. */
+static int __init psycho_init(void)
+{
+ return of_register_driver(&psycho_driver, &of_bus_type);
+}
+
+subsys_initcall(psycho_init);
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
new file mode 100644
index 000000000000..713257b6963c
--- /dev/null
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -0,0 +1,609 @@
+/* pci_sabre.c: Sabre specific PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/apb.h>
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+#include "psycho_common.h"
+
+#define DRIVER_NAME "sabre"
+#define PFX DRIVER_NAME ": "
+
+/* SABRE PCI controller register offsets and definitions. */
+#define SABRE_UE_AFSR 0x0030UL
+#define SABRE_UEAFSR_PDRD 0x4000000000000000UL /* Primary PCI DMA Read */
+#define SABRE_UEAFSR_PDWR 0x2000000000000000UL /* Primary PCI DMA Write */
+#define SABRE_UEAFSR_SDRD 0x0800000000000000UL /* Secondary PCI DMA Read */
+#define SABRE_UEAFSR_SDWR 0x0400000000000000UL /* Secondary PCI DMA Write */
+#define SABRE_UEAFSR_SDTE 0x0200000000000000UL /* Secondary DMA Translation Error */
+#define SABRE_UEAFSR_PDTE 0x0100000000000000UL /* Primary DMA Translation Error */
+#define SABRE_UEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask */
+#define SABRE_UEAFSR_OFF 0x00000000e0000000UL /* Offset (AFAR bits [5:3]) */
+#define SABRE_UEAFSR_BLK 0x0000000000800000UL /* Was block operation */
+#define SABRE_UECE_AFAR 0x0038UL
+#define SABRE_CE_AFSR 0x0040UL
+#define SABRE_CEAFSR_PDRD 0x4000000000000000UL /* Primary PCI DMA Read */
+#define SABRE_CEAFSR_PDWR 0x2000000000000000UL /* Primary PCI DMA Write */
+#define SABRE_CEAFSR_SDRD 0x0800000000000000UL /* Secondary PCI DMA Read */
+#define SABRE_CEAFSR_SDWR 0x0400000000000000UL /* Secondary PCI DMA Write */
+#define SABRE_CEAFSR_ESYND 0x00ff000000000000UL /* ECC Syndrome */
+#define SABRE_CEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask */
+#define SABRE_CEAFSR_OFF 0x00000000e0000000UL /* Offset */
+#define SABRE_CEAFSR_BLK 0x0000000000800000UL /* Was block operation */
+#define SABRE_UECE_AFAR_ALIAS 0x0048UL /* Aliases to 0x0038 */
+#define SABRE_IOMMU_CONTROL 0x0200UL
+#define SABRE_IOMMUCTRL_ERRSTS 0x0000000006000000UL /* Error status bits */
+#define SABRE_IOMMUCTRL_ERR 0x0000000001000000UL /* Error present in IOTLB */
+#define SABRE_IOMMUCTRL_LCKEN 0x0000000000800000UL /* IOTLB lock enable */
+#define SABRE_IOMMUCTRL_LCKPTR 0x0000000000780000UL /* IOTLB lock pointer */
+#define SABRE_IOMMUCTRL_TSBSZ 0x0000000000070000UL /* TSB Size */
+#define SABRE_IOMMU_TSBSZ_1K 0x0000000000000000
+#define SABRE_IOMMU_TSBSZ_2K 0x0000000000010000
+#define SABRE_IOMMU_TSBSZ_4K 0x0000000000020000
+#define SABRE_IOMMU_TSBSZ_8K 0x0000000000030000
+#define SABRE_IOMMU_TSBSZ_16K 0x0000000000040000
+#define SABRE_IOMMU_TSBSZ_32K 0x0000000000050000
+#define SABRE_IOMMU_TSBSZ_64K 0x0000000000060000
+#define SABRE_IOMMU_TSBSZ_128K 0x0000000000070000
+#define SABRE_IOMMUCTRL_TBWSZ 0x0000000000000004UL /* TSB assumed page size */
+#define SABRE_IOMMUCTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */
+#define SABRE_IOMMUCTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */
+#define SABRE_IOMMU_TSBBASE 0x0208UL
+#define SABRE_IOMMU_FLUSH 0x0210UL
+#define SABRE_IMAP_A_SLOT0 0x0c00UL
+#define SABRE_IMAP_B_SLOT0 0x0c20UL
+#define SABRE_IMAP_SCSI 0x1000UL
+#define SABRE_IMAP_ETH 0x1008UL
+#define SABRE_IMAP_BPP 0x1010UL
+#define SABRE_IMAP_AU_REC 0x1018UL
+#define SABRE_IMAP_AU_PLAY 0x1020UL
+#define SABRE_IMAP_PFAIL 0x1028UL
+#define SABRE_IMAP_KMS 0x1030UL
+#define SABRE_IMAP_FLPY 0x1038UL
+#define SABRE_IMAP_SHW 0x1040UL
+#define SABRE_IMAP_KBD 0x1048UL
+#define SABRE_IMAP_MS 0x1050UL
+#define SABRE_IMAP_SER 0x1058UL
+#define SABRE_IMAP_UE 0x1070UL
+#define SABRE_IMAP_CE 0x1078UL
+#define SABRE_IMAP_PCIERR 0x1080UL
+#define SABRE_IMAP_GFX 0x1098UL
+#define SABRE_IMAP_EUPA 0x10a0UL
+#define SABRE_ICLR_A_SLOT0 0x1400UL
+#define SABRE_ICLR_B_SLOT0 0x1480UL
+#define SABRE_ICLR_SCSI 0x1800UL
+#define SABRE_ICLR_ETH 0x1808UL
+#define SABRE_ICLR_BPP 0x1810UL
+#define SABRE_ICLR_AU_REC 0x1818UL
+#define SABRE_ICLR_AU_PLAY 0x1820UL
+#define SABRE_ICLR_PFAIL 0x1828UL
+#define SABRE_ICLR_KMS 0x1830UL
+#define SABRE_ICLR_FLPY 0x1838UL
+#define SABRE_ICLR_SHW 0x1840UL
+#define SABRE_ICLR_KBD 0x1848UL
+#define SABRE_ICLR_MS 0x1850UL
+#define SABRE_ICLR_SER 0x1858UL
+#define SABRE_ICLR_UE 0x1870UL
+#define SABRE_ICLR_CE 0x1878UL
+#define SABRE_ICLR_PCIERR 0x1880UL
+#define SABRE_WRSYNC 0x1c20UL
+#define SABRE_PCICTRL 0x2000UL
+#define SABRE_PCICTRL_MRLEN 0x0000001000000000UL /* Use MemoryReadLine for block loads/stores */
+#define SABRE_PCICTRL_SERR 0x0000000400000000UL /* Set when SERR asserted on PCI bus */
+#define SABRE_PCICTRL_ARBPARK 0x0000000000200000UL /* Bus Parking 0=Ultra-IIi 1=prev-bus-owner */
+#define SABRE_PCICTRL_CPUPRIO 0x0000000000100000UL /* Ultra-IIi granted every other bus cycle */
+#define SABRE_PCICTRL_ARBPRIO 0x00000000000f0000UL /* Slot which is granted every other bus cycle */
+#define SABRE_PCICTRL_ERREN 0x0000000000000100UL /* PCI Error Interrupt Enable */
+#define SABRE_PCICTRL_RTRYWE 0x0000000000000080UL /* DMA Flow Control 0=wait-if-possible 1=retry */
+#define SABRE_PCICTRL_AEN 0x000000000000000fUL /* Slot PCI arbitration enables */
+#define SABRE_PIOAFSR 0x2010UL
+#define SABRE_PIOAFSR_PMA 0x8000000000000000UL /* Primary Master Abort */
+#define SABRE_PIOAFSR_PTA 0x4000000000000000UL /* Primary Target Abort */
+#define SABRE_PIOAFSR_PRTRY 0x2000000000000000UL /* Primary Excessive Retries */
+#define SABRE_PIOAFSR_PPERR 0x1000000000000000UL /* Primary Parity Error */
+#define SABRE_PIOAFSR_SMA 0x0800000000000000UL /* Secondary Master Abort */
+#define SABRE_PIOAFSR_STA 0x0400000000000000UL /* Secondary Target Abort */
+#define SABRE_PIOAFSR_SRTRY 0x0200000000000000UL /* Secondary Excessive Retries */
+#define SABRE_PIOAFSR_SPERR 0x0100000000000000UL /* Secondary Parity Error */
+#define SABRE_PIOAFSR_BMSK 0x0000ffff00000000UL /* Byte Mask */
+#define SABRE_PIOAFSR_BLK 0x0000000080000000UL /* Was Block Operation */
+#define SABRE_PIOAFAR 0x2018UL
+#define SABRE_PCIDIAG 0x2020UL
+#define SABRE_PCIDIAG_DRTRY 0x0000000000000040UL /* Disable PIO Retry Limit */
+#define SABRE_PCIDIAG_IPAPAR 0x0000000000000008UL /* Invert PIO Address Parity */
+#define SABRE_PCIDIAG_IPDPAR 0x0000000000000004UL /* Invert PIO Data Parity */
+#define SABRE_PCIDIAG_IDDPAR 0x0000000000000002UL /* Invert DMA Data Parity */
+#define SABRE_PCIDIAG_ELPBK 0x0000000000000001UL /* Loopback Enable - not supported */
+#define SABRE_PCITASR 0x2028UL
+#define SABRE_PCITASR_EF 0x0000000000000080UL /* Respond to 0xe0000000-0xffffffff */
+#define SABRE_PCITASR_CD 0x0000000000000040UL /* Respond to 0xc0000000-0xdfffffff */
+#define SABRE_PCITASR_AB 0x0000000000000020UL /* Respond to 0xa0000000-0xbfffffff */
+#define SABRE_PCITASR_89 0x0000000000000010UL /* Respond to 0x80000000-0x9fffffff */
+#define SABRE_PCITASR_67 0x0000000000000008UL /* Respond to 0x60000000-0x7fffffff */
+#define SABRE_PCITASR_45 0x0000000000000004UL /* Respond to 0x40000000-0x5fffffff */
+#define SABRE_PCITASR_23 0x0000000000000002UL /* Respond to 0x20000000-0x3fffffff */
+#define SABRE_PCITASR_01 0x0000000000000001UL /* Respond to 0x00000000-0x1fffffff */
+#define SABRE_PIOBUF_DIAG 0x5000UL
+#define SABRE_DMABUF_DIAGLO 0x5100UL
+#define SABRE_DMABUF_DIAGHI 0x51c0UL
+#define SABRE_IMAP_GFX_ALIAS 0x6000UL /* Aliases to 0x1098 */
+#define SABRE_IMAP_EUPA_ALIAS 0x8000UL /* Aliases to 0x10a0 */
+#define SABRE_IOMMU_VADIAG 0xa400UL
+#define SABRE_IOMMU_TCDIAG 0xa408UL
+#define SABRE_IOMMU_TAG 0xa580UL
+#define SABRE_IOMMUTAG_ERRSTS 0x0000000001800000UL /* Error status bits */
+#define SABRE_IOMMUTAG_ERR 0x0000000000400000UL /* Error present */
+#define SABRE_IOMMUTAG_WRITE 0x0000000000200000UL /* Page is writable */
+#define SABRE_IOMMUTAG_STREAM 0x0000000000100000UL /* Streamable bit - unused */
+#define SABRE_IOMMUTAG_SIZE 0x0000000000080000UL /* 0=8k 1=16k */
+#define SABRE_IOMMUTAG_VPN 0x000000000007ffffUL /* Virtual Page Number [31:13] */
+#define SABRE_IOMMU_DATA 0xa600UL
+#define SABRE_IOMMUDATA_VALID 0x0000000040000000UL /* Valid */
+#define SABRE_IOMMUDATA_USED 0x0000000020000000UL /* Used (for LRU algorithm) */
+#define SABRE_IOMMUDATA_CACHE 0x0000000010000000UL /* Cacheable */
+#define SABRE_IOMMUDATA_PPN 0x00000000001fffffUL /* Physical Page Number [33:13] */
+#define SABRE_PCI_IRQSTATE 0xa800UL
+#define SABRE_OBIO_IRQSTATE 0xa808UL
+#define SABRE_FFBCFG 0xf000UL
+#define SABRE_FFBCFG_SPRQS 0x000000000f000000 /* Slave P_RQST queue size */
+#define SABRE_FFBCFG_ONEREAD 0x0000000000004000 /* Slave supports one outstanding read */
+#define SABRE_MCCTRL0 0xf010UL
+#define SABRE_MCCTRL0_RENAB 0x0000000080000000 /* Refresh Enable */
+#define SABRE_MCCTRL0_EENAB 0x0000000010000000 /* Enable all ECC functions */
+#define SABRE_MCCTRL0_11BIT 0x0000000000001000 /* Enable 11-bit column addressing */
+#define SABRE_MCCTRL0_DPP 0x0000000000000f00 /* DIMM Pair Present Bits */
+#define SABRE_MCCTRL0_RINTVL 0x00000000000000ff /* Refresh Interval */
+#define SABRE_MCCTRL1 0xf018UL
+#define SABRE_MCCTRL1_AMDC 0x0000000038000000 /* Advance Memdata Clock */
+#define SABRE_MCCTRL1_ARDC 0x0000000007000000 /* Advance DRAM Read Data Clock */
+#define SABRE_MCCTRL1_CSR 0x0000000000e00000 /* CAS to RAS delay for CBR refresh */
+#define SABRE_MCCTRL1_CASRW 0x00000000001c0000 /* CAS length for read/write */
+#define SABRE_MCCTRL1_RCD 0x0000000000038000 /* RAS to CAS delay */
+#define SABRE_MCCTRL1_CP 0x0000000000007000 /* CAS Precharge */
+#define SABRE_MCCTRL1_RP 0x0000000000000e00 /* RAS Precharge */
+#define SABRE_MCCTRL1_RAS 0x00000000000001c0 /* Length of RAS for refresh */
+#define SABRE_MCCTRL1_CASRW2 0x0000000000000038 /* Must be same as CASRW */
+#define SABRE_MCCTRL1_RSC 0x0000000000000007 /* RAS after CAS hold time */
+#define SABRE_RESETCTRL 0xf020UL
+
+#define SABRE_CONFIGSPACE 0x001000000UL
+#define SABRE_IOSPACE 0x002000000UL
+#define SABRE_IOSPACE_SIZE 0x000ffffffUL
+#define SABRE_MEMSPACE 0x100000000UL
+#define SABRE_MEMSPACE_SIZE 0x07fffffffUL
+
+static int hummingbird_p; /* set by sabre_probe(); non-zero makes sabre_scan_bus() mark the PBM 66MHz capable */
+static struct pci_bus *sabre_root_bus; /* bus returned by pci_scan_one_pbm() in sabre_scan_bus() */
+
+static irqreturn_t sabre_ue_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SABRE_UE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch uncorrectable error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear the primary/secondary error status bits. */
+ error_bits = afsr &
+ (SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR |
+ SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR |
+ SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Uncorrectable Error, primary error type[%s%s]\n",
+ pbm->name,
+ ((error_bits & SABRE_UEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SABRE_UEAFSR_PDWR) ?
+ "DMA Write" : "???")),
+ ((error_bits & SABRE_UEAFSR_PDTE) ?
+ ":Translation Error" : ""));
+ printk("%s: bytemask[%04lx] dword_offset[%lx] was_block(%d)\n",
+ pbm->name,
+ (afsr & SABRE_UEAFSR_BMSK) >> 32UL,
+ (afsr & SABRE_UEAFSR_OFF) >> 29UL,
+ ((afsr & SABRE_UEAFSR_BLK) ? 1 : 0));
+ printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: UE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SABRE_UEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & SABRE_UEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (afsr & SABRE_UEAFSR_SDTE) {
+ reported++;
+ printk("(Translation Error)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* Interrogate IOMMU for error status. */
+ psycho_check_iommu_error(pbm, afsr, afar, UE_ERR);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t sabre_ce_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SABRE_CE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR |
+ SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Correctable Error, primary error type[%s]\n",
+ pbm->name,
+ ((error_bits & SABRE_CEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SABRE_CEAFSR_PDWR) ?
+ "DMA Write" : "???")));
+
+ /* XXX Use syndrome and afar to print out module string just like
+ * XXX UDB CE trap handler does... -DaveM
+ */
+ printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] "
+ "was_block(%d)\n",
+ pbm->name,
+ (afsr & SABRE_CEAFSR_ESYND) >> 48UL,
+ (afsr & SABRE_CEAFSR_BMSK) >> 32UL,
+ (afsr & SABRE_CEAFSR_OFF) >> 29UL,
+ ((afsr & SABRE_CEAFSR_BLK) ? 1 : 0));
+ printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: CE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SABRE_CEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & SABRE_CEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ return IRQ_HANDLED;
+}
+
+static void sabre_register_error_handlers(struct pci_pbm_info *pbm)
+{
+ struct device_node *dp = pbm->op->node;
+ struct of_device *op;
+ unsigned long base = pbm->controller_regs;
+ u64 tmp;
+ int err;
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_SABRE)
+ dp = dp->parent;
+
+ op = of_find_device_by_node(dp);
+ if (!op)
+ return;
+
+ /* Sabre/Hummingbird IRQ property layout is:
+ * 0: PCI ERR
+ * 1: UE ERR
+ * 2: CE ERR
+ * 3: POWER FAIL
+ */
+ if (op->num_irqs < 4)
+ return;
+
+ /* We clear the error bits in the appropriate AFSR before
+ * registering the handler so that we don't get spurious
+ * interrupts.
+ */
+ upa_writeq((SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR |
+ SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR |
+ SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE),
+ base + SABRE_UE_AFSR);
+
+ err = request_irq(op->irqs[1], sabre_ue_intr, 0, "SABRE_UE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register UE, err=%d.\n",
+ pbm->name, err);
+
+ upa_writeq((SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR |
+ SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR),
+ base + SABRE_CE_AFSR);
+
+
+ err = request_irq(op->irqs[2], sabre_ce_intr, 0, "SABRE_CE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register CE, err=%d.\n",
+ pbm->name, err);
+ err = request_irq(op->irqs[0], psycho_pcierr_intr, 0,
+ "SABRE_PCIERR", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register PCIERR, err=%d.\n",
+ pbm->name, err);
+
+ tmp = upa_readq(base + SABRE_PCICTRL);
+ tmp |= SABRE_PCICTRL_ERREN;
+ upa_writeq(tmp, base + SABRE_PCICTRL);
+}
+
+/* Configure every Sun Simba bridge found directly on @sabre_bus:
+ * enable command bits, clear status, set latency timers and turn on
+ * error forwarding in the bridge control register.
+ */
+static void apb_init(struct pci_bus *sabre_bus)
+{
+	struct pci_dev *bridge;
+
+	list_for_each_entry(bridge, &sabre_bus->devices, bus_list) {
+		u16 cmd;
+
+		/* Anything that is not a Simba bridge is left alone. */
+		if (bridge->vendor != PCI_VENDOR_ID_SUN ||
+		    bridge->device != PCI_DEVICE_ID_SUN_SIMBA)
+			continue;
+
+		pci_read_config_word(bridge, PCI_COMMAND, &cmd);
+		cmd |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY |
+		       PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY |
+		       PCI_COMMAND_IO;
+		pci_write_config_word(bridge, PCI_COMMAND, cmd);
+
+		/* Status register bits are "write 1 to clear". */
+		pci_write_config_word(bridge, PCI_STATUS, 0xffff);
+		pci_write_config_word(bridge, PCI_SEC_STATUS, 0xffff);
+
+		/* Primary and secondary latency timers both get 64. */
+		pci_write_config_byte(bridge, PCI_LATENCY_TIMER, 64);
+		pci_write_config_byte(bridge, PCI_SEC_LATENCY_TIMER, 64);
+
+		/* Enable reporting/forwarding of master aborts,
+		 * parity, and SERR.
+		 */
+		pci_write_config_byte(bridge, PCI_BRIDGE_CONTROL,
+				      (PCI_BRIDGE_CTL_PARITY |
+				       PCI_BRIDGE_CTL_SERR |
+				       PCI_BRIDGE_CTL_MASTER_ABORT));
+	}
+}
+
+/* Scan the PCI bus behind the (single supported) Sabre controller,
+ * then set up any APB bridges on it and register the error handlers.
+ */
+static void __init sabre_scan_bus(struct pci_pbm_info *pbm,
+				  struct device *parent)
+{
+	static int already_scanned;
+
+	/* The APB bridge speaks to the Sabre host PCI bridge
+	 * at 66Mhz, but the front side of APB runs at 33Mhz
+	 * for both segments.
+	 *
+	 * Hummingbird systems do not use APB, so they run
+	 * at 66MHZ.
+	 */
+	pbm->is_66mhz_capable = hummingbird_p ? 1 : 0;
+
+	/* This driver has not been verified to handle
+	 * multiple SABREs yet, so trap this.
+	 *
+	 * Also note that the SABRE host bridge is hardwired
+	 * to live at bus 0.
+	 */
+	if (already_scanned) {
+		printk(KERN_ERR PFX "Multiple controllers unsupported.\n");
+		return;
+	}
+	already_scanned = 1;
+
+	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+	if (pbm->pci_bus) {
+		sabre_root_bus = pbm->pci_bus;
+		apb_init(pbm->pci_bus);
+		sabre_register_error_handlers(pbm);
+	}
+}
+
+/* Finish PBM setup: run the common Psycho initialisation, point the
+ * PCI AFSR/AFAR/CSR fields at the Sabre registers and scan the bus.
+ */
+static void __init sabre_pbm_init(struct pci_pbm_info *pbm,
+				  struct of_device *op)
+{
+	unsigned long regs;
+
+	psycho_pbm_init_common(pbm, op, "SABRE", PBM_CHIP_TYPE_SABRE);
+
+	regs = pbm->controller_regs;
+	pbm->pci_afsr = regs + SABRE_PIOAFSR;
+	pbm->pci_afar = regs + SABRE_PIOAFAR;
+	pbm->pci_csr = regs + SABRE_PCICTRL;
+
+	sabre_scan_bus(pbm, &op->dev);
+}
+/* Probe one Sabre/Hummingbird host bridge node.
+ *
+ * Allocates the pci_pbm_info and its iommu, programs the controller
+ * located through the node's "reg" property, initialises the IOMMU
+ * from the "virtual-dma" property and then calls sabre_pbm_init().
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, -ENODEV
+ * when "reg" or "virtual-dma" is missing or the virtual-dma size is
+ * not one of the handled values, or the error from psycho_iommu_init().
+ */
+static int __devinit sabre_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ const struct linux_prom64_registers *pr_regs;
+ struct device_node *dp = op->node;
+ struct pci_pbm_info *pbm;
+ u32 upa_portid, dma_mask;
+ struct iommu *iommu;
+ int tsbsize, err;
+ const u32 *vdma;
+ u64 clear_irq;
+ /* Non-NULL match data flags Hummingbird straight from the match entry. */
+ hummingbird_p = (match->data != NULL);
+ if (!hummingbird_p) {
+ struct device_node *cpu_dp;
+
+ /* Of course, Sun has to encode things a thousand
+ * different ways, inconsistently.
+ *
+ * So also treat the machine as Hummingbird when any cpu
+ * node is named "SUNW,UltraSPARC-IIe".
+ */
+ for_each_node_by_type(cpu_dp, "cpu") {
+ if (!strcmp(cpu_dp->name, "SUNW,UltraSPARC-IIe"))
+ hummingbird_p = 1;
+ }
+ }
+
+ err = -ENOMEM; /* error code for both allocation failures below */
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+ goto out_err;
+ }
+
+ iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+ goto out_free_controller;
+ }
+
+ pbm->iommu = iommu;
+ /* NOTE(review): 0xff is presumably the value used when the property is absent - confirm against of_getintprop_default(). */
+ upa_portid = of_getintprop_default(dp, "upa-portid", 0xff);
+
+ pbm->portid = upa_portid;
+
+ /*
+ * Map in SABRE register set and report the presence of this SABRE.
+ */
+
+ pr_regs = of_get_property(dp, "reg", NULL);
+ err = -ENODEV; /* stays in effect for the virtual-dma failures further down */
+ if (!pr_regs) {
+ printk(KERN_ERR PFX "No reg property\n");
+ goto out_free_iommu;
+ }
+
+ /*
+ * First REG in property is base of entire SABRE register space.
+ */
+ pbm->controller_regs = pr_regs[0].phys_addr;
+
+ /* Clear interrupts */
+
+ /* PCI first: write zero to every 8-byte register from
+ * SABRE_ICLR_A_SLOT0 up to (not including) SABRE_ICLR_B_SLOT0 + 0x80.
+ */
+ for (clear_irq = SABRE_ICLR_A_SLOT0; clear_irq < SABRE_ICLR_B_SLOT0 + 0x80; clear_irq += 8)
+ upa_writeq(0x0UL, pbm->controller_regs + clear_irq);
+
+ /* Then OBIO: the 0x80 bytes of registers starting at SABRE_ICLR_SCSI. */
+ for (clear_irq = SABRE_ICLR_SCSI; clear_irq < SABRE_ICLR_SCSI + 0x80; clear_irq += 8)
+ upa_writeq(0x0UL, pbm->controller_regs + clear_irq);
+
+ /* Error interrupts are enabled later after the bus scan. */
+ upa_writeq((SABRE_PCICTRL_MRLEN | SABRE_PCICTRL_SERR |
+ SABRE_PCICTRL_ARBPARK | SABRE_PCICTRL_AEN),
+ pbm->controller_regs + SABRE_PCICTRL);
+
+ /* Now map in PCI config space for entire SABRE. */
+ pbm->config_space = pbm->controller_regs + SABRE_CONFIGSPACE;
+
+ vdma = of_get_property(dp, "virtual-dma", NULL);
+ if (!vdma) {
+ printk(KERN_ERR PFX "No virtual-dma property\n");
+ goto out_free_iommu;
+ }
+ /* vdma[0] seeds the DMA mask and is passed on as the base; vdma[1] is the size that picks the mask bits and tsbsize. */
+ dma_mask = vdma[0];
+ switch(vdma[1]) {
+ case 0x20000000:
+ dma_mask |= 0x1fffffff;
+ tsbsize = 64;
+ break;
+ case 0x40000000:
+ dma_mask |= 0x3fffffff;
+ tsbsize = 128;
+ break;
+
+ case 0x80000000:
+ dma_mask |= 0x7fffffff;
+ tsbsize = 128;
+ break;
+ default:
+ printk(KERN_ERR PFX "Strange virtual-dma size.\n");
+ goto out_free_iommu;
+ }
+
+ err = psycho_iommu_init(pbm, tsbsize, vdma[0], dma_mask, SABRE_WRSYNC);
+ if (err)
+ goto out_free_iommu;
+
+ /*
+ * Look for APB underneath.
+ */
+ sabre_pbm_init(pbm, op);
+ /* Push the new PBM onto the front of the pci_pbm_root list. */
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+ /* Error unwind: each label frees one allocation and falls through to the next. */
+out_free_iommu:
+ kfree(pbm->iommu);
+
+out_free_controller:
+ kfree(pbm);
+
+out_err:
+ return err;
+}
+
+/* OF match table for the Sabre family of host bridges.
+ *
+ * The "pci108e,a001" entry carries non-NULL .data, which
+ * sabre_probe() reads as "this is a Hummingbird"; the "pci108e,a000"
+ * entry leaves .data NULL.
+ *
+ * Not marked __initdata: sabre_driver stores a pointer to this table
+ * in .match_table and the driver is never unregistered, so the table
+ * has to stay valid after the init sections have been discarded.
+ */
+static struct of_device_id sabre_match[] = {
+	{
+		.name = "pci",
+		.compatible = "pci108e,a001",
+		.data = (void *) 1,
+	},
+	{
+		.name = "pci",
+		.compatible = "pci108e,a000",
+	},
+	{},	/* terminating empty entry */
+};
+
+/* Driver descriptor tying sabre_probe() to the sabre_match table. */
+static struct of_platform_driver sabre_driver = {
+	.probe		= sabre_probe,
+	.match_table	= sabre_match,
+	.name		= DRIVER_NAME,
+};
+
+/* Register the Sabre driver on of_bus_type and hand back the result. */
+static int __init sabre_init(void)
+{
+	int rc;
+
+	rc = of_register_driver(&sabre_driver, &of_bus_type);
+
+	return rc;
+}
+
+subsys_initcall(sabre_init);
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
new file mode 100644
index 000000000000..45d9dba1ba11
--- /dev/null
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -0,0 +1,1504 @@
+/* pci_schizo.c: SCHIZO/TOMATILLO specific PCI controller support.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/pstate.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+
+#define DRIVER_NAME "schizo"
+#define PFX DRIVER_NAME ": "
+
+/* This is a convention that at least Excalibur and Merlin
+ * follow. I suppose the SCHIZO used in Starcat and friends
+ * will do similar.
+ *
+ * The only way I could see this changing is if the newlink
+ * block requires more space in Schizo's address space than
+ * they predicted, thus requiring an address space reorg when
+ * the newer Schizo is taped out.
+ */
+
+/* Streaming buffer control register. */
+#define SCHIZO_STRBUF_CTRL_LPTR 0x00000000000000f0UL /* LRU Lock Pointer */
+#define SCHIZO_STRBUF_CTRL_LENAB 0x0000000000000008UL /* LRU Lock Enable */
+#define SCHIZO_STRBUF_CTRL_RRDIS 0x0000000000000004UL /* Rerun Disable */
+#define SCHIZO_STRBUF_CTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */
+#define SCHIZO_STRBUF_CTRL_ENAB 0x0000000000000001UL /* Streaming Buffer Enable */
+
+/* IOMMU control register. */
+#define SCHIZO_IOMMU_CTRL_RESV 0xfffffffff9000000UL /* Reserved */
+#define SCHIZO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status */
+#define SCHIZO_IOMMU_CTRL_XLTEERR 0x0000000001000000UL /* Translation Error encountered */
+#define SCHIZO_IOMMU_CTRL_LCKEN 0x0000000000800000UL /* Enable translation locking */
+#define SCHIZO_IOMMU_CTRL_LCKPTR 0x0000000000780000UL /* Translation lock pointer */
+#define SCHIZO_IOMMU_CTRL_TSBSZ 0x0000000000070000UL /* TSB Size */
+#define SCHIZO_IOMMU_TSBSZ_1K 0x0000000000000000UL /* TSB Table 1024 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_2K 0x0000000000010000UL /* TSB Table 2048 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_4K 0x0000000000020000UL /* TSB Table 4096 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_8K 0x0000000000030000UL /* TSB Table 8192 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_16K 0x0000000000040000UL /* TSB Table 16k 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_32K 0x0000000000050000UL /* TSB Table 32k 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_64K 0x0000000000060000UL /* TSB Table 64k 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_128K 0x0000000000070000UL /* TSB Table 128k 8-byte entries */
+#define SCHIZO_IOMMU_CTRL_RESV2 0x000000000000fff8UL /* Reserved */
+#define SCHIZO_IOMMU_CTRL_TBWSZ 0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */
+#define SCHIZO_IOMMU_CTRL_DENAB 0x0000000000000002UL /* Diagnostic mode enable */
+#define SCHIZO_IOMMU_CTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */
+
+/* Schizo config space address format is nearly identical to
+ * that of PSYCHO:
+ *
+ * 32 24 23 16 15 11 10 8 7 2 1 0
+ * ---------------------------------------------------------
+ * |0 0 0 0 0 0 0 0 0| bus | device | function | reg | 0 0 |
+ * ---------------------------------------------------------
+ */
+#define SCHIZO_CONFIG_BASE(PBM) ((PBM)->config_space)
+#define SCHIZO_CONFIG_ENCODE(BUS, DEVFN, REG) \
+ (((unsigned long)(BUS) << 16) | \
+ ((unsigned long)(DEVFN) << 8) | \
+ ((unsigned long)(REG)))
+
+/* Build the config-space address of (bus, devfn, where) on @pbm.
+ *
+ * The bus number is first made relative to pbm->pci_first_busno.
+ * Returns NULL when no PBM is supplied.
+ */
+static void *schizo_pci_config_mkaddr(struct pci_pbm_info *pbm,
+				      unsigned char bus,
+				      unsigned int devfn,
+				      int where)
+{
+	unsigned char rel_bus;
+	unsigned long addr;
+
+	if (pbm == NULL)
+		return NULL;
+
+	rel_bus = bus - pbm->pci_first_busno;
+
+	addr = SCHIZO_CONFIG_BASE(pbm);
+	addr |= SCHIZO_CONFIG_ENCODE(rel_bus, devfn, where);
+
+	return (void *) addr;
+}
+
+/* SCHIZO error handling support.
+ *
+ * Names the error source a diagnostic pass is run for; it is the
+ * 'type' argument of the schizo_check_*_error helpers.
+ */
+enum schizo_error_type {
+ UE_ERR, CE_ERR, PCI_ERR, SAFARI_ERR
+};
+
+static DEFINE_SPINLOCK(stc_buf_lock); /* held by __schizo_check_stc_error_pbm() while the buffers below are in use */
+static unsigned long stc_error_buf[128]; /* copy of the 128 entries read from SCHIZO_STC_ERR */
+static unsigned long stc_tag_buf[16]; /* copy of the 16 entries read from SCHIZO_STC_TAG */
+static unsigned long stc_line_buf[16]; /* copy of the 16 entries read from SCHIZO_STC_LINE */
+
+#define SCHIZO_UE_INO 0x30 /* Uncorrectable ECC error */
+#define SCHIZO_CE_INO 0x31 /* Correctable ECC error */
+#define SCHIZO_PCIERR_A_INO 0x32 /* PBM A PCI bus error */
+#define SCHIZO_PCIERR_B_INO 0x33 /* PBM B PCI bus error */
+#define SCHIZO_SERR_INO 0x34 /* Safari interface error */
+
+#define SCHIZO_STC_ERR 0xb800UL /* --> 0xba00 */
+#define SCHIZO_STC_TAG 0xba00UL /* --> 0xba80 */
+#define SCHIZO_STC_LINE 0xbb00UL /* --> 0xbb80 */
+
+#define SCHIZO_STCERR_WRITE 0x2UL
+#define SCHIZO_STCERR_READ 0x1UL
+
+#define SCHIZO_STCTAG_PPN 0x3fffffff00000000UL
+#define SCHIZO_STCTAG_VPN 0x00000000ffffe000UL
+#define SCHIZO_STCTAG_VALID 0x8000000000000000UL
+#define SCHIZO_STCTAG_READ 0x4000000000000000UL
+
+#define SCHIZO_STCLINE_LINDX 0x0000000007800000UL
+#define SCHIZO_STCLINE_SPTR 0x000000000007e000UL
+#define SCHIZO_STCLINE_LADDR 0x0000000000001fc0UL
+#define SCHIZO_STCLINE_EPTR 0x000000000000003fUL
+#define SCHIZO_STCLINE_VALID 0x0000000000600000UL
+#define SCHIZO_STCLINE_FOFN 0x0000000000180000UL
+/* Dump and clear the streaming cache (STC) diagnostic state of one PBM.
+ *
+ * With stc_buf_lock held: switches the streaming buffer into
+ * diagnostic mode, copies the 128 error entries and the 16 tag/line
+ * entries into the static stc_* buffers (zeroing each hardware entry
+ * after it is read), restores the control register and prints every
+ * tag/line group that recorded at least one error.
+ *
+ * The 'type' argument is not used in the body.
+ */
+static void __schizo_check_stc_error_pbm(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+{
+ struct strbuf *strbuf = &pbm->stc;
+ unsigned long regbase = pbm->pbm_regs;
+ unsigned long err_base, tag_base, line_base;
+ u64 control;
+ int i;
+
+ err_base = regbase + SCHIZO_STC_ERR;
+ tag_base = regbase + SCHIZO_STC_TAG;
+ line_base = regbase + SCHIZO_STC_LINE;
+
+ spin_lock(&stc_buf_lock);
+
+ /* This is __REALLY__ dangerous. When we put the
+ * streaming buffer into diagnostic mode to probe
+ * its tags and error status, we _must_ clear all
+ * of the line tag valid bits before re-enabling
+ * the streaming buffer. If any dirty data lives
+ * in the STC when we do this, we will end up
+ * invalidating it before it has a chance to reach
+ * main memory.
+ */
+ control = upa_readq(strbuf->strbuf_control);
+ upa_writeq((control | SCHIZO_STRBUF_CTRL_DENAB),
+ strbuf->strbuf_control);
+ for (i = 0; i < 128; i++) {
+ unsigned long val;
+
+ val = upa_readq(err_base + (i * 8UL));
+ upa_writeq(0UL, err_base + (i * 8UL));
+ stc_error_buf[i] = val;
+ }
+ for (i = 0; i < 16; i++) {
+ stc_tag_buf[i] = upa_readq(tag_base + (i * 8UL));
+ stc_line_buf[i] = upa_readq(line_base + (i * 8UL));
+ upa_writeq(0UL, tag_base + (i * 8UL));
+ upa_writeq(0UL, line_base + (i * 8UL));
+ }
+
+ /* OK, state is logged, exit diagnostic mode by writing back
+ * the control value read on entry.
+ */
+ upa_writeq(control, strbuf->strbuf_control);
+ /* Report from the saved copies: tag/line entry i is paired with error entries i*8 .. i*8+7. */
+ for (i = 0; i < 16; i++) {
+ int j, saw_error, first, last;
+
+ saw_error = 0;
+ first = i * 8;
+ last = first + 8;
+ for (j = first; j < last; j++) {
+ unsigned long errval = stc_error_buf[j];
+ if (errval != 0) {
+ saw_error++;
+ printk("%s: STC_ERR(%d)[wr(%d)rd(%d)]\n",
+ pbm->name,
+ j,
+ (errval & SCHIZO_STCERR_WRITE) ? 1 : 0,
+ (errval & SCHIZO_STCERR_READ) ? 1 : 0);
+ }
+ }
+ if (saw_error != 0) { /* tag and line are printed only for groups that had an error */
+ unsigned long tagval = stc_tag_buf[i];
+ unsigned long lineval = stc_line_buf[i];
+ printk("%s: STC_TAG(%d)[PA(%016lx)VA(%08lx)V(%d)R(%d)]\n",
+ pbm->name,
+ i,
+ ((tagval & SCHIZO_STCTAG_PPN) >> 19UL),
+ (tagval & SCHIZO_STCTAG_VPN),
+ ((tagval & SCHIZO_STCTAG_VALID) ? 1 : 0),
+ ((tagval & SCHIZO_STCTAG_READ) ? 1 : 0));
+
+ /* XXX Should spit out per-bank error information... -DaveM */
+ printk("%s: STC_LINE(%d)[LIDX(%lx)SP(%lx)LADDR(%lx)EP(%lx)"
+ "V(%d)FOFN(%d)]\n",
+ pbm->name,
+ i,
+ ((lineval & SCHIZO_STCLINE_LINDX) >> 23UL),
+ ((lineval & SCHIZO_STCLINE_SPTR) >> 13UL),
+ ((lineval & SCHIZO_STCLINE_LADDR) >> 6UL),
+ ((lineval & SCHIZO_STCLINE_EPTR) >> 0UL),
+ ((lineval & SCHIZO_STCLINE_VALID) ? 1 : 0),
+ ((lineval & SCHIZO_STCLINE_FOFN) ? 1 : 0));
+ }
+ }
+
+ spin_unlock(&stc_buf_lock);
+}
+
+/* IOMMU is per-PBM in Schizo, so interrogate both for anonymous
+ * controller level errors.
+ */
+
+#define SCHIZO_IOMMU_TAG 0xa580UL
+#define SCHIZO_IOMMU_DATA 0xa600UL
+
+#define SCHIZO_IOMMU_TAG_CTXT 0x0000001ffe000000UL
+#define SCHIZO_IOMMU_TAG_ERRSTS 0x0000000001800000UL
+#define SCHIZO_IOMMU_TAG_ERR 0x0000000000400000UL
+#define SCHIZO_IOMMU_TAG_WRITE 0x0000000000200000UL
+#define SCHIZO_IOMMU_TAG_STREAM 0x0000000000100000UL
+#define SCHIZO_IOMMU_TAG_SIZE 0x0000000000080000UL
+#define SCHIZO_IOMMU_TAG_VPAGE 0x000000000007ffffUL
+
+#define SCHIZO_IOMMU_DATA_VALID 0x0000000100000000UL
+#define SCHIZO_IOMMU_DATA_CACHE 0x0000000040000000UL
+#define SCHIZO_IOMMU_DATA_PPAGE 0x000000003fffffffUL
+/* Check one PBM's IOMMU for a latched translation error and log it.
+ *
+ * Runs with iommu->lock held and interrupts saved.  When the control
+ * register has SCHIZO_IOMMU_CTRL_XLTEERR set, the bit is cleared, the
+ * error type is printed, and the 16 tag/data entries are read out
+ * (and zeroed) in diagnostic mode so that every entry flagged with
+ * SCHIZO_IOMMU_TAG_ERR can be printed.  Whether or not an IOMMU error
+ * was found, the streaming cache is checked as well when
+ * pbm->stc.strbuf_enabled is set.
+ */
+static void schizo_check_iommu_error_pbm(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+{
+ struct iommu *iommu = pbm->iommu;
+ unsigned long iommu_tag[16];
+ unsigned long iommu_data[16];
+ unsigned long flags;
+ u64 control;
+ int i;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ control = upa_readq(iommu->iommu_control);
+ if (control & SCHIZO_IOMMU_CTRL_XLTEERR) {
+ unsigned long base;
+ char *type_string;
+
+ /* Clear the error encountered bit. */
+ control &= ~SCHIZO_IOMMU_CTRL_XLTEERR;
+ upa_writeq(control, iommu->iommu_control);
+ /* The two-bit XLTESTAT field of the control value selects the error name. */
+ switch((control & SCHIZO_IOMMU_CTRL_XLTESTAT) >> 25UL) {
+ case 0:
+ type_string = "Protection Error";
+ break;
+ case 1:
+ type_string = "Invalid Error";
+ break;
+ case 2:
+ type_string = "TimeOut Error";
+ break;
+ case 3:
+ default:
+ type_string = "ECC Error";
+ break;
+ };
+ printk("%s: IOMMU Error, type[%s]\n",
+ pbm->name, type_string);
+
+ /* Put the IOMMU into diagnostic mode and probe
+ * its TLB for entries with error status.
+ *
+ * It is very possible for another DVMA to occur
+ * while we do this probe, and corrupt the system
+ * further. But we are so screwed at this point
+ * that we are likely to crash hard anyways, so
+ * get as much diagnostic information to the
+ * console as we can.
+ */
+ upa_writeq(control | SCHIZO_IOMMU_CTRL_DENAB,
+ iommu->iommu_control);
+
+ base = pbm->pbm_regs;
+ /* Save all 16 tag/data pairs in the local arrays, zeroing each hardware entry after reading it. */
+ for (i = 0; i < 16; i++) {
+ iommu_tag[i] =
+ upa_readq(base + SCHIZO_IOMMU_TAG + (i * 8UL));
+ iommu_data[i] =
+ upa_readq(base + SCHIZO_IOMMU_DATA + (i * 8UL));
+
+ /* Now clear out the entry. */
+ upa_writeq(0, base + SCHIZO_IOMMU_TAG + (i * 8UL));
+ upa_writeq(0, base + SCHIZO_IOMMU_DATA + (i * 8UL));
+ }
+
+ /* Leave diagnostic mode. */
+ upa_writeq(control, iommu->iommu_control);
+ /* Print only the saved entries whose tag has the error bit set. */
+ for (i = 0; i < 16; i++) {
+ unsigned long tag, data;
+
+ tag = iommu_tag[i];
+ if (!(tag & SCHIZO_IOMMU_TAG_ERR))
+ continue;
+
+ data = iommu_data[i];
+ switch((tag & SCHIZO_IOMMU_TAG_ERRSTS) >> 23UL) {
+ case 0:
+ type_string = "Protection Error";
+ break;
+ case 1:
+ type_string = "Invalid Error";
+ break;
+ case 2:
+ type_string = "TimeOut Error";
+ break;
+ case 3:
+ default:
+ type_string = "ECC Error";
+ break;
+ };
+ printk("%s: IOMMU TAG(%d)[error(%s) ctx(%x) wr(%d) str(%d) "
+ "sz(%dK) vpg(%08lx)]\n",
+ pbm->name, i, type_string,
+ (int)((tag & SCHIZO_IOMMU_TAG_CTXT) >> 25UL),
+ ((tag & SCHIZO_IOMMU_TAG_WRITE) ? 1 : 0),
+ ((tag & SCHIZO_IOMMU_TAG_STREAM) ? 1 : 0),
+ ((tag & SCHIZO_IOMMU_TAG_SIZE) ? 64 : 8),
+ (tag & SCHIZO_IOMMU_TAG_VPAGE) << IOMMU_PAGE_SHIFT);
+ printk("%s: IOMMU DATA(%d)[valid(%d) cache(%d) ppg(%016lx)]\n",
+ pbm->name, i,
+ ((data & SCHIZO_IOMMU_DATA_VALID) ? 1 : 0),
+ ((data & SCHIZO_IOMMU_DATA_CACHE) ? 1 : 0),
+ (data & SCHIZO_IOMMU_DATA_PPAGE) << IOMMU_PAGE_SHIFT);
+ }
+ }
+ if (pbm->stc.strbuf_enabled) /* streaming cache check still runs under iommu->lock */
+ __schizo_check_stc_error_pbm(pbm, type);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void schizo_check_iommu_error(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+{
+ schizo_check_iommu_error_pbm(pbm, type);
+ if (pbm->sibling)
+ schizo_check_iommu_error_pbm(pbm->sibling, type);
+}
+
+/* Uncorrectable ECC error status gathering. */
+#define SCHIZO_UE_AFSR 0x10030UL
+#define SCHIZO_UE_AFAR 0x10038UL
+
+#define SCHIZO_UEAFSR_PPIO 0x8000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_PDRD 0x4000000000000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_PDWR 0x2000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_SPIO 0x1000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_SDMA 0x0800000000000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_ERRPNDG 0x0300000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_BMSK 0x000003ff00000000UL /* Safari */
+#define SCHIZO_UEAFSR_QOFF 0x00000000c0000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_AID 0x000000001f000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_PARTIAL 0x0000000000800000UL /* Safari */
+#define SCHIZO_UEAFSR_OWNEDIN 0x0000000000400000UL /* Safari */
+#define SCHIZO_UEAFSR_MTAGSYND 0x00000000000f0000UL /* Safari */
+#define SCHIZO_UEAFSR_MTAG 0x000000000000e000UL /* Safari */
+#define SCHIZO_UEAFSR_ECCSYND 0x00000000000001ffUL /* Safari */
+
+ /* Interrupt handler for uncorrectable ECC errors.
+  *
+  * Samples the UE AFAR and AFSR, writes the primary/secondary error
+  * bits found in the AFSR back to it, logs the decoded status and
+  * then interrogates the IOMMU(s).  Returns IRQ_NONE when none of
+  * the primary/secondary error bits were set.
+  */
+ static irqreturn_t schizo_ue_intr(int irq, void *dev_id)
+ {
+ 	struct pci_pbm_info *pbm = dev_id;
+ 	unsigned long afsr_reg = pbm->controller_regs + SCHIZO_UE_AFSR;
+ 	unsigned long afar_reg = pbm->controller_regs + SCHIZO_UE_AFAR;
+ 	unsigned long afsr, afar, error_bits;
+ 	const char *primary;
+ 	int tries;
+
+ 	/* The AFAR is sampled first, before the AFSR is polled. */
+ 	afar = upa_readq(afar_reg);
+
+ 	/* While an error pending bit is set the hardware is still
+ 	 * updating the AFSR, so keep re-reading it; give up after
+ 	 * 1000 reads and use whatever value was read last.
+ 	 */
+ 	for (tries = 1000; tries > 0; tries--) {
+ 		afsr = upa_readq(afsr_reg);
+ 		if ((afsr & SCHIZO_UEAFSR_ERRPNDG) == 0)
+ 			break;
+ 	}
+
+ 	/* Write the set primary/secondary bits back to clear them. */
+ 	error_bits = afsr & (SCHIZO_UEAFSR_PPIO |
+ 			     SCHIZO_UEAFSR_PDRD |
+ 			     SCHIZO_UEAFSR_PDWR |
+ 			     SCHIZO_UEAFSR_SPIO |
+ 			     SCHIZO_UEAFSR_SDMA);
+ 	if (error_bits == 0)
+ 		return IRQ_NONE;
+ 	upa_writeq(error_bits, afsr_reg);
+
+ 	/* Decode the primary error type, highest priority first. */
+ 	if (error_bits & SCHIZO_UEAFSR_PPIO)
+ 		primary = "PIO";
+ 	else if (error_bits & SCHIZO_UEAFSR_PDRD)
+ 		primary = "DMA Read";
+ 	else if (error_bits & SCHIZO_UEAFSR_PDWR)
+ 		primary = "DMA Write";
+ 	else
+ 		primary = "???";
+
+ 	printk("%s: Uncorrectable Error, primary error type[%s]\n",
+ 	       pbm->name, primary);
+ 	printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n",
+ 	       pbm->name,
+ 	       (afsr & SCHIZO_UEAFSR_BMSK) >> 32UL,
+ 	       (afsr & SCHIZO_UEAFSR_QOFF) >> 30UL,
+ 	       (afsr & SCHIZO_UEAFSR_AID) >> 24UL);
+ 	printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_sync[%lx]\n",
+ 	       pbm->name,
+ 	       (afsr & SCHIZO_UEAFSR_PARTIAL) ? 1 : 0,
+ 	       (afsr & SCHIZO_UEAFSR_OWNEDIN) ? 1 : 0,
+ 	       (afsr & SCHIZO_UEAFSR_MTAG) >> 13UL,
+ 	       (afsr & SCHIZO_UEAFSR_MTAGSYND) >> 16UL,
+ 	       (afsr & SCHIZO_UEAFSR_ECCSYND) >> 0UL);
+ 	printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+
+ 	/* List the secondary errors, or "(none)" if neither is set. */
+ 	printk("%s: UE Secondary errors [", pbm->name);
+ 	if (afsr & SCHIZO_UEAFSR_SPIO)
+ 		printk("(PIO)");
+ 	if (afsr & SCHIZO_UEAFSR_SDMA)
+ 		printk("(DMA)");
+ 	if (!(afsr & (SCHIZO_UEAFSR_SPIO | SCHIZO_UEAFSR_SDMA)))
+ 		printk("(none)");
+ 	printk("]\n");
+
+ 	/* Interrogate IOMMU for error status. */
+ 	schizo_check_iommu_error(pbm, UE_ERR);
+
+ 	return IRQ_HANDLED;
+ }
+
+#define SCHIZO_CE_AFSR 0x10040UL
+#define SCHIZO_CE_AFAR 0x10048UL
+
+#define SCHIZO_CEAFSR_PPIO 0x8000000000000000UL
+#define SCHIZO_CEAFSR_PDRD 0x4000000000000000UL
+#define SCHIZO_CEAFSR_PDWR 0x2000000000000000UL
+#define SCHIZO_CEAFSR_SPIO 0x1000000000000000UL
+#define SCHIZO_CEAFSR_SDMA 0x0800000000000000UL
+#define SCHIZO_CEAFSR_ERRPNDG 0x0300000000000000UL
+#define SCHIZO_CEAFSR_BMSK 0x000003ff00000000UL
+#define SCHIZO_CEAFSR_QOFF 0x00000000c0000000UL
+#define SCHIZO_CEAFSR_AID 0x000000001f000000UL
+#define SCHIZO_CEAFSR_PARTIAL 0x0000000000800000UL
+#define SCHIZO_CEAFSR_OWNEDIN 0x0000000000400000UL
+#define SCHIZO_CEAFSR_MTAGSYND 0x00000000000f0000UL
+#define SCHIZO_CEAFSR_MTAG 0x000000000000e000UL
+#define SCHIZO_CEAFSR_ECCSYND 0x00000000000001ffUL
+
+ /* Interrupt handler for correctable ECC errors.
+  *
+  * Samples the CE AFAR and AFSR, writes the primary/secondary error
+  * bits found in the AFSR back to it and logs the decoded status.
+  * Returns IRQ_NONE when none of the primary/secondary error bits
+  * were set.
+  *
+  * All AFSR fields are decoded with the SCHIZO_CEAFSR_* masks so the
+  * handler only depends on the definitions of the register it reads.
+  */
+ static irqreturn_t schizo_ce_intr(int irq, void *dev_id)
+ {
+ 	struct pci_pbm_info *pbm = dev_id;
+ 	unsigned long afsr_reg = pbm->controller_regs + SCHIZO_CE_AFSR;
+ 	unsigned long afar_reg = pbm->controller_regs + SCHIZO_CE_AFAR;
+ 	unsigned long afsr, afar, error_bits;
+ 	int reported, limit;
+
+ 	/* Latch error status. */
+ 	afar = upa_readq(afar_reg);
+
+ 	/* If either of the error pending bits are set in the
+ 	 * AFSR, the error status is being actively updated by
+ 	 * the hardware and we must re-read to get a clean value.
+ 	 * The poll is bounded to 1000 reads.
+ 	 */
+ 	limit = 1000;
+ 	do {
+ 		afsr = upa_readq(afsr_reg);
+ 	} while ((afsr & SCHIZO_CEAFSR_ERRPNDG) != 0 && --limit);
+
+ 	/* Clear primary/secondary error status bits. */
+ 	error_bits = afsr &
+ 		(SCHIZO_CEAFSR_PPIO | SCHIZO_CEAFSR_PDRD | SCHIZO_CEAFSR_PDWR |
+ 		 SCHIZO_CEAFSR_SPIO | SCHIZO_CEAFSR_SDMA);
+ 	if (!error_bits)
+ 		return IRQ_NONE;
+ 	upa_writeq(error_bits, afsr_reg);
+
+ 	/* Log the error. */
+ 	printk("%s: Correctable Error, primary error type[%s]\n",
+ 	       pbm->name,
+ 	       (((error_bits & SCHIZO_CEAFSR_PPIO) ?
+ 		 "PIO" :
+ 		 ((error_bits & SCHIZO_CEAFSR_PDRD) ?
+ 		  "DMA Read" :
+ 		  ((error_bits & SCHIZO_CEAFSR_PDWR) ?
+ 		   "DMA Write" : "???")))));
+
+ 	/* XXX Use syndrome and afar to print out module string just like
+ 	 * XXX UDB CE trap handler does... -DaveM
+ 	 */
+ 	printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n",
+ 	       pbm->name,
+ 	       (afsr & SCHIZO_CEAFSR_BMSK) >> 32UL,
+ 	       (afsr & SCHIZO_CEAFSR_QOFF) >> 30UL,
+ 	       (afsr & SCHIZO_CEAFSR_AID) >> 24UL);
+ 	printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_sync[%lx]\n",
+ 	       pbm->name,
+ 	       (afsr & SCHIZO_CEAFSR_PARTIAL) ? 1 : 0,
+ 	       (afsr & SCHIZO_CEAFSR_OWNEDIN) ? 1 : 0,
+ 	       (afsr & SCHIZO_CEAFSR_MTAG) >> 13UL,
+ 	       (afsr & SCHIZO_CEAFSR_MTAGSYND) >> 16UL,
+ 	       (afsr & SCHIZO_CEAFSR_ECCSYND) >> 0UL);
+ 	printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+
+ 	/* List the secondary errors, or "(none)" if neither is set. */
+ 	printk("%s: CE Secondary errors [", pbm->name);
+ 	reported = 0;
+ 	if (afsr & SCHIZO_CEAFSR_SPIO) {
+ 		reported++;
+ 		printk("(PIO)");
+ 	}
+ 	if (afsr & SCHIZO_CEAFSR_SDMA) {
+ 		reported++;
+ 		printk("(DMA)");
+ 	}
+ 	if (!reported)
+ 		printk("(none)");
+ 	printk("]\n");
+
+ 	return IRQ_HANDLED;
+ }
+
+#define SCHIZO_PCI_AFSR 0x2010UL
+#define SCHIZO_PCI_AFAR 0x2018UL
+
+#define SCHIZO_PCIAFSR_PMA 0x8000000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PTA 0x4000000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PRTRY 0x2000000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PPERR 0x1000000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PTTO 0x0800000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PUNUS 0x0400000000000000UL /* Schizo */
+#define SCHIZO_PCIAFSR_SMA 0x0200000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_STA 0x0100000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_SRTRY 0x0080000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_SPERR 0x0040000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_STTO 0x0020000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_SUNUS 0x0010000000000000UL /* Schizo */
+#define SCHIZO_PCIAFSR_BMSK 0x000003ff00000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_BLK 0x0000000080000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_CFG 0x0000000040000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_MEM 0x0000000020000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_IO 0x0000000010000000UL /* Schizo/Tomatillo */
+
+ /* PCI control register, addressed as an offset from pbm->pbm_regs,
+  * and its bit fields.  Every mask is built from an unsigned long
+  * constant so that shifts past bit 31 are well defined.
+  */
+ #define SCHIZO_PCI_CTRL (0x2000UL)
+ #define SCHIZO_PCICTRL_BUS_UNUS (1UL << 63UL) /* Safari */
+ #define SCHIZO_PCICTRL_DTO_INT (1UL << 61UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_ARB_PRIO (0x1ffUL << 52UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_ESLCK (1UL << 51UL) /* Safari */
+ #define SCHIZO_PCICTRL_ERRSLOT (7UL << 48UL) /* Safari */
+ #define SCHIZO_PCICTRL_TTO_ERR (1UL << 38UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_RTRY_ERR (1UL << 37UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_DTO_ERR (1UL << 36UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_SBH_ERR (1UL << 35UL) /* Safari */
+ #define SCHIZO_PCICTRL_SERR (1UL << 34UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_PCISPD (1UL << 33UL) /* Safari */
+ #define SCHIZO_PCICTRL_MRM_PREF (1UL << 30UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_RDO_PREF (1UL << 29UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_RDL_PREF (1UL << 28UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_PTO (3UL << 24UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_PTO_SHIFT 24UL
+ #define SCHIZO_PCICTRL_TRWSW (7UL << 21UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_F_TGT_A (1UL << 20UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_S_DTO_INT (1UL << 19UL) /* Safari */
+ #define SCHIZO_PCICTRL_F_TGT_RT (1UL << 19UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_SBH_INT (1UL << 18UL) /* Safari */
+ #define SCHIZO_PCICTRL_T_DTO_INT (1UL << 18UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_EEN (1UL << 17UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_PARK (1UL << 16UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_PCIRST (1UL << 8UL) /* Safari */
+ #define SCHIZO_PCICTRL_ARB_S (0x3fUL << 0UL) /* Safari */
+ #define SCHIZO_PCICTRL_ARB_T (0xffUL << 0UL) /* Tomatillo */
+
+static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm)
+{
+ unsigned long csr_reg, csr, csr_error_bits;
+ irqreturn_t ret = IRQ_NONE;
+ u16 stat;
+
+ csr_reg = pbm->pbm_regs + SCHIZO_PCI_CTRL;
+ csr = upa_readq(csr_reg);
+ csr_error_bits =
+ csr & (SCHIZO_PCICTRL_BUS_UNUS |
+ SCHIZO_PCICTRL_TTO_ERR |
+ SCHIZO_PCICTRL_RTRY_ERR |
+ SCHIZO_PCICTRL_DTO_ERR |
+ SCHIZO_PCICTRL_SBH_ERR |
+ SCHIZO_PCICTRL_SERR);
+ if (csr_error_bits) {
+ /* Clear the errors. */
+ upa_writeq(csr, csr_reg);
+
+ /* Log 'em. */
+ if (csr_error_bits & SCHIZO_PCICTRL_BUS_UNUS)
+ printk("%s: Bus unusable error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_TTO_ERR)
+ printk("%s: PCI TRDY# timeout error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_RTRY_ERR)
+ printk("%s: PCI excessive retry error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_DTO_ERR)
+ printk("%s: PCI discard timeout error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_SBH_ERR)
+ printk("%s: PCI streaming byte hole error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_SERR)
+ printk("%s: PCI SERR signal asserted.\n",
+ pbm->name);
+ ret = IRQ_HANDLED;
+ }
+ pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &stat);
+ if (stat & (PCI_STATUS_PARITY |
+ PCI_STATUS_SIG_TARGET_ABORT |
+ PCI_STATUS_REC_TARGET_ABORT |
+ PCI_STATUS_REC_MASTER_ABORT |
+ PCI_STATUS_SIG_SYSTEM_ERROR)) {
+ printk("%s: PCI bus error, PCI_STATUS[%04x]\n",
+ pbm->name, stat);
+ pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff);
+ ret = IRQ_HANDLED;
+ }
+ return ret;
+}
+
+static irqreturn_t schizo_pcierr_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg, afar_reg, base;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ base = pbm->pbm_regs;
+
+ afsr_reg = base + SCHIZO_PCI_AFSR;
+ afar_reg = base + SCHIZO_PCI_AFAR;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+ SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+ SCHIZO_PCIAFSR_PTTO | SCHIZO_PCIAFSR_PUNUS |
+ SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+ SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+ SCHIZO_PCIAFSR_STTO | SCHIZO_PCIAFSR_SUNUS);
+ if (!error_bits)
+ return schizo_pcierr_intr_other(pbm);
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: PCI Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & SCHIZO_PCIAFSR_PMA) ?
+ "Master Abort" :
+ ((error_bits & SCHIZO_PCIAFSR_PTA) ?
+ "Target Abort" :
+ ((error_bits & SCHIZO_PCIAFSR_PRTRY) ?
+ "Excessive Retries" :
+ ((error_bits & SCHIZO_PCIAFSR_PPERR) ?
+ "Parity Error" :
+ ((error_bits & SCHIZO_PCIAFSR_PTTO) ?
+ "Timeout" :
+ ((error_bits & SCHIZO_PCIAFSR_PUNUS) ?
+ "Bus Unusable" : "???"))))))));
+ printk("%s: bytemask[%04lx] was_block(%d) space(%s)\n",
+ pbm->name,
+ (afsr & SCHIZO_PCIAFSR_BMSK) >> 32UL,
+ (afsr & SCHIZO_PCIAFSR_BLK) ? 1 : 0,
+ ((afsr & SCHIZO_PCIAFSR_CFG) ?
+ "Config" :
+ ((afsr & SCHIZO_PCIAFSR_MEM) ?
+ "Memory" :
+ ((afsr & SCHIZO_PCIAFSR_IO) ?
+ "I/O" : "???"))));
+ printk("%s: PCI AFAR [%016lx]\n",
+ pbm->name, afar);
+ printk("%s: PCI Secondary errors [",
+ pbm->name);
+ reported = 0;
+ if (afsr & SCHIZO_PCIAFSR_SMA) {
+ reported++;
+ printk("(Master Abort)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_STA) {
+ reported++;
+ printk("(Target Abort)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_SRTRY) {
+ reported++;
+ printk("(Excessive Retries)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_SPERR) {
+ reported++;
+ printk("(Parity Error)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_STTO) {
+ reported++;
+ printk("(Timeout)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_SUNUS) {
+ reported++;
+ printk("(Bus Unusable)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* For the error types shown, scan PBM's PCI bus for devices
+ * which have logged that error type.
+ */
+
+ /* If we see a Target Abort, this could be the result of an
+ * IOMMU translation error of some sort. It is extremely
+ * useful to log this information as usually it indicates
+ * a bug in the IOMMU support code or a PCI device driver.
+ */
+ if (error_bits & (SCHIZO_PCIAFSR_PTA | SCHIZO_PCIAFSR_STA)) {
+ schizo_check_iommu_error(pbm, PCI_ERR);
+ pci_scan_for_target_abort(pbm, pbm->pci_bus);
+ }
+ if (error_bits & (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_SMA))
+ pci_scan_for_master_abort(pbm, pbm->pci_bus);
+
+ /* For excessive retries, PSYCHO/PBM will abort the device
+ * and there is no way to specifically check for excessive
+ * retries in the config space status registers. So what
+ * we hope is that we'll catch it via the master/target
+ * abort events.
+ */
+
+ if (error_bits & (SCHIZO_PCIAFSR_PPERR | SCHIZO_PCIAFSR_SPERR))
+ pci_scan_for_parity_error(pbm, pbm->pci_bus);
+
+ return IRQ_HANDLED;
+}
+
+#define SCHIZO_SAFARI_ERRLOG 0x10018UL
+
+#define SAFARI_ERRLOG_ERROUT 0x8000000000000000UL
+
+#define BUS_ERROR_BADCMD 0x4000000000000000UL /* Schizo/Tomatillo */
+#define BUS_ERROR_SSMDIS 0x2000000000000000UL /* Safari */
+#define BUS_ERROR_BADMA 0x1000000000000000UL /* Safari */
+#define BUS_ERROR_BADMB 0x0800000000000000UL /* Safari */
+#define BUS_ERROR_BADMC 0x0400000000000000UL /* Safari */
+#define BUS_ERROR_SNOOP_GR 0x0000000000200000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_PCI 0x0000000000100000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_RD 0x0000000000080000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_RDS 0x0000000000020000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_RDSA 0x0000000000010000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_OWN 0x0000000000008000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_RDO 0x0000000000004000UL /* Tomatillo */
+#define BUS_ERROR_CPU1PS 0x0000000000002000UL /* Safari */
+#define BUS_ERROR_WDATA_PERR 0x0000000000002000UL /* Tomatillo */
+#define BUS_ERROR_CPU1PB 0x0000000000001000UL /* Safari */
+#define BUS_ERROR_CTRL_PERR 0x0000000000001000UL /* Tomatillo */
+#define BUS_ERROR_CPU0PS 0x0000000000000800UL /* Safari */
+#define BUS_ERROR_SNOOP_ERR 0x0000000000000800UL /* Tomatillo */
+#define BUS_ERROR_CPU0PB 0x0000000000000400UL /* Safari */
+#define BUS_ERROR_JBUS_ILL_B 0x0000000000000400UL /* Tomatillo */
+#define BUS_ERROR_CIQTO 0x0000000000000200UL /* Safari */
+#define BUS_ERROR_LPQTO 0x0000000000000100UL /* Safari */
+#define BUS_ERROR_JBUS_ILL_C 0x0000000000000100UL /* Tomatillo */
+#define BUS_ERROR_SFPQTO 0x0000000000000080UL /* Safari */
+#define BUS_ERROR_UFPQTO 0x0000000000000040UL /* Safari */
+#define BUS_ERROR_RD_PERR 0x0000000000000040UL /* Tomatillo */
+#define BUS_ERROR_APERR 0x0000000000000020UL /* Safari/Tomatillo */
+#define BUS_ERROR_UNMAP 0x0000000000000010UL /* Safari/Tomatillo */
+#define BUS_ERROR_BUSERR 0x0000000000000004UL /* Safari/Tomatillo */
+#define BUS_ERROR_TIMEOUT 0x0000000000000002UL /* Safari/Tomatillo */
+#define BUS_ERROR_ILL 0x0000000000000001UL /* Safari */
+
+/* We only expect UNMAP errors here. The rest of the Safari errors
+ * are marked fatal and thus cause a system reset.
+ */
+static irqreturn_t schizo_safarierr_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ u64 errlog;
+
+ errlog = upa_readq(pbm->controller_regs + SCHIZO_SAFARI_ERRLOG);
+ upa_writeq(errlog & ~(SAFARI_ERRLOG_ERROUT),
+ pbm->controller_regs + SCHIZO_SAFARI_ERRLOG);
+
+ if (!(errlog & BUS_ERROR_UNMAP)) {
+ printk("%s: Unexpected Safari/JBUS error interrupt, errlog[%016lx]\n",
+ pbm->name, errlog);
+
+ return IRQ_HANDLED;
+ }
+
+ printk("%s: Safari/JBUS interrupt, UNMAPPED error, interrogating IOMMUs.\n",
+ pbm->name);
+ schizo_check_iommu_error(pbm, SAFARI_ERR);
+
+ return IRQ_HANDLED;
+}
+
+ /* Nearly identical to PSYCHO equivalents...  These are offsets from
+  * pbm->controller_regs plus the enable bits written to them when the
+  * error handlers are registered.
+  */
+ #define SCHIZO_ECC_CTRL 0x10020UL
+ #define SCHIZO_ECCCTRL_EE 0x8000000000000000UL /* Enable ECC Checking */
+ #define SCHIZO_ECCCTRL_UE 0x4000000000000000UL /* Enable UE Interrupts */
+ #define SCHIZO_ECCCTRL_CE 0x2000000000000000UL /* Enable CE Interrupts */
+
+ #define SCHIZO_SAFARI_ERRCTRL 0x10008UL
+ #define SCHIZO_SAFERRCTRL_EN 0x8000000000000000UL /* OR'd with the BUS_ERROR_* mask */
+ #define SCHIZO_SAFARI_IRQCTRL 0x10010UL
+ #define SCHIZO_SAFIRQCTRL_EN 0x8000000000000000UL /* OR'd with the BUS_ERROR_* mask */
+
+ /* Return 1 if the bit for @ino (reduced to its IMAP_INO field) is
+  * set in pbm->ino_bitmap, 0 otherwise.
+  */
+ static int pbm_routes_this_ino(struct pci_pbm_info *pbm, u32 ino)
+ {
+ 	u32 bit = ino & IMAP_INO;
+
+ 	return (pbm->ino_bitmap & (1UL << bit)) ? 1 : 0;
+ }
+
+/* How the Tomatillo IRQs are routed around is pure guesswork here.
+ *
+ * All the Tomatillo devices I see in prtconf dumps seem to have only
+ * a single PCI bus unit attached to it. It would seem they are separate
+ * devices because their PortID (ie. JBUS ID) values are all different
+ * and thus the registers are mapped to totally different locations.
+ *
+ * However, two Tomatillo's look "similar" in that the only difference
+ * in their PortID is the lowest bit.
+ *
+ * So if we were to ignore this lower bit, it certainly looks like two
+ * PCI bus units of the same Tomatillo. I still have not really
+ * figured this out...
+ */
+ static void tomatillo_register_error_handlers(struct pci_pbm_info *pbm)
+ {
+ 	struct of_device *op = of_find_device_by_node(pbm->op->node);
+ 	u64 pci_ctrl, ctrl_set, ctrl_clear, afsr_bits, bus_errs;
+ 	int err;
+
+ 	/* Register a handler for each error interrupt this PBM routes,
+ 	 * then enable error reporting in the hardware.  A failed
+ 	 * request_irq() is only logged.
+ 	 *
+ 	 * Tomatillo IRQ property layout is:
+ 	 * 0: PCIERR
+ 	 * 1: UE ERR
+ 	 * 2: CE ERR
+ 	 * 3: SERR
+ 	 * 4: POWER FAIL?
+ 	 */
+
+ 	if (pbm_routes_this_ino(pbm, SCHIZO_UE_INO)) {
+ 		err = request_irq(op->irqs[1], schizo_ue_intr, 0,
+ 				  "TOMATILLO_UE", pbm);
+ 		if (err)
+ 			printk(KERN_WARNING "%s: Could not register UE, err=%d\n",
+ 			       pbm->name, err);
+ 	}
+
+ 	if (pbm_routes_this_ino(pbm, SCHIZO_CE_INO)) {
+ 		err = request_irq(op->irqs[2], schizo_ce_intr, 0,
+ 				  "TOMATILLO_CE", pbm);
+ 		if (err)
+ 			printk(KERN_WARNING "%s: Could not register CE, err=%d\n",
+ 			       pbm->name, err);
+ 	}
+
+ 	/* Either PCIERR ino maps to IRQ property entry 0. */
+ 	if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_A_INO) ||
+ 	    pbm_routes_this_ino(pbm, SCHIZO_PCIERR_B_INO)) {
+ 		err = request_irq(op->irqs[0], schizo_pcierr_intr, 0,
+ 				  "TOMATILLO_PCIERR", pbm);
+ 		if (err)
+ 			printk(KERN_WARNING "%s: Could not register PCIERR, err=%d\n",
+ 			       pbm->name, err);
+ 	}
+
+ 	if (pbm_routes_this_ino(pbm, SCHIZO_SERR_INO)) {
+ 		err = request_irq(op->irqs[3], schizo_safarierr_intr, 0,
+ 				  "TOMATILLO_SERR", pbm);
+ 		if (err)
+ 			printk(KERN_WARNING "%s: Could not register SERR, err=%d\n",
+ 			       pbm->name, err);
+ 	}
+
+ 	/* Enable UE and CE interrupts for controller. */
+ 	upa_writeq(SCHIZO_ECCCTRL_EE | SCHIZO_ECCCTRL_UE | SCHIZO_ECCCTRL_CE,
+ 		   pbm->controller_regs + SCHIZO_ECC_CTRL);
+
+ 	/* Enable PCI Error interrupts and clear error bits:
+ 	 * set everything in ctrl_set, clear everything in ctrl_clear.
+ 	 */
+ 	ctrl_set = (SCHIZO_PCICTRL_BUS_UNUS |
+ 		    SCHIZO_PCICTRL_TTO_ERR |
+ 		    SCHIZO_PCICTRL_RTRY_ERR |
+ 		    SCHIZO_PCICTRL_SERR |
+ 		    SCHIZO_PCICTRL_EEN);
+ 	ctrl_clear = SCHIZO_PCICTRL_DTO_ERR;
+
+ 	pci_ctrl = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+ 	pci_ctrl = (pci_ctrl | ctrl_set) & ~ctrl_clear;
+ 	upa_writeq(pci_ctrl, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+ 	/* Write the primary/secondary status bits to the PCI AFSR. */
+ 	afsr_bits = (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+ 		     SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+ 		     SCHIZO_PCIAFSR_PTTO |
+ 		     SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+ 		     SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+ 		     SCHIZO_PCIAFSR_STTO);
+ 	upa_writeq(afsr_bits, pbm->pbm_regs + SCHIZO_PCI_AFSR);
+
+ 	/* Bus error conditions enabled in the error control register. */
+ 	bus_errs = (BUS_ERROR_BADCMD | BUS_ERROR_SNOOP_GR |
+ 		    BUS_ERROR_SNOOP_PCI | BUS_ERROR_SNOOP_RD |
+ 		    BUS_ERROR_SNOOP_RDS | BUS_ERROR_SNOOP_RDSA |
+ 		    BUS_ERROR_SNOOP_OWN | BUS_ERROR_SNOOP_RDO |
+ 		    BUS_ERROR_WDATA_PERR | BUS_ERROR_CTRL_PERR |
+ 		    BUS_ERROR_SNOOP_ERR | BUS_ERROR_JBUS_ILL_B |
+ 		    BUS_ERROR_JBUS_ILL_C | BUS_ERROR_RD_PERR |
+ 		    BUS_ERROR_APERR | BUS_ERROR_UNMAP |
+ 		    BUS_ERROR_BUSERR | BUS_ERROR_TIMEOUT);
+ 	upa_writeq(SCHIZO_SAFERRCTRL_EN | bus_errs,
+ 		   pbm->controller_regs + SCHIZO_SAFARI_ERRCTRL);
+
+ 	/* Only the UNMAP condition is enabled in the IRQ control register. */
+ 	upa_writeq(SCHIZO_SAFIRQCTRL_EN | BUS_ERROR_UNMAP,
+ 		   pbm->controller_regs + SCHIZO_SAFARI_IRQCTRL);
+ }
+
+ static void schizo_register_error_handlers(struct pci_pbm_info *pbm)
+ {
+ 	struct of_device *op = of_find_device_by_node(pbm->op->node);
+ 	u64 pci_ctrl, ctrl_set, ctrl_clear, bus_errs;
+ 	int err;
+
+ 	/* Register a handler for each error interrupt this PBM routes,
+ 	 * then enable error reporting in the hardware.  A failed
+ 	 * request_irq() is only logged.
+ 	 *
+ 	 * Schizo IRQ property layout is:
+ 	 * 0: PCIERR
+ 	 * 1: UE ERR
+ 	 * 2: CE ERR
+ 	 * 3: SERR
+ 	 * 4: POWER FAIL?
+ 	 */
+
+ 	if (pbm_routes_this_ino(pbm, SCHIZO_UE_INO)) {
+ 		err = request_irq(op->irqs[1], schizo_ue_intr, 0,
+ 				  "SCHIZO_UE", pbm);
+ 		if (err)
+ 			printk(KERN_WARNING "%s: Could not register UE, err=%d\n",
+ 			       pbm->name, err);
+ 	}
+
+ 	if (pbm_routes_this_ino(pbm, SCHIZO_CE_INO)) {
+ 		err = request_irq(op->irqs[2], schizo_ce_intr, 0,
+ 				  "SCHIZO_CE", pbm);
+ 		if (err)
+ 			printk(KERN_WARNING "%s: Could not register CE, err=%d\n",
+ 			       pbm->name, err);
+ 	}
+
+ 	/* Either PCIERR ino maps to IRQ property entry 0. */
+ 	if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_A_INO) ||
+ 	    pbm_routes_this_ino(pbm, SCHIZO_PCIERR_B_INO)) {
+ 		err = request_irq(op->irqs[0], schizo_pcierr_intr, 0,
+ 				  "SCHIZO_PCIERR", pbm);
+ 		if (err)
+ 			printk(KERN_WARNING "%s: Could not register PCIERR, err=%d\n",
+ 			       pbm->name, err);
+ 	}
+
+ 	if (pbm_routes_this_ino(pbm, SCHIZO_SERR_INO)) {
+ 		err = request_irq(op->irqs[3], schizo_safarierr_intr, 0,
+ 				  "SCHIZO_SERR", pbm);
+ 		if (err)
+ 			printk(KERN_WARNING "%s: Could not register SERR, err=%d\n",
+ 			       pbm->name, err);
+ 	}
+
+ 	/* Enable UE and CE interrupts for controller. */
+ 	upa_writeq(SCHIZO_ECCCTRL_EE | SCHIZO_ECCCTRL_UE | SCHIZO_ECCCTRL_CE,
+ 		   pbm->controller_regs + SCHIZO_ECC_CTRL);
+
+ 	/* Enable PCI Error interrupts and clear error bits for this
+ 	 * PBM: set everything in ctrl_set, clear everything in
+ 	 * ctrl_clear.
+ 	 */
+ 	ctrl_set = (SCHIZO_PCICTRL_BUS_UNUS |
+ 		    SCHIZO_PCICTRL_ESLCK |
+ 		    SCHIZO_PCICTRL_TTO_ERR |
+ 		    SCHIZO_PCICTRL_RTRY_ERR |
+ 		    SCHIZO_PCICTRL_SBH_ERR |
+ 		    SCHIZO_PCICTRL_SERR |
+ 		    SCHIZO_PCICTRL_EEN);
+ 	ctrl_clear = (SCHIZO_PCICTRL_DTO_ERR |
+ 		      SCHIZO_PCICTRL_SBH_INT);
+
+ 	pci_ctrl = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+ 	pci_ctrl = (pci_ctrl | ctrl_set) & ~ctrl_clear;
+ 	upa_writeq(pci_ctrl, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+ 	/* Write the primary/secondary status bits to the PCI AFSR. */
+ 	upa_writeq(SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+ 		   SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+ 		   SCHIZO_PCIAFSR_PTTO | SCHIZO_PCIAFSR_PUNUS |
+ 		   SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+ 		   SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+ 		   SCHIZO_PCIAFSR_STTO | SCHIZO_PCIAFSR_SUNUS,
+ 		   pbm->pbm_regs + SCHIZO_PCI_AFSR);
+
+ 	/* Make all Safari error conditions fatal except unmapped
+ 	 * errors which we make generate interrupts.
+ 	 */
+ 	bus_errs = (BUS_ERROR_BADCMD | BUS_ERROR_SSMDIS |
+ 		    BUS_ERROR_BADMA | BUS_ERROR_BADMB |
+ 		    BUS_ERROR_BADMC |
+ 		    BUS_ERROR_CPU1PS | BUS_ERROR_CPU1PB |
+ 		    BUS_ERROR_CPU0PS | BUS_ERROR_CPU0PB |
+ 		    BUS_ERROR_CIQTO |
+ 		    BUS_ERROR_LPQTO | BUS_ERROR_SFPQTO |
+ 		    BUS_ERROR_UFPQTO | BUS_ERROR_APERR |
+ 		    BUS_ERROR_BUSERR | BUS_ERROR_TIMEOUT |
+ 		    BUS_ERROR_ILL);
+ #if 1
+ 	/* XXX Something wrong with some Excalibur systems
+ 	 * XXX Sun is shipping. The behavior on a 2-cpu
+ 	 * XXX machine is that both CPU1 parity error bits
+ 	 * XXX are set and are immediately set again when
+ 	 * XXX their error status bits are cleared. Just
+ 	 * XXX ignore them for now. -DaveM
+ 	 */
+ 	bus_errs &= ~(BUS_ERROR_CPU1PS | BUS_ERROR_CPU1PB |
+ 		      BUS_ERROR_CPU0PS | BUS_ERROR_CPU0PB);
+ #endif
+
+ 	upa_writeq(SCHIZO_SAFERRCTRL_EN | bus_errs,
+ 		   pbm->controller_regs + SCHIZO_SAFARI_ERRCTRL);
+ }
+
+ /* Program the cache-line size and latency timer config registers
+  * at device 0 on the PBM's first bus (pbm->pci_first_busno).
+  */
+ static void pbm_config_busmastering(struct pci_pbm_info *pbm)
+ {
+ 	u8 *cfg;
+
+ 	/* Cache-line size of 64 bytes, expressed in 32-bit words.
+ 	 * This is actually a nop but is done for completeness.
+ 	 */
+ 	cfg = schizo_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+ 				       0, PCI_CACHE_LINE_SIZE);
+ 	pci_config_write8(cfg, 64 / sizeof(u32));
+
+ 	/* PBM latency timer: 64 PCI clocks. */
+ 	cfg = schizo_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+ 				       0, PCI_LATENCY_TIMER);
+ 	pci_config_write8(cfg, 64);
+ }
+
+ /* Configure bus mastering for @pbm, record whether its device node
+  * has a "66mhz-capable" property, scan the PCI bus below it and
+  * register the error handlers matching the chip type.
+  */
+ static void __devinit schizo_scan_bus(struct pci_pbm_info *pbm,
+ 				      struct device *parent)
+ {
+ 	pbm_config_busmastering(pbm);
+
+ 	if (of_find_property(pbm->op->node, "66mhz-capable", NULL))
+ 		pbm->is_66mhz_capable = 1;
+ 	else
+ 		pbm->is_66mhz_capable = 0;
+
+ 	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+
+ 	switch (pbm->chip_type) {
+ 	case PBM_CHIP_TYPE_TOMATILLO:
+ 		tomatillo_register_error_handlers(pbm);
+ 		break;
+ 	default:
+ 		schizo_register_error_handlers(pbm);
+ 		break;
+ 	}
+ }
+
+#define SCHIZO_STRBUF_CONTROL (0x02800UL)
+#define SCHIZO_STRBUF_FLUSH (0x02808UL)
+#define SCHIZO_STRBUF_FSYNC (0x02810UL)
+#define SCHIZO_STRBUF_CTXFLUSH (0x02818UL)
+#define SCHIZO_STRBUF_CTXMATCH (0x10000UL)
+
+ /* Set up the streaming buffer of @pbm.  Does nothing on Tomatillo;
+  * otherwise records the streaming buffer register addresses, picks a
+  * 64-byte aligned flush flag inside __flushflag_buf and enables the
+  * streaming buffer.
+  */
+ static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm)
+ {
+ 	unsigned long regs = pbm->pbm_regs;
+ 	unsigned long flag_addr;
+ 	u64 ctrl;
+
+ 	/* TOMATILLO lacks streaming cache. */
+ 	if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO)
+ 		return;
+
+ 	/* SCHIZO has context flushing. */
+ 	pbm->stc.strbuf_control = regs + SCHIZO_STRBUF_CONTROL;
+ 	pbm->stc.strbuf_pflush = regs + SCHIZO_STRBUF_FLUSH;
+ 	pbm->stc.strbuf_fsync = regs + SCHIZO_STRBUF_FSYNC;
+ 	pbm->stc.strbuf_ctxflush = regs + SCHIZO_STRBUF_CTXFLUSH;
+ 	pbm->stc.strbuf_ctxmatch_base = regs + SCHIZO_STRBUF_CTXMATCH;
+
+ 	/* Round the buffer start up to the next 64-byte boundary. */
+ 	flag_addr = (unsigned long)&pbm->stc.__flushflag_buf[0];
+ 	flag_addr = (flag_addr + 63UL) & ~63UL;
+ 	pbm->stc.strbuf_flushflag = (volatile unsigned long *)flag_addr;
+ 	pbm->stc.strbuf_flushflag_pa = (unsigned long)
+ 		__pa(pbm->stc.strbuf_flushflag);
+
+ 	/* Turn off LRU locking and diag mode, enable the
+ 	 * streaming buffer and leave the rerun-disable
+ 	 * setting however OBP set it.
+ 	 */
+ 	ctrl = upa_readq(pbm->stc.strbuf_control);
+ 	ctrl &= ~(SCHIZO_STRBUF_CTRL_LPTR |
+ 		  SCHIZO_STRBUF_CTRL_LENAB |
+ 		  SCHIZO_STRBUF_CTRL_DENAB);
+ 	ctrl |= SCHIZO_STRBUF_CTRL_ENAB;
+ 	upa_writeq(ctrl, pbm->stc.strbuf_control);
+
+ 	pbm->stc.strbuf_enabled = 1;
+ }
+
+#define SCHIZO_IOMMU_CONTROL (0x00200UL)
+#define SCHIZO_IOMMU_TSBBASE (0x00208UL)
+#define SCHIZO_IOMMU_FLUSH (0x00210UL)
+#define SCHIZO_IOMMU_CTXFLUSH (0x00218UL)
+
+ /* Initialise the IOMMU of @pbm.
+  *
+  * The DVMA window comes from the "virtual-dma" property (base, size)
+  * or a built-in default.  The IOMMU TLB entries are zeroed in
+  * diagnostic mode, the translation table is allocated through
+  * iommu_table_init() and the IOMMU is then enabled.
+  *
+  * Returns 0 on success, -EINVAL for an unsupported window size, or
+  * the error returned by iommu_table_init().
+  */
+ static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
+ {
+ 	static const u32 vdma_default[] = { 0xc0000000, 0x40000000 };
+ 	struct iommu *iommu = pbm->iommu;
+ 	unsigned long entry;
+ 	const u32 *vdma;
+ 	int tsbsize, err;
+ 	u32 dma_mask;
+ 	u64 control;
+
+ 	vdma = of_get_property(pbm->op->node, "virtual-dma", NULL);
+ 	if (vdma == NULL)
+ 		vdma = vdma_default;
+
+ 	/* Pick the TSB size for the window size in vdma[1]. */
+ 	switch (vdma[1]) {
+ 	case 0x20000000:
+ 		tsbsize = 64;
+ 		break;
+
+ 	case 0x40000000:
+ 	case 0x80000000:
+ 		tsbsize = 128;
+ 		break;
+
+ 	default:
+ 		printk(KERN_ERR PFX "Strange virtual-dma size.\n");
+ 		return -EINVAL;
+ 	}
+
+ 	/* The accepted sizes are powers of two, so size - 1 yields the
+ 	 * low-order mask that is merged into the window base.
+ 	 */
+ 	dma_mask = vdma[0] | (vdma[1] - 1);
+
+ 	/* Register addresses, SCHIZO has iommu ctx flushing. */
+ 	iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL;
+ 	iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE;
+ 	iommu->iommu_flush = pbm->pbm_regs + SCHIZO_IOMMU_FLUSH;
+ 	iommu->iommu_tags = iommu->iommu_flush + (0xa580UL - 0x0210UL);
+ 	iommu->iommu_ctxflush = pbm->pbm_regs + SCHIZO_IOMMU_CTXFLUSH;
+
+ 	/* We use the main control/status register of SCHIZO as the write
+ 	 * completion register.
+ 	 */
+ 	iommu->write_complete_reg = pbm->controller_regs + 0x10000UL;
+
+ 	/* Invalidate TLB entries: enter diagnostic mode and zero all
+ 	 * 16 tag/data pairs.
+ 	 */
+ 	control = upa_readq(iommu->iommu_control);
+ 	control |= SCHIZO_IOMMU_CTRL_DENAB;
+ 	upa_writeq(control, iommu->iommu_control);
+
+ 	for (entry = 0; entry < 16; entry++) {
+ 		upa_writeq(0, pbm->pbm_regs + SCHIZO_IOMMU_TAG +
+ 			   (entry * 8UL));
+ 		upa_writeq(0, pbm->pbm_regs + SCHIZO_IOMMU_DATA +
+ 			   (entry * 8UL));
+ 	}
+
+ 	/* Leave diag mode enabled for full-flushing done
+ 	 * in pci_iommu.c
+ 	 */
+ 	err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
+ 			       pbm->numa_node);
+ 	if (err) {
+ 		printk(KERN_ERR PFX "iommu_table_init() fails with %d\n", err);
+ 		return err;
+ 	}
+
+ 	upa_writeq(__pa(iommu->page_table), iommu->iommu_tsbbase);
+
+ 	/* Program the TSB size and switch the IOMMU on. */
+ 	control = upa_readq(iommu->iommu_control);
+ 	control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ);
+ 	if (tsbsize == 64)
+ 		control |= SCHIZO_IOMMU_TSBSZ_64K;
+ 	else if (tsbsize == 128)
+ 		control |= SCHIZO_IOMMU_TSBSZ_128K;
+
+ 	control |= SCHIZO_IOMMU_CTRL_ENAB;
+ 	upa_writeq(control, iommu->iommu_control);
+
+ 	return 0;
+ }
+
+#define SCHIZO_PCI_IRQ_RETRY (0x1a00UL)
+#define SCHIZO_IRQ_RETRY_INF 0xffUL
+
+#define SCHIZO_PCI_DIAG (0x2020UL)
+#define SCHIZO_PCIDIAG_D_BADECC (1UL << 10UL) /* Disable BAD ECC errors (Schizo) */
+#define SCHIZO_PCIDIAG_D_BYPASS (1UL << 9UL) /* Disable MMU bypass mode (Schizo/Tomatillo) */
+#define SCHIZO_PCIDIAG_D_TTO (1UL << 8UL) /* Disable TTO errors (Schizo/Tomatillo) */
+#define SCHIZO_PCIDIAG_D_RTRYARB (1UL << 7UL) /* Disable retry arbitration (Schizo) */
+#define SCHIZO_PCIDIAG_D_RETRY (1UL << 6UL) /* Disable retry limit (Schizo/Tomatillo) */
+#define SCHIZO_PCIDIAG_D_INTSYNC (1UL << 5UL) /* Disable interrupt/DMA synch (Schizo/Tomatillo) */
+#define SCHIZO_PCIDIAG_I_DMA_PARITY (1UL << 3UL) /* Invert DMA parity (Schizo/Tomatillo) */
+#define SCHIZO_PCIDIAG_I_PIOD_PARITY (1UL << 2UL) /* Invert PIO data parity (Schizo/Tomatillo) */
+#define SCHIZO_PCIDIAG_I_PIOA_PARITY (1UL << 1UL) /* Invert PIO address parity (Schizo/Tomatillo) */
+
+#define TOMATILLO_PCI_IOC_CSR (0x2248UL)
+#define TOMATILLO_IOC_PART_WPENAB 0x0000000000080000UL
+#define TOMATILLO_IOC_RDMULT_PENAB 0x0000000000040000UL
+#define TOMATILLO_IOC_RDONE_PENAB 0x0000000000020000UL
+#define TOMATILLO_IOC_RDLINE_PENAB 0x0000000000010000UL
+#define TOMATILLO_IOC_RDMULT_PLEN 0x000000000000c000UL
+#define TOMATILLO_IOC_RDMULT_PLEN_SHIFT 14UL
+#define TOMATILLO_IOC_RDONE_PLEN 0x0000000000003000UL
+#define TOMATILLO_IOC_RDONE_PLEN_SHIFT 12UL
+#define TOMATILLO_IOC_RDLINE_PLEN 0x0000000000000c00UL
+#define TOMATILLO_IOC_RDLINE_PLEN_SHIFT 10UL
+#define TOMATILLO_IOC_PREF_OFF 0x00000000000003f8UL
+#define TOMATILLO_IOC_PREF_OFF_SHIFT 3UL
+#define TOMATILLO_IOC_RDMULT_CPENAB 0x0000000000000004UL
+#define TOMATILLO_IOC_RDONE_CPENAB 0x0000000000000002UL
+#define TOMATILLO_IOC_RDLINE_CPENAB 0x0000000000000001UL
+
+#define TOMATILLO_PCI_IOC_TDIAG (0x2250UL)
+#define TOMATILLO_PCI_IOC_DDIAG (0x2290UL)
+
+ /* Basic hardware setup of @pbm: IRQ retry value, PCI control
+  * register (arbiter, bus parking and the Tomatillo-only bits), PCI
+  * diagnostic register and, on Tomatillo, the IOC control register.
+  */
+ static void schizo_pbm_hw_init(struct pci_pbm_info *pbm)
+ {
+ 	const int tomatillo = (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO);
+ 	u64 val;
+
+ 	upa_writeq(5, pbm->pbm_regs + SCHIZO_PCI_IRQ_RETRY);
+
+ 	val = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+ 	/* Enable arbiter for all PCI slots. */
+ 	val |= 0xff;
+
+ 	/* Tomatillo version 2 and later: set both PTO bits. */
+ 	if (tomatillo && pbm->chip_version >= 0x2)
+ 		val |= 0x3UL << SCHIZO_PCICTRL_PTO_SHIFT;
+
+ 	/* Bus parking is on unless the node says "no-bus-parking". */
+ 	if (of_find_property(pbm->op->node, "no-bus-parking", NULL))
+ 		val &= ~SCHIZO_PCICTRL_PARK;
+ 	else
+ 		val |= SCHIZO_PCICTRL_PARK;
+
+ 	/* DTO_INT is set only on Tomatillo version 1 and earlier. */
+ 	if (tomatillo && pbm->chip_version <= 0x1)
+ 		val |= SCHIZO_PCICTRL_DTO_INT;
+ 	else
+ 		val &= ~SCHIZO_PCICTRL_DTO_INT;
+
+ 	if (tomatillo)
+ 		val |= (SCHIZO_PCICTRL_MRM_PREF |
+ 			SCHIZO_PCICTRL_RDO_PREF |
+ 			SCHIZO_PCICTRL_RDL_PREF);
+
+ 	upa_writeq(val, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+ 	/* Clear the retry-arbitration, retry-limit and interrupt/DMA
+ 	 * sync disable bits in the diagnostic register.
+ 	 */
+ 	val = upa_readq(pbm->pbm_regs + SCHIZO_PCI_DIAG);
+ 	val &= ~(SCHIZO_PCIDIAG_D_RTRYARB |
+ 		 SCHIZO_PCIDIAG_D_RETRY |
+ 		 SCHIZO_PCIDIAG_D_INTSYNC);
+ 	upa_writeq(val, pbm->pbm_regs + SCHIZO_PCI_DIAG);
+
+ 	if (!tomatillo)
+ 		return;
+
+ 	/* Clear prefetch lengths to workaround a bug in
+ 	 * Jalapeno...
+ 	 */
+ 	val = (TOMATILLO_IOC_PART_WPENAB |
+ 	       (1 << TOMATILLO_IOC_PREF_OFF_SHIFT) |
+ 	       TOMATILLO_IOC_RDMULT_CPENAB |
+ 	       TOMATILLO_IOC_RDONE_CPENAB |
+ 	       TOMATILLO_IOC_RDLINE_CPENAB);
+
+ 	upa_writeq(val, pbm->pbm_regs + TOMATILLO_PCI_IOC_CSR);
+ }
+
+/* Set up one Schizo, Schizo+ or Tomatillo PCI Bus Module.
+ *
+ * @pbm:       zeroed pci_pbm_info whose ->iommu the caller has filled in
+ * @op:        OF device for this PBM
+ * @portid:    value of the node's "portid" property
+ * @chip_type: one of the PBM_CHIP_TYPE_* constants
+ *
+ * Fills in @pbm from the device node, links it onto pci_pbm_root,
+ * programs the hardware, initialises the IOMMU and streaming buffer
+ * and finally scans the bus.
+ *
+ * Returns 0 on success, -ENODEV when the node's "reg" property is
+ * missing or too short, or the error from schizo_pbm_iommu_init().
+ * On failure @pbm is no longer reachable from pci_pbm_root, so the
+ * caller may free it.
+ */
+static int __devinit schizo_pbm_init(struct pci_pbm_info *pbm,
+				     struct of_device *op, u32 portid,
+				     int chip_type)
+{
+	const struct linux_prom64_registers *regs;
+	struct device_node *dp = op->node;
+	const char *chipset_name;
+	int err, len, nregs;
+
+	switch (chip_type) {
+	case PBM_CHIP_TYPE_TOMATILLO:
+		chipset_name = "TOMATILLO";
+		break;
+
+	case PBM_CHIP_TYPE_SCHIZO_PLUS:
+		chipset_name = "SCHIZO+";
+		break;
+
+	case PBM_CHIP_TYPE_SCHIZO:
+	default:
+		chipset_name = "SCHIZO";
+		break;
+	}
+
+	/* For SCHIZO, three OBP regs:
+	 * 1) PBM controller regs
+	 * 2) Schizo front-end controller regs (same for both PBMs)
+	 * 3) PBM PCI config space
+	 *
+	 * For TOMATILLO, four OBP regs:
+	 * 1) PBM controller regs
+	 * 2) Tomatillo front-end controller regs
+	 * 3) PBM PCI config space
+	 * 4) Ichip regs
+	 *
+	 * Only entries 0 and 1 are read below, plus entry 3 on
+	 * Tomatillo, so insist on exactly that many before touching
+	 * any global state.
+	 */
+	regs = of_get_property(dp, "reg", &len);
+	nregs = (chip_type == PBM_CHIP_TYPE_TOMATILLO) ? 4 : 2;
+	if (!regs || len < (int) (nregs * sizeof(*regs))) {
+		printk(KERN_ERR PFX "%s: missing or short \"reg\" property.\n",
+		       dp->full_name);
+		return -ENODEV;
+	}
+
+	pbm->next = pci_pbm_root;
+	pci_pbm_root = pbm;
+
+	pbm->numa_node = -1;
+
+	pbm->pci_ops = &sun4u_pci_ops;
+	pbm->config_space_reg_bits = 8;
+
+	pbm->index = pci_num_pbms++;
+
+	pbm->portid = portid;
+	pbm->op = op;
+
+	pbm->chip_type = chip_type;
+	pbm->chip_version = of_getintprop_default(dp, "version#", 0);
+	pbm->chip_revision = of_getintprop_default(dp, "module-version#", 0);
+
+	pbm->pbm_regs = regs[0].phys_addr;
+	pbm->controller_regs = regs[1].phys_addr - 0x10000UL;
+
+	if (chip_type == PBM_CHIP_TYPE_TOMATILLO)
+		pbm->sync_reg = regs[3].phys_addr + 0x1a18UL;
+
+	pbm->name = dp->full_name;
+
+	printk("%s: %s PCI Bus Module ver[%x:%x]\n",
+	       pbm->name, chipset_name,
+	       pbm->chip_version, pbm->chip_revision);
+
+	schizo_pbm_hw_init(pbm);
+
+	pci_determine_mem_io_space(pbm);
+
+	pci_get_pbm_props(pbm);
+
+	err = schizo_pbm_iommu_init(pbm);
+	if (err) {
+		/* The caller frees pbm when we fail, so it must not stay
+		 * on the global list.  It was pushed at the head above;
+		 * pop it again.  The pbm->index slot taken from
+		 * pci_num_pbms is simply not reused.
+		 */
+		if (pci_pbm_root == pbm)
+			pci_pbm_root = pbm->next;
+		return err;
+	}
+
+	schizo_pbm_strbuf_init(pbm);
+
+	schizo_scan_bus(pbm, &op->dev);
+
+	return 0;
+}
+
+/* Return 1 when port id @x identifies the sibling of port id @y for the
+ * given chip type, 0 otherwise.  For Tomatillo the two ids must differ
+ * in bit 0 only; for every other chip type they must be equal.
+ */
+static inline int portid_compare(u32 x, u32 y, int chip_type)
+{
+	u32 wanted = y;
+
+	if (chip_type == PBM_CHIP_TYPE_TOMATILLO)
+		wanted ^= 1;
+
+	return x == wanted;
+}
+
+/* Walk the pci_pbm_root list and return the first PBM whose port id
+ * pairs with @portid according to portid_compare(), or NULL if no
+ * registered PBM does.
+ */
+static struct pci_pbm_info * __devinit schizo_find_sibling(u32 portid,
+							    int chip_type)
+{
+	struct pci_pbm_info *cur = pci_pbm_root;
+
+	while (cur) {
+		if (portid_compare(cur->portid, portid, chip_type))
+			break;
+		cur = cur->next;
+	}
+
+	/* NULL here means the list was exhausted without a match. */
+	return cur;
+}
+
+/* Allocate and initialise the PBM state for one Schizo-family PCI node.
+ *
+ * @op:        OF device being probed
+ * @chip_type: PBM_CHIP_TYPE_* value taken from the match table
+ *
+ * Allocates the pci_pbm_info and its iommu, looks up an already
+ * registered sibling PBM, runs schizo_pbm_init() and, on success,
+ * cross-links the siblings and stores the PBM as the device's driver
+ * data.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or whatever
+ * error schizo_pbm_init() reported (previously that error was
+ * discarded and -ENOMEM returned regardless).
+ */
+static int __devinit __schizo_init(struct of_device *op, unsigned long chip_type)
+{
+	struct device_node *dp = op->node;
+	struct pci_pbm_info *pbm;
+	struct iommu *iommu;
+	u32 portid;
+	int err;
+
+	portid = of_getintprop_default(dp, "portid", 0xff);
+
+	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+	if (!pbm) {
+		printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	pbm->sibling = schizo_find_sibling(portid, chip_type);
+
+	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+	if (!iommu) {
+		printk(KERN_ERR PFX "Cannot allocate PBM A iommu.\n");
+		err = -ENOMEM;
+		goto out_free_pbm;
+	}
+
+	pbm->iommu = iommu;
+
+	/* Hand the real failure reason back to the driver core. */
+	err = schizo_pbm_init(pbm, op, portid, chip_type);
+	if (err)
+		goto out_free_iommu;
+
+	/* Only point the sibling back at us once we are fully set up. */
+	if (pbm->sibling)
+		pbm->sibling->sibling = pbm;
+
+	dev_set_drvdata(&op->dev, pbm);
+
+	return 0;
+
+out_free_iommu:
+	kfree(pbm->iommu);
+
+out_free_pbm:
+	kfree(pbm);
+
+out_err:
+	return err;
+}
+
+/* OF platform probe hook: recover the chip type stored in the matched
+ * table entry's ->data and hand off to __schizo_init().
+ */
+static int __devinit schizo_probe(struct of_device *op,
+				  const struct of_device_id *match)
+{
+	unsigned long chip_type = (unsigned long) match->data;
+
+	return __schizo_init(op, chip_type);
+}
+
+/* The ordering of this table is very important.  Some Tomatillo
+ * nodes announce that they are compatible with both pci108e,a801
+ * and pci108e,8001.  So list the chips in reverse chronological
+ * order.
+ *
+ * This table must not be marked __initdata: schizo_driver keeps a
+ * pointer to it in .match_table for as long as the driver stays
+ * registered, which is long after init memory has been discarded.
+ */
+static struct of_device_id schizo_match[] = {
+	{
+		.name = "pci",
+		.compatible = "pci108e,a801",
+		.data = (void *) PBM_CHIP_TYPE_TOMATILLO,
+	},
+	{
+		.name = "pci",
+		.compatible = "pci108e,8002",
+		.data = (void *) PBM_CHIP_TYPE_SCHIZO_PLUS,
+	},
+	{
+		.name = "pci",
+		.compatible = "pci108e,8001",
+		.data = (void *) PBM_CHIP_TYPE_SCHIZO,
+	},
+	{},
+};
+
+/* OF platform driver glue: nodes matching schizo_match are handed to
+ * schizo_probe().
+ */
+static struct of_platform_driver schizo_driver = {
+	.probe		= schizo_probe,
+	.match_table	= schizo_match,
+	.name		= DRIVER_NAME,
+};
+
+/* Register the Schizo driver on the OF bus; run as a subsys initcall. */
+static int __init schizo_init(void)
+{
+	int rc = of_register_driver(&schizo_driver, &of_bus_type);
+
+	return rc;
+}
+
+subsys_initcall(schizo_init);
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
new file mode 100644
index 000000000000..34a1fded3941
--- /dev/null
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -0,0 +1,1033 @@
+/* pci_sun4v.c: SUN4V specific PCI controller support.
+ *
+ * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/log2.h>
+#include <linux/of_device.h>
+
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/hypervisor.h>
+#include <asm/prom.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+
+#include "pci_sun4v.h"
+
+/* Driver name, and the prefix built from it for log messages. */
+#define DRIVER_NAME "pci_sun4v"
+#define PFX DRIVER_NAME ": "
+
+/* NOTE(review): presumably the major/minor version of the hypervisor PCI
+ * API this driver asks for; the code that uses them is outside this
+ * view -- confirm.
+ */
+static unsigned long vpci_major = 1;
+static unsigned long vpci_minor = 1;
+
+/* Number of u64 entries that fit in one page; iommu_batch_add() BUG()s
+ * if a batch's npages reaches this limit.
+ */
+#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
+
+/* State for a batch of IOMMU mappings being accumulated before they are
+ * handed to the hypervisor by iommu_batch_flush().  One instance exists
+ * per CPU.
+ */
+struct iommu_batch {
+ struct device *dev; /* Device mapping is for. */
+ unsigned long prot; /* IOMMU page protections */
+ unsigned long entry; /* Index into IOTSB. */
+ u64 *pglist; /* List of physical pages */
+ unsigned long npages; /* Number of pages in list. */
+};
+
+/* One batch per CPU; the batch helpers reach it with __get_cpu_var(). */
+static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
+/* NOTE(review): presumably set once the per-CPU pglist buffers have been
+ * set up; the code that reads and writes it is outside this view --
+ * confirm.
+ */
+static int iommu_batch_initialized;
+
+/* Reset this CPU's batch so that it describes an empty run of mappings
+ * for @dev, with protections @prot, beginning at IOTSB index @entry.
+ *
+ * Interrupts must be disabled.
+ */
+static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
+{
+	struct iommu_batch *batch = &__get_cpu_var(iommu_batch);
+
+	batch->npages = 0;
+	batch->entry = entry;
+	batch->prot = prot;
+	batch->dev = dev;
+}
+
+/* Hand the pages accumulated in @p to the hypervisor.
+ *
+ * pci_sun4v_iommu_map() is called repeatedly, each time stepping past
+ * however many entries it reports as mapped, until none remain.  On
+ * success p->entry is advanced past the mapped range, p->npages is
+ * reset to 0 and 0 is returned.  If a call reports a negative status a
+ * rate-limited message is logged and -1 is returned with @p unchanged.
+ *
+ * Interrupts must be disabled.
+ */
+static long iommu_batch_flush(struct iommu_batch *p)
+{
+	struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
+	unsigned long devhandle = pbm->devhandle;
+	unsigned long tsb_index = p->entry;
+	unsigned long remaining = p->npages;
+	unsigned long prot = p->prot;
+	u64 *cursor = p->pglist;
+
+	for (;;) {
+		long mapped;
+
+		if (remaining == 0)
+			break;
+
+		mapped = pci_sun4v_iommu_map(devhandle,
+					     HV_PCI_TSBID(0, tsb_index),
+					     remaining, prot, __pa(cursor));
+		if (unlikely(mapped < 0)) {
+			if (printk_ratelimit())
+				printk("iommu_batch_flush: IOMMU map of "
+				       "[%08lx:%08lx:%lx:%lx:%lx] failed with "
+				       "status %ld\n",
+				       devhandle, HV_PCI_TSBID(0, tsb_index),
+				       remaining, prot, __pa(cursor), mapped);
+			return -1;
+		}
+
+		/* Step past whatever the hypervisor accepted this round. */
+		cursor += mapped;
+		remaining -= mapped;
+		tsb_index += mapped;
+	}
+
+	p->npages = 0;
+	p->entry = tsb_index;
+
+	return 0;
+}
+
+/* Tell this CPU's batch that the next page goes at IOTSB index @entry.
+ *
+ * Nothing happens when @entry directly follows the pages already
+ * queued.  Otherwise the queued pages are flushed (unless the batch's
+ * entry is the ~0UL marker) and the batch is re-based at @entry.  The
+ * result of the flush is not checked.
+ */
+static inline void iommu_batch_new_entry(unsigned long entry)
+{
+	struct iommu_batch *batch = &__get_cpu_var(iommu_batch);
+	unsigned long next_contiguous = batch->entry + batch->npages;
+
+	if (entry != next_contiguous) {
+		if (batch->entry != ~0UL)
+			iommu_batch_flush(batch);
+		batch->entry = entry;
+	}
+}
+
+/* Interrupts must be disabled. */
+static inline long iommu_batch_add(u64 phys_page)
+{
+ struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+ BUG_ON(p->npages >= PGLIST_NENTS);