summaryrefslogtreecommitdiff
path: root/arch/sparc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc/kernel')
-rw-r--r--arch/sparc/kernel/.gitignore1
-rw-r--r--arch/sparc/kernel/Makefile117
-rw-r--r--arch/sparc/kernel/asm-offsets.c19
-rw-r--r--arch/sparc/kernel/audit.c83
-rw-r--r--arch/sparc/kernel/auxio_32.c (renamed from arch/sparc/kernel/auxio.c)0
-rw-r--r--arch/sparc/kernel/auxio_64.c149
-rw-r--r--arch/sparc/kernel/central.c268
-rw-r--r--arch/sparc/kernel/cherrs.S579
-rw-r--r--arch/sparc/kernel/chmc.c863
-rw-r--r--arch/sparc/kernel/compat_audit.c43
-rw-r--r--arch/sparc/kernel/cpu.c413
-rw-r--r--arch/sparc/kernel/devices.c2
-rw-r--r--arch/sparc/kernel/ds.c1244
-rw-r--r--arch/sparc/kernel/dtlb_miss.S39
-rw-r--r--arch/sparc/kernel/dtlb_prot.S54
-rw-r--r--arch/sparc/kernel/ebus.c257
-rw-r--r--arch/sparc/kernel/entry.h229
-rw-r--r--arch/sparc/kernel/etrap_32.S (renamed from arch/sparc/kernel/etrap.S)0
-rw-r--r--arch/sparc/kernel/etrap_64.S236
-rw-r--r--arch/sparc/kernel/fpu_traps.S384
-rw-r--r--arch/sparc/kernel/ftrace.c76
-rw-r--r--arch/sparc/kernel/getsetcc.S24
-rw-r--r--arch/sparc/kernel/head_32.S (renamed from arch/sparc/kernel/head.S)2
-rw-r--r--arch/sparc/kernel/head_64.S900
-rw-r--r--arch/sparc/kernel/helpers.S63
-rw-r--r--arch/sparc/kernel/hvapi.c193
-rw-r--r--arch/sparc/kernel/hvcalls.S800
-rw-r--r--arch/sparc/kernel/hvtramp.S140
-rw-r--r--arch/sparc/kernel/idprom.c67
-rw-r--r--arch/sparc/kernel/init_task.c4
-rw-r--r--arch/sparc/kernel/iommu.c866
-rw-r--r--arch/sparc/kernel/iommu_common.h59
-rw-r--r--arch/sparc/kernel/ioport.c4
-rw-r--r--arch/sparc/kernel/irq_32.c (renamed from arch/sparc/kernel/irq.c)7
-rw-r--r--arch/sparc/kernel/irq_64.c1104
-rw-r--r--arch/sparc/kernel/itlb_miss.S39
-rw-r--r--arch/sparc/kernel/ivec.S51
-rw-r--r--arch/sparc/kernel/kernel.h31
-rw-r--r--arch/sparc/kernel/kgdb_32.c (renamed from arch/sparc/kernel/kgdb.c)0
-rw-r--r--arch/sparc/kernel/kgdb_64.c186
-rw-r--r--arch/sparc/kernel/kprobes.c593
-rw-r--r--arch/sparc/kernel/kstack.h60
-rw-r--r--arch/sparc/kernel/ktlb.S304
-rw-r--r--arch/sparc/kernel/ldc.c2378
-rw-r--r--arch/sparc/kernel/mdesc.c917
-rw-r--r--arch/sparc/kernel/misctrap.S97
-rw-r--r--arch/sparc/kernel/module.c140
-rw-r--r--arch/sparc/kernel/muldiv.c5
-rw-r--r--arch/sparc/kernel/of_device_32.c (renamed from arch/sparc/kernel/of_device.c)0
-rw-r--r--arch/sparc/kernel/of_device_64.c898
-rw-r--r--arch/sparc/kernel/pci.c1095
-rw-r--r--arch/sparc/kernel/pci_common.c545
-rw-r--r--arch/sparc/kernel/pci_fire.c521
-rw-r--r--arch/sparc/kernel/pci_impl.h185
-rw-r--r--arch/sparc/kernel/pci_msi.c447
-rw-r--r--arch/sparc/kernel/pci_psycho.c618
-rw-r--r--arch/sparc/kernel/pci_sabre.c609
-rw-r--r--arch/sparc/kernel/pci_schizo.c1504
-rw-r--r--arch/sparc/kernel/pci_sun4v.c1033
-rw-r--r--arch/sparc/kernel/pci_sun4v.h92
-rw-r--r--arch/sparc/kernel/pci_sun4v_asm.S362
-rw-r--r--arch/sparc/kernel/pcic.c2
-rw-r--r--arch/sparc/kernel/pmc.c18
-rw-r--r--arch/sparc/kernel/power.c75
-rw-r--r--arch/sparc/kernel/process_32.c (renamed from arch/sparc/kernel/process.c)2
-rw-r--r--arch/sparc/kernel/process_64.c812
-rw-r--r--arch/sparc/kernel/prom.h29
-rw-r--r--arch/sparc/kernel/prom_32.c (renamed from arch/sparc/kernel/prom.c)287
-rw-r--r--arch/sparc/kernel/prom_64.c571
-rw-r--r--arch/sparc/kernel/prom_common.c326
-rw-r--r--arch/sparc/kernel/prom_irqtrans.c842
-rw-r--r--arch/sparc/kernel/psycho_common.c470
-rw-r--r--arch/sparc/kernel/psycho_common.h48
-rw-r--r--arch/sparc/kernel/ptrace_32.c (renamed from arch/sparc/kernel/ptrace.c)0
-rw-r--r--arch/sparc/kernel/ptrace_64.c1090
-rw-r--r--arch/sparc/kernel/reboot.c53
-rw-r--r--arch/sparc/kernel/rtrap_32.S (renamed from arch/sparc/kernel/rtrap.S)0
-rw-r--r--arch/sparc/kernel/rtrap_64.S450
-rw-r--r--arch/sparc/kernel/sbus.c674
-rw-r--r--arch/sparc/kernel/setup_32.c (renamed from arch/sparc/kernel/setup.c)9
-rw-r--r--arch/sparc/kernel/setup_64.c429
-rw-r--r--arch/sparc/kernel/signal32.c899
-rw-r--r--arch/sparc/kernel/signal_32.c (renamed from arch/sparc/kernel/signal.c)0
-rw-r--r--arch/sparc/kernel/signal_64.c617
-rw-r--r--arch/sparc/kernel/smp_32.c (renamed from arch/sparc/kernel/smp.c)0
-rw-r--r--arch/sparc/kernel/smp_64.c1408
-rw-r--r--arch/sparc/kernel/sparc_ksyms_32.c (renamed from arch/sparc/kernel/sparc_ksyms.c)4
-rw-r--r--arch/sparc/kernel/sparc_ksyms_64.c284
-rw-r--r--arch/sparc/kernel/spiterrs.S245
-rw-r--r--arch/sparc/kernel/sstate.c127
-rw-r--r--arch/sparc/kernel/stacktrace.c64
-rw-r--r--arch/sparc/kernel/starfire.c116
-rw-r--r--arch/sparc/kernel/sun4c_irq.c2
-rw-r--r--arch/sparc/kernel/sun4d_irq.c3
-rw-r--r--arch/sparc/kernel/sun4m_irq.c2
-rw-r--r--arch/sparc/kernel/sun4v_ivec.S341
-rw-r--r--arch/sparc/kernel/sun4v_tlb_miss.S428
-rw-r--r--arch/sparc/kernel/sys32.S367
-rw-r--r--arch/sparc/kernel/sys_sparc32.c682
-rw-r--r--arch/sparc/kernel/sys_sparc_32.c (renamed from arch/sparc/kernel/sys_sparc.c)0
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c914
-rw-r--r--arch/sparc/kernel/syscalls.S279
-rw-r--r--arch/sparc/kernel/sysfs.c314
-rw-r--r--arch/sparc/kernel/systbls.h51
-rw-r--r--arch/sparc/kernel/systbls_32.S (renamed from arch/sparc/kernel/systbls.S)0
-rw-r--r--arch/sparc/kernel/systbls_64.S159
-rw-r--r--arch/sparc/kernel/time_32.c (renamed from arch/sparc/kernel/time.c)0
-rw-r--r--arch/sparc/kernel/time_64.c862
-rw-r--r--arch/sparc/kernel/trampoline_32.S (renamed from arch/sparc/kernel/trampoline.S)0
-rw-r--r--arch/sparc/kernel/trampoline_64.S417
-rw-r--r--arch/sparc/kernel/traps_32.c (renamed from arch/sparc/kernel/traps.c)34
-rw-r--r--arch/sparc/kernel/traps_64.c2600
-rw-r--r--arch/sparc/kernel/tsb.S552
-rw-r--r--arch/sparc/kernel/ttable.S266
-rw-r--r--arch/sparc/kernel/una_asm_32.S (renamed from arch/sparc/kernel/una_asm.S)0
-rw-r--r--arch/sparc/kernel/una_asm_64.S146
-rw-r--r--arch/sparc/kernel/unaligned_32.c (renamed from arch/sparc/kernel/unaligned.c)0
-rw-r--r--arch/sparc/kernel/unaligned_64.c690
-rw-r--r--arch/sparc/kernel/us2e_cpufreq.c413
-rw-r--r--arch/sparc/kernel/us3_cpufreq.c274
-rw-r--r--arch/sparc/kernel/utrap.S29
-rw-r--r--arch/sparc/kernel/vio.c451
-rw-r--r--arch/sparc/kernel/viohs.c822
-rw-r--r--arch/sparc/kernel/visemul.c890
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S109
-rw-r--r--arch/sparc/kernel/winfixup.S156
126 files changed, 43631 insertions, 572 deletions
diff --git a/arch/sparc/kernel/.gitignore b/arch/sparc/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/sparc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 2d6582095099..53adcaa0348b 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -2,25 +2,98 @@
# Makefile for the linux kernel.
#
-extra-y := head.o init_task.o vmlinux.lds
-
-EXTRA_AFLAGS := -ansi
-
-IRQ_OBJS := irq.o sun4m_irq.o sun4c_irq.o sun4d_irq.o
-obj-y := entry.o wof.o wuf.o etrap.o rtrap.o traps.o $(IRQ_OBJS) \
- process.o signal.o ioport.o setup.o idprom.o \
- sys_sparc.o systbls.o \
- time.o windows.o cpu.o devices.o \
- tadpole.o tick14.o ptrace.o \
- unaligned.o una_asm.o muldiv.o \
- prom.o of_device.o devres.o dma.o
-
-devres-y = ../../../kernel/irq/devres.o
-
-obj-$(CONFIG_PCI) += pcic.o
-obj-$(CONFIG_SMP) += trampoline.o smp.o sun4m_smp.o sun4d_smp.o
-obj-$(CONFIG_SUN_AUXIO) += auxio.o
-obj-$(CONFIG_SUN_PM) += apc.o pmc.o
-obj-$(CONFIG_MODULES) += module.o sparc_ksyms.o
-obj-$(CONFIG_SPARC_LED) += led.o
-obj-$(CONFIG_KGDB) += kgdb.o
+asflags-y := -ansi
+ccflags-y := -Werror
+
+extra-y := head_$(BITS).o
+extra-y += init_task.o
+extra-y += vmlinux.lds
+
+obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o
+obj-$(CONFIG_SPARC32) += etrap_32.o
+obj-$(CONFIG_SPARC32) += rtrap_32.o
+obj-y += traps_$(BITS).o
+
+# IRQ
+obj-y += irq_$(BITS).o
+obj-$(CONFIG_SPARC32) += sun4m_irq.o sun4c_irq.o sun4d_irq.o
+
+obj-y += process_$(BITS).o
+obj-y += signal_$(BITS).o
+obj-$(CONFIG_SPARC32) += ioport.o
+obj-y += setup_$(BITS).o
+obj-y += idprom.o
+obj-y += sys_sparc_$(BITS).o
+obj-$(CONFIG_SPARC32) += systbls_32.o
+obj-y += time_$(BITS).o
+obj-$(CONFIG_SPARC32) += windows.o
+obj-y += cpu.o
+obj-$(CONFIG_SPARC32) += devices.o
+obj-$(CONFIG_SPARC32) += tadpole.o
+obj-$(CONFIG_SPARC32) += tick14.o
+obj-y += ptrace_$(BITS).o
+obj-y += unaligned_$(BITS).o
+obj-y += una_asm_$(BITS).o
+obj-$(CONFIG_SPARC32) += muldiv.o
+obj-y += prom_common.o
+obj-y += prom_$(BITS).o
+obj-y += of_device_$(BITS).o
+obj-$(CONFIG_SPARC64) += prom_irqtrans.o
+
+obj-$(CONFIG_SPARC64) += reboot.o
+obj-$(CONFIG_SPARC64) += sysfs.o
+obj-$(CONFIG_SPARC64) += iommu.o
+obj-$(CONFIG_SPARC64) += central.o
+obj-$(CONFIG_SPARC64) += starfire.o
+obj-$(CONFIG_SPARC64) += power.o
+obj-$(CONFIG_SPARC64) += sbus.o
+obj-$(CONFIG_SPARC64) += ebus.o
+obj-$(CONFIG_SPARC64) += visemul.o
+obj-$(CONFIG_SPARC64) += hvapi.o
+obj-$(CONFIG_SPARC64) += sstate.o
+obj-$(CONFIG_SPARC64) += mdesc.o
+
+# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
+obj-$(CONFIG_SPARC32) += devres.o
+devres-y := ../../../kernel/irq/devres.o
+
+obj-$(CONFIG_SPARC32) += dma.o
+
+obj-$(CONFIG_SPARC32_PCI) += pcic.o
+
+obj-$(CONFIG_SMP) += trampoline_$(BITS).o smp_$(BITS).o
+obj-$(CONFIG_SPARC32_SMP) += sun4m_smp.o sun4d_smp.o
+obj-$(CONFIG_SPARC64_SMP) += hvtramp.o
+
+obj-y += auxio_$(BITS).o
+obj-$(CONFIG_SUN_PM) += apc.o pmc.o
+
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_MODULES) += sparc_ksyms_$(BITS).o
+obj-$(CONFIG_SPARC_LED) += led.o
+obj-$(CONFIG_KGDB) += kgdb_$(BITS).o
+
+
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+CFLAGS_REMOVE_ftrace.o := -pg
+
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+# sparc64 PCI
+obj-$(CONFIG_SPARC64_PCI) += pci.o pci_common.o psycho_common.o
+obj-$(CONFIG_SPARC64_PCI) += pci_psycho.o pci_sabre.o pci_schizo.o
+obj-$(CONFIG_SPARC64_PCI) += pci_sun4v.o pci_sun4v_asm.o pci_fire.o
+obj-$(CONFIG_PCI_MSI) += pci_msi.o
+
+obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o
+
+# sparc64 cpufreq
+obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
+obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
+obj-$(CONFIG_US3_MC) += chmc.o
+
+obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
+
+obj-$(CONFIG_AUDIT) += audit.o
+audit--$(CONFIG_AUDIT) := compat_audit.o
+obj-$(CONFIG_COMPAT) += $(audit--y)
diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
index b5bb99ed892c..68f7e1118e9b 100644
--- a/arch/sparc/kernel/asm-offsets.c
+++ b/arch/sparc/kernel/asm-offsets.c
@@ -14,15 +14,28 @@
// #include <linux/mm.h>
#include <linux/kbuild.h>
-int foo(void)
+#ifdef CONFIG_SPARC32
+int sparc32_foo(void)
{
- DEFINE(AOFF_task_thread, offsetof(struct task_struct, thread));
- BLANK();
DEFINE(AOFF_thread_fork_kpsr,
offsetof(struct thread_struct, fork_kpsr));
+ return 0;
+}
+#else
+int sparc64_foo(void)
+{
+ return 0;
+}
+#endif
+
+int foo(void)
+{
+ BLANK();
+ DEFINE(AOFF_task_thread, offsetof(struct task_struct, thread));
BLANK();
DEFINE(AOFF_mm_context, offsetof(struct mm_struct, context));
/* DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); */
return 0;
}
+
diff --git a/arch/sparc/kernel/audit.c b/arch/sparc/kernel/audit.c
new file mode 100644
index 000000000000..8fff0ac63d56
--- /dev/null
+++ b/arch/sparc/kernel/audit.c
@@ -0,0 +1,83 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_COMPAT
+ if (arch == AUDIT_ARCH_SPARC)
+ return 1;
+#endif
+ return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+#ifdef CONFIG_COMPAT
+ extern int sparc32_classify_syscall(unsigned);
+ if (abi == AUDIT_ARCH_SPARC)
+ return sparc32_classify_syscall(syscall);
+#endif
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 0;
+ }
+}
+
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_COMPAT
+ extern __u32 sparc32_dir_class[];
+ extern __u32 sparc32_write_class[];
+ extern __u32 sparc32_read_class[];
+ extern __u32 sparc32_chattr_class[];
+ extern __u32 sparc32_signal_class[];
+ audit_register_class(AUDIT_CLASS_WRITE_32, sparc32_write_class);
+ audit_register_class(AUDIT_CLASS_READ_32, sparc32_read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE_32, sparc32_dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR_32, sparc32_chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL_32, sparc32_signal_class);
+#endif
+ audit_register_class(AUDIT_CLASS_WRITE, write_class);
+ audit_register_class(AUDIT_CLASS_READ, read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+ return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/sparc/kernel/auxio.c b/arch/sparc/kernel/auxio_32.c
index 09c857215a52..09c857215a52 100644
--- a/arch/sparc/kernel/auxio.c
+++ b/arch/sparc/kernel/auxio_32.c
diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c
new file mode 100644
index 000000000000..8b67347d4221
--- /dev/null
+++ b/arch/sparc/kernel/auxio_64.c
@@ -0,0 +1,149 @@
+/* auxio.c: Probing for the Sparc AUXIO register at boot time.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * Refactoring for unified NCR/PCIO support 2002 Eric Brower (ebrower@usa.net)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/auxio.h>
+
+void __iomem *auxio_register = NULL;
+EXPORT_SYMBOL(auxio_register);
+
+enum auxio_type {
+ AUXIO_TYPE_NODEV,
+ AUXIO_TYPE_SBUS,
+ AUXIO_TYPE_EBUS
+};
+
+static enum auxio_type auxio_devtype = AUXIO_TYPE_NODEV;
+static DEFINE_SPINLOCK(auxio_lock);
+
+static void __auxio_rmw(u8 bits_on, u8 bits_off, int ebus)
+{
+ if (auxio_register) {
+ unsigned long flags;
+ u8 regval, newval;
+
+ spin_lock_irqsave(&auxio_lock, flags);
+
+ regval = (ebus ?
+ (u8) readl(auxio_register) :
+ sbus_readb(auxio_register));
+ newval = regval | bits_on;
+ newval &= ~bits_off;
+ if (!ebus)
+ newval &= ~AUXIO_AUX1_MASK;
+ if (ebus)
+ writel((u32) newval, auxio_register);
+ else
+ sbus_writeb(newval, auxio_register);
+
+ spin_unlock_irqrestore(&auxio_lock, flags);
+ }
+}
+
+static void __auxio_set_bit(u8 bit, int on, int ebus)
+{
+ u8 bits_on = (ebus ? AUXIO_PCIO_LED : AUXIO_AUX1_LED);
+ u8 bits_off = 0;
+
+ if (!on) {
+ u8 tmp = bits_off;
+ bits_off = bits_on;
+ bits_on = tmp;
+ }
+ __auxio_rmw(bits_on, bits_off, ebus);
+}
+
+void auxio_set_led(int on)
+{
+ int ebus = auxio_devtype == AUXIO_TYPE_EBUS;
+ u8 bit;
+
+ bit = (ebus ? AUXIO_PCIO_LED : AUXIO_AUX1_LED);
+ __auxio_set_bit(bit, on, ebus);
+}
+
+static void __auxio_sbus_set_lte(int on)
+{
+ __auxio_set_bit(AUXIO_AUX1_LTE, on, 0);
+}
+
+void auxio_set_lte(int on)
+{
+ switch(auxio_devtype) {
+ case AUXIO_TYPE_SBUS:
+ __auxio_sbus_set_lte(on);
+ break;
+ case AUXIO_TYPE_EBUS:
+ /* FALL-THROUGH */
+ default:
+ break;
+ }
+}
+
+static struct of_device_id __initdata auxio_match[] = {
+ {
+ .name = "auxio",
+ },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, auxio_match);
+
+static int __devinit auxio_probe(struct of_device *dev, const struct of_device_id *match)
+{
+ struct device_node *dp = dev->node;
+ unsigned long size;
+
+ if (!strcmp(dp->parent->name, "ebus")) {
+ auxio_devtype = AUXIO_TYPE_EBUS;
+ size = sizeof(u32);
+ } else if (!strcmp(dp->parent->name, "sbus")) {
+ auxio_devtype = AUXIO_TYPE_SBUS;
+ size = 1;
+ } else {
+ printk("auxio: Unknown parent bus type [%s]\n",
+ dp->parent->name);
+ return -ENODEV;
+ }
+ auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio");
+ if (!auxio_register)
+ return -ENODEV;
+
+ printk(KERN_INFO "AUXIO: Found device at %s\n",
+ dp->full_name);
+
+ if (auxio_devtype == AUXIO_TYPE_EBUS)
+ auxio_set_led(AUXIO_LED_ON);
+
+ return 0;
+}
+
+static struct of_platform_driver auxio_driver = {
+ .match_table = auxio_match,
+ .probe = auxio_probe,
+ .driver = {
+ .name = "auxio",
+ },
+};
+
+static int __init auxio_init(void)
+{
+ return of_register_driver(&auxio_driver, &of_platform_bus_type);
+}
+
+/* Must be after subsys_initcall() so that busses are probed. Must
+ * be before device_initcall() because things like the floppy driver
+ * need to use the AUXIO register.
+ */
+fs_initcall(auxio_init);
diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c
new file mode 100644
index 000000000000..05f1c916db06
--- /dev/null
+++ b/arch/sparc/kernel/central.c
@@ -0,0 +1,268 @@
+/* central.c: Central FHC driver for Sunfire/Starfire/Wildfire.
+ *
+ * Copyright (C) 1997, 1999, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <asm/fhc.h>
+#include <asm/upa.h>
+
+struct clock_board {
+ void __iomem *clock_freq_regs;
+ void __iomem *clock_regs;
+ void __iomem *clock_ver_reg;
+ int num_slots;
+ struct resource leds_resource;
+ struct platform_device leds_pdev;
+};
+
+struct fhc {
+ void __iomem *pregs;
+ bool central;
+ bool jtag_master;
+ int board_num;
+ struct resource leds_resource;
+ struct platform_device leds_pdev;
+};
+
+static int __devinit clock_board_calc_nslots(struct clock_board *p)
+{
+ u8 reg = upa_readb(p->clock_regs + CLOCK_STAT1) & 0xc0;
+
+ switch (reg) {
+ case 0x40:
+ return 16;
+
+ case 0xc0:
+ return 8;
+
+ case 0x80:
+ reg = 0;
+ if (p->clock_ver_reg)
+ reg = upa_readb(p->clock_ver_reg);
+ if (reg) {
+ if (reg & 0x80)
+ return 4;
+ else
+ return 5;
+ }
+ /* Fallthrough */
+ default:
+ return 4;
+ }
+}
+
+static int __devinit clock_board_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ struct clock_board *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ int err = -ENOMEM;
+
+ if (!p) {
+ printk(KERN_ERR "clock_board: Cannot allocate struct clock_board\n");
+ goto out;
+ }
+
+ p->clock_freq_regs = of_ioremap(&op->resource[0], 0,
+ resource_size(&op->resource[0]),
+ "clock_board_freq");
+ if (!p->clock_freq_regs) {
+ printk(KERN_ERR "clock_board: Cannot map clock_freq_regs\n");
+ goto out_free;
+ }
+
+ p->clock_regs = of_ioremap(&op->resource[1], 0,
+ resource_size(&op->resource[1]),
+ "clock_board_regs");
+ if (!p->clock_regs) {
+ printk(KERN_ERR "clock_board: Cannot map clock_regs\n");
+ goto out_unmap_clock_freq_regs;
+ }
+
+ if (op->resource[2].flags) {
+ p->clock_ver_reg = of_ioremap(&op->resource[2], 0,
+ resource_size(&op->resource[2]),
+ "clock_ver_reg");
+ if (!p->clock_ver_reg) {
+ printk(KERN_ERR "clock_board: Cannot map clock_ver_reg\n");
+ goto out_unmap_clock_regs;
+ }
+ }
+
+ p->num_slots = clock_board_calc_nslots(p);
+
+ p->leds_resource.start = (unsigned long)
+ (p->clock_regs + CLOCK_CTRL);
+ p->leds_resource.end = p->leds_resource.end;
+ p->leds_resource.name = "leds";
+
+ p->leds_pdev.name = "sunfire-clockboard-leds";
+ p->leds_pdev.resource = &p->leds_resource;
+ p->leds_pdev.num_resources = 1;
+ p->leds_pdev.dev.parent = &op->dev;
+
+ err = platform_device_register(&p->leds_pdev);
+ if (err) {
+ printk(KERN_ERR "clock_board: Could not register LEDS "
+ "platform device\n");
+ goto out_unmap_clock_ver_reg;
+ }
+
+ printk(KERN_INFO "clock_board: Detected %d slot Enterprise system.\n",
+ p->num_slots);
+
+ err = 0;
+out:
+ return err;
+
+out_unmap_clock_ver_reg:
+ if (p->clock_ver_reg)
+ of_iounmap(&op->resource[2], p->clock_ver_reg,
+ resource_size(&op->resource[2]));
+
+out_unmap_clock_regs:
+ of_iounmap(&op->resource[1], p->clock_regs,
+ resource_size(&op->resource[1]));
+
+out_unmap_clock_freq_regs:
+ of_iounmap(&op->resource[0], p->clock_freq_regs,
+ resource_size(&op->resource[0]));
+
+out_free:
+ kfree(p);
+ goto out;
+}
+
+static struct of_device_id __initdata clock_board_match[] = {
+ {
+ .name = "clock-board",
+ },
+ {},
+};
+
+static struct of_platform_driver clock_board_driver = {
+ .match_table = clock_board_match,
+ .probe = clock_board_probe,
+ .driver = {
+ .name = "clock_board",
+ },
+};
+
+static int __devinit fhc_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ struct fhc *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ int err = -ENOMEM;
+ u32 reg;
+
+ if (!p) {
+ printk(KERN_ERR "fhc: Cannot allocate struct fhc\n");
+ goto out;
+ }
+
+ if (!strcmp(op->node->parent->name, "central"))
+ p->central = true;
+
+ p->pregs = of_ioremap(&op->resource[0], 0,
+ resource_size(&op->resource[0]),
+ "fhc_pregs");
+ if (!p->pregs) {
+ printk(KERN_ERR "fhc: Cannot map pregs\n");
+ goto out_free;
+ }
+
+ if (p->central) {
+ reg = upa_readl(p->pregs + FHC_PREGS_BSR);
+ p->board_num = ((reg >> 16) & 1) | ((reg >> 12) & 0x0e);
+ } else {
+ p->board_num = of_getintprop_default(op->node, "board#", -1);
+ if (p->board_num == -1) {
+ printk(KERN_ERR "fhc: No board# property\n");
+ goto out_unmap_pregs;
+ }
+ if (upa_readl(p->pregs + FHC_PREGS_JCTRL) & FHC_JTAG_CTRL_MENAB)
+ p->jtag_master = true;
+ }
+
+ if (!p->central) {
+ p->leds_resource.start = (unsigned long)
+ (p->pregs + FHC_PREGS_CTRL);
+ p->leds_resource.end = p->leds_resource.end;
+ p->leds_resource.name = "leds";
+
+ p->leds_pdev.name = "sunfire-fhc-leds";
+ p->leds_pdev.resource = &p->leds_resource;
+ p->leds_pdev.num_resources = 1;
+ p->leds_pdev.dev.parent = &op->dev;
+
+ err = platform_device_register(&p->leds_pdev);
+ if (err) {
+ printk(KERN_ERR "fhc: Could not register LEDS "
+ "platform device\n");
+ goto out_unmap_pregs;
+ }
+ }
+ reg = upa_readl(p->pregs + FHC_PREGS_CTRL);
+
+ if (!p->central)
+ reg |= FHC_CONTROL_IXIST;
+
+ reg &= ~(FHC_CONTROL_AOFF |
+ FHC_CONTROL_BOFF |
+ FHC_CONTROL_SLINE);
+
+ upa_writel(reg, p->pregs + FHC_PREGS_CTRL);
+ upa_readl(p->pregs + FHC_PREGS_CTRL);
+
+ reg = upa_readl(p->pregs + FHC_PREGS_ID);
+ printk(KERN_INFO "fhc: Board #%d, Version[%x] PartID[%x] Manuf[%x] %s\n",
+ p->board_num,
+ (reg & FHC_ID_VERS) >> 28,
+ (reg & FHC_ID_PARTID) >> 12,
+ (reg & FHC_ID_MANUF) >> 1,
+ (p->jtag_master ?
+ "(JTAG Master)" :
+ (p->central ? "(Central)" : "")));
+
+ err = 0;
+
+out:
+ return err;
+
+out_unmap_pregs:
+ of_iounmap(&op->resource[0], p->pregs, resource_size(&op->resource[0]));
+
+out_free:
+ kfree(p);
+ goto out;
+}
+
+static struct of_device_id __initdata fhc_match[] = {
+ {
+ .name = "fhc",
+ },
+ {},
+};
+
+static struct of_platform_driver fhc_driver = {
+ .match_table = fhc_match,
+ .probe = fhc_probe,
+ .driver = {
+ .name = "fhc",
+ },
+};
+
+static int __init sunfire_init(void)
+{
+ (void) of_register_driver(&fhc_driver, &of_platform_bus_type);
+ (void) of_register_driver(&clock_board_driver, &of_platform_bus_type);
+ return 0;
+}
+
+subsys_initcall(sunfire_init);
diff --git a/arch/sparc/kernel/cherrs.S b/arch/sparc/kernel/cherrs.S
new file mode 100644
index 000000000000..4ee1ad420862
--- /dev/null
+++ b/arch/sparc/kernel/cherrs.S
@@ -0,0 +1,579 @@
+ /* These get patched into the trap table at boot time
+ * once we know we have a cheetah processor.
+ */
+ .globl cheetah_fecc_trap_vector
+ .type cheetah_fecc_trap_vector,#function
+cheetah_fecc_trap_vector:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ andn %g1, DCU_DC | DCU_IC, %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ sethi %hi(cheetah_fast_ecc), %g2
+ jmpl %g2 + %lo(cheetah_fast_ecc), %g0
+ mov 0, %g1
+ .size cheetah_fecc_trap_vector,.-cheetah_fecc_trap_vector
+
+ .globl cheetah_fecc_trap_vector_tl1
+ .type cheetah_fecc_trap_vector_tl1,#function
+cheetah_fecc_trap_vector_tl1:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ andn %g1, DCU_DC | DCU_IC, %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ sethi %hi(cheetah_fast_ecc), %g2
+ jmpl %g2 + %lo(cheetah_fast_ecc), %g0
+ mov 1, %g1
+ .size cheetah_fecc_trap_vector_tl1,.-cheetah_fecc_trap_vector_tl1
+
+ .globl cheetah_cee_trap_vector
+ .type cheetah_cee_trap_vector,#function
+cheetah_cee_trap_vector:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ andn %g1, DCU_IC, %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ sethi %hi(cheetah_cee), %g2
+ jmpl %g2 + %lo(cheetah_cee), %g0
+ mov 0, %g1
+ .size cheetah_cee_trap_vector,.-cheetah_cee_trap_vector
+
+ .globl cheetah_cee_trap_vector_tl1
+ .type cheetah_cee_trap_vector_tl1,#function
+cheetah_cee_trap_vector_tl1:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ andn %g1, DCU_IC, %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ sethi %hi(cheetah_cee), %g2
+ jmpl %g2 + %lo(cheetah_cee), %g0
+ mov 1, %g1
+ .size cheetah_cee_trap_vector_tl1,.-cheetah_cee_trap_vector_tl1
+
+ .globl cheetah_deferred_trap_vector
+ .type cheetah_deferred_trap_vector,#function
+cheetah_deferred_trap_vector:
+ membar #Sync
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1;
+ andn %g1, DCU_DC | DCU_IC, %g1;
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG;
+ membar #Sync;
+ sethi %hi(cheetah_deferred_trap), %g2
+ jmpl %g2 + %lo(cheetah_deferred_trap), %g0
+ mov 0, %g1
+ .size cheetah_deferred_trap_vector,.-cheetah_deferred_trap_vector
+
+ .globl cheetah_deferred_trap_vector_tl1
+ .type cheetah_deferred_trap_vector_tl1,#function
+cheetah_deferred_trap_vector_tl1:
+ membar #Sync;
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1;
+ andn %g1, DCU_DC | DCU_IC, %g1;
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG;
+ membar #Sync;
+ sethi %hi(cheetah_deferred_trap), %g2
+ jmpl %g2 + %lo(cheetah_deferred_trap), %g0
+ mov 1, %g1
+ .size cheetah_deferred_trap_vector_tl1,.-cheetah_deferred_trap_vector_tl1
+
+ /* Cheetah+ specific traps. These are for the new I/D cache parity
+ * error traps. The first argument to cheetah_plus_parity_handler
+ * is encoded as follows:
+ *
+ * Bit0: 0=dcache,1=icache
+ * Bit1: 0=recoverable,1=unrecoverable
+ */
+ .globl cheetah_plus_dcpe_trap_vector
+ .type cheetah_plus_dcpe_trap_vector,#function
+cheetah_plus_dcpe_trap_vector:
+ membar #Sync
+ sethi %hi(do_cheetah_plus_data_parity), %g7
+ jmpl %g7 + %lo(do_cheetah_plus_data_parity), %g0
+ nop
+ nop
+ nop
+ nop
+ nop
+ .size cheetah_plus_dcpe_trap_vector,.-cheetah_plus_dcpe_trap_vector
+
+ .type do_cheetah_plus_data_parity,#function
+do_cheetah_plus_data_parity:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov 0x0, %o0
+ call cheetah_plus_parity_error
+ add %sp, PTREGS_OFF, %o1
+ ba,a,pt %xcc, rtrap_irq
+ .size do_cheetah_plus_data_parity,.-do_cheetah_plus_data_parity
+
+ .globl cheetah_plus_dcpe_trap_vector_tl1
+ .type cheetah_plus_dcpe_trap_vector_tl1,#function
+cheetah_plus_dcpe_trap_vector_tl1:
+ membar #Sync
+ wrpr PSTATE_IG | PSTATE_PEF | PSTATE_PRIV, %pstate
+ sethi %hi(do_dcpe_tl1), %g3
+ jmpl %g3 + %lo(do_dcpe_tl1), %g0
+ nop
+ nop
+ nop
+ nop
+ .size cheetah_plus_dcpe_trap_vector_tl1,.-cheetah_plus_dcpe_trap_vector_tl1
+
+ .globl cheetah_plus_icpe_trap_vector
+ .type cheetah_plus_icpe_trap_vector,#function
+cheetah_plus_icpe_trap_vector:
+ membar #Sync
+ sethi %hi(do_cheetah_plus_insn_parity), %g7
+ jmpl %g7 + %lo(do_cheetah_plus_insn_parity), %g0
+ nop
+ nop
+ nop
+ nop
+ nop
+ .size cheetah_plus_icpe_trap_vector,.-cheetah_plus_icpe_trap_vector
+
+ .type do_cheetah_plus_insn_parity,#function
+do_cheetah_plus_insn_parity:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov 0x1, %o0
+ call cheetah_plus_parity_error
+ add %sp, PTREGS_OFF, %o1
+ ba,a,pt %xcc, rtrap_irq
+ .size do_cheetah_plus_insn_parity,.-do_cheetah_plus_insn_parity
+
+ .globl cheetah_plus_icpe_trap_vector_tl1
+ .type cheetah_plus_icpe_trap_vector_tl1,#function
+cheetah_plus_icpe_trap_vector_tl1:
+ membar #Sync
+ wrpr PSTATE_IG | PSTATE_PEF | PSTATE_PRIV, %pstate
+ sethi %hi(do_icpe_tl1), %g3
+ jmpl %g3 + %lo(do_icpe_tl1), %g0
+ nop
+ nop
+ nop
+ nop
+ .size cheetah_plus_icpe_trap_vector_tl1,.-cheetah_plus_icpe_trap_vector_tl1
+
+ /* If we take one of these traps when tl >= 1, then we
+ * jump to interrupt globals. If some trap level above us
+ * was also using interrupt globals, we cannot recover.
+ * We may use all interrupt global registers except %g6.
+ */
+ .globl do_dcpe_tl1
+ .type do_dcpe_tl1,#function
+do_dcpe_tl1:
+ rdpr %tl, %g1 ! Save original trap level
+ mov 1, %g2 ! Setup TSTATE checking loop
+ sethi %hi(TSTATE_IG), %g3 ! TSTATE mask bit
+1: wrpr %g2, %tl ! Set trap level to check
+ rdpr %tstate, %g4 ! Read TSTATE for this level
+ andcc %g4, %g3, %g0 ! Interrupt globals in use?
+ bne,a,pn %xcc, do_dcpe_tl1_fatal ! Yep, irrecoverable
+ wrpr %g1, %tl ! Restore original trap level
+ add %g2, 1, %g2 ! Next trap level
+ cmp %g2, %g1 ! Hit them all yet?
+ ble,pt %icc, 1b ! Not yet
+ nop
+ wrpr %g1, %tl ! Restore original trap level
+do_dcpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */
+ sethi %hi(dcache_parity_tl1_occurred), %g2
+ lduw [%g2 + %lo(dcache_parity_tl1_occurred)], %g1
+ add %g1, 1, %g1
+ stw %g1, [%g2 + %lo(dcache_parity_tl1_occurred)]
+ /* Reset D-cache parity */
+ sethi %hi(1 << 16), %g1 ! D-cache size
+ mov (1 << 5), %g2 ! D-cache line size
+ sub %g1, %g2, %g1 ! Move down 1 cacheline
+1: srl %g1, 14, %g3 ! Compute UTAG
+ membar #Sync
+ stxa %g3, [%g1] ASI_DCACHE_UTAG
+ membar #Sync
+ sub %g2, 8, %g3 ! 64-bit data word within line
+2: membar #Sync
+ stxa %g0, [%g1 + %g3] ASI_DCACHE_DATA
+ membar #Sync
+ subcc %g3, 8, %g3 ! Next 64-bit data word
+ bge,pt %icc, 2b
+ nop
+ subcc %g1, %g2, %g1 ! Next cacheline
+ bge,pt %icc, 1b
+ nop
+ ba,pt %xcc, dcpe_icpe_tl1_common
+ nop
+
+do_dcpe_tl1_fatal:
+ sethi %hi(1f), %g7
+ ba,pt %xcc, etraptl1
+1: or %g7, %lo(1b), %g7
+ mov 0x2, %o0
+ call cheetah_plus_parity_error
+ add %sp, PTREGS_OFF, %o1
+ ba,pt %xcc, rtrap
+ nop
+ .size do_dcpe_tl1,.-do_dcpe_tl1
+
+ .globl do_icpe_tl1
+ .type do_icpe_tl1,#function
+do_icpe_tl1:
+ rdpr %tl, %g1 ! Save original trap level
+ mov 1, %g2 ! Setup TSTATE checking loop
+ sethi %hi(TSTATE_IG), %g3 ! TSTATE mask bit
+1: wrpr %g2, %tl ! Set trap level to check
+ rdpr %tstate, %g4 ! Read TSTATE for this level
+ andcc %g4, %g3, %g0 ! Interrupt globals in use?
+ bne,a,pn %xcc, do_icpe_tl1_fatal ! Yep, irrecoverable
+ wrpr %g1, %tl ! Restore original trap level
+ add %g2, 1, %g2 ! Next trap level
+ cmp %g2, %g1 ! Hit them all yet?
+ ble,pt %icc, 1b ! Not yet
+ nop
+ wrpr %g1, %tl ! Restore original trap level
+do_icpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */
+ sethi %hi(icache_parity_tl1_occurred), %g2
+ lduw [%g2 + %lo(icache_parity_tl1_occurred)], %g1
+ add %g1, 1, %g1
+ stw %g1, [%g2 + %lo(icache_parity_tl1_occurred)]
+ /* Flush I-cache */
+ sethi %hi(1 << 15), %g1 ! I-cache size
+ mov (1 << 5), %g2 ! I-cache line size
+ sub %g1, %g2, %g1
+1: or %g1, (2 << 3), %g3
+ stxa %g0, [%g3] ASI_IC_TAG
+ membar #Sync
+ subcc %g1, %g2, %g1
+ bge,pt %icc, 1b
+ nop
+ ba,pt %xcc, dcpe_icpe_tl1_common
+ nop
+
+do_icpe_tl1_fatal:
+ sethi %hi(1f), %g7
+ ba,pt %xcc, etraptl1
+1: or %g7, %lo(1b), %g7
+ mov 0x3, %o0
+ call cheetah_plus_parity_error
+ add %sp, PTREGS_OFF, %o1
+ ba,pt %xcc, rtrap
+ nop
+ .size do_icpe_tl1,.-do_icpe_tl1
+
+ .type dcpe_icpe_tl1_common,#function
+dcpe_icpe_tl1_common:
+ /* Flush D-cache, re-enable D/I caches in DCU and finally
+ * retry the trapping instruction.
+ */
+ sethi %hi(1 << 16), %g1 ! D-cache size
+ mov (1 << 5), %g2 ! D-cache line size
+ sub %g1, %g2, %g1
+1: stxa %g0, [%g1] ASI_DCACHE_TAG
+ membar #Sync
+ subcc %g1, %g2, %g1
+ bge,pt %icc, 1b
+ nop
+ ldxa [%g0] ASI_DCU_CONTROL_REG, %g1
+ or %g1, (DCU_DC | DCU_IC), %g1
+ stxa %g1, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+ retry
+ .size dcpe_icpe_tl1_common,.-dcpe_icpe_tl1_common
+
+ /* Capture I/D/E-cache state into per-cpu error scoreboard.
+ *
+ * %g1: (TL>=0) ? 1 : 0
+ * %g2: scratch
+ * %g3: scratch
+ * %g4: AFSR
+ * %g5: AFAR
+ * %g6: unused, will have current thread ptr after etrap
+ * %g7: scratch
+ */
+ .type __cheetah_log_error,#function
+__cheetah_log_error:
+ /* Put "TL1" software bit into AFSR. */
+ and %g1, 0x1, %g1
+ sllx %g1, 63, %g2
+ or %g4, %g2, %g4
+
+ /* Get log entry pointer for this cpu at this trap level. */
+ BRANCH_IF_JALAPENO(g2,g3,50f)
+ ldxa [%g0] ASI_SAFARI_CONFIG, %g2
+ srlx %g2, 17, %g2
+ ba,pt %xcc, 60f
+ and %g2, 0x3ff, %g2
+
+50: ldxa [%g0] ASI_JBUS_CONFIG, %g2
+ srlx %g2, 17, %g2
+ and %g2, 0x1f, %g2
+
+60: sllx %g2, 9, %g2
+ sethi %hi(cheetah_error_log), %g3
+ ldx [%g3 + %lo(cheetah_error_log)], %g3
+ brz,pn %g3, 80f
+ nop
+
+ add %g3, %g2, %g3
+ sllx %g1, 8, %g1
+ add %g3, %g1, %g1
+
+ /* %g1 holds pointer to the top of the logging scoreboard */
+ ldx [%g1 + 0x0], %g7
+ cmp %g7, -1
+ bne,pn %xcc, 80f
+ nop
+
+ stx %g4, [%g1 + 0x0]
+ stx %g5, [%g1 + 0x8]
+ add %g1, 0x10, %g1
+
+ /* %g1 now points to D-cache logging area */
+ set 0x3ff8, %g2 /* DC_addr mask */
+ and %g5, %g2, %g2 /* DC_addr bits of AFAR */
+ srlx %g5, 12, %g3
+ or %g3, 1, %g3 /* PHYS tag + valid */
+
+10: ldxa [%g2] ASI_DCACHE_TAG, %g7
+ cmp %g3, %g7 /* TAG match? */
+ bne,pt %xcc, 13f
+ nop
+
+ /* Yep, what we want, capture state. */
+ stx %g2, [%g1 + 0x20]
+ stx %g7, [%g1 + 0x28]
+
+ /* A membar Sync is required before and after utag access. */
+ membar #Sync
+ ldxa [%g2] ASI_DCACHE_UTAG, %g7
+ membar #Sync
+ stx %g7, [%g1 + 0x30]
+ ldxa [%g2] ASI_DCACHE_SNOOP_TAG, %g7
+ stx %g7, [%g1 + 0x38]
+ clr %g3
+
+12: ldxa [%g2 + %g3] ASI_DCACHE_DATA, %g7
+ stx %g7, [%g1]
+ add %g3, (1 << 5), %g3
+ cmp %g3, (4 << 5)
+ bl,pt %xcc, 12b
+ add %g1, 0x8, %g1
+
+ ba,pt %xcc, 20f
+ add %g1, 0x20, %g1
+
+13: sethi %hi(1 << 14), %g7
+ add %g2, %g7, %g2
+ srlx %g2, 14, %g7
+ cmp %g7, 4
+ bl,pt %xcc, 10b
+ nop
+
+ add %g1, 0x40, %g1
+
+ /* %g1 now points to I-cache logging area */
+20: set 0x1fe0, %g2 /* IC_addr mask */
+ and %g5, %g2, %g2 /* IC_addr bits of AFAR */
+ sllx %g2, 1, %g2 /* IC_addr[13:6]==VA[12:5] */
+ srlx %g5, (13 - 8), %g3 /* Make PTAG */
+ andn %g3, 0xff, %g3 /* Mask off undefined bits */
+
+21: ldxa [%g2] ASI_IC_TAG, %g7
+ andn %g7, 0xff, %g7
+ cmp %g3, %g7
+ bne,pt %xcc, 23f
+ nop
+
+ /* Yep, what we want, capture state. */
+ stx %g2, [%g1 + 0x40]
+ stx %g7, [%g1 + 0x48]
+ add %g2, (1 << 3), %g2
+ ldxa [%g2] ASI_IC_TAG, %g7
+ add %g2, (1 << 3), %g2
+ stx %g7, [%g1 + 0x50]
+ ldxa [%g2] ASI_IC_TAG, %g7
+ add %g2, (1 << 3), %g2
+ stx %g7, [%g1 + 0x60]
+ ldxa [%g2] ASI_IC_TAG, %g7
+ stx %g7, [%g1 + 0x68]
+ sub %g2, (3 << 3), %g2
+ ldxa [%g2] ASI_IC_STAG, %g7
+ stx %g7, [%g1 + 0x58]
+ clr %g3
+ srlx %g2, 2, %g2
+
+22: ldxa [%g2 + %g3] ASI_IC_INSTR, %g7
+ stx %g7, [%g1]
+ add %g3, (1 << 3), %g3
+ cmp %g3, (8 << 3)
+ bl,pt %xcc, 22b
+ add %g1, 0x8, %g1
+
+ ba,pt %xcc, 30f
+ add %g1, 0x30, %g1
+
+23: sethi %hi(1 << 14), %g7
+ add %g2, %g7, %g2
+ srlx %g2, 14, %g7
+ cmp %g7, 4
+ bl,pt %xcc, 21b
+ nop
+
+ add %g1, 0x70, %g1
+
+ /* %g1 now points to E-cache logging area */
+30: andn %g5, (32 - 1), %g2
+ stx %g2, [%g1 + 0x20]
+ ldxa [%g2] ASI_EC_TAG_DATA, %g7
+ stx %g7, [%g1 + 0x28]
+ ldxa [%g2] ASI_EC_R, %g0
+ clr %g3
+
+31: ldxa [%g3] ASI_EC_DATA, %g7
+ stx %g7, [%g1 + %g3]
+ add %g3, 0x8, %g3
+ cmp %g3, 0x20
+
+ bl,pt %xcc, 31b
+ nop
+80:
+ rdpr %tt, %g2
+ cmp %g2, 0x70
+ be c_fast_ecc
+ cmp %g2, 0x63
+ be c_cee
+ nop
+ ba,pt %xcc, c_deferred
+ .size __cheetah_log_error,.-__cheetah_log_error
+
+ /* Cheetah FECC trap handling, we get here from tl{0,1}_fecc
+ * in the trap table. That code has done a memory barrier
+ * and has disabled both the I-cache and D-cache in the DCU
+ * control register. The I-cache is disabled so that we may
+ * capture the corrupted cache line, and the D-cache is disabled
+ * because corrupt data may have been placed there and we don't
+ * want to reference it.
+ *
+ * %g1 is one if this trap occurred at %tl >= 1.
+ *
+ * Next, we turn off error reporting so that we don't recurse.
+ */
+ .globl cheetah_fast_ecc
+ .type cheetah_fast_ecc,#function
+cheetah_fast_ecc:
+ ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2
+ andn %g2, ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN, %g2
+ stxa %g2, [%g0] ASI_ESTATE_ERROR_EN
+ membar #Sync
+
+ /* Fetch and clear AFSR/AFAR */
+ ldxa [%g0] ASI_AFSR, %g4
+ ldxa [%g0] ASI_AFAR, %g5
+ stxa %g4, [%g0] ASI_AFSR
+ membar #Sync
+
+ ba,pt %xcc, __cheetah_log_error
+ nop
+ .size cheetah_fast_ecc,.-cheetah_fast_ecc
+
+ .type c_fast_ecc,#function
+c_fast_ecc:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov %l4, %o1
+ mov %l5, %o2
+ call cheetah_fecc_handler
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_irq
+ .size c_fast_ecc,.-c_fast_ecc
+
+ /* Our caller has disabled I-cache and performed membar Sync. */
+ .globl cheetah_cee
+ .type cheetah_cee,#function
+cheetah_cee:
+ ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2
+ andn %g2, ESTATE_ERROR_CEEN, %g2
+ stxa %g2, [%g0] ASI_ESTATE_ERROR_EN
+ membar #Sync
+
+ /* Fetch and clear AFSR/AFAR */
+ ldxa [%g0] ASI_AFSR, %g4
+ ldxa [%g0] ASI_AFAR, %g5
+ stxa %g4, [%g0] ASI_AFSR
+ membar #Sync
+
+ ba,pt %xcc, __cheetah_log_error
+ nop
+ .size cheetah_cee,.-cheetah_cee
+
+ .type c_cee,#function
+c_cee:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov %l4, %o1
+ mov %l5, %o2
+ call cheetah_cee_handler
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_irq
+ .size c_cee,.-c_cee
+
+ /* Our caller has disabled I-cache+D-cache and performed membar Sync. */
+ .globl cheetah_deferred_trap
+ .type cheetah_deferred_trap,#function
+cheetah_deferred_trap:
+ ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2
+ andn %g2, ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN, %g2
+ stxa %g2, [%g0] ASI_ESTATE_ERROR_EN
+ membar #Sync
+
+ /* Fetch and clear AFSR/AFAR */
+ ldxa [%g0] ASI_AFSR, %g4
+ ldxa [%g0] ASI_AFAR, %g5
+ stxa %g4, [%g0] ASI_AFSR
+ membar #Sync
+
+ ba,pt %xcc, __cheetah_log_error
+ nop
+ .size cheetah_deferred_trap,.-cheetah_deferred_trap
+
+ .type c_deferred,#function
+c_deferred:
+ rdpr %pil, %g2
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_off
+ nop
+#endif
+ mov %l4, %o1
+ mov %l5, %o2
+ call cheetah_deferred_handler
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_irq
+ .size c_deferred,.-c_deferred
diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c
new file mode 100644
index 000000000000..3b9f4d6e14a9
--- /dev/null
+++ b/arch/sparc/kernel/chmc.c
@@ -0,0 +1,863 @@
+/* chmc.c: Driver for UltraSPARC-III memory controller.
+ *
+ * Copyright (C) 2001, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <asm/spitfire.h>
+#include <asm/chmctrl.h>
+#include <asm/cpudata.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/head.h>
+#include <asm/io.h>
+#include <asm/memctrl.h>
+
+#define DRV_MODULE_NAME "chmc"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "0.2"
+
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("UltraSPARC-III memory controller driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+static int mc_type;
+#define MC_TYPE_SAFARI 1
+#define MC_TYPE_JBUS 2
+
+static dimm_printer_t us3mc_dimm_printer;
+
+#define CHMCTRL_NDGRPS 2
+#define CHMCTRL_NDIMMS 4
+
+#define CHMC_DIMMS_PER_MC (CHMCTRL_NDGRPS * CHMCTRL_NDIMMS)
+
+/* OBP memory-layout property format. */
+struct chmc_obp_map {
+ unsigned char dimm_map[144];
+ unsigned char pin_map[576];
+};
+
+#define DIMM_LABEL_SZ 8
+
+struct chmc_obp_mem_layout {
+ /* One max 8-byte string label per DIMM. Usually
+ * this matches the label on the motherboard where
+ * that DIMM resides.
+ */
+ char dimm_labels[CHMC_DIMMS_PER_MC][DIMM_LABEL_SZ];
+
+ /* If symmetric use map[0], else it is
+ * asymmetric and map[1] should be used.
+ */
+ char symmetric;
+
+ struct chmc_obp_map map[2];
+};
+
+#define CHMCTRL_NBANKS 4
+
+struct chmc_bank_info {
+ struct chmc *p;
+ int bank_id;
+
+ u64 raw_reg;
+ int valid;
+ int uk;
+ int um;
+ int lk;
+ int lm;
+ int interleave;
+ unsigned long base;
+ unsigned long size;
+};
+
+struct chmc {
+ struct list_head list;
+ int portid;
+
+ struct chmc_obp_mem_layout layout_prop;
+ int layout_size;
+
+ void __iomem *regs;
+
+ u64 timing_control1;
+ u64 timing_control2;
+ u64 timing_control3;
+ u64 timing_control4;
+ u64 memaddr_control;
+
+ struct chmc_bank_info logical_banks[CHMCTRL_NBANKS];
+};
+
+#define JBUSMC_REGS_SIZE 8
+
+#define JB_MC_REG1_DIMM2_BANK3 0x8000000000000000UL
+#define JB_MC_REG1_DIMM1_BANK1 0x4000000000000000UL
+#define JB_MC_REG1_DIMM2_BANK2 0x2000000000000000UL
+#define JB_MC_REG1_DIMM1_BANK0 0x1000000000000000UL
+#define JB_MC_REG1_XOR 0x0000010000000000UL
+#define JB_MC_REG1_ADDR_GEN_2 0x000000e000000000UL
+#define JB_MC_REG1_ADDR_GEN_2_SHIFT 37
+#define JB_MC_REG1_ADDR_GEN_1 0x0000001c00000000UL
+#define JB_MC_REG1_ADDR_GEN_1_SHIFT 34
+#define JB_MC_REG1_INTERLEAVE 0x0000000001800000UL
+#define JB_MC_REG1_INTERLEAVE_SHIFT 23
+#define JB_MC_REG1_DIMM2_PTYPE 0x0000000000200000UL
+#define JB_MC_REG1_DIMM2_PTYPE_SHIFT 21
+#define JB_MC_REG1_DIMM1_PTYPE 0x0000000000100000UL
+#define JB_MC_REG1_DIMM1_PTYPE_SHIFT 20
+
+#define PART_TYPE_X8 0
+#define PART_TYPE_X4 1
+
+#define INTERLEAVE_NONE 0
+#define INTERLEAVE_SAME 1
+#define INTERLEAVE_INTERNAL 2
+#define INTERLEAVE_BOTH 3
+
+#define ADDR_GEN_128MB 0
+#define ADDR_GEN_256MB 1
+#define ADDR_GEN_512MB 2
+#define ADDR_GEN_1GB 3
+
+#define JB_NUM_DIMM_GROUPS 2
+#define JB_NUM_DIMMS_PER_GROUP 2
+#define JB_NUM_DIMMS (JB_NUM_DIMM_GROUPS * JB_NUM_DIMMS_PER_GROUP)
+
+struct jbusmc_obp_map {
+ unsigned char dimm_map[18];
+ unsigned char pin_map[144];
+};
+
+struct jbusmc_obp_mem_layout {
+ /* One max 8-byte string label per DIMM. Usually
+ * this matches the label on the motherboard where
+ * that DIMM resides.
+ */
+ char dimm_labels[JB_NUM_DIMMS][DIMM_LABEL_SZ];
+
+ /* If symmetric use map[0], else it is
+ * asymmetric and map[1] should be used.
+ */
+ char symmetric;
+
+ struct jbusmc_obp_map map;
+
+ char _pad;
+};
+
+struct jbusmc_dimm_group {
+ struct jbusmc *controller;
+ int index;
+ u64 base_addr;
+ u64 size;
+};
+
+struct jbusmc {
+ void __iomem *regs;
+ u64 mc_reg_1;
+ u32 portid;
+ struct jbusmc_obp_mem_layout layout;
+ int layout_len;
+ int num_dimm_groups;
+ struct jbusmc_dimm_group dimm_groups[JB_NUM_DIMM_GROUPS];
+ struct list_head list;
+};
+
+static DEFINE_SPINLOCK(mctrl_list_lock);
+static LIST_HEAD(mctrl_list);
+
+static void mc_list_add(struct list_head *list)
+{
+ spin_lock(&mctrl_list_lock);
+ list_add(list, &mctrl_list);
+ spin_unlock(&mctrl_list_lock);
+}
+
+static void mc_list_del(struct list_head *list)
+{
+ spin_lock(&mctrl_list_lock);
+ list_del_init(list);
+ spin_unlock(&mctrl_list_lock);
+}
+
+#define SYNDROME_MIN -1
+#define SYNDROME_MAX 144
+
+/* Covert syndrome code into the way the bits are positioned
+ * on the bus.
+ */
+static int syndrome_to_qword_code(int syndrome_code)
+{
+ if (syndrome_code < 128)
+ syndrome_code += 16;
+ else if (syndrome_code < 128 + 9)
+ syndrome_code -= (128 - 7);
+ else if (syndrome_code < (128 + 9 + 3))
+ syndrome_code -= (128 + 9 - 4);
+ else
+ syndrome_code -= (128 + 9 + 3);
+ return syndrome_code;
+}
+
+/* All this magic has to do with how a cache line comes over the wire
+ * on Safari and JBUS. A 64-bit line comes over in 1 or more quadword
+ * cycles, each of which transmit ECC/MTAG info as well as the actual
+ * data.
+ */
+#define L2_LINE_SIZE 64
+#define L2_LINE_ADDR_MSK (L2_LINE_SIZE - 1)
+#define QW_PER_LINE 4
+#define QW_BYTES (L2_LINE_SIZE / QW_PER_LINE)
+#define QW_BITS 144
+#define SAFARI_LAST_BIT (576 - 1)
+#define JBUS_LAST_BIT (144 - 1)
+
+static void get_pin_and_dimm_str(int syndrome_code, unsigned long paddr,
+ int *pin_p, char **dimm_str_p, void *_prop,
+ int base_dimm_offset)
+{
+ int qword_code = syndrome_to_qword_code(syndrome_code);
+ int cache_line_offset;
+ int offset_inverse;
+ int dimm_map_index;
+ int map_val;
+
+ if (mc_type == MC_TYPE_JBUS) {
+ struct jbusmc_obp_mem_layout *p = _prop;
+
+ /* JBUS */
+ cache_line_offset = qword_code;
+ offset_inverse = (JBUS_LAST_BIT - cache_line_offset);
+ dimm_map_index = offset_inverse / 8;
+ map_val = p->map.dimm_map[dimm_map_index];
+ map_val = ((map_val >> ((7 - (offset_inverse & 7)))) & 1);
+ *dimm_str_p = p->dimm_labels[base_dimm_offset + map_val];
+ *pin_p = p->map.pin_map[cache_line_offset];
+ } else {
+ struct chmc_obp_mem_layout *p = _prop;
+ struct chmc_obp_map *mp;
+ int qword;
+
+ /* Safari */
+ if (p->symmetric)
+ mp = &p->map[0];
+ else
+ mp = &p->map[1];
+
+ qword = (paddr & L2_LINE_ADDR_MSK) / QW_BYTES;
+ cache_line_offset = ((3 - qword) * QW_BITS) + qword_code;
+ offset_inverse = (SAFARI_LAST_BIT - cache_line_offset);
+ dimm_map_index = offset_inverse >> 2;
+ map_val = mp->dimm_map[dimm_map_index];
+ map_val = ((map_val >> ((3 - (offset_inverse & 3)) << 1)) & 0x3);
+ *dimm_str_p = p->dimm_labels[base_dimm_offset + map_val];
+ *pin_p = mp->pin_map[cache_line_offset];
+ }
+}
+
+static struct jbusmc_dimm_group *jbusmc_find_dimm_group(unsigned long phys_addr)
+{
+ struct jbusmc *p;
+
+ list_for_each_entry(p, &mctrl_list, list) {
+ int i;
+
+ for (i = 0; i < p->num_dimm_groups; i++) {
+ struct jbusmc_dimm_group *dp = &p->dimm_groups[i];
+
+ if (phys_addr < dp->base_addr ||
+ (dp->base_addr + dp->size) <= phys_addr)
+ continue;
+
+ return dp;
+ }
+ }
+ return NULL;
+}
+
+static int jbusmc_print_dimm(int syndrome_code,
+ unsigned long phys_addr,
+ char *buf, int buflen)
+{
+ struct jbusmc_obp_mem_layout *prop;
+ struct jbusmc_dimm_group *dp;
+ struct jbusmc *p;
+ int first_dimm;
+
+ dp = jbusmc_find_dimm_group(phys_addr);
+ if (dp == NULL ||
+ syndrome_code < SYNDROME_MIN ||
+ syndrome_code > SYNDROME_MAX) {
+ buf[0] = '?';
+ buf[1] = '?';
+ buf[2] = '?';
+ buf[3] = '\0';
+ }
+ p = dp->controller;
+ prop = &p->layout;
+
+ first_dimm = dp->index * JB_NUM_DIMMS_PER_GROUP;
+
+ if (syndrome_code != SYNDROME_MIN) {
+ char *dimm_str;
+ int pin;
+
+ get_pin_and_dimm_str(syndrome_code, phys_addr, &pin,
+ &dimm_str, prop, first_dimm);
+ sprintf(buf, "%s, pin %3d", dimm_str, pin);
+ } else {
+ int dimm;
+
+ /* Multi-bit error, we just dump out all the
+ * dimm labels associated with this dimm group.
+ */
+ for (dimm = 0; dimm < JB_NUM_DIMMS_PER_GROUP; dimm++) {
+ sprintf(buf, "%s ",
+ prop->dimm_labels[first_dimm + dimm]);
+ buf += strlen(buf);
+ }
+ }
+
+ return 0;
+}
+
+static u64 __devinit jbusmc_dimm_group_size(u64 base,
+ const struct linux_prom64_registers *mem_regs,
+ int num_mem_regs)
+{
+ u64 max = base + (8UL * 1024 * 1024 * 1024);
+ u64 max_seen = base;
+ int i;
+
+ for (i = 0; i < num_mem_regs; i++) {
+ const struct linux_prom64_registers *ent;
+ u64 this_base;
+ u64 this_end;
+
+ ent = &mem_regs[i];
+ this_base = ent->phys_addr;
+ this_end = this_base + ent->reg_size;
+ if (base < this_base || base >= this_end)
+ continue;
+ if (this_end > max)
+ this_end = max;
+ if (this_end > max_seen)
+ max_seen = this_end;
+ }
+
+ return max_seen - base;
+}
+
+static void __devinit jbusmc_construct_one_dimm_group(struct jbusmc *p,
+ unsigned long index,
+ const struct linux_prom64_registers *mem_regs,
+ int num_mem_regs)
+{
+ struct jbusmc_dimm_group *dp = &p->dimm_groups[index];
+
+ dp->controller = p;
+ dp->index = index;
+
+ dp->base_addr = (p->portid * (64UL * 1024 * 1024 * 1024));
+ dp->base_addr += (index * (8UL * 1024 * 1024 * 1024));
+ dp->size = jbusmc_dimm_group_size(dp->base_addr, mem_regs, num_mem_regs);
+}
+
+static void __devinit jbusmc_construct_dimm_groups(struct jbusmc *p,
+ const struct linux_prom64_registers *mem_regs,
+ int num_mem_regs)
+{
+ if (p->mc_reg_1 & JB_MC_REG1_DIMM1_BANK0) {
+ jbusmc_construct_one_dimm_group(p, 0, mem_regs, num_mem_regs);
+ p->num_dimm_groups++;
+ }
+ if (p->mc_reg_1 & JB_MC_REG1_DIMM2_BANK2) {
+ jbusmc_construct_one_dimm_group(p, 1, mem_regs, num_mem_regs);
+ p->num_dimm_groups++;
+ }
+}
+
+static int __devinit jbusmc_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ const struct linux_prom64_registers *mem_regs;
+ struct device_node *mem_node;
+ int err, len, num_mem_regs;
+ struct jbusmc *p;
+ const u32 *prop;
+ const void *ml;
+
+ err = -ENODEV;
+ mem_node = of_find_node_by_path("/memory");
+ if (!mem_node) {
+ printk(KERN_ERR PFX "Cannot find /memory node.\n");
+ goto out;
+ }
+ mem_regs = of_get_property(mem_node, "reg", &len);
+ if (!mem_regs) {
+ printk(KERN_ERR PFX "Cannot get reg property of /memory node.\n");
+ goto out;
+ }
+ num_mem_regs = len / sizeof(*mem_regs);
+
+ err = -ENOMEM;
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ printk(KERN_ERR PFX "Cannot allocate struct jbusmc.\n");
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&p->list);
+
+ err = -ENODEV;
+ prop = of_get_property(op->node, "portid", &len);
+ if (!prop || len != 4) {
+ printk(KERN_ERR PFX "Cannot find portid.\n");
+ goto out_free;
+ }
+
+ p->portid = *prop;
+
+ prop = of_get_property(op->node, "memory-control-register-1", &len);
+ if (!prop || len != 8) {
+ printk(KERN_ERR PFX "Cannot get memory control register 1.\n");
+ goto out_free;
+ }
+
+ p->mc_reg_1 = ((u64)prop[0] << 32) | (u64) prop[1];
+
+ err = -ENOMEM;
+ p->regs = of_ioremap(&op->resource[0], 0, JBUSMC_REGS_SIZE, "jbusmc");
+ if (!p->regs) {
+ printk(KERN_ERR PFX "Cannot map jbusmc regs.\n");
+ goto out_free;
+ }
+
+ err = -ENODEV;
+ ml = of_get_property(op->node, "memory-layout", &p->layout_len);
+ if (!ml) {
+ printk(KERN_ERR PFX "Cannot get memory layout property.\n");
+ goto out_iounmap;
+ }
+ if (p->layout_len > sizeof(p->layout)) {
+ printk(KERN_ERR PFX "Unexpected memory-layout size %d\n",
+ p->layout_len);
+ goto out_iounmap;
+ }
+ memcpy(&p->layout, ml, p->layout_len);
+
+ jbusmc_construct_dimm_groups(p, mem_regs, num_mem_regs);
+
+ mc_list_add(&p->list);
+
+ printk(KERN_INFO PFX "UltraSPARC-IIIi memory controller at %s\n",
+ op->node->full_name);
+
+ dev_set_drvdata(&op->dev, p);
+
+ err = 0;
+
+out:
+ return err;
+
+out_iounmap:
+ of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE);
+
+out_free:
+ kfree(p);
+ goto out;
+}
+
+/* Does BANK decode PHYS_ADDR? */
+static int chmc_bank_match(struct chmc_bank_info *bp, unsigned long phys_addr)
+{
+ unsigned long upper_bits = (phys_addr & PA_UPPER_BITS) >> PA_UPPER_BITS_SHIFT;
+ unsigned long lower_bits = (phys_addr & PA_LOWER_BITS) >> PA_LOWER_BITS_SHIFT;
+
+ /* Bank must be enabled to match. */
+ if (bp->valid == 0)
+ return 0;
+
+ /* Would BANK match upper bits? */
+ upper_bits ^= bp->um; /* What bits are different? */
+ upper_bits = ~upper_bits; /* Invert. */
+ upper_bits |= bp->uk; /* What bits don't matter for matching? */
+ upper_bits = ~upper_bits; /* Invert. */
+
+ if (upper_bits)
+ return 0;
+
+ /* Would BANK match lower bits? */
+ lower_bits ^= bp->lm; /* What bits are different? */
+ lower_bits = ~lower_bits; /* Invert. */
+ lower_bits |= bp->lk; /* What bits don't matter for matching? */
+ lower_bits = ~lower_bits; /* Invert. */
+
+ if (lower_bits)
+ return 0;
+
+ /* I always knew you'd be the one. */
+ return 1;
+}
+
+/* Given PHYS_ADDR, search memory controller banks for a match. */
+static struct chmc_bank_info *chmc_find_bank(unsigned long phys_addr)
+{
+ struct chmc *p;
+
+ list_for_each_entry(p, &mctrl_list, list) {
+ int bank_no;
+
+ for (bank_no = 0; bank_no < CHMCTRL_NBANKS; bank_no++) {
+ struct chmc_bank_info *bp;
+
+ bp = &p->logical_banks[bank_no];
+ if (chmc_bank_match(bp, phys_addr))
+ return bp;
+ }
+ }
+
+ return NULL;
+}
+
+/* This is the main purpose of this driver. */
+static int chmc_print_dimm(int syndrome_code,
+ unsigned long phys_addr,
+ char *buf, int buflen)
+{
+ struct chmc_bank_info *bp;
+ struct chmc_obp_mem_layout *prop;
+ int bank_in_controller, first_dimm;
+
+ bp = chmc_find_bank(phys_addr);
+ if (bp == NULL ||
+ syndrome_code < SYNDROME_MIN ||
+ syndrome_code > SYNDROME_MAX) {
+ buf[0] = '?';
+ buf[1] = '?';
+ buf[2] = '?';
+ buf[3] = '\0';
+ return 0;
+ }
+
+ prop = &bp->p->layout_prop;
+ bank_in_controller = bp->bank_id & (CHMCTRL_NBANKS - 1);
+ first_dimm = (bank_in_controller & (CHMCTRL_NDGRPS - 1));
+ first_dimm *= CHMCTRL_NDIMMS;
+
+ if (syndrome_code != SYNDROME_MIN) {
+ char *dimm_str;
+ int pin;
+
+ get_pin_and_dimm_str(syndrome_code, phys_addr, &pin,
+ &dimm_str, prop, first_dimm);
+ sprintf(buf, "%s, pin %3d", dimm_str, pin);
+ } else {
+ int dimm;
+
+ /* Multi-bit error, we just dump out all the
+ * dimm labels associated with this bank.
+ */
+ for (dimm = 0; dimm < CHMCTRL_NDIMMS; dimm++) {
+ sprintf(buf, "%s ",
+ prop->dimm_labels[first_dimm + dimm]);
+ buf += strlen(buf);
+ }
+ }
+ return 0;
+}
+
+/* Accessing the registers is slightly complicated. If you want
+ * to get at the memory controller which is on the same processor
+ * the code is executing, you must use special ASI load/store else
+ * you go through the global mapping.
+ */
+static u64 chmc_read_mcreg(struct chmc *p, unsigned long offset)
+{
+ unsigned long ret, this_cpu;
+
+ preempt_disable();
+
+ this_cpu = real_hard_smp_processor_id();
+
+ if (p->portid == this_cpu) {
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=r" (ret)
+ : "r" (offset), "i" (ASI_MCU_CTRL_REG));
+ } else {
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=r" (ret)
+ : "r" (p->regs + offset),
+ "i" (ASI_PHYS_BYPASS_EC_E));
+ }
+
+ preempt_enable();
+
+ return ret;
+}
+
+#if 0 /* currently unused */
+static void chmc_write_mcreg(struct chmc *p, unsigned long offset, u64 val)
+{
+ if (p->portid == smp_processor_id()) {
+ __asm__ __volatile__("stxa %0, [%1] %2"
+ : : "r" (val),
+ "r" (offset), "i" (ASI_MCU_CTRL_REG));
+ } else {
+ __asm__ __volatile__("ldxa %0, [%1] %2"
+ : : "r" (val),
+ "r" (p->regs + offset),
+ "i" (ASI_PHYS_BYPASS_EC_E));
+ }
+}
+#endif
+
+static void chmc_interpret_one_decode_reg(struct chmc *p, int which_bank, u64 val)
+{
+ struct chmc_bank_info *bp = &p->logical_banks[which_bank];
+
+ bp->p = p;
+ bp->bank_id = (CHMCTRL_NBANKS * p->portid) + which_bank;
+ bp->raw_reg = val;
+ bp->valid = (val & MEM_DECODE_VALID) >> MEM_DECODE_VALID_SHIFT;
+ bp->uk = (val & MEM_DECODE_UK) >> MEM_DECODE_UK_SHIFT;
+ bp->um = (val & MEM_DECODE_UM) >> MEM_DECODE_UM_SHIFT;
+ bp->lk = (val & MEM_DECODE_LK) >> MEM_DECODE_LK_SHIFT;
+ bp->lm = (val & MEM_DECODE_LM) >> MEM_DECODE_LM_SHIFT;
+
+ bp->base = (bp->um);
+ bp->base &= ~(bp->uk);
+ bp->base <<= PA_UPPER_BITS_SHIFT;
+
+ switch(bp->lk) {
+ case 0xf:
+ default:
+ bp->interleave = 1;
+ break;
+
+ case 0xe:
+ bp->interleave = 2;
+ break;
+
+ case 0xc:
+ bp->interleave = 4;
+ break;
+
+ case 0x8:
+ bp->interleave = 8;
+ break;
+
+ case 0x0:
+ bp->interleave = 16;
+ break;
+ };
+
+ /* UK[10] is reserved, and UK[11] is not set for the SDRAM
+ * bank size definition.
+ */
+ bp->size = (((unsigned long)bp->uk &
+ ((1UL << 10UL) - 1UL)) + 1UL) << PA_UPPER_BITS_SHIFT;
+ bp->size /= bp->interleave;
+}
+
+static void chmc_fetch_decode_regs(struct chmc *p)
+{
+ if (p->layout_size == 0)
+ return;
+
+ chmc_interpret_one_decode_reg(p, 0,
+ chmc_read_mcreg(p, CHMCTRL_DECODE1));
+ chmc_interpret_one_decode_reg(p, 1,
+ chmc_read_mcreg(p, CHMCTRL_DECODE2));
+ chmc_interpret_one_decode_reg(p, 2,
+ chmc_read_mcreg(p, CHMCTRL_DECODE3));
+ chmc_interpret_one_decode_reg(p, 3,
+ chmc_read_mcreg(p, CHMCTRL_DECODE4));
+}
+
+static int __devinit chmc_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ struct device_node *dp = op->node;
+ unsigned long ver;
+ const void *pval;
+ int len, portid;
+ struct chmc *p;
+ int err;
+
+ err = -ENODEV;
+ __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+ if ((ver >> 32UL) == __JALAPENO_ID ||
+ (ver >> 32UL) == __SERRANO_ID)
+ goto out;
+
+ portid = of_getintprop_default(dp, "portid", -1);
+ if (portid == -1)
+ goto out;
+
+ pval = of_get_property(dp, "memory-layout", &len);
+ if (pval && len > sizeof(p->layout_prop)) {
+ printk(KERN_ERR PFX "Unexpected memory-layout property "
+ "size %d.\n", len);
+ goto out;
+ }
+
+ err = -ENOMEM;
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ printk(KERN_ERR PFX "Could not allocate struct chmc.\n");
+ goto out;
+ }
+
+ p->portid = portid;
+ p->layout_size = len;
+ if (!pval)
+ p->layout_size = 0;
+ else
+ memcpy(&p->layout_prop, pval, len);
+
+ p->regs = of_ioremap(&op->resource[0], 0, 0x48, "chmc");
+ if (!p->regs) {
+ printk(KERN_ERR PFX "Could not map registers.\n");
+ goto out_free;
+ }
+
+ if (p->layout_size != 0UL) {
+ p->timing_control1 = chmc_read_mcreg(p, CHMCTRL_TCTRL1);
+ p->timing_control2 = chmc_read_mcreg(p, CHMCTRL_TCTRL2);
+ p->timing_control3 = chmc_read_mcreg(p, CHMCTRL_TCTRL3);
+ p->timing_control4 = chmc_read_mcreg(p, CHMCTRL_TCTRL4);
+ p->memaddr_control = chmc_read_mcreg(p, CHMCTRL_MACTRL);
+ }
+
+ chmc_fetch_decode_regs(p);
+
+ mc_list_add(&p->list);
+
+ printk(KERN_INFO PFX "UltraSPARC-III memory controller at %s [%s]\n",
+ dp->full_name,
+ (p->layout_size ? "ACTIVE" : "INACTIVE"));
+
+ dev_set_drvdata(&op->dev, p);
+
+ err = 0;
+
+out:
+ return err;
+
+out_free:
+ kfree(p);
+ goto out;
+}
+
+static int __devinit us3mc_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ if (mc_type == MC_TYPE_SAFARI)
+ return chmc_probe(op, match);
+ else if (mc_type == MC_TYPE_JBUS)
+ return jbusmc_probe(op, match);
+ return -ENODEV;
+}
+
+static void __devexit chmc_destroy(struct of_device *op, struct chmc *p)
+{
+ list_del(&p->list);
+ of_iounmap(&op->resource[0], p->regs, 0x48);
+ kfree(p);
+}
+
+static void __devexit jbusmc_destroy(struct of_device *op, struct jbusmc *p)
+{
+ mc_list_del(&p->list);
+ of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE);
+ kfree(p);
+}
+
+static int __devexit us3mc_remove(struct of_device *op)
+{
+ void *p = dev_get_drvdata(&op->dev);
+
+ if (p) {
+ if (mc_type == MC_TYPE_SAFARI)
+ chmc_destroy(op, p);
+ else if (mc_type == MC_TYPE_JBUS)
+ jbusmc_destroy(op, p);
+ }
+ return 0;
+}
+
+static const struct of_device_id us3mc_match[] = {
+ {
+ .name = "memory-controller",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, us3mc_match);
+
+static struct of_platform_driver us3mc_driver = {
+ .name = "us3mc",
+ .match_table = us3mc_match,
+ .probe = us3mc_probe,
+ .remove = __devexit_p(us3mc_remove),
+};
+
+static inline bool us3mc_platform(void)
+{
+ if (tlb_type == cheetah || tlb_type == cheetah_plus)
+ return true;
+ return false;
+}
+
+static int __init us3mc_init(void)
+{
+ unsigned long ver;
+ int ret;
+
+ if (!us3mc_platform())
+ return -ENODEV;
+
+ __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+ if ((ver >> 32UL) == __JALAPENO_ID ||
+ (ver >> 32UL) == __SERRANO_ID) {
+ mc_type = MC_TYPE_JBUS;
+ us3mc_dimm_printer = jbusmc_print_dimm;
+ } else {
+ mc_type = MC_TYPE_SAFARI;
+ us3mc_dimm_printer = chmc_print_dimm;
+ }
+
+ ret = register_dimm_printer(us3mc_dimm_printer);
+
+ if (!ret) {
+ ret = of_register_driver(&us3mc_driver, &of_bus_type);
+ if (ret)
+ unregister_dimm_printer(us3mc_dimm_printer);
+ }
+ return ret;
+}
+
+static void __exit us3mc_cleanup(void)
+{
+ if (us3mc_platform()) {
+ unregister_dimm_printer(us3mc_dimm_printer);
+ of_unregister_driver(&us3mc_driver);
+ }
+}
+
+module_init(us3mc_init);
+module_exit(us3mc_cleanup);
diff --git a/arch/sparc/kernel/compat_audit.c b/arch/sparc/kernel/compat_audit.c
new file mode 100644
index 000000000000..d865575b25bf
--- /dev/null
+++ b/arch/sparc/kernel/compat_audit.c
@@ -0,0 +1,43 @@
+#define __32bit_syscall_numbers__
+#include <asm/unistd.h>
+
+unsigned sparc32_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned sparc32_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned sparc32_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned sparc32_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned sparc32_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int sparc32_classify_syscall(unsigned syscall)
+{
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 1;
+ }
+}
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 1fc17f59c6bf..6c2da2420f76 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -8,6 +8,8 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/threads.h>
+
+#include <asm/spitfire.h>
#include <asm/oplib.h>
#include <asm/page.h>
#include <asm/head.h>
@@ -15,153 +17,322 @@
#include <asm/mbus.h>
#include <asm/cpudata.h>
+#include "kernel.h"
+
DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
-struct cpu_iu_info {
- int psr_impl;
- int psr_vers;
- char* cpu_name; /* should be enough I hope... */
+struct cpu_info {
+ int psr_vers;
+ const char *name;
+};
+
+struct fpu_info {
+ int fp_vers;
+ const char *name;
};
-struct cpu_fp_info {
- int psr_impl;
- int fp_vers;
- char* fp_name;
+#define NOCPU 8
+#define NOFPU 8
+
+struct manufacturer_info {
+ int psr_impl;
+ struct cpu_info cpu_info[NOCPU];
+ struct fpu_info fpu_info[NOFPU];
};
+#define CPU(ver, _name) \
+{ .psr_vers = ver, .name = _name }
+
+#define FPU(ver, _name) \
+{ .fp_vers = ver, .name = _name }
+
+static const struct manufacturer_info __initconst manufacturer_info[] = {
+{
+ 0,
+ /* Sun4/100, 4/200, SLC */
+ .cpu_info = {
+ CPU(0, "Fujitsu MB86900/1A or LSI L64831 SparcKIT-40"),
+ /* borned STP1012PGA */
+ CPU(4, "Fujitsu MB86904"),
+ CPU(5, "Fujitsu TurboSparc MB86907"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0, "Fujitsu MB86910 or Weitek WTL1164/5"),
+ FPU(1, "Fujitsu MB86911 or Weitek WTL1164/5 or LSI L64831"),
+ FPU(2, "LSI Logic L64802 or Texas Instruments ACT8847"),
+ /* SparcStation SLC, SparcStation1 */
+ FPU(3, "Weitek WTL3170/2"),
+ /* SPARCstation-5 */
+ FPU(4, "Lsi Logic/Meiko L64804 or compatible"),
+ FPU(-1, NULL)
+ }
+},{
+ 1,
+ .cpu_info = {
+ /* SparcStation2, SparcServer 490 & 690 */
+ CPU(0, "LSI Logic Corporation - L64811"),
+ /* SparcStation2 */
+ CPU(1, "Cypress/ROSS CY7C601"),
+ /* Embedded controller */
+ CPU(3, "Cypress/ROSS CY7C611"),
+ /* Ross Technologies HyperSparc */
+ CPU(0xf, "ROSS HyperSparc RT620"),
+ CPU(0xe, "ROSS HyperSparc RT625 or RT626"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0, "ROSS HyperSparc combined IU/FPU"),
+ FPU(1, "Lsi Logic L64814"),
+ FPU(2, "Texas Instruments TMS390-C602A"),
+ FPU(3, "Cypress CY7C602 FPU"),
+ FPU(-1, NULL)
+ }
+},{
+ 2,
+ .cpu_info = {
+ /* ECL Implementation, CRAY S-MP Supercomputer... AIEEE! */
+ /* Someone please write the code to support this beast! ;) */
+ CPU(0, "Bipolar Integrated Technology - B5010"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+},{
+ 3,
+ .cpu_info = {
+ CPU(0, "LSI Logic Corporation - unknown-type"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+},{
+ 4,
+ .cpu_info = {
+ CPU(0, "Texas Instruments, Inc. - SuperSparc-(II)"),
+ /* SparcClassic -- borned STP1010TAB-50*/
+ CPU(1, "Texas Instruments, Inc. - MicroSparc"),
+ CPU(2, "Texas Instruments, Inc. - MicroSparc II"),
+ CPU(3, "Texas Instruments, Inc. - SuperSparc 51"),
+ CPU(4, "Texas Instruments, Inc. - SuperSparc 61"),
+ CPU(5, "Texas Instruments, Inc. - unknown"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ /* SuperSparc 50 module */
+ FPU(0, "SuperSparc on-chip FPU"),
+ /* SparcClassic */
+ FPU(4, "TI MicroSparc on chip FPU"),
+ FPU(-1, NULL)
+ }
+},{
+ 5,
+ .cpu_info = {
+ CPU(0, "Matsushita - MN10501"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0, "Matsushita MN10501"),
+ FPU(-1, NULL)
+ }
+},{
+ 6,
+ .cpu_info = {
+ CPU(0, "Philips Corporation - unknown"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+},{
+ 7,
+ .cpu_info = {
+ CPU(0, "Harvest VLSI Design Center, Inc. - unknown"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+},{
+ 8,
+ .cpu_info = {
+ CPU(0, "Systems and Processes Engineering Corporation (SPEC)"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+},{
+ 9,
+ .cpu_info = {
+ /* Gallium arsenide 200MHz, BOOOOGOOOOMIPS!!! */
+ CPU(0, "Fujitsu or Weitek Power-UP"),
+ CPU(1, "Fujitsu or Weitek Power-UP"),
+ CPU(2, "Fujitsu or Weitek Power-UP"),
+ CPU(3, "Fujitsu or Weitek Power-UP"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(3, "Fujitsu or Weitek on-chip FPU"),
+ FPU(-1, NULL)
+ }
+},{
+ 0x17,
+ .cpu_info = {
+ CPU(0x10, "TI UltraSparc I (SpitFire)"),
+ CPU(0x11, "TI UltraSparc II (BlackBird)"),
+ CPU(0x12, "TI UltraSparc IIi (Sabre)"),
+ CPU(0x13, "TI UltraSparc IIe (Hummingbird)"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0x10, "UltraSparc I integrated FPU"),
+ FPU(0x11, "UltraSparc II integrated FPU"),
+ FPU(0x12, "UltraSparc IIi integrated FPU"),
+ FPU(0x13, "UltraSparc IIe integrated FPU"),
+ FPU(-1, NULL)
+ }
+},{
+ 0x22,
+ .cpu_info = {
+ CPU(0x10, "TI UltraSparc I (SpitFire)"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0x10, "UltraSparc I integrated FPU"),
+ FPU(-1, NULL)
+ }
+},{
+ 0x3e,
+ .cpu_info = {
+ CPU(0x14, "TI UltraSparc III (Cheetah)"),
+ CPU(0x15, "TI UltraSparc III+ (Cheetah+)"),
+ CPU(0x16, "TI UltraSparc IIIi (Jalapeno)"),
+ CPU(0x18, "TI UltraSparc IV (Jaguar)"),
+ CPU(0x19, "TI UltraSparc IV+ (Panther)"),
+ CPU(0x22, "TI UltraSparc IIIi+ (Serrano)"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0x14, "UltraSparc III integrated FPU"),
+ FPU(0x15, "UltraSparc III+ integrated FPU"),
+ FPU(0x16, "UltraSparc IIIi integrated FPU"),
+ FPU(0x18, "UltraSparc IV integrated FPU"),
+ FPU(0x19, "UltraSparc IV+ integrated FPU"),
+ FPU(0x22, "UltraSparc IIIi+ integrated FPU"),
+ FPU(-1, NULL)
+ }
+}};
+
/* In order to get the fpu type correct, you need to take the IDPROM's
* machine type value into consideration too. I will fix this.
*/
-static struct cpu_fp_info linux_sparc_fpu[] = {
- { 0, 0, "Fujitsu MB86910 or Weitek WTL1164/5"},
- { 0, 1, "Fujitsu MB86911 or Weitek WTL1164/5 or LSI L64831"},
- { 0, 2, "LSI Logic L64802 or Texas Instruments ACT8847"},
- /* SparcStation SLC, SparcStation1 */
- { 0, 3, "Weitek WTL3170/2"},
- /* SPARCstation-5 */
- { 0, 4, "Lsi Logic/Meiko L64804 or compatible"},
- { 0, 5, "reserved"},
- { 0, 6, "reserved"},
- { 0, 7, "No FPU"},
- { 1, 0, "ROSS HyperSparc combined IU/FPU"},
- { 1, 1, "Lsi Logic L64814"},
- { 1, 2, "Texas Instruments TMS390-C602A"},
- { 1, 3, "Cypress CY7C602 FPU"},
- { 1, 4, "reserved"},
- { 1, 5, "reserved"},
- { 1, 6, "reserved"},
- { 1, 7, "No FPU"},
- { 2, 0, "BIT B5010 or B5110/20 or B5210"},
- { 2, 1, "reserved"},
- { 2, 2, "reserved"},
- { 2, 3, "reserved"},
- { 2, 4, "reserved"},
- { 2, 5, "reserved"},
- { 2, 6, "reserved"},
- { 2, 7, "No FPU"},
- /* SuperSparc 50 module */
- { 4, 0, "SuperSparc on-chip FPU"},
- /* SparcClassic */
- { 4, 4, "TI MicroSparc on chip FPU"},
- { 5, 0, "Matsushita MN10501"},
- { 5, 1, "reserved"},
- { 5, 2, "reserved"},
- { 5, 3, "reserved"},
- { 5, 4, "reserved"},
- { 5, 5, "reserved"},
- { 5, 6, "reserved"},
- { 5, 7, "No FPU"},
- { 9, 3, "Fujitsu or Weitek on-chip FPU"},
-};
-#define NSPARCFPU ARRAY_SIZE(linux_sparc_fpu)
-
-static struct cpu_iu_info linux_sparc_chips[] = {
- /* Sun4/100, 4/200, SLC */
- { 0, 0, "Fujitsu MB86900/1A or LSI L64831 SparcKIT-40"},
- /* borned STP1012PGA */
- { 0, 4, "Fujitsu MB86904"},
- { 0, 5, "Fujitsu TurboSparc MB86907"},
- /* SparcStation2, SparcServer 490 & 690 */
- { 1, 0, "LSI Logic Corporation - L64811"},
- /* SparcStation2 */
- { 1, 1, "Cypress/ROSS CY7C601"},
- /* Embedded controller */
- { 1, 3, "Cypress/ROSS CY7C611"},
- /* Ross Technologies HyperSparc */
- { 1, 0xf, "ROSS HyperSparc RT620"},
- { 1, 0xe, "ROSS HyperSparc RT625 or RT626"},
- /* ECL Implementation, CRAY S-MP Supercomputer... AIEEE! */
- /* Someone please write the code to support this beast! ;) */
- { 2, 0, "Bipolar Integrated Technology - B5010"},
- { 3, 0, "LSI Logic Corporation - unknown-type"},
- { 4, 0, "Texas Instruments, Inc. - SuperSparc-(II)"},
- /* SparcClassic -- borned STP1010TAB-50*/
- { 4, 1, "Texas Instruments, Inc. - MicroSparc"},
- { 4, 2, "Texas Instruments, Inc. - MicroSparc II"},
- { 4, 3, "Texas Instruments, Inc. - SuperSparc 51"},
- { 4, 4, "Texas Instruments, Inc. - SuperSparc 61"},
- { 4, 5, "Texas Instruments, Inc. - unknown"},
- { 5, 0, "Matsushita - MN10501"},
- { 6, 0, "Philips Corporation - unknown"},
- { 7, 0, "Harvest VLSI Design Center, Inc. - unknown"},
- /* Gallium arsenide 200MHz, BOOOOGOOOOMIPS!!! */
- { 8, 0, "Systems and Processes Engineering Corporation (SPEC)"},
- { 9, 0, "Fujitsu or Weitek Power-UP"},
- { 9, 1, "Fujitsu or Weitek Power-UP"},
- { 9, 2, "Fujitsu or Weitek Power-UP"},
- { 9, 3, "Fujitsu or Weitek Power-UP"},
- { 0xa, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xb, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xc, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xd, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xe, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xf, 0, "UNKNOWN CPU-VENDOR/TYPE"},
-};
+const char *sparc_cpu_type;
+const char *sparc_fpu_type;
-#define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips)
+unsigned int fsr_storage;
-char *sparc_cpu_type;
-char *sparc_fpu_type;
+static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
+{
+ sparc_cpu_type = NULL;
+ sparc_fpu_type = NULL;
+ if (psr_impl < ARRAY_SIZE(manufacturer_info))
+ {
+ const struct cpu_info *cpu;
+ const struct fpu_info *fpu;
-unsigned int fsr_storage;
+ cpu = &manufacturer_info[psr_impl].cpu_info[0];
+ while (cpu->psr_vers != -1)
+ {
+ if (cpu->psr_vers == psr_vers) {
+ sparc_cpu_type = cpu->name;
+ sparc_fpu_type = "No FPU";
+ break;
+ }
+ cpu++;
+ }
+ fpu = &manufacturer_info[psr_impl].fpu_info[0];
+ while (fpu->fp_vers != -1)
+ {
+ if (fpu->fp_vers == fpu_vers) {
+ sparc_fpu_type = fpu->name;
+ break;
+ }
+ fpu++;
+ }
+ }
+ if (sparc_cpu_type == NULL)
+ {
+ printk(KERN_ERR "CPU: Unknown chip, impl[0x%x] vers[0x%x]\n",
+ psr_impl, psr_vers);
+ sparc_cpu_type = "Unknown CPU";
+ }
+ if (sparc_fpu_type == NULL)
+ {
+ printk(KERN_ERR "FPU: Unknown chip, impl[0x%x] vers[0x%x]\n",
+ psr_impl, fpu_vers);
+ sparc_fpu_type = "Unknown FPU";
+ }
+}
+#ifdef CONFIG_SPARC32
void __cpuinit cpu_probe(void)
{
int psr_impl, psr_vers, fpu_vers;
- int i, psr;
+ int psr;
- psr_impl = ((get_psr()>>28)&0xf);
- psr_vers = ((get_psr()>>24)&0xf);
+ psr_impl = ((get_psr() >> 28) & 0xf);
+ psr_vers = ((get_psr() >> 24) & 0xf);
psr = get_psr();
put_psr(psr | PSR_EF);
- fpu_vers = ((get_fsr()>>17)&0x7);
+ fpu_vers = ((get_fsr() >> 17) & 0x7);
put_psr(psr);
- for(i = 0; i<NSPARCCHIPS; i++) {
- if(linux_sparc_chips[i].psr_impl == psr_impl)
- if(linux_sparc_chips[i].psr_vers == psr_vers) {
- sparc_cpu_type = linux_sparc_chips[i].cpu_name;
- break;
- }
- }
+ set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers);
+}
+#else
+static void __init sun4v_cpu_probe(void)
+{
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_NIAGARA1:
+ sparc_cpu_type = "UltraSparc T1 (Niagara)";
+ sparc_fpu_type = "UltraSparc T1 integrated FPU";
+ break;
- if(i==NSPARCCHIPS)
- printk("DEBUG: psr.impl = 0x%x psr.vers = 0x%x\n", psr_impl,
- psr_vers);
+ case SUN4V_CHIP_NIAGARA2:
+ sparc_cpu_type = "UltraSparc T2 (Niagara2)";
+ sparc_fpu_type = "UltraSparc T2 integrated FPU";
+ break;
- for(i = 0; i<NSPARCFPU; i++) {
- if(linux_sparc_fpu[i].psr_impl == psr_impl)
- if(linux_sparc_fpu[i].fp_vers == fpu_vers) {
- sparc_fpu_type = linux_sparc_fpu[i].fp_name;
- break;
- }
+ default:
+ printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
+ prom_cpu_compatible);
+ sparc_cpu_type = "Unknown SUN4V CPU";
+ sparc_fpu_type = "Unknown SUN4V FPU";
+ break;
}
+}
+
+static int __init cpu_type_probe(void)
+{
+ if (tlb_type == hypervisor) {
+ sun4v_cpu_probe();
+ } else {
+ unsigned long ver;
+ int manuf, impl;
- if(i == NSPARCFPU) {
- printk("DEBUG: psr.impl = 0x%x fsr.vers = 0x%x\n", psr_impl,
- fpu_vers);
- sparc_fpu_type = linux_sparc_fpu[31].fp_name;
+ __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+
+ manuf = ((ver >> 48) & 0xffff);
+ impl = ((ver >> 32) & 0xffff);
+ set_cpu_and_fpu(manuf, impl, impl);
}
+ return 0;
}
+
+arch_initcall(cpu_type_probe);
+#endif
diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c
index ad656b044b8c..b171ae8de90d 100644
--- a/arch/sparc/kernel/devices.c
+++ b/arch/sparc/kernel/devices.c
@@ -133,14 +133,12 @@ void __init device_scan(void)
#endif /* !CONFIG_SMP */
cpu_probe();
-#ifdef CONFIG_SUN_AUXIO
{
extern void auxio_probe(void);
extern void auxio_power_probe(void);
auxio_probe();
auxio_power_probe();
}
-#endif
clock_stop_probe();
if (ARCH_SUN4C)
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
new file mode 100644
index 000000000000..f52e0534d91d
--- /dev/null
+++ b/arch/sparc/kernel/ds.c
@@ -0,0 +1,1244 @@
+/* ds.c: Domain Services driver for Logical Domains
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/kthread.h>
+#include <linux/reboot.h>
+#include <linux/cpu.h>
+
+#include <asm/ldc.h>
+#include <asm/vio.h>
+#include <asm/mdesc.h>
+#include <asm/head.h>
+#include <asm/irq.h>
+
+#define DRV_MODULE_NAME "ds"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.0"
+#define DRV_MODULE_RELDATE "Jul 11, 2007"
+
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Sun LDOM domain services driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+struct ds_msg_tag {
+ __u32 type;
+#define DS_INIT_REQ 0x00
+#define DS_INIT_ACK 0x01
+#define DS_INIT_NACK 0x02
+#define DS_REG_REQ 0x03
+#define DS_REG_ACK 0x04
+#define DS_REG_NACK 0x05
+#define DS_UNREG_REQ 0x06
+#define DS_UNREG_ACK 0x07
+#define DS_UNREG_NACK 0x08
+#define DS_DATA 0x09
+#define DS_NACK 0x0a
+
+ __u32 len;
+};
+
+/* Result codes */
+#define DS_OK 0x00
+#define DS_REG_VER_NACK 0x01
+#define DS_REG_DUP 0x02
+#define DS_INV_HDL 0x03
+#define DS_TYPE_UNKNOWN 0x04
+
+struct ds_version {
+ __u16 major;
+ __u16 minor;
+};
+
+struct ds_ver_req {
+ struct ds_msg_tag tag;
+ struct ds_version ver;
+};
+
+struct ds_ver_ack {
+ struct ds_msg_tag tag;
+ __u16 minor;
+};
+
+struct ds_ver_nack {
+ struct ds_msg_tag tag;
+ __u16 major;
+};
+
+struct ds_reg_req {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 major;
+ __u16 minor;
+ char svc_id[0];
+};
+
+struct ds_reg_ack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 minor;
+};
+
+struct ds_reg_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 major;
+};
+
+struct ds_unreg_req {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+struct ds_unreg_ack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+struct ds_unreg_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+struct ds_data {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+struct ds_data_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u64 result;
+};
+
+struct ds_info;
+struct ds_cap_state {
+ __u64 handle;
+
+ void (*data)(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+
+ const char *service_id;
+
+ u8 state;
+#define CAP_STATE_UNKNOWN 0x00
+#define CAP_STATE_REG_SENT 0x01
+#define CAP_STATE_REGISTERED 0x02
+};
+
+static void md_update_data(struct ds_info *dp, struct ds_cap_state *cp,
+ void *buf, int len);
+static void domain_shutdown_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+static void domain_panic_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+#ifdef CONFIG_HOTPLUG_CPU
+static void dr_cpu_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+#endif
+static void ds_pri_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+static void ds_var_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+
+static struct ds_cap_state ds_states_template[] = {
+ {
+ .service_id = "md-update",
+ .data = md_update_data,
+ },
+ {
+ .service_id = "domain-shutdown",
+ .data = domain_shutdown_data,
+ },
+ {
+ .service_id = "domain-panic",
+ .data = domain_panic_data,
+ },
+#ifdef CONFIG_HOTPLUG_CPU
+ {
+ .service_id = "dr-cpu",
+ .data = dr_cpu_data,
+ },
+#endif
+ {
+ .service_id = "pri",
+ .data = ds_pri_data,
+ },
+ {
+ .service_id = "var-config",
+ .data = ds_var_data,
+ },
+ {
+ .service_id = "var-config-backup",
+ .data = ds_var_data,
+ },
+};
+
+static DEFINE_SPINLOCK(ds_lock);
+
+struct ds_info {
+ struct ldc_channel *lp;
+ u8 hs_state;
+#define DS_HS_START 0x01
+#define DS_HS_DONE 0x02
+
+ u64 id;
+
+ void *rcv_buf;
+ int rcv_buf_len;
+
+ struct ds_cap_state *ds_states;
+ int num_ds_states;
+
+ struct ds_info *next;
+};
+
+static struct ds_info *ds_info_list;
+
+static struct ds_cap_state *find_cap(struct ds_info *dp, u64 handle)
+{
+ unsigned int index = handle >> 32;
+
+ if (index >= dp->num_ds_states)
+ return NULL;
+ return &dp->ds_states[index];
+}
+
+static struct ds_cap_state *find_cap_by_string(struct ds_info *dp,
+ const char *name)
+{
+ int i;
+
+ for (i = 0; i < dp->num_ds_states; i++) {
+ if (strcmp(dp->ds_states[i].service_id, name))
+ continue;
+
+ return &dp->ds_states[i];
+ }
+ return NULL;
+}
+
+static int __ds_send(struct ldc_channel *lp, void *data, int len)
+{
+ int err, limit = 1000;
+
+ err = -EINVAL;
+ while (limit-- > 0) {
+ err = ldc_write(lp, data, len);
+ if (!err || (err != -EAGAIN))
+ break;
+ udelay(1);
+ }
+
+ return err;
+}
+
+static int ds_send(struct ldc_channel *lp, void *data, int len)
+{
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&ds_lock, flags);
+ err = __ds_send(lp, data, len);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ return err;
+}
+
+struct ds_md_update_req {
+ __u64 req_num;
+};
+
+struct ds_md_update_res {
+ __u64 req_num;
+ __u32 result;
+};
+
+static void md_update_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ldc_channel *lp = dp->lp;
+ struct ds_data *dpkt = buf;
+ struct ds_md_update_req *rp;
+ struct {
+ struct ds_data data;
+ struct ds_md_update_res res;
+ } pkt;
+
+ rp = (struct ds_md_update_req *) (dpkt + 1);
+
+ printk(KERN_INFO "ds-%lu: Machine description update.\n", dp->id);
+
+ mdesc_update();
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.data.tag.type = DS_DATA;
+ pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+ pkt.data.handle = cp->handle;
+ pkt.res.req_num = rp->req_num;
+ pkt.res.result = DS_OK;
+
+ ds_send(lp, &pkt, sizeof(pkt));
+}
+
+struct ds_shutdown_req {
+ __u64 req_num;
+ __u32 ms_delay;
+};
+
+struct ds_shutdown_res {
+ __u64 req_num;
+ __u32 result;
+ char reason[1];
+};
+
+static void domain_shutdown_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ldc_channel *lp = dp->lp;
+ struct ds_data *dpkt = buf;
+ struct ds_shutdown_req *rp;
+ struct {
+ struct ds_data data;
+ struct ds_shutdown_res res;
+ } pkt;
+
+ rp = (struct ds_shutdown_req *) (dpkt + 1);
+
+ printk(KERN_ALERT "ds-%lu: Shutdown request from "
+ "LDOM manager received.\n", dp->id);
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.data.tag.type = DS_DATA;
+ pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+ pkt.data.handle = cp->handle;
+ pkt.res.req_num = rp->req_num;
+ pkt.res.result = DS_OK;
+ pkt.res.reason[0] = 0;
+
+ ds_send(lp, &pkt, sizeof(pkt));
+
+ orderly_poweroff(true);
+}
+
+struct ds_panic_req {
+ __u64 req_num;
+};
+
+struct ds_panic_res {
+ __u64 req_num;
+ __u32 result;
+ char reason[1];
+};
+
+static void domain_panic_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ldc_channel *lp = dp->lp;
+ struct ds_data *dpkt = buf;
+ struct ds_panic_req *rp;
+ struct {
+ struct ds_data data;
+ struct ds_panic_res res;
+ } pkt;
+
+ rp = (struct ds_panic_req *) (dpkt + 1);
+
+ printk(KERN_ALERT "ds-%lu: Panic request from "
+ "LDOM manager received.\n", dp->id);
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.data.tag.type = DS_DATA;
+ pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+ pkt.data.handle = cp->handle;
+ pkt.res.req_num = rp->req_num;
+ pkt.res.result = DS_OK;
+ pkt.res.reason[0] = 0;
+
+ ds_send(lp, &pkt, sizeof(pkt));
+
+ panic("PANIC requested by LDOM manager.");
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+struct dr_cpu_tag {
+ __u64 req_num;
+ __u32 type;
+#define DR_CPU_CONFIGURE 0x43
+#define DR_CPU_UNCONFIGURE 0x55
+#define DR_CPU_FORCE_UNCONFIGURE 0x46
+#define DR_CPU_STATUS 0x53
+
+/* Responses */
+#define DR_CPU_OK 0x6f
+#define DR_CPU_ERROR 0x65
+
+ __u32 num_records;
+};
+
+struct dr_cpu_resp_entry {
+ __u32 cpu;
+ __u32 result;
+#define DR_CPU_RES_OK 0x00
+#define DR_CPU_RES_FAILURE 0x01
+#define DR_CPU_RES_BLOCKED 0x02
+#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03
+#define DR_CPU_RES_NOT_IN_MD 0x04
+
+ __u32 stat;
+#define DR_CPU_STAT_NOT_PRESENT 0x00
+#define DR_CPU_STAT_UNCONFIGURED 0x01
+#define DR_CPU_STAT_CONFIGURED 0x02
+
+ __u32 str_off;
+};
+
+static void __dr_cpu_send_error(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ struct ds_data *data)
+{
+ struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
+ struct {
+ struct ds_data data;
+ struct dr_cpu_tag tag;
+ } pkt;
+ int msg_len;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.data.tag.type = DS_DATA;
+ pkt.data.handle = cp->handle;
+ pkt.tag.req_num = tag->req_num;
+ pkt.tag.type = DR_CPU_ERROR;
+ pkt.tag.num_records = 0;
+
+ msg_len = (sizeof(struct ds_data) +
+ sizeof(struct dr_cpu_tag));
+
+ pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
+
+ __ds_send(dp->lp, &pkt, msg_len);
+}
+
+static void dr_cpu_send_error(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ struct ds_data *data)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __dr_cpu_send_error(dp, cp, data);
+ spin_unlock_irqrestore(&ds_lock, flags);
+}
+
+#define CPU_SENTINEL 0xffffffff
+
+static void purge_dups(u32 *list, u32 num_ents)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_ents; i++) {
+ u32 cpu = list[i];
+ unsigned int j;
+
+ if (cpu == CPU_SENTINEL)
+ continue;
+
+ for (j = i + 1; j < num_ents; j++) {
+ if (list[j] == cpu)
+ list[j] = CPU_SENTINEL;
+ }
+ }
+}
+
+static int dr_cpu_size_response(int ncpus)
+{
+ return (sizeof(struct ds_data) +
+ sizeof(struct dr_cpu_tag) +
+ (sizeof(struct dr_cpu_resp_entry) * ncpus));
+}
+
+static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
+ u64 handle, int resp_len, int ncpus,
+ cpumask_t *mask, u32 default_stat)
+{
+ struct dr_cpu_resp_entry *ent;
+ struct dr_cpu_tag *tag;
+ int i, cpu;
+
+ tag = (struct dr_cpu_tag *) (resp + 1);
+ ent = (struct dr_cpu_resp_entry *) (tag + 1);
+
+ resp->tag.type = DS_DATA;
+ resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
+ resp->handle = handle;
+ tag->req_num = req_num;
+ tag->type = DR_CPU_OK;
+ tag->num_records = ncpus;
+
+ i = 0;
+ for_each_cpu_mask(cpu, *mask) {
+ ent[i].cpu = cpu;
+ ent[i].result = DR_CPU_RES_OK;
+ ent[i].stat = default_stat;
+ i++;
+ }
+ BUG_ON(i != ncpus);
+}
+
+static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
+ u32 res, u32 stat)
+{
+ struct dr_cpu_resp_entry *ent;
+ struct dr_cpu_tag *tag;
+ int i;
+
+ tag = (struct dr_cpu_tag *) (resp + 1);
+ ent = (struct dr_cpu_resp_entry *) (tag + 1);
+
+ for (i = 0; i < ncpus; i++) {
+ if (ent[i].cpu != cpu)
+ continue;
+ ent[i].result = res;
+ ent[i].stat = stat;
+ break;
+ }
+}
+
+static int __cpuinit dr_cpu_configure(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ u64 req_num,
+ cpumask_t *mask)
+{
+ struct ds_data *resp;
+ int resp_len, ncpus, cpu;
+ unsigned long flags;
+
+ ncpus = cpus_weight(*mask);
+ resp_len = dr_cpu_size_response(ncpus);
+ resp = kzalloc(resp_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ dr_cpu_init_response(resp, req_num, cp->handle,
+ resp_len, ncpus, mask,
+ DR_CPU_STAT_CONFIGURED);
+
+ mdesc_fill_in_cpu_data(*mask);
+
+ for_each_cpu_mask(cpu, *mask) {
+ int err;
+
+ printk(KERN_INFO "ds-%lu: Starting cpu %d...\n",
+ dp->id, cpu);
+ err = cpu_up(cpu);
+ if (err) {
+ __u32 res = DR_CPU_RES_FAILURE;
+ __u32 stat = DR_CPU_STAT_UNCONFIGURED;
+
+ if (!cpu_present(cpu)) {
+ /* CPU not present in MD */
+ res = DR_CPU_RES_NOT_IN_MD;
+ stat = DR_CPU_STAT_NOT_PRESENT;
+ } else if (err == -ENODEV) {
+ /* CPU did not call in successfully */
+ res = DR_CPU_RES_CPU_NOT_RESPONDING;
+ }
+
+ printk(KERN_INFO "ds-%lu: CPU startup failed err=%d\n",
+ dp->id, err);
+ dr_cpu_mark(resp, cpu, ncpus, res, stat);
+ }
+ }
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __ds_send(dp->lp, resp, resp_len);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ kfree(resp);
+
+ /* Redistribute IRQs, taking into account the new cpus. */
+ fixup_irqs();
+
+ return 0;
+}
+
+static int dr_cpu_unconfigure(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ u64 req_num,
+ cpumask_t *mask)
+{
+ struct ds_data *resp;
+ int resp_len, ncpus, cpu;
+ unsigned long flags;
+
+ ncpus = cpus_weight(*mask);
+ resp_len = dr_cpu_size_response(ncpus);
+ resp = kzalloc(resp_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ dr_cpu_init_response(resp, req_num, cp->handle,
+ resp_len, ncpus, mask,
+ DR_CPU_STAT_UNCONFIGURED);
+
+ for_each_cpu_mask(cpu, *mask) {
+ int err;
+
+ printk(KERN_INFO "ds-%lu: Shutting down cpu %d...\n",
+ dp->id, cpu);
+ err = cpu_down(cpu);
+ if (err)
+ dr_cpu_mark(resp, cpu, ncpus,
+ DR_CPU_RES_FAILURE,
+ DR_CPU_STAT_CONFIGURED);
+ }
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __ds_send(dp->lp, resp, resp_len);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ kfree(resp);
+
+ return 0;
+}
+
+static void __cpuinit dr_cpu_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ds_data *data = buf;
+ struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
+ u32 *cpu_list = (u32 *) (tag + 1);
+ u64 req_num = tag->req_num;
+ cpumask_t mask;
+ unsigned int i;
+ int err;
+
+ switch (tag->type) {
+ case DR_CPU_CONFIGURE:
+ case DR_CPU_UNCONFIGURE:
+ case DR_CPU_FORCE_UNCONFIGURE:
+ break;
+
+ default:
+ dr_cpu_send_error(dp, cp, data);
+ return;
+ }
+
+ purge_dups(cpu_list, tag->num_records);
+
+ cpus_clear(mask);
+ for (i = 0; i < tag->num_records; i++) {
+ if (cpu_list[i] == CPU_SENTINEL)
+ continue;
+
+ if (cpu_list[i] < NR_CPUS)
+ cpu_set(cpu_list[i], mask);
+ }
+
+ if (tag->type == DR_CPU_CONFIGURE)
+ err = dr_cpu_configure(dp, cp, req_num, &mask);
+ else
+ err = dr_cpu_unconfigure(dp, cp, req_num, &mask);
+
+ if (err)
+ dr_cpu_send_error(dp, cp, data);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+struct ds_pri_msg {
+ __u64 req_num;
+ __u64 type;
+#define DS_PRI_REQUEST 0x00
+#define DS_PRI_DATA 0x01
+#define DS_PRI_UPDATE 0x02
+};
+
+static void ds_pri_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ds_data *dpkt = buf;
+ struct ds_pri_msg *rp;
+
+ rp = (struct ds_pri_msg *) (dpkt + 1);
+
+ printk(KERN_INFO "ds-%lu: PRI REQ [%lx:%lx], len=%d\n",
+ dp->id, rp->req_num, rp->type, len);
+}
+
+struct ds_var_hdr {
+ __u32 type;
+#define DS_VAR_SET_REQ 0x00
+#define DS_VAR_DELETE_REQ 0x01
+#define DS_VAR_SET_RESP 0x02
+#define DS_VAR_DELETE_RESP 0x03
+};
+
+struct ds_var_set_msg {
+ struct ds_var_hdr hdr;
+ char name_and_value[0];
+};
+
+struct ds_var_delete_msg {
+ struct ds_var_hdr hdr;
+ char name[0];
+};
+
+struct ds_var_resp {
+ struct ds_var_hdr hdr;
+ __u32 result;
+#define DS_VAR_SUCCESS 0x00
+#define DS_VAR_NO_SPACE 0x01
+#define DS_VAR_INVALID_VAR 0x02
+#define DS_VAR_INVALID_VAL 0x03
+#define DS_VAR_NOT_PRESENT 0x04
+};
+
+static DEFINE_MUTEX(ds_var_mutex);
+static int ds_var_doorbell;
+static int ds_var_response;
+
+static void ds_var_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ds_data *dpkt = buf;
+ struct ds_var_resp *rp;
+
+ rp = (struct ds_var_resp *) (dpkt + 1);
+
+ if (rp->hdr.type != DS_VAR_SET_RESP &&
+ rp->hdr.type != DS_VAR_DELETE_RESP)
+ return;
+
+ ds_var_response = rp->result;
+ wmb();
+ ds_var_doorbell = 1;
+}
+
+void ldom_set_var(const char *var, const char *value)
+{
+ struct ds_cap_state *cp;
+ struct ds_info *dp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ds_lock, flags);
+ cp = NULL;
+ for (dp = ds_info_list; dp; dp = dp->next) {
+ struct ds_cap_state *tmp;
+
+ tmp = find_cap_by_string(dp, "var-config");
+ if (tmp && tmp->state == CAP_STATE_REGISTERED) {
+ cp = tmp;
+ break;
+ }
+ }
+ if (!cp) {
+ for (dp = ds_info_list; dp; dp = dp->next) {
+ struct ds_cap_state *tmp;
+
+ tmp = find_cap_by_string(dp, "var-config-backup");
+ if (tmp && tmp->state == CAP_STATE_REGISTERED) {
+ cp = tmp;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ if (cp) {
+ union {
+ struct {
+ struct ds_data data;
+ struct ds_var_set_msg msg;
+ } header;
+ char all[512];
+ } pkt;
+ char *base, *p;
+ int msg_len, loops;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.header.data.tag.type = DS_DATA;
+ pkt.header.data.handle = cp->handle;
+ pkt.header.msg.hdr.type = DS_VAR_SET_REQ;
+ base = p = &pkt.header.msg.name_and_value[0];
+ strcpy(p, var);
+ p += strlen(var) + 1;
+ strcpy(p, value);
+ p += strlen(value) + 1;
+
+ msg_len = (sizeof(struct ds_data) +
+ sizeof(struct ds_var_set_msg) +
+ (p - base));
+ msg_len = (msg_len + 3) & ~3;
+ pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
+
+ mutex_lock(&ds_var_mutex);
+
+ spin_lock_irqsave(&ds_lock, flags);
+ ds_var_doorbell = 0;
+ ds_var_response = -1;
+
+ __ds_send(dp->lp, &pkt, msg_len);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ loops = 1000;
+ while (ds_var_doorbell == 0) {
+ if (loops-- < 0)
+ break;
+ barrier();
+ udelay(100);
+ }
+
+ mutex_unlock(&ds_var_mutex);
+
+ if (ds_var_doorbell == 0 ||
+ ds_var_response != DS_VAR_SUCCESS)
+ printk(KERN_ERR "ds-%lu: var-config [%s:%s] "
+ "failed, response(%d).\n",
+ dp->id, var, value,
+ ds_var_response);
+ } else {
+ printk(KERN_ERR PFX "var-config not registered so "
+ "could not set (%s) variable to (%s).\n",
+ var, value);
+ }
+}
+
+void ldom_reboot(const char *boot_command)
+{
+ /* Don't bother with any of this if the boot_command
+ * is empty.
+ */
+ if (boot_command && strlen(boot_command)) {
+ char full_boot_str[256];
+
+ strcpy(full_boot_str, "boot ");
+ strcpy(full_boot_str + strlen("boot "), boot_command);
+
+ ldom_set_var("reboot-command", full_boot_str);
+ }
+ sun4v_mach_sir();
+}
+
+void ldom_power_off(void)
+{
+ sun4v_mach_exit(0);
+}
+
+static void ds_conn_reset(struct ds_info *dp)
+{
+ printk(KERN_ERR "ds-%lu: ds_conn_reset() from %p\n",
+ dp->id, __builtin_return_address(0));
+}
+
+static int register_services(struct ds_info *dp)
+{
+ struct ldc_channel *lp = dp->lp;
+ int i;
+
+ for (i = 0; i < dp->num_ds_states; i++) {
+ struct {
+ struct ds_reg_req req;
+ u8 id_buf[256];
+ } pbuf;
+ struct ds_cap_state *cp = &dp->ds_states[i];
+ int err, msg_len;
+ u64 new_count;
+
+ if (cp->state == CAP_STATE_REGISTERED)
+ continue;
+
+ new_count = sched_clock() & 0xffffffff;
+ cp->handle = ((u64) i << 32) | new_count;
+
+ msg_len = (sizeof(struct ds_reg_req) +
+ strlen(cp->service_id));
+
+ memset(&pbuf, 0, sizeof(pbuf));
+ pbuf.req.tag.type = DS_REG_REQ;
+ pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag));
+ pbuf.req.handle = cp->handle;
+ pbuf.req.major = 1;
+ pbuf.req.minor = 0;
+ strcpy(pbuf.req.svc_id, cp->service_id);
+
+ err = __ds_send(lp, &pbuf, msg_len);
+ if (err > 0)
+ cp->state = CAP_STATE_REG_SENT;
+ }
+ return 0;
+}
+
+static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt)
+{
+
+ if (dp->hs_state == DS_HS_START) {
+ if (pkt->type != DS_INIT_ACK)
+ goto conn_reset;
+
+ dp->hs_state = DS_HS_DONE;
+
+ return register_services(dp);
+ }
+
+ if (dp->hs_state != DS_HS_DONE)
+ goto conn_reset;
+
+ if (pkt->type == DS_REG_ACK) {
+ struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt;
+ struct ds_cap_state *cp = find_cap(dp, ap->handle);
+
+ if (!cp) {
+ printk(KERN_ERR "ds-%lu: REG ACK for unknown "
+ "handle %lx\n", dp->id, ap->handle);
+ return 0;
+ }
+ printk(KERN_INFO "ds-%lu: Registered %s service.\n",
+ dp->id, cp->service_id);
+ cp->state = CAP_STATE_REGISTERED;
+ } else if (pkt->type == DS_REG_NACK) {
+ struct ds_reg_nack *np = (struct ds_reg_nack *) pkt;
+ struct ds_cap_state *cp = find_cap(dp, np->handle);
+
+ if (!cp) {
+ printk(KERN_ERR "ds-%lu: REG NACK for "
+ "unknown handle %lx\n",
+ dp->id, np->handle);
+ return 0;
+ }
+ cp->state = CAP_STATE_UNKNOWN;
+ }
+
+ return 0;
+
+conn_reset:
+ ds_conn_reset(dp);
+ return -ECONNRESET;
+}
+
+static void __send_ds_nack(struct ds_info *dp, u64 handle)
+{
+ struct ds_data_nack nack = {
+ .tag = {
+ .type = DS_NACK,
+ .len = (sizeof(struct ds_data_nack) -
+ sizeof(struct ds_msg_tag)),
+ },
+ .handle = handle,
+ .result = DS_INV_HDL,
+ };
+
+ __ds_send(dp->lp, &nack, sizeof(nack));
+}
+
+static LIST_HEAD(ds_work_list);
+static DECLARE_WAIT_QUEUE_HEAD(ds_wait);
+
+struct ds_queue_entry {
+ struct list_head list;
+ struct ds_info *dp;
+ int req_len;
+ int __pad;
+ u64 req[0];
+};
+
+static void process_ds_work(void)
+{
+ struct ds_queue_entry *qp, *tmp;
+ unsigned long flags;
+ LIST_HEAD(todo);
+
+ spin_lock_irqsave(&ds_lock, flags);
+ list_splice_init(&ds_work_list, &todo);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ list_for_each_entry_safe(qp, tmp, &todo, list) {
+ struct ds_data *dpkt = (struct ds_data *) qp->req;
+ struct ds_info *dp = qp->dp;
+ struct ds_cap_state *cp = find_cap(dp, dpkt->handle);
+ int req_len = qp->req_len;
+
+ if (!cp) {
+ printk(KERN_ERR "ds-%lu: Data for unknown "
+ "handle %lu\n",
+ dp->id, dpkt->handle);
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __send_ds_nack(dp, dpkt->handle);
+ spin_unlock_irqrestore(&ds_lock, flags);
+ } else {
+ cp->data(dp, cp, dpkt, req_len);
+ }
+
+ list_del(&qp->list);
+ kfree(qp);
+ }
+}
+
+static int ds_thread(void *__unused)
+{
+ DEFINE_WAIT(wait);
+
+ while (1) {
+ prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE);
+ if (list_empty(&ds_work_list))
+ schedule();
+ finish_wait(&ds_wait, &wait);
+
+ if (kthread_should_stop())
+ break;
+
+ process_ds_work();
+ }
+
+ return 0;
+}
+
+static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len)
+{
+ struct ds_data *dpkt = (struct ds_data *) pkt;
+ struct ds_queue_entry *qp;
+
+ qp = kmalloc(sizeof(struct ds_queue_entry) + len, GFP_ATOMIC);
+ if (!qp) {
+ __send_ds_nack(dp, dpkt->handle);
+ } else {
+ qp->dp = dp;
+ memcpy(&qp->req, pkt, len);
+ list_add_tail(&qp->list, &ds_work_list);
+ wake_up(&ds_wait);
+ }
+ return 0;
+}
+
+static void ds_up(struct ds_info *dp)
+{
+ struct ldc_channel *lp = dp->lp;
+ struct ds_ver_req req;
+ int err;
+
+ req.tag.type = DS_INIT_REQ;
+ req.tag.len = sizeof(req) - sizeof(struct ds_msg_tag);
+ req.ver.major = 1;
+ req.ver.minor = 0;
+
+ err = __ds_send(lp, &req, sizeof(req));
+ if (err > 0)
+ dp->hs_state = DS_HS_START;
+}
+
+static void ds_reset(struct ds_info *dp)
+{
+ int i;
+
+ dp->hs_state = 0;
+
+ for (i = 0; i < dp->num_ds_states; i++) {
+ struct ds_cap_state *cp = &dp->ds_states[i];
+
+ cp->state = CAP_STATE_UNKNOWN;
+ }
+}
+
+static void ds_event(void *arg, int event)
+{
+ struct ds_info *dp = arg;
+ struct ldc_channel *lp = dp->lp;
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&ds_lock, flags);
+
+ if (event == LDC_EVENT_UP) {
+ ds_up(dp);
+ spin_unlock_irqrestore(&ds_lock, flags);
+ return;
+ }
+
+ if (event == LDC_EVENT_RESET) {
+ ds_reset(dp);
+ spin_unlock_irqrestore(&ds_lock, flags);
+ return;
+ }
+
+ if (event != LDC_EVENT_DATA_READY) {
+ printk(KERN_WARNING "ds-%lu: Unexpected LDC event %d\n",
+ dp->id, event);
+ spin_unlock_irqrestore(&ds_lock, flags);
+ return;
+ }
+
+ err = 0;
+ while (1) {
+ struct ds_msg_tag *tag;
+
+ err = ldc_read(lp, dp->rcv_buf, sizeof(*tag));
+
+ if (unlikely(err < 0)) {
+ if (err == -ECONNRESET)
+ ds_conn_reset(dp);
+ break;
+ }
+ if (err == 0)
+ break;
+
+ tag = dp->rcv_buf;
+ err = ldc_read(lp, tag + 1, tag->len);
+
+ if (unlikely(err < 0)) {
+ if (err == -ECONNRESET)
+ ds_conn_reset(dp);
+ break;
+ }
+ if (err < tag->len)
+ break;
+
+ if (tag->type < DS_DATA)
+ err = ds_handshake(dp, dp->rcv_buf);
+ else
+ err = ds_data(dp, dp->rcv_buf,
+ sizeof(*tag) + err);
+ if (err == -ECONNRESET)
+ break;
+ }
+
+ spin_unlock_irqrestore(&ds_lock, flags);
+}
+
+static int __devinit ds_probe(struct vio_dev *vdev,
+ const struct vio_device_id *id)
+{
+ static int ds_version_printed;
+ struct ldc_channel_config ds_cfg = {
+ .event = ds_event,
+ .mtu = 4096,
+ .mode = LDC_MODE_STREAM,
+ };
+ struct mdesc_handle *hp;
+ struct ldc_channel *lp;
+ struct ds_info *dp;
+ const u64 *val;
+ int err, i;
+
+ if (ds_version_printed++ == 0)
+ printk(KERN_INFO "%s", version);
+
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!dp)
+ goto out_err;
+
+ hp = mdesc_grab();
+ val = mdesc_get_property(hp, vdev->mp, "id", NULL);
+ if (val)
+ dp->id = *val;
+ mdesc_release(hp);
+
+ dp->rcv_buf = kzalloc(4096, GFP_KERNEL);
+ if (!dp->rcv_buf)
+ goto out_free_dp;
+
+ dp->rcv_buf_len = 4096;
+
+ dp->ds_states = kzalloc(sizeof(ds_states_template),
+ GFP_KERNEL);
+ if (!dp->ds_states)
+ goto out_free_rcv_buf;
+
+ memcpy(dp->ds_states, ds_states_template,
+ sizeof(ds_states_template));
+ dp->num_ds_states = ARRAY_SIZE(ds_states_template);
+
+ for (i = 0; i < dp->num_ds_states; i++)
+ dp->ds_states[i].handle = ((u64)i << 32);
+
+ ds_cfg.tx_irq = vdev->tx_irq;
+ ds_cfg.rx_irq = vdev->rx_irq;
+
+ lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
+ if (IS_ERR(lp)) {
+ err = PTR_ERR(lp);
+ goto out_free_ds_states;
+ }
+ dp->lp = lp;
+
+ err = ldc_bind(lp, "DS");
+ if (err)
+ goto out_free_ldc;
+
+ spin_lock_irq(&ds_lock);
+ dp->next = ds_info_list;
+ ds_info_list = dp;
+ spin_unlock_irq(&ds_lock);
+
+ return err;
+
+out_free_ldc:
+ ldc_free(dp->lp);
+
+out_free_ds_states:
+ kfree(dp->ds_states);
+
+out_free_rcv_buf:
+ kfree(dp->rcv_buf);
+
+out_free_dp:
+ kfree(dp);
+
+out_err:
+ return err;
+}
+
+static int ds_remove(struct vio_dev *vdev)
+{
+ return 0;
+}
+
+static struct vio_device_id __initdata ds_match[] = {
+ {
+ .type = "domain-services-port",
+ },
+ {},
+};
+
+static struct vio_driver ds_driver = {
+ .id_table = ds_match,
+ .probe = ds_probe,
+ .remove = ds_remove,
+ .driver = {
+ .name = "ds",
+ .owner = THIS_MODULE,
+ }
+};
+
+static int __init ds_init(void)
+{
+ kthread_run(ds_thread, NULL, "kldomd");
+
+ return vio_register_driver(&ds_driver);
+}
+
+subsys_initcall(ds_init);
diff --git a/arch/sparc/kernel/dtlb_miss.S b/arch/sparc/kernel/dtlb_miss.S
new file mode 100644
index 000000000000..09a6a15a7105
--- /dev/null
+++ b/arch/sparc/kernel/dtlb_miss.S
@@ -0,0 +1,39 @@
+/* DTLB ** ICACHE line 1: Context 0 check and TSB load */
+ ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer
+ ldxa [%g0] ASI_DMMU, %g6 ! Get TAG TARGET
+ srlx %g6, 48, %g5 ! Get context
+ sllx %g6, 22, %g6 ! Zero out context
+ brz,pn %g5, kvmap_dtlb ! Context 0 processing
+ srlx %g6, 22, %g6 ! Delay slot
+ TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
+ cmp %g4, %g6 ! Compare TAG
+
+/* DTLB ** ICACHE line 2: TSB compare and TLB load */
+ bne,pn %xcc, tsb_miss_dtlb ! Miss
+ mov FAULT_CODE_DTLB, %g3
+ stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load TLB
+ retry ! Trap done
+ nop
+ nop
+ nop
+ nop
+
+/* DTLB ** ICACHE line 3: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* DTLB ** ICACHE line 4: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S
new file mode 100644
index 000000000000..b2c2c5be281c
--- /dev/null
+++ b/arch/sparc/kernel/dtlb_prot.S
@@ -0,0 +1,54 @@
+/*
+ * dtlb_prot.S: DTLB protection trap strategy.
+ * This is included directly into the trap table.
+ *
+ * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+/* Ways we can get here:
+ *
+ * [TL == 0] 1) User stores to readonly pages.
+ * [TL == 0] 2) Nucleus stores to user readonly pages.
+ * [TL > 0] 3) Nucleus stores to user readonly stack frame.
+ */
+
+/* PROT ** ICACHE line 1: User DTLB protection trap */
+ mov TLB_SFSR, %g1
+ stxa %g0, [%g1] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync ! Synchronize stores
+ rdpr %pstate, %g5 ! Move into alt-globals
+ wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate
+ rdpr %tl, %g1 ! Need a winfixup?
+ cmp %g1, 1 ! Trap level >1?
+ mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr
+
+/* PROT ** ICACHE line 2: More real fault processing */
+ bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup
+ ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5
+ ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault
+ mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
+ nop
+ nop
+ nop
+ nop
+
+/* PROT ** ICACHE line 3: Unused... */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* PROT ** ICACHE line 4: Unused... */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
new file mode 100644
index 000000000000..77dbf6d45faf
--- /dev/null
+++ b/arch/sparc/kernel/ebus.c
@@ -0,0 +1,257 @@
+/* ebus.c: EBUS DMA library code.
+ *
+ * Copyright (C) 1997 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/ebus_dma.h>
+#include <asm/io.h>
+
+#define EBDMA_CSR 0x00UL /* Control/Status */
+#define EBDMA_ADDR 0x04UL /* DMA Address */
+#define EBDMA_COUNT 0x08UL /* DMA Count */
+
+#define EBDMA_CSR_INT_PEND 0x00000001
+#define EBDMA_CSR_ERR_PEND 0x00000002
+#define EBDMA_CSR_DRAIN 0x00000004
+#define EBDMA_CSR_INT_EN 0x00000010
+#define EBDMA_CSR_RESET 0x00000080
+#define EBDMA_CSR_WRITE 0x00000100
+#define EBDMA_CSR_EN_DMA 0x00000200
+#define EBDMA_CSR_CYC_PEND 0x00000400
+#define EBDMA_CSR_DIAG_RD_DONE 0x00000800
+#define EBDMA_CSR_DIAG_WR_DONE 0x00001000
+#define EBDMA_CSR_EN_CNT 0x00002000
+#define EBDMA_CSR_TC 0x00004000
+#define EBDMA_CSR_DIS_CSR_DRN 0x00010000
+#define EBDMA_CSR_BURST_SZ_MASK 0x000c0000
+#define EBDMA_CSR_BURST_SZ_1 0x00080000
+#define EBDMA_CSR_BURST_SZ_4 0x00000000
+#define EBDMA_CSR_BURST_SZ_8 0x00040000
+#define EBDMA_CSR_BURST_SZ_16 0x000c0000
+#define EBDMA_CSR_DIAG_EN 0x00100000
+#define EBDMA_CSR_DIS_ERR_PEND 0x00400000
+#define EBDMA_CSR_TCI_DIS 0x00800000
+#define EBDMA_CSR_EN_NEXT 0x01000000
+#define EBDMA_CSR_DMA_ON 0x02000000
+#define EBDMA_CSR_A_LOADED 0x04000000
+#define EBDMA_CSR_NA_LOADED 0x08000000
+#define EBDMA_CSR_DEV_ID_MASK 0xf0000000
+
+#define EBUS_DMA_RESET_TIMEOUT 10000
+
+static void __ebus_dma_reset(struct ebus_dma_info *p, int no_drain)
+{
+ int i;
+ u32 val = 0;
+
+ writel(EBDMA_CSR_RESET, p->regs + EBDMA_CSR);
+ udelay(1);
+
+ if (no_drain)
+ return;
+
+ for (i = EBUS_DMA_RESET_TIMEOUT; i > 0; i--) {
+ val = readl(p->regs + EBDMA_CSR);
+
+ if (!(val & (EBDMA_CSR_DRAIN | EBDMA_CSR_CYC_PEND)))
+ break;
+ udelay(10);
+ }
+}
+
+static irqreturn_t ebus_dma_irq(int irq, void *dev_id)
+{
+ struct ebus_dma_info *p = dev_id;
+ unsigned long flags;
+ u32 csr = 0;
+
+ spin_lock_irqsave(&p->lock, flags);
+ csr = readl(p->regs + EBDMA_CSR);
+ writel(csr, p->regs + EBDMA_CSR);
+ spin_unlock_irqrestore(&p->lock, flags);
+
+ if (csr & EBDMA_CSR_ERR_PEND) {
+ printk(KERN_CRIT "ebus_dma(%s): DMA error!\n", p->name);
+ p->callback(p, EBUS_DMA_EVENT_ERROR, p->client_cookie);
+ return IRQ_HANDLED;
+ } else if (csr & EBDMA_CSR_INT_PEND) {
+ p->callback(p,
+ (csr & EBDMA_CSR_TC) ?
+ EBUS_DMA_EVENT_DMA : EBUS_DMA_EVENT_DEVICE,
+ p->client_cookie);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+
+}
+
+int ebus_dma_register(struct ebus_dma_info *p)
+{
+ u32 csr;
+
+ if (!p->regs)
+ return -EINVAL;
+ if (p->flags & ~(EBUS_DMA_FLAG_USE_EBDMA_HANDLER |
+ EBUS_DMA_FLAG_TCI_DISABLE))
+ return -EINVAL;
+ if ((p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) && !p->callback)
+ return -EINVAL;
+ if (!strlen(p->name))
+ return -EINVAL;
+
+ __ebus_dma_reset(p, 1);
+
+ csr = EBDMA_CSR_BURST_SZ_16 | EBDMA_CSR_EN_CNT;
+
+ if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE)
+ csr |= EBDMA_CSR_TCI_DIS;
+
+ writel(csr, p->regs + EBDMA_CSR);
+
+ return 0;
+}
+EXPORT_SYMBOL(ebus_dma_register);
+
+int ebus_dma_irq_enable(struct ebus_dma_info *p, int on)
+{
+ unsigned long flags;
+ u32 csr;
+
+ if (on) {
+ if (p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) {
+ if (request_irq(p->irq, ebus_dma_irq, IRQF_SHARED, p->name, p))
+ return -EBUSY;
+ }
+
+ spin_lock_irqsave(&p->lock, flags);
+ csr = readl(p->regs + EBDMA_CSR);
+ csr |= EBDMA_CSR_INT_EN;
+ writel(csr, p->regs + EBDMA_CSR);
+ spin_unlock_irqrestore(&p->lock, flags);
+ } else {
+ spin_lock_irqsave(&p->lock, flags);
+ csr = readl(p->regs + EBDMA_CSR);
+ csr &= ~EBDMA_CSR_INT_EN;
+ writel(csr, p->regs + EBDMA_CSR);
+ spin_unlock_irqrestore(&p->lock, flags);
+
+ if (p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) {
+ free_irq(p->irq, p);
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ebus_dma_irq_enable);
+
+void ebus_dma_unregister(struct ebus_dma_info *p)
+{
+ unsigned long flags;
+ u32 csr;
+ int irq_on = 0;
+
+ spin_lock_irqsave(&p->lock, flags);
+ csr = readl(p->regs + EBDMA_CSR);
+ if (csr & EBDMA_CSR_INT_EN) {
+ csr &= ~EBDMA_CSR_INT_EN;
+ writel(csr, p->regs + EBDMA_CSR);
+ irq_on = 1;
+ }
+ spin_unlock_irqrestore(&p->lock, flags);
+
+ if (irq_on)
+ free_irq(p->irq, p);
+}
+EXPORT_SYMBOL(ebus_dma_unregister);
+
+int ebus_dma_request(struct ebus_dma_info *p, dma_addr_t bus_addr, size_t len)
+{
+ unsigned long flags;
+ u32 csr;
+ int err;
+
+ if (len >= (1 << 24))
+ return -EINVAL;
+
+ spin_lock_irqsave(&p->lock, flags);
+ csr = readl(p->regs + EBDMA_CSR);
+ err = -EINVAL;
+ if (!(csr & EBDMA_CSR_EN_DMA))
+ goto out;
+ err = -EBUSY;
+ if (csr & EBDMA_CSR_NA_LOADED)
+ goto out;
+
+ writel(len, p->regs + EBDMA_COUNT);
+ writel(bus_addr, p->regs + EBDMA_ADDR);
+ err = 0;
+
+out:
+ spin_unlock_irqrestore(&p->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ebus_dma_request);
+
+void ebus_dma_prepare(struct ebus_dma_info *p, int write)
+{
+ unsigned long flags;
+ u32 csr;
+
+ spin_lock_irqsave(&p->lock, flags);
+ __ebus_dma_reset(p, 0);
+
+ csr = (EBDMA_CSR_INT_EN |
+ EBDMA_CSR_EN_CNT |
+ EBDMA_CSR_BURST_SZ_16 |
+ EBDMA_CSR_EN_NEXT);
+
+ if (write)
+ csr |= EBDMA_CSR_WRITE;
+ if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE)
+ csr |= EBDMA_CSR_TCI_DIS;
+
+ writel(csr, p->regs + EBDMA_CSR);
+
+ spin_unlock_irqrestore(&p->lock, flags);
+}
+EXPORT_SYMBOL(ebus_dma_prepare);
+
+unsigned int ebus_dma_residue(struct ebus_dma_info *p)
+{
+ return readl(p->regs + EBDMA_COUNT);
+}
+EXPORT_SYMBOL(ebus_dma_residue);
+
+unsigned int ebus_dma_addr(struct ebus_dma_info *p)
+{
+ return readl(p->regs + EBDMA_ADDR);
+}
+EXPORT_SYMBOL(ebus_dma_addr);
+
+void ebus_dma_enable(struct ebus_dma_info *p, int on)
+{
+ unsigned long flags;
+ u32 orig_csr, csr;
+
+ spin_lock_irqsave(&p->lock, flags);
+ orig_csr = csr = readl(p->regs + EBDMA_CSR);
+ if (on)
+ csr |= EBDMA_CSR_EN_DMA;
+ else
+ csr &= ~EBDMA_CSR_EN_DMA;
+ if ((orig_csr & EBDMA_CSR_EN_DMA) !=
+ (csr & EBDMA_CSR_EN_DMA))
+ writel(csr, p->regs + EBDMA_CSR);
+ spin_unlock_irqrestore(&p->lock, flags);
+}
+EXPORT_SYMBOL(ebus_dma_enable);
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
new file mode 100644
index 000000000000..4f53a2395ac6
--- /dev/null
+++ b/arch/sparc/kernel/entry.h
@@ -0,0 +1,229 @@
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+
+/* irq */
+extern void handler_irq(int irq, struct pt_regs *regs);
+
+#ifdef CONFIG_SPARC32
+/* traps */
+extern void do_hw_interrupt(struct pt_regs *regs, unsigned long type);
+extern void do_illegal_instruction(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+
+extern void do_priv_instruction(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+extern void do_memaccess_unaligned(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc,
+ unsigned long psr);
+extern void do_fpd_trap(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+extern void do_fpe_trap(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+extern void handle_tag_overflow(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+extern void handle_watchpoint(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+extern void handle_reg_access(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+extern void handle_cp_disabled(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+extern void handle_cp_exception(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+
+
+
+/* entry.S */
+extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
+ void *fpqueue, unsigned long *fpqdepth);
+extern void fpload(unsigned long *fpregs, unsigned long *fsr);
+
+#else /* CONFIG_SPARC32 */
+extern void __init per_cpu_patch(void);
+extern void __init sun4v_patch(void);
+extern void __init boot_cpu_id_too_large(int cpu);
+extern unsigned int dcache_parity_tl1_occurred;
+extern unsigned int icache_parity_tl1_occurred;
+
+extern asmlinkage void update_perfctrs(void);
+extern asmlinkage void sparc_breakpoint(struct pt_regs *regs);
+extern void timer_interrupt(int irq, struct pt_regs *regs);
+
+extern void do_notify_resume(struct pt_regs *regs,
+ unsigned long orig_i0,
+ unsigned long thread_info_flags);
+
+extern asmlinkage int syscall_trace_enter(struct pt_regs *regs);
+extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
+
+extern void bad_trap_tl1(struct pt_regs *regs, long lvl);
+
+extern void do_fpe_common(struct pt_regs *regs);
+extern void do_fpieee(struct pt_regs *regs);
+extern void do_fpother(struct pt_regs *regs);
+extern void do_tof(struct pt_regs *regs);
+extern void do_div0(struct pt_regs *regs);
+extern void do_illegal_instruction(struct pt_regs *regs);
+extern void mem_address_unaligned(struct pt_regs *regs,
+ unsigned long sfar,
+ unsigned long sfsr);
+extern void sun4v_do_mna(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void do_privop(struct pt_regs *regs);
+extern void do_privact(struct pt_regs *regs);
+extern void do_cee(struct pt_regs *regs);
+extern void do_cee_tl1(struct pt_regs *regs);
+extern void do_dae_tl1(struct pt_regs *regs);
+extern void do_iae_tl1(struct pt_regs *regs);
+extern void do_div0_tl1(struct pt_regs *regs);
+extern void do_fpdis_tl1(struct pt_regs *regs);
+extern void do_fpieee_tl1(struct pt_regs *regs);
+extern void do_fpother_tl1(struct pt_regs *regs);
+extern void do_ill_tl1(struct pt_regs *regs);
+extern void do_irq_tl1(struct pt_regs *regs);
+extern void do_lddfmna_tl1(struct pt_regs *regs);
+extern void do_stdfmna_tl1(struct pt_regs *regs);
+extern void do_paw(struct pt_regs *regs);
+extern void do_paw_tl1(struct pt_regs *regs);
+extern void do_vaw(struct pt_regs *regs);
+extern void do_vaw_tl1(struct pt_regs *regs);
+extern void do_tof_tl1(struct pt_regs *regs);
+extern void do_getpsr(struct pt_regs *regs);
+
+extern void spitfire_insn_access_exception(struct pt_regs *regs,
+ unsigned long sfsr,
+ unsigned long sfar);
+extern void spitfire_insn_access_exception_tl1(struct pt_regs *regs,
+ unsigned long sfsr,
+ unsigned long sfar);
+extern void spitfire_data_access_exception(struct pt_regs *regs,
+ unsigned long sfsr,
+ unsigned long sfar);
+extern void spitfire_data_access_exception_tl1(struct pt_regs *regs,
+ unsigned long sfsr,
+ unsigned long sfar);
+extern void spitfire_access_error(struct pt_regs *regs,
+ unsigned long status_encoded,
+ unsigned long afar);
+
+extern void cheetah_fecc_handler(struct pt_regs *regs,
+ unsigned long afsr,
+ unsigned long afar);
+extern void cheetah_cee_handler(struct pt_regs *regs,
+ unsigned long afsr,
+ unsigned long afar);
+extern void cheetah_deferred_handler(struct pt_regs *regs,
+ unsigned long afsr,
+ unsigned long afar);
+extern void cheetah_plus_parity_error(int type, struct pt_regs *regs);
+
+extern void sun4v_insn_access_exception(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void sun4v_insn_access_exception_tl1(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void sun4v_data_access_exception(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void sun4v_data_access_exception_tl1(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
+extern void sun4v_resum_error(struct pt_regs *regs,
+ unsigned long offset);
+extern void sun4v_resum_overflow(struct pt_regs *regs);
+extern void sun4v_nonresum_error(struct pt_regs *regs,
+ unsigned long offset);
+extern void sun4v_nonresum_overflow(struct pt_regs *regs);
+
+extern unsigned long sun4v_err_itlb_vaddr;
+extern unsigned long sun4v_err_itlb_ctx;
+extern unsigned long sun4v_err_itlb_pte;
+extern unsigned long sun4v_err_itlb_error;
+
+extern void sun4v_itlb_error_report(struct pt_regs *regs, int tl);
+
+extern unsigned long sun4v_err_dtlb_vaddr;
+extern unsigned long sun4v_err_dtlb_ctx;
+extern unsigned long sun4v_err_dtlb_pte;
+extern unsigned long sun4v_err_dtlb_error;
+
+extern void sun4v_dtlb_error_report(struct pt_regs *regs, int tl);
+extern void hypervisor_tlbop_error(unsigned long err,
+ unsigned long op);
+extern void hypervisor_tlbop_error_xcall(unsigned long err,
+ unsigned long op);
+
+/* WARNING: The error trap handlers in assembly know the precise
+ * layout of the following structure.
+ *
+ * C-level handlers in traps.c use this information to log the
+ * error and then determine how to recover (if possible).
+ */
+struct cheetah_err_info {
+/*0x00*/u64 afsr;
+/*0x08*/u64 afar;
+
+ /* D-cache state */
+/*0x10*/u64 dcache_data[4]; /* The actual data */
+/*0x30*/u64 dcache_index; /* D-cache index */
+/*0x38*/u64 dcache_tag; /* D-cache tag/valid */
+/*0x40*/u64 dcache_utag; /* D-cache microtag */
+/*0x48*/u64 dcache_stag; /* D-cache snooptag */
+
+ /* I-cache state */
+/*0x50*/u64 icache_data[8]; /* The actual insns + predecode */
+/*0x90*/u64 icache_index; /* I-cache index */
+/*0x98*/u64 icache_tag; /* I-cache phys tag */
+/*0xa0*/u64 icache_utag; /* I-cache microtag */
+/*0xa8*/u64 icache_stag; /* I-cache snooptag */
+/*0xb0*/u64 icache_upper; /* I-cache upper-tag */
+/*0xb8*/u64 icache_lower; /* I-cache lower-tag */
+
+ /* E-cache state */
+/*0xc0*/u64 ecache_data[4]; /* 32 bytes from staging registers */
+/*0xe0*/u64 ecache_index; /* E-cache index */
+/*0xe8*/u64 ecache_tag; /* E-cache tag/state */
+
+/*0xf0*/u64 __pad[32 - 30];
+};
+#define CHAFSR_INVALID ((u64)-1L)
+
+/* This is allocated at boot time based upon the largest hardware
+ * cpu ID in the system. We allocate two entries per cpu, one for
+ * TL==0 logging and one for TL >= 1 logging.
+ */
+extern struct cheetah_err_info *cheetah_error_log;
+
+/* UPA nodes send interrupt packet to UltraSparc with first data reg
+ * value low 5 (7 on Starfire) bits holding the IRQ identifier being
+ * delivered. We must translate this into a non-vector IRQ so we can
+ * set the softint on this cpu.
+ *
+ * To make processing these packets efficient and race free we use
+ * an array of irq buckets below. The interrupt vector handler in
+ * entry.S feeds incoming packets into per-cpu pil-indexed lists.
+ *
+ * If you make changes to ino_bucket, please update hand coded assembler
+ * of the vectored interrupt trap handler(s) in entry.S and sun4v_ivec.S
+ */
+struct ino_bucket {
+/*0x00*/unsigned long __irq_chain_pa;
+
+ /* Virtual interrupt number assigned to this INO. */
+/*0x08*/unsigned int __virt_irq;
+/*0x0c*/unsigned int __pad;
+};
+
+extern struct ino_bucket *ivector_table;
+extern unsigned long ivector_table_pa;
+
+extern void init_irqwork_curcpu(void);
+extern void __cpuinit sun4v_register_mondo_queues(int this_cpu);
+
+#endif /* CONFIG_SPARC32 */
+#endif /* _ENTRY_H */
diff --git a/arch/sparc/kernel/etrap.S b/arch/sparc/kernel/etrap_32.S
index e806fcdc46db..e806fcdc46db 100644
--- a/arch/sparc/kernel/etrap.S
+++ b/arch/sparc/kernel/etrap_32.S
diff --git a/arch/sparc/kernel/etrap_64.S b/arch/sparc/kernel/etrap_64.S
new file mode 100644
index 000000000000..786b185e6e3f
--- /dev/null
+++ b/arch/sparc/kernel/etrap_64.S
@@ -0,0 +1,236 @@
+/*
+ * etrap.S: Preparing for entry into the kernel on Sparc V9.
+ *
+ * Copyright (C) 1996, 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+
+#include <asm/asi.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <asm/spitfire.h>
+#include <asm/head.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+
+#define TASK_REGOFF (THREAD_SIZE-TRACEREG_SZ-STACKFRAME_SZ)
+#define ETRAP_PSTATE1 (PSTATE_TSO | PSTATE_PRIV)
+#define ETRAP_PSTATE2 \
+ (PSTATE_TSO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE)
+
+/*
+ * On entry, %g7 is return address - 0x4.
+ * %g4 and %g5 will be preserved %l4 and %l5 respectively.
+ */
+
+ .text
+ .align 64
+ .globl etrap_syscall, etrap, etrap_irq, etraptl1
+etrap: rdpr %pil, %g2
+etrap_irq: clr %g3
+etrap_syscall: TRAP_LOAD_THREAD_REG(%g6, %g1)
+ rdpr %tstate, %g1
+ or %g1, %g3, %g1
+ sllx %g2, 20, %g3
+ andcc %g1, TSTATE_PRIV, %g0
+ or %g1, %g3, %g1
+ bne,pn %xcc, 1f
+ sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2
+ wrpr %g0, 7, %cleanwin
+
+ sethi %hi(TASK_REGOFF), %g2
+ sethi %hi(TSTATE_PEF), %g3
+ or %g2, %lo(TASK_REGOFF), %g2
+ and %g1, %g3, %g3
+ brnz,pn %g3, 1f
+ add %g6, %g2, %g2
+ wr %g0, 0, %fprs
+1: rdpr %tpc, %g3
+
+ stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TSTATE]
+ rdpr %tnpc, %g1
+ stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC]
+ rd %y, %g3
+ stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC]
+ rdpr %tt, %g1
+ st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y]
+ sethi %hi(PT_REGS_MAGIC), %g3
+ or %g3, %g1, %g1
+ st %g1, [%g2 + STACKFRAME_SZ + PT_V9_MAGIC]
+
+ rdpr %cansave, %g1
+ brnz,pt %g1, etrap_save
+ nop
+
+ rdpr %cwp, %g1
+ add %g1, 2, %g1
+ wrpr %g1, %cwp
+ be,pt %xcc, etrap_user_spill
+ mov ASI_AIUP, %g3
+
+ rdpr %otherwin, %g3
+ brz %g3, etrap_kernel_spill
+ mov ASI_AIUS, %g3
+
+etrap_user_spill:
+
+ wr %g3, 0x0, %asi
+ ldx [%g6 + TI_FLAGS], %g3
+ and %g3, _TIF_32BIT, %g3
+ brnz,pt %g3, etrap_user_spill_32bit
+ nop
+ ba,a,pt %xcc, etrap_user_spill_64bit
+
+etrap_save: save %g2, -STACK_BIAS, %sp
+ mov %g6, %l6
+
+ bne,pn %xcc, 3f
+ mov PRIMARY_CONTEXT, %l4
+ rdpr %canrestore, %g3
+ rdpr %wstate, %g2
+ wrpr %g0, 0, %canrestore
+ sll %g2, 3, %g2
+ mov 1, %l5
+ stb %l5, [%l6 + TI_FPDEPTH]
+
+ wrpr %g3, 0, %otherwin
+ wrpr %g2, 0, %wstate
+ sethi %hi(sparc64_kern_pri_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
+
+661: stxa %g3, [%l4] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g3, [%l4] ASI_MMU
+ .previous
+
+ sethi %hi(KERNBASE), %l4
+ flush %l4
+ mov ASI_AIUS, %l7
+2: mov %g4, %l4
+ mov %g5, %l5
+ add %g7, 4, %l2
+
+ /* Go to trap time globals so we can save them. */
+661: wrpr %g0, ETRAP_PSTATE1, %pstate
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ SET_GL(0)
+ .previous
+
+ stx %g1, [%sp + PTREGS_OFF + PT_V9_G1]
+ stx %g2, [%sp + PTREGS_OFF + PT_V9_G2]
+ sllx %l7, 24, %l7
+ stx %g3, [%sp + PTREGS_OFF + PT_V9_G3]
+ rdpr %cwp, %l0
+ stx %g4, [%sp + PTREGS_OFF + PT_V9_G4]
+ stx %g5, [%sp + PTREGS_OFF + PT_V9_G5]
+ stx %g6, [%sp + PTREGS_OFF + PT_V9_G6]
+ stx %g7, [%sp + PTREGS_OFF + PT_V9_G7]
+ or %l7, %l0, %l7
+ sethi %hi(TSTATE_TSO | TSTATE_PEF), %l0
+ or %l7, %l0, %l7
+ wrpr %l2, %tnpc
+ wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate
+ stx %i0, [%sp + PTREGS_OFF + PT_V9_I0]
+ stx %i1, [%sp + PTREGS_OFF + PT_V9_I1]
+ stx %i2, [%sp + PTREGS_OFF + PT_V9_I2]
+ stx %i3, [%sp + PTREGS_OFF + PT_V9_I3]
+ stx %i4, [%sp + PTREGS_OFF + PT_V9_I4]
+ stx %i5, [%sp + PTREGS_OFF + PT_V9_I5]
+ stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
+ mov %l6, %g6
+ stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
+ LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1)
+ ldx [%g6 + TI_TASK], %g4
+ done
+
+3: mov ASI_P, %l7
+ ldub [%l6 + TI_FPDEPTH], %l5
+ add %l6, TI_FPSAVED + 1, %l4
+ srl %l5, 1, %l3
+ add %l5, 2, %l5
+ stb %l5, [%l6 + TI_FPDEPTH]
+ ba,pt %xcc, 2b
+ stb %g0, [%l4 + %l3]
+ nop
+
+etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
+ * We place this right after pt_regs on the trap stack.
+ * The layout is:
+ * 0x00 TL1's TSTATE
+ * 0x08 TL1's TPC
+ * 0x10 TL1's TNPC
+ * 0x18 TL1's TT
+ * ...
+ * 0x58 TL4's TT
+ * 0x60 TL
+ */
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ sub %sp, ((4 * 8) * 4) + 8, %g2
+ rdpr %tl, %g1
+
+ wrpr %g0, 1, %tl
+ rdpr %tstate, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x00]
+ rdpr %tpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x08]
+ rdpr %tnpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x10]
+ rdpr %tt, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x18]
+
+ wrpr %g0, 2, %tl
+ rdpr %tstate, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x20]
+ rdpr %tpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x28]
+ rdpr %tnpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x30]
+ rdpr %tt, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x38]
+
+ sethi %hi(is_sun4v), %g3
+ lduw [%g3 + %lo(is_sun4v)], %g3
+ brnz,pn %g3, finish_tl1_capture
+ nop
+
+ wrpr %g0, 3, %tl
+ rdpr %tstate, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x40]
+ rdpr %tpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x48]
+ rdpr %tnpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x50]
+ rdpr %tt, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x58]
+
+ wrpr %g0, 4, %tl
+ rdpr %tstate, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x60]
+ rdpr %tpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x68]
+ rdpr %tnpc, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x70]
+ rdpr %tt, %g3
+ stx %g3, [%g2 + STACK_BIAS + 0x78]
+
+ stx %g1, [%g2 + STACK_BIAS + 0x80]
+
+finish_tl1_capture:
+ wrpr %g0, 1, %tl
+661: nop
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ SET_GL(1)
+ .previous
+
+ rdpr %tstate, %g1
+ sub %g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2
+ ba,pt %xcc, 1b
+ andcc %g1, TSTATE_PRIV, %g0
+
+#undef TASK_REGOFF
+#undef ETRAP_PSTATE1
diff --git a/arch/sparc/kernel/fpu_traps.S b/arch/sparc/kernel/fpu_traps.S
new file mode 100644
index 000000000000..a6864826a4bd
--- /dev/null
+++ b/arch/sparc/kernel/fpu_traps.S
@@ -0,0 +1,384 @@
+ /* This is trivial with the new code... */
+ .globl do_fpdis
+ .type do_fpdis,#function
+do_fpdis:
+ sethi %hi(TSTATE_PEF), %g4
+ rdpr %tstate, %g5
+ andcc %g5, %g4, %g0
+ be,pt %xcc, 1f
+ nop
+ rd %fprs, %g5
+ andcc %g5, FPRS_FEF, %g0
+ be,pt %xcc, 1f
+ nop
+
+ /* Legal state when DCR_IFPOE is set in Cheetah %dcr. */
+ sethi %hi(109f), %g7
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ add %g0, %g0, %g0
+ ba,a,pt %xcc, rtrap
+
+1: TRAP_LOAD_THREAD_REG(%g6, %g1)
+ ldub [%g6 + TI_FPSAVED], %g5
+ wr %g0, FPRS_FEF, %fprs
+ andcc %g5, FPRS_FEF, %g0
+ be,a,pt %icc, 1f
+ clr %g7
+ ldx [%g6 + TI_GSR], %g7
+1: andcc %g5, FPRS_DL, %g0
+ bne,pn %icc, 2f
+ fzero %f0
+ andcc %g5, FPRS_DU, %g0
+ bne,pn %icc, 1f
+ fzero %f2
+ faddd %f0, %f2, %f4
+ fmuld %f0, %f2, %f6
+ faddd %f0, %f2, %f8
+ fmuld %f0, %f2, %f10
+ faddd %f0, %f2, %f12
+ fmuld %f0, %f2, %f14
+ faddd %f0, %f2, %f16
+ fmuld %f0, %f2, %f18
+ faddd %f0, %f2, %f20
+ fmuld %f0, %f2, %f22
+ faddd %f0, %f2, %f24
+ fmuld %f0, %f2, %f26
+ faddd %f0, %f2, %f28
+ fmuld %f0, %f2, %f30
+ faddd %f0, %f2, %f32
+ fmuld %f0, %f2, %f34
+ faddd %f0, %f2, %f36
+ fmuld %f0, %f2, %f38
+ faddd %f0, %f2, %f40
+ fmuld %f0, %f2, %f42
+ faddd %f0, %f2, %f44
+ fmuld %f0, %f2, %f46
+ faddd %f0, %f2, %f48
+ fmuld %f0, %f2, %f50
+ faddd %f0, %f2, %f52
+ fmuld %f0, %f2, %f54
+ faddd %f0, %f2, %f56
+ fmuld %f0, %f2, %f58
+ b,pt %xcc, fpdis_exit2
+ faddd %f0, %f2, %f60
+1: mov SECONDARY_CONTEXT, %g3
+ add %g6, TI_FPREGS + 0x80, %g1
+ faddd %f0, %f2, %f4
+ fmuld %f0, %f2, %f6
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
+ sethi %hi(sparc64_kern_sec_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ add %g6, TI_FPREGS + 0xc0, %g2
+ faddd %f0, %f2, %f8
+ fmuld %f0, %f2, %f10
+ membar #Sync
+ ldda [%g1] ASI_BLK_S, %f32
+ ldda [%g2] ASI_BLK_S, %f48
+ membar #Sync
+ faddd %f0, %f2, %f12
+ fmuld %f0, %f2, %f14
+ faddd %f0, %f2, %f16
+ fmuld %f0, %f2, %f18
+ faddd %f0, %f2, %f20
+ fmuld %f0, %f2, %f22
+ faddd %f0, %f2, %f24
+ fmuld %f0, %f2, %f26
+ faddd %f0, %f2, %f28
+ fmuld %f0, %f2, %f30
+ b,pt %xcc, fpdis_exit
+ nop
+2: andcc %g5, FPRS_DU, %g0
+ bne,pt %icc, 3f
+ fzero %f32
+ mov SECONDARY_CONTEXT, %g3
+ fzero %f34
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
+ add %g6, TI_FPREGS, %g1
+ sethi %hi(sparc64_kern_sec_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ add %g6, TI_FPREGS + 0x40, %g2
+ faddd %f32, %f34, %f36
+ fmuld %f32, %f34, %f38
+ membar #Sync
+ ldda [%g1] ASI_BLK_S, %f0
+ ldda [%g2] ASI_BLK_S, %f16
+ membar #Sync
+ faddd %f32, %f34, %f40
+ fmuld %f32, %f34, %f42
+ faddd %f32, %f34, %f44
+ fmuld %f32, %f34, %f46
+ faddd %f32, %f34, %f48
+ fmuld %f32, %f34, %f50
+ faddd %f32, %f34, %f52
+ fmuld %f32, %f34, %f54
+ faddd %f32, %f34, %f56
+ fmuld %f32, %f34, %f58
+ faddd %f32, %f34, %f60
+ fmuld %f32, %f34, %f62
+ ba,pt %xcc, fpdis_exit
+ nop
+3: mov SECONDARY_CONTEXT, %g3
+ add %g6, TI_FPREGS, %g1
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
+ sethi %hi(sparc64_kern_sec_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ mov 0x40, %g2
+ membar #Sync
+ ldda [%g1] ASI_BLK_S, %f0
+ ldda [%g1 + %g2] ASI_BLK_S, %f16
+ add %g1, 0x80, %g1
+ ldda [%g1] ASI_BLK_S, %f32
+ ldda [%g1 + %g2] ASI_BLK_S, %f48
+ membar #Sync
+fpdis_exit:
+
+661: stxa %g5, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g5, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+fpdis_exit2:
+ wr %g7, 0, %gsr
+ ldx [%g6 + TI_XFSR], %fsr
+ rdpr %tstate, %g3
+ or %g3, %g4, %g3 ! anal...
+ wrpr %g3, %tstate
+ wr %g0, FPRS_FEF, %fprs ! clean DU/DL bits
+ retry
+ .size do_fpdis,.-do_fpdis
+
+ .align 32
+ .type fp_other_bounce,#function
+fp_other_bounce:
+ call do_fpother
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size fp_other_bounce,.-fp_other_bounce
+
+ .align 32
+ .globl do_fpother_check_fitos
+ .type do_fpother_check_fitos,#function
+do_fpother_check_fitos:
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ sethi %hi(fp_other_bounce - 4), %g7
+ or %g7, %lo(fp_other_bounce - 4), %g7
+
+ /* NOTE: Need to preserve %g7 until we fully commit
+ * to the fitos fixup.
+ */
+ stx %fsr, [%g6 + TI_XFSR]
+ rdpr %tstate, %g3
+ andcc %g3, TSTATE_PRIV, %g0
+ bne,pn %xcc, do_fptrap_after_fsr
+ nop
+ ldx [%g6 + TI_XFSR], %g3
+ srlx %g3, 14, %g1
+ and %g1, 7, %g1
+ cmp %g1, 2 ! Unfinished FP-OP
+ bne,pn %xcc, do_fptrap_after_fsr
+ sethi %hi(1 << 23), %g1 ! Inexact
+ andcc %g3, %g1, %g0
+ bne,pn %xcc, do_fptrap_after_fsr
+ rdpr %tpc, %g1
+ lduwa [%g1] ASI_AIUP, %g3 ! This cannot ever fail
+#define FITOS_MASK 0xc1f83fe0
+#define FITOS_COMPARE 0x81a01880
+ sethi %hi(FITOS_MASK), %g1
+ or %g1, %lo(FITOS_MASK), %g1
+ and %g3, %g1, %g1
+ sethi %hi(FITOS_COMPARE), %g2
+ or %g2, %lo(FITOS_COMPARE), %g2
+ cmp %g1, %g2
+ bne,pn %xcc, do_fptrap_after_fsr
+ nop
+ std %f62, [%g6 + TI_FPREGS + (62 * 4)]
+ sethi %hi(fitos_table_1), %g1
+ and %g3, 0x1f, %g2
+ or %g1, %lo(fitos_table_1), %g1
+ sllx %g2, 2, %g2
+ jmpl %g1 + %g2, %g0
+ ba,pt %xcc, fitos_emul_continue
+
+fitos_table_1:
+ fitod %f0, %f62
+ fitod %f1, %f62
+ fitod %f2, %f62
+ fitod %f3, %f62
+ fitod %f4, %f62
+ fitod %f5, %f62
+ fitod %f6, %f62
+ fitod %f7, %f62
+ fitod %f8, %f62
+ fitod %f9, %f62
+ fitod %f10, %f62
+ fitod %f11, %f62
+ fitod %f12, %f62
+ fitod %f13, %f62
+ fitod %f14, %f62
+ fitod %f15, %f62
+ fitod %f16, %f62
+ fitod %f17, %f62
+ fitod %f18, %f62
+ fitod %f19, %f62
+ fitod %f20, %f62
+ fitod %f21, %f62
+ fitod %f22, %f62
+ fitod %f23, %f62
+ fitod %f24, %f62
+ fitod %f25, %f62
+ fitod %f26, %f62
+ fitod %f27, %f62
+ fitod %f28, %f62
+ fitod %f29, %f62
+ fitod %f30, %f62
+ fitod %f31, %f62
+
+fitos_emul_continue:
+ sethi %hi(fitos_table_2), %g1
+ srl %g3, 25, %g2
+ or %g1, %lo(fitos_table_2), %g1
+ and %g2, 0x1f, %g2
+ sllx %g2, 2, %g2
+ jmpl %g1 + %g2, %g0
+ ba,pt %xcc, fitos_emul_fini
+
+fitos_table_2:
+ fdtos %f62, %f0
+ fdtos %f62, %f1
+ fdtos %f62, %f2
+ fdtos %f62, %f3
+ fdtos %f62, %f4
+ fdtos %f62, %f5
+ fdtos %f62, %f6
+ fdtos %f62, %f7
+ fdtos %f62, %f8
+ fdtos %f62, %f9
+ fdtos %f62, %f10
+ fdtos %f62, %f11
+ fdtos %f62, %f12
+ fdtos %f62, %f13
+ fdtos %f62, %f14
+ fdtos %f62, %f15
+ fdtos %f62, %f16
+ fdtos %f62, %f17
+ fdtos %f62, %f18
+ fdtos %f62, %f19
+ fdtos %f62, %f20
+ fdtos %f62, %f21
+ fdtos %f62, %f22
+ fdtos %f62, %f23
+ fdtos %f62, %f24
+ fdtos %f62, %f25
+ fdtos %f62, %f26
+ fdtos %f62, %f27
+ fdtos %f62, %f28
+ fdtos %f62, %f29
+ fdtos %f62, %f30
+ fdtos %f62, %f31
+
+fitos_emul_fini:
+ ldd [%g6 + TI_FPREGS + (62 * 4)], %f62
+ done
+ .size do_fpother_check_fitos,.-do_fpother_check_fitos
+
+ .align 32
+ .globl do_fptrap
+ .type do_fptrap,#function
+do_fptrap:
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ stx %fsr, [%g6 + TI_XFSR]
+do_fptrap_after_fsr:
+ ldub [%g6 + TI_FPSAVED], %g3
+ rd %fprs, %g1
+ or %g3, %g1, %g3
+ stb %g3, [%g6 + TI_FPSAVED]
+ rd %gsr, %g3
+ stx %g3, [%g6 + TI_GSR]
+ mov SECONDARY_CONTEXT, %g3
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
+ sethi %hi(sparc64_kern_sec_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
+ membar #Sync
+ add %g6, TI_FPREGS, %g2
+ andcc %g1, FPRS_DL, %g0
+ be,pn %icc, 4f
+ mov 0x40, %g3
+ stda %f0, [%g2] ASI_BLK_S
+ stda %f16, [%g2 + %g3] ASI_BLK_S
+ andcc %g1, FPRS_DU, %g0
+ be,pn %icc, 5f
+4: add %g2, 128, %g2
+ stda %f32, [%g2] ASI_BLK_S
+ stda %f48, [%g2 + %g3] ASI_BLK_S
+5: mov SECONDARY_CONTEXT, %g1
+ membar #Sync
+
+661: stxa %g5, [%g1] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g5, [%g1] ASI_MMU
+ .previous
+
+ membar #Sync
+ ba,pt %xcc, etrap
+ wr %g0, 0, %fprs
+ .size do_fptrap,.-do_fptrap
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
new file mode 100644
index 000000000000..d0218e73f982
--- /dev/null
+++ b/arch/sparc/kernel/ftrace.c
@@ -0,0 +1,76 @@
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/ftrace.h>
+
+static const u32 ftrace_nop = 0x01000000;
+
+unsigned char *ftrace_nop_replace(void)
+{
+ return (char *)&ftrace_nop;
+}
+
+unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ static u32 call;
+ s32 off;
+
+ off = ((s32)addr - (s32)ip);
+ call = 0x40000000 | ((u32)off >> 2);
+
+ return (unsigned char *) &call;
+}
+
+int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ u32 old = *(u32 *)old_code;
+ u32 new = *(u32 *)new_code;
+ u32 replaced;
+ int faulted;
+
+ __asm__ __volatile__(
+ "1: cas [%[ip]], %[old], %[new]\n"
+ " flush %[ip]\n"
+ " mov 0, %[faulted]\n"
+ "2:\n"
+ " .section .fixup,#alloc,#execinstr\n"
+ " .align 4\n"
+ "3: sethi %%hi(2b), %[faulted]\n"
+ " jmpl %[faulted] + %%lo(2b), %%g0\n"
+ " mov 1, %[faulted]\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .word 1b, 3b\n"
+ " .previous\n"
+ : "=r" (replaced), [faulted] "=r" (faulted)
+ : [new] "0" (new), [old] "r" (old), [ip] "r" (ip)
+ : "memory");
+
+ if (replaced != old && replaced != new)
+ faulted = 2;
+
+ return faulted;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+ memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ return ftrace_modify_code(ip, old, new);
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ ftrace_mcount_set(data);
+ return 0;
+}
diff --git a/arch/sparc/kernel/getsetcc.S b/arch/sparc/kernel/getsetcc.S
new file mode 100644
index 000000000000..a14d272d2061
--- /dev/null
+++ b/arch/sparc/kernel/getsetcc.S
@@ -0,0 +1,24 @@
+ .globl getcc
+ .type getcc,#function
+getcc:
+ ldx [%o0 + PT_V9_TSTATE], %o1
+ srlx %o1, 32, %o1
+ and %o1, 0xf, %o1
+ retl
+ stx %o1, [%o0 + PT_V9_G1]
+ .size getcc,.-getcc
+
+ .globl setcc
+ .type setcc,#function
+setcc:
+ ldx [%o0 + PT_V9_TSTATE], %o1
+ ldx [%o0 + PT_V9_G1], %o2
+ or %g0, %ulo(TSTATE_ICC), %o3
+ sllx %o3, 32, %o3
+ andn %o1, %o3, %o1
+ sllx %o2, 32, %o2
+ and %o2, %o3, %o2
+ or %o1, %o2, %o1
+ retl
+ stx %o1, [%o0 + PT_V9_TSTATE]
+ .size setcc,.-setcc
diff --git a/arch/sparc/kernel/head.S b/arch/sparc/kernel/head_32.S
index 51b40426f9c6..f0b4b516304f 100644
--- a/arch/sparc/kernel/head.S
+++ b/arch/sparc/kernel/head_32.S
@@ -990,7 +990,7 @@ sun4c_continue_boot:
/* Zero out our BSS section. */
set __bss_start , %o0 ! First address of BSS
- set end , %o1 ! Last address of BSS
+ set _end , %o1 ! Last address of BSS
add %o0, 0x1, %o0
1:
stb %g0, [%o0]
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
new file mode 100644
index 000000000000..8ffee714f932
--- /dev/null
+++ b/arch/sparc/kernel/head_64.S
@@ -0,0 +1,900 @@
+/* head.S: Initial boot code for the Sparc64 port of Linux.
+ *
+ * Copyright (C) 1996, 1997, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 David Sitsky (David.Sitsky@anu.edu.au)
+ * Copyright (C) 1997, 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ */
+
+#include <linux/version.h>
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/asi.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/spitfire.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/errno.h>
+#include <asm/signal.h>
+#include <asm/processor.h>
+#include <asm/lsu.h>
+#include <asm/dcr.h>
+#include <asm/dcu.h>
+#include <asm/head.h>
+#include <asm/ttable.h>
+#include <asm/mmu.h>
+#include <asm/cpudata.h>
+#include <asm/pil.h>
+#include <asm/estate.h>
+#include <asm/sfafsr.h>
+#include <asm/unistd.h>
+
+/* This section from from _start to sparc64_boot_end should fit into
+ * 0x0000000000404000 to 0x0000000000408000.
+ */
+ .text
+ .globl start, _start, stext, _stext
+_start:
+start:
+_stext:
+stext:
+! 0x0000000000404000
+ b sparc64_boot
+ flushw /* Flush register file. */
+
+/* This stuff has to be in sync with SILO and other potential boot loaders
+ * Fields should be kept upward compatible and whenever any change is made,
+ * HdrS version should be incremented.
+ */
+ .global root_flags, ram_flags, root_dev
+ .global sparc_ramdisk_image, sparc_ramdisk_size
+ .global sparc_ramdisk_image64
+
+ .ascii "HdrS"
+ .word LINUX_VERSION_CODE
+
+ /* History:
+ *
+ * 0x0300 : Supports being located at other than 0x4000
+ * 0x0202 : Supports kernel params string
+ * 0x0201 : Supports reboot_command
+ */
+ .half 0x0301 /* HdrS version */
+
+root_flags:
+ .half 1
+root_dev:
+ .half 0
+ram_flags:
+ .half 0
+sparc_ramdisk_image:
+ .word 0
+sparc_ramdisk_size:
+ .word 0
+ .xword reboot_command
+ .xword bootstr_info
+sparc_ramdisk_image64:
+ .xword 0
+ .word _end
+
+ /* PROM cif handler code address is in %o4. */
+sparc64_boot:
+ mov %o4, %l7
+
+ /* We need to remap the kernel. Use position independant
+ * code to remap us to KERNBASE.
+ *
+ * SILO can invoke us with 32-bit address masking enabled,
+ * so make sure that's clear.
+ */
+ rdpr %pstate, %g1
+ andn %g1, PSTATE_AM, %g1
+ wrpr %g1, 0x0, %pstate
+ ba,a,pt %xcc, 1f
+
+ .globl prom_finddev_name, prom_chosen_path, prom_root_node
+ .globl prom_getprop_name, prom_mmu_name, prom_peer_name
+ .globl prom_callmethod_name, prom_translate_name, prom_root_compatible
+ .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
+ .globl prom_boot_mapped_pc, prom_boot_mapping_mode
+ .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
+ .globl prom_compatible_name, prom_cpu_path, prom_cpu_compatible
+ .globl is_sun4v, sun4v_chip_type, prom_set_trap_table_name
+prom_peer_name:
+ .asciz "peer"
+prom_compatible_name:
+ .asciz "compatible"
+prom_finddev_name:
+ .asciz "finddevice"
+prom_chosen_path:
+ .asciz "/chosen"
+prom_cpu_path:
+ .asciz "/cpu"
+prom_getprop_name:
+ .asciz "getprop"
+prom_mmu_name:
+ .asciz "mmu"
+prom_callmethod_name:
+ .asciz "call-method"
+prom_translate_name:
+ .asciz "translate"
+prom_map_name:
+ .asciz "map"
+prom_unmap_name:
+ .asciz "unmap"
+prom_set_trap_table_name:
+ .asciz "SUNW,set-trap-table"
+prom_sun4v_name:
+ .asciz "sun4v"
+prom_niagara_prefix:
+ .asciz "SUNW,UltraSPARC-T"
+ .align 4
+prom_root_compatible:
+ .skip 64
+prom_cpu_compatible:
+ .skip 64
+prom_root_node:
+ .word 0
+prom_mmu_ihandle_cache:
+ .word 0
+prom_boot_mapped_pc:
+ .word 0
+prom_boot_mapping_mode:
+ .word 0
+ .align 8
+prom_boot_mapping_phys_high:
+ .xword 0
+prom_boot_mapping_phys_low:
+ .xword 0
+is_sun4v:
+ .word 0
+sun4v_chip_type:
+ .word SUN4V_CHIP_INVALID
+1:
+ rd %pc, %l0
+
+ mov (1b - prom_peer_name), %l1
+ sub %l0, %l1, %l1
+ mov 0, %l2
+
+ /* prom_root_node = prom_peer(0) */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "peer"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, 0
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! prom root node
+ mov (1b - prom_root_node), %l1
+ sub %l0, %l1, %l1
+ stw %l4, [%l1]
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_compatible_name), %l2
+ mov (1b - prom_root_compatible), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_getproperty(prom_root_node, "compatible",
+ * &prom_root_compatible, 64)
+ */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, prom_root_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_root_compatible
+ mov 64, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ mov (1b - prom_finddev_name), %l1
+ mov (1b - prom_chosen_path), %l2
+ mov (1b - prom_boot_mapped_pc), %l3
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l3, %l3
+ stw %l0, [%l3]
+ sub %sp, (192 + 128), %sp
+
+ /* chosen_node = prom_finddevice("/chosen") */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/chosen"
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! chosen device node
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_mmu_name), %l2
+ mov (1b - prom_mmu_ihandle_cache), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_mmu_ihandle_cache = prom_getint(chosen_node, "mmu") */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, chosen_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "mmu"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_mmu_ihandle_cache
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, sizeof(arg3)
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ mov (1b - prom_callmethod_name), %l1
+ mov (1b - prom_translate_name), %l2
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ lduw [%l5], %l5 ! prom_mmu_ihandle_cache
+
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "call-method"
+ mov 3, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 3
+ mov 5, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate"
+ stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache
+ /* PAGE align */
+ srlx %l0, 13, %l3
+ sllx %l3, 13, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC
+ stx %g0, [%sp + 2047 + 128 + 0x30] ! res1
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! res2
+ stx %g0, [%sp + 2047 + 128 + 0x40] ! res3
+ stx %g0, [%sp + 2047 + 128 + 0x48] ! res4
+ stx %g0, [%sp + 2047 + 128 + 0x50] ! res5
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x40], %l1 ! translation mode
+ mov (1b - prom_boot_mapping_mode), %l4
+ sub %l0, %l4, %l4
+ stw %l1, [%l4]
+ mov (1b - prom_boot_mapping_phys_high), %l4
+ sub %l0, %l4, %l4
+ ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high
+ stx %l2, [%l4 + 0x0]
+ ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low
+ /* 4MB align */
+ srlx %l3, 22, %l3
+ sllx %l3, 22, %l3
+ stx %l3, [%l4 + 0x8]
+
+ /* Leave service as-is, "call-method" */
+ mov 7, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 7
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ mov (1b - prom_map_name), %l3
+ sub %l0, %l3, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x18] ! arg1: "map"
+ /* Leave arg2 as-is, prom_mmu_ihandle_cache */
+ mov -1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: mode (-1 default)
+ /* 4MB align the kernel image size. */
+ set (_end - KERNBASE), %l3
+ set ((4 * 1024 * 1024) - 1), %l4
+ add %l3, %l4, %l3
+ andn %l3, %l4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4: roundup(ksize, 4MB)
+ sethi %hi(KERNBASE), %l3
+ stx %l3, [%sp + 2047 + 128 + 0x38] ! arg5: vaddr (KERNBASE)
+ stx %g0, [%sp + 2047 + 128 + 0x40] ! arg6: empty
+ mov (1b - prom_boot_mapping_phys_low), %l3
+ sub %l0, %l3, %l3
+ ldx [%l3], %l3
+ stx %l3, [%sp + 2047 + 128 + 0x48] ! arg7: phys addr
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ add %sp, (192 + 128), %sp
+
+ sethi %hi(prom_root_compatible), %g1
+ or %g1, %lo(prom_root_compatible), %g1
+ sethi %hi(prom_sun4v_name), %g7
+ or %g7, %lo(prom_sun4v_name), %g7
+ mov 5, %g3
+90: ldub [%g7], %g2
+ ldub [%g1], %g4
+ cmp %g2, %g4
+ bne,pn %icc, 80f
+ add %g7, 1, %g7
+ subcc %g3, 1, %g3
+ bne,pt %xcc, 90b
+ add %g1, 1, %g1
+
+ sethi %hi(is_sun4v), %g1
+ or %g1, %lo(is_sun4v), %g1
+ mov 1, %g7
+ stw %g7, [%g1]
+
+ /* cpu_node = prom_finddevice("/cpu") */
+ mov (1b - prom_finddev_name), %l1
+ mov (1b - prom_cpu_path), %l2
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %sp, (192 + 128), %sp
+
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/cpu"
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! cpu device node
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_compatible_name), %l2
+ mov (1b - prom_cpu_compatible), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_getproperty(cpu_node, "compatible",
+ * &prom_cpu_compatible, 64)
+ */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, cpu_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_cpu_compatible
+ mov 64, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ add %sp, (192 + 128), %sp
+
+ sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ sethi %hi(prom_niagara_prefix), %g7
+ or %g7, %lo(prom_niagara_prefix), %g7
+ mov 17, %g3
+90: ldub [%g7], %g2
+ ldub [%g1], %g4
+ cmp %g2, %g4
+ bne,pn %icc, 4f
+ add %g7, 1, %g7
+ subcc %g3, 1, %g3
+ bne,pt %xcc, 90b
+ add %g1, 1, %g1
+
+ sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ ldub [%g1 + 17], %g2
+ cmp %g2, '1'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA1, %g4
+ cmp %g2, '2'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA2, %g4
+4:
+ mov SUN4V_CHIP_UNKNOWN, %g4
+5: sethi %hi(sun4v_chip_type), %g2
+ or %g2, %lo(sun4v_chip_type), %g2
+ stw %g4, [%g2]
+
+80:
+ BRANCH_IF_SUN4V(g1, jump_to_sun4u_init)
+ BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
+ ba,pt %xcc, spitfire_boot
+ nop
+
+cheetah_plus_boot:
+ /* Preserve OBP chosen DCU and DCR register settings. */
+ ba,pt %xcc, cheetah_generic_boot
+ nop
+
+cheetah_boot:
+ mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
+ wr %g1, %asr18
+
+ sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+ or %g7, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+ sllx %g7, 32, %g7
+ or %g7, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g7
+ stxa %g7, [%g0] ASI_DCU_CONTROL_REG
+ membar #Sync
+
+cheetah_generic_boot:
+ mov TSB_EXTENSION_P, %g3
+ stxa %g0, [%g3] ASI_DMMU
+ stxa %g0, [%g3] ASI_IMMU
+ membar #Sync
+
+ mov TSB_EXTENSION_S, %g3
+ stxa %g0, [%g3] ASI_DMMU
+ membar #Sync
+
+ mov TSB_EXTENSION_N, %g3
+ stxa %g0, [%g3] ASI_DMMU
+ stxa %g0, [%g3] ASI_IMMU
+ membar #Sync
+
+ ba,a,pt %xcc, jump_to_sun4u_init
+
+spitfire_boot:
+ /* Typically PROM has already enabled both MMU's and both on-chip
+ * caches, but we do it here anyway just to be paranoid.
+ */
+ mov (LSU_CONTROL_IC|LSU_CONTROL_DC|LSU_CONTROL_IM|LSU_CONTROL_DM), %g1
+ stxa %g1, [%g0] ASI_LSU_CONTROL
+ membar #Sync
+
+jump_to_sun4u_init:
+ /*
+ * Make sure we are in privileged mode, have address masking,
+ * using the ordinary globals and have enabled floating
+ * point.
+ *
+ * Again, typically PROM has left %pil at 13 or similar, and
+ * (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE) in %pstate.
+ */
+ wrpr %g0, (PSTATE_PRIV|PSTATE_PEF|PSTATE_IE), %pstate
+ wr %g0, 0, %fprs
+
+ set sun4u_init, %g2
+ jmpl %g2 + %g0, %g0
+ nop
+
+ .section .text.init.refok
+sun4u_init:
+ BRANCH_IF_SUN4V(g1, sun4v_init)
+
+ /* Set ctx 0 */
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_DMMU
+ membar #Sync
+
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_DMMU
+ membar #Sync
+
+ ba,pt %xcc, sun4u_continue
+ nop
+
+sun4v_init:
+ /* Set ctx 0 */
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+ ba,pt %xcc, niagara_tlb_fixup
+ nop
+
+sun4u_continue:
+ BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup)
+
+ ba,pt %xcc, spitfire_tlb_fixup
+ nop
+
+niagara_tlb_fixup:
+ mov 3, %g2 /* Set TLB type to hypervisor. */
+ sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
+
+ /* Patch copy/clear ops. */
+ sethi %hi(sun4v_chip_type), %g1
+ lduw [%g1 + %lo(sun4v_chip_type)], %g1
+ cmp %g1, SUN4V_CHIP_NIAGARA1
+ be,pt %xcc, niagara_patch
+ cmp %g1, SUN4V_CHIP_NIAGARA2
+ be,pt %xcc, niagara2_patch
+ nop
+
+ call generic_patch_copyops
+ nop
+ call generic_patch_bzero
+ nop
+ call generic_patch_pageops
+ nop
+
+ ba,a,pt %xcc, 80f
+niagara2_patch:
+ call niagara2_patch_copyops
+ nop
+ call niagara_patch_bzero
+ nop
+ call niagara2_patch_pageops
+ nop
+
+ ba,a,pt %xcc, 80f
+
+niagara_patch:
+ call niagara_patch_copyops
+ nop
+ call niagara_patch_bzero
+ nop
+ call niagara_patch_pageops
+ nop
+
+80:
+ /* Patch TLB/cache ops. */
+ call hypervisor_patch_cachetlbops
+ nop
+
+ ba,pt %xcc, tlb_fixup_done
+ nop
+
+cheetah_tlb_fixup:
+ mov 2, %g2 /* Set TLB type to cheetah+. */
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
+
+ mov 1, %g2 /* Set TLB type to cheetah. */
+
+1: sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
+
+ /* Patch copy/page operations to cheetah optimized versions. */
+ call cheetah_patch_copyops
+ nop
+ call cheetah_patch_copy_page
+ nop
+ call cheetah_patch_cachetlbops
+ nop
+
+ ba,pt %xcc, tlb_fixup_done
+ nop
+
+spitfire_tlb_fixup:
+ /* Set TLB type to spitfire. */
+ mov 0, %g2
+ sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
+
+tlb_fixup_done:
+ sethi %hi(init_thread_union), %g6
+ or %g6, %lo(init_thread_union), %g6
+ ldx [%g6 + TI_TASK], %g4
+ mov %sp, %l6
+
+ wr %g0, ASI_P, %asi
+ mov 1, %g1
+ sllx %g1, THREAD_SHIFT, %g1
+ sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+ add %g6, %g1, %sp
+ mov 0, %fp
+
+ /* Set per-cpu pointer initially to zero, this makes
+ * the boot-cpu use the in-kernel-image per-cpu areas
+ * before setup_per_cpu_area() is invoked.
+ */
+ clr %g5
+
+ wrpr %g0, 0, %wstate
+ wrpr %g0, 0x0, %tl
+
+ /* Clear the bss */
+ sethi %hi(__bss_start), %o0
+ or %o0, %lo(__bss_start), %o0
+ sethi %hi(_end), %o1
+ or %o1, %lo(_end), %o1
+ call __bzero
+ sub %o1, %o0, %o1
+
+#ifdef CONFIG_LOCKDEP
+ /* We have this call this super early, as even prom_init can grab
+ * spinlocks and thus call into the lockdep code.
+ */
+ call lockdep_init
+ nop
+#endif
+
+ mov %l6, %o1 ! OpenPROM stack
+ call prom_init
+ mov %l7, %o0 ! OpenPROM cif handler
+
+ /* Initialize current_thread_info()->cpu as early as possible.
+ * In order to do that accurately we have to patch up the get_cpuid()
+ * assembler sequences. And that, in turn, requires that we know
+ * if we are on a Starfire box or not. While we're here, patch up
+ * the sun4v sequences as well.
+ */
+ call check_if_starfire
+ nop
+ call per_cpu_patch
+ nop
+ call sun4v_patch
+ nop
+
+#ifdef CONFIG_SMP
+ call hard_smp_processor_id
+ nop
+ cmp %o0, NR_CPUS
+ blu,pt %xcc, 1f
+ nop
+ call boot_cpu_id_too_large
+ nop
+ /* Not reached... */
+
+1:
+ /* If we boot on a non-zero cpu, all of the per-cpu
+ * variable references we make before setting up the
+ * per-cpu areas will use a bogus offset. Put a
+ * compensating factor into __per_cpu_base to handle
+ * this cleanly.
+ *
+ * What the per-cpu code calculates is:
+ *
+ * __per_cpu_base + (cpu << __per_cpu_shift)
+ *
+ * These two variables are zero initially, so to
+ * make it all cancel out to zero we need to put
+ * "0 - (cpu << 0)" into __per_cpu_base so that the
+ * above formula evaluates to zero.
+ *
+ * We cannot even perform a printk() until this stuff
+ * is setup as that calls cpu_clock() which uses
+ * per-cpu variables.
+ */
+ sub %g0, %o0, %o1
+ sethi %hi(__per_cpu_base), %o2
+ stx %o1, [%o2 + %lo(__per_cpu_base)]
+#else
+ mov 0, %o0
+#endif
+ sth %o0, [%g6 + TI_CPU]
+
+ call prom_init_report
+ nop
+
+ /* Off we go.... */
+ call start_kernel
+ nop
+ /* Not reached... */
+
+ .previous
+
+ /* This is meant to allow the sharing of this code between
+ * boot processor invocation (via setup_tba() below) and
+ * secondary processor startup (via trampoline.S). The
+ * former does use this code, the latter does not yet due
+ * to some complexities. That should be fixed up at some
+ * point.
+ *
+ * There used to be enormous complexity wrt. transferring
+ * over from the firwmare's trap table to the Linux kernel's.
+ * For example, there was a chicken & egg problem wrt. building
+ * the OBP page tables, yet needing to be on the Linux kernel
+ * trap table (to translate PAGE_OFFSET addresses) in order to
+ * do that.
+ *
+ * We now handle OBP tlb misses differently, via linear lookups
+ * into the prom_trans[] array. So that specific problem no
+ * longer exists. Yet, unfortunately there are still some issues
+ * preventing trampoline.S from using this code... ho hum.
+ */
+ .globl setup_trap_table
+setup_trap_table:
+ save %sp, -192, %sp
+
+ /* Force interrupts to be disabled. */
+ rdpr %pstate, %l0
+ andn %l0, PSTATE_IE, %o1
+ wrpr %o1, 0x0, %pstate
+ rdpr %pil, %l1
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+
+ /* Make the firmware call to jump over to the Linux trap table. */
+ sethi %hi(is_sun4v), %o0
+ lduw [%o0 + %lo(is_sun4v)], %o0
+ brz,pt %o0, 1f
+ nop
+
+ TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+ add %g2, TRAP_PER_CPU_FAULT_INFO, %g2
+ stxa %g2, [%g0] ASI_SCRATCHPAD
+
+ /* Compute physical address:
+ *
+ * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
+ */
+ sethi %hi(KERNBASE), %g3
+ sub %g2, %g3, %g2
+ sethi %hi(kern_base), %g3
+ ldx [%g3 + %lo(kern_base)], %g3
+ add %g2, %g3, %o1
+ sethi %hi(sparc64_ttable_tl0), %o0
+
+ set prom_set_trap_table_name, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 2, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 0, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ stx %o0, [%sp + 2047 + 128 + 0x18]
+ stx %o1, [%sp + 2047 + 128 + 0x20]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
+
+ ba,pt %xcc, 2f
+ nop
+
+1: sethi %hi(sparc64_ttable_tl0), %o0
+ set prom_set_trap_table_name, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 0, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ stx %o0, [%sp + 2047 + 128 + 0x18]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
+
+ /* Start using proper page size encodings in ctx register. */
+2: sethi %hi(sparc64_kern_pri_context), %g3
+ ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
+
+ mov PRIMARY_CONTEXT, %g1
+
+661: stxa %g2, [%g1] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g1] ASI_MMU
+ .previous
+
+ membar #Sync
+
+ BRANCH_IF_SUN4V(o2, 1f)
+
+ /* Kill PROM timer */
+ sethi %hi(0x80000000), %o2
+ sllx %o2, 32, %o2
+ wr %o2, 0, %tick_cmpr
+
+ BRANCH_IF_ANY_CHEETAH(o2, o3, 1f)
+
+ ba,pt %xcc, 2f
+ nop
+
+ /* Disable STICK_INT interrupts. */
+1:
+ sethi %hi(0x80000000), %o2
+ sllx %o2, 32, %o2
+ wr %o2, %asr25
+
+2:
+ wrpr %g0, %g0, %wstate
+
+ call init_irqwork_curcpu
+ nop
+
+ /* Now we can restore interrupt state. */
+ wrpr %l0, 0, %pstate
+ wrpr %l1, 0x0, %pil
+
+ ret
+ restore
+
+ .globl setup_tba
+setup_tba:
+ save %sp, -192, %sp
+
+ /* The boot processor is the only cpu which invokes this
+ * routine, the other cpus set things up via trampoline.S.
+ * So save the OBP trap table address here.
+ */
+ rdpr %tba, %g7
+ sethi %hi(prom_tba), %o1
+ or %o1, %lo(prom_tba), %o1
+ stx %g7, [%o1]
+
+ call setup_trap_table
+ nop
+
+ ret
+ restore
+sparc64_boot_end:
+
+#include "etrap_64.S"
+#include "rtrap_64.S"
+#include "winfixup.S"
+#include "fpu_traps.S"
+#include "ivec.S"
+#include "getsetcc.S"
+#include "utrap.S"
+#include "spiterrs.S"
+#include "cherrs.S"
+#include "misctrap.S"
+#include "syscalls.S"
+#include "helpers.S"
+#include "hvcalls.S"
+#include "sun4v_tlb_miss.S"
+#include "sun4v_ivec.S"
+#include "ktlb.S"
+#include "tsb.S"
+
+/*
+ * The following skip makes sure the trap table in ttable.S is aligned
+ * on a 32K boundary as required by the v9 specs for TBA register.
+ *
+ * We align to a 32K boundary, then we have the 32K kernel TSB,
+ * the 64K kernel 4MB TSB, and then the 32K aligned trap table.
+ */
+1:
+ .skip 0x4000 + _start - 1b
+
+! 0x0000000000408000
+
+ .globl swapper_tsb
+swapper_tsb:
+ .skip (32 * 1024)
+
+ .globl swapper_4m_tsb
+swapper_4m_tsb:
+ .skip (64 * 1024)
+
+! 0x0000000000420000
+
+ /* Some care needs to be exercised if you try to move the
+ * location of the trap table relative to other things. For
+ * one thing there are br* instructions in some of the
+ * trap table entires which branch back to code in ktlb.S
+ * Those instructions can only handle a signed 16-bit
+ * displacement.
+ *
+ * There is a binutils bug (bugzilla #4558) which causes
+ * the relocation overflow checks for such instructions to
+ * not be done correctly. So bintuils will not notice the
+ * error and will instead write junk into the relocation and
+ * you'll have an unbootable kernel.
+ */
+#include "ttable.S"
+
+! 0x0000000000428000
+
+#include "systbls_64.S"
+
+ .data
+ .align 8
+ .globl prom_tba, tlb_type
+prom_tba: .xword 0
+tlb_type: .word 0 /* Must NOT end up in BSS */
+ .section ".fixup",#alloc,#execinstr
+
+ .globl __ret_efault, __retl_efault
+__ret_efault:
+ ret
+ restore %g0, -EFAULT, %o0
+__retl_efault:
+ retl
+ mov -EFAULT, %o0
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
new file mode 100644
index 000000000000..314dd0c9fc5b
--- /dev/null
+++ b/arch/sparc/kernel/helpers.S
@@ -0,0 +1,63 @@
+ .align 32
+ .globl __flushw_user
+ .type __flushw_user,#function
+__flushw_user:
+ rdpr %otherwin, %g1
+ brz,pn %g1, 2f
+ clr %g2
+1: save %sp, -128, %sp
+ rdpr %otherwin, %g1
+ brnz,pt %g1, 1b
+ add %g2, 1, %g2
+1: sub %g2, 1, %g2
+ brnz,pt %g2, 1b
+ restore %g0, %g0, %g0
+2: retl
+ nop
+ .size __flushw_user,.-__flushw_user
+
+ /* Flush %fp and %i7 to the stack for all register
+ * windows active inside of the cpu. This allows
+ * show_stack_trace() to avoid using an expensive
+ * 'flushw'.
+ */
+ .globl stack_trace_flush
+ .type stack_trace_flush,#function
+stack_trace_flush:
+ rdpr %pstate, %o0
+ wrpr %o0, PSTATE_IE, %pstate
+
+ rdpr %cwp, %g1
+ rdpr %canrestore, %g2
+ sub %g1, 1, %g3
+
+1: brz,pn %g2, 2f
+ sub %g2, 1, %g2
+ wrpr %g3, %cwp
+ stx %fp, [%sp + STACK_BIAS + RW_V9_I6]
+ stx %i7, [%sp + STACK_BIAS + RW_V9_I7]
+ ba,pt %xcc, 1b
+ sub %g3, 1, %g3
+
+2: wrpr %g1, %cwp
+ wrpr %o0, %pstate
+
+ retl
+ nop
+ .size stack_trace_flush,.-stack_trace_flush
+
+#ifdef CONFIG_SMP
+ .globl hard_smp_processor_id
+ .type hard_smp_processor_id,#function
+hard_smp_processor_id:
+#endif
+ .globl real_hard_smp_processor_id
+ .type real_hard_smp_processor_id,#function
+real_hard_smp_processor_id:
+ __GET_CPUID(%o0)
+ retl
+ nop
+#ifdef CONFIG_SMP
+ .size hard_smp_processor_id,.-hard_smp_processor_id
+#endif
+ .size real_hard_smp_processor_id,.-real_hard_smp_processor_id
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
new file mode 100644
index 000000000000..1d272c3b5740
--- /dev/null
+++ b/arch/sparc/kernel/hvapi.c
@@ -0,0 +1,193 @@
+/* hvapi.c: Hypervisor API management.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include <asm/hypervisor.h>
+#include <asm/oplib.h>
+
+/* If the hypervisor indicates that the API setting
+ * calls are unsupported, by returning HV_EBADTRAP or
+ * HV_ENOTSUPPORTED, we assume that API groups with the
+ * PRE_API flag set are major 1 minor 0.
+ */
+struct api_info {
+ unsigned long group;
+ unsigned long major;
+ unsigned long minor;
+ unsigned int refcnt;
+ unsigned int flags;
+#define FLAG_PRE_API 0x00000001
+};
+
+static struct api_info api_table[] = {
+ { .group = HV_GRP_SUN4V, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_CORE, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_INTR, },
+ { .group = HV_GRP_SOFT_STATE, },
+ { .group = HV_GRP_PCI, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_LDOM, },
+ { .group = HV_GRP_SVC_CHAN, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_NCS, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_RNG, },
+ { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_FIRE_PERF, },
+ { .group = HV_GRP_N2_CPU, },
+ { .group = HV_GRP_NIU, },
+ { .group = HV_GRP_VF_CPU, },
+ { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
+};
+
+static DEFINE_SPINLOCK(hvapi_lock);
+
+static struct api_info *__get_info(unsigned long group)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(api_table); i++) {
+ if (api_table[i].group == group)
+ return &api_table[i];
+ }
+ return NULL;
+}
+
+static void __get_ref(struct api_info *p)
+{
+ p->refcnt++;
+}
+
+static void __put_ref(struct api_info *p)
+{
+ if (--p->refcnt == 0) {
+ unsigned long ignore;
+
+ sun4v_set_version(p->group, 0, 0, &ignore);
+ p->major = p->minor = 0;
+ }
+}
+
+/* Register a hypervisor API specification. It indicates the
+ * API group and desired major+minor.
+ *
+ * If an existing API registration exists '0' (success) will
+ * be returned if it is compatible with the one being registered.
+ * Otherwise a negative error code will be returned.
+ *
+ * Otherwise an attempt will be made to negotiate the requested
+ * API group/major/minor with the hypervisor, and errors returned
+ * if that does not succeed.
+ */
+int sun4v_hvapi_register(unsigned long group, unsigned long major,
+ unsigned long *minor)
+{
+ struct api_info *p;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&hvapi_lock, flags);
+ p = __get_info(group);
+ ret = -EINVAL;
+ if (p) {
+ if (p->refcnt) {
+ ret = -EINVAL;
+ if (p->major == major) {
+ *minor = p->minor;
+ ret = 0;
+ }
+ } else {
+ unsigned long actual_minor;
+ unsigned long hv_ret;
+
+ hv_ret = sun4v_set_version(group, major, *minor,
+ &actual_minor);
+ ret = -EINVAL;
+ if (hv_ret == HV_EOK) {
+ *minor = actual_minor;
+ p->major = major;
+ p->minor = actual_minor;
+ ret = 0;
+ } else if (hv_ret == HV_EBADTRAP ||
+ hv_ret == HV_ENOTSUPPORTED) {
+ if (p->flags & FLAG_PRE_API) {
+ if (major == 1) {
+ p->major = 1;
+ p->minor = 0;
+ *minor = 0;
+ ret = 0;
+ }
+ }
+ }
+ }
+
+ if (ret == 0)
+ __get_ref(p);
+ }
+ spin_unlock_irqrestore(&hvapi_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(sun4v_hvapi_register);
+
+void sun4v_hvapi_unregister(unsigned long group)
+{
+ struct api_info *p;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hvapi_lock, flags);
+ p = __get_info(group);
+ if (p)
+ __put_ref(p);
+ spin_unlock_irqrestore(&hvapi_lock, flags);
+}
+EXPORT_SYMBOL(sun4v_hvapi_unregister);
+
+int sun4v_hvapi_get(unsigned long group,
+ unsigned long *major,
+ unsigned long *minor)
+{
+ struct api_info *p;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&hvapi_lock, flags);
+ ret = -EINVAL;
+ p = __get_info(group);
+ if (p && p->refcnt) {
+ *major = p->major;
+ *minor = p->minor;
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&hvapi_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(sun4v_hvapi_get);
+
+void __init sun4v_hvapi_init(void)
+{
+ unsigned long group, major, minor;
+
+ group = HV_GRP_SUN4V;
+ major = 1;
+ minor = 0;
+ if (sun4v_hvapi_register(group, major, &minor))
+ goto bad;
+
+ group = HV_GRP_CORE;
+ major = 1;
+ minor = 1;
+ if (sun4v_hvapi_register(group, major, &minor))
+ goto bad;
+
+ return;
+
+bad:
+ prom_printf("HVAPI: Cannot register API group "
+ "%lx with major(%u) minor(%u)\n",
+ group, major, minor);
+ prom_halt();
+}
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
new file mode 100644
index 000000000000..8a5f35ffb15e
--- /dev/null
+++ b/arch/sparc/kernel/hvcalls.S
@@ -0,0 +1,800 @@
+ /* %o0: devhandle
+ * %o1: devino
+ *
+ * returns %o0: sysino
+ */
+ENTRY(sun4v_devino_to_sysino)
+ mov HV_FAST_INTR_DEVINO2SYSINO, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+ENDPROC(sun4v_devino_to_sysino)
+
+ /* %o0: sysino
+ *
+ * returns %o0: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ */
+ENTRY(sun4v_intr_getenabled)
+ mov HV_FAST_INTR_GETENABLED, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+ENDPROC(sun4v_intr_getenabled)
+
+ /* %o0: sysino
+ * %o1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ */
+ENTRY(sun4v_intr_setenabled)
+ mov HV_FAST_INTR_SETENABLED, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_intr_setenabled)
+
+ /* %o0: sysino
+ *
+ * returns %o0: intr_state (HV_INTR_STATE_*)
+ */
+ENTRY(sun4v_intr_getstate)
+ mov HV_FAST_INTR_GETSTATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+ENDPROC(sun4v_intr_getstate)
+
+ /* %o0: sysino
+ * %o1: intr_state (HV_INTR_STATE_*)
+ */
+ENTRY(sun4v_intr_setstate)
+ mov HV_FAST_INTR_SETSTATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_intr_setstate)
+
+ /* %o0: sysino
+ *
+ * returns %o0: cpuid
+ */
+ENTRY(sun4v_intr_gettarget)
+ mov HV_FAST_INTR_GETTARGET, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+ENDPROC(sun4v_intr_gettarget)
+
+ /* %o0: sysino
+ * %o1: cpuid
+ */
+ENTRY(sun4v_intr_settarget)
+ mov HV_FAST_INTR_SETTARGET, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_intr_settarget)
+
+ /* %o0: cpuid
+ * %o1: pc
+ * %o2: rtba
+ * %o3: arg0
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_cpu_start)
+ mov HV_FAST_CPU_START, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_start)
+
+ /* %o0: cpuid
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_cpu_stop)
+ mov HV_FAST_CPU_STOP, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_stop)
+
+ /* returns %o0: status */
+ENTRY(sun4v_cpu_yield)
+ mov HV_FAST_CPU_YIELD, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_yield)
+
+ /* %o0: type
+ * %o1: queue paddr
+ * %o2: num queue entries
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_cpu_qconf)
+ mov HV_FAST_CPU_QCONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_qconf)
+
+ /* %o0: num cpus in cpu list
+ * %o1: cpu list paddr
+ * %o2: mondo block paddr
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_cpu_mondo_send)
+ mov HV_FAST_CPU_MONDO_SEND, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_cpu_mondo_send)
+
+ /* %o0: CPU ID
+ *
+ * returns %o0: -status if status non-zero, else
+ * %o0: cpu state as HV_CPU_STATE_*
+ */
+ENTRY(sun4v_cpu_state)
+ mov HV_FAST_CPU_STATE, %o5
+ ta HV_FAST_TRAP
+ brnz,pn %o0, 1f
+ sub %g0, %o0, %o0
+ mov %o1, %o0
+1: retl
+ nop
+ENDPROC(sun4v_cpu_state)
+
+ /* %o0: virtual address
+ * %o1: must be zero
+ * %o2: TTE
+ * %o3: HV_MMU_* flags
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_mmu_map_perm_addr)
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mmu_map_perm_addr)
+
+ /* %o0: number of TSB descriptions
+ * %o1: TSB descriptions real address
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_mmu_tsb_ctx0)
+ mov HV_FAST_MMU_TSB_CTX0, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mmu_tsb_ctx0)
+
+ /* %o0: API group number
+ * %o1: pointer to unsigned long major number storage
+ * %o2: pointer to unsigned long minor number storage
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_get_version)
+ mov HV_CORE_GET_VER, %o5
+ mov %o1, %o3
+ mov %o2, %o4
+ ta HV_CORE_TRAP
+ stx %o1, [%o3]
+ retl
+ stx %o2, [%o4]
+ENDPROC(sun4v_get_version)
+
+ /* %o0: API group number
+ * %o1: desired major number
+ * %o2: desired minor number
+ * %o3: pointer to unsigned long actual minor number storage
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_set_version)
+ mov HV_CORE_SET_VER, %o5
+ mov %o3, %o4
+ ta HV_CORE_TRAP
+ retl
+ stx %o1, [%o4]
+ENDPROC(sun4v_set_version)
+
+ /* %o0: pointer to unsigned long time
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_tod_get)
+ mov %o0, %o4
+ mov HV_FAST_TOD_GET, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_tod_get)
+
+ /* %o0: time
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_tod_set)
+ mov HV_FAST_TOD_SET, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_tod_set)
+
+ /* %o0: pointer to unsigned long status
+ *
+ * returns %o0: signed character
+ */
+ENTRY(sun4v_con_getchar)
+ mov %o0, %o4
+ mov HV_FAST_CONS_GETCHAR, %o5
+ clr %o0
+ clr %o1
+ ta HV_FAST_TRAP
+ stx %o0, [%o4]
+ retl
+ sra %o1, 0, %o0
+ENDPROC(sun4v_con_getchar)
+
+ /* %o0: signed long character
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_con_putchar)
+ mov HV_FAST_CONS_PUTCHAR, %o5
+ ta HV_FAST_TRAP
+ retl
+ sra %o0, 0, %o0
+ENDPROC(sun4v_con_putchar)
+
+ /* %o0: buffer real address
+ * %o1: buffer size
+ * %o2: pointer to unsigned long bytes_read
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_con_read)
+ mov %o2, %o4
+ mov HV_FAST_CONS_READ, %o5
+ ta HV_FAST_TRAP
+ brnz %o0, 1f
+ cmp %o1, -1 /* break */
+ be,a,pn %icc, 1f
+ mov %o1, %o0
+ cmp %o1, -2 /* hup */
+ be,a,pn %icc, 1f
+ mov %o1, %o0
+ stx %o1, [%o4]
+1: retl
+ nop
+ENDPROC(sun4v_con_read)
+
+ /* %o0: buffer real address
+ * %o1: buffer size
+ * %o2: pointer to unsigned long bytes_written
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_con_write)
+ mov %o2, %o4
+ mov HV_FAST_CONS_WRITE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_con_write)
+
+ /* %o0: soft state
+ * %o1: address of description string
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_mach_set_soft_state)
+ mov HV_FAST_MACH_SET_SOFT_STATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mach_set_soft_state)
+
+ /* %o0: exit code
+ *
+ * Does not return.
+ */
+ENTRY(sun4v_mach_exit)
+ mov HV_FAST_MACH_EXIT, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mach_exit)
+
+ /* %o0: buffer real address
+ * %o1: buffer length
+ * %o2: pointer to unsigned long real_buf_len
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_mach_desc)
+ mov %o2, %o4
+ mov HV_FAST_MACH_DESC, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_mach_desc)
+
+ /* %o0: new timeout in milliseconds
+ * %o1: pointer to unsigned long orig_timeout
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_mach_set_watchdog)
+ mov %o1, %o4
+ mov HV_FAST_MACH_SET_WATCHDOG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_mach_set_watchdog)
+
+ /* No inputs and does not return. */
+ENTRY(sun4v_mach_sir)
+ mov %o1, %o4
+ mov HV_FAST_MACH_SIR, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_mach_sir)
+
+ /* %o0: channel
+ * %o1: ra
+ * %o2: num_entries
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_tx_qconf)
+ mov HV_FAST_LDC_TX_QCONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_tx_qconf)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long ra
+ * %o2: pointer to unsigned long num_entries
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_tx_qinfo)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov HV_FAST_LDC_TX_QINFO, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ nop
+ENDPROC(sun4v_ldc_tx_qinfo)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long head_off
+ * %o2: pointer to unsigned long tail_off
+ * %o2: pointer to unsigned long chan_state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_tx_get_state)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov %o3, %g3
+ mov HV_FAST_LDC_TX_GET_STATE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ stx %o3, [%g3]
+ retl
+ nop
+ENDPROC(sun4v_ldc_tx_get_state)
+
+ /* %o0: channel
+ * %o1: tail_off
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_tx_set_qtail)
+ mov HV_FAST_LDC_TX_SET_QTAIL, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_tx_set_qtail)
+
+ /* %o0: channel
+ * %o1: ra
+ * %o2: num_entries
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_rx_qconf)
+ mov HV_FAST_LDC_RX_QCONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_rx_qconf)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long ra
+ * %o2: pointer to unsigned long num_entries
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_rx_qinfo)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov HV_FAST_LDC_RX_QINFO, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ nop
+ENDPROC(sun4v_ldc_rx_qinfo)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long head_off
+ * %o2: pointer to unsigned long tail_off
+ * %o2: pointer to unsigned long chan_state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_rx_get_state)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov %o3, %g3
+ mov HV_FAST_LDC_RX_GET_STATE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ stx %o3, [%g3]
+ retl
+ nop
+ENDPROC(sun4v_ldc_rx_get_state)
+
+ /* %o0: channel
+ * %o1: head_off
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_rx_set_qhead)
+ mov HV_FAST_LDC_RX_SET_QHEAD, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_rx_set_qhead)
+
+ /* %o0: channel
+ * %o1: ra
+ * %o2: num_entries
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_set_map_table)
+ mov HV_FAST_LDC_SET_MAP_TABLE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_set_map_table)
+
+ /* %o0: channel
+ * %o1: pointer to unsigned long ra
+ * %o2: pointer to unsigned long num_entries
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_get_map_table)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov HV_FAST_LDC_GET_MAP_TABLE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ nop
+ENDPROC(sun4v_ldc_get_map_table)
+
+ /* %o0: channel
+ * %o1: dir_code
+ * %o2: tgt_raddr
+ * %o3: lcl_raddr
+ * %o4: len
+ * %o5: pointer to unsigned long actual_len
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_copy)
+ mov %o5, %g1
+ mov HV_FAST_LDC_COPY, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ nop
+ENDPROC(sun4v_ldc_copy)
+
+ /* %o0: channel
+ * %o1: cookie
+ * %o2: pointer to unsigned long ra
+ * %o3: pointer to unsigned long perm
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_mapin)
+ mov %o2, %g1
+ mov %o3, %g2
+ mov HV_FAST_LDC_MAPIN, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ nop
+ENDPROC(sun4v_ldc_mapin)
+
+ /* %o0: ra
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_unmap)
+ mov HV_FAST_LDC_UNMAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_unmap)
+
+ /* %o0: channel
+ * %o1: cookie
+ * %o2: mte_cookie
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ldc_revoke)
+ mov HV_FAST_LDC_REVOKE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ldc_revoke)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: pointer to unsigned long cookie
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_get_cookie)
+ mov %o2, %g1
+ mov HV_FAST_VINTR_GET_COOKIE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ nop
+ENDPROC(sun4v_vintr_get_cookie)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: cookie
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_set_cookie)
+ mov HV_FAST_VINTR_SET_COOKIE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_vintr_set_cookie)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: pointer to unsigned long valid_state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_get_valid)
+ mov %o2, %g1
+ mov HV_FAST_VINTR_GET_VALID, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ nop
+ENDPROC(sun4v_vintr_get_valid)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: valid_state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_set_valid)
+ mov HV_FAST_VINTR_SET_VALID, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_vintr_set_valid)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: pointer to unsigned long state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_get_state)
+ mov %o2, %g1
+ mov HV_FAST_VINTR_GET_STATE, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ nop
+ENDPROC(sun4v_vintr_get_state)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: state
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_set_state)
+ mov HV_FAST_VINTR_SET_STATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_vintr_set_state)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: pointer to unsigned long cpuid
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_get_target)
+ mov %o2, %g1
+ mov HV_FAST_VINTR_GET_TARGET, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ nop
+ENDPROC(sun4v_vintr_get_target)
+
+ /* %o0: device handle
+ * %o1: device INO
+ * %o2: cpuid
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_vintr_set_target)
+ mov HV_FAST_VINTR_SET_TARGET, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_vintr_set_target)
+
+ /* %o0: NCS sub-function
+ * %o1: sub-function arg real-address
+ * %o2: sub-function arg size
+ *
+ * returns %o0: status
+ */
+ENTRY(sun4v_ncs_request)
+ mov HV_FAST_NCS_REQUEST, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_ncs_request)
+
+ENTRY(sun4v_svc_send)
+ save %sp, -192, %sp
+ mov %i0, %o0
+ mov %i1, %o1
+ mov %i2, %o2
+ mov HV_FAST_SVC_SEND, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%i3]
+ ret
+ restore
+ENDPROC(sun4v_svc_send)
+
+ENTRY(sun4v_svc_recv)
+ save %sp, -192, %sp
+ mov %i0, %o0
+ mov %i1, %o1
+ mov %i2, %o2
+ mov HV_FAST_SVC_RECV, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%i3]
+ ret
+ restore
+ENDPROC(sun4v_svc_recv)
+
+ENTRY(sun4v_svc_getstatus)
+ mov HV_FAST_SVC_GETSTATUS, %o5
+ mov %o1, %o4
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_svc_getstatus)
+
+ENTRY(sun4v_svc_setstatus)
+ mov HV_FAST_SVC_SETSTATUS, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_svc_setstatus)
+
+ENTRY(sun4v_svc_clrstatus)
+ mov HV_FAST_SVC_CLRSTATUS, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_svc_clrstatus)
+
+ENTRY(sun4v_mmustat_conf)
+ mov %o1, %o4
+ mov HV_FAST_MMUSTAT_CONF, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_mmustat_conf)
+
+ENTRY(sun4v_mmustat_info)
+ mov %o0, %o4
+ mov HV_FAST_MMUSTAT_INFO, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_mmustat_info)
+
+ENTRY(sun4v_mmu_demap_all)
+ clr %o0
+ clr %o1
+ mov HV_MMU_ALL, %o2
+ mov HV_FAST_MMU_DEMAP_ALL, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_mmu_demap_all)
+
+ENTRY(sun4v_niagara_getperf)
+ mov %o0, %o4
+ mov HV_FAST_GET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_niagara_getperf)
+
+ENTRY(sun4v_niagara_setperf)
+ mov HV_FAST_SET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_niagara_setperf)
+
+ENTRY(sun4v_niagara2_getperf)
+ mov %o0, %o4
+ mov HV_FAST_N2_GET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_niagara2_getperf)
+
+ENTRY(sun4v_niagara2_setperf)
+ mov HV_FAST_N2_SET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_niagara2_setperf)
diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S
new file mode 100644
index 000000000000..9365432904d6
--- /dev/null
+++ b/arch/sparc/kernel/hvtramp.S
@@ -0,0 +1,140 @@
+/* hvtramp.S: Hypervisor start-cpu trampoline code.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/init.h>
+
+#include <asm/thread_info.h>
+#include <asm/hypervisor.h>
+#include <asm/scratchpad.h>
+#include <asm/spitfire.h>
+#include <asm/hvtramp.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/pil.h>
+
+ __CPUINIT
+ .align 8
+ .globl hv_cpu_startup, hv_cpu_startup_end
+
+ /* This code executes directly out of the hypervisor
+ * with physical addressing (va==pa). %o0 contains
+ * our client argument which for Linux points to
+ * a descriptor data structure which defines the
+ * MMU entries we need to load up.
+ *
+ * After we set things up we enable the MMU and call
+ * into the kernel.
+ *
+ * First setup basic privileged cpu state.
+ */
+hv_cpu_startup:
+ SET_GL(0)
+ wrpr %g0, PIL_NORMAL_MAX, %pil
+ wrpr %g0, 0, %canrestore
+ wrpr %g0, 0, %otherwin
+ wrpr %g0, 6, %cansave
+ wrpr %g0, 6, %cleanwin
+ wrpr %g0, 0, %cwp
+ wrpr %g0, 0, %wstate
+ wrpr %g0, 0, %tl
+
+ sethi %hi(sparc64_ttable_tl0), %g1
+ wrpr %g1, %tba
+
+ mov %o0, %l0
+
+ lduw [%l0 + HVTRAMP_DESCR_CPU], %g1
+ mov SCRATCHPAD_CPUID, %g2
+ stxa %g1, [%g2] ASI_SCRATCHPAD
+
+ ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
+ stxa %g2, [%g0] ASI_SCRATCHPAD
+
+ mov 0, %l1
+ lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2
+ add %l0, HVTRAMP_DESCR_MAPS, %l3
+
+1: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0
+ clr %o1
+ ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2
+ mov HV_MMU_IMMU | HV_MMU_DMMU, %o3
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ ta HV_FAST_TRAP
+
+ brnz,pn %o0, 80f
+ nop
+
+ add %l1, 1, %l1
+ cmp %l1, %l2
+ blt,a,pt %xcc, 1b
+ add %l3, HVTRAMP_MAPPING_SIZE, %l3
+
+ ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
+ mov HV_FAST_MMU_FAULT_AREA_CONF, %o5
+ ta HV_FAST_TRAP
+
+ brnz,pn %o0, 80f
+ nop
+
+ wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
+
+ ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6
+
+ mov 1, %o0
+ set 1f, %o1
+ mov HV_FAST_MMU_ENABLE, %o5
+ ta HV_FAST_TRAP
+
+ ba,pt %xcc, 80f
+ nop
+
+1:
+ wr %g0, 0, %fprs
+ wr %g0, ASI_P, %asi
+
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+
+ mov %l6, %g6
+ ldx [%g6 + TI_TASK], %g4
+
+ mov 1, %g5
+ sllx %g5, THREAD_SHIFT, %g5
+ sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
+ add %g6, %g5, %sp
+ mov 0, %fp
+
+ call init_irqwork_curcpu
+ nop
+ call hard_smp_processor_id
+ nop
+
+ call sun4v_register_mondo_queues
+ nop
+
+ call init_cur_cpu_trap
+ mov %g6, %o0
+
+ wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
+
+ call smp_callin
+ nop
+ call cpu_idle
+ mov 0, %o0
+ call cpu_panic
+ nop
+
+80: ba,pt %xcc, 80b
+ nop
+
+ .align 8
+hv_cpu_startup_end:
diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c
index 223a6582e1e2..c16135e0c151 100644
--- a/arch/sparc/kernel/idprom.c
+++ b/arch/sparc/kernel/idprom.c
@@ -11,35 +11,37 @@
#include <asm/oplib.h>
#include <asm/idprom.h>
-#include <asm/machines.h> /* Fun with Sun released architectures. */
struct idprom *idprom;
static struct idprom idprom_buffer;
+#ifdef CONFIG_SPARC32
+#include <asm/machines.h> /* Fun with Sun released architectures. */
+
/* Here is the master table of Sun machines which use some implementation
* of the Sparc CPU and have a meaningful IDPROM machtype value that we
* know about. See asm-sparc/machines.h for empirical constants.
*/
static struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = {
/* First, Sun4's */
-{ "Sun 4/100 Series", (SM_SUN4 | SM_4_110) },
-{ "Sun 4/200 Series", (SM_SUN4 | SM_4_260) },
-{ "Sun 4/300 Series", (SM_SUN4 | SM_4_330) },
-{ "Sun 4/400 Series", (SM_SUN4 | SM_4_470) },
+{ .name = "Sun 4/100 Series", .id_machtype = (SM_SUN4 | SM_4_110) },
+{ .name = "Sun 4/200 Series", .id_machtype = (SM_SUN4 | SM_4_260) },
+{ .name = "Sun 4/300 Series", .id_machtype = (SM_SUN4 | SM_4_330) },
+{ .name = "Sun 4/400 Series", .id_machtype = (SM_SUN4 | SM_4_470) },
/* Now, Sun4c's */
-{ "Sun4c SparcStation 1", (SM_SUN4C | SM_4C_SS1) },
-{ "Sun4c SparcStation IPC", (SM_SUN4C | SM_4C_IPC) },
-{ "Sun4c SparcStation 1+", (SM_SUN4C | SM_4C_SS1PLUS) },
-{ "Sun4c SparcStation SLC", (SM_SUN4C | SM_4C_SLC) },
-{ "Sun4c SparcStation 2", (SM_SUN4C | SM_4C_SS2) },
-{ "Sun4c SparcStation ELC", (SM_SUN4C | SM_4C_ELC) },
-{ "Sun4c SparcStation IPX", (SM_SUN4C | SM_4C_IPX) },
+{ .name = "Sun4c SparcStation 1", .id_machtype = (SM_SUN4C | SM_4C_SS1) },
+{ .name = "Sun4c SparcStation IPC", .id_machtype = (SM_SUN4C | SM_4C_IPC) },
+{ .name = "Sun4c SparcStation 1+", .id_machtype = (SM_SUN4C | SM_4C_SS1PLUS) },
+{ .name = "Sun4c SparcStation SLC", .id_machtype = (SM_SUN4C | SM_4C_SLC) },
+{ .name = "Sun4c SparcStation 2", .id_machtype = (SM_SUN4C | SM_4C_SS2) },
+{ .name = "Sun4c SparcStation ELC", .id_machtype = (SM_SUN4C | SM_4C_ELC) },
+{ .name = "Sun4c SparcStation IPX", .id_machtype = (SM_SUN4C | SM_4C_IPX) },
/* Finally, early Sun4m's */
-{ "Sun4m SparcSystem600", (SM_SUN4M | SM_4M_SS60) },
-{ "Sun4m SparcStation10/20", (SM_SUN4M | SM_4M_SS50) },
-{ "Sun4m SparcStation5", (SM_SUN4M | SM_4M_SS40) },
+{ .name = "Sun4m SparcSystem600", .id_machtype = (SM_SUN4M | SM_4M_SS60) },
+{ .name = "Sun4m SparcStation10/20", .id_machtype = (SM_SUN4M | SM_4M_SS50) },
+{ .name = "Sun4m SparcStation5", .id_machtype = (SM_SUN4M | SM_4M_SS40) },
/* One entry for the OBP arch's which are sun4d, sun4e, and newer sun4m's */
-{ "Sun4M OBP based system", (SM_SUN4M_OBP | 0x0) } };
+{ .name = "Sun4M OBP based system", .id_machtype = (SM_SUN4M_OBP | 0x0) } };
static void __init display_system_type(unsigned char machtype)
{
@@ -47,21 +49,25 @@ static void __init display_system_type(unsigned char machtype)
register int i;
for (i = 0; i < NUM_SUN_MACHINES; i++) {
- if(Sun_Machines[i].id_machtype == machtype) {
+ if (Sun_Machines[i].id_machtype == machtype) {
if (machtype != (SM_SUN4M_OBP | 0x00) ||
prom_getproperty(prom_root_node, "banner-name",
sysname, sizeof(sysname)) <= 0)
- printk("TYPE: %s\n", Sun_Machines[i].name);
+ printk(KERN_WARNING "TYPE: %s\n",
+ Sun_Machines[i].name);
else
- printk("TYPE: %s\n", sysname);
+ printk(KERN_WARNING "TYPE: %s\n", sysname);
return;
}
}
- prom_printf("IDPROM: Bogus id_machtype value, 0x%x\n", machtype);
- prom_halt();
+ prom_printf("IDPROM: Warning, bogus id_machtype value, 0x%x\n", machtype);
}
-
+#else
+static void __init display_system_type(unsigned char machtype)
+{
+}
+#endif
/* Calculate the IDPROM checksum (xor of the data bytes). */
static unsigned char __init calc_idprom_cksum(struct idprom *idprom)
{
@@ -80,21 +86,14 @@ void __init idprom_init(void)
idprom = &idprom_buffer;
- if (idprom->id_format != 0x01) {
- prom_printf("IDPROM: Unknown format type!\n");
- prom_halt();
- }
+ if (idprom->id_format != 0x01)
+ prom_printf("IDPROM: Warning, unknown format type!\n");
- if (idprom->id_cksum != calc_idprom_cksum(idprom)) {
- prom_printf("IDPROM: Checksum failure (nvram=%x, calc=%x)!\n",
+ if (idprom->id_cksum != calc_idprom_cksum(idprom))
+ prom_printf("IDPROM: Warning, checksum failure (nvram=%x, calc=%x)!\n",
idprom->id_cksum, calc_idprom_cksum(idprom));
- prom_halt();
- }
display_system_type(idprom->id_machtype);
- printk("Ethernet address: %x:%x:%x:%x:%x:%x\n",
- idprom->id_ethaddr[0], idprom->id_ethaddr[1],
- idprom->id_ethaddr[2], idprom->id_ethaddr[3],
- idprom->id_ethaddr[4], idprom->id_ethaddr[5]);
+ printk(KERN_WARNING "Ethernet address: %pM\n", idprom->id_ethaddr);
}
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index 8e64ebc445ef..f28cb8278e98 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -8,7 +8,6 @@
#include <asm/pgtable.h>
#include <asm/uaccess.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
@@ -23,6 +22,5 @@ EXPORT_SYMBOL(init_task);
* in etrap.S which assumes it.
*/
union thread_union init_thread_union
- __attribute__((section (".text\"\n\t#")))
- __attribute__((aligned (THREAD_SIZE)))
+ __attribute__((section (".data.init_task")))
= { INIT_THREAD_INFO(init_task) };
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
new file mode 100644
index 000000000000..1cc1995531e2
--- /dev/null
+++ b/arch/sparc/kernel/iommu.c
@@ -0,0 +1,866 @@
+/* iommu.c: Generic sparc64 IOMMU support.
+ *
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/iommu-helper.h>
+
+#ifdef CONFIG_PCI
+#include <linux/pci.h>
+#endif
+
+#include <asm/iommu.h>
+
+#include "iommu_common.h"
+
+#define STC_CTXMATCH_ADDR(STC, CTX) \
+ ((STC)->strbuf_ctxmatch_base + ((CTX) << 3))
+#define STC_FLUSHFLAG_INIT(STC) \
+ (*((STC)->strbuf_flushflag) = 0UL)
+#define STC_FLUSHFLAG_SET(STC) \
+ (*((STC)->strbuf_flushflag) != 0UL)
+
+#define iommu_read(__reg) \
+({ u64 __ret; \
+ __asm__ __volatile__("ldxa [%1] %2, %0" \
+ : "=r" (__ret) \
+ : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \
+ : "memory"); \
+ __ret; \
+})
+#define iommu_write(__reg, __val) \
+ __asm__ __volatile__("stxa %0, [%1] %2" \
+ : /* no outputs */ \
+ : "r" (__val), "r" (__reg), \
+ "i" (ASI_PHYS_BYPASS_EC_E))
+
+/* Must be invoked under the IOMMU lock. */
+static void iommu_flushall(struct iommu *iommu)
+{
+ if (iommu->iommu_flushinv) {
+ iommu_write(iommu->iommu_flushinv, ~(u64)0);
+ } else {
+ unsigned long tag;
+ int entry;
+
+ tag = iommu->iommu_tags;
+ for (entry = 0; entry < 16; entry++) {
+ iommu_write(tag, 0);
+ tag += 8;
+ }
+
+ /* Ensure completion of previous PIO writes. */
+ (void) iommu_read(iommu->write_complete_reg);
+ }
+}
+
+#define IOPTE_CONSISTENT(CTX) \
+ (IOPTE_VALID | IOPTE_CACHE | \
+ (((CTX) << 47) & IOPTE_CONTEXT))
+
+#define IOPTE_STREAMING(CTX) \
+ (IOPTE_CONSISTENT(CTX) | IOPTE_STBUF)
+
+/* Existing mappings are never marked invalid, instead they
+ * are pointed to a dummy page.
+ */
+#define IOPTE_IS_DUMMY(iommu, iopte) \
+ ((iopte_val(*iopte) & IOPTE_PAGE) == (iommu)->dummy_page_pa)
+
+static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
+{
+ unsigned long val = iopte_val(*iopte);
+
+ val &= ~IOPTE_PAGE;
+ val |= iommu->dummy_page_pa;
+
+ iopte_val(*iopte) = val;
+}
+
+/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
+ * facility it must all be done in one pass while under the iommu lock.
+ *
+ * On sun4u platforms, we only flush the IOMMU once every time we've passed
+ * over the entire page table doing allocations. Therefore we only ever advance
+ * the hint and cannot backtrack it.
+ */
+unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu *iommu,
+ unsigned long npages,
+ unsigned long *handle)
+{
+ unsigned long n, end, start, limit, boundary_size;
+ struct iommu_arena *arena = &iommu->arena;
+ int pass = 0;
+
+ /* This allocator was derived from x86_64's bit string search */
+
+ /* Sanity check */
+ if (unlikely(npages == 0)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (handle && *handle)
+ start = *handle;
+ else
+ start = arena->hint;
+
+ limit = arena->limit;
+
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the beginning and flush.
+ */
+ if (start >= limit) {
+ start = 0;
+ if (iommu->flush_all)
+ iommu->flush_all(iommu);
+ }
+
+ again:
+
+ if (dev)
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ 1 << IO_PAGE_SHIFT);
+ else
+ boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
+
+ n = iommu_area_alloc(arena->map, limit, start, npages,
+ iommu->page_table_map_base >> IO_PAGE_SHIFT,
+ boundary_size >> IO_PAGE_SHIFT, 0);
+ if (n == -1) {
+ if (likely(pass < 1)) {
+ /* First failure, rescan from the beginning. */
+ start = 0;
+ if (iommu->flush_all)
+ iommu->flush_all(iommu);
+ pass++;
+ goto again;
+ } else {
+ /* Second failure, give up */
+ return DMA_ERROR_CODE;
+ }
+ }
+
+ end = n + npages;
+
+ arena->hint = end;
+
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+
+ return n;
+}
+
+void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
+{
+ struct iommu_arena *arena = &iommu->arena;
+ unsigned long entry;
+
+ entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+
+ iommu_area_free(arena->map, entry, npages);
+}
+
+int iommu_table_init(struct iommu *iommu, int tsbsize,
+ u32 dma_offset, u32 dma_addr_mask,
+ int numa_node)
+{
+ unsigned long i, order, sz, num_tsb_entries;
+ struct page *page;
+
+ num_tsb_entries = tsbsize / sizeof(iopte_t);
+
+ /* Setup initial software IOMMU state. */
+ spin_lock_init(&iommu->lock);
+ iommu->ctx_lowest_free = 1;
+ iommu->page_table_map_base = dma_offset;
+ iommu->dma_addr_mask = dma_addr_mask;
+
+ /* Allocate and initialize the free area map. */
+ sz = num_tsb_entries / 8;
+ sz = (sz + 7UL) & ~7UL;
+ iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
+ if (!iommu->arena.map) {
+ printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
+ return -ENOMEM;
+ }
+ memset(iommu->arena.map, 0, sz);
+ iommu->arena.limit = num_tsb_entries;
+
+ if (tlb_type != hypervisor)
+ iommu->flush_all = iommu_flushall;
+
+ /* Allocate and initialize the dummy page which we
+ * set inactive IO PTEs to point to.
+ */
+ page = alloc_pages_node(numa_node, GFP_KERNEL, 0);
+ if (!page) {
+ printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n");
+ goto out_free_map;
+ }
+ iommu->dummy_page = (unsigned long) page_address(page);
+ memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
+ iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
+
+ /* Now allocate and setup the IOMMU page table itself. */
+ order = get_order(tsbsize);
+ page = alloc_pages_node(numa_node, GFP_KERNEL, order);
+ if (!page) {
+ printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n");
+ goto out_free_dummy_page;
+ }
+ iommu->page_table = (iopte_t *)page_address(page);
+
+ for (i = 0; i < num_tsb_entries; i++)
+ iopte_make_dummy(iommu, &iommu->page_table[i]);
+
+ return 0;
+
+out_free_dummy_page:
+ free_page(iommu->dummy_page);
+ iommu->dummy_page = 0UL;
+
+out_free_map:
+ kfree(iommu->arena.map);
+ iommu->arena.map = NULL;
+
+ return -ENOMEM;
+}
+
+static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
+ unsigned long npages)
+{
+ unsigned long entry;
+
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
+ if (unlikely(entry == DMA_ERROR_CODE))
+ return NULL;
+
+ return iommu->page_table + entry;
+}
+
+static int iommu_alloc_ctx(struct iommu *iommu)
+{
+ int lowest = iommu->ctx_lowest_free;
+ int sz = IOMMU_NUM_CTXS - lowest;
+ int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest);
+
+ if (unlikely(n == sz)) {
+ n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
+ if (unlikely(n == lowest)) {
+ printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
+ n = 0;
+ }
+ }
+ if (n)
+ __set_bit(n, iommu->ctx_bitmap);
+
+ return n;
+}
+
+static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
+{
+ if (likely(ctx)) {
+ __clear_bit(ctx, iommu->ctx_bitmap);
+ if (ctx < iommu->ctx_lowest_free)
+ iommu->ctx_lowest_free = ctx;
+ }
+}
+
+static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addrp, gfp_t gfp)
+{
+ unsigned long flags, order, first_page;
+ struct iommu *iommu;
+ struct page *page;
+ int npages, nid;
+ iopte_t *iopte;
+ void *ret;
+
+ size = IO_PAGE_ALIGN(size);
+ order = get_order(size);
+ if (order >= 10)
+ return NULL;
+
+ nid = dev->archdata.numa_node;
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
+ return NULL;
+
+ first_page = (unsigned long) page_address(page);
+ memset((char *)first_page, 0, PAGE_SIZE << order);
+
+ iommu = dev->archdata.iommu;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (unlikely(iopte == NULL)) {
+ free_pages(first_page, order);
+ return NULL;
+ }
+
+ *dma_addrp = (iommu->page_table_map_base +
+ ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
+ ret = (void *) first_page;
+ npages = size >> IO_PAGE_SHIFT;
+ first_page = __pa(first_page);
+ while (npages--) {
+ iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
+ IOPTE_WRITE |
+ (first_page & IOPTE_PAGE));
+ iopte++;
+ first_page += IO_PAGE_SIZE;
+ }
+
+ return ret;
+}
+
+static void dma_4u_free_coherent(struct device *dev, size_t size,
+ void *cpu, dma_addr_t dvma)
+{
+ struct iommu *iommu;
+ iopte_t *iopte;
+ unsigned long flags, order, npages;
+
+ npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+ iommu = dev->archdata.iommu;
+ iopte = iommu->page_table +
+ ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ iommu_range_free(iommu, dvma, npages);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ order = get_order(size);
+ if (order < 10)
+ free_pages((unsigned long)cpu, order);
+}
+
+static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz,
+ enum dma_data_direction direction)
+{
+ struct iommu *iommu;
+ struct strbuf *strbuf;
+ iopte_t *base;
+ unsigned long flags, npages, oaddr;
+ unsigned long i, base_paddr, ctx;
+ u32 bus_addr, ret;
+ unsigned long iopte_protection;
+
+ iommu = dev->archdata.iommu;
+ strbuf = dev->archdata.stc;
+
+ if (unlikely(direction == DMA_NONE))
+ goto bad_no_ctx;
+
+ oaddr = (unsigned long)ptr;
+ npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
+ npages >>= IO_PAGE_SHIFT;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ base = alloc_npages(dev, iommu, npages);
+ ctx = 0;
+ if (iommu->iommu_ctxflush)
+ ctx = iommu_alloc_ctx(iommu);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (unlikely(!base))
+ goto bad;
+
+ bus_addr = (iommu->page_table_map_base +
+ ((base - iommu->page_table) << IO_PAGE_SHIFT));
+ ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
+ base_paddr = __pa(oaddr & IO_PAGE_MASK);
+ if (strbuf->strbuf_enabled)
+ iopte_protection = IOPTE_STREAMING(ctx);
+ else
+ iopte_protection = IOPTE_CONSISTENT(ctx);
+ if (direction != DMA_TO_DEVICE)
+ iopte_protection |= IOPTE_WRITE;
+
+ for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
+ iopte_val(*base) = iopte_protection | base_paddr;
+
+ return ret;
+
+bad:
+ iommu_free_ctx(iommu, ctx);
+bad_no_ctx:
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+}
+
+static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu,
+ u32 vaddr, unsigned long ctx, unsigned long npages,
+ enum dma_data_direction direction)
+{
+ int limit;
+
+ if (strbuf->strbuf_ctxflush &&
+ iommu->iommu_ctxflush) {
+ unsigned long matchreg, flushreg;
+ u64 val;
+
+ flushreg = strbuf->strbuf_ctxflush;
+ matchreg = STC_CTXMATCH_ADDR(strbuf, ctx);
+
+ iommu_write(flushreg, ctx);
+ val = iommu_read(matchreg);
+ val &= 0xffff;
+ if (!val)
+ goto do_flush_sync;
+
+ while (val) {
+ if (val & 0x1)
+ iommu_write(flushreg, ctx);
+ val >>= 1;
+ }
+ val = iommu_read(matchreg);
+ if (unlikely(val)) {
+ printk(KERN_WARNING "strbuf_flush: ctx flush "
+ "timeout matchreg[%lx] ctx[%lx]\n",
+ val, ctx);
+ goto do_page_flush;
+ }
+ } else {
+ unsigned long i;
+
+ do_page_flush:
+ for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
+ iommu_write(strbuf->strbuf_pflush, vaddr);
+ }
+
+do_flush_sync:
+ /* If the device could not have possibly put dirty data into
+ * the streaming cache, no flush-flag synchronization needs
+ * to be performed.
+ */
+ if (direction == DMA_TO_DEVICE)
+ return;
+
+ STC_FLUSHFLAG_INIT(strbuf);
+ iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
+ (void) iommu_read(iommu->write_complete_reg);
+
+ limit = 100000;
+ while (!STC_FLUSHFLAG_SET(strbuf)) {
+ limit--;
+ if (!limit)
+ break;
+ udelay(1);
+ rmb();
+ }
+ if (!limit)
+ printk(KERN_WARNING "strbuf_flush: flushflag timeout "
+ "vaddr[%08x] ctx[%lx] npages[%ld]\n",
+ vaddr, ctx, npages);
+}
+
+static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
+ size_t sz, enum dma_data_direction direction)
+{
+ struct iommu *iommu;
+ struct strbuf *strbuf;
+ iopte_t *base;
+ unsigned long flags, npages, ctx, i;
+
+ if (unlikely(direction == DMA_NONE)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return;
+ }
+
+ iommu = dev->archdata.iommu;
+ strbuf = dev->archdata.stc;
+
+ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+ npages >>= IO_PAGE_SHIFT;
+ base = iommu->page_table +
+ ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+ bus_addr &= IO_PAGE_MASK;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ /* Record the context, if any. */
+ ctx = 0;
+ if (iommu->iommu_ctxflush)
+ ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+
+ /* Step 1: Kick data out of streaming buffers if necessary. */
+ if (strbuf->strbuf_enabled)
+ strbuf_flush(strbuf, iommu, bus_addr, ctx,
+ npages, direction);
+
+ /* Step 2: Clear out TSB entries. */
+ for (i = 0; i < npages; i++)
+ iopte_make_dummy(iommu, base + i);
+
+ iommu_range_free(iommu, bus_addr, npages);
+
+ iommu_free_ctx(iommu, ctx);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ struct scatterlist *s, *outs, *segstart;
+ unsigned long flags, handle, prot, ctx;
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned int max_seg_size;
+ unsigned long seg_boundary_size;
+ int outcount, incount, i;
+ struct strbuf *strbuf;
+ struct iommu *iommu;
+ unsigned long base_shift;
+
+ BUG_ON(direction == DMA_NONE);
+
+ iommu = dev->archdata.iommu;
+ strbuf = dev->archdata.stc;
+ if (nelems == 0 || !iommu)
+ return 0;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ ctx = 0;
+ if (iommu->iommu_ctxflush)
+ ctx = iommu_alloc_ctx(iommu);
+
+ if (strbuf->strbuf_enabled)
+ prot = IOPTE_STREAMING(ctx);
+ else
+ prot = IOPTE_CONSISTENT(ctx);
+ if (direction != DMA_TO_DEVICE)
+ prot |= IOPTE_WRITE;
+
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
+
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
+
+ max_seg_size = dma_get_max_seg_size(dev);
+ seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
+ base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
+ for_each_sg(sglist, s, nelems, i) {
+ unsigned long paddr, npages, entry, out_entry = 0, slen;
+ iopte_t *base;
+
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+ npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
+ entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+ " npages %lx\n", iommu, paddr, npages);
+ goto iommu_map_failed;
+ }
+
+ base = iommu->page_table + entry;
+
+ /* Convert entry to a dma_addr_t */
+ dma_addr = iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT);
+ dma_addr |= (s->offset & ~IO_PAGE_MASK);
+
+ /* Insert into HW table */
+ paddr &= IO_PAGE_MASK;
+ while (npages--) {
+ iopte_val(*base) = prot | paddr;
+ base++;
+ paddr += IO_PAGE_SIZE;
+ }
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if ((dma_addr != dma_next) ||
+ (outs->dma_length + s->length > max_seg_size) ||
+ (is_span_boundary(out_entry, base_shift,
+ seg_boundary_size, outs, s))) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++;
+ outs = sg_next(outs);
+ } else {
+ outs->dma_length += s->length;
+ }
+ }
+
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
+ out_entry = entry;
+ }
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
+ }
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (outcount < incount) {
+ outs = sg_next(outs);
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+
+ return outcount;
+
+iommu_map_failed:
+ for_each_sg(sglist, s, nelems, i) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages, entry, j;
+ iopte_t *base;
+
+ vaddr = s->dma_address & IO_PAGE_MASK;
+ npages = iommu_num_pages(s->dma_address, s->dma_length,
+ IO_PAGE_SIZE);
+ iommu_range_free(iommu, vaddr, npages);
+
+ entry = (vaddr - iommu->page_table_map_base)
+ >> IO_PAGE_SHIFT;
+ base = iommu->page_table + entry;
+
+ for (j = 0; j < npages; j++)
+ iopte_make_dummy(iommu, base + j);
+
+ s->dma_address = DMA_ERROR_CODE;
+ s->dma_length = 0;
+ }
+ if (s == outs)
+ break;
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return 0;
+}
+
+/* If contexts are being used, they are the same in all of the mappings
+ * we make for a particular SG.
+ */
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+{
+ unsigned long ctx = 0;
+
+ if (iommu->iommu_ctxflush) {
+ iopte_t *base;
+ u32 bus_addr;
+
+ bus_addr = sg->dma_address & IO_PAGE_MASK;
+ base = iommu->page_table +
+ ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+ ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+ }
+ return ctx;
+}
+
+static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ unsigned long flags, ctx;
+ struct scatterlist *sg;
+ struct strbuf *strbuf;
+ struct iommu *iommu;
+
+ BUG_ON(direction == DMA_NONE);
+
+ iommu = dev->archdata.iommu;
+ strbuf = dev->archdata.stc;
+
+ ctx = fetch_sg_ctx(iommu, sglist);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ sg = sglist;
+ while (nelems--) {
+ dma_addr_t dma_handle = sg->dma_address;
+ unsigned int len = sg->dma_length;
+ unsigned long npages, entry;
+ iopte_t *base;
+ int i;
+
+ if (!len)
+ break;
+ npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+ iommu_range_free(iommu, dma_handle, npages);
+
+ entry = ((dma_handle - iommu->page_table_map_base)
+ >> IO_PAGE_SHIFT);
+ base = iommu->page_table + entry;
+
+ dma_handle &= IO_PAGE_MASK;
+ if (strbuf->strbuf_enabled)
+ strbuf_flush(strbuf, iommu, dma_handle, ctx,
+ npages, direction);
+
+ for (i = 0; i < npages; i++)
+ iopte_make_dummy(iommu, base + i);
+
+ sg = sg_next(sg);
+ }
+
+ iommu_free_ctx(iommu, ctx);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void dma_4u_sync_single_for_cpu(struct device *dev,
+ dma_addr_t bus_addr, size_t sz,
+ enum dma_data_direction direction)
+{
+ struct iommu *iommu;
+ struct strbuf *strbuf;
+ unsigned long flags, ctx, npages;
+
+ iommu = dev->archdata.iommu;
+ strbuf = dev->archdata.stc;
+
+ if (!strbuf->strbuf_enabled)
+ return;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+ npages >>= IO_PAGE_SHIFT;
+ bus_addr &= IO_PAGE_MASK;
+
+ /* Step 1: Record the context, if any. */
+ ctx = 0;
+ if (iommu->iommu_ctxflush &&
+ strbuf->strbuf_ctxflush) {
+ iopte_t *iopte;
+
+ iopte = iommu->page_table +
+ ((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT);
+ ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
+ }
+
+ /* Step 2: Kick data out of streaming buffers. */
+ strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void dma_4u_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
+{
+ struct iommu *iommu;
+ struct strbuf *strbuf;
+ unsigned long flags, ctx, npages, i;
+ struct scatterlist *sg, *sgprv;
+ u32 bus_addr;
+
+ iommu = dev->archdata.iommu;
+ strbuf = dev->archdata.stc;
+
+ if (!strbuf->strbuf_enabled)
+ return;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ /* Step 1: Record the context, if any. */
+ ctx = 0;
+ if (iommu->iommu_ctxflush &&
+ strbuf->strbuf_ctxflush) {
+ iopte_t *iopte;
+
+ iopte = iommu->page_table +
+ ((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+ ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
+ }
+
+ /* Step 2: Kick data out of streaming buffers. */
+ bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
+ sgprv = NULL;
+ for_each_sg(sglist, sg, nelems, i) {
+ if (sg->dma_length == 0)
+ break;
+ sgprv = sg;
+ }
+
+ npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length)
+ - bus_addr) >> IO_PAGE_SHIFT;
+ strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static const struct dma_ops sun4u_dma_ops = {
+ .alloc_coherent = dma_4u_alloc_coherent,
+ .free_coherent = dma_4u_free_coherent,
+ .map_single = dma_4u_map_single,
+ .unmap_single = dma_4u_unmap_single,
+ .map_sg = dma_4u_map_sg,
+ .unmap_sg = dma_4u_unmap_sg,
+ .sync_single_for_cpu = dma_4u_sync_single_for_cpu,
+ .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu,
+};
+
+const struct dma_ops *dma_ops = &sun4u_dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+int dma_supported(struct device *dev, u64 device_mask)
+{
+ struct iommu *iommu = dev->archdata.iommu;
+ u64 dma_addr_mask = iommu->dma_addr_mask;
+
+ if (device_mask >= (1UL << 32UL))
+ return 0;
+
+ if ((device_mask & dma_addr_mask) == dma_addr_mask)
+ return 1;
+
+#ifdef CONFIG_PCI
+ if (dev->bus == &pci_bus_type)
+ return pci_dma_supported(to_pci_dev(dev), device_mask);
+#endif
+
+ return 0;
+}
+EXPORT_SYMBOL(dma_supported);
+
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+#ifdef CONFIG_PCI
+ if (dev->bus == &pci_bus_type)
+ return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
+#endif
+ return -EINVAL;
+}
+EXPORT_SYMBOL(dma_set_mask);
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
new file mode 100644
index 000000000000..591f5879039c
--- /dev/null
+++ b/arch/sparc/kernel/iommu_common.h
@@ -0,0 +1,59 @@
+/* iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
+ *
+ * Copyright (C) 1999, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _IOMMU_COMMON_H
+#define _IOMMU_COMMON_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/iommu-helper.h>
+
+#include <asm/iommu.h>
+#include <asm/scatterlist.h>
+
+/*
+ * These give mapping size of each iommu pte/tlb.
+ */
+#define IO_PAGE_SHIFT 13
+#define IO_PAGE_SIZE (1UL << IO_PAGE_SHIFT)
+#define IO_PAGE_MASK (~(IO_PAGE_SIZE-1))
+#define IO_PAGE_ALIGN(addr) ALIGN(addr, IO_PAGE_SIZE)
+
+#define IO_TSB_ENTRIES (128*1024)
+#define IO_TSB_SIZE (IO_TSB_ENTRIES * 8)
+
+/*
+ * This is the hardwired shift in the iotlb tag/data parts.
+ */
+#define IOMMU_PAGE_SHIFT 13
+
+#define SG_ENT_PHYS_ADDRESS(SG) (__pa(sg_virt((SG))))
+
+static inline int is_span_boundary(unsigned long entry,
+ unsigned long shift,
+ unsigned long boundary_size,
+ struct scatterlist *outs,
+ struct scatterlist *sg)
+{
+ unsigned long paddr = SG_ENT_PHYS_ADDRESS(outs);
+ int nr = iommu_num_pages(paddr, outs->dma_length + sg->length,
+ IO_PAGE_SIZE);
+
+ return iommu_is_span_boundary(entry, nr, shift, boundary_size);
+}
+
+extern unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu *iommu,
+ unsigned long npages,
+ unsigned long *handle);
+extern void iommu_range_free(struct iommu *iommu,
+ dma_addr_t dma_addr,
+ unsigned long npages);
+
+#endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 4f025b36934b..7ce14f05eb48 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -552,8 +552,8 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents,
/* IIep is write-through, not flushing. */
for_each_sg(sgl, sg, nents, n) {
BUG_ON(page_address(sg_page(sg)) == NULL);
- sg->dvma_address = virt_to_phys(sg_virt(sg));
- sg->dvma_length = sg->length;
+ sg->dma_address = virt_to_phys(sg_virt(sg));
+ sg->dma_length = sg->length;
}
return nents;
}
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq_32.c
index 93e1d1c65290..f3488c45d57a 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -46,6 +46,7 @@
#include <asm/cacheflush.h>
#include <asm/irq_regs.h>
+#include "kernel.h"
#include "irq.h"
#ifdef CONFIG_SMP
@@ -592,19 +593,19 @@ EXPORT_SYMBOL(request_irq);
void disable_irq_nosync(unsigned int irq)
{
- return __disable_irq(irq);
+ __disable_irq(irq);
}
EXPORT_SYMBOL(disable_irq_nosync);
void disable_irq(unsigned int irq)
{
- return __disable_irq(irq);
+ __disable_irq(irq);
}
EXPORT_SYMBOL(disable_irq);
void enable_irq(unsigned int irq)
{
- return __enable_irq(irq);
+ __enable_irq(irq);
}
EXPORT_SYMBOL(enable_irq);
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
new file mode 100644
index 000000000000..cab8e0286871
--- /dev/null
+++ b/arch/sparc/kernel/irq_64.c
@@ -0,0 +1,1104 @@
+/* irq.c: UltraSparc IRQ handling/init/registry.
+ *
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/linkage.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/upa.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/timer.h>
+#include <asm/smp.h>
+#include <asm/starfire.h>
+#include <asm/uaccess.h>
+#include <asm/cache.h>
+#include <asm/cpudata.h>
+#include <asm/auxio.h>
+#include <asm/head.h>
+#include <asm/hypervisor.h>
+#include <asm/cacheflush.h>
+
+#include "entry.h"
+
+#define NUM_IVECS (IMAP_INR + 1)
+
+struct ino_bucket *ivector_table;
+unsigned long ivector_table_pa;
+
+/* On several sun4u processors, it is illegal to mix bypass and
+ * non-bypass accesses. Therefore we access all INO buckets
+ * using bypass accesses only.
+ */
+static unsigned long bucket_get_chain_pa(unsigned long bucket_pa)
+{
+ unsigned long ret;
+
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=&r" (ret)
+ : "r" (bucket_pa +
+ offsetof(struct ino_bucket,
+ __irq_chain_pa)),
+ "i" (ASI_PHYS_USE_EC));
+
+ return ret;
+}
+
+static void bucket_clear_chain_pa(unsigned long bucket_pa)
+{
+ __asm__ __volatile__("stxa %%g0, [%0] %1"
+ : /* no outputs */
+ : "r" (bucket_pa +
+ offsetof(struct ino_bucket,
+ __irq_chain_pa)),
+ "i" (ASI_PHYS_USE_EC));
+}
+
+static unsigned int bucket_get_virt_irq(unsigned long bucket_pa)
+{
+ unsigned int ret;
+
+ __asm__ __volatile__("lduwa [%1] %2, %0"
+ : "=&r" (ret)
+ : "r" (bucket_pa +
+ offsetof(struct ino_bucket,
+ __virt_irq)),
+ "i" (ASI_PHYS_USE_EC));
+
+ return ret;
+}
+
+static void bucket_set_virt_irq(unsigned long bucket_pa,
+ unsigned int virt_irq)
+{
+ __asm__ __volatile__("stwa %0, [%1] %2"
+ : /* no outputs */
+ : "r" (virt_irq),
+ "r" (bucket_pa +
+ offsetof(struct ino_bucket,
+ __virt_irq)),
+ "i" (ASI_PHYS_USE_EC));
+}
+
+#define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa)
+
+static struct {
+ unsigned int dev_handle;
+ unsigned int dev_ino;
+ unsigned int in_use;
+} virt_irq_table[NR_IRQS];
+static DEFINE_SPINLOCK(virt_irq_alloc_lock);
+
+unsigned char virt_irq_alloc(unsigned int dev_handle,
+ unsigned int dev_ino)
+{
+ unsigned long flags;
+ unsigned char ent;
+
+ BUILD_BUG_ON(NR_IRQS >= 256);
+
+ spin_lock_irqsave(&virt_irq_alloc_lock, flags);
+
+ for (ent = 1; ent < NR_IRQS; ent++) {
+ if (!virt_irq_table[ent].in_use)
+ break;
+ }
+ if (ent >= NR_IRQS) {
+ printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+ ent = 0;
+ } else {
+ virt_irq_table[ent].dev_handle = dev_handle;
+ virt_irq_table[ent].dev_ino = dev_ino;
+ virt_irq_table[ent].in_use = 1;
+ }
+
+ spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
+
+ return ent;
+}
+
+#ifdef CONFIG_PCI_MSI
+void virt_irq_free(unsigned int virt_irq)
+{
+ unsigned long flags;
+
+ if (virt_irq >= NR_IRQS)
+ return;
+
+ spin_lock_irqsave(&virt_irq_alloc_lock, flags);
+
+ virt_irq_table[virt_irq].in_use = 0;
+
+ spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
+}
+#endif
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *) v, j;
+ struct irqaction * action;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_printf(p, " ");
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%d ",j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (!action)
+ goto skip;
+ seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+ seq_printf(p, " %9s", irq_desc[i].chip->typename);
+ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ }
+ return 0;
+}
+
+static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
+{
+ unsigned int tid;
+
+ if (this_is_starfire) {
+ tid = starfire_translate(imap, cpuid);
+ tid <<= IMAP_TID_SHIFT;
+ tid &= IMAP_TID_UPA;
+ } else {
+ if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+ unsigned long ver;
+
+ __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+ if ((ver >> 32UL) == __JALAPENO_ID ||
+ (ver >> 32UL) == __SERRANO_ID) {
+ tid = cpuid << IMAP_TID_SHIFT;
+ tid &= IMAP_TID_JBUS;
+ } else {
+ unsigned int a = cpuid & 0x1f;
+ unsigned int n = (cpuid >> 5) & 0x1f;
+
+ tid = ((a << IMAP_AID_SHIFT) |
+ (n << IMAP_NID_SHIFT));
+ tid &= (IMAP_AID_SAFARI |
+ IMAP_NID_SAFARI);;
+ }
+ } else {
+ tid = cpuid << IMAP_TID_SHIFT;
+ tid &= IMAP_TID_UPA;
+ }
+ }
+
+ return tid;
+}
+
+struct irq_handler_data {
+ unsigned long iclr;
+ unsigned long imap;
+
+ void (*pre_handler)(unsigned int, void *, void *);
+ void *arg1;
+ void *arg2;
+};
+
+#ifdef CONFIG_SMP
+static int irq_choose_cpu(unsigned int virt_irq)
+{
+ cpumask_t mask = irq_desc[virt_irq].affinity;
+ int cpuid;
+
+ if (cpus_equal(mask, CPU_MASK_ALL)) {
+ static int irq_rover;
+ static DEFINE_SPINLOCK(irq_rover_lock);
+ unsigned long flags;
+
+ /* Round-robin distribution... */
+ do_round_robin:
+ spin_lock_irqsave(&irq_rover_lock, flags);
+
+ while (!cpu_online(irq_rover)) {
+ if (++irq_rover >= NR_CPUS)
+ irq_rover = 0;
+ }
+ cpuid = irq_rover;
+ do {
+ if (++irq_rover >= NR_CPUS)
+ irq_rover = 0;
+ } while (!cpu_online(irq_rover));
+
+ spin_unlock_irqrestore(&irq_rover_lock, flags);
+ } else {
+ cpumask_t tmp;
+
+ cpus_and(tmp, cpu_online_map, mask);
+
+ if (cpus_empty(tmp))
+ goto do_round_robin;
+
+ cpuid = first_cpu(tmp);
+ }
+
+ return cpuid;
+}
+#else
+static int irq_choose_cpu(unsigned int virt_irq)
+{
+ return real_hard_smp_processor_id();
+}
+#endif
+
+static void sun4u_irq_enable(unsigned int virt_irq)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+
+ if (likely(data)) {
+ unsigned long cpuid, imap, val;
+ unsigned int tid;
+
+ cpuid = irq_choose_cpu(virt_irq);
+ imap = data->imap;
+
+ tid = sun4u_compute_tid(imap, cpuid);
+
+ val = upa_readq(imap);
+ val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS |
+ IMAP_AID_SAFARI | IMAP_NID_SAFARI);
+ val |= tid | IMAP_VALID;
+ upa_writeq(val, imap);
+ upa_writeq(ICLR_IDLE, data->iclr);
+ }
+}
+
+static void sun4u_set_affinity(unsigned int virt_irq,
+ const struct cpumask *mask)
+{
+ sun4u_irq_enable(virt_irq);
+}
+
+static void sun4u_irq_disable(unsigned int virt_irq)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+
+ if (likely(data)) {
+ unsigned long imap = data->imap;
+ unsigned long tmp = upa_readq(imap);
+
+ tmp &= ~IMAP_VALID;
+ upa_writeq(tmp, imap);
+ }
+}
+
+static void sun4u_irq_eoi(unsigned int virt_irq)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+ struct irq_desc *desc = irq_desc + virt_irq;
+
+ if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ return;
+
+ if (likely(data))
+ upa_writeq(ICLR_IDLE, data->iclr);
+}
+
+static void sun4v_irq_enable(unsigned int virt_irq)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+ unsigned long cpuid = irq_choose_cpu(virt_irq);
+ int err;
+
+ err = sun4v_intr_settarget(ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+ "err(%d)\n", ino, cpuid, err);
+ err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_setstate(%x): "
+ "err(%d)\n", ino, err);
+ err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n",
+ ino, err);
+}
+
+static void sun4v_set_affinity(unsigned int virt_irq,
+ const struct cpumask *mask)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+ unsigned long cpuid = irq_choose_cpu(virt_irq);
+ int err;
+
+ err = sun4v_intr_settarget(ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+ "err(%d)\n", ino, cpuid, err);
+}
+
+static void sun4v_irq_disable(unsigned int virt_irq)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+ int err;
+
+ err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_setenabled(%x): "
+ "err(%d)\n", ino, err);
+}
+
+static void sun4v_irq_eoi(unsigned int virt_irq)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+ struct irq_desc *desc = irq_desc + virt_irq;
+ int err;
+
+ if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ return;
+
+ err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_setstate(%x): "
+ "err(%d)\n", ino, err);
+}
+
+static void sun4v_virq_enable(unsigned int virt_irq)
+{
+ unsigned long cpuid, dev_handle, dev_ino;
+ int err;
+
+ cpuid = irq_choose_cpu(virt_irq);
+
+ dev_handle = virt_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+ err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+ "err(%d)\n",
+ dev_handle, dev_ino, cpuid, err);
+ err = sun4v_vintr_set_state(dev_handle, dev_ino,
+ HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+ "HV_INTR_STATE_IDLE): err(%d)\n",
+ dev_handle, dev_ino, err);
+ err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+ HV_INTR_ENABLED);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+ "HV_INTR_ENABLED): err(%d)\n",
+ dev_handle, dev_ino, err);
+}
+
+static void sun4v_virt_set_affinity(unsigned int virt_irq,
+ const struct cpumask *mask)
+{
+ unsigned long cpuid, dev_handle, dev_ino;
+ int err;
+
+ cpuid = irq_choose_cpu(virt_irq);
+
+ dev_handle = virt_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+ err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+ "err(%d)\n",
+ dev_handle, dev_ino, cpuid, err);
+}
+
+static void sun4v_virq_disable(unsigned int virt_irq)
+{
+ unsigned long dev_handle, dev_ino;
+ int err;
+
+ dev_handle = virt_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+ err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+ HV_INTR_DISABLED);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+ "HV_INTR_DISABLED): err(%d)\n",
+ dev_handle, dev_ino, err);
+}
+
+static void sun4v_virq_eoi(unsigned int virt_irq)
+{
+ struct irq_desc *desc = irq_desc + virt_irq;
+ unsigned long dev_handle, dev_ino;
+ int err;
+
+ if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ return;
+
+ dev_handle = virt_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+ err = sun4v_vintr_set_state(dev_handle, dev_ino,
+ HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+ "HV_INTR_STATE_IDLE): err(%d)\n",
+ dev_handle, dev_ino, err);
+}
+
+static struct irq_chip sun4u_irq = {
+ .typename = "sun4u",
+ .enable = sun4u_irq_enable,
+ .disable = sun4u_irq_disable,
+ .eoi = sun4u_irq_eoi,
+ .set_affinity = sun4u_set_affinity,
+};
+
+static struct irq_chip sun4v_irq = {
+ .typename = "sun4v",
+ .enable = sun4v_irq_enable,
+ .disable = sun4v_irq_disable,
+ .eoi = sun4v_irq_eoi,
+ .set_affinity = sun4v_set_affinity,
+};
+
+static struct irq_chip sun4v_virq = {
+ .typename = "vsun4v",
+ .enable = sun4v_virq_enable,
+ .disable = sun4v_virq_disable,
+ .eoi = sun4v_virq_eoi,
+ .set_affinity = sun4v_virt_set_affinity,
+};
+
+static void pre_flow_handler(unsigned int virt_irq,
+ struct irq_desc *desc)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+
+ data->pre_handler(ino, data->arg1, data->arg2);
+
+ handle_fasteoi_irq(virt_irq, desc);
+}
+
+void irq_install_pre_handler(int virt_irq,
+ void (*func)(unsigned int, void *, void *),
+ void *arg1, void *arg2)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+ struct irq_desc *desc = irq_desc + virt_irq;
+
+ data->pre_handler = func;
+ data->arg1 = arg1;
+ data->arg2 = arg2;
+
+ desc->handle_irq = pre_flow_handler;
+}
+
+unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
+{
+ struct ino_bucket *bucket;
+ struct irq_handler_data *data;
+ unsigned int virt_irq;
+ int ino;
+
+ BUG_ON(tlb_type == hypervisor);
+
+ ino = (upa_readq(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
+ bucket = &ivector_table[ino];
+ virt_irq = bucket_get_virt_irq(__pa(bucket));
+ if (!virt_irq) {
+ virt_irq = virt_irq_alloc(0, ino);
+ bucket_set_virt_irq(__pa(bucket), virt_irq);
+ set_irq_chip_and_handler_name(virt_irq,
+ &sun4u_irq,
+ handle_fasteoi_irq,
+ "IVEC");
+ }
+
+ data = get_irq_chip_data(virt_irq);
+ if (unlikely(data))
+ goto out;
+
+ data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+ if (unlikely(!data)) {
+ prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
+ prom_halt();
+ }
+ set_irq_chip_data(virt_irq, data);
+
+ data->imap = imap;
+ data->iclr = iclr;
+
+out:
+ return virt_irq;
+}
+
+static unsigned int sun4v_build_common(unsigned long sysino,
+ struct irq_chip *chip)
+{
+ struct ino_bucket *bucket;
+ struct irq_handler_data *data;
+ unsigned int virt_irq;
+
+ BUG_ON(tlb_type != hypervisor);
+
+ bucket = &ivector_table[sysino];
+ virt_irq = bucket_get_virt_irq(__pa(bucket));
+ if (!virt_irq) {
+ virt_irq = virt_irq_alloc(0, sysino);
+ bucket_set_virt_irq(__pa(bucket), virt_irq);
+ set_irq_chip_and_handler_name(virt_irq, chip,
+ handle_fasteoi_irq,
+ "IVEC");
+ }
+
+ data = get_irq_chip_data(virt_irq);
+ if (unlikely(data))
+ goto out;
+
+ data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+ if (unlikely(!data)) {
+ prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
+ prom_halt();
+ }
+ set_irq_chip_data(virt_irq, data);
+
+ /* Catch accidental accesses to these things. IMAP/ICLR handling
+ * is done by hypervisor calls on sun4v platforms, not by direct
+ * register accesses.
+ */
+ data->imap = ~0UL;
+ data->iclr = ~0UL;
+
+out:
+ return virt_irq;
+}
+
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
+{
+ unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
+
+ return sun4v_build_common(sysino, &sun4v_irq);
+}
+
+unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
+{
+ struct irq_handler_data *data;
+ unsigned long hv_err, cookie;
+ struct ino_bucket *bucket;
+ struct irq_desc *desc;
+ unsigned int virt_irq;
+
+ bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
+ if (unlikely(!bucket))
+ return 0;
+ __flush_dcache_range((unsigned long) bucket,
+ ((unsigned long) bucket +
+ sizeof(struct ino_bucket)));
+
+ virt_irq = virt_irq_alloc(devhandle, devino);
+ bucket_set_virt_irq(__pa(bucket), virt_irq);
+
+ set_irq_chip_and_handler_name(virt_irq, &sun4v_virq,
+ handle_fasteoi_irq,
+ "IVEC");
+
+ data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+ if (unlikely(!data))
+ return 0;
+
+ /* In order to make the LDC channel startup sequence easier,
+ * especially wrt. locking, we do not let request_irq() enable
+ * the interrupt.
+ */
+ desc = irq_desc + virt_irq;
+ desc->status |= IRQ_NOAUTOEN;
+
+ set_irq_chip_data(virt_irq, data);
+
+ /* Catch accidental accesses to these things. IMAP/ICLR handling
+ * is done by hypervisor calls on sun4v platforms, not by direct
+ * register accesses.
+ */
+ data->imap = ~0UL;
+ data->iclr = ~0UL;
+
+ cookie = ~__pa(bucket);
+ hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
+ if (hv_err) {
+ prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
+ "err=%lu\n", devhandle, devino, hv_err);
+ prom_halt();
+ }
+
+ return virt_irq;
+}
+
+void ack_bad_irq(unsigned int virt_irq)
+{
+ unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+
+ if (!ino)
+ ino = 0xdeadbeef;
+
+ printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n",
+ ino, virt_irq);
+}
+
+void *hardirq_stack[NR_CPUS];
+void *softirq_stack[NR_CPUS];
+
+static __attribute__((always_inline)) void *set_hardirq_stack(void)
+{
+ void *orig_sp, *sp = hardirq_stack[smp_processor_id()];
+
+ __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp));
+ if (orig_sp < sp ||
+ orig_sp > (sp + THREAD_SIZE)) {
+ sp += THREAD_SIZE - 192 - STACK_BIAS;
+ __asm__ __volatile__("mov %0, %%sp" : : "r" (sp));
+ }
+
+ return orig_sp;
+}
+static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp)
+{
+ __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp));
+}
+
+void handler_irq(int irq, struct pt_regs *regs)
+{
+ unsigned long pstate, bucket_pa;
+ struct pt_regs *old_regs;
+ void *orig_sp;
+
+ clear_softint(1 << irq);
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+
+ /* Grab an atomic snapshot of the pending IVECs. */
+ __asm__ __volatile__("rdpr %%pstate, %0\n\t"
+ "wrpr %0, %3, %%pstate\n\t"
+ "ldx [%2], %1\n\t"
+ "stx %%g0, [%2]\n\t"
+ "wrpr %0, 0x0, %%pstate\n\t"
+ : "=&r" (pstate), "=&r" (bucket_pa)
+ : "r" (irq_work_pa(smp_processor_id())),
+ "i" (PSTATE_IE)
+ : "memory");
+
+ orig_sp = set_hardirq_stack();
+
+ while (bucket_pa) {
+ struct irq_desc *desc;
+ unsigned long next_pa;
+ unsigned int virt_irq;
+
+ next_pa = bucket_get_chain_pa(bucket_pa);
+ virt_irq = bucket_get_virt_irq(bucket_pa);
+ bucket_clear_chain_pa(bucket_pa);
+
+ desc = irq_desc + virt_irq;
+
+ desc->handle_irq(virt_irq, desc);
+
+ bucket_pa = next_pa;
+ }
+
+ restore_hardirq_stack(orig_sp);
+
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+void do_softirq(void)
+{
+ unsigned long flags;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ void *orig_sp, *sp = softirq_stack[smp_processor_id()];
+
+ sp += THREAD_SIZE - 192 - STACK_BIAS;
+
+ __asm__ __volatile__("mov %%sp, %0\n\t"
+ "mov %1, %%sp"
+ : "=&r" (orig_sp)
+ : "r" (sp));
+ __do_softirq();
+ __asm__ __volatile__("mov %0, %%sp"
+ : : "r" (orig_sp));
+ }
+
+ local_irq_restore(flags);
+}
+
+static void unhandled_perf_irq(struct pt_regs *regs)
+{
+ unsigned long pcr, pic;
+
+ read_pcr(pcr);
+ read_pic(pic);
+
+ write_pcr(0);
+
+ printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
+ smp_processor_id());
+ printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
+ smp_processor_id(), pcr, pic);
+}
+
+/* Almost a direct copy of the powerpc PMC code. */
+static DEFINE_SPINLOCK(perf_irq_lock);
+static void *perf_irq_owner_caller; /* mostly for debugging */
+static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
+
+/* Invoked from level 15 PIL handler in trap table. */
+void perfctr_irq(int irq, struct pt_regs *regs)
+{
+ clear_softint(1 << irq);
+ perf_irq(regs);
+}
+
+int register_perfctr_intr(void (*handler)(struct pt_regs *))
+{
+ int ret;
+
+ if (!handler)
+ return -EINVAL;
+
+ spin_lock(&perf_irq_lock);
+ if (perf_irq != unhandled_perf_irq) {
+ printk(KERN_WARNING "register_perfctr_intr: "
+ "perf IRQ busy (reserved by caller %p)\n",
+ perf_irq_owner_caller);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ perf_irq_owner_caller = __builtin_return_address(0);
+ perf_irq = handler;
+
+ ret = 0;
+out:
+ spin_unlock(&perf_irq_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(register_perfctr_intr);
+
+void release_perfctr_intr(void (*handler)(struct pt_regs *))
+{
+ spin_lock(&perf_irq_lock);
+ perf_irq_owner_caller = NULL;
+ perf_irq = unhandled_perf_irq;
+ spin_unlock(&perf_irq_lock);
+}
+EXPORT_SYMBOL_GPL(release_perfctr_intr);
+
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(void)
+{
+ unsigned int irq;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq_desc[irq].lock, flags);
+ if (irq_desc[irq].action &&
+ !(irq_desc[irq].status & IRQ_PER_CPU)) {
+ if (irq_desc[irq].chip->set_affinity)
+ irq_desc[irq].chip->set_affinity(irq,
+ &irq_desc[irq].affinity);
+ }
+ spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+ }
+
+ tick_ops->disable_irq();
+}
+#endif
+
+struct sun5_timer {
+ u64 count0;
+ u64 limit0;
+ u64 count1;
+ u64 limit1;
+};
+
+static struct sun5_timer *prom_timers;
+static u64 prom_limit0, prom_limit1;
+
+static void map_prom_timers(void)
+{
+ struct device_node *dp;
+ const unsigned int *addr;
+
+ /* PROM timer node hangs out in the top level of device siblings... */
+ dp = of_find_node_by_path("/");
+ dp = dp->child;
+ while (dp) {
+ if (!strcmp(dp->name, "counter-timer"))
+ break;
+ dp = dp->sibling;
+ }
+
+ /* Assume if node is not present, PROM uses different tick mechanism
+ * which we should not care about.
+ */
+ if (!dp) {
+ prom_timers = (struct sun5_timer *) 0;
+ return;
+ }
+
+ /* If PROM is really using this, it must be mapped by him. */
+ addr = of_get_property(dp, "address", NULL);
+ if (!addr) {
+ prom_printf("PROM does not have timer mapped, trying to continue.\n");
+ prom_timers = (struct sun5_timer *) 0;
+ return;
+ }
+ prom_timers = (struct sun5_timer *) ((unsigned long)addr[0]);
+}
+
+static void kill_prom_timer(void)
+{
+ if (!prom_timers)
+ return;
+
+ /* Save them away for later. */
+ prom_limit0 = prom_timers->limit0;
+ prom_limit1 = prom_timers->limit1;
+
+ /* Just as in sun4c/sun4m PROM uses timer which ticks at IRQ 14.
+ * We turn both off here just to be paranoid.
+ */
+ prom_timers->limit0 = 0;
+ prom_timers->limit1 = 0;
+
+ /* Wheee, eat the interrupt packet too... */
+ __asm__ __volatile__(
+" mov 0x40, %%g2\n"
+" ldxa [%%g0] %0, %%g1\n"
+" ldxa [%%g2] %1, %%g1\n"
+" stxa %%g0, [%%g0] %0\n"
+" membar #Sync\n"
+ : /* no outputs */
+ : "i" (ASI_INTR_RECEIVE), "i" (ASI_INTR_R)
+ : "g1", "g2");
+}
+
+void notrace init_irqwork_curcpu(void)
+{
+ int cpu = hard_smp_processor_id();
+
+ trap_block[cpu].irq_worklist_pa = 0UL;
+}
+
+/* Please be very careful with register_one_mondo() and
+ * sun4v_register_mondo_queues().
+ *
+ * On SMP this gets invoked from the CPU trampoline before
+ * the cpu has fully taken over the trap table from OBP,
+ * and it's kernel stack + %g6 thread register state is
+ * not fully cooked yet.
+ *
+ * Therefore you cannot make any OBP calls, not even prom_printf,
+ * from these two routines.
+ */
+static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask)
+{
+ unsigned long num_entries = (qmask + 1) / 64;
+ unsigned long status;
+
+ status = sun4v_cpu_qconf(type, paddr, num_entries);
+ if (status != HV_EOK) {
+ prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, "
+ "err %lu\n", type, paddr, num_entries, status);
+ prom_halt();
+ }
+}
+
+void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu)
+{
+ struct trap_per_cpu *tb = &trap_block[this_cpu];
+
+ register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO,
+ tb->cpu_mondo_qmask);
+ register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO,
+ tb->dev_mondo_qmask);
+ register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR,
+ tb->resum_qmask);
+ register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR,
+ tb->nonresum_qmask);
+}
+
+static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
+{
+ unsigned long size = PAGE_ALIGN(qmask + 1);
+ void *p = __alloc_bootmem(size, size, 0);
+ if (!p) {
+ prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
+ prom_halt();
+ }
+
+ *pa_ptr = __pa(p);
+}
+
+static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
+{
+ unsigned long size = PAGE_ALIGN(qmask + 1);
+ void *p = __alloc_bootmem(size, size, 0);
+
+ if (!p) {
+ prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
+ prom_halt();
+ }
+
+ *pa_ptr = __pa(p);
+}
+
+static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
+{
+#ifdef CONFIG_SMP
+ void *page;
+
+ BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
+
+ page = alloc_bootmem_pages(PAGE_SIZE);
+ if (!page) {
+ prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
+ prom_halt();
+ }
+
+ tb->cpu_mondo_block_pa = __pa(page);
+ tb->cpu_list_pa = __pa(page + 64);
+#endif
+}
+
+/* Allocate mondo and error queues for all possible cpus. */
+static void __init sun4v_init_mondo_queues(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct trap_per_cpu *tb = &trap_block[cpu];
+
+ alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
+ alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
+ alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
+ alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
+ alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
+ alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
+ tb->nonresum_qmask);
+ }
+}
+
+static void __init init_send_mondo_info(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct trap_per_cpu *tb = &trap_block[cpu];
+
+ init_cpu_send_mondo_info(tb);
+ }
+}
+
+static struct irqaction timer_irq_action = {
+ .name = "timer",
+};
+
+/* Only invoked on boot processor. */
+void __init init_IRQ(void)
+{
+ unsigned long size;
+
+ map_prom_timers();
+ kill_prom_timer();
+
+ size = sizeof(struct ino_bucket) * NUM_IVECS;
+ ivector_table = alloc_bootmem(size);
+ if (!ivector_table) {
+ prom_printf("Fatal error, cannot allocate ivector_table\n");
+ prom_halt();
+ }
+ __flush_dcache_range((unsigned long) ivector_table,
+ ((unsigned long) ivector_table) + size);
+
+ ivector_table_pa = __pa(ivector_table);
+
+ if (tlb_type == hypervisor)
+ sun4v_init_mondo_queues();
+
+ init_send_mondo_info();
+
+ if (tlb_type == hypervisor) {
+ /* Load up the boot cpu's entries. */
+ sun4v_register_mondo_queues(hard_smp_processor_id());
+ }
+
+ /* We need to clear any IRQ's pending in the soft interrupt
+ * registers, a spurious one could be left around from the
+ * PROM timer which we just disabled.
+ */
+ clear_softint(get_softint());
+
+ /* Now that ivector table is initialized, it is safe
+ * to receive IRQ vector traps. We will normally take
+ * one or two right now, in case some device PROM used
+ * to boot us wants to speak to us. We just ignore them.
+ */
+ __asm__ __volatile__("rdpr %%pstate, %%g1\n\t"
+ "or %%g1, %0, %%g1\n\t"
+ "wrpr %%g1, 0x0, %%pstate"
+ : /* No outputs */
+ : "i" (PSTATE_IE)
+ : "g1");
+
+ irq_desc[0].action = &timer_irq_action;
+}
diff --git a/arch/sparc/kernel/itlb_miss.S b/arch/sparc/kernel/itlb_miss.S
new file mode 100644
index 000000000000..5a8377b54955
--- /dev/null
+++ b/arch/sparc/kernel/itlb_miss.S
@@ -0,0 +1,39 @@
+/* ITLB ** ICACHE line 1: Context 0 check and TSB load */
+ ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer
+ ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET
+ srlx %g6, 48, %g5 ! Get context
+ sllx %g6, 22, %g6 ! Zero out context
+ brz,pn %g5, kvmap_itlb ! Context 0 processing
+ srlx %g6, 22, %g6 ! Delay slot
+ TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
+ cmp %g4, %g6 ! Compare TAG
+
+/* ITLB ** ICACHE line 2: TSB compare and TLB load */
+ bne,pn %xcc, tsb_miss_itlb ! Miss
+ mov FAULT_CODE_ITLB, %g3
+ sethi %hi(_PAGE_EXEC_4U), %g4
+ andcc %g5, %g4, %g0 ! Executable?
+ be,pn %xcc, tsb_do_fault
+ nop ! Delay slot, fill me
+ stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB
+ retry ! Trap done
+
+/* ITLB ** ICACHE line 3: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* ITLB ** ICACHE line 4: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc/kernel/ivec.S b/arch/sparc/kernel/ivec.S
new file mode 100644
index 000000000000..d29f92ebca5e
--- /dev/null
+++ b/arch/sparc/kernel/ivec.S
@@ -0,0 +1,51 @@
+ /* The registers for cross calls will be:
+ *
+ * DATA 0: [low 32-bits] Address of function to call, jmp to this
+ * [high 32-bits] MMU Context Argument 0, place in %g5
+ * DATA 1: Address Argument 1, place in %g1
+ * DATA 2: Address Argument 2, place in %g7
+ *
+ * With this method we can do most of the cross-call tlb/cache
+ * flushing very quickly.
+ */
+ .align 32
+ .globl do_ivec
+ .type do_ivec,#function
+do_ivec:
+ mov 0x40, %g3
+ ldxa [%g3 + %g0] ASI_INTR_R, %g3
+ sethi %hi(KERNBASE), %g4
+ cmp %g3, %g4
+ bgeu,pn %xcc, do_ivec_xcall
+ srlx %g3, 32, %g5
+ stxa %g0, [%g0] ASI_INTR_RECEIVE
+ membar #Sync
+
+ sethi %hi(ivector_table_pa), %g2
+ ldx [%g2 + %lo(ivector_table_pa)], %g2
+ sllx %g3, 4, %g3
+ add %g2, %g3, %g3
+
+ TRAP_LOAD_IRQ_WORK_PA(%g6, %g1)
+
+ ldx [%g6], %g5
+ stxa %g5, [%g3] ASI_PHYS_USE_EC
+ stx %g3, [%g6]
+ wr %g0, 1 << PIL_DEVICE_IRQ, %set_softint
+ retry
+do_ivec_xcall:
+ mov 0x50, %g1
+ ldxa [%g1 + %g0] ASI_INTR_R, %g1
+ srl %g3, 0, %g3
+
+ mov 0x60, %g7
+ ldxa [%g7 + %g0] ASI_INTR_R, %g7
+ stxa %g0, [%g0] ASI_INTR_RECEIVE
+ membar #Sync
+ ba,pt %xcc, 1f
+ nop
+
+ .align 32
+1: jmpl %g3, %g0
+ nop
+ .size do_ivec,.-do_ivec
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
new file mode 100644
index 000000000000..81a972e8d8ea
--- /dev/null
+++ b/arch/sparc/kernel/kernel.h
@@ -0,0 +1,31 @@
+#ifndef __SPARC_KERNEL_H
+#define __SPARC_KERNEL_H
+
+#include <linux/interrupt.h>
+
+/* cpu.c */
+extern const char *sparc_cpu_type;
+extern const char *sparc_fpu_type;
+
+extern unsigned int fsr_storage;
+
+#ifdef CONFIG_SPARC32
+/* cpu.c */
+extern void cpu_probe(void);
+
+/* traps_32.c */
+extern void handle_hw_divzero(struct pt_regs *regs, unsigned long pc,
+ unsigned long npc, unsigned long psr);
+/* muldiv.c */
+extern int do_user_muldiv (struct pt_regs *, unsigned long);
+
+/* irq_32.c */
+extern struct irqaction static_irqaction[];
+extern int static_irq_count;
+extern spinlock_t irq_action_lock;
+
+extern void unexpected_irq(int irq, void *dev_id, struct pt_regs * regs);
+
+#else /* CONFIG_SPARC32 */
+#endif /* CONFIG_SPARC32 */
+#endif /* !(__SPARC_KERNEL_H) */
diff --git a/arch/sparc/kernel/kgdb.c b/arch/sparc/kernel/kgdb_32.c
index 757805ce02ee..757805ce02ee 100644
--- a/arch/sparc/kernel/kgdb.c
+++ b/arch/sparc/kernel/kgdb_32.c
diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c
new file mode 100644
index 000000000000..fefbe6dc51be
--- /dev/null
+++ b/arch/sparc/kernel/kgdb_64.c
@@ -0,0 +1,186 @@
+/* kgdb.c: KGDB support for 64-bit sparc.
+ *
+ * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kgdb.h>
+#include <linux/kdebug.h>
+
+#include <asm/kdebug.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ struct reg_window *win;
+ int i;
+
+ gdb_regs[GDB_G0] = 0;
+ for (i = 0; i < 15; i++)
+ gdb_regs[GDB_G1 + i] = regs->u_regs[UREG_G1 + i];
+
+ win = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS);
+ for (i = 0; i < 8; i++)
+ gdb_regs[GDB_L0 + i] = win->locals[i];
+ for (i = 0; i < 8; i++)
+ gdb_regs[GDB_I0 + i] = win->ins[i];
+
+ for (i = GDB_F0; i <= GDB_F62; i++)
+ gdb_regs[i] = 0;
+
+ gdb_regs[GDB_PC] = regs->tpc;
+ gdb_regs[GDB_NPC] = regs->tnpc;
+ gdb_regs[GDB_STATE] = regs->tstate;
+ gdb_regs[GDB_FSR] = 0;
+ gdb_regs[GDB_FPRS] = 0;
+ gdb_regs[GDB_Y] = regs->y;
+}
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+ struct thread_info *t = task_thread_info(p);
+ extern unsigned int switch_to_pc;
+ extern unsigned int ret_from_syscall;
+ struct reg_window *win;
+ unsigned long pc, cwp;
+ int i;
+
+ for (i = GDB_G0; i < GDB_G6; i++)
+ gdb_regs[i] = 0;
+ gdb_regs[GDB_G6] = (unsigned long) t;
+ gdb_regs[GDB_G7] = (unsigned long) p;
+ for (i = GDB_O0; i < GDB_SP; i++)
+ gdb_regs[i] = 0;
+ gdb_regs[GDB_SP] = t->ksp;
+ gdb_regs[GDB_O7] = 0;
+
+ win = (struct reg_window *) (t->ksp + STACK_BIAS);
+ for (i = 0; i < 8; i++)
+ gdb_regs[GDB_L0 + i] = win->locals[i];
+ for (i = 0; i < 8; i++)
+ gdb_regs[GDB_I0 + i] = win->ins[i];
+
+ for (i = GDB_F0; i <= GDB_F62; i++)
+ gdb_regs[i] = 0;
+
+ if (t->new_child)
+ pc = (unsigned long) &ret_from_syscall;
+ else
+ pc = (unsigned long) &switch_to_pc;
+
+ gdb_regs[GDB_PC] = pc;
+ gdb_regs[GDB_NPC] = pc + 4;
+
+ cwp = __thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP];
+
+ gdb_regs[GDB_STATE] = (TSTATE_PRIV | TSTATE_IE | cwp);
+ gdb_regs[GDB_FSR] = 0;
+ gdb_regs[GDB_FPRS] = 0;
+ gdb_regs[GDB_Y] = 0;
+}
+
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ struct reg_window *win;
+ int i;
+
+ for (i = 0; i < 15; i++)
+ regs->u_regs[UREG_G1 + i] = gdb_regs[GDB_G1 + i];
+
+ /* If the TSTATE register is changing, we have to preserve
+ * the CWP field, otherwise window save/restore explodes.
+ */
+ if (regs->tstate != gdb_regs[GDB_STATE]) {
+ unsigned long cwp = regs->tstate & TSTATE_CWP;
+
+ regs->tstate = (gdb_regs[GDB_STATE] & ~TSTATE_CWP) | cwp;
+ }
+
+ regs->tpc = gdb_regs[GDB_PC];
+ regs->tnpc = gdb_regs[GDB_NPC];
+ regs->y = gdb_regs[GDB_Y];
+
+ win = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS);
+ for (i = 0; i < 8; i++)
+ win->locals[i] = gdb_regs[GDB_L0 + i];
+ for (i = 0; i < 8; i++)
+ win->ins[i] = gdb_regs[GDB_I0 + i];
+}
+
+#ifdef CONFIG_SMP
+void smp_kgdb_capture_client(struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ __asm__ __volatile__("rdpr %%pstate, %0\n\t"
+ "wrpr %0, %1, %%pstate"
+ : "=r" (flags)
+ : "i" (PSTATE_IE));
+
+ flushw_all();
+
+ if (atomic_read(&kgdb_active) != -1)
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+
+ __asm__ __volatile__("wrpr %0, 0, %%pstate"
+ : : "r" (flags));
+}
+#endif
+
+int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
+ char *remcomInBuffer, char *remcomOutBuffer,
+ struct pt_regs *linux_regs)
+{
+ unsigned long addr;
+ char *ptr;
+
+ switch (remcomInBuffer[0]) {
+ case 'c':
+ /* try to read optional parameter, pc unchanged if no parm */
+ ptr = &remcomInBuffer[1];
+ if (kgdb_hex2long(&ptr, &addr)) {
+ linux_regs->tpc = addr;
+ linux_regs->tnpc = addr + 4;
+ }
+ /* fallthru */
+
+ case 'D':
+ case 'k':
+ if (linux_regs->tpc == (unsigned long) arch_kgdb_breakpoint) {
+ linux_regs->tpc = linux_regs->tnpc;
+ linux_regs->tnpc += 4;
+ }
+ return 0;
+ }
+ return -1;
+}
+
+asmlinkage void kgdb_trap(unsigned long trap_level, struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ if (user_mode(regs)) {
+ bad_trap(regs, trap_level);
+ return;
+ }
+
+ flushw_all();
+
+ local_irq_save(flags);
+ kgdb_handle_exception(0x172, SIGTRAP, 0, regs);
+ local_irq_restore(flags);
+}
+
+int kgdb_arch_init(void)
+{
+ return 0;
+}
+
+void kgdb_arch_exit(void)
+{
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+ /* Breakpoint instruction: ta 0x72 */
+ .gdb_bpt_instr = { 0x91, 0xd0, 0x20, 0x72 },
+};
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
new file mode 100644
index 000000000000..201a6e547e4a
--- /dev/null
+++ b/arch/sparc/kernel/kprobes.c
@@ -0,0 +1,593 @@
+/* arch/sparc64/kernel/kprobes.c
+ *
+ * Copyright (C) 2004 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <asm/signal.h>
+#include <asm/cacheflush.h>
+#include <asm/uaccess.h>
+
+/* We do not have hardware single-stepping on sparc64.
+ * So we implement software single-stepping with breakpoint
+ * traps. The top-level scheme is similar to that used
+ * in the x86 kprobes implementation.
+ *
+ * In the kprobe->ainsn.insn[] array we store the original
+ * instruction at index zero and a break instruction at
+ * index one.
+ *
+ * When we hit a kprobe we:
+ * - Run the pre-handler
+ * - Remember "regs->tnpc" and interrupt level stored in
+ * "regs->tstate" so we can restore them later
+ * - Disable PIL interrupts
+ * - Set regs->tpc to point to kprobe->ainsn.insn[0]
+ * - Set regs->tnpc to point to kprobe->ainsn.insn[1]
+ * - Mark that we are actively in a kprobe
+ *
+ * At this point we wait for the second breakpoint at
+ * kprobe->ainsn.insn[1] to hit. When it does we:
+ * - Run the post-handler
+ * - Set regs->tpc to "remembered" regs->tnpc stored above,
+ * restore the PIL interrupt level in "regs->tstate" as well
+ * - Make any adjustments necessary to regs->tnpc in order
+ * to handle relative branches correctly. See below.
+ * - Mark that we are no longer actively in a kprobe.
+ */
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ p->ainsn.insn[0] = *p->addr;
+ flushi(&p->ainsn.insn[0]);
+
+ p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2;
+ flushi(&p->ainsn.insn[1]);
+
+ p->opcode = *p->addr;
+ return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ *p->addr = BREAKPOINT_INSTRUCTION;
+ flushi(p->addr);
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ *p->addr = p->opcode;
+ flushi(p->addr);
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.orig_tnpc = kcb->kprobe_orig_tnpc;
+ kcb->prev_kprobe.orig_tstate_pil = kcb->kprobe_orig_tstate_pil;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_orig_tnpc = kcb->prev_kprobe.orig_tnpc;
+ kcb->kprobe_orig_tstate_pil = kcb->prev_kprobe.orig_tstate_pil;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = p;
+ kcb->kprobe_orig_tnpc = regs->tnpc;
+ kcb->kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL);
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ regs->tstate |= TSTATE_PIL;
+
+ /*single step inline, if it a breakpoint instruction*/
+ if (p->opcode == BREAKPOINT_INSTRUCTION) {
+ regs->tpc = (unsigned long) p->addr;
+ regs->tnpc = kcb->kprobe_orig_tnpc;
+ } else {
+ regs->tpc = (unsigned long) &p->ainsn.insn[0];
+ regs->tnpc = (unsigned long) &p->ainsn.insn[1];
+ }
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p;
+ void *addr = (void *) regs->tpc;
+ int ret = 0;
+ struct kprobe_ctlblk *kcb;
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
+ if (kprobe_running()) {
+ p = get_kprobe(addr);
+ if (p) {
+ if (kcb->kprobe_status == KPROBE_HIT_SS) {
+ regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
+ kcb->kprobe_orig_tstate_pil);
+ goto no_kprobe;
+ }
+ /* We have reentered the kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ prepare_singlestep(p, regs, kcb);
+ return 1;
+ } else {
+ if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) {
+ /* The breakpoint instruction was removed by
+ * another cpu right after we hit, no further
+ * handling of this interrupt is appropriate
+ */
+ ret = 1;
+ goto no_kprobe;
+ }
+ p = __get_cpu_var(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs))
+ goto ss_probe;
+ }
+ goto no_kprobe;
+ }
+
+ p = get_kprobe(addr);
+ if (!p) {
+ if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) {
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ }
+ /* Not one of ours: let kernel handle it */
+ goto no_kprobe;
+ }
+
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (p->pre_handler && p->pre_handler(p, regs))
+ return 1;
+
+ss_probe:
+ prepare_singlestep(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ return 1;
+
+no_kprobe:
+ preempt_enable_no_resched();
+ return ret;
+}
+
+/* If INSN is a relative control transfer instruction,
+ * return the corrected branch destination value.
+ *
+ * regs->tpc and regs->tnpc still hold the values of the
+ * program counters at the time of trap due to the execution
+ * of the BREAKPOINT_INSTRUCTION_2 at p->ainsn.insn[1]
+ *
+ */
+static unsigned long __kprobes relbranch_fixup(u32 insn, struct kprobe *p,
+ struct pt_regs *regs)
+{
+ unsigned long real_pc = (unsigned long) p->addr;
+
+ /* Branch not taken, no mods necessary. */
+ if (regs->tnpc == regs->tpc + 0x4UL)
+ return real_pc + 0x8UL;
+
+ /* The three cases are call, branch w/prediction,
+ * and traditional branch.
+ */
+ if ((insn & 0xc0000000) == 0x40000000 ||
+ (insn & 0xc1c00000) == 0x00400000 ||
+ (insn & 0xc1c00000) == 0x00800000) {
+ unsigned long ainsn_addr;
+
+ ainsn_addr = (unsigned long) &p->ainsn.insn[0];
+
+ /* The instruction did all the work for us
+ * already, just apply the offset to the correct
+ * instruction location.
+ */
+ return (real_pc + (regs->tnpc - ainsn_addr));
+ }
+
+ /* It is jmpl or some other absolute PC modification instruction,
+ * leave NPC as-is.
+ */
+ return regs->tnpc;
+}
+
+/* If INSN is an instruction which writes it's PC location
+ * into a destination register, fix that up.
+ */
+static void __kprobes retpc_fixup(struct pt_regs *regs, u32 insn,
+ unsigned long real_pc)
+{
+ unsigned long *slot = NULL;
+
+ /* Simplest case is 'call', which always uses %o7 */
+ if ((insn & 0xc0000000) == 0x40000000) {
+ slot = &regs->u_regs[UREG_I7];
+ }
+
+ /* 'jmpl' encodes the register inside of the opcode */
+ if ((insn & 0xc1f80000) == 0x81c00000) {
+ unsigned long rd = ((insn >> 25) & 0x1f);
+
+ if (rd <= 15) {
+ slot = &regs->u_regs[rd];
+ } else {
+ /* Hard case, it goes onto the stack. */
+ flushw_all();
+
+ rd -= 16;
+ slot = (unsigned long *)
+ (regs->u_regs[UREG_FP] + STACK_BIAS);
+ slot += rd;
+ }
+ }
+ if (slot != NULL)
+ *slot = real_pc;
+}
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction which has been replaced by the breakpoint
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is &p->ainsn.insn[0].
+ *
+ * This function prepares to return from the post-single-step
+ * breakpoint trap.
+ */
+static void __kprobes resume_execution(struct kprobe *p,
+ struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+{
+ u32 insn = p->ainsn.insn[0];
+
+ regs->tnpc = relbranch_fixup(insn, p, regs);
+
+ /* This assignment must occur after relbranch_fixup() */
+ regs->tpc = kcb->kprobe_orig_tnpc;
+
+ retpc_fixup(regs, insn, (unsigned long) p->addr);
+
+ regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
+ kcb->kprobe_orig_tstate_pil);
+}
+
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
+ return 0;
+
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ resume_execution(cur, regs, kcb);
+
+ /*Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ goto out;
+ }
+ reset_current_kprobe();
+out:
+ preempt_enable_no_resched();
+
+ return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ const struct exception_table_entry *entry;
+
+ switch(kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the tpc points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ regs->tpc = (unsigned long)cur->addr;
+ regs->tnpc = kcb->kprobe_orig_tnpc;
+ regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
+ kcb->kprobe_orig_tstate_pil);
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting,
+ * we can also use npre/npostfault count for accouting
+ * these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page_fault, this could happen
+ * if handler tries to access user space by
+ * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+
+ entry = search_exception_tables(regs->tpc);
+ if (entry) {
+ regs->tpc = entry->fixup;
+ regs->tnpc = regs->tpc + 4;
+ return 1;
+ }
+
+ /*
+ * fixup_exception() could not handle it,
+ * Let do_page_fault() fix it.
+ */
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ if (args->regs && user_mode(args->regs))
+ return ret;
+
+ switch (val) {
+ case DIE_DEBUG:
+ if (kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_DEBUG_2:
+ if (post_kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+asmlinkage void __kprobes kprobe_trap(unsigned long trap_level,
+ struct pt_regs *regs)
+{
+ BUG_ON(trap_level != 0x170 && trap_level != 0x171);
+
+ if (user_mode(regs)) {
+ local_irq_enable();
+ bad_trap(regs, trap_level);
+ return;
+ }
+
+ /* trap_level == 0x170 --> ta 0x70
+ * trap_level == 0x171 --> ta 0x71
+ */
+ if (notify_die((trap_level == 0x170) ? DIE_DEBUG : DIE_DEBUG_2,
+ (trap_level == 0x170) ? "debug" : "debug_2",
+ regs, 0, trap_level, SIGTRAP) != NOTIFY_STOP)
+ bad_trap(regs, trap_level);
+}
+
+/* Jprobes support. */
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ memcpy(&(kcb->jprobe_saved_regs), regs, sizeof(*regs));
+
+ regs->tpc = (unsigned long) jp->entry;
+ regs->tnpc = ((unsigned long) jp->entry) + 0x4UL;
+ regs->tstate |= TSTATE_PIL;
+
+ return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ register unsigned long orig_fp asm("g1");
+
+ orig_fp = kcb->jprobe_saved_regs.u_regs[UREG_FP];
+ __asm__ __volatile__("\n"
+"1: cmp %%sp, %0\n\t"
+ "blu,a,pt %%xcc, 1b\n\t"
+ " restore\n\t"
+ ".globl jprobe_return_trap_instruction\n"
+"jprobe_return_trap_instruction:\n\t"
+ "ta 0x70"
+ : /* no outputs */
+ : "r" (orig_fp));
+}
+
+extern void jprobe_return_trap_instruction(void);
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ u32 *addr = (u32 *) regs->tpc;
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (addr == (u32 *) jprobe_return_trap_instruction) {
+ memcpy(regs, &(kcb->jprobe_saved_regs), sizeof(*regs));
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
+}
+
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
+ *
+ * call some_function <--- return register points here
+ * nop <--- call delay slot
+ * whatever <--- where callee returns to
+ *
+ * To keep trampoline_probe_handler logic simpler, we normalize the
+ * value kept in ri->ret_addr so we don't need to keep adjusting it
+ * back and forth.
+ */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)(regs->u_regs[UREG_RETPC] + 8);
+
+ /* Replace the return addr with trampoline addr */
+ regs->u_regs[UREG_RETPC] =
+ ((unsigned long)kretprobe_trampoline) - 8;
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more then one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+ regs->tpc = orig_ret_address;
+ regs->tnpc = orig_ret_address + 4;
+
+ reset_current_kprobe();
+ kretprobe_hash_unlock(current, &flags);
+ preempt_enable_no_resched();
+
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+void kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n"
+ "\tnop\n"
+ "\tnop\n");
+}
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+ return 1;
+
+ return 0;
+}
diff --git a/arch/sparc/kernel/kstack.h b/arch/sparc/kernel/kstack.h
new file mode 100644
index 000000000000..4248d969272f
--- /dev/null
+++ b/arch/sparc/kernel/kstack.h
@@ -0,0 +1,60 @@
+#ifndef _KSTACK_H
+#define _KSTACK_H
+
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+
+/* SP must be STACK_BIAS adjusted already. */
+static inline bool kstack_valid(struct thread_info *tp, unsigned long sp)
+{
+ unsigned long base = (unsigned long) tp;
+
+ if (sp >= (base + sizeof(struct thread_info)) &&
+ sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+ return true;
+
+ if (hardirq_stack[tp->cpu]) {
+ base = (unsigned long) hardirq_stack[tp->cpu];
+ if (sp >= base &&
+ sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+ return true;
+ base = (unsigned long) softirq_stack[tp->cpu];
+ if (sp >= base &&
+ sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+ return true;
+ }
+ return false;
+}
+
+/* Does "regs" point to a valid pt_regs trap frame? */
+static inline bool kstack_is_trap_frame(struct thread_info *tp, struct pt_regs *regs)
+{
+ unsigned long base = (unsigned long) tp;
+ unsigned long addr = (unsigned long) regs;
+
+ if (addr >= base &&
+ addr <= (base + THREAD_SIZE - sizeof(*regs)))
+ goto check_magic;
+
+ if (hardirq_stack[tp->cpu]) {
+ base = (unsigned long) hardirq_stack[tp->cpu];
+ if (addr >= base &&
+ addr <= (base + THREAD_SIZE - sizeof(*regs)))
+ goto check_magic;
+ base = (unsigned long) softirq_stack[tp->cpu];
+ if (addr >= base &&
+ addr <= (base + THREAD_SIZE - sizeof(*regs)))
+ goto check_magic;
+ }
+ return false;
+
+check_magic:
+ if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC)
+ return true;
+ return false;
+
+}
+
+#endif /* _KSTACK_H */
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
new file mode 100644
index 000000000000..cef8defcd7a9
--- /dev/null
+++ b/arch/sparc/kernel/ktlb.S
@@ -0,0 +1,304 @@
+/* arch/sparc64/kernel/ktlb.S: Kernel mapping TLB miss handling.
+ *
+ * Copyright (C) 1995, 1997, 2005, 2008 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 1996 Eddie C. Dost (ecd@brainaid.de)
+ * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ * Copyright (C) 1996,98,99 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tsb.h>
+
+ .text
+ .align 32
+
+kvmap_itlb:
+ /* g6: TAG TARGET */
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_IMMU, %g4
+
+ /* sun4v_itlb_miss branches here with the missing virtual
+ * address already loaded into %g4
+ */
+kvmap_itlb_4v:
+
+kvmap_itlb_nonlinear:
+ /* Catch kernel NULL pointer calls. */
+ sethi %hi(PAGE_SIZE), %g5
+ cmp %g4, %g5
+ bleu,pn %xcc, kvmap_dtlb_longpath
+ nop
+
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_itlb_load)
+
+kvmap_itlb_tsb_miss:
+ sethi %hi(LOW_OBP_ADDRESS), %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_itlb_vmalloc_addr
+ mov 0x1, %g5
+ sllx %g5, 32, %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_itlb_obp
+ nop
+
+kvmap_itlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5
+ mov 1, %g7
+ sllx %g7, TSB_TAG_INVALID_BIT, %g7
+ brgez,a,pn %g5, kvmap_itlb_longpath
+ KTSB_STORE(%g1, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+
+kvmap_itlb_load:
+
+661: stxa %g5, [%g0] ASI_ITLB_DATA_IN
+ retry
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ nop
+ nop
+ .previous
+
+ /* For sun4v the ASI_ITLB_DATA_IN store and the retry
+ * instruction get nop'd out and we get here to branch
+ * to the sun4v tlb load code. The registers are setup
+ * as follows:
+ *
+ * %g4: vaddr
+ * %g5: PTE
+ * %g6: TAG
+ *
+ * The sun4v TLB load wants the PTE in %g3 so we fix that
+ * up here.
+ */
+ ba,pt %xcc, sun4v_itlb_load
+ mov %g5, %g3
+
+kvmap_itlb_longpath:
+
+661: rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ SET_GL(1)
+ nop
+ .previous
+
+ rdpr %tpc, %g5
+ ba,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_ITLB, %g4
+
+kvmap_itlb_obp:
+ OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ ba,pt %xcc, kvmap_itlb_load
+ nop
+
+kvmap_dtlb_obp:
+ OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ ba,pt %xcc, kvmap_dtlb_load
+ nop
+
+ .align 32
+kvmap_dtlb_tsb4m_load:
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+ KTSB_WRITE(%g1, %g5, %g6)
+ ba,pt %xcc, kvmap_dtlb_load
+ nop
+
+kvmap_dtlb:
+ /* %g6: TAG TARGET */
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+
+ /* sun4v_dtlb_miss branches here with the missing virtual
+ * address already loaded into %g4
+ */
+kvmap_dtlb_4v:
+ brgez,pn %g4, kvmap_dtlb_nonlinear
+ nop
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /* Index through the base page size TSB even for linear
+ * mappings when using page allocation debugging.
+ */
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+#else
+ /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */
+ KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+#endif
+ /* TSB entry address left in %g1, lookup linear PTE.
+ * Must preserve %g1 and %g6 (TAG).
+ */
+kvmap_dtlb_tsb4m_miss:
+ sethi %hi(kpte_linear_bitmap), %g2
+ or %g2, %lo(kpte_linear_bitmap), %g2
+
+ /* Clear the PAGE_OFFSET top virtual bits, then shift
+ * down to get a 256MB physical address index.
+ */
+ sllx %g4, 21, %g5
+ mov 1, %g7
+ srlx %g5, 21 + 28, %g5
+
+ /* Don't try this at home kids... this depends upon srlx
+ * only taking the low 6 bits of the shift count in %g5.
+ */
+ sllx %g7, %g5, %g7
+
+ /* Divide by 64 to get the offset into the bitmask. */
+ srlx %g5, 6, %g5
+ sllx %g5, 3, %g5
+
+ /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */
+ ldx [%g2 + %g5], %g2
+ andcc %g2, %g7, %g0
+ sethi %hi(kern_linear_pte_xor), %g5
+ or %g5, %lo(kern_linear_pte_xor), %g5
+ bne,a,pt %xcc, 1f
+ add %g5, 8, %g5
+
+1: ldx [%g5], %g2
+
+ .globl kvmap_linear_patch
+kvmap_linear_patch:
+ ba,pt %xcc, kvmap_dtlb_tsb4m_load
+ xor %g2, %g4, %g5
+
+kvmap_dtlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5
+ mov 1, %g7
+ sllx %g7, TSB_TAG_INVALID_BIT, %g7
+ brgez,a,pn %g5, kvmap_dtlb_longpath
+ KTSB_STORE(%g1, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+
+kvmap_dtlb_load:
+
+661: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
+ retry
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ nop
+ nop
+ .previous
+
+ /* For sun4v the ASI_DTLB_DATA_IN store and the retry
+ * instruction get nop'd out and we get here to branch
+ * to the sun4v tlb load code. The registers are setup
+ * as follows:
+ *
+ * %g4: vaddr
+ * %g5: PTE
+ * %g6: TAG
+ *
+ * The sun4v TLB load wants the PTE in %g3 so we fix that
+ * up here.
+ */
+ ba,pt %xcc, sun4v_dtlb_load
+ mov %g5, %g3
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+kvmap_vmemmap:
+ sub %g4, %g5, %g5
+ srlx %g5, 22, %g5
+ sethi %hi(vmemmap_table), %g1
+ sllx %g5, 3, %g5
+ or %g1, %lo(vmemmap_table), %g1
+ ba,pt %xcc, kvmap_dtlb_load
+ ldx [%g1 + %g5], %g5
+#endif
+
+kvmap_dtlb_nonlinear:
+ /* Catch kernel NULL pointer derefs. */
+ sethi %hi(PAGE_SIZE), %g5
+ cmp %g4, %g5
+ bleu,pn %xcc, kvmap_dtlb_longpath
+ nop
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ /* Do not use the TSB for vmemmap. */
+ mov (VMEMMAP_BASE >> 24), %g5
+ sllx %g5, 24, %g5
+ cmp %g4,%g5
+ bgeu,pn %xcc, kvmap_vmemmap
+ nop
+#endif
+
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+
+kvmap_dtlb_tsbmiss:
+ sethi %hi(MODULES_VADDR), %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_dtlb_longpath
+ mov (VMALLOC_END >> 24), %g5
+ sllx %g5, 24, %g5
+ cmp %g4, %g5
+ bgeu,pn %xcc, kvmap_dtlb_longpath
+ nop
+
+kvmap_check_obp:
+ sethi %hi(LOW_OBP_ADDRESS), %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_dtlb_vmalloc_addr
+ mov 0x1, %g5
+ sllx %g5, 32, %g5
+ cmp %g4, %g5
+ blu,pn %xcc, kvmap_dtlb_obp
+ nop
+ ba,pt %xcc, kvmap_dtlb_vmalloc_addr
+ nop
+
+kvmap_dtlb_longpath:
+
+661: rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ SET_GL(1)
+ ldxa [%g0] ASI_SCRATCHPAD, %g5
+ .previous
+
+ rdpr %tl, %g3
+ cmp %g3, 1
+
+661: mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5
+ nop
+ .previous
+
+ be,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_DTLB, %g4
+ ba,pt %xcc, winfix_trampoline
+ nop
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
new file mode 100644
index 000000000000..d68982330f66
--- /dev/null
+++ b/arch/sparc/kernel/ldc.c
@@ -0,0 +1,2378 @@
+/* ldc.c: Logical Domain Channel link-layer protocol driver.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/init.h>
+
+#include <asm/hypervisor.h>
+#include <asm/iommu.h>
+#include <asm/page.h>
+#include <asm/ldc.h>
+#include <asm/mdesc.h>
+
+#define DRV_MODULE_NAME "ldc"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.1"
+#define DRV_MODULE_RELDATE "July 22, 2008"
+
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+#define LDC_PACKET_SIZE 64
+
+/* Packet header layout for unreliable and reliable mode frames.
+ * When in RAW mode, packets are simply straight 64-byte payloads
+ * with no headers.
+ */
+struct ldc_packet {
+ u8 type;
+#define LDC_CTRL 0x01
+#define LDC_DATA 0x02
+#define LDC_ERR 0x10
+
+ u8 stype;
+#define LDC_INFO 0x01
+#define LDC_ACK 0x02
+#define LDC_NACK 0x04
+
+ u8 ctrl;
+#define LDC_VERS 0x01 /* Link Version */
+#define LDC_RTS 0x02 /* Request To Send */
+#define LDC_RTR 0x03 /* Ready To Receive */
+#define LDC_RDX 0x04 /* Ready for Data eXchange */
+#define LDC_CTRL_MSK 0x0f
+
+ u8 env;
+#define LDC_LEN 0x3f
+#define LDC_FRAG_MASK 0xc0
+#define LDC_START 0x40
+#define LDC_STOP 0x80
+
+ u32 seqid;
+
+ union {
+ u8 u_data[LDC_PACKET_SIZE - 8];
+ struct {
+ u32 pad;
+ u32 ackid;
+ u8 r_data[LDC_PACKET_SIZE - 8 - 8];
+ } r;
+ } u;
+};
+
+struct ldc_version {
+ u16 major;
+ u16 minor;
+};
+
+/* Ordered from largest major to lowest. */
+static struct ldc_version ver_arr[] = {
+ { .major = 1, .minor = 0 },
+};
+
+#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
+#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
+
+struct ldc_channel;
+
+struct ldc_mode_ops {
+ int (*write)(struct ldc_channel *, const void *, unsigned int);
+ int (*read)(struct ldc_channel *, void *, unsigned int);
+};
+
+static const struct ldc_mode_ops raw_ops;
+static const struct ldc_mode_ops nonraw_ops;
+static const struct ldc_mode_ops stream_ops;
+
+int ldom_domaining_enabled;
+
+struct ldc_iommu {
+ /* Protects arena alloc/free. */
+ spinlock_t lock;
+ struct iommu_arena arena;
+ struct ldc_mtable_entry *page_table;
+};
+
+struct ldc_channel {
+ /* Protects all operations that depend upon channel state. */
+ spinlock_t lock;
+
+ unsigned long id;
+
+ u8 *mssbuf;
+ u32 mssbuf_len;
+ u32 mssbuf_off;
+
+ struct ldc_packet *tx_base;
+ unsigned long tx_head;
+ unsigned long tx_tail;
+ unsigned long tx_num_entries;
+ unsigned long tx_ra;
+
+ unsigned long tx_acked;
+
+ struct ldc_packet *rx_base;
+ unsigned long rx_head;
+ unsigned long rx_tail;
+ unsigned long rx_num_entries;
+ unsigned long rx_ra;
+
+ u32 rcv_nxt;
+ u32 snd_nxt;
+
+ unsigned long chan_state;
+
+ struct ldc_channel_config cfg;
+ void *event_arg;
+
+ const struct ldc_mode_ops *mops;
+
+ struct ldc_iommu iommu;
+
+ struct ldc_version ver;
+
+ u8 hs_state;
+#define LDC_HS_CLOSED 0x00
+#define LDC_HS_OPEN 0x01
+#define LDC_HS_GOTVERS 0x02
+#define LDC_HS_SENTRTR 0x03
+#define LDC_HS_GOTRTR 0x04
+#define LDC_HS_COMPLETE 0x10
+
+ u8 flags;
+#define LDC_FLAG_ALLOCED_QUEUES 0x01
+#define LDC_FLAG_REGISTERED_QUEUES 0x02
+#define LDC_FLAG_REGISTERED_IRQS 0x04
+#define LDC_FLAG_RESET 0x10
+
+ u8 mss;
+ u8 state;
+
+#define LDC_IRQ_NAME_MAX 32
+ char rx_irq_name[LDC_IRQ_NAME_MAX];
+ char tx_irq_name[LDC_IRQ_NAME_MAX];
+
+ struct hlist_head mh_list;
+
+ struct hlist_node list;
+};
+
+#define ldcdbg(TYPE, f, a...) \
+do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
+ printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
+} while (0)
+
+static const char *state_to_str(u8 state)
+{
+ switch (state) {
+ case LDC_STATE_INVALID:
+ return "INVALID";
+ case LDC_STATE_INIT:
+ return "INIT";
+ case LDC_STATE_BOUND:
+ return "BOUND";
+ case LDC_STATE_READY:
+ return "READY";
+ case LDC_STATE_CONNECTED:
+ return "CONNECTED";
+ default:
+ return "<UNKNOWN>";
+ }
+}
+
+static void ldc_set_state(struct ldc_channel *lp, u8 state)
+{
+ ldcdbg(STATE, "STATE (%s) --> (%s)\n",
+ state_to_str(lp->state),
+ state_to_str(state));
+
+ lp->state = state;
+}
+
+static unsigned long __advance(unsigned long off, unsigned long num_entries)
+{
+ off += LDC_PACKET_SIZE;
+ if (off == (num_entries * LDC_PACKET_SIZE))
+ off = 0;
+
+ return off;
+}
+
+static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
+{
+ return __advance(off, lp->rx_num_entries);
+}
+
+static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
+{
+ return __advance(off, lp->tx_num_entries);
+}
+
+static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
+ unsigned long *new_tail)
+{
+ struct ldc_packet *p;
+ unsigned long t;
+
+ t = tx_advance(lp, lp->tx_tail);
+ if (t == lp->tx_head)
+ return NULL;
+
+ *new_tail = t;
+
+ p = lp->tx_base;
+ return p + (lp->tx_tail / LDC_PACKET_SIZE);
+}
+
+/* When we are in reliable or stream mode, have to track the next packet
+ * we haven't gotten an ACK for in the TX queue using tx_acked. We have
+ * to be careful not to stomp over the queue past that point. During
+ * the handshake, we don't have TX data packets pending in the queue
+ * and that's why handshake_get_tx_packet() need not be mindful of
+ * lp->tx_acked.
+ */
+static unsigned long head_for_data(struct ldc_channel *lp)
+{
+ if (lp->cfg.mode == LDC_MODE_STREAM)
+ return lp->tx_acked;
+ return lp->tx_head;
+}
+
+static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
+{
+ unsigned long limit, tail, new_tail, diff;
+ unsigned int mss;
+
+ limit = head_for_data(lp);
+ tail = lp->tx_tail;
+ new_tail = tx_advance(lp, tail);
+ if (new_tail == limit)
+ return 0;
+
+ if (limit > new_tail)
+ diff = limit - new_tail;
+ else
+ diff = (limit +
+ ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
+ diff /= LDC_PACKET_SIZE;
+ mss = lp->mss;
+
+ if (diff * mss < size)
+ return 0;
+
+ return 1;
+}
+
+static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
+ unsigned long *new_tail)
+{
+ struct ldc_packet *p;
+ unsigned long h, t;
+
+ h = head_for_data(lp);
+ t = tx_advance(lp, lp->tx_tail);
+ if (t == h)
+ return NULL;
+
+ *new_tail = t;
+
+ p = lp->tx_base;
+ return p + (lp->tx_tail / LDC_PACKET_SIZE);
+}
+
+static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
+{
+ unsigned long orig_tail = lp->tx_tail;
+ int limit = 1000;
+
+ lp->tx_tail = tail;
+ while (limit-- > 0) {
+ unsigned long err;
+
+ err = sun4v_ldc_tx_set_qtail(lp->id, tail);
+ if (!err)
+ return 0;
+
+ if (err != HV_EWOULDBLOCK) {
+ lp->tx_tail = orig_tail;
+ return -EINVAL;
+ }
+ udelay(1);
+ }
+
+ lp->tx_tail = orig_tail;
+ return -EBUSY;
+}
+
+/* This just updates the head value in the hypervisor using
+ * a polling loop with a timeout. The caller takes care of
+ * upating software state representing the head change, if any.
+ */
+static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
+{
+ int limit = 1000;
+
+ while (limit-- > 0) {
+ unsigned long err;
+
+ err = sun4v_ldc_rx_set_qhead(lp->id, head);
+ if (!err)
+ return 0;
+
+ if (err != HV_EWOULDBLOCK)
+ return -EINVAL;
+
+ udelay(1);
+ }
+
+ return -EBUSY;
+}
+
+static int send_tx_packet(struct ldc_channel *lp,
+ struct ldc_packet *p,
+ unsigned long new_tail)
+{
+ BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
+
+ return set_tx_tail(lp, new_tail);
+}
+
+static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
+ u8 stype, u8 ctrl,
+ void *data, int dlen,
+ unsigned long *new_tail)
+{
+ struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ p->type = LDC_CTRL;
+ p->stype = stype;
+ p->ctrl = ctrl;
+ if (data)
+ memcpy(p->u.u_data, data, dlen);
+ }
+ return p;
+}
+
+static int start_handshake(struct ldc_channel *lp)
+{
+ struct ldc_packet *p;
+ struct ldc_version *ver;
+ unsigned long new_tail;
+
+ ver = &ver_arr[0];
+
+ ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
+ ver->major, ver->minor);
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
+ ver, sizeof(*ver), &new_tail);
+ if (p) {
+ int err = send_tx_packet(lp, p, new_tail);
+ if (!err)
+ lp->flags &= ~LDC_FLAG_RESET;
+ return err;
+ }
+ return -EBUSY;
+}
+
+static int send_version_nack(struct ldc_channel *lp,
+ u16 major, u16 minor)
+{
+ struct ldc_packet *p;
+ struct ldc_version ver;
+ unsigned long new_tail;
+
+ ver.major = major;
+ ver.minor = minor;
+
+ p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
+ &ver, sizeof(ver), &new_tail);
+ if (p) {
+ ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
+ ver.major, ver.minor);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_version_ack(struct ldc_channel *lp,
+ struct ldc_version *vp)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
+ vp, sizeof(*vp), &new_tail);
+ if (p) {
+ ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
+ vp->major, vp->minor);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_rts(struct ldc_channel *lp)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
+ &new_tail);
+ if (p) {
+ p->env = lp->cfg.mode;
+ p->seqid = 0;
+ lp->rcv_nxt = 0;
+
+ ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
+ p->env, p->seqid);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_rtr(struct ldc_channel *lp)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
+ &new_tail);
+ if (p) {
+ p->env = lp->cfg.mode;
+ p->seqid = 0;
+
+ ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
+ p->env, p->seqid);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_rdx(struct ldc_channel *lp)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
+ &new_tail);
+ if (p) {
+ p->env = 0;
+ p->seqid = ++lp->snd_nxt;
+ p->u.r.ackid = lp->rcv_nxt;
+
+ ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
+ p->env, p->seqid, p->u.r.ackid);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+ int err;
+
+ p = data_get_tx_packet(lp, &new_tail);
+ if (!p)
+ return -EBUSY;
+ memset(p, 0, sizeof(*p));
+ p->type = data_pkt->type;
+ p->stype = LDC_NACK;
+ p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
+ p->seqid = lp->snd_nxt + 1;
+ p->u.r.ackid = lp->rcv_nxt;
+
+ ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
+ p->type, p->ctrl, p->seqid, p->u.r.ackid);
+
+ err = send_tx_packet(lp, p, new_tail);
+ if (!err)
+ lp->snd_nxt++;
+
+ return err;
+}
+
+static int ldc_abort(struct ldc_channel *lp)
+{
+ unsigned long hv_err;
+
+ ldcdbg(STATE, "ABORT\n");
+
+ /* We report but do not act upon the hypervisor errors because
+ * there really isn't much we can do if they fail at this point.
+ */
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
+ lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
+
+ hv_err = sun4v_ldc_tx_get_state(lp->id,
+ &lp->tx_head,
+ &lp->tx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
+ lp->id, hv_err);
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
+ lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
+
+ /* Refetch the RX queue state as well, because we could be invoked
+ * here in the queue processing context.
+ */
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
+ lp->id, hv_err);
+
+ return -ECONNRESET;
+}
+
+static struct ldc_version *find_by_major(u16 major)
+{
+ struct ldc_version *ret = NULL;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
+ struct ldc_version *v = &ver_arr[i];
+ if (v->major <= major) {
+ ret = v;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
+{
+ struct ldc_version *vap;
+ int err;
+
+ ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
+ vp->major, vp->minor);
+
+ if (lp->hs_state == LDC_HS_GOTVERS) {
+ lp->hs_state = LDC_HS_OPEN;
+ memset(&lp->ver, 0, sizeof(lp->ver));
+ }
+
+ vap = find_by_major(vp->major);
+ if (!vap) {
+ err = send_version_nack(lp, 0, 0);
+ } else if (vap->major != vp->major) {
+ err = send_version_nack(lp, vap->major, vap->minor);
+ } else {
+ struct ldc_version ver = *vp;
+ if (ver.minor > vap->minor)
+ ver.minor = vap->minor;
+ err = send_version_ack(lp, &ver);
+ if (!err) {
+ lp->ver = ver;
+ lp->hs_state = LDC_HS_GOTVERS;
+ }
+ }
+ if (err)
+ return ldc_abort(lp);
+
+ return 0;
+}
+
+static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
+{
+ ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
+ vp->major, vp->minor);
+
+ if (lp->hs_state == LDC_HS_GOTVERS) {
+ if (lp->ver.major != vp->major ||
+ lp->ver.minor != vp->minor)
+ return ldc_abort(lp);
+ } else {
+ lp->ver = *vp;
+ lp->hs_state = LDC_HS_GOTVERS;
+ }
+ if (send_rts(lp))
+ return ldc_abort(lp);
+ return 0;
+}
+
+static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
+{
+ struct ldc_version *vap;
+
+ if ((vp->major == 0 && vp->minor == 0) ||
+ !(vap = find_by_major(vp->major))) {
+ return ldc_abort(lp);
+ } else {
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
+ vap, sizeof(*vap),
+ &new_tail);
+ if (p)
+ return send_tx_packet(lp, p, new_tail);
+ else
+ return ldc_abort(lp);
+ }
+}
+
+static int process_version(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ struct ldc_version *vp;
+
+ vp = (struct ldc_version *) p->u.u_data;
+
+ switch (p->stype) {
+ case LDC_INFO:
+ return process_ver_info(lp, vp);
+
+ case LDC_ACK:
+ return process_ver_ack(lp, vp);
+
+ case LDC_NACK:
+ return process_ver_nack(lp, vp);
+
+ default:
+ return ldc_abort(lp);
+ }
+}
+
+static int process_rts(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
+ p->stype, p->seqid, p->env);
+
+ if (p->stype != LDC_INFO ||
+ lp->hs_state != LDC_HS_GOTVERS ||
+ p->env != lp->cfg.mode)
+ return ldc_abort(lp);
+
+ lp->snd_nxt = p->seqid;
+ lp->rcv_nxt = p->seqid;
+ lp->hs_state = LDC_HS_SENTRTR;
+ if (send_rtr(lp))
+ return ldc_abort(lp);
+
+ return 0;
+}
+
+static int process_rtr(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
+ p->stype, p->seqid, p->env);
+
+ if (p->stype != LDC_INFO ||
+ p->env != lp->cfg.mode)
+ return ldc_abort(lp);
+
+ lp->snd_nxt = p->seqid;
+ lp->hs_state = LDC_HS_COMPLETE;
+ ldc_set_state(lp, LDC_STATE_CONNECTED);
+ send_rdx(lp);
+
+ return LDC_EVENT_UP;
+}
+
+static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
+{
+ return lp->rcv_nxt + 1 == seqid;
+}
+
+static int process_rdx(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
+ p->stype, p->seqid, p->env, p->u.r.ackid);
+
+ if (p->stype != LDC_INFO ||
+ !(rx_seq_ok(lp, p->seqid)))
+ return ldc_abort(lp);
+
+ lp->rcv_nxt = p->seqid;
+
+ lp->hs_state = LDC_HS_COMPLETE;
+ ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+ return LDC_EVENT_UP;
+}
+
+static int process_control_frame(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ switch (p->ctrl) {
+ case LDC_VERS:
+ return process_version(lp, p);
+
+ case LDC_RTS:
+ return process_rts(lp, p);
+
+ case LDC_RTR:
+ return process_rtr(lp, p);
+
+ case LDC_RDX:
+ return process_rdx(lp, p);
+
+ default:
+ return ldc_abort(lp);
+ }
+}
+
+static int process_error_frame(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ return ldc_abort(lp);
+}
+
+static int process_data_ack(struct ldc_channel *lp,
+ struct ldc_packet *ack)
+{
+ unsigned long head = lp->tx_acked;
+ u32 ackid = ack->u.r.ackid;
+
+ while (1) {
+ struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
+
+ head = tx_advance(lp, head);
+
+ if (p->seqid == ackid) {
+ lp->tx_acked = head;
+ return 0;
+ }
+ if (head == lp->tx_tail)
+ return ldc_abort(lp);
+ }
+
+ return 0;
+}
+
+static void send_events(struct ldc_channel *lp, unsigned int event_mask)
+{
+ if (event_mask & LDC_EVENT_RESET)
+ lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
+ if (event_mask & LDC_EVENT_UP)
+ lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
+ if (event_mask & LDC_EVENT_DATA_READY)
+ lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
+}
+
+static irqreturn_t ldc_rx(int irq, void *dev_id)
+{
+ struct ldc_channel *lp = dev_id;
+ unsigned long orig_state, hv_err, flags;
+ unsigned int event_mask;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ orig_state = lp->chan_state;
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+
+ ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
+ orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
+
+ event_mask = 0;
+
+ if (lp->cfg.mode == LDC_MODE_RAW &&
+ lp->chan_state == LDC_CHANNEL_UP) {
+ lp->hs_state = LDC_HS_COMPLETE;
+ ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+ event_mask |= LDC_EVENT_UP;
+
+ orig_state = lp->chan_state;
+ }
+
+ /* If we are in reset state, flush the RX queue and ignore
+ * everything.
+ */
+ if (lp->flags & LDC_FLAG_RESET) {
+ (void) __set_rx_head(lp, lp->rx_tail);
+ goto out;
+ }
+
+ /* Once we finish the handshake, we let the ldc_read()
+ * paths do all of the control frame and state management.
+ * Just trigger the callback.
+ */
+ if (lp->hs_state == LDC_HS_COMPLETE) {
+handshake_complete:
+ if (lp->chan_state != orig_state) {
+ unsigned int event = LDC_EVENT_RESET;
+
+ if (lp->chan_state == LDC_CHANNEL_UP)
+ event = LDC_EVENT_UP;
+
+ event_mask |= event;
+ }
+ if (lp->rx_head != lp->rx_tail)
+ event_mask |= LDC_EVENT_DATA_READY;
+
+ goto out;
+ }
+
+ if (lp->chan_state != orig_state)
+ goto out;
+
+ while (lp->rx_head != lp->rx_tail) {
+ struct ldc_packet *p;
+ unsigned long new;
+ int err;
+
+ p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
+
+ switch (p->type) {
+ case LDC_CTRL:
+ err = process_control_frame(lp, p);
+ if (err > 0)
+ event_mask |= err;
+ break;
+
+ case LDC_DATA:
+ event_mask |= LDC_EVENT_DATA_READY;
+ err = 0;
+ break;
+
+ case LDC_ERR:
+ err = process_error_frame(lp, p);
+ break;
+
+ default:
+ err = ldc_abort(lp);
+ break;
+ }
+
+ if (err < 0)
+ break;
+
+ new = lp->rx_head;
+ new += LDC_PACKET_SIZE;
+ if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
+ new = 0;
+ lp->rx_head = new;
+
+ err = __set_rx_head(lp, new);
+ if (err < 0) {
+ (void) ldc_abort(lp);
+ break;
+ }
+ if (lp->hs_state == LDC_HS_COMPLETE)
+ goto handshake_complete;
+ }
+
+out:
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ send_events(lp, event_mask);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ldc_tx(int irq, void *dev_id)
+{
+ struct ldc_channel *lp = dev_id;
+ unsigned long flags, hv_err, orig_state;
+ unsigned int event_mask = 0;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ orig_state = lp->chan_state;
+ hv_err = sun4v_ldc_tx_get_state(lp->id,
+ &lp->tx_head,
+ &lp->tx_tail,
+ &lp->chan_state);
+
+ ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
+ orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
+
+ if (lp->cfg.mode == LDC_MODE_RAW &&
+ lp->chan_state == LDC_CHANNEL_UP) {
+ lp->hs_state = LDC_HS_COMPLETE;
+ ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+ event_mask |= LDC_EVENT_UP;
+ }
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ send_events(lp, event_mask);
+
+ return IRQ_HANDLED;
+}
+
+/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
+ * XXX that addition and removal from the ldc_channel_list has
+ * XXX atomicity, otherwise the __ldc_channel_exists() check is
+ * XXX totally pointless as another thread can slip into ldc_alloc()
+ * XXX and add a channel with the same ID. There also needs to be
+ * XXX a spinlock for ldc_channel_list.
+ */
+static HLIST_HEAD(ldc_channel_list);
+
+static int __ldc_channel_exists(unsigned long id)
+{
+ struct ldc_channel *lp;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
+ if (lp->id == id)
+ return 1;
+ }
+ return 0;
+}
+
+static int alloc_queue(const char *name, unsigned long num_entries,
+ struct ldc_packet **base, unsigned long *ra)
+{
+ unsigned long size, order;
+ void *q;
+
+ size = num_entries * LDC_PACKET_SIZE;
+ order = get_order(size);
+
+ q = (void *) __get_free_pages(GFP_KERNEL, order);
+ if (!q) {
+ printk(KERN_ERR PFX "Alloc of %s queue failed with "
+ "size=%lu order=%lu\n", name, size, order);
+ return -ENOMEM;
+ }
+
+ memset(q, 0, PAGE_SIZE << order);
+
+ *base = q;
+ *ra = __pa(q);
+
+ return 0;
+}
+
+static void free_queue(unsigned long num_entries, struct ldc_packet *q)
+{
+ unsigned long size, order;
+
+ if (!q)
+ return;
+
+ size = num_entries * LDC_PACKET_SIZE;
+ order = get_order(size);
+
+ free_pages((unsigned long)q, order);
+}
+
+/* XXX Make this configurable... XXX */
+#define LDC_IOTABLE_SIZE (8 * 1024)
+
+static int ldc_iommu_init(struct ldc_channel *lp)
+{
+ unsigned long sz, num_tsb_entries, tsbsize, order;
+ struct ldc_iommu *iommu = &lp->iommu;
+ struct ldc_mtable_entry *table;
+ unsigned long hv_err;
+ int err;
+
+ num_tsb_entries = LDC_IOTABLE_SIZE;
+ tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
+
+ spin_lock_init(&iommu->lock);
+
+ sz = num_tsb_entries / 8;
+ sz = (sz + 7UL) & ~7UL;
+ iommu->arena.map = kzalloc(sz, GFP_KERNEL);
+ if (!iommu->arena.map) {
+ printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
+ return -ENOMEM;
+ }
+
+ iommu->arena.limit = num_tsb_entries;
+
+ order = get_order(tsbsize);
+
+ table = (struct ldc_mtable_entry *)
+ __get_free_pages(GFP_KERNEL, order);
+ err = -ENOMEM;
+ if (!table) {
+ printk(KERN_ERR PFX "Alloc of MTE table failed, "
+ "size=%lu order=%lu\n", tsbsize, order);
+ goto out_free_map;
+ }
+
+ memset(table, 0, PAGE_SIZE << order);
+
+ iommu->page_table = table;
+
+ hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
+ num_tsb_entries);
+ err = -EINVAL;
+ if (hv_err)
+ goto out_free_table;
+
+ return 0;
+
+out_free_table:
+ free_pages((unsigned long) table, order);
+ iommu->page_table = NULL;
+
+out_free_map:
+ kfree(iommu->arena.map);
+ iommu->arena.map = NULL;
+
+ return err;
+}
+
+static void ldc_iommu_release(struct ldc_channel *lp)
+{
+ struct ldc_iommu *iommu = &lp->iommu;
+ unsigned long num_tsb_entries, tsbsize, order;
+
+ (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
+
+ num_tsb_entries = iommu->arena.limit;
+ tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
+ order = get_order(tsbsize);
+
+ free_pages((unsigned long) iommu->page_table, order);
+ iommu->page_table = NULL;
+
+ kfree(iommu->arena.map);
+ iommu->arena.map = NULL;
+}
+
+struct ldc_channel *ldc_alloc(unsigned long id,
+ const struct ldc_channel_config *cfgp,
+ void *event_arg)
+{
+ struct ldc_channel *lp;
+ const struct ldc_mode_ops *mops;
+ unsigned long dummy1, dummy2, hv_err;
+ u8 mss, *mssbuf;
+ int err;
+
+ err = -ENODEV;
+ if (!ldom_domaining_enabled)
+ goto out_err;
+
+ err = -EINVAL;
+ if (!cfgp)
+ goto out_err;
+
+ switch (cfgp->mode) {
+ case LDC_MODE_RAW:
+ mops = &raw_ops;
+ mss = LDC_PACKET_SIZE;
+ break;
+
+ case LDC_MODE_UNRELIABLE:
+ mops = &nonraw_ops;
+ mss = LDC_PACKET_SIZE - 8;
+ break;
+
+ case LDC_MODE_STREAM:
+ mops = &stream_ops;
+ mss = LDC_PACKET_SIZE - 8 - 8;
+ break;
+
+ default:
+ goto out_err;
+ }
+
+ if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
+ goto out_err;
+
+ hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
+ err = -ENODEV;
+ if (hv_err == HV_ECHANNEL)
+ goto out_err;
+
+ err = -EEXIST;
+ if (__ldc_channel_exists(id))
+ goto out_err;
+
+ mssbuf = NULL;
+
+ lp = kzalloc(sizeof(*lp), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!lp)
+ goto out_err;
+
+ spin_lock_init(&lp->lock);
+
+ lp->id = id;
+
+ err = ldc_iommu_init(lp);
+ if (err)
+ goto out_free_ldc;
+
+ lp->mops = mops;
+ lp->mss = mss;
+
+ lp->cfg = *cfgp;
+ if (!lp->cfg.mtu)
+ lp->cfg.mtu = LDC_DEFAULT_MTU;
+
+ if (lp->cfg.mode == LDC_MODE_STREAM) {
+ mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
+ if (!mssbuf) {
+ err = -ENOMEM;
+ goto out_free_iommu;
+ }
+ lp->mssbuf = mssbuf;
+ }
+
+ lp->event_arg = event_arg;
+
+ /* XXX allow setting via ldc_channel_config to override defaults
+ * XXX or use some formula based upon mtu
+ */
+ lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
+ lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
+
+ err = alloc_queue("TX", lp->tx_num_entries,
+ &lp->tx_base, &lp->tx_ra);
+ if (err)
+ goto out_free_mssbuf;
+
+ err = alloc_queue("RX", lp->rx_num_entries,
+ &lp->rx_base, &lp->rx_ra);
+ if (err)
+ goto out_free_txq;
+
+ lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
+
+ lp->hs_state = LDC_HS_CLOSED;
+ ldc_set_state(lp, LDC_STATE_INIT);
+
+ INIT_HLIST_NODE(&lp->list);
+ hlist_add_head(&lp->list, &ldc_channel_list);
+
+ INIT_HLIST_HEAD(&lp->mh_list);
+
+ return lp;
+
+out_free_txq:
+ free_queue(lp->tx_num_entries, lp->tx_base);
+
+out_free_mssbuf:
+ if (mssbuf)
+ kfree(mssbuf);
+
+out_free_iommu:
+ ldc_iommu_release(lp);
+
+out_free_ldc:
+ kfree(lp);
+
+out_err:
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ldc_alloc);
+
+void ldc_free(struct ldc_channel *lp)
+{
+ if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
+ free_irq(lp->cfg.rx_irq, lp);
+ free_irq(lp->cfg.tx_irq, lp);
+ }
+
+ if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
+ sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
+ }
+ if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
+ free_queue(lp->tx_num_entries, lp->tx_base);
+ free_queue(lp->rx_num_entries, lp->rx_base);
+ lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
+ }
+
+ hlist_del(&lp->list);
+
+ if (lp->mssbuf)
+ kfree(lp->mssbuf);
+
+ ldc_iommu_release(lp);
+
+ kfree(lp);
+}
+EXPORT_SYMBOL(ldc_free);
+
+/* Bind the channel. This registers the LDC queues with
+ * the hypervisor and puts the channel into a pseudo-listening
+ * state. This does not initiate a handshake, ldc_connect() does
+ * that.
+ */
+int ldc_bind(struct ldc_channel *lp, const char *name)
+{
+ unsigned long hv_err, flags;
+ int err = -EINVAL;
+
+ if (!name ||
+ (lp->state != LDC_STATE_INIT))
+ return -EINVAL;
+
+ snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
+ snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
+
+ err = request_irq(lp->cfg.rx_irq, ldc_rx,
+ IRQF_SAMPLE_RANDOM | IRQF_SHARED,
+ lp->rx_irq_name, lp);
+ if (err)
+ return err;
+
+ err = request_irq(lp->cfg.tx_irq, ldc_tx,
+ IRQF_SAMPLE_RANDOM | IRQF_SHARED,
+ lp->tx_irq_name, lp);
+ if (err) {
+ free_irq(lp->cfg.rx_irq, lp);
+ return err;
+ }
+
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ enable_irq(lp->cfg.rx_irq);
+ enable_irq(lp->cfg.tx_irq);
+
+ lp->flags |= LDC_FLAG_REGISTERED_IRQS;
+
+ err = -ENODEV;
+ hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_free_irqs;
+
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ goto out_free_irqs;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_unmap_tx;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ goto out_unmap_tx;
+
+ lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
+
+ hv_err = sun4v_ldc_tx_get_state(lp->id,
+ &lp->tx_head,
+ &lp->tx_tail,
+ &lp->chan_state);
+ err = -EBUSY;
+ if (hv_err)
+ goto out_unmap_rx;
+
+ lp->tx_acked = lp->tx_head;
+
+ lp->hs_state = LDC_HS_OPEN;
+ ldc_set_state(lp, LDC_STATE_BOUND);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return 0;
+
+out_unmap_rx:
+ lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
+ sun4v_ldc_rx_qconf(lp->id, 0, 0);
+
+out_unmap_tx:
+ sun4v_ldc_tx_qconf(lp->id, 0, 0);
+
+out_free_irqs:
+ lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
+ free_irq(lp->cfg.tx_irq, lp);
+ free_irq(lp->cfg.rx_irq, lp);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_bind);
+
+int ldc_connect(struct ldc_channel *lp)
+{
+ unsigned long flags;
+ int err;
+
+ if (lp->cfg.mode == LDC_MODE_RAW)
+ return -EINVAL;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
+ !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
+ lp->hs_state != LDC_HS_OPEN)
+ err = -EINVAL;
+ else
+ err = start_handshake(lp);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_connect);
+
+int ldc_disconnect(struct ldc_channel *lp)
+{
+ unsigned long hv_err, flags;
+ int err;
+
+ if (lp->cfg.mode == LDC_MODE_RAW)
+ return -EINVAL;
+
+ if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
+ !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
+ return -EINVAL;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ err = -ENODEV;
+ hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ goto out_err;
+
+ ldc_set_state(lp, LDC_STATE_BOUND);
+ lp->hs_state = LDC_HS_OPEN;
+ lp->flags |= LDC_FLAG_RESET;
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return 0;
+
+out_err:
+ sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ free_irq(lp->cfg.tx_irq, lp);
+ free_irq(lp->cfg.rx_irq, lp);
+ lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
+ LDC_FLAG_REGISTERED_QUEUES);
+ ldc_set_state(lp, LDC_STATE_INIT);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_disconnect);
+
+int ldc_state(struct ldc_channel *lp)
+{
+ return lp->state;
+}
+EXPORT_SYMBOL(ldc_state);
+
+static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+ int err;
+
+ if (size > LDC_PACKET_SIZE)
+ return -EMSGSIZE;
+
+ p = data_get_tx_packet(lp, &new_tail);
+ if (!p)
+ return -EAGAIN;
+
+ memcpy(p, buf, size);
+
+ err = send_tx_packet(lp, p, new_tail);
+ if (!err)
+ err = size;
+
+ return err;
+}
+
+static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+ struct ldc_packet *p;
+ unsigned long hv_err, new;
+ int err;
+
+ if (size < LDC_PACKET_SIZE)
+ return -EINVAL;
+
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ return ldc_abort(lp);
+
+ if (lp->chan_state == LDC_CHANNEL_DOWN ||
+ lp->chan_state == LDC_CHANNEL_RESETTING)
+ return -ECONNRESET;
+
+ if (lp->rx_head == lp->rx_tail)
+ return 0;
+
+ p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
+ memcpy(buf, p, LDC_PACKET_SIZE);
+
+ new = rx_advance(lp, lp->rx_head);
+ lp->rx_head = new;
+
+ err = __set_rx_head(lp, new);
+ if (err < 0)
+ err = -ECONNRESET;
+ else
+ err = LDC_PACKET_SIZE;
+
+ return err;
+}
+
+static const struct ldc_mode_ops raw_ops = {
+ .write = write_raw,
+ .read = read_raw,
+};
+
+static int write_nonraw(struct ldc_channel *lp, const void *buf,
+ unsigned int size)
+{
+ unsigned long hv_err, tail;
+ unsigned int copied;
+ u32 seq;
+ int err;
+
+ hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
+ &lp->chan_state);
+ if (unlikely(hv_err))
+ return -EBUSY;
+
+ if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
+ return ldc_abort(lp);
+
+ if (!tx_has_space_for(lp, size))
+ return -EAGAIN;
+
+ seq = lp->snd_nxt;
+ copied = 0;
+ tail = lp->tx_tail;
+ while (copied < size) {
+ struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
+ u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
+ p->u.u_data :
+ p->u.r.r_data);
+ int data_len;
+
+ p->type = LDC_DATA;
+ p->stype = LDC_INFO;
+ p->ctrl = 0;
+
+ data_len = size - copied;
+ if (data_len > lp->mss)
+ data_len = lp->mss;
+
+ BUG_ON(data_len > LDC_LEN);
+
+ p->env = (data_len |
+ (copied == 0 ? LDC_START : 0) |
+ (data_len == size - copied ? LDC_STOP : 0));
+
+ p->seqid = ++seq;
+
+ ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
+ p->type,
+ p->stype,
+ p->ctrl,
+ p->env,
+ p->seqid);
+
+ memcpy(data, buf, data_len);
+ buf += data_len;
+ copied += data_len;
+
+ tail = tx_advance(lp, tail);
+ }
+
+ err = set_tx_tail(lp, tail);
+ if (!err) {
+ lp->snd_nxt = seq;
+ err = size;
+ }
+
+ return err;
+}
+
+static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
+ struct ldc_packet *first_frag)
+{
+ int err;
+
+ if (first_frag)
+ lp->rcv_nxt = first_frag->seqid - 1;
+
+ err = send_data_nack(lp, p);
+ if (err)
+ return err;
+
+ err = __set_rx_head(lp, lp->rx_tail);
+ if (err < 0)
+ return ldc_abort(lp);
+
+ return 0;
+}
+
+static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
+{
+ if (p->stype & LDC_ACK) {
+ int err = process_data_ack(lp, p);
+ if (err)
+ return err;
+ }
+ if (p->stype & LDC_NACK)
+ return ldc_abort(lp);
+
+ return 0;
+}
+
+static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
+{
+ unsigned long dummy;
+ int limit = 1000;
+
+ ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
+ cur_head, lp->rx_head, lp->rx_tail);
+ while (limit-- > 0) {
+ unsigned long hv_err;
+
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &dummy,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ return ldc_abort(lp);
+
+ if (lp->chan_state == LDC_CHANNEL_DOWN ||
+ lp->chan_state == LDC_CHANNEL_RESETTING)
+ return -ECONNRESET;
+
+ if (cur_head != lp->rx_tail) {
+ ldcdbg(DATA, "DATA WAIT DONE "
+ "head[%lx] tail[%lx] chan_state[%lx]\n",
+ dummy, lp->rx_tail, lp->chan_state);
+ return 0;
+ }
+
+ udelay(1);
+ }
+ return -EAGAIN;
+}
+
+static int rx_set_head(struct ldc_channel *lp, unsigned long head)
+{
+ int err = __set_rx_head(lp, head);
+
+ if (err < 0)
+ return ldc_abort(lp);
+
+ lp->rx_head = head;
+ return 0;
+}
+
+static void send_data_ack(struct ldc_channel *lp)
+{
+ unsigned long new_tail;
+ struct ldc_packet *p;
+
+ p = data_get_tx_packet(lp, &new_tail);
+ if (likely(p)) {
+ int err;
+
+ memset(p, 0, sizeof(*p));
+ p->type = LDC_DATA;
+ p->stype = LDC_ACK;
+ p->ctrl = 0;
+ p->seqid = lp->snd_nxt + 1;
+ p->u.r.ackid = lp->rcv_nxt;
+
+ err = send_tx_packet(lp, p, new_tail);
+ if (!err)
+ lp->snd_nxt++;
+ }
+}
+
+static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+ struct ldc_packet *first_frag;
+ unsigned long hv_err, new;
+ int err, copied;
+
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ return ldc_abort(lp);
+
+ if (lp->chan_state == LDC_CHANNEL_DOWN ||
+ lp->chan_state == LDC_CHANNEL_RESETTING)
+ return -ECONNRESET;
+
+ if (lp->rx_head == lp->rx_tail)
+ return 0;
+
+ first_frag = NULL;
+ copied = err = 0;
+ new = lp->rx_head;
+ while (1) {
+ struct ldc_packet *p;
+ int pkt_len;
+
+ BUG_ON(new == lp->rx_tail);
+ p = lp->rx_base + (new / LDC_PACKET_SIZE);
+
+ ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
+ "rcv_nxt[%08x]\n",
+ p->type,
+ p->stype,
+ p->ctrl,
+ p->env,
+ p->seqid,
+ p->u.r.ackid,
+ lp->rcv_nxt);
+
+ if (unlikely(!rx_seq_ok(lp, p->seqid))) {
+ err = rx_bad_seq(lp, p, first_frag);
+ copied = 0;
+ break;
+ }
+
+ if (p->type & LDC_CTRL) {
+ err = process_control_frame(lp, p);
+ if (err < 0)
+ break;
+ err = 0;
+ }
+
+ lp->rcv_nxt = p->seqid;
+
+ if (!(p->type & LDC_DATA)) {
+ new = rx_advance(lp, new);
+ goto no_data;
+ }
+ if (p->stype & (LDC_ACK | LDC_NACK)) {
+ err = data_ack_nack(lp, p);
+ if (err)
+ break;
+ }
+ if (!(p->stype & LDC_INFO)) {
+ new = rx_advance(lp, new);
+ err = rx_set_head(lp, new);
+ if (err)
+ break;
+ goto no_data;
+ }
+
+ pkt_len = p->env & LDC_LEN;
+
+ /* Every initial packet starts with the START bit set.
+ *
+ * Singleton packets will have both START+STOP set.
+ *
+ * Fragments will have START set in the first frame, STOP
+ * set in the last frame, and neither bit set in middle
+ * frames of the packet.
+ *
+ * Therefore if we are at the beginning of a packet and
+ * we don't see START, or we are in the middle of a fragmented
+ * packet and do see START, we are unsynchronized and should
+ * flush the RX queue.
+ */
+ if ((first_frag == NULL && !(p->env & LDC_START)) ||
+ (first_frag != NULL && (p->env & LDC_START))) {
+ if (!first_frag)
+ new = rx_advance(lp, new);
+
+ err = rx_set_head(lp, new);
+ if (err)
+ break;
+
+ if (!first_frag)
+ goto no_data;
+ }
+ if (!first_frag)
+ first_frag = p;
+
+ if (pkt_len > size - copied) {
+ /* User didn't give us a big enough buffer,
+ * what to do? This is a pretty serious error.
+ *
+ * Since we haven't updated the RX ring head to
+ * consume any of the packets, signal the error
+ * to the user and just leave the RX ring alone.
+ *
+ * This seems the best behavior because this allows
+ * a user of the LDC layer to start with a small
+ * RX buffer for ldc_read() calls and use -EMSGSIZE
+ * as a cue to enlarge it's read buffer.
+ */
+ err = -EMSGSIZE;
+ break;
+ }
+
+ /* Ok, we are gonna eat this one. */
+ new = rx_advance(lp, new);
+
+ memcpy(buf,
+ (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
+ p->u.u_data : p->u.r.r_data), pkt_len);
+ buf += pkt_len;
+ copied += pkt_len;
+
+ if (p->env & LDC_STOP)
+ break;
+
+no_data:
+ if (new == lp->rx_tail) {
+ err = rx_data_wait(lp, new);
+ if (err)
+ break;
+ }
+ }
+
+ if (!err)
+ err = rx_set_head(lp, new);
+
+ if (err && first_frag)
+ lp->rcv_nxt = first_frag->seqid - 1;
+
+ if (!err) {
+ err = copied;
+ if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
+ send_data_ack(lp);
+ }
+
+ return err;
+}
+
+static const struct ldc_mode_ops nonraw_ops = {
+ .write = write_nonraw,
+ .read = read_nonraw,
+};
+
+static int write_stream(struct ldc_channel *lp, const void *buf,
+ unsigned int size)
+{
+ if (size > lp->cfg.mtu)
+ size = lp->cfg.mtu;
+ return write_nonraw(lp, buf, size);
+}
+
+static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+ if (!lp->mssbuf_len) {
+ int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
+ if (err < 0)
+ return err;
+
+ lp->mssbuf_len = err;
+ lp->mssbuf_off = 0;
+ }
+
+ if (size > lp->mssbuf_len)
+ size = lp->mssbuf_len;
+ memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
+
+ lp->mssbuf_off += size;
+ lp->mssbuf_len -= size;
+
+ return size;
+}
+
+static const struct ldc_mode_ops stream_ops = {
+ .write = write_stream,
+ .read = read_stream,
+};
+
+int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
+{
+ unsigned long flags;
+ int err;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (!size)
+ return 0;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ if (lp->hs_state != LDC_HS_COMPLETE)
+ err = -ENOTCONN;
+ else
+ err = lp->mops->write(lp, buf, size);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_write);
+
+int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+ unsigned long flags;
+ int err;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (!size)
+ return 0;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ if (lp->hs_state != LDC_HS_COMPLETE)
+ err = -ENOTCONN;
+ else
+ err = lp->mops->read(lp, buf, size);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_read);
+
+static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
+{
+ struct iommu_arena *arena = &iommu->arena;
+ unsigned long n, i, start, end, limit;
+ int pass;
+
+ limit = arena->limit;
+ start = arena->hint;
+ pass = 0;
+
+again:
+ n = find_next_zero_bit(arena->map, limit, start);
+ end = n + npages;
+ if (unlikely(end >= limit)) {
+ if (likely(pass < 1)) {
+ limit = start;
+ start = 0;
+ pass++;
+ goto again;
+ } else {
+ /* Scanned the whole thing, give up. */
+ return -1;
+ }
+ }
+
+ for (i = n; i < end; i++) {
+ if (test_bit(i, arena->map)) {
+ start = i + 1;
+ goto again;
+ }
+ }
+
+ for (i = n; i < end; i++)
+ __set_bit(i, arena->map);
+
+ arena->hint = end;
+
+ return n;
+}
+
+#define COOKIE_PGSZ_CODE 0xf000000000000000ULL
+#define COOKIE_PGSZ_CODE_SHIFT 60ULL
+
+static u64 pagesize_code(void)
+{
+ switch (PAGE_SIZE) {
+ default:
+ case (8ULL * 1024ULL):
+ return 0;
+ case (64ULL * 1024ULL):
+ return 1;
+ case (512ULL * 1024ULL):
+ return 2;
+ case (4ULL * 1024ULL * 1024ULL):
+ return 3;
+ case (32ULL * 1024ULL * 1024ULL):
+ return 4;
+ case (256ULL * 1024ULL * 1024ULL):
+ return 5;
+ }
+}
+
+static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
+{
+ return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
+ (index << PAGE_SHIFT) |
+ page_offset);
+}
+
+static u64 cookie_to_index(u64 cookie, unsigned long *shift)
+{
+ u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
+
+ cookie &= ~COOKIE_PGSZ_CODE;
+
+ *shift = szcode * 3;
+
+ return (cookie >> (13ULL + (szcode * 3ULL)));
+}
+
+static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
+ unsigned long npages)
+{
+ long entry;
+
+ entry = arena_alloc(iommu, npages);
+ if (unlikely(entry < 0))
+ return NULL;
+
+ return iommu->page_table + entry;
+}
+
+static u64 perm_to_mte(unsigned int map_perm)
+{
+ u64 mte_base;
+
+ mte_base = pagesize_code();
+
+ if (map_perm & LDC_MAP_SHADOW) {
+ if (map_perm & LDC_MAP_R)
+ mte_base |= LDC_MTE_COPY_R;
+ if (map_perm & LDC_MAP_W)
+ mte_base |= LDC_MTE_COPY_W;
+ }
+ if (map_perm & LDC_MAP_DIRECT) {
+ if (map_perm & LDC_MAP_R)
+ mte_base |= LDC_MTE_READ;
+ if (map_perm & LDC_MAP_W)
+ mte_base |= LDC_MTE_WRITE;
+ if (map_perm & LDC_MAP_X)
+ mte_base |= LDC_MTE_EXEC;
+ }
+ if (map_perm & LDC_MAP_IO) {
+ if (map_perm & LDC_MAP_R)
+ mte_base |= LDC_MTE_IOMMU_R;
+ if (map_perm & LDC_MAP_W)
+ mte_base |= LDC_MTE_IOMMU_W;
+ }
+
+ return mte_base;
+}
+
+static int pages_in_region(unsigned long base, long len)
+{
+ int count = 0;
+
+ do {
+ unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
+
+ len -= (new - base);
+ base = new;
+ count++;
+ } while (len > 0);
+
+ return count;
+}
+
+struct cookie_state {
+ struct ldc_mtable_entry *page_table;
+ struct ldc_trans_cookie *cookies;
+ u64 mte_base;
+ u64 prev_cookie;
+ u32 pte_idx;
+ u32 nc;
+};
+
+static void fill_cookies(struct cookie_state *sp, unsigned long pa,
+ unsigned long off, unsigned long len)
+{
+ do {
+ unsigned long tlen, new = pa + PAGE_SIZE;
+ u64 this_cookie;
+
+ sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
+
+ tlen = PAGE_SIZE;
+ if (off)
+ tlen = PAGE_SIZE - off;
+ if (tlen > len)
+ tlen = len;
+
+ this_cookie = make_cookie(sp->pte_idx,
+ pagesize_code(), off);
+
+ off = 0;
+
+ if (this_cookie == sp->prev_cookie) {
+ sp->cookies[sp->nc - 1].cookie_size += tlen;
+ } else {
+ sp->cookies[sp->nc].cookie_addr = this_cookie;
+ sp->cookies[sp->nc].cookie_size = tlen;
+ sp->nc++;
+ }
+ sp->prev_cookie = this_cookie + tlen;
+
+ sp->pte_idx++;
+
+ len -= tlen;
+ pa = new;
+ } while (len > 0);
+}
+
+static int sg_count_one(struct scatterlist *sg)
+{
+ unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
+ long len = sg->length;
+
+ if ((sg->offset | len) & (8UL - 1))
+ return -EFAULT;
+
+ return pages_in_region(base + sg->offset, len);
+}
+
+static int sg_count_pages(struct scatterlist *sg, int num_sg)
+{
+ int count;
+ int i;
+
+ count = 0;
+ for (i = 0; i < num_sg; i++) {
+ int err = sg_count_one(sg + i);
+ if (err < 0)
+ return err;
+ count += err;
+ }
+
+ return count;
+}
+
+int ldc_map_sg(struct ldc_channel *lp,
+ struct scatterlist *sg, int num_sg,
+ struct ldc_trans_cookie *cookies, int ncookies,
+ unsigned int map_perm)
+{
+ unsigned long i, npages, flags;
+ struct ldc_mtable_entry *base;
+ struct cookie_state state;
+ struct ldc_iommu *iommu;
+ int err;
+
+ if (map_perm & ~LDC_MAP_ALL)
+ return -EINVAL;
+
+ err = sg_count_pages(sg, num_sg);
+ if (err < 0)
+ return err;
+
+ npages = err;
+ if (err > ncookies)
+ return -EMSGSIZE;
+
+ iommu = &lp->iommu;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ base = alloc_npages(iommu, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (!base)
+ return -ENOMEM;
+
+ state.page_table = iommu->page_table;
+ state.cookies = cookies;
+ state.mte_base = perm_to_mte(map_perm);
+ state.prev_cookie = ~(u64)0;
+ state.pte_idx = (base - iommu->page_table);
+ state.nc = 0;
+
+ for (i = 0; i < num_sg; i++)
+ fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
+ sg[i].offset, sg[i].length);
+
+ return state.nc;
+}
+EXPORT_SYMBOL(ldc_map_sg);
+
+int ldc_map_single(struct ldc_channel *lp,
+ void *buf, unsigned int len,
+ struct ldc_trans_cookie *cookies, int ncookies,
+ unsigned int map_perm)
+{
+ unsigned long npages, pa, flags;
+ struct ldc_mtable_entry *base;
+ struct cookie_state state;
+ struct ldc_iommu *iommu;
+
+ if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
+ return -EINVAL;
+
+ pa = __pa(buf);
+ if ((pa | len) & (8UL - 1))
+ return -EFAULT;
+
+ npages = pages_in_region(pa, len);
+
+ iommu = &lp->iommu;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ base = alloc_npages(iommu, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (!base)
+ return -ENOMEM;
+
+ state.page_table = iommu->page_table;
+ state.cookies = cookies;
+ state.mte_base = perm_to_mte(map_perm);
+ state.prev_cookie = ~(u64)0;
+ state.pte_idx = (base - iommu->page_table);
+ state.nc = 0;
+ fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
+ BUG_ON(state.nc != 1);
+
+ return state.nc;
+}
+EXPORT_SYMBOL(ldc_map_single);
+
+static void free_npages(unsigned long id, struct ldc_iommu *iommu,
+ u64 cookie, u64 size)
+{
+ struct iommu_arena *arena = &iommu->arena;
+ unsigned long i, shift, index, npages;
+ struct ldc_mtable_entry *base;
+
+ npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
+ index = cookie_to_index(cookie, &shift);
+ base = iommu->page_table + index;
+
+ BUG_ON(index > arena->limit ||
+ (index + npages) > arena->limit);
+
+ for (i = 0; i < npages; i++) {
+ if (base->cookie)
+ sun4v_ldc_revoke(id, cookie + (i << shift),
+ base->cookie);
+ base->mte = 0;
+ __clear_bit(index + i, arena->map);
+ }
+}
+
+void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
+ int ncookies)
+{
+ struct ldc_iommu *iommu = &lp->iommu;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ for (i = 0; i < ncookies; i++) {
+ u64 addr = cookies[i].cookie_addr;
+ u64 size = cookies[i].cookie_size;
+
+ free_npages(lp->id, iommu, addr, size);
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+EXPORT_SYMBOL(ldc_unmap);
+
+int ldc_copy(struct ldc_channel *lp, int copy_dir,
+ void *buf, unsigned int len, unsigned long offset,
+ struct ldc_trans_cookie *cookies, int ncookies)
+{
+ unsigned int orig_len;
+ unsigned long ra;
+ int i;
+
+ if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
+ lp->id, copy_dir);
+ return -EINVAL;
+ }
+
+ ra = __pa(buf);
+ if ((ra | len | offset) & (8UL - 1)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
+ "ra[%lx] len[%x] offset[%lx]\n",
+ lp->id, ra, len, offset);
+ return -EFAULT;
+ }
+
+ if (lp->hs_state != LDC_HS_COMPLETE ||
+ (lp->flags & LDC_FLAG_RESET)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
+ "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
+ return -ECONNRESET;
+ }
+
+ orig_len = len;
+ for (i = 0; i < ncookies; i++) {
+ unsigned long cookie_raddr = cookies[i].cookie_addr;
+ unsigned long this_len = cookies[i].cookie_size;
+ unsigned long actual_len;
+
+ if (unlikely(offset)) {
+ unsigned long this_off = offset;
+
+ if (this_off > this_len)
+ this_off = this_len;
+
+ offset -= this_off;
+ this_len -= this_off;
+ if (!this_len)
+ continue;
+ cookie_raddr += this_off;
+ }
+
+ if (this_len > len)
+ this_len = len;
+
+ while (1) {
+ unsigned long hv_err;
+
+ hv_err = sun4v_ldc_copy(lp->id, copy_dir,
+ cookie_raddr, ra,
+ this_len, &actual_len);
+ if (unlikely(hv_err)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
+ "HV error %lu\n",
+ lp->id, hv_err);
+ if (lp->hs_state != LDC_HS_COMPLETE ||
+ (lp->flags & LDC_FLAG_RESET))
+ return -ECONNRESET;
+ else
+ return -EFAULT;
+ }
+
+ cookie_raddr += actual_len;
+ ra += actual_len;
+ len -= actual_len;
+ if (actual_len == this_len)
+ break;
+
+ this_len -= actual_len;
+ }
+
+ if (!len)
+ break;
+ }
+
+ /* It is caller policy what to do about short copies.
+ * For example, a networking driver can declare the
+ * packet a runt and drop it.
+ */
+
+ return orig_len - len;
+}
+EXPORT_SYMBOL(ldc_copy);
+
+void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
+ struct ldc_trans_cookie *cookies, int *ncookies,
+ unsigned int map_perm)
+{
+ void *buf;
+ int err;
+
+ if (len & (8UL - 1))
+ return ERR_PTR(-EINVAL);
+
+ buf = kzalloc(len, GFP_KERNEL);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
+ if (err < 0) {
+ kfree(buf);
+ return ERR_PTR(err);
+ }
+ *ncookies = err;
+
+ return buf;
+}
+EXPORT_SYMBOL(ldc_alloc_exp_dring);
+
+void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
+ struct ldc_trans_cookie *cookies, int ncookies)
+{
+ ldc_unmap(lp, cookies, ncookies);
+ kfree(buf);
+}
+EXPORT_SYMBOL(ldc_free_exp_dring);
+
+static int __init ldc_init(void)
+{
+ unsigned long major, minor;
+ struct mdesc_handle *hp;
+ const u64 *v;
+ int err;
+ u64 mp;
+
+ hp = mdesc_grab();
+ if (!hp)
+ return -ENODEV;
+
+ mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
+ err = -ENODEV;
+ if (mp == MDESC_NODE_NULL)
+ goto out;
+
+ v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
+ if (!v)
+ goto out;
+
+ major = 1;
+ minor = 0;
+ if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
+ printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
+ goto out;
+ }
+
+ printk(KERN_INFO "%s", version);
+
+ if (!*v) {
+ printk(KERN_INFO PFX "Domaining disabled.\n");
+ goto out;
+ }
+ ldom_domaining_enabled = 1;
+ err = 0;
+
+out:
+ mdesc_release(hp);
+ return err;
+}
+
+core_initcall(ldc_init);
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
new file mode 100644
index 000000000000..3c539a6d7c18
--- /dev/null
+++ b/arch/sparc/kernel/mdesc.c
@@ -0,0 +1,917 @@
+/* mdesc.c: Sun4V machine description handling.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/lmb.h>
+#include <linux/log2.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+
+#include <asm/cpudata.h>
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+#include <asm/prom.h>
+#include <asm/oplib.h>
+#include <asm/smp.h>
+
+/* Unlike the OBP device tree, the machine description is a full-on
+ * DAG. An arbitrary number of ARCs are possible from one
+ * node to other nodes and thus we can't use the OBP device_node
+ * data structure to represent these nodes inside of the kernel.
+ *
+ * Actually, it isn't even a DAG, because there are back pointers
+ * which create cycles in the graph.
+ *
+ * mdesc_hdr and mdesc_elem describe the layout of the data structure
+ * we get from the Hypervisor.
+ */
+struct mdesc_hdr {
+ u32 version; /* Transport version */
+ u32 node_sz; /* node block size */
+ u32 name_sz; /* name block size */
+ u32 data_sz; /* data block size */
+} __attribute__((aligned(16)));
+
+struct mdesc_elem {
+ u8 tag;
+#define MD_LIST_END 0x00
+#define MD_NODE 0x4e
+#define MD_NODE_END 0x45
+#define MD_NOOP 0x20
+#define MD_PROP_ARC 0x61
+#define MD_PROP_VAL 0x76
+#define MD_PROP_STR 0x73
+#define MD_PROP_DATA 0x64
+ u8 name_len;
+ u16 resv;
+ u32 name_offset;
+ union {
+ struct {
+ u32 data_len;
+ u32 data_offset;
+ } data;
+ u64 val;
+ } d;
+};
+
+struct mdesc_mem_ops {
+ struct mdesc_handle *(*alloc)(unsigned int mdesc_size);
+ void (*free)(struct mdesc_handle *handle);
+};
+
+struct mdesc_handle {
+ struct list_head list;
+ struct mdesc_mem_ops *mops;
+ void *self_base;
+ atomic_t refcnt;
+ unsigned int handle_size;
+ struct mdesc_hdr mdesc;
+};
+
+static void mdesc_handle_init(struct mdesc_handle *hp,
+ unsigned int handle_size,
+ void *base)
+{
+ BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1));
+
+ memset(hp, 0, handle_size);
+ INIT_LIST_HEAD(&hp->list);
+ hp->self_base = base;
+ atomic_set(&hp->refcnt, 1);
+ hp->handle_size = handle_size;
+}
+
+static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size)
+{
+ unsigned int handle_size, alloc_size;
+ struct mdesc_handle *hp;
+ unsigned long paddr;
+
+ handle_size = (sizeof(struct mdesc_handle) -
+ sizeof(struct mdesc_hdr) +
+ mdesc_size);
+ alloc_size = PAGE_ALIGN(handle_size);
+
+ paddr = lmb_alloc(alloc_size, PAGE_SIZE);
+
+ hp = NULL;
+ if (paddr) {
+ hp = __va(paddr);
+ mdesc_handle_init(hp, handle_size, hp);
+ }
+ return hp;
+}
+
+static void mdesc_lmb_free(struct mdesc_handle *hp)
+{
+ unsigned int alloc_size, handle_size = hp->handle_size;
+ unsigned long start, end;
+
+ BUG_ON(atomic_read(&hp->refcnt) != 0);
+ BUG_ON(!list_empty(&hp->list));
+
+ alloc_size = PAGE_ALIGN(handle_size);
+
+ start = (unsigned long) hp;
+ end = start + alloc_size;
+
+ while (start < end) {
+ struct page *p;
+
+ p = virt_to_page(start);
+ ClearPageReserved(p);
+ __free_page(p);
+ start += PAGE_SIZE;
+ }
+}
+
+static struct mdesc_mem_ops lmb_mdesc_ops = {
+ .alloc = mdesc_lmb_alloc,
+ .free = mdesc_lmb_free,
+};
+
+static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
+{
+ unsigned int handle_size;
+ void *base;
+
+ handle_size = (sizeof(struct mdesc_handle) -
+ sizeof(struct mdesc_hdr) +
+ mdesc_size);
+
+ base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL);
+ if (base) {
+ struct mdesc_handle *hp;
+ unsigned long addr;
+
+ addr = (unsigned long)base;
+ addr = (addr + 15UL) & ~15UL;
+ hp = (struct mdesc_handle *) addr;
+
+ mdesc_handle_init(hp, handle_size, base);
+ return hp;
+ }
+
+ return NULL;
+}
+
+static void mdesc_kfree(struct mdesc_handle *hp)
+{
+ BUG_ON(atomic_read(&hp->refcnt) != 0);
+ BUG_ON(!list_empty(&hp->list));
+
+ kfree(hp->self_base);
+}
+
+static struct mdesc_mem_ops kmalloc_mdesc_memops = {
+ .alloc = mdesc_kmalloc,
+ .free = mdesc_kfree,
+};
+
+static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size,
+ struct mdesc_mem_ops *mops)
+{
+ struct mdesc_handle *hp = mops->alloc(mdesc_size);
+
+ if (hp)
+ hp->mops = mops;
+
+ return hp;
+}
+
+static void mdesc_free(struct mdesc_handle *hp)
+{
+ hp->mops->free(hp);
+}
+
+static struct mdesc_handle *cur_mdesc;
+static LIST_HEAD(mdesc_zombie_list);
+static DEFINE_SPINLOCK(mdesc_lock);
+
+struct mdesc_handle *mdesc_grab(void)
+{
+ struct mdesc_handle *hp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ hp = cur_mdesc;
+ if (hp)
+ atomic_inc(&hp->refcnt);
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+
+ return hp;
+}
+EXPORT_SYMBOL(mdesc_grab);
+
+void mdesc_release(struct mdesc_handle *hp)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ if (atomic_dec_and_test(&hp->refcnt)) {
+ list_del_init(&hp->list);
+ hp->mops->free(hp);
+ }
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+}
+EXPORT_SYMBOL(mdesc_release);
+
+static DEFINE_MUTEX(mdesc_mutex);
+static struct mdesc_notifier_client *client_list;
+
+void mdesc_register_notifier(struct mdesc_notifier_client *client)
+{
+ u64 node;
+
+ mutex_lock(&mdesc_mutex);
+ client->next = client_list;
+ client_list = client;
+
+ mdesc_for_each_node_by_name(cur_mdesc, node, client->node_name)
+ client->add(cur_mdesc, node);
+
+ mutex_unlock(&mdesc_mutex);
+}
+
+static const u64 *parent_cfg_handle(struct mdesc_handle *hp, u64 node)
+{
+ const u64 *id;
+ u64 a;
+
+ id = NULL;
+ mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+ u64 target;
+
+ target = mdesc_arc_target(hp, a);
+ id = mdesc_get_property(hp, target,
+ "cfg-handle", NULL);
+ if (id)
+ break;
+ }
+
+ return id;
+}
+
+/* Run 'func' on nodes which are in A but not in B. */
+static void invoke_on_missing(const char *name,
+ struct mdesc_handle *a,
+ struct mdesc_handle *b,
+ void (*func)(struct mdesc_handle *, u64))
+{
+ u64 node;
+
+ mdesc_for_each_node_by_name(a, node, name) {
+ int found = 0, is_vdc_port = 0;
+ const char *name_prop;
+ const u64 *id;
+ u64 fnode;
+
+ name_prop = mdesc_get_property(a, node, "name", NULL);
+ if (name_prop && !strcmp(name_prop, "vdc-port")) {
+ is_vdc_port = 1;
+ id = parent_cfg_handle(a, node);
+ } else
+ id = mdesc_get_property(a, node, "id", NULL);
+
+ if (!id) {
+ printk(KERN_ERR "MD: Cannot find ID for %s node.\n",
+ (name_prop ? name_prop : name));
+ continue;
+ }
+
+ mdesc_for_each_node_by_name(b, fnode, name) {
+ const u64 *fid;
+
+ if (is_vdc_port) {
+ name_prop = mdesc_get_property(b, fnode,
+ "name", NULL);
+ if (!name_prop ||
+ strcmp(name_prop, "vdc-port"))
+ continue;
+ fid = parent_cfg_handle(b, fnode);
+ if (!fid) {
+ printk(KERN_ERR "MD: Cannot find ID "
+ "for vdc-port node.\n");
+ continue;
+ }
+ } else
+ fid = mdesc_get_property(b, fnode,
+ "id", NULL);
+
+ if (*id == *fid) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ func(a, node);
+ }
+}
+
+static void notify_one(struct mdesc_notifier_client *p,
+ struct mdesc_handle *old_hp,
+ struct mdesc_handle *new_hp)
+{
+ invoke_on_missing(p->node_name, old_hp, new_hp, p->remove);
+ invoke_on_missing(p->node_name, new_hp, old_hp, p->add);
+}
+
+static void mdesc_notify_clients(struct mdesc_handle *old_hp,
+ struct mdesc_handle *new_hp)
+{
+ struct mdesc_notifier_client *p = client_list;
+
+ while (p) {
+ notify_one(p, old_hp, new_hp);
+ p = p->next;
+ }
+}
+
+void mdesc_update(void)
+{
+ unsigned long len, real_len, status;
+ struct mdesc_handle *hp, *orig_hp;
+ unsigned long flags;
+
+ mutex_lock(&mdesc_mutex);
+
+ (void) sun4v_mach_desc(0UL, 0UL, &len);
+
+ hp = mdesc_alloc(len, &kmalloc_mdesc_memops);
+ if (!hp) {
+ printk(KERN_ERR "MD: mdesc alloc fails\n");
+ goto out;
+ }
+
+ status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
+ if (status != HV_EOK || real_len > len) {
+ printk(KERN_ERR "MD: mdesc reread fails with %lu\n",
+ status);
+ atomic_dec(&hp->refcnt);
+ mdesc_free(hp);
+ goto out;
+ }
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ orig_hp = cur_mdesc;
+ cur_mdesc = hp;
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+
+ mdesc_notify_clients(orig_hp, hp);
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ if (atomic_dec_and_test(&orig_hp->refcnt))
+ mdesc_free(orig_hp);
+ else
+ list_add(&orig_hp->list, &mdesc_zombie_list);
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+
+out:
+ mutex_unlock(&mdesc_mutex);
+}
+
+static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
+{
+ return (struct mdesc_elem *) (mdesc + 1);
+}
+
+static void *name_block(struct mdesc_hdr *mdesc)
+{
+ return ((void *) node_block(mdesc)) + mdesc->node_sz;
+}
+
+static void *data_block(struct mdesc_hdr *mdesc)
+{
+ return ((void *) name_block(mdesc)) + mdesc->name_sz;
+}
+
+u64 mdesc_node_by_name(struct mdesc_handle *hp,
+ u64 from_node, const char *name)
+{
+ struct mdesc_elem *ep = node_block(&hp->mdesc);
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
+ u64 ret;
+
+ if (from_node == MDESC_NODE_NULL) {
+ ret = from_node = 0;
+ } else if (from_node >= last_node) {
+ return MDESC_NODE_NULL;
+ } else {
+ ret = ep[from_node].d.val;
+ }
+
+ while (ret < last_node) {
+ if (ep[ret].tag != MD_NODE)
+ return MDESC_NODE_NULL;
+ if (!strcmp(names + ep[ret].name_offset, name))
+ break;
+ ret = ep[ret].d.val;
+ }
+ if (ret >= last_node)
+ ret = MDESC_NODE_NULL;
+ return ret;
+}
+EXPORT_SYMBOL(mdesc_node_by_name);
+
+const void *mdesc_get_property(struct mdesc_handle *hp, u64 node,
+ const char *name, int *lenp)
+{
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
+ void *data = data_block(&hp->mdesc);
+ struct mdesc_elem *ep;
+
+ if (node == MDESC_NODE_NULL || node >= last_node)
+ return NULL;
+
+ ep = node_block(&hp->mdesc) + node;
+ ep++;
+ for (; ep->tag != MD_NODE_END; ep++) {
+ void *val = NULL;
+ int len = 0;
+
+ switch (ep->tag) {
+ case MD_PROP_VAL:
+ val = &ep->d.val;
+ len = 8;
+ break;
+
+ case MD_PROP_STR:
+ case MD_PROP_DATA:
+ val = data + ep->d.data.data_offset;
+ len = ep->d.data.data_len;
+ break;
+
+ default:
+ break;
+ }
+ if (!val)
+ continue;
+
+ if (!strcmp(names + ep->name_offset, name)) {
+ if (lenp)
+ *lenp = len;
+ return val;
+ }
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(mdesc_get_property);
+
+u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type)
+{
+ struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
+
+ if (from == MDESC_NODE_NULL || from >= last_node)
+ return MDESC_NODE_NULL;
+
+ ep = base + from;
+
+ ep++;
+ for (; ep->tag != MD_NODE_END; ep++) {
+ if (ep->tag != MD_PROP_ARC)
+ continue;
+
+ if (strcmp(names + ep->name_offset, arc_type))
+ continue;
+
+ return ep - base;
+ }
+
+ return MDESC_NODE_NULL;
+}
+EXPORT_SYMBOL(mdesc_next_arc);
+
+u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc)
+{
+ struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+
+ ep = base + arc;
+
+ return ep->d.val;
+}
+EXPORT_SYMBOL(mdesc_arc_target);
+
+const char *mdesc_node_name(struct mdesc_handle *hp, u64 node)
+{
+ struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
+
+ if (node == MDESC_NODE_NULL || node >= last_node)
+ return NULL;
+
+ ep = base + node;
+ if (ep->tag != MD_NODE)
+ return NULL;
+
+ return names + ep->name_offset;
+}
+EXPORT_SYMBOL(mdesc_node_name);
+
+static void __init report_platform_properties(void)
+{
+ struct mdesc_handle *hp = mdesc_grab();
+ u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
+ const char *s;
+ const u64 *v;
+
+ if (pn == MDESC_NODE_NULL) {
+ prom_printf("No platform node in machine-description.\n");
+ prom_halt();
+ }
+
+ s = mdesc_get_property(hp, pn, "banner-name", NULL);
+ printk("PLATFORM: banner-name [%s]\n", s);
+ s = mdesc_get_property(hp, pn, "name", NULL);
+ printk("PLATFORM: name [%s]\n", s);
+
+ v = mdesc_get_property(hp, pn, "hostid", NULL);
+ if (v)
+ printk("PLATFORM: hostid [%08lx]\n", *v);
+ v = mdesc_get_property(hp, pn, "serial#", NULL);
+ if (v)
+ printk("PLATFORM: serial# [%08lx]\n", *v);
+ v = mdesc_get_property(hp, pn, "stick-frequency", NULL);
+ printk("PLATFORM: stick-frequency [%08lx]\n", *v);
+ v = mdesc_get_property(hp, pn, "mac-address", NULL);
+ if (v)
+ printk("PLATFORM: mac-address [%lx]\n", *v);
+ v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL);
+ if (v)
+ printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v);
+ v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL);
+ if (v)
+ printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v);
+ v = mdesc_get_property(hp, pn, "max-cpus", NULL);
+ if (v)
+ printk("PLATFORM: max-cpus [%lu]\n", *v);
+
+#ifdef CONFIG_SMP
+ {
+ int max_cpu, i;
+
+ if (v) {
+ max_cpu = *v;
+ if (max_cpu > NR_CPUS)
+ max_cpu = NR_CPUS;
+ } else {
+ max_cpu = NR_CPUS;
+ }
+ for (i = 0; i < max_cpu; i++)
+ cpu_set(i, cpu_possible_map);
+ }
+#endif
+
+ mdesc_release(hp);
+}
+
+static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
+ struct mdesc_handle *hp,
+ u64 mp)
+{
+ const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
+ const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
+ const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL);
+ const char *type;
+ int type_len;
+
+ type = mdesc_get_property(hp, mp, "type", &type_len);
+
+ switch (*level) {
+ case 1:
+ if (of_find_in_proplist(type, "instn", type_len)) {
+ c->icache_size = *size;
+ c->icache_line_size = *line_size;
+ } else if (of_find_in_proplist(type, "data", type_len)) {
+ c->dcache_size = *size;
+ c->dcache_line_size = *line_size;
+ }
+ break;
+
+ case 2:
+ c->ecache_size = *size;
+ c->ecache_line_size = *line_size;
+ break;
+
+ default:
+ break;
+ }
+
+ if (*level == 1) {
+ u64 a;
+
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ u64 target = mdesc_arc_target(hp, a);
+ const char *name = mdesc_node_name(hp, target);
+
+ if (!strcmp(name, "cache"))
+ fill_in_one_cache(c, hp, target);
+ }
+ }
+}
+
+static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
+ int core_id)
+{
+ u64 a;
+
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
+ u64 t = mdesc_arc_target(hp, a);
+ const char *name;
+ const u64 *id;
+
+ name = mdesc_node_name(hp, t);
+ if (!strcmp(name, "cpu")) {
+ id = mdesc_get_property(hp, t, "id", NULL);
+ if (*id < NR_CPUS)
+ cpu_data(*id).core_id = core_id;
+ } else {
+ u64 j;
+
+ mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) {
+ u64 n = mdesc_arc_target(hp, j);
+ const char *n_name;
+
+ n_name = mdesc_node_name(hp, n);
+ if (strcmp(n_name, "cpu"))
+ continue;
+
+ id = mdesc_get_property(hp, n, "id", NULL);
+ if (*id < NR_CPUS)
+ cpu_data(*id).core_id = core_id;
+ }
+ }
+ }
+}
+
+static void __devinit set_core_ids(struct mdesc_handle *hp)
+{
+ int idx;
+ u64 mp;
+
+ idx = 1;
+ mdesc_for_each_node_by_name(hp, mp, "cache") {
+ const u64 *level;
+ const char *type;
+ int len;
+
+ level = mdesc_get_property(hp, mp, "level", NULL);
+ if (*level != 1)
+ continue;
+
+ type = mdesc_get_property(hp, mp, "type", &len);
+ if (!of_find_in_proplist(type, "instn", len))
+ continue;
+
+ mark_core_ids(hp, mp, idx);
+
+ idx++;
+ }
+}
+
+static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
+ int proc_id)
+{
+ u64 a;
+
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
+ u64 t = mdesc_arc_target(hp, a);
+ const char *name;
+ const u64 *id;
+
+ name = mdesc_node_name(hp, t);
+ if (strcmp(name, "cpu"))
+ continue;
+
+ id = mdesc_get_property(hp, t, "id", NULL);
+ if (*id < NR_CPUS)
+ cpu_data(*id).proc_id = proc_id;
+ }
+}
+
+static void __devinit __set_proc_ids(struct mdesc_handle *hp,
+ const char *exec_unit_name)
+{
+ int idx;
+ u64 mp;
+
+ idx = 0;
+ mdesc_for_each_node_by_name(hp, mp, exec_unit_name) {
+ const char *type;
+ int len;
+
+ type = mdesc_get_property(hp, mp, "type", &len);
+ if (!of_find_in_proplist(type, "int", len) &&
+ !of_find_in_proplist(type, "integer", len))
+ continue;
+
+ mark_proc_ids(hp, mp, idx);
+
+ idx++;
+ }
+}
+
+static void __devinit set_proc_ids(struct mdesc_handle *hp)
+{
+ __set_proc_ids(hp, "exec_unit");
+ __set_proc_ids(hp, "exec-unit");
+}
+
+static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
+ unsigned char def)
+{
+ u64 val;
+
+ if (!p)
+ goto use_default;
+ val = *p;
+
+ if (!val || val >= 64)
+ goto use_default;
+
+ *mask = ((1U << val) * 64U) - 1U;
+ return;
+
+use_default:
+ *mask = ((1U << def) * 64U) - 1U;
+}
+
+static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
+ struct trap_per_cpu *tb)
+{
+ const u64 *val;
+
+ val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL);
+ get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7);
+
+ val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL);
+ get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7);
+
+ val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL);
+ get_one_mondo_bits(val, &tb->resum_qmask, 6);
+
+ val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL);
+ get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
+}
+
+void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask)
+{
+ struct mdesc_handle *hp = mdesc_grab();
+ u64 mp;
+
+ ncpus_probed = 0;
+ mdesc_for_each_node_by_name(hp, mp, "cpu") {
+ const u64 *id = mdesc_get_property(hp, mp, "id", NULL);
+ const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
+ struct trap_per_cpu *tb;
+ cpuinfo_sparc *c;
+ int cpuid;
+ u64 a;
+
+ ncpus_probed++;
+
+ cpuid = *id;
+
+#ifdef CONFIG_SMP
+ if (cpuid >= NR_CPUS) {
+ printk(KERN_WARNING "Ignoring CPU %d which is "
+ ">= NR_CPUS (%d)\n",
+ cpuid, NR_CPUS);
+ continue;
+ }
+ if (!cpu_isset(cpuid, mask))
+ continue;
+#else
+ /* On uniprocessor we only want the values for the
+ * real physical cpu the kernel booted onto, however
+ * cpu_data() only has one entry at index 0.
+ */
+ if (cpuid != real_hard_smp_processor_id())
+ continue;
+ cpuid = 0;
+#endif
+
+ c = &cpu_data(cpuid);
+ c->clock_tick = *cfreq;
+
+ tb = &trap_block[cpuid];
+ get_mondo_data(hp, mp, tb);
+
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ u64 j, t = mdesc_arc_target(hp, a);
+ const char *t_name;
+
+ t_name = mdesc_node_name(hp, t);
+ if (!strcmp(t_name, "cache")) {
+ fill_in_one_cache(c, hp, t);
+ continue;
+ }
+
+ mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
+ u64 n = mdesc_arc_target(hp, j);
+ const char *n_name;
+
+ n_name = mdesc_node_name(hp, n);
+ if (!strcmp(n_name, "cache"))
+ fill_in_one_cache(c, hp, n);
+ }
+ }
+
+#ifdef CONFIG_SMP
+ cpu_set(cpuid, cpu_present_map);
+#endif
+
+ c->core_id = 0;
+ c->proc_id = -1;
+ }
+
+#ifdef CONFIG_SMP
+ sparc64_multi_core = 1;
+#endif
+
+ set_core_ids(hp);
+ set_proc_ids(hp);
+
+ smp_fill_in_sib_core_maps();
+
+ mdesc_release(hp);
+}
+
+static ssize_t mdesc_read(struct file *file, char __user *buf,
+ size_t len, loff_t *offp)
+{
+ struct mdesc_handle *hp = mdesc_grab();
+ int err;
+
+ if (!hp)
+ return -ENODEV;
+
+ err = hp->handle_size;
+ if (len < hp->handle_size)
+ err = -EMSGSIZE;
+ else if (copy_to_user(buf, &hp->mdesc, hp->handle_size))
+ err = -EFAULT;
+ mdesc_release(hp);
+
+ return err;
+}
+
+static const struct file_operations mdesc_fops = {
+ .read = mdesc_read,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice mdesc_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "mdesc",
+ .fops = &mdesc_fops,
+};
+
+static int __init mdesc_misc_init(void)
+{
+ return misc_register(&mdesc_misc);
+}
+
+__initcall(mdesc_misc_init);
+
+void __init sun4v_mdesc_init(void)
+{
+ struct mdesc_handle *hp;
+ unsigned long len, real_len, status;
+ cpumask_t mask;
+
+ (void) sun4v_mach_desc(0UL, 0UL, &len);
+
+ printk("MDESC: Size is %lu bytes.\n", len);
+
+ hp = mdesc_alloc(len, &lmb_mdesc_ops);
+ if (hp == NULL) {
+ prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
+ prom_halt();
+ }
+
+ status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
+ if (status != HV_EOK || real_len > len) {
+ prom_printf("sun4v_mach_desc fails, err(%lu), "
+ "len(%lu), real_len(%lu)\n",
+ status, len, real_len);
+ mdesc_free(hp);
+ prom_halt();
+ }
+
+ cur_mdesc = hp;
+
+ report_platform_properties();
+
+ cpus_setall(mask);
+ mdesc_fill_in_cpu_data(mask);
+}
diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S
new file mode 100644
index 000000000000..753b4f031bfb
--- /dev/null
+++ b/arch/sparc/kernel/misctrap.S
@@ -0,0 +1,97 @@
+#ifdef CONFIG_KGDB
+ .globl arch_kgdb_breakpoint
+ .type arch_kgdb_breakpoint,#function
+arch_kgdb_breakpoint:
+ ta 0x72
+ retl
+ nop
+ .size arch_kgdb_breakpoint,.-arch_kgdb_breakpoint
+#endif
+
+ .type __do_privact,#function
+__do_privact:
+ mov TLB_SFSR, %g3
+ stxa %g0, [%g3] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync
+ sethi %hi(109f), %g7
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ call do_privact
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size __do_privact,.-__do_privact
+
+ .type do_mna,#function
+do_mna:
+ rdpr %tl, %g3
+ cmp %g3, 1
+
+ /* Setup %g4/%g5 now as they are used in the
+ * winfixup code.
+ */
+ mov TLB_SFSR, %g3
+ mov DMMU_SFAR, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+ ldxa [%g3] ASI_DMMU, %g5
+ stxa %g0, [%g3] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync
+ bgu,pn %icc, winfix_mna
+ rdpr %tpc, %g3
+
+1: sethi %hi(109f), %g7
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call mem_address_unaligned
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size do_mna,.-do_mna
+
+ .type do_lddfmna,#function
+do_lddfmna:
+ sethi %hi(109f), %g7
+ mov TLB_SFSR, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ stxa %g0, [%g4] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync
+ mov DMMU_SFAR, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call handle_lddfmna
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size do_lddfmna,.-do_lddfmna
+
+ .type do_stdfmna,#function
+do_stdfmna:
+ sethi %hi(109f), %g7
+ mov TLB_SFSR, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ stxa %g0, [%g4] ASI_DMMU ! Clear FaultValid bit
+ membar #Sync
+ mov DMMU_SFAR, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+ ba,pt %xcc, etrap
+109: or %g7, %lo(109b), %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call handle_stdfmna
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size do_stdfmna,.-do_stdfmna
+
+ .type breakpoint_trap,#function
+breakpoint_trap:
+ call sparc_breakpoint
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+ .size breakpoint_trap,.-breakpoint_trap
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 598682f31ebf..90273765e81f 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -1,4 +1,4 @@
-/* Kernel module help for sparc32.
+/* Kernel module help for sparc64.
*
* Copyright (C) 2001 Rusty Russell.
* Copyright (C) 2002 David S. Miller.
@@ -11,6 +11,48 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <asm/processor.h>
+#include <asm/spitfire.h>
+
+#ifdef CONFIG_SPARC64
+static void *module_map(unsigned long size)
+{
+ struct vm_struct *area;
+
+ size = PAGE_ALIGN(size);
+ if (!size || size > MODULES_LEN)
+ return NULL;
+
+ area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END);
+ if (!area)
+ return NULL;
+
+ return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL);
+}
+
+static char *dot2underscore(char *name)
+{
+ return name;
+}
+#else
+static void *module_map(unsigned long size)
+{
+ return vmalloc(size);
+}
+
+/* Replace references to .func with _Func */
+static char *dot2underscore(char *name)
+{
+ if (name[0] == '.') {
+ name[0] = '_';
+ name[1] = toupper(name[1]);
+ }
+ return name;
+}
+#endif /* CONFIG_SPARC64 */
void *module_alloc(unsigned long size)
{
@@ -20,7 +62,7 @@ void *module_alloc(unsigned long size)
if (size == 0)
return NULL;
- ret = vmalloc(size);
+ ret = module_map(size);
if (!ret)
ret = ERR_PTR(-ENOMEM);
else
@@ -37,16 +79,14 @@ void module_free(struct module *mod, void *module_region)
table entries. */
}
-/* Make generic code ignore STT_REGISTER dummy undefined symbols,
- * and replace references to .func with _Func
- */
+/* Make generic code ignore STT_REGISTER dummy undefined symbols. */
int module_frob_arch_sections(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
char *secstrings,
struct module *mod)
{
unsigned int symidx;
- Elf32_Sym *sym;
+ Elf_Sym *sym;
char *strtab;
int i;
@@ -56,26 +96,23 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
return -ENOEXEC;
}
}
- sym = (Elf32_Sym *)sechdrs[symidx].sh_addr;
+ sym = (Elf_Sym *)sechdrs[symidx].sh_addr;
strtab = (char *)sechdrs[sechdrs[symidx].sh_link].sh_addr;
for (i = 1; i < sechdrs[symidx].sh_size / sizeof(Elf_Sym); i++) {
if (sym[i].st_shndx == SHN_UNDEF) {
- if (ELF32_ST_TYPE(sym[i].st_info) == STT_REGISTER)
+ if (ELF_ST_TYPE(sym[i].st_info) == STT_REGISTER) {
sym[i].st_shndx = SHN_ABS;
- else {
+ } else {
char *name = strtab + sym[i].st_name;
- if (name[0] == '.') {
- name[0] = '_';
- name[1] = toupper(name[1]);
- }
+ dot2underscore(name);
}
}
}
return 0;
}
-int apply_relocate(Elf32_Shdr *sechdrs,
+int apply_relocate(Elf_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
@@ -86,32 +123,68 @@ int apply_relocate(Elf32_Shdr *sechdrs,
return -ENOEXEC;
}
-int apply_relocate_add(Elf32_Shdr *sechdrs,
+int apply_relocate_add(Elf_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me)
{
unsigned int i;
- Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
- Elf32_Sym *sym;
+ Elf_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+ Elf_Sym *sym;
u8 *location;
u32 *loc32;
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
- Elf32_Addr v;
+ Elf_Addr v;
/* This is where to make the change */
location = (u8 *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ rel[i].r_offset;
loc32 = (u32 *) location;
+
+#ifdef CONFIG_SPARC64
+ BUG_ON(((u64)location >> (u64)32) != (u64)0);
+#endif /* CONFIG_SPARC64 */
+
/* This is the symbol it is referring to. Note that all
undefined symbols have been resolved. */
- sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
- + ELF32_R_SYM(rel[i].r_info);
+ sym = (Elf_Sym *)sechdrs[symindex].sh_addr
+ + ELF_R_SYM(rel[i].r_info);
v = sym->st_value + rel[i].r_addend;
- switch (ELF32_R_TYPE(rel[i].r_info)) {
+ switch (ELF_R_TYPE(rel[i].r_info) & 0xff) {
+#ifdef CONFIG_SPARC64
+ case R_SPARC_64:
+ location[0] = v >> 56;
+ location[1] = v >> 48;
+ location[2] = v >> 40;
+ location[3] = v >> 32;
+ location[4] = v >> 24;
+ location[5] = v >> 16;
+ location[6] = v >> 8;
+ location[7] = v >> 0;
+ break;
+
+ case R_SPARC_DISP32:
+ v -= (Elf_Addr) location;
+ *loc32 = v;
+ break;
+
+ case R_SPARC_WDISP19:
+ v -= (Elf_Addr) location;
+ *loc32 = (*loc32 & ~0x7ffff) |
+ ((v >> 2) & 0x7ffff);
+ break;
+
+ case R_SPARC_OLO10:
+ *loc32 = (*loc32 & ~0x1fff) |
+ (((v & 0x3ff) +
+ (ELF_R_TYPE(rel[i].r_info) >> 8))
+ & 0x1fff);
+ break;
+#endif /* CONFIG_SPARC64 */
+
case R_SPARC_32:
case R_SPARC_UA32:
location[0] = v >> 24;
@@ -121,13 +194,13 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
break;
case R_SPARC_WDISP30:
- v -= (Elf32_Addr) location;
+ v -= (Elf_Addr) location;
*loc32 = (*loc32 & ~0x3fffffff) |
((v >> 2) & 0x3fffffff);
break;
case R_SPARC_WDISP22:
- v -= (Elf32_Addr) location;
+ v -= (Elf_Addr) location;
*loc32 = (*loc32 & ~0x3fffff) |
((v >> 2) & 0x3fffff);
break;
@@ -144,19 +217,38 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
default:
printk(KERN_ERR "module %s: Unknown relocation: %x\n",
me->name,
- (int) (ELF32_R_TYPE(rel[i].r_info) & 0xff));
+ (int) (ELF_R_TYPE(rel[i].r_info) & 0xff));
return -ENOEXEC;
};
}
return 0;
}
+#ifdef CONFIG_SPARC64
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
+ /* Cheetah's I-cache is fully coherent. */
+ if (tlb_type == spitfire) {
+ unsigned long va;
+
+ flushw_all();
+ for (va = 0; va < (PAGE_SIZE << 1); va += 32)
+ spitfire_put_icache_tag(va, 0x0);
+ __asm__ __volatile__("flush %g6");
+ }
+
return 0;
}
+#else
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ return 0;
+}
+#endif /* CONFIG_SPARC64 */
void module_arch_cleanup(struct module *mod)
{
diff --git a/arch/sparc/kernel/muldiv.c b/arch/sparc/kernel/muldiv.c
index e352239e72c8..ba960c02bb55 100644
--- a/arch/sparc/kernel/muldiv.c
+++ b/arch/sparc/kernel/muldiv.c
@@ -17,6 +17,8 @@
#include <asm/system.h>
#include <asm/uaccess.h>
+#include "kernel.h"
+
/* #define DEBUG_MULDIV */
static inline int has_imm13(int insn)
@@ -88,9 +90,6 @@ store_reg(unsigned int result, unsigned int reg, struct pt_regs *regs)
return (put_user(result, &win->locals[reg - 16]));
}
}
-
-extern void handle_hw_divzero (struct pt_regs *regs, unsigned long pc,
- unsigned long npc, unsigned long psr);
/* Should return 0 if mul/div emulation succeeded and SIGILL should
* not be issued.
diff --git a/arch/sparc/kernel/of_device.c b/arch/sparc/kernel/of_device_32.c
index 0a83bd737654..0a83bd737654 100644
--- a/arch/sparc/kernel/of_device.c
+++ b/arch/sparc/kernel/of_device_32.c
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
new file mode 100644
index 000000000000..322046cdf85f
--- /dev/null
+++ b/arch/sparc/kernel/of_device_64.c
@@ -0,0 +1,898 @@
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name)
+{
+ unsigned long ret = res->start + offset;
+ struct resource *r;
+
+ if (res->flags & IORESOURCE_MEM)
+ r = request_mem_region(ret, size, name);
+ else
+ r = request_region(ret, size, name);
+ if (!r)
+ ret = 0;
+
+ return (void __iomem *) ret;
+}
+EXPORT_SYMBOL(of_ioremap);
+
+void of_iounmap(struct resource *res, void __iomem *base, unsigned long size)
+{
+ if (res->flags & IORESOURCE_MEM)
+ release_mem_region((unsigned long) base, size);
+ else
+ release_region((unsigned long) base, size);
+}
+EXPORT_SYMBOL(of_iounmap);
+
+static int node_match(struct device *dev, void *data)
+{
+ struct of_device *op = to_of_device(dev);
+ struct device_node *dp = data;
+
+ return (op->node == dp);
+}
+
+struct of_device *of_find_device_by_node(struct device_node *dp)
+{
+ struct device *dev = bus_find_device(&of_platform_bus_type, NULL,
+ dp, node_match);
+
+ if (dev)
+ return to_of_device(dev);
+
+ return NULL;
+}
+EXPORT_SYMBOL(of_find_device_by_node);
+
+unsigned int irq_of_parse_and_map(struct device_node *node, int index)
+{
+ struct of_device *op = of_find_device_by_node(node);
+
+ if (!op || index >= op->num_irqs)
+ return 0;
+
+ return op->irqs[index];
+}
+EXPORT_SYMBOL(irq_of_parse_and_map);
+
+/* Take the archdata values for IOMMU, STC, and HOSTDATA found in
+ * BUS and propagate to all child of_device objects.
+ */
+void of_propagate_archdata(struct of_device *bus)
+{
+ struct dev_archdata *bus_sd = &bus->dev.archdata;
+ struct device_node *bus_dp = bus->node;
+ struct device_node *dp;
+
+ for (dp = bus_dp->child; dp; dp = dp->sibling) {
+ struct of_device *op = of_find_device_by_node(dp);
+
+ op->dev.archdata.iommu = bus_sd->iommu;
+ op->dev.archdata.stc = bus_sd->stc;
+ op->dev.archdata.host_controller = bus_sd->host_controller;
+ op->dev.archdata.numa_node = bus_sd->numa_node;
+
+ if (dp->child)
+ of_propagate_archdata(op);
+ }
+}
+
+struct bus_type of_platform_bus_type;
+EXPORT_SYMBOL(of_platform_bus_type);
+
+static inline u64 of_read_addr(const u32 *cell, int size)
+{
+ u64 r = 0;
+ while (size--)
+ r = (r << 32) | *(cell++);
+ return r;
+}
+
+static void __init get_cells(struct device_node *dp,
+ int *addrc, int *sizec)
+{
+ if (addrc)
+ *addrc = of_n_addr_cells(dp);
+ if (sizec)
+ *sizec = of_n_size_cells(dp);
+}
+
+/* Max address size we deal with */
+#define OF_MAX_ADDR_CELLS 4
+
+struct of_bus {
+ const char *name;
+ const char *addr_prop_name;
+ int (*match)(struct device_node *parent);
+ void (*count_cells)(struct device_node *child,
+ int *addrc, int *sizec);
+ int (*map)(u32 *addr, const u32 *range,
+ int na, int ns, int pna);
+ unsigned long (*get_flags)(const u32 *addr, unsigned long);
+};
+
+/*
+ * Default translator (generic bus)
+ */
+
+static void of_bus_default_count_cells(struct device_node *dev,
+ int *addrc, int *sizec)
+{
+ get_cells(dev, addrc, sizec);
+}
+
+/* Make sure the least significant 64-bits are in-range. Even
+ * for 3 or 4 cell values it is a good enough approximation.
+ */
+static int of_out_of_range(const u32 *addr, const u32 *base,
+ const u32 *size, int na, int ns)
+{
+ u64 a = of_read_addr(addr, na);
+ u64 b = of_read_addr(base, na);
+
+ if (a < b)
+ return 1;
+
+ b += of_read_addr(size, ns);
+ if (a >= b)
+ return 1;
+
+ return 0;
+}
+
+static int of_bus_default_map(u32 *addr, const u32 *range,
+ int na, int ns, int pna)
+{
+ u32 result[OF_MAX_ADDR_CELLS];
+ int i;
+
+ if (ns > 2) {
+ printk("of_device: Cannot handle size cells (%d) > 2.", ns);
+ return -EINVAL;
+ }
+
+ if (of_out_of_range(addr, range, range + na + pna, na, ns))
+ return -EINVAL;
+
+ /* Start with the parent range base. */
+ memcpy(result, range + na, pna * 4);
+
+ /* Add in the child address offset. */
+ for (i = 0; i < na; i++)
+ result[pna - 1 - i] +=
+ (addr[na - 1 - i] -
+ range[na - 1 - i]);
+
+ memcpy(addr, result, pna * 4);
+
+ return 0;
+}
+
+static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags)
+{
+ if (flags)
+ return flags;
+ return IORESOURCE_MEM;
+}
+
+/*
+ * PCI bus specific translator
+ */
+
+static int of_bus_pci_match(struct device_node *np)
+{
+ if (!strcmp(np->name, "pci")) {
+ const char *model = of_get_property(np, "model", NULL);
+
+ if (model && !strcmp(model, "SUNW,simba"))
+ return 0;
+
+ /* Do not do PCI specific frobbing if the
+ * PCI bridge lacks a ranges property. We
+ * want to pass it through up to the next
+ * parent as-is, not with the PCI translate
+ * method which chops off the top address cell.
+ */
+ if (!of_find_property(np, "ranges", NULL))
+ return 0;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int of_bus_simba_match(struct device_node *np)
+{
+ const char *model = of_get_property(np, "model", NULL);
+
+ if (model && !strcmp(model, "SUNW,simba"))
+ return 1;
+
+ /* Treat PCI busses lacking ranges property just like
+ * simba.
+ */
+ if (!strcmp(np->name, "pci")) {
+ if (!of_find_property(np, "ranges", NULL))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int of_bus_simba_map(u32 *addr, const u32 *range,
+ int na, int ns, int pna)
+{
+ return 0;
+}
+
+static void of_bus_pci_count_cells(struct device_node *np,
+ int *addrc, int *sizec)
+{
+ if (addrc)
+ *addrc = 3;
+ if (sizec)
+ *sizec = 2;
+}
+
+static int of_bus_pci_map(u32 *addr, const u32 *range,
+ int na, int ns, int pna)
+{
+ u32 result[OF_MAX_ADDR_CELLS];
+ int i;
+
+ /* Check address type match */
+ if ((addr[0] ^ range[0]) & 0x03000000)
+ return -EINVAL;
+
+ if (of_out_of_range(addr + 1, range + 1, range + na + pna,
+ na - 1, ns))
+ return -EINVAL;
+
+ /* Start with the parent range base. */
+ memcpy(result, range + na, pna * 4);
+
+ /* Add in the child address offset, skipping high cell. */
+ for (i = 0; i < na - 1; i++)
+ result[pna - 1 - i] +=
+ (addr[na - 1 - i] -
+ range[na - 1 - i]);
+
+ memcpy(addr, result, pna * 4);
+
+ return 0;
+}
+
+static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
+{
+ u32 w = addr[0];
+
+ /* For PCI, we override whatever child busses may have used. */
+ flags = 0;
+ switch((w >> 24) & 0x03) {
+ case 0x01:
+ flags |= IORESOURCE_IO;
+ break;
+
+ case 0x02: /* 32 bits */
+ case 0x03: /* 64 bits */
+ flags |= IORESOURCE_MEM;
+ break;
+ }
+ if (w & 0x40000000)
+ flags |= IORESOURCE_PREFETCH;
+ return flags;
+}
+
+/*
+ * SBUS bus specific translator
+ */
+
+static int of_bus_sbus_match(struct device_node *np)
+{
+ return !strcmp(np->name, "sbus") ||
+ !strcmp(np->name, "sbi");
+}
+
+static void of_bus_sbus_count_cells(struct device_node *child,
+ int *addrc, int *sizec)
+{
+ if (addrc)
+ *addrc = 2;
+ if (sizec)
+ *sizec = 1;
+}
+
+/*
+ * FHC/Central bus specific translator.
+ *
+ * This is just needed to hard-code the address and size cell
+ * counts. 'fhc' and 'central' nodes lack the #address-cells and
+ * #size-cells properties, and if you walk to the root on such
+ * Enterprise boxes all you'll get is a #size-cells of 2 which is
+ * not what we want to use.
+ */
+static int of_bus_fhc_match(struct device_node *np)
+{
+ return !strcmp(np->name, "fhc") ||
+ !strcmp(np->name, "central");
+}
+
+#define of_bus_fhc_count_cells of_bus_sbus_count_cells
+
+/*
+ * Array of bus specific translators
+ */
+
+static struct of_bus of_busses[] = {
+ /* PCI */
+ {
+ .name = "pci",
+ .addr_prop_name = "assigned-addresses",
+ .match = of_bus_pci_match,
+ .count_cells = of_bus_pci_count_cells,
+ .map = of_bus_pci_map,
+ .get_flags = of_bus_pci_get_flags,
+ },
+ /* SIMBA */
+ {
+ .name = "simba",
+ .addr_prop_name = "assigned-addresses",
+ .match = of_bus_simba_match,
+ .count_cells = of_bus_pci_count_cells,
+ .map = of_bus_simba_map,
+ .get_flags = of_bus_pci_get_flags,
+ },
+ /* SBUS */
+ {
+ .name = "sbus",
+ .addr_prop_name = "reg",
+ .match = of_bus_sbus_match,
+ .count_cells = of_bus_sbus_count_cells,
+ .map = of_bus_default_map,
+ .get_flags = of_bus_default_get_flags,
+ },
+ /* FHC */
+ {
+ .name = "fhc",
+ .addr_prop_name = "reg",
+ .match = of_bus_fhc_match,
+ .count_cells = of_bus_fhc_count_cells,
+ .map = of_bus_default_map,
+ .get_flags = of_bus_default_get_flags,
+ },
+ /* Default */
+ {
+ .name = "default",
+ .addr_prop_name = "reg",
+ .match = NULL,
+ .count_cells = of_bus_default_count_cells,
+ .map = of_bus_default_map,
+ .get_flags = of_bus_default_get_flags,
+ },
+};
+
+static struct of_bus *of_match_bus(struct device_node *np)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(of_busses); i ++)
+ if (!of_busses[i].match || of_busses[i].match(np))
+ return &of_busses[i];
+ BUG();
+ return NULL;
+}
+
+static int __init build_one_resource(struct device_node *parent,
+ struct of_bus *bus,
+ struct of_bus *pbus,
+ u32 *addr,
+ int na, int ns, int pna)
+{
+ const u32 *ranges;
+ int rone, rlen;
+
+ ranges = of_get_property(parent, "ranges", &rlen);
+ if (ranges == NULL || rlen == 0) {
+ u32 result[OF_MAX_ADDR_CELLS];
+ int i;
+
+ memset(result, 0, pna * 4);
+ for (i = 0; i < na; i++)
+ result[pna - 1 - i] =
+ addr[na - 1 - i];
+
+ memcpy(addr, result, pna * 4);
+ return 0;
+ }
+
+ /* Now walk through the ranges */
+ rlen /= 4;
+ rone = na + pna + ns;
+ for (; rlen >= rone; rlen -= rone, ranges += rone) {
+ if (!bus->map(addr, ranges, na, ns, pna))
+ return 0;
+ }
+
+ /* When we miss an I/O space match on PCI, just pass it up
+ * to the next PCI bridge and/or controller.
+ */
+ if (!strcmp(bus->name, "pci") &&
+ (addr[0] & 0x03000000) == 0x01000000)
+ return 0;
+
+ return 1;
+}
+
+static int __init use_1to1_mapping(struct device_node *pp)
+{
+ /* If we have a ranges property in the parent, use it. */
+ if (of_find_property(pp, "ranges", NULL) != NULL)
+ return 0;
+
+ /* If the parent is the dma node of an ISA bus, pass
+ * the translation up to the root.
+ *
+ * Some SBUS devices use intermediate nodes to express
+ * hierarchy within the device itself. These aren't
+ * real bus nodes, and don't have a 'ranges' property.
+ * But, we should still pass the translation work up
+ * to the SBUS itself.
+ */
+ if (!strcmp(pp->name, "dma") ||
+ !strcmp(pp->name, "espdma") ||
+ !strcmp(pp->name, "ledma") ||
+ !strcmp(pp->name, "lebuffer"))
+ return 0;
+
+ /* Similarly for all PCI bridges, if we get this far
+ * it lacks a ranges property, and this will include
+ * cases like Simba.
+ */
+ if (!strcmp(pp->name, "pci"))
+ return 0;
+
+ return 1;
+}
+
+static int of_resource_verbose;
+
+static void __init build_device_resources(struct of_device *op,
+ struct device *parent)
+{
+ struct of_device *p_op;
+ struct of_bus *bus;
+ int na, ns;
+ int index, num_reg;
+ const void *preg;
+
+ if (!parent)
+ return;
+
+ p_op = to_of_device(parent);
+ bus = of_match_bus(p_op->node);
+ bus->count_cells(op->node, &na, &ns);
+
+ preg = of_get_property(op->node, bus->addr_prop_name, &num_reg);
+ if (!preg || num_reg == 0)
+ return;
+
+ /* Convert to num-cells. */
+ num_reg /= 4;
+
+ /* Convert to num-entries. */
+ num_reg /= na + ns;
+
+ /* Prevent overrunning the op->resources[] array. */
+ if (num_reg > PROMREG_MAX) {
+ printk(KERN_WARNING "%s: Too many regs (%d), "
+ "limiting to %d.\n",
+ op->node->full_name, num_reg, PROMREG_MAX);
+ num_reg = PROMREG_MAX;
+ }
+
+ for (index = 0; index < num_reg; index++) {
+ struct resource *r = &op->resource[index];
+ u32 addr[OF_MAX_ADDR_CELLS];
+ const u32 *reg = (preg + (index * ((na + ns) * 4)));
+ struct device_node *dp = op->node;
+ struct device_node *pp = p_op->node;
+ struct of_bus *pbus, *dbus;
+ u64 size, result = OF_BAD_ADDR;
+ unsigned long flags;
+ int dna, dns;
+ int pna, pns;
+
+ size = of_read_addr(reg + na, ns);
+ memcpy(addr, reg, na * 4);
+
+ flags = bus->get_flags(addr, 0);
+
+ if (use_1to1_mapping(pp)) {
+ result = of_read_addr(addr, na);
+ goto build_res;
+ }
+
+ dna = na;
+ dns = ns;
+ dbus = bus;
+
+ while (1) {
+ dp = pp;
+ pp = dp->parent;
+ if (!pp) {
+ result = of_read_addr(addr, dna);
+ break;
+ }
+
+ pbus = of_match_bus(pp);
+ pbus->count_cells(dp, &pna, &pns);
+
+ if (build_one_resource(dp, dbus, pbus, addr,
+ dna, dns, pna))
+ break;
+
+ flags = pbus->get_flags(addr, flags);
+
+ dna = pna;
+ dns = pns;
+ dbus = pbus;
+ }
+
+ build_res:
+ memset(r, 0, sizeof(*r));
+
+ if (of_resource_verbose)
+ printk("%s reg[%d] -> %lx\n",
+ op->node->full_name, index,
+ result);
+
+ if (result != OF_BAD_ADDR) {
+ if (tlb_type == hypervisor)
+ result &= 0x0fffffffffffffffUL;
+
+ r->start = result;
+ r->end = result + size - 1;
+ r->flags = flags;
+ }
+ r->name = op->node->name;
+ }
+}
+
+static struct device_node * __init
+apply_interrupt_map(struct device_node *dp, struct device_node *pp,
+ const u32 *imap, int imlen, const u32 *imask,
+ unsigned int *irq_p)
+{
+ struct device_node *cp;
+ unsigned int irq = *irq_p;
+ struct of_bus *bus;
+ phandle handle;
+ const u32 *reg;
+ int na, num_reg, i;
+
+ bus = of_match_bus(pp);
+ bus->count_cells(dp, &na, NULL);
+
+ reg = of_get_property(dp, "reg", &num_reg);
+ if (!reg || !num_reg)
+ return NULL;
+
+ imlen /= ((na + 3) * 4);
+ handle = 0;
+ for (i = 0; i < imlen; i++) {
+ int j;
+
+ for (j = 0; j < na; j++) {
+ if ((reg[j] & imask[j]) != imap[j])
+ goto next;
+ }
+ if (imap[na] == irq) {
+ handle = imap[na + 1];
+ irq = imap[na + 2];
+ break;
+ }
+
+ next:
+ imap += (na + 3);
+ }
+ if (i == imlen) {
+ /* Psycho and Sabre PCI controllers can have 'interrupt-map'
+ * properties that do not include the on-board device
+ * interrupts. Instead, the device's 'interrupts' property
+ * is already a fully specified INO value.
+ *
+ * Handle this by deciding that, if we didn't get a
+ * match in the parent's 'interrupt-map', and the
+ * parent is an IRQ translater, then use the parent as
+ * our IRQ controller.
+ */
+ if (pp->irq_trans)
+ return pp;
+
+ return NULL;
+ }
+
+ *irq_p = irq;
+ cp = of_find_node_by_phandle(handle);
+
+ return cp;
+}
+
+static unsigned int __init pci_irq_swizzle(struct device_node *dp,
+ struct device_node *pp,
+ unsigned int irq)
+{
+ const struct linux_prom_pci_registers *regs;
+ unsigned int bus, devfn, slot, ret;
+
+ if (irq < 1 || irq > 4)
+ return irq;
+
+ regs = of_get_property(dp, "reg", NULL);
+ if (!regs)
+ return irq;
+
+ bus = (regs->phys_hi >> 16) & 0xff;
+ devfn = (regs->phys_hi >> 8) & 0xff;
+ slot = (devfn >> 3) & 0x1f;
+
+ if (pp->irq_trans) {
+ /* Derived from Table 8-3, U2P User's Manual. This branch
+ * is handling a PCI controller that lacks a proper set of
+ * interrupt-map and interrupt-map-mask properties. The
+ * Ultra-E450 is one example.
+ *
+ * The bit layout is BSSLL, where:
+ * B: 0 on bus A, 1 on bus B
+ * D: 2-bit slot number, derived from PCI device number as
+ * (dev - 1) for bus A, or (dev - 2) for bus B
+ * L: 2-bit line number
+ */
+ if (bus & 0x80) {
+ /* PBM-A */
+ bus = 0x00;
+ slot = (slot - 1) << 2;
+ } else {
+ /* PBM-B */
+ bus = 0x10;
+ slot = (slot - 2) << 2;
+ }
+ irq -= 1;
+
+ ret = (bus | slot | irq);
+ } else {
+ /* Going through a PCI-PCI bridge that lacks a set of
+ * interrupt-map and interrupt-map-mask properties.
+ */
+ ret = ((irq - 1 + (slot & 3)) & 3) + 1;
+ }
+
+ return ret;
+}
+
+static int of_irq_verbose;
+
+static unsigned int __init build_one_device_irq(struct of_device *op,
+ struct device *parent,
+ unsigned int irq)
+{
+ struct device_node *dp = op->node;
+ struct device_node *pp, *ip;
+ unsigned int orig_irq = irq;
+ int nid;
+
+ if (irq == 0xffffffff)
+ return irq;
+
+ if (dp->irq_trans) {
+ irq = dp->irq_trans->irq_build(dp, irq,
+ dp->irq_trans->data);
+
+ if (of_irq_verbose)
+ printk("%s: direct translate %x --> %x\n",
+ dp->full_name, orig_irq, irq);
+
+ goto out;
+ }
+
+ /* Something more complicated. Walk up to the root, applying
+ * interrupt-map or bus specific translations, until we hit
+ * an IRQ translator.
+ *
+ * If we hit a bus type or situation we cannot handle, we
+ * stop and assume that the original IRQ number was in a
+ * format which has special meaning to it's immediate parent.
+ */
+ pp = dp->parent;
+ ip = NULL;
+ while (pp) {
+ const void *imap, *imsk;
+ int imlen;
+
+ imap = of_get_property(pp, "interrupt-map", &imlen);
+ imsk = of_get_property(pp, "interrupt-map-mask", NULL);
+ if (imap && imsk) {
+ struct device_node *iret;
+ int this_orig_irq = irq;
+
+ iret = apply_interrupt_map(dp, pp,
+ imap, imlen, imsk,
+ &irq);
+
+ if (of_irq_verbose)
+ printk("%s: Apply [%s:%x] imap --> [%s:%x]\n",
+ op->node->full_name,
+ pp->full_name, this_orig_irq,
+ (iret ? iret->full_name : "NULL"), irq);
+
+ if (!iret)
+ break;
+
+ if (iret->irq_trans) {
+ ip = iret;
+ break;
+ }
+ } else {
+ if (!strcmp(pp->name, "pci")) {
+ unsigned int this_orig_irq = irq;
+
+ irq = pci_irq_swizzle(dp, pp, irq);
+ if (of_irq_verbose)
+ printk("%s: PCI swizzle [%s] "
+ "%x --> %x\n",
+ op->node->full_name,
+ pp->full_name, this_orig_irq,
+ irq);
+
+ }
+
+ if (pp->irq_trans) {
+ ip = pp;
+ break;
+ }
+ }
+ dp = pp;
+ pp = pp->parent;
+ }
+ if (!ip)
+ return orig_irq;
+
+ irq = ip->irq_trans->irq_build(op->node, irq,
+ ip->irq_trans->data);
+ if (of_irq_verbose)
+ printk("%s: Apply IRQ trans [%s] %x --> %x\n",
+ op->node->full_name, ip->full_name, orig_irq, irq);
+
+out:
+ nid = of_node_to_nid(dp);
+ if (nid != -1) {
+ cpumask_t numa_mask = node_to_cpumask(nid);
+
+ irq_set_affinity(irq, &numa_mask);
+ }
+
+ return irq;
+}
+
+static struct of_device * __init scan_one_device(struct device_node *dp,
+ struct device *parent)
+{
+ struct of_device *op = kzalloc(sizeof(*op), GFP_KERNEL);
+ const unsigned int *irq;
+ struct dev_archdata *sd;
+ int len, i;
+
+ if (!op)
+ return NULL;
+
+ sd = &op->dev.archdata;
+ sd->prom_node = dp;
+ sd->op = op;
+
+ op->node = dp;
+
+ op->clock_freq = of_getintprop_default(dp, "clock-frequency",
+ (25*1000*1000));
+ op->portid = of_getintprop_default(dp, "upa-portid", -1);
+ if (op->portid == -1)
+ op->portid = of_getintprop_default(dp, "portid", -1);
+
+ irq = of_get_property(dp, "interrupts", &len);
+ if (irq) {
+ op->num_irqs = len / 4;
+
+ /* Prevent overrunning the op->irqs[] array. */
+ if (op->num_irqs > PROMINTR_MAX) {
+ printk(KERN_WARNING "%s: Too many irqs (%d), "
+ "limiting to %d.\n",
+ dp->full_name, op->num_irqs, PROMINTR_MAX);
+ op->num_irqs = PROMINTR_MAX;
+ }
+ memcpy(op->irqs, irq, op->num_irqs * 4);
+ } else {
+ op->num_irqs = 0;
+ }
+
+ build_device_resources(op, parent);
+ for (i = 0; i < op->num_irqs; i++)
+ op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]);
+
+ op->dev.parent = parent;
+ op->dev.bus = &of_platform_bus_type;
+ if (!parent)
+ dev_set_name(&op->dev, "root");
+ else
+ dev_set_name(&op->dev, "%08x", dp->node);
+
+ if (of_device_register(op)) {
+ printk("%s: Could not register of device.\n",
+ dp->full_name);
+ kfree(op);
+ op = NULL;
+ }
+
+ return op;
+}
+
+static void __init scan_tree(struct device_node *dp, struct device *parent)
+{
+ while (dp) {
+ struct of_device *op = scan_one_device(dp, parent);
+
+ if (op)
+ scan_tree(dp->child, &op->dev);
+
+ dp = dp->sibling;
+ }
+}
+
+static void __init scan_of_devices(void)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ struct of_device *parent;
+
+ parent = scan_one_device(root, NULL);
+ if (!parent)
+ return;
+
+ scan_tree(root->child, &parent->dev);
+}
+
+static int __init of_bus_driver_init(void)
+{
+ int err;
+
+ err = of_bus_type_init(&of_platform_bus_type, "of");
+ if (!err)
+ scan_of_devices();
+
+ return err;
+}
+
+postcore_initcall(of_bus_driver_init);
+
+static int __init of_debug(char *str)
+{
+ int val = 0;
+
+ get_option(&str, &val);
+ if (val & 1)
+ of_resource_verbose = 1;
+ if (val & 2)
+ of_irq_verbose = 1;
+ return 1;
+}
+
+__setup("of_debug=", of_debug);
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
new file mode 100644
index 000000000000..bdb7c0a6d83d
--- /dev/null
+++ b/arch/sparc/kernel/pci.c
@@ -0,0 +1,1095 @@
+/* pci.c: UltraSparc PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ *
+ * OF tree based PCI bus probing taken from the PowerPC port
+ * with minor modifications, see there for credits.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/apb.h>
+
+#include "pci_impl.h"
+
+/* List of all PCI controllers found in the system. */
+struct pci_pbm_info *pci_pbm_root = NULL;
+
+/* Each PBM found gets a unique index. */
+int pci_num_pbms = 0;
+
+volatile int pci_poke_in_progress;
+volatile int pci_poke_cpu = -1;
+volatile int pci_poke_faulted;
+
+static DEFINE_SPINLOCK(pci_poke_lock);
+
+void pci_config_read8(u8 *addr, u8 *ret)
+{
+ unsigned long flags;
+ u8 byte;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ __asm__ __volatile__("membar #Sync\n\t"
+ "lduba [%1] %2, %0\n\t"
+ "membar #Sync"
+ : "=r" (byte)
+ : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ if (!pci_poke_faulted)
+ *ret = byte;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_read16(u16 *addr, u16 *ret)
+{
+ unsigned long flags;
+ u16 word;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ __asm__ __volatile__("membar #Sync\n\t"
+ "lduha [%1] %2, %0\n\t"
+ "membar #Sync"
+ : "=r" (word)
+ : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ if (!pci_poke_faulted)
+ *ret = word;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_read32(u32 *addr, u32 *ret)
+{
+ unsigned long flags;
+ u32 dword;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ __asm__ __volatile__("membar #Sync\n\t"
+ "lduwa [%1] %2, %0\n\t"
+ "membar #Sync"
+ : "=r" (dword)
+ : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ if (!pci_poke_faulted)
+ *ret = dword;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_write8(u8 *addr, u8 val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ __asm__ __volatile__("membar #Sync\n\t"
+ "stba %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_write16(u16 *addr, u16 val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ __asm__ __volatile__("membar #Sync\n\t"
+ "stha %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_write32(u32 *addr, u32 val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_poke_lock, flags);
+ pci_poke_cpu = smp_processor_id();
+ pci_poke_in_progress = 1;
+ pci_poke_faulted = 0;
+ __asm__ __volatile__("membar #Sync\n\t"
+ "stwa %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+ : "memory");
+ pci_poke_in_progress = 0;
+ pci_poke_cpu = -1;
+ spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+static int ofpci_verbose;
+
+static int __init ofpci_debug(char *str)
+{
+ int val = 0;
+
+ get_option(&str, &val);
+ if (val)
+ ofpci_verbose = 1;
+ return 1;
+}
+
+__setup("ofpci_debug=", ofpci_debug);
+
+static unsigned long pci_parse_of_flags(u32 addr0)
+{
+ unsigned long flags = 0;
+
+ if (addr0 & 0x02000000) {
+ flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+ flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+ flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+ if (addr0 & 0x40000000)
+ flags |= IORESOURCE_PREFETCH
+ | PCI_BASE_ADDRESS_MEM_PREFETCH;
+ } else if (addr0 & 0x01000000)
+ flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+ return flags;
+}
+
+/* The of_device layer has translated all of the assigned-address properties
+ * into physical address resources, we only have to figure out the register
+ * mapping.
+ */
+static void pci_parse_of_addrs(struct of_device *op,
+ struct device_node *node,
+ struct pci_dev *dev)
+{
+ struct resource *op_res;
+ const u32 *addrs;
+ int proplen;
+
+ addrs = of_get_property(node, "assigned-addresses", &proplen);
+ if (!addrs)
+ return;
+ if (ofpci_verbose)
+ printk(" parse addresses (%d bytes) @ %p\n",
+ proplen, addrs);
+ op_res = &op->resource[0];
+ for (; proplen >= 20; proplen -= 20, addrs += 5, op_res++) {
+ struct resource *res;
+ unsigned long flags;
+ int i;
+
+ flags = pci_parse_of_flags(addrs[0]);
+ if (!flags)
+ continue;
+ i = addrs[0] & 0xff;
+ if (ofpci_verbose)
+ printk(" start: %lx, end: %lx, i: %x\n",
+ op_res->start, op_res->end, i);
+
+ if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+ res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+ } else if (i == dev->rom_base_reg) {
+ res = &dev->resource[PCI_ROM_RESOURCE];
+ flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ } else {
+ printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+ continue;
+ }
+ res->start = op_res->start;
+ res->end = op_res->end;
+ res->flags = flags;
+ res->name = pci_name(dev);
+ }
+}
+
+static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
+ struct device_node *node,
+ struct pci_bus *bus, int devfn)
+{
+ struct dev_archdata *sd;
+ struct of_device *op;
+ struct pci_dev *dev;
+ const char *type;
+ u32 class;
+
+ dev = alloc_pci_dev();
+ if (!dev)
+ return NULL;
+
+ sd = &dev->dev.archdata;
+ sd->iommu = pbm->iommu;
+ sd->stc = &pbm->stc;
+ sd->host_controller = pbm;
+ sd->prom_node = node;
+ sd->op = op = of_find_device_by_node(node);
+ sd->numa_node = pbm->numa_node;
+
+ sd = &op->dev.archdata;
+ sd->iommu = pbm->iommu;
+ sd->stc = &pbm->stc;
+ sd->numa_node = pbm->numa_node;
+
+ if (!strcmp(node->name, "ebus"))
+ of_propagate_archdata(op);
+
+ type = of_get_property(node, "device_type", NULL);
+ if (type == NULL)
+ type = "";
+
+ if (ofpci_verbose)
+ printk(" create device, devfn: %x, type: %s\n",
+ devfn, type);
+
+ dev->bus = bus;
+ dev->sysdata = node;
+ dev->dev.parent = bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->devfn = devfn;
+ dev->multifunction = 0; /* maybe a lie? */
+
+ dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff);
+ dev->device = of_getintprop_default(node, "device-id", 0xffff);
+ dev->subsystem_vendor =
+ of_getintprop_default(node, "subsystem-vendor-id", 0);
+ dev->subsystem_device =
+ of_getintprop_default(node, "subsystem-id", 0);
+
+ dev->cfg_size = pci_cfg_space_size(dev);
+
+ /* We can't actually use the firmware value, we have
+ * to read what is in the register right now. One
+ * reason is that in the case of IDE interfaces the
+ * firmware can sample the value before the the IDE
+ * interface is programmed into native mode.
+ */
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+ dev->class = class >> 8;
+ dev->revision = class & 0xff;
+
+ dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+ dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if (ofpci_verbose)
+ printk(" class: 0x%x device name: %s\n",
+ dev->class, pci_name(dev));
+
+ /* I have seen IDE devices which will not respond to
+ * the bmdma simplex check reads if bus mastering is
+ * disabled.
+ */
+ if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)
+ pci_set_master(dev);
+
+ dev->current_state = 4; /* unknown power state */
+ dev->error_state = pci_channel_io_normal;
+
+ if (!strcmp(node->name, "pci")) {
+ /* a PCI-PCI bridge */
+ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+ dev->rom_base_reg = PCI_ROM_ADDRESS1;
+ } else if (!strcmp(type, "cardbus")) {
+ dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+ } else {
+ dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+ dev->rom_base_reg = PCI_ROM_ADDRESS;
+
+ dev->irq = sd->op->irqs[0];
+ if (dev->irq == 0xffffffff)
+ dev->irq = PCI_IRQ_NONE;
+ }
+
+ pci_parse_of_addrs(sd->op, node, dev);
+
+ if (ofpci_verbose)
+ printk(" adding to system ...\n");
+
+ pci_device_add(dev, bus);
+
+ return dev;
+}
+
+static void __devinit apb_calc_first_last(u8 map, u32 *first_p, u32 *last_p)
+{
+ u32 idx, first, last;
+
+ first = 8;
+ last = 0;
+ for (idx = 0; idx < 8; idx++) {
+ if ((map & (1 << idx)) != 0) {
+ if (first > idx)
+ first = idx;
+ if (last < idx)
+ last = idx;
+ }
+ }
+
+ *first_p = first;
+ *last_p = last;
+}
+
+static void pci_resource_adjust(struct resource *res,
+ struct resource *root)
+{
+ res->start += root->start;
+ res->end += root->start;
+}
+
+/* For PCI bus devices which lack a 'ranges' property we interrogate
+ * the config space values to set the resources, just like the generic
+ * Linux PCI probing code does.
+ */
+static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev,
+ struct pci_bus *bus,
+ struct pci_pbm_info *pbm)
+{
+ struct resource *res;
+ u8 io_base_lo, io_limit_lo;
+ u16 mem_base_lo, mem_limit_lo;
+ unsigned long base, limit;
+
+ pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
+ pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
+ base = (io_base_lo & PCI_IO_RANGE_MASK) << 8;
+ limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8;
+
+ if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
+ u16 io_base_hi, io_limit_hi;
+
+ pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
+ pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
+ base |= (io_base_hi << 16);
+ limit |= (io_limit_hi << 16);
+ }
+
+ res = bus->resource[0];
+ if (base <= limit) {
+ res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+ if (!res->start)
+ res->start = base;
+ if (!res->end)
+ res->end = limit + 0xfff;
+ pci_resource_adjust(res, &pbm->io_space);
+ }
+
+ pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
+ pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
+ base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
+ limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
+
+ res = bus->resource[1];
+ if (base <= limit) {
+ res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) |
+ IORESOURCE_MEM);
+ res->start = base;
+ res->end = limit + 0xfffff;
+ pci_resource_adjust(res, &pbm->mem_space);
+ }
+
+ pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
+ pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
+ base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
+ limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
+
+ if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+ u32 mem_base_hi, mem_limit_hi;
+
+ pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
+ pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
+
+ /*
+ * Some bridges set the base > limit by default, and some
+ * (broken) BIOSes do not initialize them. If we find
+ * this, just assume they are not being used.
+ */
+ if (mem_base_hi <= mem_limit_hi) {
+ base |= ((long) mem_base_hi) << 32;
+ limit |= ((long) mem_limit_hi) << 32;
+ }
+ }
+
+ res = bus->resource[2];
+ if (base <= limit) {
+ res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) |
+ IORESOURCE_MEM | IORESOURCE_PREFETCH);
+ res->start = base;
+ res->end = limit + 0xfffff;
+ pci_resource_adjust(res, &pbm->mem_space);
+ }
+}
+
+/* Cook up fake bus resources for SUNW,simba PCI bridges which lack
+ * a proper 'ranges' property.
+ */
+static void __devinit apb_fake_ranges(struct pci_dev *dev,
+ struct pci_bus *bus,
+ struct pci_pbm_info *pbm)
+{
+ struct resource *res;
+ u32 first, last;
+ u8 map;
+
+ pci_read_config_byte(dev, APB_IO_ADDRESS_MAP, &map);
+ apb_calc_first_last(map, &first, &last);
+ res = bus->resource[0];
+ res->start = (first << 21);
+ res->end = (last << 21) + ((1 << 21) - 1);
+ res->flags = IORESOURCE_IO;
+ pci_resource_adjust(res, &pbm->io_space);
+
+ pci_read_config_byte(dev, APB_MEM_ADDRESS_MAP, &map);
+ apb_calc_first_last(map, &first, &last);
+ res = bus->resource[1];
+ res->start = (first << 21);
+ res->end = (last << 21) + ((1 << 21) - 1);
+ res->flags = IORESOURCE_MEM;
+ pci_resource_adjust(res, &pbm->mem_space);
+}
+
+static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm,
+ struct device_node *node,
+ struct pci_bus *bus);
+
+#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
+
+static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm,
+ struct device_node *node,
+ struct pci_dev *dev)
+{
+ struct pci_bus *bus;
+ const u32 *busrange, *ranges;
+ int len, i, simba;
+ struct resource *res;
+ unsigned int flags;
+ u64 size;
+
+ if (ofpci_verbose)
+ printk("of_scan_pci_bridge(%s)\n", node->full_name);
+
+ /* parse bus-range property */
+ busrange = of_get_property(node, "bus-range", &len);
+ if (busrange == NULL || len != 8) {
+ printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+ ranges = of_get_property(node, "ranges", &len);
+ simba = 0;
+ if (ranges == NULL) {
+ const char *model = of_get_property(node, "model", NULL);
+ if (model && !strcmp(model, "SUNW,simba"))
+ simba = 1;
+ }
+
+ bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+ if (!bus) {
+ printk(KERN_ERR "Failed to create pci bus for %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus->primary = dev->bus->number;
+ bus->subordinate = busrange[1];
+ bus->bridge_ctl = 0;
+
+ /* parse ranges property, or cook one up by hand for Simba */
+ /* PCI #address-cells == 3 and #size-cells == 2 always */
+ res = &dev->resource[PCI_BRIDGE_RESOURCES];
+ for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+ res->flags = 0;
+ bus->resource[i] = res;
+ ++res;
+ }
+ if (simba) {
+ apb_fake_ranges(dev, bus, pbm);
+ goto after_ranges;
+ } else if (ranges == NULL) {
+ pci_cfg_fake_ranges(dev, bus, pbm);
+ goto after_ranges;
+ }
+ i = 1;
+ for (; len >= 32; len -= 32, ranges += 8) {
+ struct resource *root;
+
+ flags = pci_parse_of_flags(ranges[0]);
+ size = GET_64BIT(ranges, 6);
+ if (flags == 0 || size == 0)
+ continue;
+ if (flags & IORESOURCE_IO) {
+ res = bus->resource[0];
+ if (res->flags) {
+ printk(KERN_ERR "PCI: ignoring extra I/O range"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ root = &pbm->io_space;
+ } else {
+ if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+ printk(KERN_ERR "PCI: too many memory ranges"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ res = bus->resource[i];
+ ++i;
+ root = &pbm->mem_space;
+ }
+
+ res->start = GET_64BIT(ranges, 1);
+ res->end = res->start + size - 1;
+ res->flags = flags;
+
+ /* Another way to implement this would be to add an of_device
+ * layer routine that can calculate a resource for a given
+ * range property value in a PCI device.
+ */
+ pci_resource_adjust(res, root);
+ }
+after_ranges:
+ sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+ bus->number);
+ if (ofpci_verbose)
+ printk(" bus name: %s\n", bus->name);
+
+ pci_of_scan_bus(pbm, node, bus);
+}
+
+static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm,
+ struct device_node *node,
+ struct pci_bus *bus)
+{
+ struct device_node *child;
+ const u32 *reg;
+ int reglen, devfn, prev_devfn;
+ struct pci_dev *dev;
+
+ if (ofpci_verbose)
+ printk("PCI: scan_bus[%s] bus no %d\n",
+ node->full_name, bus->number);
+
+ child = NULL;
+ prev_devfn = -1;
+ while ((child = of_get_next_child(node, child)) != NULL) {
+ if (ofpci_verbose)
+ printk(" * %s\n", child->full_name);
+ reg = of_get_property(child, "reg", &reglen);
+ if (reg == NULL || reglen < 20)
+ continue;
+
+ devfn = (reg[0] >> 8) & 0xff;
+
+ /* This is a workaround for some device trees
+ * which list PCI devices twice. On the V100
+ * for example, device number 3 is listed twice.
+ * Once as "pm" and once again as "lomp".
+ */
+ if (devfn == prev_devfn)
+ continue;
+ prev_devfn = devfn;
+
+ /* create a new pci_dev for this device */
+ dev = of_create_pci_dev(pbm, child, bus, devfn);
+ if (!dev)
+ continue;
+ if (ofpci_verbose)
+ printk("PCI: dev header type: %x\n",
+ dev->hdr_type);
+
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+ dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ of_scan_pci_bridge(pbm, child, dev);
+ }
+}
+
+static ssize_t
+show_pciobppath_attr(struct device * dev, struct device_attribute * attr, char * buf)
+{
+ struct pci_dev *pdev;
+ struct device_node *dp;
+
+ pdev = to_pci_dev(dev);
+ dp = pdev->dev.archdata.prom_node;
+
+ return snprintf (buf, PAGE_SIZE, "%s\n", dp->full_name);
+}
+
+static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH, show_pciobppath_attr, NULL);
+
+static void __devinit pci_bus_register_of_sysfs(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+ struct pci_bus *child_bus;
+ int err;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ /* we don't really care if we can create this file or
+ * not, but we need to assign the result of the call
+ * or the world will fall under alien invasion and
+ * everybody will be frozen on a spaceship ready to be
+ * eaten on alpha centauri by some green and jelly
+ * humanoid.
+ */
+ err = sysfs_create_file(&dev->dev.kobj, &dev_attr_obppath.attr);
+ }
+ list_for_each_entry(child_bus, &bus->children, node)
+ pci_bus_register_of_sysfs(child_bus);
+}
+
+struct pci_bus * __devinit pci_scan_one_pbm(struct pci_pbm_info *pbm,
+ struct device *parent)
+{
+ struct device_node *node = pbm->op->node;
+ struct pci_bus *bus;
+
+ printk("PCI: Scanning PBM %s\n", node->full_name);
+
+ bus = pci_create_bus(parent, pbm->pci_first_busno, pbm->pci_ops, pbm);
+ if (!bus) {
+ printk(KERN_ERR "Failed to create bus for %s\n",
+ node->full_name);
+ return NULL;
+ }
+ bus->secondary = pbm->pci_first_busno;
+ bus->subordinate = pbm->pci_last_busno;
+
+ bus->resource[0] = &pbm->io_space;
+ bus->resource[1] = &pbm->mem_space;
+
+ pci_of_scan_bus(pbm, node, bus);
+ pci_bus_add_devices(bus);
+ pci_bus_register_of_sysfs(bus);
+
+ return bus;
+}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
+{
+ struct pci_pbm_info *pbm = pbus->sysdata;
+
+ /* Generic PCI bus probing sets these to point at
+ * &io{port,mem}_resouce which is wrong for us.
+ */
+ pbus->resource[0] = &pbm->io_space;
+ pbus->resource[1] = &pbm->mem_space;
+}
+
+struct resource *pcibios_select_root(struct pci_dev *pdev, struct resource *r)
+{
+ struct pci_pbm_info *pbm = pdev->bus->sysdata;
+ struct resource *root = NULL;
+
+ if (r->flags & IORESOURCE_IO)
+ root = &pbm->io_space;
+ if (r->flags & IORESOURCE_MEM)
+ root = &pbm->mem_space;
+
+ return root;
+}
+
+void pcibios_update_irq(struct pci_dev *pdev, int irq)
+{
+}
+
+void pcibios_align_resource(void *data, struct resource *res,
+ resource_size_t size, resource_size_t align)
+{
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ u16 cmd, oldcmd;
+ int i;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ oldcmd = cmd;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *res = &dev->resource[i];
+
+ /* Only set up the requested stuff */
+ if (!(mask & (1<<i)))
+ continue;
+
+ if (res->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (res->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+
+ if (cmd != oldcmd) {
+ printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
+ pci_name(dev), cmd);
+ /* Enable the appropriate bits in the PCI command register. */
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ return 0;
+}
+
+void pcibios_resource_to_bus(struct pci_dev *pdev, struct pci_bus_region *region,
+ struct resource *res)
+{
+ struct pci_pbm_info *pbm = pdev->bus->sysdata;
+ struct resource zero_res, *root;
+
+ zero_res.start = 0;
+ zero_res.end = 0;
+ zero_res.flags = res->flags;
+
+ if (res->flags & IORESOURCE_IO)
+ root = &pbm->io_space;
+ else
+ root = &pbm->mem_space;
+
+ pci_resource_adjust(&zero_res, root);
+
+ region->start = res->start - zero_res.start;
+ region->end = res->end - zero_res.start;
+}
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+
+void pcibios_bus_to_resource(struct pci_dev *pdev, struct resource *res,
+ struct pci_bus_region *region)
+{
+ struct pci_pbm_info *pbm = pdev->bus->sysdata;
+ struct resource *root;
+
+ res->start = region->start;
+ res->end = region->end;
+
+ if (res->flags & IORESOURCE_IO)
+ root = &pbm->io_space;
+ else
+ root = &pbm->mem_space;
+
+ pci_resource_adjust(res, root);
+}
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+
+char * __devinit pcibios_setup(char *str)
+{
+ return str;
+}
+
+/* Platform support for /proc/bus/pci/X/Y mmap()s. */
+
+/* If the user uses a host-bridge as the PCI device, he may use
+ * this to perform a raw mmap() of the I/O or MEM space behind
+ * that controller.
+ *
+ * This can be useful for execution of x86 PCI bios initialization code
+ * on a PCI card, like the xfree86 int10 stuff does.
+ */
+static int __pci_mmap_make_offset_bus(struct pci_dev *pdev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ unsigned long space_size, user_offset, user_size;
+
+ if (mmap_state == pci_mmap_io) {
+ space_size = (pbm->io_space.end -
+ pbm->io_space.start) + 1;
+ } else {
+ space_size = (pbm->mem_space.end -
+ pbm->mem_space.start) + 1;
+ }
+
+ /* Make sure the request is in range. */
+ user_offset = vma->vm_pgoff << PAGE_SHIFT;
+ user_size = vma->vm_end - vma->vm_start;
+
+ if (user_offset >= space_size ||
+ (user_offset + user_size) > space_size)
+ return -EINVAL;
+
+ if (mmap_state == pci_mmap_io) {
+ vma->vm_pgoff = (pbm->io_space.start +
+ user_offset) >> PAGE_SHIFT;
+ } else {
+ vma->vm_pgoff = (pbm->mem_space.start +
+ user_offset) >> PAGE_SHIFT;
+ }
+
+ return 0;
+}
+
+/* Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for his device which he wishes
+ * to mmap. They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static int __pci_mmap_make_offset(struct pci_dev *pdev,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ unsigned long user_paddr, user_size;
+ int i, err;
+
+ /* First compute the physical address in vma->vm_pgoff,
+ * making sure the user offset is within range in the
+ * appropriate PCI space.
+ */
+ err = __pci_mmap_make_offset_bus(pdev, vma, mmap_state);
+ if (err)
+ return err;
+
+ /* If this is a mapping on a host bridge, any address
+ * is OK.
+ */
+ if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+ return err;
+
+ /* Otherwise make sure it's in the range for one of the
+ * device's resources.
+ */
+ user_paddr = vma->vm_pgoff << PAGE_SHIFT;
+ user_size = vma->vm_end - vma->vm_start;
+
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &pdev->resource[i];
+ resource_size_t aligned_end;
+
+ /* Active? */
+ if (!rp->flags)
+ continue;
+
+ /* Same type? */
+ if (i == PCI_ROM_RESOURCE) {
+ if (mmap_state != pci_mmap_mem)
+ continue;
+ } else {
+ if ((mmap_state == pci_mmap_io &&
+ (rp->flags & IORESOURCE_IO) == 0) ||
+ (mmap_state == pci_mmap_mem &&
+ (rp->flags & IORESOURCE_MEM) == 0))
+ continue;
+ }
+
+ /* Align the resource end to the next page address.
+ * PAGE_SIZE intentionally added instead of (PAGE_SIZE - 1),
+ * because actually we need the address of the next byte
+ * after rp->end.
+ */
+ aligned_end = (rp->end + PAGE_SIZE) & PAGE_MASK;
+
+ if ((rp->start <= user_paddr) &&
+ (user_paddr + user_size) <= aligned_end)
+ break;
+ }
+
+ if (i > PCI_ROM_RESOURCE)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Set vm_flags of VMA, as appropriate for this architecture, for a pci device
+ * mapping.
+ */
+static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ vma->vm_flags |= (VM_IO | VM_RESERVED);
+}
+
+/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.
+ */
+static void __pci_mmap_set_pgprot(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ /* Our io_remap_pfn_range takes care of this, do nothing. */
+}
+
+/* Perform the actual remap of the pages for a PCI device mapping, as appropriate
+ * for this architecture. The region in the process to map is described by vm_start
+ * and vm_end members of VMA, the base physical address is found in vm_pgoff.
+ * The pci device structure is provided so that architectures may make mapping
+ * decisions on a per-device or per-bus basis.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+ int ret;
+
+ ret = __pci_mmap_make_offset(dev, vma, mmap_state);
+ if (ret < 0)
+ return ret;
+
+ __pci_mmap_set_flags(dev, vma, mmap_state);
+ __pci_mmap_set_pgprot(dev, vma, mmap_state);
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+#ifdef CONFIG_NUMA
+int pcibus_to_node(struct pci_bus *pbus)
+{
+ struct pci_pbm_info *pbm = pbus->sysdata;
+
+ return pbm->numa_node;
+}
+EXPORT_SYMBOL(pcibus_to_node);
+#endif
+
+/* Return the domain number for this pci bus */
+
+int pci_domain_nr(struct pci_bus *pbus)
+{
+ struct pci_pbm_info *pbm = pbus->sysdata;
+ int ret;
+
+ if (!pbm) {
+ ret = -ENXIO;
+ } else {
+ ret = pbm->index;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(pci_domain_nr);
+
+#ifdef CONFIG_PCI_MSI
+int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ unsigned int virt_irq;
+
+ if (!pbm->setup_msi_irq)
+ return -EINVAL;
+
+ return pbm->setup_msi_irq(&virt_irq, pdev, desc);
+}
+
+void arch_teardown_msi_irq(unsigned int virt_irq)
+{
+ struct msi_desc *entry = get_irq_msi(virt_irq);
+ struct pci_dev *pdev = entry->dev;
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+
+ if (pbm->teardown_msi_irq)
+ pbm->teardown_msi_irq(virt_irq, pdev);
+}
+#endif /* !(CONFIG_PCI_MSI) */
+
+struct device_node *pci_device_to_OF_node(struct pci_dev *pdev)
+{
+ return pdev->dev.archdata.prom_node;
+}
+EXPORT_SYMBOL(pci_device_to_OF_node);
+
+static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit)
+{
+ struct pci_dev *ali_isa_bridge;
+ u8 val;
+
+ /* ALI sound chips generate 31-bits of DMA, a special register
+ * determines what bit 31 is emitted as.
+ */
+ ali_isa_bridge = pci_get_device(PCI_VENDOR_ID_AL,
+ PCI_DEVICE_ID_AL_M1533,
+ NULL);
+
+ pci_read_config_byte(ali_isa_bridge, 0x7e, &val);
+ if (set_bit)
+ val |= 0x01;
+ else
+ val &= ~0x01;
+ pci_write_config_byte(ali_isa_bridge, 0x7e, val);
+ pci_dev_put(ali_isa_bridge);
+}
+
+int pci_dma_supported(struct pci_dev *pdev, u64 device_mask)
+{
+ u64 dma_addr_mask;
+
+ if (pdev == NULL) {
+ dma_addr_mask = 0xffffffff;
+ } else {
+ struct iommu *iommu = pdev->dev.archdata.iommu;
+
+ dma_addr_mask = iommu->dma_addr_mask;
+
+ if (pdev->vendor == PCI_VENDOR_ID_AL &&
+ pdev->device == PCI_DEVICE_ID_AL_M5451 &&
+ device_mask == 0x7fffffff) {
+ ali_sound_dma_hack(pdev,
+ (dma_addr_mask & 0x80000000) != 0);
+ return 1;
+ }
+ }
+
+ if (device_mask >= (1UL << 32UL))
+ return 0;
+
+ return (device_mask & dma_addr_mask) == dma_addr_mask;
+}
+
+void pci_resource_to_user(const struct pci_dev *pdev, int bar,
+ const struct resource *rp, resource_size_t *start,
+ resource_size_t *end)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ unsigned long offset;
+
+ if (rp->flags & IORESOURCE_IO)
+ offset = pbm->io_space.start;
+ else
+ offset = pbm->mem_space.start;
+
+ *start = rp->start - offset;
+ *end = rp->end - offset;
+}
diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c
new file mode 100644
index 000000000000..23b88082d0b2
--- /dev/null
+++ b/arch/sparc/kernel/pci_common.c
@@ -0,0 +1,545 @@
+/* pci_common.c: PCI controller common support.
+ *
+ * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/oplib.h>
+
+#include "pci_impl.h"
+#include "pci_sun4v.h"
+
+static int config_out_of_range(struct pci_pbm_info *pbm,
+ unsigned long bus,
+ unsigned long devfn,
+ unsigned long reg)
+{
+ if (bus < pbm->pci_first_busno ||
+ bus > pbm->pci_last_busno)
+ return 1;
+ return 0;
+}
+
+static void *sun4u_config_mkaddr(struct pci_pbm_info *pbm,
+ unsigned long bus,
+ unsigned long devfn,
+ unsigned long reg)
+{
+ unsigned long rbits = pbm->config_space_reg_bits;
+
+ if (config_out_of_range(pbm, bus, devfn, reg))
+ return NULL;
+
+ reg = (reg & ((1 << rbits) - 1));
+ devfn <<= rbits;
+ bus <<= rbits + 8;
+
+ return (void *) (pbm->config_space | bus | devfn | reg);
+}
+
+/* At least on Sabre, it is necessary to access all PCI host controller
+ * registers at their natural size, otherwise zeros are returned.
+ * Strange but true, and I see no language in the UltraSPARC-IIi
+ * programmer's manual that mentions this even indirectly.
+ */
+static int sun4u_read_pci_cfg_host(struct pci_pbm_info *pbm,
+ unsigned char bus, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ u32 tmp32, *addr;
+ u16 tmp16;
+ u8 tmp8;
+
+ addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+ if (!addr)
+ return PCIBIOS_SUCCESSFUL;
+
+ switch (size) {
+ case 1:
+ if (where < 8) {
+ unsigned long align = (unsigned long) addr;
+
+ align &= ~1;
+ pci_config_read16((u16 *)align, &tmp16);
+ if (where & 1)
+ *value = tmp16 >> 8;
+ else
+ *value = tmp16 & 0xff;
+ } else {
+ pci_config_read8((u8 *)addr, &tmp8);
+ *value = (u32) tmp8;
+ }
+ break;
+
+ case 2:
+ if (where < 8) {
+ pci_config_read16((u16 *)addr, &tmp16);
+ *value = (u32) tmp16;
+ } else {
+ pci_config_read8((u8 *)addr, &tmp8);
+ *value = (u32) tmp8;
+ pci_config_read8(((u8 *)addr) + 1, &tmp8);
+ *value |= ((u32) tmp8) << 8;
+ }
+ break;
+
+ case 4:
+ tmp32 = 0xffffffff;
+ sun4u_read_pci_cfg_host(pbm, bus, devfn,
+ where, 2, &tmp32);
+ *value = tmp32;
+
+ tmp32 = 0xffffffff;
+ sun4u_read_pci_cfg_host(pbm, bus, devfn,
+ where + 2, 2, &tmp32);
+ *value |= tmp32 << 16;
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int sun4u_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ struct pci_pbm_info *pbm = bus_dev->sysdata;
+ unsigned char bus = bus_dev->number;
+ u32 *addr;
+ u16 tmp16;
+ u8 tmp8;
+
+ switch (size) {
+ case 1:
+ *value = 0xff;
+ break;
+ case 2:
+ *value = 0xffff;
+ break;
+ case 4:
+ *value = 0xffffffff;
+ break;
+ }
+
+ if (!bus_dev->number && !PCI_SLOT(devfn))
+ return sun4u_read_pci_cfg_host(pbm, bus, devfn, where,
+ size, value);
+
+ addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+ if (!addr)
+ return PCIBIOS_SUCCESSFUL;
+
+ switch (size) {
+ case 1:
+ pci_config_read8((u8 *)addr, &tmp8);
+ *value = (u32) tmp8;
+ break;
+
+ case 2:
+ if (where & 0x01) {
+ printk("pci_read_config_word: misaligned reg [%x]\n",
+ where);
+ return PCIBIOS_SUCCESSFUL;
+ }
+ pci_config_read16((u16 *)addr, &tmp16);
+ *value = (u32) tmp16;
+ break;
+
+ case 4:
+ if (where & 0x03) {
+ printk("pci_read_config_dword: misaligned reg [%x]\n",
+ where);
+ return PCIBIOS_SUCCESSFUL;
+ }
+ pci_config_read32(addr, value);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int sun4u_write_pci_cfg_host(struct pci_pbm_info *pbm,
+ unsigned char bus, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ u32 *addr;
+
+ addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+ if (!addr)
+ return PCIBIOS_SUCCESSFUL;
+
+ switch (size) {
+ case 1:
+ if (where < 8) {
+ unsigned long align = (unsigned long) addr;
+ u16 tmp16;
+
+ align &= ~1;
+ pci_config_read16((u16 *)align, &tmp16);
+ if (where & 1) {
+ tmp16 &= 0x00ff;
+ tmp16 |= value << 8;
+ } else {
+ tmp16 &= 0xff00;
+ tmp16 |= value;
+ }
+ pci_config_write16((u16 *)align, tmp16);
+ } else
+ pci_config_write8((u8 *)addr, value);
+ break;
+ case 2:
+ if (where < 8) {
+ pci_config_write16((u16 *)addr, value);
+ } else {
+ pci_config_write8((u8 *)addr, value & 0xff);
+ pci_config_write8(((u8 *)addr) + 1, value >> 8);
+ }
+ break;
+ case 4:
+ sun4u_write_pci_cfg_host(pbm, bus, devfn,
+ where, 2, value & 0xffff);
+ sun4u_write_pci_cfg_host(pbm, bus, devfn,
+ where + 2, 2, value >> 16);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int sun4u_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ struct pci_pbm_info *pbm = bus_dev->sysdata;
+ unsigned char bus = bus_dev->number;
+ u32 *addr;
+
+ if (!bus_dev->number && !PCI_SLOT(devfn))
+ return sun4u_write_pci_cfg_host(pbm, bus, devfn, where,
+ size, value);
+
+ addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+ if (!addr)
+ return PCIBIOS_SUCCESSFUL;
+
+ switch (size) {
+ case 1:
+ pci_config_write8((u8 *)addr, value);
+ break;
+
+ case 2:
+ if (where & 0x01) {
+ printk("pci_write_config_word: misaligned reg [%x]\n",
+ where);
+ return PCIBIOS_SUCCESSFUL;
+ }
+ pci_config_write16((u16 *)addr, value);
+ break;
+
+ case 4:
+ if (where & 0x03) {
+ printk("pci_write_config_dword: misaligned reg [%x]\n",
+ where);
+ return PCIBIOS_SUCCESSFUL;
+ }
+ pci_config_write32(addr, value);
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops sun4u_pci_ops = {
+ .read = sun4u_read_pci_cfg,
+ .write = sun4u_write_pci_cfg,
+};
+
+static int sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ struct pci_pbm_info *pbm = bus_dev->sysdata;
+ u32 devhandle = pbm->devhandle;
+ unsigned int bus = bus_dev->number;
+ unsigned int device = PCI_SLOT(devfn);
+ unsigned int func = PCI_FUNC(devfn);
+ unsigned long ret;
+
+ if (config_out_of_range(pbm, bus, devfn, where)) {
+ ret = ~0UL;
+ } else {
+ ret = pci_sun4v_config_get(devhandle,
+ HV_PCI_DEVICE_BUILD(bus, device, func),
+ where, size);
+ }
+ switch (size) {
+ case 1:
+ *value = ret & 0xff;
+ break;
+ case 2:
+ *value = ret & 0xffff;
+ break;
+ case 4:
+ *value = ret & 0xffffffff;
+ break;
+ };
+
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ struct pci_pbm_info *pbm = bus_dev->sysdata;
+ u32 devhandle = pbm->devhandle;
+ unsigned int bus = bus_dev->number;
+ unsigned int device = PCI_SLOT(devfn);
+ unsigned int func = PCI_FUNC(devfn);
+ unsigned long ret;
+
+ if (config_out_of_range(pbm, bus, devfn, where)) {
+ /* Do nothing. */
+ } else {
+ ret = pci_sun4v_config_put(devhandle,
+ HV_PCI_DEVICE_BUILD(bus, device, func),
+ where, size, value);
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops sun4v_pci_ops = {
+ .read = sun4v_read_pci_cfg,
+ .write = sun4v_write_pci_cfg,
+};
+
+void pci_get_pbm_props(struct pci_pbm_info *pbm)
+{
+ const u32 *val = of_get_property(pbm->op->node, "bus-range", NULL);
+
+ pbm->pci_first_busno = val[0];
+ pbm->pci_last_busno = val[1];
+
+ val = of_get_property(pbm->op->node, "ino-bitmap", NULL);
+ if (val) {
+ pbm->ino_bitmap = (((u64)val[1] << 32UL) |
+ ((u64)val[0] << 0UL));
+ }
+}
+
+static void pci_register_legacy_regions(struct resource *io_res,
+ struct resource *mem_res)
+{
+ struct resource *p;
+
+ /* VGA Video RAM. */
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return;
+
+ p->name = "Video RAM area";
+ p->start = mem_res->start + 0xa0000UL;
+ p->end = p->start + 0x1ffffUL;
+ p->flags = IORESOURCE_BUSY;
+ request_resource(mem_res, p);
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return;
+
+ p->name = "System ROM";
+ p->start = mem_res->start + 0xf0000UL;
+ p->end = p->start + 0xffffUL;
+ p->flags = IORESOURCE_BUSY;
+ request_resource(mem_res, p);
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return;
+
+ p->name = "Video ROM";
+ p->start = mem_res->start + 0xc0000UL;
+ p->end = p->start + 0x7fffUL;
+ p->flags = IORESOURCE_BUSY;
+ request_resource(mem_res, p);
+}
+
+static void pci_register_iommu_region(struct pci_pbm_info *pbm)
+{
+ const u32 *vdma = of_get_property(pbm->op->node, "virtual-dma", NULL);
+
+ if (vdma) {
+ struct resource *rp = kmalloc(sizeof(*rp), GFP_KERNEL);
+
+ if (!rp) {
+ prom_printf("Cannot allocate IOMMU resource.\n");
+ prom_halt();
+ }
+ rp->name = "IOMMU";
+ rp->start = pbm->mem_space.start + (unsigned long) vdma[0];
+ rp->end = rp->start + (unsigned long) vdma[1] - 1UL;
+ rp->flags = IORESOURCE_BUSY;
+ request_resource(&pbm->mem_space, rp);
+ }
+}
+
+void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
+{
+ const struct linux_prom_pci_ranges *pbm_ranges;
+ int i, saw_mem, saw_io;
+ int num_pbm_ranges;
+
+ saw_mem = saw_io = 0;
+ pbm_ranges = of_get_property(pbm->op->node, "ranges", &i);
+ if (!pbm_ranges) {
+ prom_printf("PCI: Fatal error, missing PBM ranges property "
+ " for %s\n",
+ pbm->name);
+ prom_halt();
+ }
+
+ num_pbm_ranges = i / sizeof(*pbm_ranges);
+
+ for (i = 0; i < num_pbm_ranges; i++) {
+ const struct linux_prom_pci_ranges *pr = &pbm_ranges[i];
+ unsigned long a, size;
+ u32 parent_phys_hi, parent_phys_lo;
+ u32 size_hi, size_lo;
+ int type;
+
+ parent_phys_hi = pr->parent_phys_hi;
+ parent_phys_lo = pr->parent_phys_lo;
+ if (tlb_type == hypervisor)
+ parent_phys_hi &= 0x0fffffff;
+
+ size_hi = pr->size_hi;
+ size_lo = pr->size_lo;
+
+ type = (pr->child_phys_hi >> 24) & 0x3;
+ a = (((unsigned long)parent_phys_hi << 32UL) |
+ ((unsigned long)parent_phys_lo << 0UL));
+ size = (((unsigned long)size_hi << 32UL) |
+ ((unsigned long)size_lo << 0UL));
+
+ switch (type) {
+ case 0:
+ /* PCI config space, 16MB */
+ pbm->config_space = a;
+ break;
+
+ case 1:
+ /* 16-bit IO space, 16MB */
+ pbm->io_space.start = a;
+ pbm->io_space.end = a + size - 1UL;
+ pbm->io_space.flags = IORESOURCE_IO;
+ saw_io = 1;
+ break;
+
+ case 2:
+ /* 32-bit MEM space, 2GB */
+ pbm->mem_space.start = a;
+ pbm->mem_space.end = a + size - 1UL;
+ pbm->mem_space.flags = IORESOURCE_MEM;
+ saw_mem = 1;
+ break;
+
+ case 3:
+ /* XXX 64-bit MEM handling XXX */
+
+ default:
+ break;
+ };
+ }
+
+ if (!saw_io || !saw_mem) {
+ prom_printf("%s: Fatal error, missing %s PBM range.\n",
+ pbm->name,
+ (!saw_io ? "IO" : "MEM"));
+ prom_halt();
+ }
+
+ printk("%s: PCI IO[%lx] MEM[%lx]\n",
+ pbm->name,
+ pbm->io_space.start,
+ pbm->mem_space.start);
+
+ pbm->io_space.name = pbm->mem_space.name = pbm->name;
+
+ request_resource(&ioport_resource, &pbm->io_space);
+ request_resource(&iomem_resource, &pbm->mem_space);
+
+ pci_register_legacy_regions(&pbm->io_space,
+ &pbm->mem_space);
+ pci_register_iommu_region(pbm);
+}
+
+/* Generic helper routines for PCI error reporting. */
+void pci_scan_for_target_abort(struct pci_pbm_info *pbm,
+ struct pci_bus *pbus)
+{
+ struct pci_dev *pdev;
+ struct pci_bus *bus;
+
+ list_for_each_entry(pdev, &pbus->devices, bus_list) {
+ u16 status, error_bits;
+
+ pci_read_config_word(pdev, PCI_STATUS, &status);
+ error_bits =
+ (status & (PCI_STATUS_SIG_TARGET_ABORT |
+ PCI_STATUS_REC_TARGET_ABORT));
+ if (error_bits) {
+ pci_write_config_word(pdev, PCI_STATUS, error_bits);
+ printk("%s: Device %s saw Target Abort [%016x]\n",
+ pbm->name, pci_name(pdev), status);
+ }
+ }
+
+ list_for_each_entry(bus, &pbus->children, node)
+ pci_scan_for_target_abort(pbm, bus);
+}
+
+void pci_scan_for_master_abort(struct pci_pbm_info *pbm,
+ struct pci_bus *pbus)
+{
+ struct pci_dev *pdev;
+ struct pci_bus *bus;
+
+ list_for_each_entry(pdev, &pbus->devices, bus_list) {
+ u16 status, error_bits;
+
+ pci_read_config_word(pdev, PCI_STATUS, &status);
+ error_bits =
+ (status & (PCI_STATUS_REC_MASTER_ABORT));
+ if (error_bits) {
+ pci_write_config_word(pdev, PCI_STATUS, error_bits);
+ printk("%s: Device %s received Master Abort [%016x]\n",
+ pbm->name, pci_name(pdev), status);
+ }
+ }
+
+ list_for_each_entry(bus, &pbus->children, node)
+ pci_scan_for_master_abort(pbm, bus);
+}
+
+void pci_scan_for_parity_error(struct pci_pbm_info *pbm,
+ struct pci_bus *pbus)
+{
+ struct pci_dev *pdev;
+ struct pci_bus *bus;
+
+ list_for_each_entry(pdev, &pbus->devices, bus_list) {
+ u16 status, error_bits;
+
+ pci_read_config_word(pdev, PCI_STATUS, &status);
+ error_bits =
+ (status & (PCI_STATUS_PARITY |
+ PCI_STATUS_DETECTED_PARITY));
+ if (error_bits) {
+ pci_write_config_word(pdev, PCI_STATUS, error_bits);
+ printk("%s: Device %s saw Parity Error [%016x]\n",
+ pbm->name, pci_name(pdev), status);
+ }
+ }
+
+ list_for_each_entry(bus, &pbus->children, node)
+ pci_scan_for_parity_error(pbm, bus);
+}
diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c
new file mode 100644
index 000000000000..9462b68f4894
--- /dev/null
+++ b/arch/sparc/kernel/pci_fire.c
@@ -0,0 +1,521 @@
+/* pci_fire.c: Sun4u platform PCI-E controller support.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/msi.h>
+#include <linux/irq.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/irq.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+
+#define DRIVER_NAME "fire"
+#define PFX DRIVER_NAME ": "
+
+#define FIRE_IOMMU_CONTROL 0x40000UL
+#define FIRE_IOMMU_TSBBASE 0x40008UL
+#define FIRE_IOMMU_FLUSH 0x40100UL
+#define FIRE_IOMMU_FLUSHINV 0x40108UL
+
+static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm)
+{
+ struct iommu *iommu = pbm->iommu;
+ u32 vdma[2], dma_mask;
+ u64 control;
+ int tsbsize, err;
+
+ /* No virtual-dma property on these guys, use largest size. */
+ vdma[0] = 0xc0000000; /* base */
+ vdma[1] = 0x40000000; /* size */
+ dma_mask = 0xffffffff;
+ tsbsize = 128;
+
+ /* Register addresses. */
+ iommu->iommu_control = pbm->pbm_regs + FIRE_IOMMU_CONTROL;
+ iommu->iommu_tsbbase = pbm->pbm_regs + FIRE_IOMMU_TSBBASE;
+ iommu->iommu_flush = pbm->pbm_regs + FIRE_IOMMU_FLUSH;
+ iommu->iommu_flushinv = pbm->pbm_regs + FIRE_IOMMU_FLUSHINV;
+
+ /* We use the main control/status register of FIRE as the write
+ * completion register.
+ */
+ iommu->write_complete_reg = pbm->controller_regs + 0x410000UL;
+
+ /*
+ * Invalidate TLB Entries.
+ */
+ upa_writeq(~(u64)0, iommu->iommu_flushinv);
+
+ err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
+ pbm->numa_node);
+ if (err)
+ return err;
+
+ upa_writeq(__pa(iommu->page_table) | 0x7UL, iommu->iommu_tsbbase);
+
+ control = upa_readq(iommu->iommu_control);
+ control |= (0x00000400 /* TSB cache snoop enable */ |
+ 0x00000300 /* Cache mode */ |
+ 0x00000002 /* Bypass enable */ |
+ 0x00000001 /* Translation enable */);
+ upa_writeq(control, iommu->iommu_control);
+
+ return 0;
+}
+
+#ifdef CONFIG_PCI_MSI
+struct pci_msiq_entry {
+ u64 word0;
+#define MSIQ_WORD0_RESV 0x8000000000000000UL
+#define MSIQ_WORD0_FMT_TYPE 0x7f00000000000000UL
+#define MSIQ_WORD0_FMT_TYPE_SHIFT 56
+#define MSIQ_WORD0_LEN 0x00ffc00000000000UL
+#define MSIQ_WORD0_LEN_SHIFT 46
+#define MSIQ_WORD0_ADDR0 0x00003fff00000000UL
+#define MSIQ_WORD0_ADDR0_SHIFT 32
+#define MSIQ_WORD0_RID 0x00000000ffff0000UL
+#define MSIQ_WORD0_RID_SHIFT 16
+#define MSIQ_WORD0_DATA0 0x000000000000ffffUL
+#define MSIQ_WORD0_DATA0_SHIFT 0
+
+#define MSIQ_TYPE_MSG 0x6
+#define MSIQ_TYPE_MSI32 0xb
+#define MSIQ_TYPE_MSI64 0xf
+
+ u64 word1;
+#define MSIQ_WORD1_ADDR1 0xffffffffffff0000UL
+#define MSIQ_WORD1_ADDR1_SHIFT 16
+#define MSIQ_WORD1_DATA1 0x000000000000ffffUL
+#define MSIQ_WORD1_DATA1_SHIFT 0
+
+ u64 resv[6];
+};
+
+/* All MSI registers are offset from pbm->pbm_regs */
+#define EVENT_QUEUE_BASE_ADDR_REG 0x010000UL
+#define EVENT_QUEUE_BASE_ADDR_ALL_ONES 0xfffc000000000000UL
+
+#define EVENT_QUEUE_CONTROL_SET(EQ) (0x011000UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_CONTROL_SET_OFLOW 0x0200000000000000UL
+#define EVENT_QUEUE_CONTROL_SET_EN 0x0000100000000000UL
+
+#define EVENT_QUEUE_CONTROL_CLEAR(EQ) (0x011200UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_CONTROL_CLEAR_OF 0x0200000000000000UL
+#define EVENT_QUEUE_CONTROL_CLEAR_E2I 0x0000800000000000UL
+#define EVENT_QUEUE_CONTROL_CLEAR_DIS 0x0000100000000000UL
+
+#define EVENT_QUEUE_STATE(EQ) (0x011400UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_STATE_MASK 0x0000000000000007UL
+#define EVENT_QUEUE_STATE_IDLE 0x0000000000000001UL
+#define EVENT_QUEUE_STATE_ACTIVE 0x0000000000000002UL
+#define EVENT_QUEUE_STATE_ERROR 0x0000000000000004UL
+
+#define EVENT_QUEUE_TAIL(EQ) (0x011600UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_TAIL_OFLOW 0x0200000000000000UL
+#define EVENT_QUEUE_TAIL_VAL 0x000000000000007fUL
+
+#define EVENT_QUEUE_HEAD(EQ) (0x011800UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_HEAD_VAL 0x000000000000007fUL
+
+#define MSI_MAP(MSI) (0x020000UL + (MSI) * 0x8UL)
+#define MSI_MAP_VALID 0x8000000000000000UL
+#define MSI_MAP_EQWR_N 0x4000000000000000UL
+#define MSI_MAP_EQNUM 0x000000000000003fUL
+
+#define MSI_CLEAR(MSI) (0x028000UL + (MSI) * 0x8UL)
+#define MSI_CLEAR_EQWR_N 0x4000000000000000UL
+
+#define IMONDO_DATA0 0x02C000UL
+#define IMONDO_DATA0_DATA 0xffffffffffffffc0UL
+
+#define IMONDO_DATA1 0x02C008UL
+#define IMONDO_DATA1_DATA 0xffffffffffffffffUL
+
+#define MSI_32BIT_ADDR 0x034000UL
+#define MSI_32BIT_ADDR_VAL 0x00000000ffff0000UL
+
+#define MSI_64BIT_ADDR 0x034008UL
+#define MSI_64BIT_ADDR_VAL 0xffffffffffff0000UL
+
+static int pci_fire_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head)
+{
+ *head = upa_readq(pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+ return 0;
+}
+
+static int pci_fire_dequeue_msi(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head, unsigned long *msi)
+{
+ unsigned long type_fmt, type, msi_num;
+ struct pci_msiq_entry *base, *ep;
+
+ base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * 8192));
+ ep = &base[*head];
+
+ if ((ep->word0 & MSIQ_WORD0_FMT_TYPE) == 0)
+ return 0;
+
+ type_fmt = ((ep->word0 & MSIQ_WORD0_FMT_TYPE) >>
+ MSIQ_WORD0_FMT_TYPE_SHIFT);
+ type = (type_fmt >> 3);
+ if (unlikely(type != MSIQ_TYPE_MSI32 &&
+ type != MSIQ_TYPE_MSI64))
+ return -EINVAL;
+
+ *msi = msi_num = ((ep->word0 & MSIQ_WORD0_DATA0) >>
+ MSIQ_WORD0_DATA0_SHIFT);
+
+ upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi_num));
+
+ /* Clear the entry. */
+ ep->word0 &= ~MSIQ_WORD0_FMT_TYPE;
+
+ /* Go to next entry in ring. */
+ (*head)++;
+ if (*head >= pbm->msiq_ent_count)
+ *head = 0;
+
+ return 1;
+}
+
+static int pci_fire_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long head)
+{
+ upa_writeq(head, pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+ return 0;
+}
+
+static int pci_fire_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long msi, int is_msi64)
+{
+ u64 val;
+
+ val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+ val &= ~(MSI_MAP_EQNUM);
+ val |= msiqid;
+ upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+ upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi));
+
+ val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+ val |= MSI_MAP_VALID;
+ upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+ return 0;
+}
+
+static int pci_fire_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
+{
+ unsigned long msiqid;
+ u64 val;
+
+ val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+ msiqid = (val & MSI_MAP_EQNUM);
+
+ val &= ~MSI_MAP_VALID;
+
+ upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+ return 0;
+}
+
+static int pci_fire_msiq_alloc(struct pci_pbm_info *pbm)
+{
+ unsigned long pages, order, i;
+
+ order = get_order(512 * 1024);
+ pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
+ if (pages == 0UL) {
+ printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
+ order);
+ return -ENOMEM;
+ }
+ memset((char *)pages, 0, PAGE_SIZE << order);
+ pbm->msi_queues = (void *) pages;
+
+ upa_writeq((EVENT_QUEUE_BASE_ADDR_ALL_ONES |
+ __pa(pbm->msi_queues)),
+ pbm->pbm_regs + EVENT_QUEUE_BASE_ADDR_REG);
+
+ upa_writeq(pbm->portid << 6, pbm->pbm_regs + IMONDO_DATA0);
+ upa_writeq(0, pbm->pbm_regs + IMONDO_DATA1);
+
+ upa_writeq(pbm->msi32_start, pbm->pbm_regs + MSI_32BIT_ADDR);
+ upa_writeq(pbm->msi64_start, pbm->pbm_regs + MSI_64BIT_ADDR);
+
+ for (i = 0; i < pbm->msiq_num; i++) {
+ upa_writeq(0, pbm->pbm_regs + EVENT_QUEUE_HEAD(i));
+ upa_writeq(0, pbm->pbm_regs + EVENT_QUEUE_TAIL(i));
+ }
+
+ return 0;
+}
+
+static void pci_fire_msiq_free(struct pci_pbm_info *pbm)
+{
+ unsigned long pages, order;
+
+ order = get_order(512 * 1024);
+ pages = (unsigned long) pbm->msi_queues;
+
+ free_pages(pages, order);
+
+ pbm->msi_queues = NULL;
+}
+
+static int pci_fire_msiq_build_irq(struct pci_pbm_info *pbm,
+ unsigned long msiqid,
+ unsigned long devino)
+{
+ unsigned long cregs = (unsigned long) pbm->pbm_regs;
+ unsigned long imap_reg, iclr_reg, int_ctrlr;
+ unsigned int virt_irq;
+ int fixup;
+ u64 val;
+
+ imap_reg = cregs + (0x001000UL + (devino * 0x08UL));
+ iclr_reg = cregs + (0x001400UL + (devino * 0x08UL));
+
+ /* XXX iterate amongst the 4 IRQ controllers XXX */
+ int_ctrlr = (1UL << 6);
+
+ val = upa_readq(imap_reg);
+ val |= (1UL << 63) | int_ctrlr;
+ upa_writeq(val, imap_reg);
+
+ fixup = ((pbm->portid << 6) | devino) - int_ctrlr;
+
+ virt_irq = build_irq(fixup, iclr_reg, imap_reg);
+ if (!virt_irq)
+ return -ENOMEM;
+
+ upa_writeq(EVENT_QUEUE_CONTROL_SET_EN,
+ pbm->pbm_regs + EVENT_QUEUE_CONTROL_SET(msiqid));
+
+ return virt_irq;
+}
+
+static const struct sparc64_msiq_ops pci_fire_msiq_ops = {
+ .get_head = pci_fire_get_head,
+ .dequeue_msi = pci_fire_dequeue_msi,
+ .set_head = pci_fire_set_head,
+ .msi_setup = pci_fire_msi_setup,
+ .msi_teardown = pci_fire_msi_teardown,
+ .msiq_alloc = pci_fire_msiq_alloc,
+ .msiq_free = pci_fire_msiq_free,
+ .msiq_build_irq = pci_fire_msiq_build_irq,
+};
+
+static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+{
+ sparc64_pbm_msi_init(pbm, &pci_fire_msiq_ops);
+}
+#else /* CONFIG_PCI_MSI */
+static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+{
+}
+#endif /* !(CONFIG_PCI_MSI) */
+
+/* Based at pbm->controller_regs */
+#define FIRE_PARITY_CONTROL 0x470010UL
+#define FIRE_PARITY_ENAB 0x8000000000000000UL
+#define FIRE_FATAL_RESET_CTL 0x471028UL
+#define FIRE_FATAL_RESET_SPARE 0x0000000004000000UL
+#define FIRE_FATAL_RESET_MB 0x0000000002000000UL
+#define FIRE_FATAL_RESET_CPE 0x0000000000008000UL
+#define FIRE_FATAL_RESET_APE 0x0000000000004000UL
+#define FIRE_FATAL_RESET_PIO 0x0000000000000040UL
+#define FIRE_FATAL_RESET_JW 0x0000000000000004UL
+#define FIRE_FATAL_RESET_JI 0x0000000000000002UL
+#define FIRE_FATAL_RESET_JR 0x0000000000000001UL
+#define FIRE_CORE_INTR_ENABLE 0x471800UL
+
+/* Based at pbm->pbm_regs */
+#define FIRE_TLU_CTRL 0x80000UL
+#define FIRE_TLU_CTRL_TIM 0x00000000da000000UL
+#define FIRE_TLU_CTRL_QDET 0x0000000000000100UL
+#define FIRE_TLU_CTRL_CFG 0x0000000000000001UL
+#define FIRE_TLU_DEV_CTRL 0x90008UL
+#define FIRE_TLU_LINK_CTRL 0x90020UL
+#define FIRE_TLU_LINK_CTRL_CLK 0x0000000000000040UL
+#define FIRE_LPU_RESET 0xe2008UL
+#define FIRE_LPU_LLCFG 0xe2200UL
+#define FIRE_LPU_LLCFG_VC0 0x0000000000000100UL
+#define FIRE_LPU_FCTRL_UCTRL 0xe2240UL
+#define FIRE_LPU_FCTRL_UCTRL_N 0x0000000000000002UL
+#define FIRE_LPU_FCTRL_UCTRL_P 0x0000000000000001UL
+#define FIRE_LPU_TXL_FIFOP 0xe2430UL
+#define FIRE_LPU_LTSSM_CFG2 0xe2788UL
+#define FIRE_LPU_LTSSM_CFG3 0xe2790UL
+#define FIRE_LPU_LTSSM_CFG4 0xe2798UL
+#define FIRE_LPU_LTSSM_CFG5 0xe27a0UL
+#define FIRE_DMC_IENAB 0x31800UL
+#define FIRE_DMC_DBG_SEL_A 0x53000UL
+#define FIRE_DMC_DBG_SEL_B 0x53008UL
+#define FIRE_PEC_IENAB 0x51800UL
+
+static void pci_fire_hw_init(struct pci_pbm_info *pbm)
+{
+ u64 val;
+
+ upa_writeq(FIRE_PARITY_ENAB,
+ pbm->controller_regs + FIRE_PARITY_CONTROL);
+
+ upa_writeq((FIRE_FATAL_RESET_SPARE |
+ FIRE_FATAL_RESET_MB |
+ FIRE_FATAL_RESET_CPE |
+ FIRE_FATAL_RESET_APE |
+ FIRE_FATAL_RESET_PIO |
+ FIRE_FATAL_RESET_JW |
+ FIRE_FATAL_RESET_JI |
+ FIRE_FATAL_RESET_JR),
+ pbm->controller_regs + FIRE_FATAL_RESET_CTL);
+
+ upa_writeq(~(u64)0, pbm->controller_regs + FIRE_CORE_INTR_ENABLE);
+
+ val = upa_readq(pbm->pbm_regs + FIRE_TLU_CTRL);
+ val |= (FIRE_TLU_CTRL_TIM |
+ FIRE_TLU_CTRL_QDET |
+ FIRE_TLU_CTRL_CFG);
+ upa_writeq(val, pbm->pbm_regs + FIRE_TLU_CTRL);
+ upa_writeq(0, pbm->pbm_regs + FIRE_TLU_DEV_CTRL);
+ upa_writeq(FIRE_TLU_LINK_CTRL_CLK,
+ pbm->pbm_regs + FIRE_TLU_LINK_CTRL);
+
+ upa_writeq(0, pbm->pbm_regs + FIRE_LPU_RESET);
+ upa_writeq(FIRE_LPU_LLCFG_VC0, pbm->pbm_regs + FIRE_LPU_LLCFG);
+ upa_writeq((FIRE_LPU_FCTRL_UCTRL_N | FIRE_LPU_FCTRL_UCTRL_P),
+ pbm->pbm_regs + FIRE_LPU_FCTRL_UCTRL);
+ upa_writeq(((0xffff << 16) | (0x0000 << 0)),
+ pbm->pbm_regs + FIRE_LPU_TXL_FIFOP);
+ upa_writeq(3000000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG2);
+ upa_writeq(500000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG3);
+ upa_writeq((2 << 16) | (140 << 8),
+ pbm->pbm_regs + FIRE_LPU_LTSSM_CFG4);
+ upa_writeq(0, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG5);
+
+ upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_DMC_IENAB);
+ upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_A);
+ upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_B);
+
+ upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_PEC_IENAB);
+}
+
+static int __init pci_fire_pbm_init(struct pci_pbm_info *pbm,
+ struct of_device *op, u32 portid)
+{
+ const struct linux_prom64_registers *regs;
+ struct device_node *dp = op->node;
+ int err;
+
+ pbm->numa_node = -1;
+
+ pbm->pci_ops = &sun4u_pci_ops;
+ pbm->config_space_reg_bits = 12;
+
+ pbm->index = pci_num_pbms++;
+
+ pbm->portid = portid;
+ pbm->op = op;
+ pbm->name = dp->full_name;
+
+ regs = of_get_property(dp, "reg", NULL);
+ pbm->pbm_regs = regs[0].phys_addr;
+ pbm->controller_regs = regs[1].phys_addr - 0x410000UL;
+
+ printk("%s: SUN4U PCIE Bus Module\n", pbm->name);
+
+ pci_determine_mem_io_space(pbm);
+
+ pci_get_pbm_props(pbm);
+
+ pci_fire_hw_init(pbm);
+
+ err = pci_fire_pbm_iommu_init(pbm);
+ if (err)
+ return err;
+
+ pci_fire_msi_init(pbm);
+
+ pbm->pci_bus = pci_scan_one_pbm(pbm, &op->dev);
+
+ /* XXX register error interrupt handlers XXX */
+
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ return 0;
+}
+
+static int __devinit fire_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ struct device_node *dp = op->node;
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ u32 portid;
+ int err;
+
+ portid = of_getintprop_default(dp, "portid", 0xff);
+
+ err = -ENOMEM;
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Cannot allocate pci_pbminfo.\n");
+ goto out_err;
+ }
+
+ iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+ goto out_free_controller;
+ }
+
+ pbm->iommu = iommu;
+
+ err = pci_fire_pbm_init(pbm, op, portid);
+ if (err)
+ goto out_free_iommu;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+
+out_free_iommu:
+ kfree(pbm->iommu);
+
+out_free_controller:
+ kfree(pbm);
+
+out_err:
+ return err;
+}
+
+static struct of_device_id __initdata fire_match[] = {
+ {
+ .name = "pci",
+ .compatible = "pciex108e,80f0",
+ },
+ {},
+};
+
+static struct of_platform_driver fire_driver = {
+ .name = DRIVER_NAME,
+ .match_table = fire_match,
+ .probe = fire_probe,
+};
+
+static int __init fire_init(void)
+{
+ return of_register_driver(&fire_driver, &of_bus_type);
+}
+
+subsys_initcall(fire_init);
diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h
new file mode 100644
index 000000000000..03186824327e
--- /dev/null
+++ b/arch/sparc/kernel/pci_impl.h
@@ -0,0 +1,185 @@
+/* pci_impl.h: Helper definitions for PCI controller support.
+ *
+ * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef PCI_IMPL_H
+#define PCI_IMPL_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/of_device.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+
+/* The abstraction used here is that there are PCI controllers,
+ * each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules
+ * underneath. Each PCI bus module uses an IOMMU (shared by both
+ * PBMs of a controller, or per-PBM), and if a streaming buffer
+ * is present, each PCI bus module has it's own. (ie. the IOMMU
+ * might be shared between PBMs, the STC is never shared)
+ * Furthermore, each PCI bus module controls it's own autonomous
+ * PCI bus.
+ */
+
+#define PCI_STC_FLUSHFLAG_INIT(STC) \
+ (*((STC)->strbuf_flushflag) = 0UL)
+#define PCI_STC_FLUSHFLAG_SET(STC) \
+ (*((STC)->strbuf_flushflag) != 0UL)
+
+#ifdef CONFIG_PCI_MSI
+struct pci_pbm_info;
+struct sparc64_msiq_ops {
+ int (*get_head)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head);
+ int (*dequeue_msi)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head, unsigned long *msi);
+ int (*set_head)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long head);
+ int (*msi_setup)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long msi, int is_msi64);
+ int (*msi_teardown)(struct pci_pbm_info *pbm, unsigned long msi);
+ int (*msiq_alloc)(struct pci_pbm_info *pbm);
+ void (*msiq_free)(struct pci_pbm_info *pbm);
+ int (*msiq_build_irq)(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long devino);
+};
+
+extern void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
+ const struct sparc64_msiq_ops *ops);
+
+struct sparc64_msiq_cookie {
+ struct pci_pbm_info *pbm;
+ unsigned long msiqid;
+};
+#endif
+
+struct pci_pbm_info {
+ struct pci_pbm_info *next;
+ struct pci_pbm_info *sibling;
+ int index;
+
+ /* Physical address base of controller registers. */
+ unsigned long controller_regs;
+
+ /* Physical address base of PBM registers. */
+ unsigned long pbm_regs;
+
+ /* Physical address of DMA sync register, if any. */
+ unsigned long sync_reg;
+
+ /* Opaque 32-bit system bus Port ID. */
+ u32 portid;
+
+ /* Opaque 32-bit handle used for hypervisor calls. */
+ u32 devhandle;
+
+ /* Chipset version information. */
+ int chip_type;
+#define PBM_CHIP_TYPE_SABRE 1
+#define PBM_CHIP_TYPE_PSYCHO 2
+#define PBM_CHIP_TYPE_SCHIZO 3
+#define PBM_CHIP_TYPE_SCHIZO_PLUS 4
+#define PBM_CHIP_TYPE_TOMATILLO 5
+ int chip_version;
+ int chip_revision;
+
+ /* Name used for top-level resources. */
+ char *name;
+
+ /* OBP specific information. */
+ struct of_device *op;
+ u64 ino_bitmap;
+
+ /* PBM I/O and Memory space resources. */
+ struct resource io_space;
+ struct resource mem_space;
+
+ /* Base of PCI Config space, can be per-PBM or shared. */
+ unsigned long config_space;
+
+ /* This will be 12 on PCI-E controllers, 8 elsewhere. */
+ unsigned long config_space_reg_bits;
+
+ unsigned long pci_afsr;
+ unsigned long pci_afar;
+ unsigned long pci_csr;
+
+ /* State of 66MHz capabilities on this PBM. */
+ int is_66mhz_capable;
+ int all_devs_66mhz;
+
+#ifdef CONFIG_PCI_MSI
+ /* MSI info. */
+ u32 msiq_num;
+ u32 msiq_ent_count;
+ u32 msiq_first;
+ u32 msiq_first_devino;
+ u32 msiq_rotor;
+ struct sparc64_msiq_cookie *msiq_irq_cookies;
+ u32 msi_num;
+ u32 msi_first;
+ u32 msi_data_mask;
+ u32 msix_data_width;
+ u64 msi32_start;
+ u64 msi64_start;
+ u32 msi32_len;
+ u32 msi64_len;
+ void *msi_queues;
+ unsigned long *msi_bitmap;
+ unsigned int *msi_irq_table;
+ int (*setup_msi_irq)(unsigned int *virt_irq_p, struct pci_dev *pdev,
+ struct msi_desc *entry);
+ void (*teardown_msi_irq)(unsigned int virt_irq, struct pci_dev *pdev);
+ const struct sparc64_msiq_ops *msi_ops;
+#endif /* !(CONFIG_PCI_MSI) */
+
+ /* This PBM's streaming buffer. */
+ struct strbuf stc;
+
+ /* IOMMU state, potentially shared by both PBM segments. */
+ struct iommu *iommu;
+
+ /* Now things for the actual PCI bus probes. */
+ unsigned int pci_first_busno;
+ unsigned int pci_last_busno;
+ struct pci_bus *pci_bus;
+ struct pci_ops *pci_ops;
+
+ int numa_node;
+};
+
+extern struct pci_pbm_info *pci_pbm_root;
+
+extern int pci_num_pbms;
+
+/* PCI bus scanning and fixup support. */
+extern void pci_get_pbm_props(struct pci_pbm_info *pbm);
+extern struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
+ struct device *parent);
+extern void pci_determine_mem_io_space(struct pci_pbm_info *pbm);
+
+/* Error reporting support. */
+extern void pci_scan_for_target_abort(struct pci_pbm_info *, struct pci_bus *);
+extern void pci_scan_for_master_abort(struct pci_pbm_info *, struct pci_bus *);
+extern void pci_scan_for_parity_error(struct pci_pbm_info *, struct pci_bus *);
+
+/* Configuration space access. */
+extern void pci_config_read8(u8 *addr, u8 *ret);
+extern void pci_config_read16(u16 *addr, u16 *ret);
+extern void pci_config_read32(u32 *addr, u32 *ret);
+extern void pci_config_write8(u8 *addr, u8 val);
+extern void pci_config_write16(u16 *addr, u16 val);
+extern void pci_config_write32(u32 *addr, u32 val);
+
+extern struct pci_ops sun4u_pci_ops;
+extern struct pci_ops sun4v_pci_ops;
+
+extern volatile int pci_poke_in_progress;
+extern volatile int pci_poke_cpu;
+extern volatile int pci_poke_faulted;
+
+#endif /* !(PCI_IMPL_H) */
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
new file mode 100644
index 000000000000..0d0cd815e83e
--- /dev/null
+++ b/arch/sparc/kernel/pci_msi.c
@@ -0,0 +1,447 @@
+/* pci_msi.c: Sparc64 MSI support common layer.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include "pci_impl.h"
+
+static irqreturn_t sparc64_msiq_interrupt(int irq, void *cookie)
+{
+ struct sparc64_msiq_cookie *msiq_cookie = cookie;
+ struct pci_pbm_info *pbm = msiq_cookie->pbm;
+ unsigned long msiqid = msiq_cookie->msiqid;
+ const struct sparc64_msiq_ops *ops;
+ unsigned long orig_head, head;
+ int err;
+
+ ops = pbm->msi_ops;
+
+ err = ops->get_head(pbm, msiqid, &head);
+ if (unlikely(err < 0))
+ goto err_get_head;
+
+ orig_head = head;
+ for (;;) {
+ unsigned long msi;
+
+ err = ops->dequeue_msi(pbm, msiqid, &head, &msi);
+ if (likely(err > 0)) {
+ struct irq_desc *desc;
+ unsigned int virt_irq;
+
+ virt_irq = pbm->msi_irq_table[msi - pbm->msi_first];
+ desc = irq_desc + virt_irq;
+
+ desc->handle_irq(virt_irq, desc);
+ }
+
+ if (unlikely(err < 0))
+ goto err_dequeue;
+
+ if (err == 0)
+ break;
+ }
+ if (likely(head != orig_head)) {
+ err = ops->set_head(pbm, msiqid, head);
+ if (unlikely(err < 0))
+ goto err_set_head;
+ }
+ return IRQ_HANDLED;
+
+err_get_head:
+ printk(KERN_EMERG "MSI: Get head on msiqid[%lu] gives error %d\n",
+ msiqid, err);
+ goto err_out;
+
+err_dequeue:
+ printk(KERN_EMERG "MSI: Dequeue head[%lu] from msiqid[%lu] "
+ "gives error %d\n",
+ head, msiqid, err);
+ goto err_out;
+
+err_set_head:
+ printk(KERN_EMERG "MSI: Set head[%lu] on msiqid[%lu] "
+ "gives error %d\n",
+ head, msiqid, err);
+ goto err_out;
+
+err_out:
+ return IRQ_NONE;
+}
+
+static u32 pick_msiq(struct pci_pbm_info *pbm)
+{
+ static DEFINE_SPINLOCK(rotor_lock);
+ unsigned long flags;
+ u32 ret, rotor;
+
+ spin_lock_irqsave(&rotor_lock, flags);
+
+ rotor = pbm->msiq_rotor;
+ ret = pbm->msiq_first + rotor;
+
+ if (++rotor >= pbm->msiq_num)
+ rotor = 0;
+ pbm->msiq_rotor = rotor;
+
+ spin_unlock_irqrestore(&rotor_lock, flags);
+
+ return ret;
+}
+
+
+static int alloc_msi(struct pci_pbm_info *pbm)
+{
+ int i;
+
+ for (i = 0; i < pbm->msi_num; i++) {
+ if (!test_and_set_bit(i, pbm->msi_bitmap))
+ return i + pbm->msi_first;
+ }
+
+ return -ENOENT;
+}
+
+static void free_msi(struct pci_pbm_info *pbm, int msi_num)
+{
+ msi_num -= pbm->msi_first;
+ clear_bit(msi_num, pbm->msi_bitmap);
+}
+
+static struct irq_chip msi_irq = {
+ .typename = "PCI-MSI",
+ .mask = mask_msi_irq,
+ .unmask = unmask_msi_irq,
+ .enable = unmask_msi_irq,
+ .disable = mask_msi_irq,
+ /* XXX affinity XXX */
+};
+
+static int sparc64_setup_msi_irq(unsigned int *virt_irq_p,
+ struct pci_dev *pdev,
+ struct msi_desc *entry)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ const struct sparc64_msiq_ops *ops = pbm->msi_ops;
+ struct msi_msg msg;
+ int msi, err;
+ u32 msiqid;
+
+ *virt_irq_p = virt_irq_alloc(0, 0);
+ err = -ENOMEM;
+ if (!*virt_irq_p)
+ goto out_err;
+
+ set_irq_chip_and_handler_name(*virt_irq_p, &msi_irq,
+ handle_simple_irq, "MSI");
+
+ err = alloc_msi(pbm);
+ if (unlikely(err < 0))
+ goto out_virt_irq_free;
+
+ msi = err;
+
+ msiqid = pick_msiq(pbm);
+
+ err = ops->msi_setup(pbm, msiqid, msi,
+ (entry->msi_attrib.is_64 ? 1 : 0));
+ if (err)
+ goto out_msi_free;
+
+ pbm->msi_irq_table[msi - pbm->msi_first] = *virt_irq_p;
+
+ if (entry->msi_attrib.is_64) {
+ msg.address_hi = pbm->msi64_start >> 32;
+ msg.address_lo = pbm->msi64_start & 0xffffffff;
+ } else {
+ msg.address_hi = 0;
+ msg.address_lo = pbm->msi32_start;
+ }
+ msg.data = msi;
+
+ set_irq_msi(*virt_irq_p, entry);
+ write_msi_msg(*virt_irq_p, &msg);
+
+ return 0;
+
+out_msi_free:
+ free_msi(pbm, msi);
+
+out_virt_irq_free:
+ set_irq_chip(*virt_irq_p, NULL);
+ virt_irq_free(*virt_irq_p);
+ *virt_irq_p = 0;
+
+out_err:
+ return err;
+}
+
+static void sparc64_teardown_msi_irq(unsigned int virt_irq,
+ struct pci_dev *pdev)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ const struct sparc64_msiq_ops *ops = pbm->msi_ops;
+ unsigned int msi_num;
+ int i, err;
+
+ for (i = 0; i < pbm->msi_num; i++) {
+ if (pbm->msi_irq_table[i] == virt_irq)
+ break;
+ }
+ if (i >= pbm->msi_num) {
+ printk(KERN_ERR "%s: teardown: No MSI for irq %u\n",
+ pbm->name, virt_irq);
+ return;
+ }
+
+ msi_num = pbm->msi_first + i;
+ pbm->msi_irq_table[i] = ~0U;
+
+ err = ops->msi_teardown(pbm, msi_num);
+ if (err) {
+ printk(KERN_ERR "%s: teardown: ops->teardown() on MSI %u, "
+ "irq %u, gives error %d\n",
+ pbm->name, msi_num, virt_irq, err);
+ return;
+ }
+
+ free_msi(pbm, msi_num);
+
+ set_irq_chip(virt_irq, NULL);
+ virt_irq_free(virt_irq);
+}
+
+static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
+{
+ unsigned long size, bits_per_ulong;
+
+ bits_per_ulong = sizeof(unsigned long) * 8;
+ size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
+ size /= 8;
+ BUG_ON(size % sizeof(unsigned long));
+
+ pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
+ if (!pbm->msi_bitmap)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void msi_bitmap_free(struct pci_pbm_info *pbm)
+{
+ kfree(pbm->msi_bitmap);
+ pbm->msi_bitmap = NULL;
+}
+
+static int msi_table_alloc(struct pci_pbm_info *pbm)
+{
+ int size, i;
+
+ size = pbm->msiq_num * sizeof(struct sparc64_msiq_cookie);
+ pbm->msiq_irq_cookies = kzalloc(size, GFP_KERNEL);
+ if (!pbm->msiq_irq_cookies)
+ return -ENOMEM;
+
+ for (i = 0; i < pbm->msiq_num; i++) {
+ struct sparc64_msiq_cookie *p;
+
+ p = &pbm->msiq_irq_cookies[i];
+ p->pbm = pbm;
+ p->msiqid = pbm->msiq_first + i;
+ }
+
+ size = pbm->msi_num * sizeof(unsigned int);
+ pbm->msi_irq_table = kzalloc(size, GFP_KERNEL);
+ if (!pbm->msi_irq_table) {
+ kfree(pbm->msiq_irq_cookies);
+ pbm->msiq_irq_cookies = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void msi_table_free(struct pci_pbm_info *pbm)
+{
+ kfree(pbm->msiq_irq_cookies);
+ pbm->msiq_irq_cookies = NULL;
+
+ kfree(pbm->msi_irq_table);
+ pbm->msi_irq_table = NULL;
+}
+
+static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
+ const struct sparc64_msiq_ops *ops,
+ unsigned long msiqid,
+ unsigned long devino)
+{
+ int irq = ops->msiq_build_irq(pbm, msiqid, devino);
+ int err, nid;
+
+ if (irq < 0)
+ return irq;
+
+ nid = pbm->numa_node;
+ if (nid != -1) {
+ cpumask_t numa_mask = node_to_cpumask(nid);
+
+ irq_set_affinity(irq, &numa_mask);
+ }
+ err = request_irq(irq, sparc64_msiq_interrupt, 0,
+ "MSIQ",
+ &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int sparc64_bringup_msi_queues(struct pci_pbm_info *pbm,
+ const struct sparc64_msiq_ops *ops)
+{
+ int i;
+
+ for (i = 0; i < pbm->msiq_num; i++) {
+ unsigned long msiqid = i + pbm->msiq_first;
+ unsigned long devino = i + pbm->msiq_first_devino;
+ int err;
+
+ err = bringup_one_msi_queue(pbm, ops, msiqid, devino);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
+ const struct sparc64_msiq_ops *ops)
+{
+ const u32 *val;
+ int len;
+
+ val = of_get_property(pbm->op->node, "#msi-eqs", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msiq_num = *val;
+ if (pbm->msiq_num) {
+ const struct msiq_prop {
+ u32 first_msiq;
+ u32 num_msiq;
+ u32 first_devino;
+ } *mqp;
+ const struct msi_range_prop {
+ u32 first_msi;
+ u32 num_msi;
+ } *mrng;
+ const struct addr_range_prop {
+ u32 msi32_high;
+ u32 msi32_low;
+ u32 msi32_len;
+ u32 msi64_high;
+ u32 msi64_low;
+ u32 msi64_len;
+ } *arng;
+
+ val = of_get_property(pbm->op->node, "msi-eq-size", &len);
+ if (!val || len != 4)
+ goto no_msi;
+
+ pbm->msiq_ent_count = *val;
+
+ mqp = of_get_property(pbm->op->node,
+ "msi-eq-to-devino", &len);
+ if (!mqp)
+ mqp = of_get_property(pbm->op->node,
+ "msi-eq-devino", &len);
+ if (!mqp || len != sizeof(struct msiq_prop))
+ goto no_msi;
+
+ pbm->msiq_first = mqp->first_msiq;
+ pbm->msiq_first_devino = mqp->first_devino;
+
+ val = of_get_property(pbm->op->node, "#msi", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msi_num = *val;
+
+ mrng = of_get_property(pbm->op->node, "msi-ranges", &len);
+ if (!mrng || len != sizeof(struct msi_range_prop))
+ goto no_msi;
+ pbm->msi_first = mrng->first_msi;
+
+ val = of_get_property(pbm->op->node, "msi-data-mask", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msi_data_mask = *val;
+
+ val = of_get_property(pbm->op->node, "msix-data-width", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msix_data_width = *val;
+
+ arng = of_get_property(pbm->op->node, "msi-address-ranges",
+ &len);
+ if (!arng || len != sizeof(struct addr_range_prop))
+ goto no_msi;
+ pbm->msi32_start = ((u64)arng->msi32_high << 32) |
+ (u64) arng->msi32_low;
+ pbm->msi64_start = ((u64)arng->msi64_high << 32) |
+ (u64) arng->msi64_low;
+ pbm->msi32_len = arng->msi32_len;
+ pbm->msi64_len = arng->msi64_len;
+
+ if (msi_bitmap_alloc(pbm))
+ goto no_msi;
+
+ if (msi_table_alloc(pbm)) {
+ msi_bitmap_free(pbm);
+ goto no_msi;
+ }
+
+ if (ops->msiq_alloc(pbm)) {
+ msi_table_free(pbm);
+ msi_bitmap_free(pbm);
+ goto no_msi;
+ }
+
+ if (sparc64_bringup_msi_queues(pbm, ops)) {
+ ops->msiq_free(pbm);
+ msi_table_free(pbm);
+ msi_bitmap_free(pbm);
+ goto no_msi;
+ }
+
+ printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
+ "devino[0x%x]\n",
+ pbm->name,
+ pbm->msiq_first, pbm->msiq_num,
+ pbm->msiq_ent_count,
+ pbm->msiq_first_devino);
+ printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
+ "width[%u]\n",
+ pbm->name,
+ pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
+ pbm->msix_data_width);
+ printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
+ "addr64[0x%lx:0x%x]\n",
+ pbm->name,
+ pbm->msi32_start, pbm->msi32_len,
+ pbm->msi64_start, pbm->msi64_len);
+ printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n",
+ pbm->name,
+ __pa(pbm->msi_queues));
+
+ pbm->msi_ops = ops;
+ pbm->setup_msi_irq = sparc64_setup_msi_irq;
+ pbm->teardown_msi_irq = sparc64_teardown_msi_irq;
+ }
+ return;
+
+no_msi:
+ pbm->msiq_num = 0;
+ printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
+}
diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c
new file mode 100644
index 000000000000..dfb3ec892987
--- /dev/null
+++ b/arch/sparc/kernel/pci_psycho.c
@@ -0,0 +1,618 @@
+/* pci_psycho.c: PSYCHO/U2P specific PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/starfire.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+#include "psycho_common.h"
+
+#define DRIVER_NAME "psycho"
+#define PFX DRIVER_NAME ": "
+
+/* Misc. PSYCHO PCI controller register offsets and definitions. */
+#define PSYCHO_CONTROL 0x0010UL
+#define PSYCHO_CONTROL_IMPL 0xf000000000000000UL /* Implementation of this PSYCHO*/
+#define PSYCHO_CONTROL_VER 0x0f00000000000000UL /* Version of this PSYCHO */
+#define PSYCHO_CONTROL_MID 0x00f8000000000000UL /* UPA Module ID of PSYCHO */
+#define PSYCHO_CONTROL_IGN 0x0007c00000000000UL /* Interrupt Group Number */
+#define PSYCHO_CONTROL_RESV 0x00003ffffffffff0UL /* Reserved */
+#define PSYCHO_CONTROL_APCKEN 0x0000000000000008UL /* Address Parity Check Enable */
+#define PSYCHO_CONTROL_APERR 0x0000000000000004UL /* Incoming System Addr Parerr */
+#define PSYCHO_CONTROL_IAP 0x0000000000000002UL /* Invert UPA Parity */
+#define PSYCHO_CONTROL_MODE 0x0000000000000001UL /* PSYCHO clock mode */
+#define PSYCHO_PCIA_CTRL 0x2000UL
+#define PSYCHO_PCIB_CTRL 0x4000UL
+#define PSYCHO_PCICTRL_RESV1 0xfffffff000000000UL /* Reserved */
+#define PSYCHO_PCICTRL_SBH_ERR 0x0000000800000000UL /* Streaming byte hole error */
+#define PSYCHO_PCICTRL_SERR 0x0000000400000000UL /* SERR signal asserted */
+#define PSYCHO_PCICTRL_SPEED 0x0000000200000000UL /* PCI speed (1 is U2P clock) */
+#define PSYCHO_PCICTRL_RESV2 0x00000001ffc00000UL /* Reserved */
+#define PSYCHO_PCICTRL_ARB_PARK 0x0000000000200000UL /* PCI arbitration parking */
+#define PSYCHO_PCICTRL_RESV3 0x00000000001ff800UL /* Reserved */
+#define PSYCHO_PCICTRL_SBH_INT 0x0000000000000400UL /* Streaming byte hole int enab */
+#define PSYCHO_PCICTRL_WEN 0x0000000000000200UL /* Power Mgmt Wake Enable */
+#define PSYCHO_PCICTRL_EEN 0x0000000000000100UL /* PCI Error Interrupt Enable */
+#define PSYCHO_PCICTRL_RESV4 0x00000000000000c0UL /* Reserved */
+#define PSYCHO_PCICTRL_AEN 0x000000000000003fUL /* PCI DVMA Arbitration Enable */
+
+/* PSYCHO error handling support. */
+
+/* Helper function of IOMMU error checking, which checks out
+ * the state of the streaming buffers. The IOMMU lock is
+ * held when this is called.
+ *
+ * For the PCI error case we know which PBM (and thus which
+ * streaming buffer) caused the error, but for the uncorrectable
+ * error case we do not. So we always check both streaming caches.
+ */
+#define PSYCHO_STRBUF_CONTROL_A 0x2800UL
+#define PSYCHO_STRBUF_CONTROL_B 0x4800UL
+#define PSYCHO_STRBUF_CTRL_LPTR 0x00000000000000f0UL /* LRU Lock Pointer */
+#define PSYCHO_STRBUF_CTRL_LENAB 0x0000000000000008UL /* LRU Lock Enable */
+#define PSYCHO_STRBUF_CTRL_RRDIS 0x0000000000000004UL /* Rerun Disable */
+#define PSYCHO_STRBUF_CTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */
+#define PSYCHO_STRBUF_CTRL_ENAB 0x0000000000000001UL /* Streaming Buffer Enable */
+#define PSYCHO_STRBUF_FLUSH_A 0x2808UL
+#define PSYCHO_STRBUF_FLUSH_B 0x4808UL
+#define PSYCHO_STRBUF_FSYNC_A 0x2810UL
+#define PSYCHO_STRBUF_FSYNC_B 0x4810UL
+#define PSYCHO_STC_DATA_A 0xb000UL
+#define PSYCHO_STC_DATA_B 0xc000UL
+#define PSYCHO_STC_ERR_A 0xb400UL
+#define PSYCHO_STC_ERR_B 0xc400UL
+#define PSYCHO_STC_TAG_A 0xb800UL
+#define PSYCHO_STC_TAG_B 0xc800UL
+#define PSYCHO_STC_LINE_A 0xb900UL
+#define PSYCHO_STC_LINE_B 0xc900UL
+
+/* When an Uncorrectable Error or a PCI Error happens, we
+ * interrogate the IOMMU state to see if it is the cause.
+ */
+#define PSYCHO_IOMMU_CONTROL 0x0200UL
+#define PSYCHO_IOMMU_CTRL_RESV 0xfffffffff9000000UL /* Reserved */
+#define PSYCHO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status */
+#define PSYCHO_IOMMU_CTRL_XLTEERR 0x0000000001000000UL /* Translation Error encountered */
+#define PSYCHO_IOMMU_CTRL_LCKEN 0x0000000000800000UL /* Enable translation locking */
+#define PSYCHO_IOMMU_CTRL_LCKPTR 0x0000000000780000UL /* Translation lock pointer */
+#define PSYCHO_IOMMU_CTRL_TSBSZ 0x0000000000070000UL /* TSB Size */
+#define PSYCHO_IOMMU_TSBSZ_1K 0x0000000000000000UL /* TSB Table 1024 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_2K 0x0000000000010000UL /* TSB Table 2048 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_4K 0x0000000000020000UL /* TSB Table 4096 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_8K 0x0000000000030000UL /* TSB Table 8192 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_16K 0x0000000000040000UL /* TSB Table 16k 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_32K 0x0000000000050000UL /* TSB Table 32k 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_64K 0x0000000000060000UL /* TSB Table 64k 8-byte entries */
+#define PSYCHO_IOMMU_TSBSZ_128K 0x0000000000070000UL /* TSB Table 128k 8-byte entries */
+#define PSYCHO_IOMMU_CTRL_RESV2 0x000000000000fff8UL /* Reserved */
+#define PSYCHO_IOMMU_CTRL_TBWSZ 0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */
+#define PSYCHO_IOMMU_CTRL_DENAB 0x0000000000000002UL /* Diagnostic mode enable */
+#define PSYCHO_IOMMU_CTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */
+#define PSYCHO_IOMMU_TSBBASE 0x0208UL
+#define PSYCHO_IOMMU_FLUSH 0x0210UL
+#define PSYCHO_IOMMU_TAG 0xa580UL
+#define PSYCHO_IOMMU_DATA 0xa600UL
+
+/* Uncorrectable Errors. Cause of the error and the address are
+ * recorded in the UE_AFSR and UE_AFAR of PSYCHO. They are errors
+ * relating to UPA interface transactions.
+ */
+#define PSYCHO_UE_AFSR 0x0030UL
+#define PSYCHO_UEAFSR_PPIO 0x8000000000000000UL /* Primary PIO is cause */
+#define PSYCHO_UEAFSR_PDRD 0x4000000000000000UL /* Primary DVMA read is cause */
+#define PSYCHO_UEAFSR_PDWR 0x2000000000000000UL /* Primary DVMA write is cause */
+#define PSYCHO_UEAFSR_SPIO 0x1000000000000000UL /* Secondary PIO is cause */
+#define PSYCHO_UEAFSR_SDRD 0x0800000000000000UL /* Secondary DVMA read is cause */
+#define PSYCHO_UEAFSR_SDWR 0x0400000000000000UL /* Secondary DVMA write is cause*/
+#define PSYCHO_UEAFSR_RESV1 0x03ff000000000000UL /* Reserved */
+#define PSYCHO_UEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask of failed transfer */
+#define PSYCHO_UEAFSR_DOFF 0x00000000e0000000UL /* Doubleword Offset */
+#define PSYCHO_UEAFSR_MID 0x000000001f000000UL /* UPA MID causing the fault */
+#define PSYCHO_UEAFSR_BLK 0x0000000000800000UL /* Trans was block operation */
+#define PSYCHO_UEAFSR_RESV2 0x00000000007fffffUL /* Reserved */
+#define PSYCHO_UE_AFAR 0x0038UL
+
+static irqreturn_t psycho_ue_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + PSYCHO_UE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + PSYCHO_UE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch uncorrectable error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear the primary/secondary error status bits. */
+ error_bits = afsr &
+ (PSYCHO_UEAFSR_PPIO | PSYCHO_UEAFSR_PDRD | PSYCHO_UEAFSR_PDWR |
+ PSYCHO_UEAFSR_SPIO | PSYCHO_UEAFSR_SDRD | PSYCHO_UEAFSR_SDWR);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Uncorrectable Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & PSYCHO_UEAFSR_PPIO) ?
+ "PIO" :
+ ((error_bits & PSYCHO_UEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & PSYCHO_UEAFSR_PDWR) ?
+ "DMA Write" : "???")))));
+ printk("%s: bytemask[%04lx] dword_offset[%lx] UPA_MID[%02lx] was_block(%d)\n",
+ pbm->name,
+ (afsr & PSYCHO_UEAFSR_BMSK) >> 32UL,
+ (afsr & PSYCHO_UEAFSR_DOFF) >> 29UL,
+ (afsr & PSYCHO_UEAFSR_MID) >> 24UL,
+ ((afsr & PSYCHO_UEAFSR_BLK) ? 1 : 0));
+ printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: UE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & PSYCHO_UEAFSR_SPIO) {
+ reported++;
+ printk("(PIO)");
+ }
+ if (afsr & PSYCHO_UEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & PSYCHO_UEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* Interrogate both IOMMUs for error status. */
+ psycho_check_iommu_error(pbm, afsr, afar, UE_ERR);
+ if (pbm->sibling)
+ psycho_check_iommu_error(pbm->sibling, afsr, afar, UE_ERR);
+
+ return IRQ_HANDLED;
+}
+
+/* Correctable Errors. */
+#define PSYCHO_CE_AFSR 0x0040UL
+#define PSYCHO_CEAFSR_PPIO 0x8000000000000000UL /* Primary PIO is cause */
+#define PSYCHO_CEAFSR_PDRD 0x4000000000000000UL /* Primary DVMA read is cause */
+#define PSYCHO_CEAFSR_PDWR 0x2000000000000000UL /* Primary DVMA write is cause */
+#define PSYCHO_CEAFSR_SPIO 0x1000000000000000UL /* Secondary PIO is cause */
+#define PSYCHO_CEAFSR_SDRD 0x0800000000000000UL /* Secondary DVMA read is cause */
+#define PSYCHO_CEAFSR_SDWR 0x0400000000000000UL /* Secondary DVMA write is cause*/
+#define PSYCHO_CEAFSR_RESV1 0x0300000000000000UL /* Reserved */
+#define PSYCHO_CEAFSR_ESYND 0x00ff000000000000UL /* Syndrome Bits */
+#define PSYCHO_CEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask of failed transfer */
+#define PSYCHO_CEAFSR_DOFF 0x00000000e0000000UL /* Double Offset */
+#define PSYCHO_CEAFSR_MID 0x000000001f000000UL /* UPA MID causing the fault */
+#define PSYCHO_CEAFSR_BLK 0x0000000000800000UL /* Trans was block operation */
+#define PSYCHO_CEAFSR_RESV2 0x00000000007fffffUL /* Reserved */
+#define PSYCHO_CE_AFAR 0x0040UL
+
+static irqreturn_t psycho_ce_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + PSYCHO_CE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + PSYCHO_CE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (PSYCHO_CEAFSR_PPIO | PSYCHO_CEAFSR_PDRD | PSYCHO_CEAFSR_PDWR |
+ PSYCHO_CEAFSR_SPIO | PSYCHO_CEAFSR_SDRD | PSYCHO_CEAFSR_SDWR);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Correctable Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & PSYCHO_CEAFSR_PPIO) ?
+ "PIO" :
+ ((error_bits & PSYCHO_CEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & PSYCHO_CEAFSR_PDWR) ?
+ "DMA Write" : "???")))));
+
+ /* XXX Use syndrome and afar to print out module string just like
+ * XXX UDB CE trap handler does... -DaveM
+ */
+ printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] "
+ "UPA_MID[%02lx] was_block(%d)\n",
+ pbm->name,
+ (afsr & PSYCHO_CEAFSR_ESYND) >> 48UL,
+ (afsr & PSYCHO_CEAFSR_BMSK) >> 32UL,
+ (afsr & PSYCHO_CEAFSR_DOFF) >> 29UL,
+ (afsr & PSYCHO_CEAFSR_MID) >> 24UL,
+ ((afsr & PSYCHO_CEAFSR_BLK) ? 1 : 0));
+ printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: CE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & PSYCHO_CEAFSR_SPIO) {
+ reported++;
+ printk("(PIO)");
+ }
+ if (afsr & PSYCHO_CEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & PSYCHO_CEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ return IRQ_HANDLED;
+}
+
+/* PCI Errors. They are signalled by the PCI bus module since they
+ * are associated with a specific bus segment.
+ */
+#define PSYCHO_PCI_AFSR_A 0x2010UL
+#define PSYCHO_PCI_AFSR_B 0x4010UL
+#define PSYCHO_PCI_AFAR_A 0x2018UL
+#define PSYCHO_PCI_AFAR_B 0x4018UL
+
+/* XXX What about PowerFail/PowerManagement??? -DaveM */
+#define PSYCHO_ECC_CTRL 0x0020
+#define PSYCHO_ECCCTRL_EE 0x8000000000000000UL /* Enable ECC Checking */
+#define PSYCHO_ECCCTRL_UE 0x4000000000000000UL /* Enable UE Interrupts */
+#define PSYCHO_ECCCTRL_CE 0x2000000000000000UL /* Enable CE INterrupts */
+static void psycho_register_error_handlers(struct pci_pbm_info *pbm)
+{
+ struct of_device *op = of_find_device_by_node(pbm->op->node);
+ unsigned long base = pbm->controller_regs;
+ u64 tmp;
+ int err;
+
+ if (!op)
+ return;
+
+ /* Psycho interrupt property order is:
+ * 0: PCIERR INO for this PBM
+ * 1: UE ERR
+ * 2: CE ERR
+ * 3: POWER FAIL
+ * 4: SPARE HARDWARE
+ * 5: POWER MANAGEMENT
+ */
+
+ if (op->num_irqs < 6)
+ return;
+
+ /* We really mean to ignore the return result here. Two
+ * PCI controller share the same interrupt numbers and
+ * drive the same front-end hardware. Whichever of the
+ * two get in here first will register the IRQ handler
+ * the second will just error out since we do not pass in
+ * IRQF_SHARED.
+ */
+ err = request_irq(op->irqs[1], psycho_ue_intr, IRQF_SHARED,
+ "PSYCHO_UE", pbm);
+ err = request_irq(op->irqs[2], psycho_ce_intr, IRQF_SHARED,
+ "PSYCHO_CE", pbm);
+
+ /* This one, however, ought not to fail. We can just warn
+ * about it since the system can still operate properly even
+ * if this fails.
+ */
+ err = request_irq(op->irqs[0], psycho_pcierr_intr, IRQF_SHARED,
+ "PSYCHO_PCIERR", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Could not register PCIERR, "
+ "err=%d\n", pbm->name, err);
+
+ /* Enable UE and CE interrupts for controller. */
+ upa_writeq((PSYCHO_ECCCTRL_EE |
+ PSYCHO_ECCCTRL_UE |
+ PSYCHO_ECCCTRL_CE), base + PSYCHO_ECC_CTRL);
+
+ /* Enable PCI Error interrupts and clear error
+ * bits for each PBM.
+ */
+ tmp = upa_readq(base + PSYCHO_PCIA_CTRL);
+ tmp |= (PSYCHO_PCICTRL_SERR |
+ PSYCHO_PCICTRL_SBH_ERR |
+ PSYCHO_PCICTRL_EEN);
+ tmp &= ~(PSYCHO_PCICTRL_SBH_INT);
+ upa_writeq(tmp, base + PSYCHO_PCIA_CTRL);
+
+ tmp = upa_readq(base + PSYCHO_PCIB_CTRL);
+ tmp |= (PSYCHO_PCICTRL_SERR |
+ PSYCHO_PCICTRL_SBH_ERR |
+ PSYCHO_PCICTRL_EEN);
+ tmp &= ~(PSYCHO_PCICTRL_SBH_INT);
+ upa_writeq(tmp, base + PSYCHO_PCIB_CTRL);
+}
+
+/* PSYCHO boot time probing and initialization. */
+static void pbm_config_busmastering(struct pci_pbm_info *pbm)
+{
+ u8 *addr;
+
+ /* Set cache-line size to 64 bytes, this is actually
+ * a nop but I do it for completeness.
+ */
+ addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+ 0, PCI_CACHE_LINE_SIZE);
+ pci_config_write8(addr, 64 / sizeof(u32));
+
+ /* Set PBM latency timer to 64 PCI clocks. */
+ addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+ 0, PCI_LATENCY_TIMER);
+ pci_config_write8(addr, 64);
+}
+
+static void __init psycho_scan_bus(struct pci_pbm_info *pbm,
+ struct device *parent)
+{
+ pbm_config_busmastering(pbm);
+ pbm->is_66mhz_capable = 0;
+ pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+
+ /* After the PCI bus scan is complete, we can register
+ * the error interrupt handlers.
+ */
+ psycho_register_error_handlers(pbm);
+}
+
+#define PSYCHO_IRQ_RETRY 0x1a00UL
+#define PSYCHO_PCIA_DIAG 0x2020UL
+#define PSYCHO_PCIB_DIAG 0x4020UL
+#define PSYCHO_PCIDIAG_RESV 0xffffffffffffff80UL /* Reserved */
+#define PSYCHO_PCIDIAG_DRETRY 0x0000000000000040UL /* Disable retry limit */
+#define PSYCHO_PCIDIAG_DISYNC 0x0000000000000020UL /* Disable DMA wr / irq sync */
+#define PSYCHO_PCIDIAG_DDWSYNC 0x0000000000000010UL /* Disable DMA wr / PIO rd sync */
+#define PSYCHO_PCIDIAG_IDDPAR 0x0000000000000008UL /* Invert DMA data parity */
+#define PSYCHO_PCIDIAG_IPDPAR 0x0000000000000004UL /* Invert PIO data parity */
+#define PSYCHO_PCIDIAG_IPAPAR 0x0000000000000002UL /* Invert PIO address parity */
+#define PSYCHO_PCIDIAG_LPBACK 0x0000000000000001UL /* Enable loopback mode */
+
+static void psycho_controller_hwinit(struct pci_pbm_info *pbm)
+{
+ u64 tmp;
+
+ upa_writeq(5, pbm->controller_regs + PSYCHO_IRQ_RETRY);
+
+ /* Enable arbiter for all PCI slots. */
+ tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIA_CTRL);
+ tmp |= PSYCHO_PCICTRL_AEN;
+ upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIA_CTRL);
+
+ tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIB_CTRL);
+ tmp |= PSYCHO_PCICTRL_AEN;
+ upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIB_CTRL);
+
+ /* Disable DMA write / PIO read synchronization on
+ * both PCI bus segments.
+ * [ U2P Erratum 1243770, STP2223BGA data sheet ]
+ */
+ tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIA_DIAG);
+ tmp |= PSYCHO_PCIDIAG_DDWSYNC;
+ upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIA_DIAG);
+
+ tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIB_DIAG);
+ tmp |= PSYCHO_PCIDIAG_DDWSYNC;
+ upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIB_DIAG);
+}
+
+static void psycho_pbm_strbuf_init(struct pci_pbm_info *pbm,
+ int is_pbm_a)
+{
+ unsigned long base = pbm->controller_regs;
+ u64 control;
+
+ if (is_pbm_a) {
+ pbm->stc.strbuf_control = base + PSYCHO_STRBUF_CONTROL_A;
+ pbm->stc.strbuf_pflush = base + PSYCHO_STRBUF_FLUSH_A;
+ pbm->stc.strbuf_fsync = base + PSYCHO_STRBUF_FSYNC_A;
+ pbm->stc.strbuf_err_stat = base + PSYCHO_STC_ERR_A;
+ pbm->stc.strbuf_tag_diag = base + PSYCHO_STC_TAG_A;
+ pbm->stc.strbuf_line_diag= base + PSYCHO_STC_LINE_A;
+ } else {
+ pbm->stc.strbuf_control = base + PSYCHO_STRBUF_CONTROL_B;
+ pbm->stc.strbuf_pflush = base + PSYCHO_STRBUF_FLUSH_B;
+ pbm->stc.strbuf_fsync = base + PSYCHO_STRBUF_FSYNC_B;
+ pbm->stc.strbuf_err_stat = base + PSYCHO_STC_ERR_B;
+ pbm->stc.strbuf_tag_diag = base + PSYCHO_STC_TAG_B;
+ pbm->stc.strbuf_line_diag= base + PSYCHO_STC_LINE_B;
+ }
+ /* PSYCHO's streaming buffer lacks ctx flushing. */
+ pbm->stc.strbuf_ctxflush = 0;
+ pbm->stc.strbuf_ctxmatch_base = 0;
+
+ pbm->stc.strbuf_flushflag = (volatile unsigned long *)
+ ((((unsigned long)&pbm->stc.__flushflag_buf[0])
+ + 63UL)
+ & ~63UL);
+ pbm->stc.strbuf_flushflag_pa = (unsigned long)
+ __pa(pbm->stc.strbuf_flushflag);
+
+ /* Enable the streaming buffer. We have to be careful
+ * just in case OBP left it with LRU locking enabled.
+ *
+ * It is possible to control if PBM will be rerun on
+ * line misses. Currently I just retain whatever setting
+ * OBP left us with. All checks so far show it having
+ * a value of zero.
+ */
+#undef PSYCHO_STRBUF_RERUN_ENABLE
+#undef PSYCHO_STRBUF_RERUN_DISABLE
+ control = upa_readq(pbm->stc.strbuf_control);
+ control |= PSYCHO_STRBUF_CTRL_ENAB;
+ control &= ~(PSYCHO_STRBUF_CTRL_LENAB | PSYCHO_STRBUF_CTRL_LPTR);
+#ifdef PSYCHO_STRBUF_RERUN_ENABLE
+ control &= ~(PSYCHO_STRBUF_CTRL_RRDIS);
+#else
+#ifdef PSYCHO_STRBUF_RERUN_DISABLE
+ control |= PSYCHO_STRBUF_CTRL_RRDIS;
+#endif
+#endif
+ upa_writeq(control, pbm->stc.strbuf_control);
+
+ pbm->stc.strbuf_enabled = 1;
+}
+
+#define PSYCHO_IOSPACE_A 0x002000000UL
+#define PSYCHO_IOSPACE_B 0x002010000UL
+#define PSYCHO_IOSPACE_SIZE 0x00000ffffUL
+#define PSYCHO_MEMSPACE_A 0x100000000UL
+#define PSYCHO_MEMSPACE_B 0x180000000UL
+#define PSYCHO_MEMSPACE_SIZE 0x07fffffffUL
+
+static void __init psycho_pbm_init(struct pci_pbm_info *pbm,
+ struct of_device *op, int is_pbm_a)
+{
+ psycho_pbm_init_common(pbm, op, "PSYCHO", PBM_CHIP_TYPE_PSYCHO);
+ psycho_pbm_strbuf_init(pbm, is_pbm_a);
+ psycho_scan_bus(pbm, &op->dev);
+}
+
+static struct pci_pbm_info * __devinit psycho_find_sibling(u32 upa_portid)
+{
+ struct pci_pbm_info *pbm;
+
+ for (pbm = pci_pbm_root; pbm; pbm = pbm->next) {
+ if (pbm->portid == upa_portid)
+ return pbm;
+ }
+ return NULL;
+}
+
+#define PSYCHO_CONFIGSPACE 0x001000000UL
+
+static int __devinit psycho_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ const struct linux_prom64_registers *pr_regs;
+ struct device_node *dp = op->node;
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ int is_pbm_a, err;
+ u32 upa_portid;
+
+ upa_portid = of_getintprop_default(dp, "upa-portid", 0xff);
+
+ err = -ENOMEM;
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+ goto out_err;
+ }
+
+ pbm->sibling = psycho_find_sibling(upa_portid);
+ if (pbm->sibling) {
+ iommu = pbm->sibling->iommu;
+ } else {
+ iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+ goto out_free_controller;
+ }
+ }
+
+ pbm->iommu = iommu;
+ pbm->portid = upa_portid;
+
+ pr_regs = of_get_property(dp, "reg", NULL);
+ err = -ENODEV;
+ if (!pr_regs) {
+ printk(KERN_ERR PFX "No reg property.\n");
+ goto out_free_iommu;
+ }
+
+ is_pbm_a = ((pr_regs[0].phys_addr & 0x6000) == 0x2000);
+
+ pbm->controller_regs = pr_regs[2].phys_addr;
+ pbm->config_space = (pr_regs[2].phys_addr + PSYCHO_CONFIGSPACE);
+
+ if (is_pbm_a) {
+ pbm->pci_afsr = pbm->controller_regs + PSYCHO_PCI_AFSR_A;
+ pbm->pci_afar = pbm->controller_regs + PSYCHO_PCI_AFAR_A;
+ pbm->pci_csr = pbm->controller_regs + PSYCHO_PCIA_CTRL;
+ } else {
+ pbm->pci_afsr = pbm->controller_regs + PSYCHO_PCI_AFSR_B;
+ pbm->pci_afar = pbm->controller_regs + PSYCHO_PCI_AFAR_B;
+ pbm->pci_csr = pbm->controller_regs + PSYCHO_PCIB_CTRL;
+ }
+
+ psycho_controller_hwinit(pbm);
+ if (!pbm->sibling) {
+ err = psycho_iommu_init(pbm, 128, 0xc0000000,
+ 0xffffffff, PSYCHO_CONTROL);
+ if (err)
+ goto out_free_iommu;
+
+ /* If necessary, hook us up for starfire IRQ translations. */
+ if (this_is_starfire)
+ starfire_hookup(pbm->portid);
+ }
+
+ psycho_pbm_init(pbm, op, is_pbm_a);
+
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ if (pbm->sibling)
+ pbm->sibling->sibling = pbm;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+
+out_free_iommu:
+ if (!pbm->sibling)
+ kfree(pbm->iommu);
+
+out_free_controller:
+ kfree(pbm);
+
+out_err:
+ return err;
+}
+
+static struct of_device_id __initdata psycho_match[] = {
+ {
+ .name = "pci",
+ .compatible = "pci108e,8000",
+ },
+ {},
+};
+
+static struct of_platform_driver psycho_driver = {
+ .name = DRIVER_NAME,
+ .match_table = psycho_match,
+ .probe = psycho_probe,
+};
+
+static int __init psycho_init(void)
+{
+ return of_register_driver(&psycho_driver, &of_bus_type);
+}
+
+subsys_initcall(psycho_init);
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
new file mode 100644
index 000000000000..713257b6963c
--- /dev/null
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -0,0 +1,609 @@
+/* pci_sabre.c: Sabre specific PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/apb.h>
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+#include "psycho_common.h"
+
+#define DRIVER_NAME "sabre"
+#define PFX DRIVER_NAME ": "
+
+/* SABRE PCI controller register offsets and definitions. */
+#define SABRE_UE_AFSR 0x0030UL
+#define SABRE_UEAFSR_PDRD 0x4000000000000000UL /* Primary PCI DMA Read */
+#define SABRE_UEAFSR_PDWR 0x2000000000000000UL /* Primary PCI DMA Write */
+#define SABRE_UEAFSR_SDRD 0x0800000000000000UL /* Secondary PCI DMA Read */
+#define SABRE_UEAFSR_SDWR 0x0400000000000000UL /* Secondary PCI DMA Write */
+#define SABRE_UEAFSR_SDTE 0x0200000000000000UL /* Secondary DMA Translation Error */
+#define SABRE_UEAFSR_PDTE 0x0100000000000000UL /* Primary DMA Translation Error */
+#define SABRE_UEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask */
+#define SABRE_UEAFSR_OFF 0x00000000e0000000UL /* Offset (AFAR bits [5:3] */
+#define SABRE_UEAFSR_BLK 0x0000000000800000UL /* Was block operation */
+#define SABRE_UECE_AFAR 0x0038UL
+#define SABRE_CE_AFSR 0x0040UL
+#define SABRE_CEAFSR_PDRD 0x4000000000000000UL /* Primary PCI DMA Read */
+#define SABRE_CEAFSR_PDWR 0x2000000000000000UL /* Primary PCI DMA Write */
+#define SABRE_CEAFSR_SDRD 0x0800000000000000UL /* Secondary PCI DMA Read */
+#define SABRE_CEAFSR_SDWR 0x0400000000000000UL /* Secondary PCI DMA Write */
+#define SABRE_CEAFSR_ESYND 0x00ff000000000000UL /* ECC Syndrome */
+#define SABRE_CEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask */
+#define SABRE_CEAFSR_OFF 0x00000000e0000000UL /* Offset */
+#define SABRE_CEAFSR_BLK 0x0000000000800000UL /* Was block operation */
+#define SABRE_UECE_AFAR_ALIAS 0x0048UL /* Aliases to 0x0038 */
+#define SABRE_IOMMU_CONTROL 0x0200UL
+#define SABRE_IOMMUCTRL_ERRSTS 0x0000000006000000UL /* Error status bits */
+#define SABRE_IOMMUCTRL_ERR 0x0000000001000000UL /* Error present in IOTLB */
+#define SABRE_IOMMUCTRL_LCKEN 0x0000000000800000UL /* IOTLB lock enable */
+#define SABRE_IOMMUCTRL_LCKPTR 0x0000000000780000UL /* IOTLB lock pointer */
+#define SABRE_IOMMUCTRL_TSBSZ 0x0000000000070000UL /* TSB Size */
+#define SABRE_IOMMU_TSBSZ_1K 0x0000000000000000
+#define SABRE_IOMMU_TSBSZ_2K 0x0000000000010000
+#define SABRE_IOMMU_TSBSZ_4K 0x0000000000020000
+#define SABRE_IOMMU_TSBSZ_8K 0x0000000000030000
+#define SABRE_IOMMU_TSBSZ_16K 0x0000000000040000
+#define SABRE_IOMMU_TSBSZ_32K 0x0000000000050000
+#define SABRE_IOMMU_TSBSZ_64K 0x0000000000060000
+#define SABRE_IOMMU_TSBSZ_128K 0x0000000000070000
+#define SABRE_IOMMUCTRL_TBWSZ 0x0000000000000004UL /* TSB assumed page size */
+#define SABRE_IOMMUCTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */
+#define SABRE_IOMMUCTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */
+#define SABRE_IOMMU_TSBBASE 0x0208UL
+#define SABRE_IOMMU_FLUSH 0x0210UL
+#define SABRE_IMAP_A_SLOT0 0x0c00UL
+#define SABRE_IMAP_B_SLOT0 0x0c20UL
+#define SABRE_IMAP_SCSI 0x1000UL
+#define SABRE_IMAP_ETH 0x1008UL
+#define SABRE_IMAP_BPP 0x1010UL
+#define SABRE_IMAP_AU_REC 0x1018UL
+#define SABRE_IMAP_AU_PLAY 0x1020UL
+#define SABRE_IMAP_PFAIL 0x1028UL
+#define SABRE_IMAP_KMS 0x1030UL
+#define SABRE_IMAP_FLPY 0x1038UL
+#define SABRE_IMAP_SHW 0x1040UL
+#define SABRE_IMAP_KBD 0x1048UL
+#define SABRE_IMAP_MS 0x1050UL
+#define SABRE_IMAP_SER 0x1058UL
+#define SABRE_IMAP_UE 0x1070UL
+#define SABRE_IMAP_CE 0x1078UL
+#define SABRE_IMAP_PCIERR 0x1080UL
+#define SABRE_IMAP_GFX 0x1098UL
+#define SABRE_IMAP_EUPA 0x10a0UL
+#define SABRE_ICLR_A_SLOT0 0x1400UL
+#define SABRE_ICLR_B_SLOT0 0x1480UL
+#define SABRE_ICLR_SCSI 0x1800UL
+#define SABRE_ICLR_ETH 0x1808UL
+#define SABRE_ICLR_BPP 0x1810UL
+#define SABRE_ICLR_AU_REC 0x1818UL
+#define SABRE_ICLR_AU_PLAY 0x1820UL
+#define SABRE_ICLR_PFAIL 0x1828UL
+#define SABRE_ICLR_KMS 0x1830UL
+#define SABRE_ICLR_FLPY 0x1838UL
+#define SABRE_ICLR_SHW 0x1840UL
+#define SABRE_ICLR_KBD 0x1848UL
+#define SABRE_ICLR_MS 0x1850UL
+#define SABRE_ICLR_SER 0x1858UL
+#define SABRE_ICLR_UE 0x1870UL
+#define SABRE_ICLR_CE 0x1878UL
+#define SABRE_ICLR_PCIERR 0x1880UL
+#define SABRE_WRSYNC 0x1c20UL
+#define SABRE_PCICTRL 0x2000UL
+#define SABRE_PCICTRL_MRLEN 0x0000001000000000UL /* Use MemoryReadLine for block loads/stores */
+#define SABRE_PCICTRL_SERR 0x0000000400000000UL /* Set when SERR asserted on PCI bus */
+#define SABRE_PCICTRL_ARBPARK 0x0000000000200000UL /* Bus Parking 0=Ultra-IIi 1=prev-bus-owner */
+#define SABRE_PCICTRL_CPUPRIO 0x0000000000100000UL /* Ultra-IIi granted every other bus cycle */
+#define SABRE_PCICTRL_ARBPRIO 0x00000000000f0000UL /* Slot which is granted every other bus cycle */
+#define SABRE_PCICTRL_ERREN 0x0000000000000100UL /* PCI Error Interrupt Enable */
+#define SABRE_PCICTRL_RTRYWE 0x0000000000000080UL /* DMA Flow Control 0=wait-if-possible 1=retry */
+#define SABRE_PCICTRL_AEN 0x000000000000000fUL /* Slot PCI arbitration enables */
+#define SABRE_PIOAFSR 0x2010UL
+#define SABRE_PIOAFSR_PMA 0x8000000000000000UL /* Primary Master Abort */
+#define SABRE_PIOAFSR_PTA 0x4000000000000000UL /* Primary Target Abort */
+#define SABRE_PIOAFSR_PRTRY 0x2000000000000000UL /* Primary Excessive Retries */
+#define SABRE_PIOAFSR_PPERR 0x1000000000000000UL /* Primary Parity Error */
+#define SABRE_PIOAFSR_SMA 0x0800000000000000UL /* Secondary Master Abort */
+#define SABRE_PIOAFSR_STA 0x0400000000000000UL /* Secondary Target Abort */
+#define SABRE_PIOAFSR_SRTRY 0x0200000000000000UL /* Secondary Excessive Retries */
+#define SABRE_PIOAFSR_SPERR 0x0100000000000000UL /* Secondary Parity Error */
+#define SABRE_PIOAFSR_BMSK 0x0000ffff00000000UL /* Byte Mask */
+#define SABRE_PIOAFSR_BLK 0x0000000080000000UL /* Was Block Operation */
+#define SABRE_PIOAFAR 0x2018UL
+#define SABRE_PCIDIAG 0x2020UL
+#define SABRE_PCIDIAG_DRTRY 0x0000000000000040UL /* Disable PIO Retry Limit */
+#define SABRE_PCIDIAG_IPAPAR 0x0000000000000008UL /* Invert PIO Address Parity */
+#define SABRE_PCIDIAG_IPDPAR 0x0000000000000004UL /* Invert PIO Data Parity */
+#define SABRE_PCIDIAG_IDDPAR 0x0000000000000002UL /* Invert DMA Data Parity */
+#define SABRE_PCIDIAG_ELPBK 0x0000000000000001UL /* Loopback Enable - not supported */
+#define SABRE_PCITASR 0x2028UL
+#define SABRE_PCITASR_EF 0x0000000000000080UL /* Respond to 0xe0000000-0xffffffff */
+#define SABRE_PCITASR_CD 0x0000000000000040UL /* Respond to 0xc0000000-0xdfffffff */
+#define SABRE_PCITASR_AB 0x0000000000000020UL /* Respond to 0xa0000000-0xbfffffff */
+#define SABRE_PCITASR_89 0x0000000000000010UL /* Respond to 0x80000000-0x9fffffff */
+#define SABRE_PCITASR_67 0x0000000000000008UL /* Respond to 0x60000000-0x7fffffff */
+#define SABRE_PCITASR_45 0x0000000000000004UL /* Respond to 0x40000000-0x5fffffff */
+#define SABRE_PCITASR_23 0x0000000000000002UL /* Respond to 0x20000000-0x3fffffff */
+#define SABRE_PCITASR_01 0x0000000000000001UL /* Respond to 0x00000000-0x1fffffff */
+#define SABRE_PIOBUF_DIAG 0x5000UL
+#define SABRE_DMABUF_DIAGLO 0x5100UL
+#define SABRE_DMABUF_DIAGHI 0x51c0UL
+#define SABRE_IMAP_GFX_ALIAS 0x6000UL /* Aliases to 0x1098 */
+#define SABRE_IMAP_EUPA_ALIAS 0x8000UL /* Aliases to 0x10a0 */
+#define SABRE_IOMMU_VADIAG 0xa400UL
+#define SABRE_IOMMU_TCDIAG 0xa408UL
+#define SABRE_IOMMU_TAG 0xa580UL
+#define SABRE_IOMMUTAG_ERRSTS 0x0000000001800000UL /* Error status bits */
+#define SABRE_IOMMUTAG_ERR 0x0000000000400000UL /* Error present */
+#define SABRE_IOMMUTAG_WRITE 0x0000000000200000UL /* Page is writable */
+#define SABRE_IOMMUTAG_STREAM 0x0000000000100000UL /* Streamable bit - unused */
+#define SABRE_IOMMUTAG_SIZE 0x0000000000080000UL /* 0=8k 1=16k */
+#define SABRE_IOMMUTAG_VPN 0x000000000007ffffUL /* Virtual Page Number [31:13] */
+#define SABRE_IOMMU_DATA 0xa600UL
+#define SABRE_IOMMUDATA_VALID 0x0000000040000000UL /* Valid */
+#define SABRE_IOMMUDATA_USED 0x0000000020000000UL /* Used (for LRU algorithm) */
+#define SABRE_IOMMUDATA_CACHE 0x0000000010000000UL /* Cacheable */
+#define SABRE_IOMMUDATA_PPN 0x00000000001fffffUL /* Physical Page Number [33:13] */
+#define SABRE_PCI_IRQSTATE 0xa800UL
+#define SABRE_OBIO_IRQSTATE 0xa808UL
+#define SABRE_FFBCFG 0xf000UL
+#define SABRE_FFBCFG_SPRQS 0x000000000f000000 /* Slave P_RQST queue size */
+#define SABRE_FFBCFG_ONEREAD 0x0000000000004000 /* Slave supports one outstanding read */
+#define SABRE_MCCTRL0 0xf010UL
+#define SABRE_MCCTRL0_RENAB 0x0000000080000000 /* Refresh Enable */
+#define SABRE_MCCTRL0_EENAB 0x0000000010000000 /* Enable all ECC functions */
+#define SABRE_MCCTRL0_11BIT 0x0000000000001000 /* Enable 11-bit column addressing */
+#define SABRE_MCCTRL0_DPP 0x0000000000000f00 /* DIMM Pair Present Bits */
+#define SABRE_MCCTRL0_RINTVL 0x00000000000000ff /* Refresh Interval */
+#define SABRE_MCCTRL1 0xf018UL
+#define SABRE_MCCTRL1_AMDC 0x0000000038000000 /* Advance Memdata Clock */
+#define SABRE_MCCTRL1_ARDC 0x0000000007000000 /* Advance DRAM Read Data Clock */
+#define SABRE_MCCTRL1_CSR 0x0000000000e00000 /* CAS to RAS delay for CBR refresh */
+#define SABRE_MCCTRL1_CASRW 0x00000000001c0000 /* CAS length for read/write */
+#define SABRE_MCCTRL1_RCD 0x0000000000038000 /* RAS to CAS delay */
+#define SABRE_MCCTRL1_CP 0x0000000000007000 /* CAS Precharge */
+#define SABRE_MCCTRL1_RP 0x0000000000000e00 /* RAS Precharge */
+#define SABRE_MCCTRL1_RAS 0x00000000000001c0 /* Length of RAS for refresh */
+#define SABRE_MCCTRL1_CASRW2 0x0000000000000038 /* Must be same as CASRW */
+#define SABRE_MCCTRL1_RSC 0x0000000000000007 /* RAS after CAS hold time */
+#define SABRE_RESETCTRL 0xf020UL
+
+#define SABRE_CONFIGSPACE 0x001000000UL
+#define SABRE_IOSPACE 0x002000000UL
+#define SABRE_IOSPACE_SIZE 0x000ffffffUL
+#define SABRE_MEMSPACE 0x100000000UL
+#define SABRE_MEMSPACE_SIZE 0x07fffffffUL
+
+static int hummingbird_p;
+static struct pci_bus *sabre_root_bus;
+
+static irqreturn_t sabre_ue_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SABRE_UE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch uncorrectable error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear the primary/secondary error status bits. */
+ error_bits = afsr &
+ (SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR |
+ SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR |
+ SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Uncorrectable Error, primary error type[%s%s]\n",
+ pbm->name,
+ ((error_bits & SABRE_UEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SABRE_UEAFSR_PDWR) ?
+ "DMA Write" : "???")),
+ ((error_bits & SABRE_UEAFSR_PDTE) ?
+ ":Translation Error" : ""));
+ printk("%s: bytemask[%04lx] dword_offset[%lx] was_block(%d)\n",
+ pbm->name,
+ (afsr & SABRE_UEAFSR_BMSK) >> 32UL,
+ (afsr & SABRE_UEAFSR_OFF) >> 29UL,
+ ((afsr & SABRE_UEAFSR_BLK) ? 1 : 0));
+ printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: UE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SABRE_UEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & SABRE_UEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (afsr & SABRE_UEAFSR_SDTE) {
+ reported++;
+ printk("(Translation Error)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* Interrogate IOMMU for error status. */
+ psycho_check_iommu_error(pbm, afsr, afar, UE_ERR);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t sabre_ce_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SABRE_CE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR |
+ SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Correctable Error, primary error type[%s]\n",
+ pbm->name,
+ ((error_bits & SABRE_CEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SABRE_CEAFSR_PDWR) ?
+ "DMA Write" : "???")));
+
+ /* XXX Use syndrome and afar to print out module string just like
+ * XXX UDB CE trap handler does... -DaveM
+ */
+ printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] "
+ "was_block(%d)\n",
+ pbm->name,
+ (afsr & SABRE_CEAFSR_ESYND) >> 48UL,
+ (afsr & SABRE_CEAFSR_BMSK) >> 32UL,
+ (afsr & SABRE_CEAFSR_OFF) >> 29UL,
+ ((afsr & SABRE_CEAFSR_BLK) ? 1 : 0));
+ printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: CE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SABRE_CEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & SABRE_CEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ return IRQ_HANDLED;
+}
+
+static void sabre_register_error_handlers(struct pci_pbm_info *pbm)
+{
+ struct device_node *dp = pbm->op->node;
+ struct of_device *op;
+ unsigned long base = pbm->controller_regs;
+ u64 tmp;
+ int err;
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_SABRE)
+ dp = dp->parent;
+
+ op = of_find_device_by_node(dp);
+ if (!op)
+ return;
+
+ /* Sabre/Hummingbird IRQ property layout is:
+ * 0: PCI ERR
+ * 1: UE ERR
+ * 2: CE ERR
+ * 3: POWER FAIL
+ */
+ if (op->num_irqs < 4)
+ return;
+
+ /* We clear the error bits in the appropriate AFSR before
+ * registering the handler so that we don't get spurious
+ * interrupts.
+ */
+ upa_writeq((SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR |
+ SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR |
+ SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE),
+ base + SABRE_UE_AFSR);
+
+ err = request_irq(op->irqs[1], sabre_ue_intr, 0, "SABRE_UE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register UE, err=%d.\n",
+ pbm->name, err);
+
+ upa_writeq((SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR |
+ SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR),
+ base + SABRE_CE_AFSR);
+
+
+ err = request_irq(op->irqs[2], sabre_ce_intr, 0, "SABRE_CE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register CE, err=%d.\n",
+ pbm->name, err);
+ err = request_irq(op->irqs[0], psycho_pcierr_intr, 0,
+ "SABRE_PCIERR", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register PCIERR, err=%d.\n",
+ pbm->name, err);
+
+ tmp = upa_readq(base + SABRE_PCICTRL);
+ tmp |= SABRE_PCICTRL_ERREN;
+ upa_writeq(tmp, base + SABRE_PCICTRL);
+}
+
+static void apb_init(struct pci_bus *sabre_bus)
+{
+ struct pci_dev *pdev;
+
+ list_for_each_entry(pdev, &sabre_bus->devices, bus_list) {
+ if (pdev->vendor == PCI_VENDOR_ID_SUN &&
+ pdev->device == PCI_DEVICE_ID_SUN_SIMBA) {
+ u16 word16;
+
+ pci_read_config_word(pdev, PCI_COMMAND, &word16);
+ word16 |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY |
+ PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY |
+ PCI_COMMAND_IO;
+ pci_write_config_word(pdev, PCI_COMMAND, word16);
+
+ /* Status register bits are "write 1 to clear". */
+ pci_write_config_word(pdev, PCI_STATUS, 0xffff);
+ pci_write_config_word(pdev, PCI_SEC_STATUS, 0xffff);
+
+ /* Use a primary/seconday latency timer value
+ * of 64.
+ */
+ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64);
+ pci_write_config_byte(pdev, PCI_SEC_LATENCY_TIMER, 64);
+
+ /* Enable reporting/forwarding of master aborts,
+ * parity, and SERR.
+ */
+ pci_write_config_byte(pdev, PCI_BRIDGE_CONTROL,
+ (PCI_BRIDGE_CTL_PARITY |
+ PCI_BRIDGE_CTL_SERR |
+ PCI_BRIDGE_CTL_MASTER_ABORT));
+ }
+ }
+}
+
+static void __init sabre_scan_bus(struct pci_pbm_info *pbm,
+ struct device *parent)
+{
+ static int once;
+
+ /* The APB bridge speaks to the Sabre host PCI bridge
+ * at 66Mhz, but the front side of APB runs at 33Mhz
+ * for both segments.
+ *
+ * Hummingbird systems do not use APB, so they run
+ * at 66MHZ.
+ */
+ if (hummingbird_p)
+ pbm->is_66mhz_capable = 1;
+ else
+ pbm->is_66mhz_capable = 0;
+
+ /* This driver has not been verified to handle
+ * multiple SABREs yet, so trap this.
+ *
+ * Also note that the SABRE host bridge is hardwired
+ * to live at bus 0.
+ */
+ if (once != 0) {
+ printk(KERN_ERR PFX "Multiple controllers unsupported.\n");
+ return;
+ }
+ once++;
+
+ pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+ if (!pbm->pci_bus)
+ return;
+
+ sabre_root_bus = pbm->pci_bus;
+
+ apb_init(pbm->pci_bus);
+
+ sabre_register_error_handlers(pbm);
+}
+
+static void __init sabre_pbm_init(struct pci_pbm_info *pbm,
+ struct of_device *op)
+{
+ psycho_pbm_init_common(pbm, op, "SABRE", PBM_CHIP_TYPE_SABRE);
+ pbm->pci_afsr = pbm->controller_regs + SABRE_PIOAFSR;
+ pbm->pci_afar = pbm->controller_regs + SABRE_PIOAFAR;
+ pbm->pci_csr = pbm->controller_regs + SABRE_PCICTRL;
+ sabre_scan_bus(pbm, &op->dev);
+}
+
+static int __devinit sabre_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ const struct linux_prom64_registers *pr_regs;
+ struct device_node *dp = op->node;
+ struct pci_pbm_info *pbm;
+ u32 upa_portid, dma_mask;
+ struct iommu *iommu;
+ int tsbsize, err;
+ const u32 *vdma;
+ u64 clear_irq;
+
+ hummingbird_p = (match->data != NULL);
+ if (!hummingbird_p) {
+ struct device_node *cpu_dp;
+
+ /* Of course, Sun has to encode things a thousand
+ * different ways, inconsistently.
+ */
+ for_each_node_by_type(cpu_dp, "cpu") {
+ if (!strcmp(cpu_dp->name, "SUNW,UltraSPARC-IIe"))
+ hummingbird_p = 1;
+ }
+ }
+
+ err = -ENOMEM;
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+ goto out_err;
+ }
+
+ iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+ goto out_free_controller;
+ }
+
+ pbm->iommu = iommu;
+
+ upa_portid = of_getintprop_default(dp, "upa-portid", 0xff);
+
+ pbm->portid = upa_portid;
+
+ /*
+ * Map in SABRE register set and report the presence of this SABRE.
+ */
+
+ pr_regs = of_get_property(dp, "reg", NULL);
+ err = -ENODEV;
+ if (!pr_regs) {
+ printk(KERN_ERR PFX "No reg property\n");
+ goto out_free_iommu;
+ }
+
+ /*
+ * First REG in property is base of entire SABRE register space.
+ */
+ pbm->controller_regs = pr_regs[0].phys_addr;
+
+ /* Clear interrupts */
+
+ /* PCI first */
+ for (clear_irq = SABRE_ICLR_A_SLOT0; clear_irq < SABRE_ICLR_B_SLOT0 + 0x80; clear_irq += 8)
+ upa_writeq(0x0UL, pbm->controller_regs + clear_irq);
+
+ /* Then OBIO */
+ for (clear_irq = SABRE_ICLR_SCSI; clear_irq < SABRE_ICLR_SCSI + 0x80; clear_irq += 8)
+ upa_writeq(0x0UL, pbm->controller_regs + clear_irq);
+
+ /* Error interrupts are enabled later after the bus scan. */
+ upa_writeq((SABRE_PCICTRL_MRLEN | SABRE_PCICTRL_SERR |
+ SABRE_PCICTRL_ARBPARK | SABRE_PCICTRL_AEN),
+ pbm->controller_regs + SABRE_PCICTRL);
+
+ /* Now map in PCI config space for entire SABRE. */
+ pbm->config_space = pbm->controller_regs + SABRE_CONFIGSPACE;
+
+ vdma = of_get_property(dp, "virtual-dma", NULL);
+ if (!vdma) {
+ printk(KERN_ERR PFX "No virtual-dma property\n");
+ goto out_free_iommu;
+ }
+
+ dma_mask = vdma[0];
+ switch(vdma[1]) {
+ case 0x20000000:
+ dma_mask |= 0x1fffffff;
+ tsbsize = 64;
+ break;
+ case 0x40000000:
+ dma_mask |= 0x3fffffff;
+ tsbsize = 128;
+ break;
+
+ case 0x80000000:
+ dma_mask |= 0x7fffffff;
+ tsbsize = 128;
+ break;
+ default:
+ printk(KERN_ERR PFX "Strange virtual-dma size.\n");
+ goto out_free_iommu;
+ }
+
+ err = psycho_iommu_init(pbm, tsbsize, vdma[0], dma_mask, SABRE_WRSYNC);
+ if (err)
+ goto out_free_iommu;
+
+ /*
+ * Look for APB underneath.
+ */
+ sabre_pbm_init(pbm, op);
+
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+
+out_free_iommu:
+ kfree(pbm->iommu);
+
+out_free_controller:
+ kfree(pbm);
+
+out_err:
+ return err;
+}
+
+static struct of_device_id __initdata sabre_match[] = {
+ {
+ .name = "pci",
+ .compatible = "pci108e,a001",
+ .data = (void *) 1,
+ },
+ {
+ .name = "pci",
+ .compatible = "pci108e,a000",
+ },
+ {},
+};
+
+static struct of_platform_driver sabre_driver = {
+ .name = DRIVER_NAME,
+ .match_table = sabre_match,
+ .probe = sabre_probe,
+};
+
+static int __init sabre_init(void)
+{
+ return of_register_driver(&sabre_driver, &of_bus_type);
+}
+
+subsys_initcall(sabre_init);
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
new file mode 100644
index 000000000000..45d9dba1ba11
--- /dev/null
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -0,0 +1,1504 @@
+/* pci_schizo.c: SCHIZO/TOMATILLO specific PCI controller support.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/pstate.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+
+#define DRIVER_NAME "schizo"
+#define PFX DRIVER_NAME ": "
+
+/* This is a convention that at least Excalibur and Merlin
+ * follow. I suppose the SCHIZO used in Starcat and friends
+ * will do similar.
+ *
+ * The only way I could see this changing is if the newlink
+ * block requires more space in Schizo's address space than
+ * they predicted, thus requiring an address space reorg when
+ * the newer Schizo is taped out.
+ */
+
+/* Streaming buffer control register. */
+#define SCHIZO_STRBUF_CTRL_LPTR 0x00000000000000f0UL /* LRU Lock Pointer */
+#define SCHIZO_STRBUF_CTRL_LENAB 0x0000000000000008UL /* LRU Lock Enable */
+#define SCHIZO_STRBUF_CTRL_RRDIS 0x0000000000000004UL /* Rerun Disable */
+#define SCHIZO_STRBUF_CTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */
+#define SCHIZO_STRBUF_CTRL_ENAB 0x0000000000000001UL /* Streaming Buffer Enable */
+
+/* IOMMU control register. */
+#define SCHIZO_IOMMU_CTRL_RESV 0xfffffffff9000000UL /* Reserved */
+#define SCHIZO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status */
+#define SCHIZO_IOMMU_CTRL_XLTEERR 0x0000000001000000UL /* Translation Error encountered */
+#define SCHIZO_IOMMU_CTRL_LCKEN 0x0000000000800000UL /* Enable translation locking */
+#define SCHIZO_IOMMU_CTRL_LCKPTR 0x0000000000780000UL /* Translation lock pointer */
+#define SCHIZO_IOMMU_CTRL_TSBSZ 0x0000000000070000UL /* TSB Size */
+#define SCHIZO_IOMMU_TSBSZ_1K 0x0000000000000000UL /* TSB Table 1024 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_2K 0x0000000000010000UL /* TSB Table 2048 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_4K 0x0000000000020000UL /* TSB Table 4096 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_8K 0x0000000000030000UL /* TSB Table 8192 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_16K 0x0000000000040000UL /* TSB Table 16k 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_32K 0x0000000000050000UL /* TSB Table 32k 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_64K 0x0000000000060000UL /* TSB Table 64k 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_128K 0x0000000000070000UL /* TSB Table 128k 8-byte entries */
+#define SCHIZO_IOMMU_CTRL_RESV2 0x000000000000fff8UL /* Reserved */
+#define SCHIZO_IOMMU_CTRL_TBWSZ 0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */
+#define SCHIZO_IOMMU_CTRL_DENAB 0x0000000000000002UL /* Diagnostic mode enable */
+#define SCHIZO_IOMMU_CTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */
+
+/* Schizo config space address format is nearly identical to
+ * that of PSYCHO:
+ *
+ * 32 24 23 16 15 11 10 8 7 2 1 0
+ * ---------------------------------------------------------
+ * |0 0 0 0 0 0 0 0 0| bus | device | function | reg | 0 0 |
+ * ---------------------------------------------------------
+ */
+#define SCHIZO_CONFIG_BASE(PBM) ((PBM)->config_space)
+#define SCHIZO_CONFIG_ENCODE(BUS, DEVFN, REG) \
+ (((unsigned long)(BUS) << 16) | \
+ ((unsigned long)(DEVFN) << 8) | \
+ ((unsigned long)(REG)))
+
+static void *schizo_pci_config_mkaddr(struct pci_pbm_info *pbm,
+ unsigned char bus,
+ unsigned int devfn,
+ int where)
+{
+ if (!pbm)
+ return NULL;
+ bus -= pbm->pci_first_busno;
+ return (void *)
+ (SCHIZO_CONFIG_BASE(pbm) |
+ SCHIZO_CONFIG_ENCODE(bus, devfn, where));
+}
+
+/* SCHIZO error handling support. */
+enum schizo_error_type {
+ UE_ERR, CE_ERR, PCI_ERR, SAFARI_ERR
+};
+
+static DEFINE_SPINLOCK(stc_buf_lock);
+static unsigned long stc_error_buf[128];
+static unsigned long stc_tag_buf[16];
+static unsigned long stc_line_buf[16];
+
+#define SCHIZO_UE_INO 0x30 /* Uncorrectable ECC error */
+#define SCHIZO_CE_INO 0x31 /* Correctable ECC error */
+#define SCHIZO_PCIERR_A_INO 0x32 /* PBM A PCI bus error */
+#define SCHIZO_PCIERR_B_INO 0x33 /* PBM B PCI bus error */
+#define SCHIZO_SERR_INO 0x34 /* Safari interface error */
+
+#define SCHIZO_STC_ERR 0xb800UL /* --> 0xba00 */
+#define SCHIZO_STC_TAG 0xba00UL /* --> 0xba80 */
+#define SCHIZO_STC_LINE 0xbb00UL /* --> 0xbb80 */
+
+#define SCHIZO_STCERR_WRITE 0x2UL
+#define SCHIZO_STCERR_READ 0x1UL
+
+#define SCHIZO_STCTAG_PPN 0x3fffffff00000000UL
+#define SCHIZO_STCTAG_VPN 0x00000000ffffe000UL
+#define SCHIZO_STCTAG_VALID 0x8000000000000000UL
+#define SCHIZO_STCTAG_READ 0x4000000000000000UL
+
+#define SCHIZO_STCLINE_LINDX 0x0000000007800000UL
+#define SCHIZO_STCLINE_SPTR 0x000000000007e000UL
+#define SCHIZO_STCLINE_LADDR 0x0000000000001fc0UL
+#define SCHIZO_STCLINE_EPTR 0x000000000000003fUL
+#define SCHIZO_STCLINE_VALID 0x0000000000600000UL
+#define SCHIZO_STCLINE_FOFN 0x0000000000180000UL
+
+static void __schizo_check_stc_error_pbm(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+{
+ struct strbuf *strbuf = &pbm->stc;
+ unsigned long regbase = pbm->pbm_regs;
+ unsigned long err_base, tag_base, line_base;
+ u64 control;
+ int i;
+
+ err_base = regbase + SCHIZO_STC_ERR;
+ tag_base = regbase + SCHIZO_STC_TAG;
+ line_base = regbase + SCHIZO_STC_LINE;
+
+ spin_lock(&stc_buf_lock);
+
+ /* This is __REALLY__ dangerous. When we put the
+ * streaming buffer into diagnostic mode to probe
+ * it's tags and error status, we _must_ clear all
+ * of the line tag valid bits before re-enabling
+ * the streaming buffer. If any dirty data lives
+ * in the STC when we do this, we will end up
+ * invalidating it before it has a chance to reach
+ * main memory.
+ */
+ control = upa_readq(strbuf->strbuf_control);
+ upa_writeq((control | SCHIZO_STRBUF_CTRL_DENAB),
+ strbuf->strbuf_control);
+ for (i = 0; i < 128; i++) {
+ unsigned long val;
+
+ val = upa_readq(err_base + (i * 8UL));
+ upa_writeq(0UL, err_base + (i * 8UL));
+ stc_error_buf[i] = val;
+ }
+ for (i = 0; i < 16; i++) {
+ stc_tag_buf[i] = upa_readq(tag_base + (i * 8UL));
+ stc_line_buf[i] = upa_readq(line_base + (i * 8UL));
+ upa_writeq(0UL, tag_base + (i * 8UL));
+ upa_writeq(0UL, line_base + (i * 8UL));
+ }
+
+ /* OK, state is logged, exit diagnostic mode. */
+ upa_writeq(control, strbuf->strbuf_control);
+
+ for (i = 0; i < 16; i++) {
+ int j, saw_error, first, last;
+
+ saw_error = 0;
+ first = i * 8;
+ last = first + 8;
+ for (j = first; j < last; j++) {
+ unsigned long errval = stc_error_buf[j];
+ if (errval != 0) {
+ saw_error++;
+ printk("%s: STC_ERR(%d)[wr(%d)rd(%d)]\n",
+ pbm->name,
+ j,
+ (errval & SCHIZO_STCERR_WRITE) ? 1 : 0,
+ (errval & SCHIZO_STCERR_READ) ? 1 : 0);
+ }
+ }
+ if (saw_error != 0) {
+ unsigned long tagval = stc_tag_buf[i];
+ unsigned long lineval = stc_line_buf[i];
+ printk("%s: STC_TAG(%d)[PA(%016lx)VA(%08lx)V(%d)R(%d)]\n",
+ pbm->name,
+ i,
+ ((tagval & SCHIZO_STCTAG_PPN) >> 19UL),
+ (tagval & SCHIZO_STCTAG_VPN),
+ ((tagval & SCHIZO_STCTAG_VALID) ? 1 : 0),
+ ((tagval & SCHIZO_STCTAG_READ) ? 1 : 0));
+
+ /* XXX Should spit out per-bank error information... -DaveM */
+ printk("%s: STC_LINE(%d)[LIDX(%lx)SP(%lx)LADDR(%lx)EP(%lx)"
+ "V(%d)FOFN(%d)]\n",
+ pbm->name,
+ i,
+ ((lineval & SCHIZO_STCLINE_LINDX) >> 23UL),
+ ((lineval & SCHIZO_STCLINE_SPTR) >> 13UL),
+ ((lineval & SCHIZO_STCLINE_LADDR) >> 6UL),
+ ((lineval & SCHIZO_STCLINE_EPTR) >> 0UL),
+ ((lineval & SCHIZO_STCLINE_VALID) ? 1 : 0),
+ ((lineval & SCHIZO_STCLINE_FOFN) ? 1 : 0));
+ }
+ }
+
+ spin_unlock(&stc_buf_lock);
+}
+
+/* IOMMU is per-PBM in Schizo, so interrogate both for anonymous
+ * controller level errors.
+ */
+
+#define SCHIZO_IOMMU_TAG 0xa580UL
+#define SCHIZO_IOMMU_DATA 0xa600UL
+
+#define SCHIZO_IOMMU_TAG_CTXT 0x0000001ffe000000UL
+#define SCHIZO_IOMMU_TAG_ERRSTS 0x0000000001800000UL
+#define SCHIZO_IOMMU_TAG_ERR 0x0000000000400000UL
+#define SCHIZO_IOMMU_TAG_WRITE 0x0000000000200000UL
+#define SCHIZO_IOMMU_TAG_STREAM 0x0000000000100000UL
+#define SCHIZO_IOMMU_TAG_SIZE 0x0000000000080000UL
+#define SCHIZO_IOMMU_TAG_VPAGE 0x000000000007ffffUL
+
+#define SCHIZO_IOMMU_DATA_VALID 0x0000000100000000UL
+#define SCHIZO_IOMMU_DATA_CACHE 0x0000000040000000UL
+#define SCHIZO_IOMMU_DATA_PPAGE 0x000000003fffffffUL
+
+static void schizo_check_iommu_error_pbm(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+{
+ struct iommu *iommu = pbm->iommu;
+ unsigned long iommu_tag[16];
+ unsigned long iommu_data[16];
+ unsigned long flags;
+ u64 control;
+ int i;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ control = upa_readq(iommu->iommu_control);
+ if (control & SCHIZO_IOMMU_CTRL_XLTEERR) {
+ unsigned long base;
+ char *type_string;
+
+ /* Clear the error encountered bit. */
+ control &= ~SCHIZO_IOMMU_CTRL_XLTEERR;
+ upa_writeq(control, iommu->iommu_control);
+
+ switch((control & SCHIZO_IOMMU_CTRL_XLTESTAT) >> 25UL) {
+ case 0:
+ type_string = "Protection Error";
+ break;
+ case 1:
+ type_string = "Invalid Error";
+ break;
+ case 2:
+ type_string = "TimeOut Error";
+ break;
+ case 3:
+ default:
+ type_string = "ECC Error";
+ break;
+ };
+ printk("%s: IOMMU Error, type[%s]\n",
+ pbm->name, type_string);
+
+ /* Put the IOMMU into diagnostic mode and probe
+ * it's TLB for entries with error status.
+ *
+ * It is very possible for another DVMA to occur
+ * while we do this probe, and corrupt the system
+ * further. But we are so screwed at this point
+ * that we are likely to crash hard anyways, so
+ * get as much diagnostic information to the
+ * console as we can.
+ */
+ upa_writeq(control | SCHIZO_IOMMU_CTRL_DENAB,
+ iommu->iommu_control);
+
+ base = pbm->pbm_regs;
+
+ for (i = 0; i < 16; i++) {
+ iommu_tag[i] =
+ upa_readq(base + SCHIZO_IOMMU_TAG + (i * 8UL));
+ iommu_data[i] =
+ upa_readq(base + SCHIZO_IOMMU_DATA + (i * 8UL));
+
+ /* Now clear out the entry. */
+ upa_writeq(0, base + SCHIZO_IOMMU_TAG + (i * 8UL));
+ upa_writeq(0, base + SCHIZO_IOMMU_DATA + (i * 8UL));
+ }
+
+ /* Leave diagnostic mode. */
+ upa_writeq(control, iommu->iommu_control);
+
+ for (i = 0; i < 16; i++) {
+ unsigned long tag, data;
+
+ tag = iommu_tag[i];
+ if (!(tag & SCHIZO_IOMMU_TAG_ERR))
+ continue;
+
+ data = iommu_data[i];
+ switch((tag & SCHIZO_IOMMU_TAG_ERRSTS) >> 23UL) {
+ case 0:
+ type_string = "Protection Error";
+ break;
+ case 1:
+ type_string = "Invalid Error";
+ break;
+ case 2:
+ type_string = "TimeOut Error";
+ break;
+ case 3:
+ default:
+ type_string = "ECC Error";
+ break;
+ };
+ printk("%s: IOMMU TAG(%d)[error(%s) ctx(%x) wr(%d) str(%d) "
+ "sz(%dK) vpg(%08lx)]\n",
+ pbm->name, i, type_string,
+ (int)((tag & SCHIZO_IOMMU_TAG_CTXT) >> 25UL),
+ ((tag & SCHIZO_IOMMU_TAG_WRITE) ? 1 : 0),
+ ((tag & SCHIZO_IOMMU_TAG_STREAM) ? 1 : 0),
+ ((tag & SCHIZO_IOMMU_TAG_SIZE) ? 64 : 8),
+ (tag & SCHIZO_IOMMU_TAG_VPAGE) << IOMMU_PAGE_SHIFT);
+ printk("%s: IOMMU DATA(%d)[valid(%d) cache(%d) ppg(%016lx)]\n",
+ pbm->name, i,
+ ((data & SCHIZO_IOMMU_DATA_VALID) ? 1 : 0),
+ ((data & SCHIZO_IOMMU_DATA_CACHE) ? 1 : 0),
+ (data & SCHIZO_IOMMU_DATA_PPAGE) << IOMMU_PAGE_SHIFT);
+ }
+ }
+ if (pbm->stc.strbuf_enabled)
+ __schizo_check_stc_error_pbm(pbm, type);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void schizo_check_iommu_error(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+{
+ schizo_check_iommu_error_pbm(pbm, type);
+ if (pbm->sibling)
+ schizo_check_iommu_error_pbm(pbm->sibling, type);
+}
+
+/* Uncorrectable ECC error status gathering. */
+#define SCHIZO_UE_AFSR 0x10030UL
+#define SCHIZO_UE_AFAR 0x10038UL
+
+#define SCHIZO_UEAFSR_PPIO 0x8000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_PDRD 0x4000000000000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_PDWR 0x2000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_SPIO 0x1000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_SDMA 0x0800000000000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_ERRPNDG 0x0300000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_BMSK 0x000003ff00000000UL /* Safari */
+#define SCHIZO_UEAFSR_QOFF 0x00000000c0000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_AID 0x000000001f000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_PARTIAL 0x0000000000800000UL /* Safari */
+#define SCHIZO_UEAFSR_OWNEDIN 0x0000000000400000UL /* Safari */
+#define SCHIZO_UEAFSR_MTAGSYND 0x00000000000f0000UL /* Safari */
+#define SCHIZO_UEAFSR_MTAG 0x000000000000e000UL /* Safari */
+#define SCHIZO_UEAFSR_ECCSYND 0x00000000000001ffUL /* Safari */
+
+static irqreturn_t schizo_ue_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SCHIZO_UE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SCHIZO_UE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported, limit;
+
+ /* Latch uncorrectable error status. */
+ afar = upa_readq(afar_reg);
+
+ /* If either of the error pending bits are set in the
+ * AFSR, the error status is being actively updated by
+ * the hardware and we must re-read to get a clean value.
+ */
+ limit = 1000;
+ do {
+ afsr = upa_readq(afsr_reg);
+ } while ((afsr & SCHIZO_UEAFSR_ERRPNDG) != 0 && --limit);
+
+ /* Clear the primary/secondary error status bits. */
+ error_bits = afsr &
+ (SCHIZO_UEAFSR_PPIO | SCHIZO_UEAFSR_PDRD | SCHIZO_UEAFSR_PDWR |
+ SCHIZO_UEAFSR_SPIO | SCHIZO_UEAFSR_SDMA);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Uncorrectable Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & SCHIZO_UEAFSR_PPIO) ?
+ "PIO" :
+ ((error_bits & SCHIZO_UEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SCHIZO_UEAFSR_PDWR) ?
+ "DMA Write" : "???")))));
+ printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n",
+ pbm->name,
+ (afsr & SCHIZO_UEAFSR_BMSK) >> 32UL,
+ (afsr & SCHIZO_UEAFSR_QOFF) >> 30UL,
+ (afsr & SCHIZO_UEAFSR_AID) >> 24UL);
+ printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_sync[%lx]\n",
+ pbm->name,
+ (afsr & SCHIZO_UEAFSR_PARTIAL) ? 1 : 0,
+ (afsr & SCHIZO_UEAFSR_OWNEDIN) ? 1 : 0,
+ (afsr & SCHIZO_UEAFSR_MTAG) >> 13UL,
+ (afsr & SCHIZO_UEAFSR_MTAGSYND) >> 16UL,
+ (afsr & SCHIZO_UEAFSR_ECCSYND) >> 0UL);
+ printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: UE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SCHIZO_UEAFSR_SPIO) {
+ reported++;
+ printk("(PIO)");
+ }
+ if (afsr & SCHIZO_UEAFSR_SDMA) {
+ reported++;
+ printk("(DMA)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* Interrogate IOMMU for error status. */
+ schizo_check_iommu_error(pbm, UE_ERR);
+
+ return IRQ_HANDLED;
+}
+
+#define SCHIZO_CE_AFSR 0x10040UL
+#define SCHIZO_CE_AFAR 0x10048UL
+
+#define SCHIZO_CEAFSR_PPIO 0x8000000000000000UL
+#define SCHIZO_CEAFSR_PDRD 0x4000000000000000UL
+#define SCHIZO_CEAFSR_PDWR 0x2000000000000000UL
+#define SCHIZO_CEAFSR_SPIO 0x1000000000000000UL
+#define SCHIZO_CEAFSR_SDMA 0x0800000000000000UL
+#define SCHIZO_CEAFSR_ERRPNDG 0x0300000000000000UL
+#define SCHIZO_CEAFSR_BMSK 0x000003ff00000000UL
+#define SCHIZO_CEAFSR_QOFF 0x00000000c0000000UL
+#define SCHIZO_CEAFSR_AID 0x000000001f000000UL
+#define SCHIZO_CEAFSR_PARTIAL 0x0000000000800000UL
+#define SCHIZO_CEAFSR_OWNEDIN 0x0000000000400000UL
+#define SCHIZO_CEAFSR_MTAGSYND 0x00000000000f0000UL
+#define SCHIZO_CEAFSR_MTAG 0x000000000000e000UL
+#define SCHIZO_CEAFSR_ECCSYND 0x00000000000001ffUL
+
+static irqreturn_t schizo_ce_intr(int irq, void *dev_id)
+{
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SCHIZO_CE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SCHIZO_CE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported, limit;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+
+ /* If either of the error pending bits are set in the
+ * AFSR, the error status is being actively updated by
+ * the hardware and we must re-read to get a clean value.
+ */
+ limit = 1000;
+ do {
+ afsr = upa_readq(afsr_reg);
+ } while ((afsr & SCHIZO_UEAFSR_ERRPNDG) != 0 && --limit);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (SCHIZO_CEAFSR_PPIO | SCHIZO_CEAFSR_PDRD | SCHIZO_CEAFSR_PDWR |
+ SCHIZO_CEAFSR_SPIO | SCHIZO_CEAFSR_SDMA);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Correctable Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & SCHIZO_CEAFSR_PPIO) ?
+ "PIO" :
+ ((error_bits & SCHIZO_CEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SCHIZO_CEAFSR_PDWR) ?
+ "DMA Write" : "???")))));
+
+ /* XXX Use syndrome and afar to print out module string just like
+ * XXX UDB CE trap handler does... -DaveM
+ */
+ printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n",
+ pbm->name,
+ (afsr & SCHIZO_UEAFSR_BMSK) >> 32UL,
+ (afsr & SCHIZO_UEAFSR_QOFF) >> 30UL,
+ (afsr & SCHIZO_UEAFSR_AID) >> 24UL);
+ printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_sync[%lx]\n",
+ pbm->name,
+ (afsr & SCHIZO_UEAFSR_PARTIAL) ? 1 : 0,
+ (afsr & SCHIZO_UEAFSR_OWNEDIN) ? 1 : 0,
+ (afsr & SCHIZO_UEAFSR_MTAG) >> 13UL,
+ (afsr & SCHIZO_UEAFSR_MTAGSYND) >> 16UL,
+ (afsr & SCHIZO_UEAFSR_ECCSYND) >> 0UL);
+ printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);<