From fa3959f457109cc7d082b86ea6daae927982815b Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 24 Apr 2008 01:27:02 +0200 Subject: mv643xx_eth: get rid of static variables, allow multiple instances Move mv643xx_eth's static state (ethernet register block base address and MII management interface spinlock) into a struct hanging off the shared platform device. This is necessary to support chips that contain multiple mv643xx_eth silicon blocks. Signed-off-by: Lennert Buytenhek Acked-by: Nicolas Pitre Signed-off-by: Dale Farnsworth --- arch/arm/mach-orion5x/common.c | 2 ++ arch/powerpc/platforms/chrp/pegasos_eth.c | 4 ++++ arch/powerpc/sysdev/mv64x60_dev.c | 2 ++ arch/ppc/syslib/mv64x60.c | 3 +++ 4 files changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 439c7784af02..2bedf71659cb 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -223,7 +223,9 @@ static struct platform_device orion5x_eth = { void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data) { + eth_data->shared = &orion5x_eth_shared; orion5x_eth.dev.platform_data = eth_data; + platform_device_register(&orion5x_eth_shared); platform_device_register(&orion5x_eth); } diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c index 5bcc58d9a4dd..130ff72d99dd 100644 --- a/arch/powerpc/platforms/chrp/pegasos_eth.c +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -58,7 +58,9 @@ static struct resource mv643xx_eth0_resources[] = { static struct mv643xx_eth_platform_data eth0_pd = { + .shared = &mv643xx_eth_shared_device, .port_number = 0, + .tx_sram_addr = PEGASOS2_SRAM_BASE_ETH0, .tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE, .tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16, @@ -88,7 +90,9 @@ static struct resource mv643xx_eth1_resources[] = { }; static struct mv643xx_eth_platform_data eth1_pd = { + .shared = &mv643xx_eth_shared_device, .port_number = 1, + .tx_sram_addr = PEGASOS2_SRAM_BASE_ETH1, .tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE, .tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16, diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index 41af1223e2a0..a132e0de8ca5 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -239,6 +239,8 @@ static int __init mv64x60_eth_device_setup(struct device_node *np, int id, memset(&pdata, 0, sizeof(pdata)); + pdata.shared = shared_pdev; + prop = of_get_property(np, "reg", NULL); if (!prop) return -ENODEV; diff --git a/arch/ppc/syslib/mv64x60.c b/arch/ppc/syslib/mv64x60.c index 90fe904d3614..418f3053de52 100644 --- a/arch/ppc/syslib/mv64x60.c +++ b/arch/ppc/syslib/mv64x60.c @@ -341,6 +341,7 @@ static struct resource mv64x60_eth0_resources[] = { }; static struct mv643xx_eth_platform_data eth0_pd = { + .shared = &mv64x60_eth_shared_device; .port_number = 0, }; @@ -366,6 +367,7 @@ static struct resource mv64x60_eth1_resources[] = { }; static struct mv643xx_eth_platform_data eth1_pd = { + .shared = &mv64x60_eth_shared_device; .port_number = 1, }; @@ -391,6 +393,7 @@ static struct resource mv64x60_eth2_resources[] = { }; static struct mv643xx_eth_platform_data eth2_pd = { + .shared = &mv64x60_eth_shared_device; .port_number = 2, }; -- cgit v1.2.3 From 98db6f193c93e9b4729215af2c9101210e11d26c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 29 Apr 2008 22:38:48 +0200 Subject: x86: fix section mismatch in pci_scan_bus Fix following section mismatch warning: WARNING: vmlinux.o(.text+0x275616): Section mismatch in reference from the function pci_scan_bus() to the function .devinit.text:pci_scan_bus_parented() The warning was seen with a CONFIG_DEBUG_SECTION_MISMATCH=y build. The inline function pci_scan_bus refer to functions annotated __devinit - so annotate it __devinit too. This revealed a few x86 specific functions that were only used from __init or __devinit context. So annotate these __devinit and the warning was killed. The added include in pci.h was not strictly required but added to avoid being dependent on indirect includes. Signed-off-by: Sam Ravnborg Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 4 ++-- include/linux/pci.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2a4d751818b7..88b5416cf009 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -489,7 +489,7 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } -struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) +struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) { struct pci_bus *bus = NULL; struct pci_sysdata *sd; @@ -512,7 +512,7 @@ struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) return bus; } -struct pci_bus *pci_scan_bus_with_sysdata(int busno) +struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno) { return pci_scan_bus_on_node(busno, &pci_root_ops, -1); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 96acd0dae241..a59517b4930f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -44,6 +44,7 @@ #include #include +#include #include #include #include @@ -474,7 +475,7 @@ extern struct pci_bus *pci_find_bus(int domain, int busnr); void pci_bus_add_devices(struct pci_bus *bus); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); -static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, +static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) { struct pci_bus *root_bus; -- cgit v1.2.3 From 70b9f7dc1435412ca2b89b13a8353bd9915a7189 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 28 Apr 2008 16:27:23 -0700 Subject: x86/pci: remove flag in pci_cfg_space_size_ext so let pci_cfg_space_size call it directly without flag. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/fixup.c | 2 +- drivers/pci/probe.c | 33 +++++++++++++++++---------------- include/linux/pci.h | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b60b2abd480c..ff3a6a336342 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -502,7 +502,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, */ static void fam10h_pci_cfg_space_size(struct pci_dev *dev) { - dev->cfg_size = pci_cfg_space_size_ext(dev, 0); + dev->cfg_size = pci_cfg_space_size_ext(dev); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4a55bf380957..3706ce7972dd 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,13 +842,25 @@ static void set_pcie_port_type(struct pci_dev *pdev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. */ -int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) +int pci_cfg_space_size_ext(struct pci_dev *dev) { - int pos; u32 status; - if (!check_exp_pcix) - goto skip; + if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) + goto fail; + if (status == 0xffffffff) + goto fail; + + return PCI_CFG_SPACE_EXP_SIZE; + + fail: + return PCI_CFG_SPACE_SIZE; +} + +int pci_cfg_space_size(struct pci_dev *dev) +{ + int pos; + u32 status; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { @@ -861,23 +873,12 @@ int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) goto fail; } - skip: - if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) - goto fail; - if (status == 0xffffffff) - goto fail; - - return PCI_CFG_SPACE_EXP_SIZE; + return pci_cfg_space_size_ext(dev); fail: return PCI_CFG_SPACE_SIZE; } -int pci_cfg_space_size(struct pci_dev *dev) -{ - return pci_cfg_space_size_ext(dev, 1); -} - static void pci_release_bus_bridge_dev(struct device *dev) { kfree(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index a59517b4930f..509159bcd4e7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -667,7 +667,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), void *userdata); -int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix); +int pci_cfg_space_size_ext(struct pci_dev *dev); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); -- cgit v1.2.3 From d35c7b0e54a596c5a8134d75999b7f391a9c6550 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 3 May 2008 15:10:37 -0400 Subject: unified (weak) sys_pipe implementation This replaces the duplicated arch-specific versions of "sys_pipe()" with one unified implementation. This removes almost 250 lines of duplicated code. It's marked __weak, so that *if* an architecture wants to override the default implementation it can do so by simply having its own replacement version, since many architectures use alternate calling conventions for the 'pipe()' system call for legacy reasons (ie traditional UNIX implementations often return the two file descriptors in registers) I still haven't changed the cris version even though Linus says the BKL isn't needed. The arch maintainer can easily do it if there are really no obstacles. Signed-off-by: Ulrich Drepper Signed-off-by: Linus Torvalds --- arch/arm/kernel/sys_arm.c | 17 ----------------- arch/avr32/kernel/sys_avr32.c | 13 ------------- arch/blackfin/kernel/sys_bfin.c | 17 ----------------- arch/frv/kernel/sys_frv.c | 17 ----------------- arch/h8300/kernel/sys_h8300.c | 17 ----------------- arch/m68k/kernel/sys_m68k.c | 17 ----------------- arch/m68knommu/kernel/sys_m68k.c | 17 ----------------- arch/mn10300/kernel/sys_mn10300.c | 17 ----------------- arch/parisc/kernel/sys_parisc.c | 13 ------------- arch/powerpc/kernel/syscalls.c | 17 ----------------- arch/s390/kernel/sys_s390.c | 17 ----------------- arch/sh/kernel/sys_sh64.c | 17 ----------------- arch/um/kernel/syscall.c | 17 ----------------- arch/v850/kernel/syscalls.c | 17 ----------------- arch/x86/kernel/sys_i386_32.c | 17 ----------------- arch/x86/kernel/sys_x86_64.c | 17 ----------------- fs/pipe.c | 17 +++++++++++++++++ include/asm-powerpc/syscalls.h | 2 +- 18 files changed, 18 insertions(+), 265 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 9bd1870d980e..0128687ba0f7 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -34,23 +34,6 @@ extern unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr); -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c index 8deb6003ee62..8e8911e55c8f 100644 --- a/arch/avr32/kernel/sys_avr32.c +++ b/arch/avr32/kernel/sys_avr32.c @@ -14,19 +14,6 @@ #include #include -asmlinkage int sys_pipe(unsigned long __user *filedes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(filedes, fd, sizeof(fd))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, off_t offset) diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c index efb7b25a2633..fce49d7cf001 100644 --- a/arch/blackfin/kernel/sys_bfin.c +++ b/arch/blackfin/kernel/sys_bfin.c @@ -45,23 +45,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2 * sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2(unsigned long addr, unsigned long len, diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c index 04c6b1677ccf..49b2cf2c38f3 100644 --- a/arch/frv/kernel/sys_frv.c +++ b/arch/frv/kernel/sys_frv.c @@ -28,23 +28,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index 00608be6d567..2745656dcc52 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -27,23 +27,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index e892f17ba3fa..7f54efaf60bb 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -30,23 +30,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index 65f7a95f056e..700281638629 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -28,23 +28,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index 5f17a1ebc825..bca5a84dc72c 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -28,23 +28,6 @@ #define MIN_MAP_ADDR PAGE_SIZE /* minimum fixed mmap address */ -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2 * sizeof(int))) - error = -EFAULT; - } - return error; -} - /* * memory mapping syscall */ diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 4f589216b39e..71b31957c8f1 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -33,19 +33,6 @@ #include #include -int sys_pipe(int __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - static unsigned long get_unshared_area(unsigned long addr, unsigned long len) { struct vm_area_struct *vma; diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index e722a4eeb5d0..4fe69ca24481 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -136,23 +136,6 @@ int sys_ipc(uint call, int first, unsigned long second, long third, return ret; } -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -int sys_pipe(int __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 988d0d64c2c8..5fdb799062b7 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -32,23 +32,6 @@ #include #include "entry.h" -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 578004d71e02..91fb8445a5a0 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -30,23 +30,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 9cffc628a37e..128ee85bc8d9 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -73,23 +73,6 @@ long old_mmap(unsigned long addr, unsigned long len, out: return err; } -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -long sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - long error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, sizeof(fd))) - error = -EFAULT; - } - return error; -} - long sys_uname(struct old_utsname __user * name) { diff --git a/arch/v850/kernel/syscalls.c b/arch/v850/kernel/syscalls.c index 003db9c8c44a..1a83daf8e24f 100644 --- a/arch/v850/kernel/syscalls.c +++ b/arch/v850/kernel/syscalls.c @@ -132,23 +132,6 @@ sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth) return ret; } -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -int sys_pipe (int *fildes) -{ - int fd[2]; - int error; - - error = do_pipe (fd); - if (!error) { - if (copy_to_user (fildes, fd, 2*sizeof (int))) - error = -EFAULT; - } - return error; -} - static inline unsigned long do_mmap2 (unsigned long addr, size_t len, unsigned long prot, unsigned long flags, diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index a86d26f036e1..d2ab52cc1d6b 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -22,23 +22,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index bd802a5e1aa3..3b360ef33817 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -17,23 +17,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(int __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) { diff --git a/fs/pipe.c b/fs/pipe.c index f73492b6817e..3499f9ff6316 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1075,6 +1075,23 @@ int do_pipe(int *fd) return error; } +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way Unix traditionally does this, though. + */ +asmlinkage long __weak sys_pipe(int __user *fildes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, sizeof(fd))) + error = -EFAULT; + } + return error; +} + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h index b3ca41fc8bb1..2b8a458f990a 100644 --- a/include/asm-powerpc/syscalls.h +++ b/include/asm-powerpc/syscalls.h @@ -30,7 +30,7 @@ asmlinkage int sys_fork(unsigned long p1, unsigned long p2, asmlinkage int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4, unsigned long p5, unsigned long p6, struct pt_regs *regs); -asmlinkage int sys_pipe(int __user *fildes); +asmlinkage long sys_pipe(int __user *fildes); asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, struct sigaction __user *oact, size_t sigsetsize); -- cgit v1.2.3 From 81d6ec6b36bdf30e283ab98e7646571484401dbb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 3 May 2008 21:00:55 -0700 Subject: Revert "[SPARC64]: Wrap SMP IPIs with irq_enter()/irq_exit()." This reverts commit 2664ef44cf5053d2b7dff01cecac70bc601a5f68. Ingo moved around where the softlockup dependency sits so this change is no longer necessary. Signed-off-by: David S. Miller --- arch/sparc64/kernel/smp.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 3aba47624df4..0d6403a630ac 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -865,21 +865,14 @@ void smp_call_function_client(int irq, struct pt_regs *regs) void *info = call_data->info; clear_softint(1 << irq); - - irq_enter(); - - if (!call_data->wait) { - /* let initiator proceed after getting data */ - atomic_inc(&call_data->finished); - } - - func(info); - - irq_exit(); - if (call_data->wait) { /* let initiator proceed only after completion */ + func(info); atomic_inc(&call_data->finished); + } else { + /* let initiator proceed after getting data */ + atomic_inc(&call_data->finished); + func(info); } } @@ -1041,9 +1034,7 @@ void smp_receive_signal(int cpu) void smp_receive_signal_client(int irq, struct pt_regs *regs) { - irq_enter(); clear_softint(1 << irq); - irq_exit(); } void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) @@ -1051,8 +1042,6 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) struct mm_struct *mm; unsigned long flags; - irq_enter(); - clear_softint(1 << irq); /* See if we need to allocate a new TLB context because @@ -1072,8 +1061,6 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - - irq_exit(); } void smp_new_mmu_context_version(void) @@ -1239,8 +1226,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) { clear_softint(1 << irq); - irq_enter(); - preempt_disable(); __asm__ __volatile__("flushw"); @@ -1253,8 +1238,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) prom_world(0); preempt_enable(); - - irq_exit(); } /* /proc/profile writes can call this, don't __init it please. */ -- cgit v1.2.3 From d56f546db97795dca5aa575b00b0e9886895ac87 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 10:13:16 +0800 Subject: KVM: VMX: EPT Feature Detection Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++----- arch/x86/kvm/vmx.h | 25 ++++++++++++++++++++++ 2 files changed, 83 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e5d6645b90d..d93250d11caa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0); static int flexpriority_enabled = 1; module_param(flexpriority_enabled, bool, 0); +static int enable_ept; +module_param(enable_ept, bool, 0); + struct vmcs { u32 revision_id; u32 abort; @@ -107,6 +110,11 @@ static struct vmcs_config { u32 vmentry_ctrl; } vmcs_config; +struct vmx_capability { + u32 ept; + u32 vpid; +} vmx_capability; + #define VMX_SEGMENT_FIELD(seg) \ [VCPU_SREG_##seg] = { \ .selector = GUEST_##seg##_SELECTOR, \ @@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); } +static inline int cpu_has_vmx_invept_individual_addr(void) +{ + return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT)); +} + +static inline int cpu_has_vmx_invept_context(void) +{ + return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT)); +} + +static inline int cpu_has_vmx_invept_global(void) +{ + return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT)); +} + +static inline int cpu_has_vmx_ept(void) +{ + return (vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_EPT); +} + +static inline int vm_need_ept(void) +{ + return (cpu_has_vmx_ept() && enable_ept); +} + static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) { return ((cpu_has_vmx_virtualize_apic_accesses()) && @@ -985,7 +1019,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) { u32 vmx_msr_low, vmx_msr_high; - u32 min, opt; + u32 min, opt, min2, opt2; u32 _pin_based_exec_control = 0; u32 _cpu_based_exec_control = 0; u32 _cpu_based_2nd_exec_control = 0; @@ -1003,6 +1037,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | #endif + CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING; @@ -1018,11 +1054,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) ~CPU_BASED_CR8_STORE_EXITING; #endif if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { - min = 0; - opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + min2 = 0; + opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_WBINVD_EXITING | - SECONDARY_EXEC_ENABLE_VPID; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, + SECONDARY_EXEC_ENABLE_VPID | + SECONDARY_EXEC_ENABLE_EPT; + if (adjust_vmx_controls(min2, opt2, + MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) return -EIO; } @@ -1031,6 +1069,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; #endif + if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { + /* CR3 accesses don't need to cause VM Exits when EPT enabled */ + min &= ~(CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, + &_cpu_based_exec_control) < 0) + return -EIO; + rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, + vmx_capability.ept, vmx_capability.vpid); + } min = 0; #ifdef CONFIG_X86_64 @@ -1638,6 +1686,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) CPU_BASED_CR8_LOAD_EXITING; #endif } + if (!vm_need_ept()) + exec_control |= CPU_BASED_CR3_STORE_EXITING | + CPU_BASED_CR3_LOAD_EXITING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); if (cpu_has_secondary_exec_ctrls()) { @@ -1647,6 +1698,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; if (vmx->vpid == 0) exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; + if (!vm_need_ept()) + exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 5dff4606b988..5f7fdc965d39 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -35,6 +35,8 @@ #define CPU_BASED_MWAIT_EXITING 0x00000400 #define CPU_BASED_RDPMC_EXITING 0x00000800 #define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 #define CPU_BASED_CR8_STORE_EXITING 0x00100000 #define CPU_BASED_TPR_SHADOW 0x00200000 @@ -49,6 +51,7 @@ * Definitions of Secondary Processor-Based VM-Execution Controls. */ #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 @@ -100,10 +103,22 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, APIC_ACCESS_ADDR = 0x00002014, APIC_ACCESS_ADDR_HIGH = 0x00002015, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, VMCS_LINK_POINTER_HIGH = 0x00002801, GUEST_IA32_DEBUGCTL = 0x00002802, GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, PIN_BASED_VM_EXEC_CONTROL = 0x00004000, CPU_BASED_VM_EXEC_CONTROL = 0x00004002, EXCEPTION_BITMAP = 0x00004004, @@ -226,6 +241,8 @@ enum vmcs_field { #define EXIT_REASON_MWAIT_INSTRUCTION 36 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 /* @@ -316,6 +333,7 @@ enum vmcs_field { #define MSR_IA32_VMX_CR4_FIXED1 0x489 #define MSR_IA32_VMX_VMCS_ENUM 0x48a #define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c #define MSR_IA32_FEATURE_CONTROL 0x3a #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 @@ -327,4 +345,11 @@ enum vmcs_field { #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 #define VMX_VPID_EXTENT_ALL_CONTEXT 2 +#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 +#define VMX_EPT_EXTENT_CONTEXT 1 +#define VMX_EPT_EXTENT_GLOBAL 2 +#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) +#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) +#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) + #endif -- cgit v1.2.3 From 8c6d6adc6b87daa364ee9deb2e966021d37a7622 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 10:17:08 +0800 Subject: KVM: MMU: Move some definitions to a header file Move some definitions to mmu.h in order to allow building common table entries between EPT and non-EPT. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 34 ---------------------------------- arch/x86/kvm/mmu.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2ad6f5481671..bcfaf7e4a2dc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -79,36 +79,6 @@ static int dbg = 1; } #endif -#define PT64_PT_BITS 9 -#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) -#define PT32_PT_BITS 10 -#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS) - -#define PT_WRITABLE_SHIFT 1 - -#define PT_PRESENT_MASK (1ULL << 0) -#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) -#define PT_USER_MASK (1ULL << 2) -#define PT_PWT_MASK (1ULL << 3) -#define PT_PCD_MASK (1ULL << 4) -#define PT_ACCESSED_MASK (1ULL << 5) -#define PT_DIRTY_MASK (1ULL << 6) -#define PT_PAGE_SIZE_MASK (1ULL << 7) -#define PT_PAT_MASK (1ULL << 7) -#define PT_GLOBAL_MASK (1ULL << 8) -#define PT64_NX_SHIFT 63 -#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT) - -#define PT_PAT_SHIFT 7 -#define PT_DIR_PAT_SHIFT 12 -#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT) - -#define PT32_DIR_PSE36_SIZE 4 -#define PT32_DIR_PSE36_SHIFT 13 -#define PT32_DIR_PSE36_MASK \ - (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) - - #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 @@ -154,10 +124,6 @@ static int dbg = 1; #define PFERR_USER_MASK (1U << 2) #define PFERR_FETCH_MASK (1U << 4) -#define PT64_ROOT_LEVEL 4 -#define PT32_ROOT_LEVEL 2 -#define PT32E_ROOT_LEVEL 3 - #define PT_DIRECTORY_LEVEL 2 #define PT_PAGE_TABLE_LEVEL 1 diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e64e9f56a65e..a4fcb78ebf42 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -9,6 +9,39 @@ #define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL #endif +#define PT64_PT_BITS 9 +#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) +#define PT32_PT_BITS 10 +#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS) + +#define PT_WRITABLE_SHIFT 1 + +#define PT_PRESENT_MASK (1ULL << 0) +#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) +#define PT_USER_MASK (1ULL << 2) +#define PT_PWT_MASK (1ULL << 3) +#define PT_PCD_MASK (1ULL << 4) +#define PT_ACCESSED_MASK (1ULL << 5) +#define PT_DIRTY_MASK (1ULL << 6) +#define PT_PAGE_SIZE_MASK (1ULL << 7) +#define PT_PAT_MASK (1ULL << 7) +#define PT_GLOBAL_MASK (1ULL << 8) +#define PT64_NX_SHIFT 63 +#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT) + +#define PT_PAT_SHIFT 7 +#define PT_DIR_PAT_SHIFT 12 +#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT) + +#define PT32_DIR_PSE36_SIZE 4 +#define PT32_DIR_PSE36_SHIFT 13 +#define PT32_DIR_PSE36_MASK \ + (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) + +#define PT64_ROOT_LEVEL 4 +#define PT32_ROOT_LEVEL 2 +#define PT32E_ROOT_LEVEL 3 + static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) { if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) -- cgit v1.2.3 From 67253af52e9133fb4cfbf7a2448a2d3524d1fa6c Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 10:20:22 +0800 Subject: KVM: Add kvm_x86_ops get_tdp_level() The function get_tdp_level() provided the number of tdp level for EPT and NPT rather than the NPT specific macro. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/mmu.h | 6 ------ arch/x86/kvm/svm.c | 10 ++++++++++ arch/x86/kvm/vmx.c | 6 ++++++ arch/x86/kvm/vmx.h | 1 + include/asm-x86/kvm_host.h | 1 + 6 files changed, 20 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bcfaf7e4a2dc..20fb3c852db7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1343,7 +1343,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, - largepage, gfn, pfn, TDP_ROOT_LEVEL); + largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); spin_unlock(&vcpu->kvm->mmu_lock); return r; @@ -1450,7 +1450,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->page_fault = tdp_page_fault; context->free = nonpaging_free; context->prefetch_page = nonpaging_prefetch_page; - context->shadow_root_level = TDP_ROOT_LEVEL; + context->shadow_root_level = kvm_x86_ops->get_tdp_level(); context->root_hpa = INVALID_PAGE; if (!is_paging(vcpu)) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index a4fcb78ebf42..1730757bbc7a 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -3,12 +3,6 @@ #include -#ifdef CONFIG_X86_64 -#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL -#else -#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL -#endif - #define PT64_PT_BITS 9 #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) #define PT32_PT_BITS 10 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 89e0be2c10d0..ab22615eee89 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } +static int get_npt_level(void) +{ +#ifdef CONFIG_X86_64 + return PT64_ROOT_LEVEL; +#else + return PT32E_ROOT_LEVEL; +#endif +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = { .inject_pending_vectors = do_interrupt_requests, .set_tss_addr = svm_set_tss_addr, + .get_tdp_level = get_npt_level, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d93250d11caa..98e4f2b036de 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2788,6 +2788,11 @@ static void __init vmx_check_processor_compat(void *rtn) } } +static int get_ept_level(void) +{ + return VMX_EPT_DEFAULT_GAW + 1; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -2844,6 +2849,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .inject_pending_vectors = do_interrupt_requests, .set_tss_addr = vmx_set_tss_addr, + .get_tdp_level = get_ept_level, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 5f7fdc965d39..093b085daf6a 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -351,5 +351,6 @@ enum vmcs_field { #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) +#define VMX_EPT_DEFAULT_GAW 3 #endif diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 9d963cd6533c..897a1be24cf7 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -422,6 +422,7 @@ struct kvm_x86_ops { struct kvm_run *run); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*get_tdp_level)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; -- cgit v1.2.3 From 7b52345e2c4c7333bf7eba8034ffc4683fa63c91 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 21:13:50 +0800 Subject: KVM: MMU: Add EPT support Enable kvm_set_spte() to generate EPT entries. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 43 +++++++++++++++++++++++++++++++++---------- arch/x86/kvm/x86.c | 3 +++ include/asm-x86/kvm_host.h | 3 +++ 3 files changed, 39 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 20fb3c852db7..c28a36b4cbba 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -152,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache; static u64 __read_mostly shadow_trap_nonpresent_pte; static u64 __read_mostly shadow_notrap_nonpresent_pte; +static u64 __read_mostly shadow_base_present_pte; +static u64 __read_mostly shadow_nx_mask; +static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ +static u64 __read_mostly shadow_user_mask; +static u64 __read_mostly shadow_accessed_mask; +static u64 __read_mostly shadow_dirty_mask; void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -160,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) } EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); +void kvm_mmu_set_base_ptes(u64 base_pte) +{ + shadow_base_present_pte = base_pte; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); + +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask) +{ + shadow_user_mask = user_mask; + shadow_accessed_mask = accessed_mask; + shadow_dirty_mask = dirty_mask; + shadow_nx_mask = nx_mask; + shadow_x_mask = x_mask; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); + static int is_write_protection(struct kvm_vcpu *vcpu) { return vcpu->arch.cr0 & X86_CR0_WP; @@ -198,7 +221,7 @@ static int is_writeble_pte(unsigned long pte) static int is_dirty_pte(unsigned long pte) { - return pte & PT_DIRTY_MASK; + return pte & shadow_dirty_mask; } static int is_rmap_pte(u64 pte) @@ -513,7 +536,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) return; sp = page_header(__pa(spte)); pfn = spte_to_pfn(*spte); - if (*spte & PT_ACCESSED_MASK) + if (*spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (is_writeble_pte(*spte)) kvm_release_pfn_dirty(pfn); @@ -1039,17 +1062,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, * whether the guest actually used the pte (in order to detect * demand paging). */ - spte = PT_PRESENT_MASK | PT_DIRTY_MASK; + spte = shadow_base_present_pte | shadow_dirty_mask; if (!speculative) pte_access |= PT_ACCESSED_MASK; if (!dirty) pte_access &= ~ACC_WRITE_MASK; - if (!(pte_access & ACC_EXEC_MASK)) - spte |= PT64_NX_MASK; - - spte |= PT_PRESENT_MASK; + if (pte_access & ACC_EXEC_MASK) + spte |= shadow_x_mask; + else + spte |= shadow_nx_mask; if (pte_access & ACC_USER_MASK) - spte |= PT_USER_MASK; + spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; @@ -1155,7 +1178,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, } table[index] = __pa(new_table->spt) | PT_PRESENT_MASK - | PT_WRITABLE_MASK | PT_USER_MASK; + | PT_WRITABLE_MASK | shadow_user_mask; } table_addr = table[index] & PT64_BASE_ADDR_MASK; } @@ -1599,7 +1622,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) { u64 *spte = vcpu->arch.last_pte_updated; - return !!(spte && (*spte & PT_ACCESSED_MASK)); + return !!(spte && (*spte & shadow_accessed_mask)); } static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ce556372a4d..0735efbfa712 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque) kvm_x86_ops = ops; kvm_mmu_set_nonpresent_ptes(0ull, 0ull); + kvm_mmu_set_base_ptes(PT_PRESENT_MASK); + kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, + PT_DIRTY_MASK, PT64_NX_MASK, 0); return 0; out: diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 897a1be24cf7..d1dedda958ff 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -434,6 +434,9 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); +void kvm_mmu_set_base_ptes(u64 base_pte); +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); -- cgit v1.2.3 From 1ac593c97eb229da44819f66fea47975537c1177 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 21:44:42 +0800 Subject: KVM: MMU: Remove #ifdef CONFIG_X86_64 to support 4 level EPT Currently EPT level is 4 for both pae and x86_64. The patch remove the #ifdef for alloc root_hpa and free root_hpa to support EPT. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c28a36b4cbba..3dbedf169730 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1233,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; spin_lock(&vcpu->kvm->mmu_lock); -#ifdef CONFIG_X86_64 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -1245,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->kvm->mmu_lock); return; } -#endif for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; @@ -1271,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; -#ifdef CONFIG_X86_64 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -1286,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) vcpu->arch.mmu.root_hpa = root; return; } -#endif metaphysical = !is_paging(vcpu); if (tdp_enabled) metaphysical = 1; -- cgit v1.2.3 From b7ebfb0509692cd923e31650f81ed4d79c9a3e59 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 21:44:52 +0800 Subject: KVM: VMX: Prepare an identity page table for EPT in real mode [aliguory: plug leak] Signed-off-by: Sheng Yang Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/vmx.h | 3 ++ arch/x86/kvm/x86.c | 2 ++ include/asm-x86/kvm_host.h | 3 ++ 4 files changed, 84 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98e4f2b036de..de5f6150f2f7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -87,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_vmx, vcpu); } -static int init_rmode_tss(struct kvm *kvm); +static int init_rmode(struct kvm *kvm); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1304,7 +1304,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); kvm_mmu_reset_context(vcpu); - init_rmode_tss(vcpu->kvm); + init_rmode(vcpu->kvm); } #ifdef CONFIG_X86_64 @@ -1578,6 +1578,41 @@ out: return ret; } +static int init_rmode_identity_map(struct kvm *kvm) +{ + int i, r, ret; + pfn_t identity_map_pfn; + u32 tmp; + + if (!vm_need_ept()) + return 1; + if (unlikely(!kvm->arch.ept_identity_pagetable)) { + printk(KERN_ERR "EPT: identity-mapping pagetable " + "haven't been allocated!\n"); + return 0; + } + if (likely(kvm->arch.ept_identity_pagetable_done)) + return 1; + ret = 0; + identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT; + r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); + if (r < 0) + goto out; + /* Set up identity-mapping pagetable for EPT in real mode */ + for (i = 0; i < PT32_ENT_PER_PAGE; i++) { + tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | + _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); + r = kvm_write_guest_page(kvm, identity_map_pfn, + &tmp, i * sizeof(tmp), sizeof(tmp)); + if (r < 0) + goto out; + } + kvm->arch.ept_identity_pagetable_done = true; + ret = 1; +out: + return ret; +} + static void seg_setup(int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -1612,6 +1647,31 @@ out: return r; } +static int alloc_identity_pagetable(struct kvm *kvm) +{ + struct kvm_userspace_memory_region kvm_userspace_mem; + int r = 0; + + down_write(&kvm->slots_lock); + if (kvm->arch.ept_identity_pagetable) + goto out; + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; + kvm_userspace_mem.flags = 0; + kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; + kvm_userspace_mem.memory_size = PAGE_SIZE; + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + if (r) + goto out; + + down_read(¤t->mm->mmap_sem); + kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, + VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); +out: + up_write(&kvm->slots_lock); + return r; +} + static void allocate_vpid(struct vcpu_vmx *vmx) { int vpid; @@ -1775,6 +1835,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } +static int init_rmode(struct kvm *kvm) +{ + if (!init_rmode_tss(kvm)) + return 0; + if (!init_rmode_identity_map(kvm)) + return 0; + return 1; +} + static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1782,7 +1851,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) int ret; down_read(&vcpu->kvm->slots_lock); - if (!init_rmode_tss(vmx->vcpu.kvm)) { + if (!init_rmode(vmx->vcpu.kvm)) { ret = -ENOMEM; goto out; } @@ -2759,6 +2828,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (alloc_apic_access_page(kvm) != 0) goto free_vmcs; + if (vm_need_ept()) + if (alloc_identity_pagetable(kvm) != 0) + goto free_vmcs; + return &vmx->vcpu; free_vmcs: diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 093b085daf6a..f97eccc754e8 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -340,6 +340,7 @@ enum vmcs_field { #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 @@ -353,4 +354,6 @@ enum vmcs_field { #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) #define VMX_EPT_DEFAULT_GAW 3 +#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0735efbfa712..1842a86f7c33 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3909,6 +3909,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_physmem(kvm); if (kvm->arch.apic_access_page) put_page(kvm->arch.apic_access_page); + if (kvm->arch.ept_identity_pagetable) + put_page(kvm->arch.ept_identity_pagetable); kfree(kvm); } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index d1dedda958ff..e24afaa64a4d 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -314,6 +314,9 @@ struct kvm_arch{ struct page *apic_access_page; gpa_t wall_clock; + + struct page *ept_identity_pagetable; + bool ept_identity_pagetable_done; }; struct kvm_vm_stat { -- cgit v1.2.3 From 1439442c7b257b47a83aea4daed8fbf4a32cdff9 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 28 Apr 2008 12:24:45 +0800 Subject: KVM: VMX: Enable EPT feature for KVM Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 5 +- arch/x86/kvm/vmx.c | 229 +++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/vmx.h | 9 ++ include/asm-x86/kvm_host.h | 1 + 4 files changed, 234 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3dbedf169730..d9344be36442 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1177,8 +1177,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, return -ENOMEM; } - table[index] = __pa(new_table->spt) | PT_PRESENT_MASK - | PT_WRITABLE_MASK | shadow_user_mask; + table[index] = __pa(new_table->spt) + | PT_PRESENT_MASK | PT_WRITABLE_MASK + | shadow_user_mask | shadow_x_mask; } table_addr = table[index] & PT64_BASE_ADDR_MASK; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index de5f6150f2f7..bfe4db11989c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -42,7 +42,7 @@ module_param(enable_vpid, bool, 0); static int flexpriority_enabled = 1; module_param(flexpriority_enabled, bool, 0); -static int enable_ept; +static int enable_ept = 1; module_param(enable_ept, bool, 0); struct vmcs { @@ -284,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) : : "a"(&operand), "c"(ext) : "cc", "memory"); } +static inline void __invept(int ext, u64 eptp, gpa_t gpa) +{ + struct { + u64 eptp, gpa; + } operand = {eptp, gpa}; + + asm volatile (ASM_VMX_INVEPT + /* CF==1 or ZF==1 --> rc = -1 */ + "; ja 1f ; ud2 ; 1:\n" + : : "a" (&operand), "c" (ext) : "cc", "memory"); +} + static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -335,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); } +static inline void ept_sync_global(void) +{ + if (cpu_has_vmx_invept_global()) + __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); +} + +static inline void ept_sync_context(u64 eptp) +{ + if (vm_need_ept()) { + if (cpu_has_vmx_invept_context()) + __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); + else + ept_sync_global(); + } +} + +static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) +{ + if (vm_need_ept()) { + if (cpu_has_vmx_invept_individual_addr()) + __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, + eptp, gpa); + else + ept_sync_context(eptp); + } +} + static unsigned long vmcs_readl(unsigned long field) { unsigned long value; @@ -422,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb |= 1u << 1; if (vcpu->arch.rmode.active) eb = ~0; + if (vm_need_ept()) + eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -1352,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; } +static void ept_load_pdptrs(struct kvm_vcpu *vcpu) +{ + if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { + printk(KERN_ERR "EPT: Fail to load pdptrs!\n"); + return; + } + vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); + vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); + vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); + vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]); + } +} + +static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); + +static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, + unsigned long cr0, + struct kvm_vcpu *vcpu) +{ + if (!(cr0 & X86_CR0_PG)) { + /* From paging/starting to nonpaging */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_config.cpu_based_exec_ctrl | + (CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING)); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, vcpu->arch.cr4); + *hw_cr0 |= X86_CR0_PE | X86_CR0_PG; + *hw_cr0 &= ~X86_CR0_WP; + } else if (!is_paging(vcpu)) { + /* From nonpaging to paging */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_config.cpu_based_exec_ctrl & + ~(CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING)); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, vcpu->arch.cr4); + if (!(vcpu->arch.cr0 & X86_CR0_WP)) + *hw_cr0 &= ~X86_CR0_WP; + } +} + +static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, + struct kvm_vcpu *vcpu) +{ + if (!is_paging(vcpu)) { + *hw_cr4 &= ~X86_CR4_PAE; + *hw_cr4 |= X86_CR4_PSE; + } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) + *hw_cr4 &= ~X86_CR4_PAE; +} + static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { + unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | + KVM_VM_CR0_ALWAYS_ON; + vmx_fpu_deactivate(vcpu); if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) @@ -1371,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif + if (vm_need_ept()) + ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); + vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, - (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); + vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) vmx_fpu_activate(vcpu); } +static u64 construct_eptp(unsigned long root_hpa) +{ + u64 eptp; + + /* TODO write the value reading from MSR */ + eptp = VMX_EPT_DEFAULT_MT | + VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; + eptp |= (root_hpa & PAGE_MASK); + + return eptp; +} + static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + unsigned long guest_cr3; + u64 eptp; + + guest_cr3 = cr3; + if (vm_need_ept()) { + eptp = construct_eptp(cr3); + vmcs_write64(EPT_POINTER, eptp); + ept_sync_context(eptp); + ept_load_pdptrs(vcpu); + guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : + VMX_EPT_IDENTITY_PAGETABLE_ADDR; + } + vmx_flush_tlb(vcpu); - vmcs_writel(GUEST_CR3, cr3); + vmcs_writel(GUEST_CR3, guest_cr3); if (vcpu->arch.cr0 & X86_CR0_PE) vmx_fpu_deactivate(vcpu); } static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - vmcs_writel(CR4_READ_SHADOW, cr4); - vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ? - KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); + unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? + KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + vcpu->arch.cr4 = cr4; + if (vm_need_ept()) + ept_update_paging_mode_cr4(&hw_cr4, vcpu); + + vmcs_writel(CR4_READ_SHADOW, cr4); + vmcs_writel(GUEST_CR4, hw_cr4); } static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -2116,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { + /* EPT won't cause page fault directly */ + if (vm_need_ept()) + BUG(); cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); @@ -2445,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return kvm_task_switch(vcpu, tss_selector, reason); } +static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + enum emulation_result er; + gpa_t gpa; + unsigned long hva; + int gla_validity; + int r; + + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + + if (exit_qualification & (1 << 6)) { + printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); + return -ENOTSUPP; + } + + gla_validity = (exit_qualification >> 7) & 0x3; + if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) { + printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); + printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", + (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), + (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); + printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", + (long unsigned int)exit_qualification); + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 0; + return -ENOTSUPP; + } + + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); + if (!kvm_is_error_hva(hva)) { + r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); + if (r < 0) { + printk(KERN_ERR "EPT: Not enough memory!\n"); + return -ENOMEM; + } + return 1; + } else { + /* must be MMIO */ + er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); + + if (er == EMULATE_FAIL) { + printk(KERN_ERR + "EPT: Fail to handle EPT violation vmexit!er is %d\n", + er); + printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", + (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), + (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); + printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", + (long unsigned int)exit_qualification); + return -ENOTSUPP; + } else if (er == EMULATE_DO_MMIO) + return 0; + } + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2468,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, + [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, }; static const int kvm_vmx_max_exit_handlers = @@ -2486,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); + /* Access CR3 don't cause VMExit in paging mode, so we need + * to sync with guest real CR3. */ + if (vm_need_ept() && is_paging(vcpu)) { + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + ept_load_pdptrs(vcpu); + } + if (unlikely(vmx->fail)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason @@ -2494,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } if ((vectoring_info & VECTORING_INFO_VALID_MASK) && - exit_reason != EXIT_REASON_EXCEPTION_NMI) + (exit_reason != EXIT_REASON_EXCEPTION_NMI && + exit_reason != EXIT_REASON_EPT_VIOLATION)) printk(KERN_WARNING "%s: unexpected, valid vectoring info and " "exit reason is 0x%x\n", __func__, exit_reason); if (exit_reason < kvm_vmx_max_exit_handlers @@ -2796,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return ERR_PTR(-ENOMEM); allocate_vpid(vmx); + if (id == 0 && vm_need_ept()) { + kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | + VMX_EPT_WRITABLE_MASK | + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, + VMX_EPT_FAKE_DIRTY_MASK, 0ull, + VMX_EPT_EXECUTABLE_MASK); + kvm_enable_tdp(); + } err = kvm_vcpu_init(&vmx->vcpu, kvm, id); if (err) @@ -2975,9 +3183,14 @@ static int __init vmx_init(void) vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); + if (cpu_has_vmx_ept()) + bypass_guest_pf = 0; + if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); + ept_sync_global(); + return 0; out2: diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index f97eccc754e8..79d94c610dfe 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -353,6 +353,15 @@ enum vmcs_field { #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) #define VMX_EPT_DEFAULT_GAW 3 +#define VMX_EPT_MAX_GAW 0x4 +#define VMX_EPT_MT_EPTE_SHIFT 3 +#define VMX_EPT_GAW_EPTP_SHIFT 3 +#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPT_READABLE_MASK 0x1ull +#define VMX_EPT_WRITABLE_MASK 0x2ull +#define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62) +#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index e24afaa64a4d..4baa9c9e1f41 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -651,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" #define MSR_IA32_TIME_STAMP_COUNTER 0x010 -- cgit v1.2.3 From 3fe913e7c550a869e250d04c34410f7a6e263f7c Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Mon, 28 Apr 2008 18:23:52 +0300 Subject: KVM: x86: task switch: fix wrong bit setting for the busy flag The busy bit is bit 1 of the type field, not bit 8. Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1842a86f7c33..017daa2561f4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3484,7 +3484,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) } if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { - cseg_desc.type &= ~(1 << 8); //clear the B flag + cseg_desc.type &= ~(1 << 1); //clear the B flag save_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc); } @@ -3510,7 +3510,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) } if (reason != TASK_SWITCH_IRET) { - nseg_desc.type |= (1 << 8); + nseg_desc.type |= (1 << 1); save_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc); } -- cgit v1.2.3 From 45c5eb67da5a668abe79c23a7e64dbc87a600f90 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Fri, 25 Apr 2008 17:55:49 -0500 Subject: KVM: ppc: Handle guest idle by emulating MSR[WE] writes This reduces host CPU usage when the guest is idle. However, the guest must set MSR[WE] in its idle loop, which Linux did not do until 2.6.26. Signed-off-by: Hollis Blanchard Signed-off-by: Jerone Young Signed-off-by: Avi Kivity --- arch/powerpc/kvm/booke_guest.c | 1 + arch/powerpc/kvm/powerpc.c | 20 +++++++++++++++++--- include/asm-powerpc/kvm_host.h | 1 + include/asm-powerpc/kvm_ppc.h | 5 +++++ 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c index 6d9884a6884a..b3db6f4576ad 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke_guest.c @@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "inst_emu", VCPU_STAT(emulated_inst_exits) }, { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, + { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { NULL } }; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index bad40bd2d3ac..777e0f34e0ea 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - /* XXX implement me */ - return 0; + return !!(v->arch.pending_exceptions); } int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return 1; + return !(v->arch.msr & MSR_WE); } @@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data) struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); + + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; + } } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) @@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int r; sigset_t sigsaved; + vcpu_load(vcpu); + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); + vcpu_put(vcpu); + return r; } int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); + + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; + } + return 0; } diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h index 04ffbb8e0a35..81a69d711017 100644 --- a/include/asm-powerpc/kvm_host.h +++ b/include/asm-powerpc/kvm_host.h @@ -59,6 +59,7 @@ struct kvm_vcpu_stat { u32 emulated_inst_exits; u32 dec_exits; u32 ext_intr_exits; + u32 halt_wakeup; }; struct tlbe { diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h index 7ac820308a7e..b35a7e3ef978 100644 --- a/include/asm-powerpc/kvm_ppc.h +++ b/include/asm-powerpc/kvm_ppc.h @@ -77,12 +77,17 @@ static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) clear_bit(priority, &vcpu->arch.pending_exceptions); } +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); vcpu->arch.msr = new_msr; + + if (vcpu->arch.msr & MSR_WE) + kvm_vcpu_block(vcpu); } #endif /* __POWERPC_KVM_PPC_H__ */ -- cgit v1.2.3 From de368dceb33c3c068dbde1407aff75cd8e126f04 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Tue, 29 Apr 2008 18:18:23 +0200 Subject: KVM: ppc: deliver INTERRUPT_FP_UNAVAIL to the guest This patch adds the delivery of INTERRUPT_FP_UNAVAIL exceptions to the guest. It's needed if a guest uses ppc binaries using the Floating point instructions. Signed-off-by: Christian Ehrhardt Acked-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/booke_guest.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c index b3db6f4576ad..712d89a28c46 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke_guest.c @@ -339,6 +339,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } break; + case BOOKE_INTERRUPT_FP_UNAVAIL: + kvmppc_queue_exception(vcpu, exit_nr); + r = RESUME_GUEST; + break; + case BOOKE_INTERRUPT_DATA_STORAGE: vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; -- cgit v1.2.3 From dc7457ea52f88539dc72925360e6068d5c938a0f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 30 Apr 2008 16:13:36 +0300 Subject: KVM: x86 emulator: disable writeback on lmsw The recent changes allowing memory operands with lmsw and smsw left lmsw with writeback enabled. Since lmsw has no oridinary destination operand, the dst pointer was not initialized, resulting in an oops. Close the hole by disabling writeback for lmsw. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 2ca08386f993..f2a696d6a243 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -1761,6 +1761,7 @@ twobyte_insn: case 6: /* lmsw */ realmode_lmsw(ctxt->vcpu, (u16)c->src.val, &ctxt->eflags); + c->dst.type = OP_NONE; break; case 7: /* invlpg*/ emulate_invlpg(ctxt->vcpu, memop); -- cgit v1.2.3 From ece15babfa514e06118f62f4df2c757d6209f4f0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 30 Apr 2008 13:23:54 -0300 Subject: KVM: PIT: support mode 4 The in-kernel PIT emulation ignores pending timers if operating under mode 4, which for example DragonFlyBSD uses (and Plan9 too, apparently). Mode 4 seems to be similar to one-shot mode, other than the fact that it starts counting after the next CLK pulse once programmed, while mode 1 starts counting immediately, so add a FIXME to enhance precision. Fixes sourceforge bug 1952988. Signed-off-by: Marcelo Tosatti Acked-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4c943eabacc3..3324d90038e4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) * mode 1 is one shot, mode 2 is period, otherwise del timer */ switch (ps->channels[0].mode) { case 1: + /* FIXME: enhance mode 4 precision */ + case 4: create_pit_timer(&ps->pit_timer, val, 0); break; case 2: -- cgit v1.2.3 From b4f14abd95cd8d42f08438f1c4ec3eafe41054ee Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 30 Apr 2008 17:59:04 +0200 Subject: KVM: Avoid spurious execeptions after setting registers Clear pending exceptions when setting new register values. This avoids spurious exceptions after restoring a vcpu state or after reset-on-triple-fault. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 017daa2561f4..bc224bba1e87 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3022,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvm_x86_ops->decache_regs(vcpu); + vcpu->arch.exception.pending = false; + vcpu_put(vcpu); return 0; -- cgit v1.2.3 From bc1a34f1bf354fabc03e3f465620c80e510d0e8f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 1 May 2008 18:43:33 +0200 Subject: KVM: avoid fx_init() schedule in atomic This make sure not to schedule in atomic during fx_init. I also changed the name of fpu_init to fx_finit to avoid duplicating the name with fpu_init that is already used in the kernel, this makes grep simpler if nothing else. Signed-off-by: Andrea Arcangeli Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 11 ++++++++++- include/asm-x86/kvm_host.h | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bc224bba1e87..21338bdb28ff 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3703,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu) { unsigned after_mxcsr_mask; + /* + * Touch the fpu the first time in non atomic context as if + * this is the first fpu instruction the exception handler + * will fire before the instruction returns and it'll have to + * allocate ram with GFP_KERNEL. + */ + if (!used_math()) + fx_save(&vcpu->arch.host_fx_image); + /* Initialize guest FPU by resetting ours and saving into guest's */ preempt_disable(); fx_save(&vcpu->arch.host_fx_image); - fpu_init(); + fx_finit(); fx_save(&vcpu->arch.guest_fx_image); fx_restore(&vcpu->arch.host_fx_image); preempt_enable(); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 4baa9c9e1f41..1d8cd01fa514 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -627,7 +627,7 @@ static inline void fx_restore(struct i387_fxsave_struct *image) asm("fxrstor (%0)":: "r" (image)); } -static inline void fpu_init(void) +static inline void fx_finit(void) { asm("finit"); } -- cgit v1.2.3 From 93df766322ba1db2801e4b826985a4932dd75866 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 2 May 2008 13:23:10 +0300 Subject: KVM: MMU: Allow more than PAGES_PER_HPAGE write protections per large page nonpae guests can call rmap_write_protect twice per page (for page tables) or four times per page (for page directories), triggering a bogus warning. Remove the warning. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d9344be36442..36c5406b1813 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -376,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); *write_count += 1; - WARN_ON(*write_count > KVM_PAGES_PER_HPAGE); } static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) -- cgit v1.2.3 From b8ba5f10c5956d2b297766fda8f4f5ab8ad1e2cc Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 30 Apr 2008 12:39:05 -0300 Subject: x86: KVM geust: make setup_secondary_clock definition dependent on local apic Since the pv_apic_ops are only present if CONFIG_X86_LOCAL_APIC is compiled in, kvmclock failed to build without this option. This patch fixes this. Signed-off-by: Glauber Costa Signed-off-by: Avi Kivity --- arch/x86/kernel/kvmclock.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index ddee04043aeb..4bc1be5d5472 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -133,6 +133,7 @@ static int kvm_register_clock(void) return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); } +#ifdef CONFIG_X86_LOCAL_APIC static void kvm_setup_secondary_clock(void) { /* @@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void) /* ok, done with our trickery, call native */ setup_secondary_APIC_clock(); } +#endif /* * After the clock is registered, the host will keep writing to the @@ -177,7 +179,9 @@ void __init kvmclock_init(void) pv_time_ops.get_wallclock = kvm_get_wallclock; pv_time_ops.set_wallclock = kvm_set_wallclock; pv_time_ops.sched_clock = kvm_clock_read; +#ifdef CONFIG_X86_LOCAL_APIC pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; +#endif machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC machine_ops.crash_shutdown = kvm_crash_shutdown; -- cgit v1.2.3 From 48b83d2425d7781bb625b1c37b5f2a8963b6e23b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 21:24:30 +0200 Subject: x86: undo visws/numaq build changes arch/x86/pci/Makefile_32 has a nasty detail. VISWS and NUMAQ build override the generic pci-y rules. This needs a proper cleanup, but that needs more thoughts. Undo commit 895d30935ebe05f192e844792668bf8d19deaae7 x86: numaq fix do not override the existing pci-y rule when adding visws or numaq rules. There is also a stupid init function ordering problem vs. acpi.o Add comments to the Makefile to avoid tripping over this again. Remove the srat stub code in discontig_32.c to allow a proper NUMAQ build. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/discontig_32.c | 26 -------------------------- arch/x86/pci/Makefile_32 | 12 ++++++++++-- 2 files changed, 10 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 18378850e25a..914ccf983687 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -476,29 +476,3 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif - -#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT -/* - * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA - * - * These stub functions are needed to compile 32-bit NUMA when SRAT is - * not set. There are functions in srat_64.c for parsing this table - * and it may be possible to make them common functions. - */ -void acpi_numa_slit_init (struct acpi_table_slit *slit) -{ - printk(KERN_INFO "ACPI: No support for parsing SLIT table\n"); -} - -void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa) -{ -} - -void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma) -{ -} - -void acpi_numa_arch_fixup(void) -{ -} -#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */ diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index 7fa519868d70..89ec35d00efd 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 @@ -6,11 +6,19 @@ obj-$(CONFIG_PCI_DIRECT) += direct.o obj-$(CONFIG_PCI_OLPC) += olpc.o pci-y := fixup.o + +# Do not change the ordering here. There is a nasty init function +# ordering dependency which breaks when you move acpi.o below +# legacy/irq.o pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o -pci-$(CONFIG_X86_VISWS) += visws.o fixup.o -pci-$(CONFIG_X86_NUMAQ) += numa.o irq.o +# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are +# therefor correct. This needs a proper fix by distangling the code. +pci-$(CONFIG_X86_VISWS) := visws.o fixup.o +pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o + +# Necessary for NUMAQ as well pci-$(CONFIG_NUMA) += mp_bus_to_node.o obj-y += $(pci-y) common.o early.o -- cgit v1.2.3 From 22eecde2f9034764a3fd095eecfa3adfb8ec9a98 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 1 May 2008 12:06:54 +0200 Subject: uml: fix gcc problem this is what caused gcc 4.3 to throw an internal error when OPTIMIZE_INLINING was enabled ... Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/um/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/Makefile b/arch/um/Makefile index dbeab15e7bb7..01b97c19a8ba 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -77,7 +77,10 @@ include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS) KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \ -Dmktime=kernel_mktime $(ARCH_KERNEL_DEFINES)) KBUILD_CFLAGS += $(KERNEL_DEFINES) -KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,) +# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use +# a lot more stack due to the lack of sharing of stacklots: +KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ + echo $(call cc-option,-fno-unit-at-a-time); fi ;) PHONY += linux -- cgit v1.2.3 From 4c6214c75a5aca5417156a47cd890b128c5f0637 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Thu, 1 May 2008 11:31:07 +0200 Subject: kbuild, suspend, x86: fix rebuild of wakeup.bin In kernel/acpi/realmode/Makefile use the 'always' variable to say that wakeup.bin should always be made. In acpi/Makefile we then do not need to specify the requested target and we avoid the message from make: `arch/x86/kernel/acpi/realmode/wakeup.bin' is up to date. Add wakeup.lds to list af targets to avoid rebuilding wakeup.bin - from Roland McGrath. Signed-off-by: Sam Ravnborg Cc: Rafael J. Wysocki Cc: Pavel Machek Cc: H. Peter Anvin Cc: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/Makefile | 2 +- arch/x86/kernel/acpi/realmode/Makefile | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 7335959b6aff..fd5ca97a2ad5 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -10,5 +10,5 @@ endif $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin $(obj)/realmode/wakeup.bin: FORCE - $(Q)$(MAKE) $(build)=$(obj)/realmode $@ + $(Q)$(MAKE) $(build)=$(obj)/realmode diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile index 092900854acc..1c31cc0e9def 100644 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ b/arch/x86/kernel/acpi/realmode/Makefile @@ -6,7 +6,8 @@ # for more details. # -targets := wakeup.bin wakeup.elf +always := wakeup.bin +targets := wakeup.elf wakeup.lds wakeup-y += wakeup.o wakemain.o video-mode.o copy.o @@ -48,7 +49,7 @@ LDFLAGS_wakeup.elf := -T CPPFLAGS_wakeup.lds += -P -C -$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE +$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE $(call if_changed,ld) OBJCOPYFLAGS_wakeup.bin := -O binary -- cgit v1.2.3 From dbe55f4797712f86691a0ee0b5f508693c7310fe Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Apr 2008 01:50:26 +0300 Subject: x86: make start_secondary() static start_secondary() needlessly became global. Signed-off-by: Adrian Bunk Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 84241a256dc8..b78e4d47a42b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -299,7 +299,7 @@ static void __cpuinit smp_callin(void) /* * Activate a secondary processor. */ -void __cpuinit start_secondary(void *unused) +static void __cpuinit start_secondary(void *unused) { /* * Don't put *anything* before cpu_init(), SMP booting is too -- cgit v1.2.3 From c5562faeaacf19e81a78ee37cc6b96ab1f3e68e4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Apr 2008 00:31:37 +0300 Subject: x86: make additional_cpus static This patch makes the needlessly global additional_cpus static. Signed-off-by: Adrian Bunk Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b78e4d47a42b..6b087ab6cd8f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1306,7 +1306,7 @@ static void remove_siblinginfo(int cpu) cpu_clear(cpu, cpu_sibling_setup_map); } -int additional_cpus __initdata = -1; +static int additional_cpus __initdata = -1; static __init int setup_additional_cpus(char *s) { -- cgit v1.2.3 From e37ee42caadab46cec277546099fa2a6207fff0b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 3 May 2008 22:01:31 +0200 Subject: x86: es7000 build fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mpparse.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3e2c54dc8b29..404683b94e79 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -794,6 +794,11 @@ void __init find_smp_config(void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ +/* + * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: + */ +int es7000_plat; + #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC @@ -909,8 +914,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) MP_intsrc_info(&intsrc); } -int es7000_plat; - void __init mp_config_acpi_legacy_irqs(void) { struct mpc_config_intsrc intsrc; -- cgit v1.2.3 From 163ea310b68bdde89b1ac633fbf8c0db290d3f86 Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 3 May 2008 22:39:42 +0200 Subject: x86: remove dell reboot dmi quirk board name match http://bugzilla.kernel.org/show_bug.cgi?id=10547 Newer Dell OptiPlex 745s hang before rebooting after 'sudo reboot'. A patch for some versions of the OptiPlex was proposed here -- http://lkml.org/lkml/2007/6/5/59 -- and is included in 2.6.23 and later kernels, according to http://lxr.linux.no/linux+v2.6.23/arch/i386/kernel/reboot.c . However, the DMI_BOARD_NAME ("0WF810") is too restrictive. Newer OptiPlex machines have a DMI_BOARD_NAME of "0RF703". I therefore suggest adding another clause to reboot.c, similar to the one in the original patch, but matching a DMI_BOARD_NAME of "0RF703". On further inspection, it seems that there are other DMI_BOARD_NAMEs for this same machine. They seem to change from time to time, which means that the current code is fragile. Moreover, using bios reboot should not break non-SFF OptiPlex 745s, and so a reasonable fix is to simply drop the match on DMI_BOARD_NAME. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 07c6d42ab5ff..f6be7d5f82f8 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -149,7 +149,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), - DMI_MATCH(DMI_BOARD_NAME, "0WF810"), }, }, { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/ -- cgit v1.2.3 From ecb783eae1372d69a53d406e1bdba8284e4bafcc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 3 May 2008 14:18:01 +0400 Subject: x86: vdso ELF handling - use SELFMAG instead of numeric constant Signed-off-by: Cyrill Gorcunov Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/vdso/vdso32-setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 4dceeb1fc5e0..cf058fecfcee 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -162,7 +162,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) Elf32_Shdr *shdr; int i; - BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || + BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 || !elf_check_arch_ia32(ehdr) || ehdr->e_type != ET_DYN); -- cgit v1.2.3 From 8bd1796dedd50abd7553afbe6113bd97cc88390f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 3 May 2008 14:18:03 +0400 Subject: x86: relocs ELF handling - use SELFMAG instead of numeric constant Signed-off-by: Cyrill Gorcunov Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/relocs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index d01ea42187e6..edaadea90aaf 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -191,7 +191,7 @@ static void read_ehdr(FILE *fp) die("Cannot read ELF header: %s\n", strerror(errno)); } - if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) { + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) { die("No ELF magic\n"); } if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { -- cgit v1.2.3 From 7b04fa014c11e6415da8b5a7999dbd201abad53c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 2 May 2008 13:32:32 -0700 Subject: x86: video/fbdev.c: add MODULE_LICENSE Add the missing MODULE_LICENSE("GPL"). Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/video/fbdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 4db42bff8c60..69527688f794 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -1,5 +1,4 @@ /* - * * Copyright (C) 2007 Antonino Daplas * * This file is subject to the terms and conditions of the GNU General Public @@ -29,3 +28,4 @@ int fb_is_primary_device(struct fb_info *info) return retval; } EXPORT_SYMBOL(fb_is_primary_device); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From e26a28d190304d910ee49b81cbfe6d9241f56e86 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 3 May 2008 23:49:59 +0200 Subject: x86: olpc build fix CONFIG_OLPC needs to depend on MGEODE_LX Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c3f880902d66..845ea2b2d487 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1661,6 +1661,7 @@ config GEODE_MFGPT_TIMER config OLPC bool "One Laptop Per Child support" + depends on MGEODE_LX default n help Add support for detecting the unique features of the OLPC -- cgit v1.2.3 From 62179849b40aded9e727cca5006627a1c4d6446e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 2 May 2008 13:32:35 -0700 Subject: x86: fix setup printk format warning Fix x86 setup printk format warming: next-20080430/arch/x86/kernel/setup.c:172: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'ssize_t' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c0c68c18a788..cc6f5eb20b24 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -95,7 +95,7 @@ void __init setup_per_cpu_areas(void) /* Copy section for each CPU (we discard the original) */ size = PERCPU_ENOUGH_ROOM; - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", + printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); for_each_possible_cpu(i) { -- cgit v1.2.3 From b41e5fffe8b81fc939067d8c1c195cc79115d5a3 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Sat, 3 May 2008 06:34:04 +1000 Subject: [POWERPC] devres: Add devm_ioremap_prot() We provide an ioremap_flags, so this provides a corresponding devm_ioremap_prot. The slight name difference is at Ben Herrenschmidt's request as he plans on changing ioremap_flags to ioremap_prot in the future. Signed-off-by: Emil Medve Signed-off-by: Kumar Gala Acked-by: Tejun Heo Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/lib/Makefile | 1 + arch/powerpc/lib/devres.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/asm-powerpc/io.h | 8 +++++++- include/linux/io.h | 1 + lib/devres.c | 2 +- 5 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/lib/devres.c (limited to 'arch') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 4bb023f4c869..f1d2cdc5331b 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_SMP) += locks.o endif obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o +obj-$(CONFIG_HAS_IOMEM) += devres.o diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c new file mode 100644 index 000000000000..292115d98ea9 --- /dev/null +++ b/arch/powerpc/lib/devres.c @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include /* devres_*(), devm_ioremap_release() */ +#include /* ioremap_flags() */ +#include /* EXPORT_SYMBOL() */ + +/** + * devm_ioremap_prot - Managed ioremap_flags() + * @dev: Generic device to remap IO address for + * @offset: BUS offset to map + * @size: Size of map + * @flags: Page flags + * + * Managed ioremap_prot(). Map is automatically unmapped on driver + * detach. + */ +void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioremap_flags(offset, size, flags); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioremap_prot); diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index afae0697e8ce..e0062d73db1c 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -2,7 +2,7 @@ #define _ASM_POWERPC_IO_H #ifdef __KERNEL__ -/* +/* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -18,6 +18,9 @@ extern int check_legacy_ioport(unsigned long base_port); #define _PNPWRP 0xa79 #define PNPBIOS_BASE 0xf000 +#include +#include + #include #include #include @@ -744,6 +747,9 @@ static inline void * bus_to_virt(unsigned long address) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) +void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags); + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IO_H */ diff --git a/include/linux/io.h b/include/linux/io.h index 3a03a3604cce..6c7f0ba0d5fa 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -65,5 +65,6 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, void devm_iounmap(struct device *dev, void __iomem *addr); int check_signature(const volatile void __iomem *io_addr, const unsigned char *signature, int length); +void devm_ioremap_release(struct device *dev, void *res); #endif /* _LINUX_IO_H */ diff --git a/lib/devres.c b/lib/devres.c index 26c87c49d776..72c8909006da 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -2,7 +2,7 @@ #include #include -static void devm_ioremap_release(struct device *dev, void *res) +void devm_ioremap_release(struct device *dev, void *res) { iounmap(*(void __iomem **)res); } -- cgit v1.2.3 From 9185ef6787f1c8f1c06aa0cb3c7746fb4f101f50 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Sat, 3 May 2008 06:34:05 +1000 Subject: [POWERPC] Assign PDE->data before gluing PDE into /proc tree Simply replace proc_create and further data assigned with proc_create_data. No need to check for data!=NULL after that. Signed-off-by: Denis V. Lunev Cc: Alexey Dobriyan Cc: Eric W. Biederman Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/scanlog.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index bec3803f0618..417eca79df69 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -55,11 +55,6 @@ static ssize_t scanlog_read(struct file *file, char __user *buf, dp = PDE(inode); data = (unsigned int *)dp->data; - if (!data) { - printk(KERN_ERR "scanlog: read failed no data\n"); - return -EIO; - } - if (count > RTAS_DATA_BUF_SIZE) count = RTAS_DATA_BUF_SIZE; @@ -146,11 +141,6 @@ static int scanlog_open(struct inode * inode, struct file * file) struct proc_dir_entry *dp = PDE(inode); unsigned int *data = (unsigned int *)dp->data; - if (!data) { - printk(KERN_ERR "scanlog: open failed no data\n"); - return -EIO; - } - if (data[0] != 0) { /* This imperfect test stops a second copy of the * data (or a reset while data is being copied) @@ -168,10 +158,6 @@ static int scanlog_release(struct inode * inode, struct file * file) struct proc_dir_entry *dp = PDE(inode); unsigned int *data = (unsigned int *)dp->data; - if (!data) { - printk(KERN_ERR "scanlog: release failed no data\n"); - return -EIO; - } data[0] = 0; return 0; @@ -200,12 +186,11 @@ static int __init scanlog_init(void) if (!data) goto err; - ent = proc_create("ppc64/rtas/scan-log-dump", S_IRUSR, NULL, - &scanlog_fops); + ent = proc_create_data("ppc64/rtas/scan-log-dump", S_IRUSR, NULL, + &scanlog_fops, data); if (!ent) goto err; - ent->data = data; proc_ppc64_scan_log_dump = ent; return 0; -- cgit v1.2.3 From 13a6ddb08e58a1bd344da7898c4e2f13bdf18c2f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 27 Mar 2008 01:31:18 -0700 Subject: x86/pci: add pci=skip_isa_align command lines. so we don't align the io port start address for pci cards. also move out dmi check out acpi.c, because it has nothing to do with acpi. it could spare some calling when we have several peer root buses. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 2 ++ arch/x86/pci/acpi.c | 41 -------------------------------- arch/x86/pci/common.c | 47 +++++++++++++++++++++++++++++++++++++ arch/x86/pci/init.c | 2 ++ arch/x86/pci/pci.h | 2 ++ 5 files changed, 53 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3ce193f86565..15d8216a9345 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1518,6 +1518,8 @@ and is between 256 and 4096 characters. It is defined in the file This is normally done in pci_enable_device(), so this option is a temporary workaround for broken drivers that don't call it. + skip_isa_align [X86] do not align io start addr, so can + handle more pci cards firmware [ARM] Do not re-enumerate the bus but instead just use the configuration from the bootloader. This is currently used on diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1a9c0c6a1a18..d95de2f199cd 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -6,45 +6,6 @@ #include #include "pci.h" -static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) -{ - pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; - printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident); - return 0; -} - -static struct dmi_system_id acpi_pciprobe_dmi_table[] __devinitdata = { -/* - * Systems where PCI IO resource ISA alignment can be skipped - * when the ISA enable bit in the bridge control is not set - */ - { - .callback = can_skip_ioresource_align, - .ident = "IBM System x3800", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), - }, - }, - { - .callback = can_skip_ioresource_align, - .ident = "IBM System x3850", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "x3850"), - }, - }, - { - .callback = can_skip_ioresource_align, - .ident = "IBM System x3950", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "x3950"), - }, - }, - {} -}; - struct pci_root_info { char *name; unsigned int res_num; @@ -196,8 +157,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do int pxm; #endif - dmi_check_system(acpi_pciprobe_dmi_table); - if (domain && !pci_domains_supported) { printk(KERN_WARNING "PCI: Multiple domains not supported " "(dom %d, bus %d)\n", domain, busnum); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 88b5416cf009..a6d27797ef47 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -90,6 +90,50 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) rom_r->start = rom_r->end = rom_r->flags = 0; } +static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) +{ + pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; + printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident); + return 0; +} + +static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = { +/* + * Systems where PCI IO resource ISA alignment can be skipped + * when the ISA enable bit in the bridge control is not set + */ + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3800", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), + }, + }, + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3850"), + }, + }, + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3950"), + }, + }, + {} +}; + +void __init dmi_check_skip_isa_align(void) +{ + dmi_check_system(can_skip_pciprobe_dmi_table); +} + /* * Called after each bus is probed, but before its children * are examined. @@ -462,6 +506,9 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "routeirq")) { pci_routeirq = 1; return NULL; + } else if (!strcmp(str, "skip_isa_align")) { + pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; + return NULL; } return str; } diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index dd30c6076b5d..b394b2a4b912 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -33,6 +33,8 @@ static __init int pci_access_init(void) printk(KERN_ERR "PCI: Fatal: No config space access function found\n"); + dmi_check_skip_isa_align(); + return 0; } arch_initcall(pci_access_init); diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index c58805a92db5..101982027881 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -38,6 +38,8 @@ enum pci_bf_sort_state { pci_dmi_bf, }; +extern void __init dmi_check_skip_isa_align(void); + /* pci-i386.c */ extern unsigned int pcibios_max_latency; -- cgit v1.2.3 From 0df18ff366853cdf31e5238764ec5c63e6b5a398 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 14 Apr 2008 15:40:37 -0700 Subject: x86 PCI: call dmi_check_pciprobe() this change: | commit 08f1c192c3c32797068bfe97738babb3295bbf42 | Author: Muli Ben-Yehuda | Date: Sun Jul 22 00:23:39 2007 +0300 | | x86-64: introduce struct pci_sysdata to facilitate sharing of ->sysdata | | This patch introduces struct pci_sysdata to x86 and x86-64, and | converts the existing two users (NUMA, Calgary) to use it. | | This lays the groundwork for having other users of sysdata, such as | the PCI domains work. | | The Calgary bits are tested, the NUMA bits just look ok. replaces pcibios_scan_root with pci_scan_bus_parented... but in pcibios_scan_root we have a DMI check: dmi_check_system(pciprobe_dmi_table); when when have several peer root buses this could be called multiple times (which is bad), so move that call to pci_access_init(). Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 7 +++++-- arch/x86/pci/init.c | 2 ++ arch/x86/pci/pci.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index a6d27797ef47..bfa72a9475b3 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -362,13 +362,16 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { {} }; +void __init dmi_check_pciprobe(void) +{ + dmi_check_system(pciprobe_dmi_table); +} + struct pci_bus * __devinit pcibios_scan_root(int busnum) { struct pci_bus *bus = NULL; struct pci_sysdata *sd; - dmi_check_system(pciprobe_dmi_table); - while ((bus = pci_find_next_bus(bus)) != NULL) { if (bus->number == busnum) { /* Already scanned */ diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index b394b2a4b912..e70b9c57b88e 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -33,6 +33,8 @@ static __init int pci_access_init(void) printk(KERN_ERR "PCI: Fatal: No config space access function found\n"); + dmi_check_pciprobe(); + dmi_check_skip_isa_align(); return 0; diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 101982027881..f3972b12c60a 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -38,6 +38,7 @@ enum pci_bf_sort_state { pci_dmi_bf, }; +extern void __init dmi_check_pciprobe(void); extern void __init dmi_check_skip_isa_align(void); /* pci-i386.c */ -- cgit v1.2.3 From 8376005ea471762e7a5957d5b9e788121c0ba726 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 May 2008 12:32:39 -0700 Subject: sparc64: use compat_sys_utimes instead of home-grown local copy. Noticed by Christoph Hellwig. Signed-off-by: David S. Miller --- arch/sparc64/kernel/sys_sparc32.c | 31 ------------------------------- arch/sparc64/kernel/systbls.S | 2 +- 2 files changed, 1 insertion(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 161ce4710fe7..1aa4288125f2 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -236,13 +236,6 @@ asmlinkage long sys32_getegid16(void) /* 32-bit timeval and related flotsam. */ -static long get_tv32(struct timeval *o, struct compat_timeval __user *i) -{ - return (!access_ok(VERIFY_READ, i, sizeof(*i)) || - (__get_user(o->tv_sec, &i->tv_sec) | - __get_user(o->tv_usec, &i->tv_usec))); -} - static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) { return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || @@ -757,30 +750,6 @@ asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } -asmlinkage long sys32_utimes(char __user *filename, - struct compat_timeval __user *tvs) -{ - struct timespec tv[2]; - - if (tvs) { - struct timeval ktvs[2]; - if (get_tv32(&ktvs[0], tvs) || - get_tv32(&ktvs[1], 1+tvs)) - return -EFAULT; - - if (ktvs[0].tv_usec < 0 || ktvs[0].tv_usec >= 1000000 || - ktvs[1].tv_usec < 0 || ktvs[1].tv_usec >= 1000000) - return -EINVAL; - - tv[0].tv_sec = ktvs[0].tv_sec; - tv[0].tv_nsec = 1000 * ktvs[0].tv_usec; - tv[1].tv_sec = ktvs[1].tv_sec; - tv[1].tv_nsec = 1000 * ktvs[1].tv_usec; - } - - return do_utimes(AT_FDCWD, filename, tvs ? tv : NULL, 0); -} - /* These are here just in case some old sparc32 binary calls it. */ asmlinkage long sys32_pause(void) { diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index a4fef2ba1ae1..8b5282d433c4 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -45,7 +45,7 @@ sys_call_table32: /*120*/ .word compat_sys_readv, compat_sys_writev, sys32_settimeofday, sys32_fchown16, sys_fchmod .word sys_nis_syscall, sys32_setreuid16, sys32_setregid16, sys_rename, sys_truncate /*130*/ .word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys32_mkdir, sys_rmdir, sys32_utimes, compat_sys_stat64 + .word sys_nis_syscall, sys32_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 /*140*/ .word sys32_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit .word compat_sys_setrlimit, sys_pivot_root, sys32_prctl, sys_pciconfig_read, sys_pciconfig_write /*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 -- cgit v1.2.3 From 3b17f136bf32984eb0faeb116bcd44ffe3503782 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Mon, 5 May 2008 21:25:48 +0200 Subject: m68k: Handle 68040 bus faults Fix 68040 bus fault handling, so the standard kernel exception handling can be used for i/o probing. Contrary to normal access faults there is nothing to fix, but at least we have to disable writebacks to avoid recursive faults. Signed-off-by: Roman Zippel Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/kernel/traps.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index fd4858e2dd63..75b8340b254b 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -468,15 +468,26 @@ static inline void access_error040(struct frame *fp) * (if do_page_fault didn't fix the mapping, * the writeback won't do good) */ +disable_wb: #ifdef DEBUG printk(".. disabling wb2\n"); #endif if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr) fp->un.fmt7.wb2s &= ~WBV_040; + if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr) + fp->un.fmt7.wb3s &= ~WBV_040; } - } else if (send_fault_sig(&fp->ptregs) > 0) { - printk("68040 access error, ssw=%x\n", ssw); - trap_c(fp); + } else { + /* In case of a bus error we either kill the process or expect + * the kernel to catch the fault, which then is also responsible + * for cleaning up the mess. + */ + current->thread.signo = SIGBUS; + current->thread.faddr = fp->un.fmt7.faddr; + if (send_fault_sig(&fp->ptregs) >= 0) + printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw, + fp->un.fmt7.faddr); + goto disable_wb; } do_040writebacks(fp); -- cgit v1.2.3 From b6d9d267f0d68104df910fca89149803aec82426 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 5 May 2008 21:26:15 +0200 Subject: m68k: remove old mac_esp cruft Remove the rest of the old mac_esp driver. Also ditch the rest of the machw mechanism, it needs to be replaced by a fake openfirmware tree. Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 --- arch/m68k/mac/config.c | 24 ------------------------ include/asm-m68k/machw.h | 30 ------------------------------ 3 files changed, 57 deletions(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a3c35446e755..b2dd3dc32fd3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1094,9 +1094,6 @@ and is between 256 and 4096 characters. It is defined in the file mac5380= [HW,SCSI] Format: ,,,, - mac53c9x= [HW,SCSI] Format: - ,,,,,,, - machvec= [IA64] Force the use of a particular machine-vector (machvec) in a generic kernel. Example: machvec=hpzx1_swiotlb diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 735a49b4b936..ad3e3bacae39 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -48,9 +48,6 @@ struct mac_booter_data mac_bi_data; int mac_bisize = sizeof mac_bi_data; -struct mac_hw_present mac_hw_present; -EXPORT_SYMBOL(mac_hw_present); - /* New m68k bootinfo stuff and videobase */ extern int m68k_num_memory; @@ -817,27 +814,6 @@ void __init mac_identify(void) m68k_ramdisk.addr, m68k_ramdisk.size); #endif - /* - * TODO: set the various fields in macintosh_config->hw_present here! - */ - switch (macintosh_config->scsi_type) { - case MAC_SCSI_OLD: - MACHW_SET(MAC_SCSI_80); - break; - case MAC_SCSI_QUADRA: - case MAC_SCSI_QUADRA2: - case MAC_SCSI_QUADRA3: - MACHW_SET(MAC_SCSI_96); - if ((macintosh_config->ident == MAC_MODEL_Q900) || - (macintosh_config->ident == MAC_MODEL_Q950)) - MACHW_SET(MAC_SCSI_96_2); - break; - default: - printk(KERN_WARNING "config.c: wtf: unknown scsi, using 53c80\n"); - MACHW_SET(MAC_SCSI_80); - break; - } - iop_init(); via_init(); oss_init(); diff --git a/include/asm-m68k/machw.h b/include/asm-m68k/machw.h index d2e0e25d5c90..35624998291c 100644 --- a/include/asm-m68k/machw.h +++ b/include/asm-m68k/machw.h @@ -66,36 +66,6 @@ struct MAC_SCC # define mac_scc ((*(volatile struct SCC*)MAC_SCC_BAS)) #endif -/* hardware stuff */ - -#define MACHW_DECLARE(name) unsigned name : 1 -#define MACHW_SET(name) (mac_hw_present.name = 1) -#define MACHW_PRESENT(name) (mac_hw_present.name) - -struct mac_hw_present { - /* video hardware */ - /* sound hardware */ - /* disk storage interfaces */ - MACHW_DECLARE(MAC_SCSI_80); /* Directly mapped NCR5380 */ - MACHW_DECLARE(MAC_SCSI_96); /* 53c9[46] */ - MACHW_DECLARE(MAC_SCSI_96_2); /* 2nd 53c9[46] Q900 and Q950 */ - MACHW_DECLARE(IDE); /* IDE Interface */ - /* other I/O hardware */ - MACHW_DECLARE(SCC); /* Serial Communications Contr. */ - /* DMA */ - MACHW_DECLARE(SCSI_DMA); /* DMA for the NCR5380 */ - /* real time clocks */ - MACHW_DECLARE(RTC_CLK); /* clock chip */ - /* supporting hardware */ - MACHW_DECLARE(VIA1); /* Versatile Interface Ad. 1 */ - MACHW_DECLARE(VIA2); /* Versatile Interface Ad. 2 */ - MACHW_DECLARE(RBV); /* Versatile Interface Ad. 2+ */ - /* NUBUS */ - MACHW_DECLARE(NUBUS); /* NUBUS */ -}; - -extern struct mac_hw_present mac_hw_present; - #endif /* __ASSEMBLY__ */ #endif /* linux/machw.h */ -- cgit v1.2.3 From abdefbdbd5c683ddcb1dd0d3dd414d02f078a5da Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 5 May 2008 12:38:58 -0700 Subject: sparc64: remove online_page() The identical online_page() implementations from all architectures got moved to mm/memory_hotplug.c - except for the sparc64 one that even was dead code due to MEMORY_HOTPLUG not being available there. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- arch/sparc64/mm/init.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 4cad0b32b0af..db8e7fb5a3b9 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -2362,16 +2362,3 @@ void __flush_tlb_all(void) __asm__ __volatile__("wrpr %0, 0, %%pstate" : : "r" (pstate)); } - -#ifdef CONFIG_MEMORY_HOTPLUG - -void online_page(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalram_pages++; - num_physpages++; -} - -#endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3 From a5574cf65b5f03ce9ade3918764fe22e5e2371e3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 5 May 2008 23:19:50 +0200 Subject: sched, x86: add HAVE_UNSTABLE_SCHED_CLOCK add the HAVE_UNSTABLE_SCHED_CLOCK, for architectures to select. the next change utilizes it. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + init/Kconfig | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 845ea2b2d487..bbcafaa160c0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -18,6 +18,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES diff --git a/init/Kconfig b/init/Kconfig index f0e62e5ce0dc..fa42e6b549d3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -316,6 +316,12 @@ config CPUSETS Say N if unsure. +# +# Architectures with an unreliable sched_clock() should select this: +# +config HAVE_UNSTABLE_SCHED_CLOCK + bool + config GROUP_SCHED bool "Group CPU scheduler" default y -- cgit v1.2.3 From aeed5fce37196e09b4dac3a1c00d8b7122e040ce Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 May 2008 20:49:23 +0100 Subject: x86: fix PAE pmd_bad bootup warning Fix warning from pmd_bad() at bootup on a HIGHMEM64G HIGHPTE x86_32. That came from 9fc34113f6880b215cbea4e7017fc818700384c2 x86: debug pmd_bad(); but we understand now that the typecasting was wrong for PAE in the previous version: pagetable pages above 4GB looked bad and stopped Arjan from booting. And revert that cded932b75ab0a5f9181ee3da34a0a488d1a14fd x86: fix pmd_bad and pud_bad to support huge pages. It was the wrong way round: we shouldn't weaken every pmd_bad and pud_bad check to let huge pages slip through - in part they check that we _don't_ have a huge page where it's not expected. Put the x86 pmd_bad() and pud_bad() definitions back to what they have long been: they can be improved (x86_32 should use PTE_MASK, to stop PAE thinking junk in the upper word is good; and x86_64 should follow x86_32's stricter comparison, to stop thinking any subset of required bits is good); but that should be a later patch. Fix Hans' good observation that follow_page() will never find pmd_huge() because that would have already failed the pmd_bad test: test pmd_huge in between the pmd_none and pmd_bad tests. Tighten x86's pmd_huge() check? No, once it's a hugepage entry, it can get quite far from a good pmd: for example, PROT_NONE leaves it with only ACCESSED of the KERN_PGTABLE bits. However... though follow_page() contains this and another test for huge pages, so it's nice to keep it working on them, where does it actually get called on a huge page? get_user_pages() checks is_vm_hugetlb_page(vma) to to call alternative hugetlb processing, as does unmap_vmas() and others. Signed-off-by: Hugh Dickins Earlier-version-tested-by: Ingo Molnar Cc: Thomas Gleixner Cc: Jeff Chua Cc: Hans Rosenfeld Cc: Arjan van de Ven Signed-off-by: Linus Torvalds --- arch/x86/mm/pgtable_32.c | 7 ------- include/asm-x86/pgtable_32.h | 9 +-------- include/asm-x86/pgtable_64.h | 6 ++---- mm/memory.c | 5 ++++- 4 files changed, 7 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 9ee007be9142..369cf065b6a4 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -172,10 +172,3 @@ void reserve_top_address(unsigned long reserve) __FIXADDR_TOP = -reserve - PAGE_SIZE; __VMALLOC_RESERVE += reserve; } - -int pmd_bad(pmd_t pmd) -{ - WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); - - return pmd_bad_v1(pmd); -} diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 577ab79c4c27..d7f0403bbecb 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -88,14 +88,7 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val((x))) #define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) - -extern int pmd_bad(pmd_t pmd); - -#define pmd_bad_v1(x) \ - (_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER))) -#define pmd_bad_v2(x) \ - (_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER | \ - _PAGE_PSE | _PAGE_NX))) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index a3bbf8766c1d..efe83dcbd412 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -158,14 +158,12 @@ static inline unsigned long pgd_bad(pgd_t pgd) static inline unsigned long pud_bad(pud_t pud) { - return pud_val(pud) & - ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); + return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } static inline unsigned long pmd_bad(pmd_t pmd) { - return pmd_val(pmd) & - ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); + return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } #define pte_none(x) (!pte_val((x))) diff --git a/mm/memory.c b/mm/memory.c index bbab1e37055e..48c122d42ed7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -969,7 +969,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, goto no_page_table; pmd = pmd_offset(pud, address); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + if (pmd_none(*pmd)) goto no_page_table; if (pmd_huge(*pmd)) { @@ -978,6 +978,9 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, goto out; } + if (unlikely(pmd_bad(*pmd))) + goto no_page_table; + ptep = pte_offset_map_lock(mm, pmd, address, &ptl); if (!ptep) goto out; -- cgit v1.2.3 From d45100f7b69e3d9cd0cd5e956b6ac2c78d460d07 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 May 2008 15:19:54 -0700 Subject: sparc64: Fix initrd regression. We die because we forget to convert initrd_start and initrd_end to virtual addresses. Reported by Mikael Pettersson Signed-off-by: David S. Miller --- arch/sparc64/mm/init.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index db8e7fb5a3b9..ec3e2c72302a 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -771,6 +771,9 @@ static void __init find_ramdisk(unsigned long phys_base) initrd_end = ramdisk_image + sparc_ramdisk_size; lmb_reserve(initrd_start, initrd_end); + + initrd_start += PAGE_OFFSET; + initrd_end += PAGE_OFFSET; } #endif } -- cgit v1.2.3 From c0a18111e571138747a98af18b3a2124df56a0d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 May 2008 17:09:27 -0700 Subject: Revert "uml: fix gcc problem" This reverts commit 22eecde2f9034764a3fd095eecfa3adfb8ec9a98. Uli reports that it breaks UML on x86-64 with the Fedora 8 gcc (gcc 4.1.2), causing a crash on startup. See http://marc.info/?l=linux-kernel&m=121011722806093&w=2 for a trace. Reported-by: Ulrich Drepper Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Makefile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/um/Makefile b/arch/um/Makefile index 01b97c19a8ba..dbeab15e7bb7 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -77,10 +77,7 @@ include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS) KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \ -Dmktime=kernel_mktime $(ARCH_KERNEL_DEFINES)) KBUILD_CFLAGS += $(KERNEL_DEFINES) -# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use -# a lot more stack due to the lack of sharing of stacklots: -KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ - echo $(call cc-option,-fno-unit-at-a-time); fi ;) +KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,) PHONY += linux -- cgit v1.2.3 From 2688905e6a9b3647bf7b452cb0ff2bdb166bd8fe Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 May 2008 09:22:52 +0200 Subject: [S390] s390: Optimize user and work TIF check On return from syscall or interrupt, we have to check if we return to userspace (likely) and if there is work todo (less likely) to decide if we handle the work. We can optimize this check: we first check for the less likely work case and then check for userspace. This patch is also a preparation for an additional patch, that fixes a bug in KVM dealing with cpu bound guests. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 29 ++++++++++++++--------------- arch/s390/kernel/entry64.S | 29 ++++++++++++++--------------- 2 files changed, 28 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index bdbb3bcd78a5..708cf9cf9a35 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -279,8 +279,6 @@ sysc_do_restart: st %r2,SP_R2(%r15) # store return value (change R2 on stack) sysc_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) tm __TI_flags+3(%r9),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) sysc_restore: @@ -312,6 +310,8 @@ sysc_work_loop: # One of the work bits is on. Find out which one. # sysc_work: + tm SP_PSW+1(%r15),0x01 # returning to user ? + bno BASED(sysc_restore) tm __TI_flags+3(%r9),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED @@ -602,12 +602,6 @@ io_no_vtime: la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to standard irq handler io_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifdef CONFIG_PREEMPT - bno BASED(io_preempt) # no -> check for preemptive scheduling -#else - bno BASED(io_restore) # no-> skip resched & signal -#endif tm __TI_flags+3(%r9),_TIF_WORK_INT bnz BASED(io_work) # there is work to do (signals etc.) io_restore: @@ -629,10 +623,18 @@ io_restore_trace_psw: .long 0, io_restore_trace + 0x80000000 #endif -#ifdef CONFIG_PREEMPT -io_preempt: +# +# switch to kernel stack, then check the TIF bits +# +io_work: + tm SP_PSW+1(%r15),0x01 # returning to user ? +#ifndef CONFIG_PREEMPT + bno BASED(io_restore) # no-> skip resched & signal +#else + bnz BASED(io_work_user) # no -> check for preemptive scheduling + # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) - bnz BASED(io_restore) + bnz BASED(io_restore) # preemption disabled l %r1,SP_R15(%r15) s %r1,BASED(.Lc_spsize) mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) @@ -646,10 +648,7 @@ io_resume_loop: br %r1 # call schedule #endif -# -# switch to kernel stack, then check the TIF bits -# -io_work: +io_work_user: l %r1,__LC_KERNEL_STACK s %r1,BASED(.Lc_spsize) mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 5a4a7bcd2bba..a57909d63149 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -271,8 +271,6 @@ sysc_noemu: stg %r2,SP_R2(%r15) # store return value (change R2 on stack) sysc_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_restore tm __TI_flags+7(%r9),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) sysc_restore: @@ -304,6 +302,8 @@ sysc_work_loop: # One of the work bits is on. Find out which one. # sysc_work: + tm SP_PSW+1(%r15),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+7(%r9),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED @@ -585,12 +585,6 @@ io_no_vtime: la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_IRQ # call standard irq handler io_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifdef CONFIG_PREEMPT - jno io_preempt # no -> check for preemptive scheduling -#else - jno io_restore # no-> skip resched & signal -#endif tm __TI_flags+7(%r9),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) io_restore: @@ -612,10 +606,18 @@ io_restore_trace_psw: .quad 0, io_restore_trace #endif -#ifdef CONFIG_PREEMPT -io_preempt: +# +# switch to kernel stack, then check TIF bits +# +io_work: + tm SP_PSW+1(%r15),0x01 # returning to user ? +#ifndef CONFIG_PREEMPT + jno io_restore # no-> skip resched & signal +#else + jnz io_work_user # yes -> do resched & signal + # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) - jnz io_restore + jnz io_restore # preemption is disabled # switch to kernel stack lg %r1,SP_R15(%r15) aghi %r1,-SP_SIZE @@ -629,10 +631,7 @@ io_resume_loop: jg preempt_schedule_irq #endif -# -# switch to kernel stack, then check TIF bits -# -io_work: +io_work_user: lg %r1,__LC_KERNEL_STACK aghi %r1,-SP_SIZE mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) -- cgit v1.2.3 From 0eaeafa10f3b2bd027e95859a6785d4c7fcc174c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 7 May 2008 09:22:53 +0200 Subject: [S390] s390-kvm: leave sie context on work. Removes preemption requirement From: Martin Schwidefsky This patch fixes a bug with cpu bound guest on kvm-s390. Sometimes it was impossible to deliver a signal to a spinning guest. We used preemption as a circumvention. The preemption notifiers called vcpu_load, which checked for pending signals and triggered a host intercept. But even with preemption, a sigkill was not delivered immediately. This patch changes the low level host interrupt handler to check for the SIE instruction, if TIF_WORK is set. In that case we change the instruction pointer of the return PSW to rerun the vcpu_run loop. The kvm code sees an intercept reason 0 if that happens. This patch adds accounting for these types of intercept as well. The advantages: - works with and without preemption - signals are delivered immediately - much better host latencies without preemption Acked-by: Carsten Otte Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry64.S | 30 +++++++++++++++++++++++++++++- arch/s390/kvm/Kconfig | 1 - arch/s390/kvm/intercept.c | 3 +++ arch/s390/kvm/kvm-s390.c | 5 +---- include/asm-s390/kvm_host.h | 1 + 5 files changed, 34 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index a57909d63149..fee10177dbfc 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -607,14 +607,37 @@ io_restore_trace_psw: #endif # -# switch to kernel stack, then check TIF bits +# There is work todo, we need to check if we return to userspace, then +# check, if we are in SIE, if yes leave it # io_work: tm SP_PSW+1(%r15),0x01 # returning to user ? #ifndef CONFIG_PREEMPT +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + jnz io_work_user # yes -> no need to check for SIE + la %r1, BASED(sie_opcode) # we return to kernel here + lg %r2, SP_PSW+8(%r15) + clc 0(2,%r1), 0(%r2) # is current instruction = SIE? + jne io_restore # no-> return to kernel + lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE + aghi %r1, 4 + stg %r1, SP_PSW+8(%r15) + j io_restore # return to kernel +#else jno io_restore # no-> skip resched & signal +#endif #else jnz io_work_user # yes -> do resched & signal +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + la %r1, BASED(sie_opcode) + lg %r2, SP_PSW+8(%r15) + clc 0(2,%r1), 0(%r2) # is current instruction = SIE? + jne 0f # no -> leave PSW alone + lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE + aghi %r1, 4 + stg %r1, SP_PSW+8(%r15) +0: +#endif # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) jnz io_restore # preemption is disabled @@ -652,6 +675,11 @@ io_work_loop: j io_restore io_work_done: +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +sie_opcode: + .long 0xb2140000 +#endif + # # _TIF_MCCK_PENDING is set, call handler # diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 1761b74d639b..e051cad1f1e0 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -22,7 +22,6 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select S390_SWITCH_AMODE - select PREEMPT ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 349581a26103..47a0b642174c 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -105,6 +105,9 @@ static intercept_handler_t instruction_handlers[256] = { static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { + case 0x0: + vcpu->stat.exit_null++; + break; case 0x10: vcpu->stat.exit_external_request++; break; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 98d1e73e01f1..0ac36a649eba 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -31,6 +31,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "userspace_handled", VCPU_STAT(exit_userspace) }, + { "exit_null", VCPU_STAT(exit_null) }, { "exit_validity", VCPU_STAT(exit_validity) }, { "exit_stop_request", VCPU_STAT(exit_stop_request) }, { "exit_external_request", VCPU_STAT(exit_external_request) }, @@ -221,10 +222,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); - - if (signal_pending(current)) - atomic_set_mask(CPUSTAT_STOP_INT, - &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) diff --git a/include/asm-s390/kvm_host.h b/include/asm-s390/kvm_host.h index f8204a4f2e02..18cbd8a39796 100644 --- a/include/asm-s390/kvm_host.h +++ b/include/asm-s390/kvm_host.h @@ -104,6 +104,7 @@ struct sie_block { struct kvm_vcpu_stat { u32 exit_userspace; + u32 exit_null; u32 exit_external_request; u32 exit_external_interrupt; u32 exit_stop_request; -- cgit v1.2.3 From b499d76bfd78e900039155247e1c21bfdf807b7b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 7 May 2008 09:22:57 +0200 Subject: [S390] compat ptrace cleanup This removes redundant arch code for generic ptrace requests already handled by ptrace_request and compat_ptrace_request. It simplifies things to just have the standard entry points, and use the generic compat_sys_ptrace. Signed-off-by: Roland McGrath Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_wrapper.S | 2 +- arch/s390/kernel/ptrace.c | 100 +++----------------------------------- include/asm-s390/ptrace.h | 2 + 3 files changed, 9 insertions(+), 95 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 743d54f0b8db..d003a6e16afb 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -121,7 +121,7 @@ sys32_ptrace_wrapper: lgfr %r3,%r3 # long llgtr %r4,%r4 # long llgfr %r5,%r5 # long - jg sys_ptrace # branch to system call + jg compat_sys_ptrace # branch to system call .globl sys32_alarm_wrapper sys32_alarm_wrapper: diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 7f4270163744..35827b9bd4d1 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -292,8 +292,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) return 0; } -static int -do_ptrace_normal(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { ptrace_area parea; int copied, ret; @@ -529,35 +528,19 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data) return 0; } -static int -do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - unsigned int tmp; /* 4 bytes !! */ + unsigned long addr = caddr; + unsigned long data = cdata; ptrace_area_emu31 parea; int copied, ret; switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - /* read word at location addr. */ - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - if (copied != sizeof(tmp)) - return -EIO; - return put_user(tmp, (unsigned int __force __user *) data); - case PTRACE_PEEKUSR: /* read the word at location addr in the USER area. */ return peek_user_emu31(child, addr, data); - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - /* write the word at location addr. */ - tmp = data; - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 1); - if (copied != sizeof(tmp)) - return -EIO; - return 0; - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ return poke_user_emu31(child, addr, data); @@ -587,82 +570,11 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) copied += sizeof(unsigned int); } return 0; - case PTRACE_GETEVENTMSG: - return put_user((__u32) child->ptrace_message, - (unsigned int __force __user *) data); - case PTRACE_GETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_to_user32((compat_siginfo_t - __force __user *) data, - child->last_siginfo); - case PTRACE_SETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_from_user32(child->last_siginfo, - (compat_siginfo_t - __force __user *) data); } - return ptrace_request(child, request, addr, data); + return compat_ptrace_request(child, request, addr, data); } #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - switch (request) { - case PTRACE_SYSCALL: - /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: - /* restart after signal. */ - if (!valid_signal(data)) - return -EIO; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - user_disable_single_step(child); - wake_up_process(child); - return 0; - - case PTRACE_KILL: - /* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - return 0; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - user_disable_single_step(child); - wake_up_process(child); - return 0; - - case PTRACE_SINGLESTEP: - /* set the trap flag. */ - if (!valid_signal(data)) - return -EIO; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - user_enable_single_step(child); - /* give it a chance to run. */ - wake_up_process(child); - return 0; - - /* Do requests that differ for 31/64 bit */ - default: -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) - return do_ptrace_emu31(child, request, addr, data); -#endif - return do_ptrace_normal(child, request, addr, data); - } - /* Not reached. */ - return -EIO; -} - asmlinkage void syscall_trace(struct pt_regs *regs, int entryexit) { diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h index 441d7c260857..d7d4e2eb3e6f 100644 --- a/include/asm-s390/ptrace.h +++ b/include/asm-s390/ptrace.h @@ -471,6 +471,8 @@ struct task_struct; extern void user_enable_single_step(struct task_struct *); extern void user_disable_single_step(struct task_struct *); +#define __ARCH_WANT_COMPAT_SYS_PTRACE + #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) #define regs_return_value(regs)((regs)->gprs[2]) -- cgit v1.2.3 From 45e576b1c3d0020607b8666c0247164e92c7d719 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 May 2008 09:22:59 +0200 Subject: [S390] guest page hinting light Use the existing arch_alloc_page/arch_free_page callbacks to do the guest page state transitions between stable and unused. Acked-by: Rik van Riel Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 7 ++++ arch/s390/mm/Makefile | 1 + arch/s390/mm/init.c | 3 ++ arch/s390/mm/page-states.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ include/asm-s390/page.h | 11 +++++++ include/asm-s390/system.h | 6 ++++ 6 files changed, 107 insertions(+) create mode 100644 arch/s390/mm/page-states.c (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 29a7940f284f..1d035082e78e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -430,6 +430,13 @@ config CMM_IUCV Select this option to enable the special message interface to the cooperative memory management. +config PAGE_STATES + bool "Unused page notification" + help + This enables the notification of unused pages to the + hypervisor. The ESSA instruction is used to do the states + changes between a page that has content and the unused state. + config VIRT_TIMER bool "Virtual CPU timer support" help diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index fb988a48a754..2a7458134544 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -5,3 +5,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_PAGE_STATES) += page-states.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index fa31de6ae97a..29f3a63806b9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -126,6 +126,9 @@ void __init mem_init(void) /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); + /* Setup guest page hinting */ + cmma_init(); + /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c new file mode 100644 index 000000000000..fc0ad73ffd90 --- /dev/null +++ b/arch/s390/mm/page-states.c @@ -0,0 +1,79 @@ +/* + * arch/s390/mm/page-states.c + * + * Copyright IBM Corp. 2008 + * + * Guest page hinting for unused pages. + * + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include + +#define ESSA_SET_STABLE 1 +#define ESSA_SET_UNUSED 2 + +static int cmma_flag; + +static int __init cmma(char *str) +{ + char *parm; + parm = strstrip(str); + if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) { + cmma_flag = 1; + return 1; + } + cmma_flag = 0; + if (strcmp(parm, "no") == 0 || strcmp(parm, "off") == 0) + return 1; + return 0; +} + +__setup("cmma=", cmma); + +void __init cmma_init(void) +{ + register unsigned long tmp asm("0") = 0; + register int rc asm("1") = -EOPNOTSUPP; + + if (!cmma_flag) + return; + asm volatile( + " .insn rrf,0xb9ab0000,%1,%1,0,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+&d" (rc), "+&d" (tmp)); + if (rc) + cmma_flag = 0; +} + +void arch_free_page(struct page *page, int order) +{ + int i, rc; + + if (!cmma_flag) + return; + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT), + "i" (ESSA_SET_UNUSED)); +} + +void arch_alloc_page(struct page *page, int order) +{ + int i, rc; + + if (!cmma_flag) + return; + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT), + "i" (ESSA_SET_STABLE)); +} diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index f0f4579eac13..12fd9c4f0f15 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h @@ -125,6 +125,17 @@ page_get_storage_key(unsigned long addr) return skey; } +#ifdef CONFIG_PAGE_STATES + +struct page; +void arch_free_page(struct page *page, int order); +void arch_alloc_page(struct page *page, int order); + +#define HAVE_ARCH_FREE_PAGE +#define HAVE_ARCH_ALLOC_PAGE + +#endif + #endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index c819ae25a842..e0d4500d5f95 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -116,6 +116,12 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ +#ifdef CONFIG_PAGE_STATES +extern void cmma_init(void); +#else +static inline void cmma_init(void) { } +#endif + #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ account_vtime(prev); \ -- cgit v1.2.3 From 5816339310b2d9623cf413d33e538b45e815da5d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 7 May 2008 02:24:28 -0700 Subject: sparc: Fix mmap VA span checking. We should not conditionalize VA range checks on MAP_FIXED. Signed-off-by: David S. Miller --- arch/sparc/kernel/sys_sparc.c | 3 +-- arch/sparc64/kernel/sys_sparc.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c index f188b5dc9fd0..e995491c4436 100644 --- a/arch/sparc/kernel/sys_sparc.c +++ b/arch/sparc/kernel/sys_sparc.c @@ -223,8 +223,7 @@ int sparc_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) { if (ARCH_SUN4C_SUN4 && (len > 0x20000000 || - ((flags & MAP_FIXED) && - addr < 0xe0000000 && addr + len > 0x20000000))) + (addr < 0xe0000000 && addr + len > 0x20000000))) return -EINVAL; /* See asm-sparc/uaccess.h */ diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 8d4761f15fa9..0dbc941f130e 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -549,13 +549,13 @@ int sparc64_mmap_check(unsigned long addr, unsigned long len, if (len >= STACK_TOP32) return -EINVAL; - if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len) + if (addr > STACK_TOP32 - len) return -EINVAL; } else { if (len >= VA_EXCLUDE_START) return -EINVAL; - if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len)) + if (invalid_64bit_range(addr, len)) return -EINVAL; } -- cgit v1.2.3 From e5e1d3cb20034a3cbcfff1f0bae12201aa2ce17e Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Wed, 7 May 2008 12:39:56 +0200 Subject: pcspkr: fix dependancies fix pcspkr dependancies: make the pcspkr platform drivers to depend on a platform device, and not the other way around. Signed-off-by: Stas Sergeev Acked-by: Thomas Gleixner Acked-by: Dmitry Torokhov CC: Vojtech Pavlik CC: Michael Opdenacker [fixed for 2.6.26-rc1 by tiwai] Signed-off-by: Takashi Iwai --- arch/x86/kernel/Makefile | 4 +--- drivers/input/misc/Kconfig | 2 +- init/Kconfig | 8 ++++++++ sound/drivers/Kconfig | 4 ++-- 4 files changed, 12 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bbdacb398d48..5e618c3b4720 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -83,9 +83,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o -ifdef CONFIG_INPUT_PCSPKR -obj-y += pcspeaker.o -endif +obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o obj-$(CONFIG_SCx200) += scx200.o scx200-y += scx200_32.o diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 92b683411d5a..3ad8bd9f7543 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -14,7 +14,7 @@ if INPUT_MISC config INPUT_PCSPKR tristate "PC Speaker support" - depends on ALPHA || X86 || MIPS || PPC_PREP || PPC_CHRP || PPC_PSERIES + depends on PCSPKR_PLATFORM depends on SND_PCSP=n help Say Y here if you want the standard PC Speaker to be used for diff --git a/init/Kconfig b/init/Kconfig index 4c33316743f5..3b5adbf228c7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -634,6 +634,14 @@ config ELF_CORE help Enable support for generating core dumps. Disabling saves about 4k. +config PCSPKR_PLATFORM + bool "Enable PC-Speaker support" if EMBEDDED + depends on ALPHA || X86 || MIPS || PPC_PREP || PPC_CHRP || PPC_PSERIES + default y + help + This option allows to disable the internal PC-Speaker + support, saving some memory. + config COMPAT_BRK bool "Disable heap randomization" default y diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index a78a8d045175..379bcb074463 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -5,8 +5,8 @@ menu "Generic devices" config SND_PCSP - tristate "Internal PC speaker support" - depends on X86_PC && HIGH_RES_TIMERS + tristate "PC-Speaker support" + depends on PCSPKR_PLATFORM && X86_PC && HIGH_RES_TIMERS depends on INPUT depends on SND select SND_PCM -- cgit v1.2.3 From 1e38c126c9252b612697e34f43b1b3371c8ee31d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 7 May 2008 16:21:28 -0700 Subject: sparc: Fix fork/clone/vfork system call restart. We clobber %i1 as well as %i0 for these system calls, because they give two return values. Therefore, on error, we have to restore %i1 properly or else the restart explodes since it uses the wrong arguments. This fixes glibc's nptl/tst-eintr1.c testcase. Signed-off-by: David S. Miller --- arch/sparc/kernel/process.c | 20 ++++++++++++++++---- arch/sparc64/kernel/process.c | 18 +++++++++++++++--- 2 files changed, 31 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index e7f35198ae34..36431f377dee 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -419,14 +419,26 @@ asmlinkage int sparc_do_fork(unsigned long clone_flags, unsigned long stack_size) { unsigned long parent_tid_ptr, child_tid_ptr; + unsigned long orig_i1 = regs->u_regs[UREG_I1]; + long ret; parent_tid_ptr = regs->u_regs[UREG_I2]; child_tid_ptr = regs->u_regs[UREG_I4]; - return do_fork(clone_flags, stack_start, - regs, stack_size, - (int __user *) parent_tid_ptr, - (int __user *) child_tid_ptr); + ret = do_fork(clone_flags, stack_start, + regs, stack_size, + (int __user *) parent_tid_ptr, + (int __user *) child_tid_ptr); + + /* If we get an error and potentially restart the system + * call, we're screwed because copy_thread() clobbered + * the parent's %o1. So detect that case and restore it + * here. + */ + if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK) + regs->u_regs[UREG_I1] = orig_i1; + + return ret; } /* Copy a Sparc thread. The fork() return value conventions diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 500ac6d483a0..4129c0449856 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -503,6 +503,8 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags, unsigned long stack_size) { int __user *parent_tid_ptr, *child_tid_ptr; + unsigned long orig_i1 = regs->u_regs[UREG_I1]; + long ret; #ifdef CONFIG_COMPAT if (test_thread_flag(TIF_32BIT)) { @@ -515,9 +517,19 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags, child_tid_ptr = (int __user *) regs->u_regs[UREG_I4]; } - return do_fork(clone_flags, stack_start, - regs, stack_size, - parent_tid_ptr, child_tid_ptr); + ret = do_fork(clone_flags, stack_start, + regs, stack_size, + parent_tid_ptr, child_tid_ptr); + + /* If we get an error and potentially restart the system + * call, we're screwed because copy_thread() clobbered + * the parent's %o1. So detect that case and restore it + * here. + */ + if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK) + regs->u_regs[UREG_I1] = orig_i1; + + return ret; } /* Copy a Sparc thread. The fork() return value conventions -- cgit v1.2.3 From dc5dc7e6d71ca9fd1ea01a1418150af3b2937489 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 7 May 2008 18:54:05 -0700 Subject: sparc: Fix SA_ONSTACK signal handling. We need to be more liberal about the alignment of the buffer given to us by sigaltstack(). The user should not need to be mindful of all of the alignment constraints we have for the stack frame. This mirrors how we handle this situation in clone() as well. Also, we align the stack even in non-SA_ONSTACK cases so that signals due to bad stack alignment can be delivered properly. This makes such errors easier to debug and recover from. Finally, add the sanity check x86 has to make sure we won't overflow the signal stack. This fixes glibc testcases nptl/tst-cancel20.c and nptl/tst-cancelx20.c Signed-off-by: David S. Miller --- arch/sparc/kernel/signal.c | 20 +++++++++++++++++--- arch/sparc64/kernel/signal.c | 21 +++++++++++++++++---- arch/sparc64/kernel/signal32.c | 18 +++++++++++++++++- 3 files changed, 51 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c index 3c312290c3c2..368157926d24 100644 --- a/arch/sparc/kernel/signal.c +++ b/arch/sparc/kernel/signal.c @@ -245,15 +245,29 @@ static inline int invalid_frame_pointer(void __user *fp, int fplen) static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, unsigned long framesize) { - unsigned long sp; + unsigned long sp = regs->u_regs[UREG_FP]; - sp = regs->u_regs[UREG_FP]; + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user *) -1L; /* This is the X/Open sanctioned signal stack switching. */ if (sa->sa_flags & SA_ONSTACK) { - if (!on_sig_stack(sp) && !((current->sas_ss_sp + current->sas_ss_size) & 7)) + if (sas_ss_flags(sp) == 0) sp = current->sas_ss_sp + current->sas_ss_size; } + + /* Always align the stack frame. This handles two cases. First, + * sigaltstack need not be mindful of platform specific stack + * alignment. Second, if we took this signal because the stack + * is not aligned properly, we'd like to take the signal cleanly + * and report that. + */ + sp &= ~7UL; + return (void __user *)(sp - framesize); } diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c index 45d6bf632daa..07c0443ea3f5 100644 --- a/arch/sparc64/kernel/signal.c +++ b/arch/sparc64/kernel/signal.c @@ -376,16 +376,29 @@ save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize) { - unsigned long sp; + unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS; - sp = regs->u_regs[UREG_FP] + STACK_BIAS; + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user *) -1L; /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { - if (!on_sig_stack(sp) && - !((current->sas_ss_sp + current->sas_ss_size) & 7)) + if (sas_ss_flags(sp) == 0) sp = current->sas_ss_sp + current->sas_ss_size; } + + /* Always align the stack frame. This handles two cases. First, + * sigaltstack need not be mindful of platform specific stack + * alignment. Second, if we took this signal because the stack + * is not aligned properly, we'd like to take the signal cleanly + * and report that. + */ + sp &= ~7UL; + return (void __user *)(sp - framesize); } diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index 9415d2c918c5..0f6b7b156efd 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -406,11 +406,27 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL; sp = regs->u_regs[UREG_FP]; + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user *) -1L; + /* This is the X/Open sanctioned signal stack switching. */ if (sa->sa_flags & SA_ONSTACK) { - if (!on_sig_stack(sp) && !((current->sas_ss_sp + current->sas_ss_size) & 7)) + if (sas_ss_flags(sp) == 0) sp = current->sas_ss_sp + current->sas_ss_size; } + + /* Always align the stack frame. This handles two cases. First, + * sigaltstack need not be mindful of platform specific stack + * alignment. Second, if we took this signal because the stack + * is not aligned properly, we'd like to take the signal cleanly + * and report that. + */ + sp &= ~7UL; + return (void __user *)(sp - framesize); } -- cgit v1.2.3 From 3168cb98be7199325de633052680098660ccaf84 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 May 2008 20:42:33 -0700 Subject: uml: fix inconsistence due to tty_operation change 'put_char' of 'struct tty_operations' has changed from 'void' into 'int'. This can also shut up compiler warnings. Cc: Jeff Dike Signed-off-by: WANG Cong Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/line.c | 4 ++-- arch/um/include/line.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 10b86e1cc659..5047490fc299 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -191,9 +191,9 @@ void line_flush_chars(struct tty_struct *tty) line_flush_buffer(tty); } -void line_put_char(struct tty_struct *tty, unsigned char ch) +int line_put_char(struct tty_struct *tty, unsigned char ch) { - line_write(tty, &ch, sizeof(ch)); + return line_write(tty, &ch, sizeof(ch)); } int line_write(struct tty_struct *tty, const unsigned char *buf, int len) diff --git a/arch/um/include/line.h b/arch/um/include/line.h index 1223f2c844b4..979b73e6352d 100644 --- a/arch/um/include/line.h +++ b/arch/um/include/line.h @@ -71,7 +71,7 @@ extern int line_setup(struct line *lines, unsigned int sizeof_lines, char *init, char **error_out); extern int line_write(struct tty_struct *tty, const unsigned char *buf, int len); -extern void line_put_char(struct tty_struct *tty, unsigned char ch); +extern int line_put_char(struct tty_struct *tty, unsigned char ch); extern void line_set_termios(struct tty_struct *tty, struct ktermios * old); extern int line_chars_in_buffer(struct tty_struct *tty); extern void line_flush_buffer(struct tty_struct *tty); -- cgit v1.2.3 From ba719baeabbff5476eeb91c223e6921ba29e1490 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 6 May 2008 20:42:38 -0700 Subject: sys_pipe(): fix file descriptor leaks Remember to close the files if copy_to_user() failed. Spotted by dm.n9107@gmail.com. Signed-off-by: Ulrich Drepper Cc: DM Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/kernel/sys_cris.c | 5 ++++- arch/m32r/kernel/sys_m32r.c | 5 ++++- fs/pipe.c | 6 +++++- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index 8b9984197edc..d124066e1728 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -40,8 +40,11 @@ asmlinkage int sys_pipe(unsigned long __user * fildes) error = do_pipe(fd); unlock_kernel(); if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) + if (copy_to_user(fildes, fd, 2*sizeof(int))) { + sys_close(fd[0]); + sys_close(fd[1]); error = -EFAULT; + } } return error; } diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index 6d7a80fdad48..319c79720b8a 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -90,8 +90,11 @@ sys_pipe(unsigned long r0, unsigned long r1, unsigned long r2, error = do_pipe(fd); if (!error) { - if (copy_to_user((void __user *)r0, fd, 2*sizeof(int))) + if (copy_to_user((void __user *)r0, fd, 2*sizeof(int))) { + sys_close(fd[0]); + sys_close(fd[1]); error = -EFAULT; + } } return error; } diff --git a/fs/pipe.c b/fs/pipe.c index 3499f9ff6316..ec228bc9f882 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -1086,8 +1087,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes) error = do_pipe(fd); if (!error) { - if (copy_to_user(fildes, fd, sizeof(fd))) + if (copy_to_user(fildes, fd, sizeof(fd))) { + sys_close(fd[0]); + sys_close(fd[1]); error = -EFAULT; + } } return error; } -- cgit v1.2.3 From 8d539108560ec121d59eee05160236488266221c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 8 May 2008 18:41:48 -0700 Subject: Revert "PCI: remove default PCI expansion ROM memory allocation" This reverts commit 9f8daccaa05c14e5643bdd4faf5aed9cc8e6f11e, which was reported to break X startup (xf86-video-ati-6.8.0). See http://bugs.freedesktop.org/show_bug.cgi?id=15523 for details. Reported-by: Laurence Withers Cc: Gary Hade Cc: Greg KH Cc: Jan Beulich Cc: "Jun'ichi Nomura" Cc: Andrew Morton Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- arch/x86/pci/common.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index bfa72a9475b3..8545c8a9d107 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -77,19 +77,6 @@ int pcibios_scanned; */ DEFINE_SPINLOCK(pci_config_lock); -static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) -{ - struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; - - if (rom_r->parent) - return; - if (rom_r->start) - /* we deal with BIOS assigned ROM later */ - return; - if (!(pci_probe & PCI_ASSIGN_ROMS)) - rom_r->start = rom_r->end = rom_r->flags = 0; -} - static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) { pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; @@ -141,11 +128,7 @@ void __init dmi_check_skip_isa_align(void) void __devinit pcibios_fixup_bus(struct pci_bus *b) { - struct pci_dev *dev; - pci_read_bridge_bases(b); - list_for_each_entry(dev, &b->devices, bus_list) - pcibios_fixup_device_resources(dev); } /* -- cgit v1.2.3 From d236f5a5f77183c270223e8816804e7763463282 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 26 Apr 2008 14:48:11 -0400 Subject: [ARM] Orion: use mv643xx_eth driver mbus window handling Make the Orion 5x platform code use the mbus window handling code that's in the mv643xx_eth driver, instead of programming the GigE block's mbus window registers by hand. Signed-off-by: Lennert Buytenhek Reviewed-by: Tzachi Perelstein Acked-by: Russell King Signed-off-by: Nicolas Pitre --- arch/arm/mach-orion5x/addr-map.c | 47 ---------------------------------------- arch/arm/mach-orion5x/common.c | 8 ++++++- arch/arm/mach-orion5x/common.h | 1 - 3 files changed, 7 insertions(+), 49 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-orion5x/addr-map.c b/arch/arm/mach-orion5x/addr-map.c index 9608503d67f5..186f51ce7c11 100644 --- a/arch/arm/mach-orion5x/addr-map.c +++ b/arch/arm/mach-orion5x/addr-map.c @@ -81,17 +81,6 @@ #define CPU_WIN_REMAP_LO(n) ORION5X_BRIDGE_REG(0x008 | ((n) << 4)) #define CPU_WIN_REMAP_HI(n) ORION5X_BRIDGE_REG(0x00c | ((n) << 4)) -/* - * Gigabit Ethernet Address Decode Windows registers - */ -#define ETH_WIN_BASE(win) ORION5X_ETH_REG(0x200 + ((win) * 8)) -#define ETH_WIN_SIZE(win) ORION5X_ETH_REG(0x204 + ((win) * 8)) -#define ETH_WIN_REMAP(win) ORION5X_ETH_REG(0x280 + ((win) * 4)) -#define ETH_WIN_EN ORION5X_ETH_REG(0x290) -#define ETH_WIN_PROT ORION5X_ETH_REG(0x294) -#define ETH_MAX_WIN 6 -#define ETH_MAX_REMAP_WIN 4 - struct mbus_dram_target_info orion5x_mbus_dram_info; @@ -202,39 +191,3 @@ void __init orion5x_setup_pcie_wa_win(u32 base, u32 size) { setup_cpu_win(7, base, size, TARGET_PCIE, ATTR_PCIE_WA, -1); } - -void __init orion5x_setup_eth_wins(void) -{ - int i; - - /* - * First, disable and clear windows - */ - for (i = 0; i < ETH_MAX_WIN; i++) { - orion5x_write(ETH_WIN_BASE(i), 0); - orion5x_write(ETH_WIN_SIZE(i), 0); - orion5x_setbits(ETH_WIN_EN, 1 << i); - orion5x_clrbits(ETH_WIN_PROT, 0x3 << (i * 2)); - if (i < ETH_MAX_REMAP_WIN) - orion5x_write(ETH_WIN_REMAP(i), 0); - } - - /* - * Setup windows for DDR banks. - */ - for (i = 0; i < DDR_MAX_CS; i++) { - u32 base, size; - size = orion5x_read(DDR_SIZE_CS(i)); - base = orion5x_read(DDR_BASE_CS(i)); - if (size & DDR_BANK_EN) { - base = DDR_REG_TO_BASE(base); - size = DDR_REG_TO_SIZE(size); - orion5x_write(ETH_WIN_SIZE(i), (size-1) & 0xffff0000); - orion5x_write(ETH_WIN_BASE(i), (base & 0xffff0000) | - (ATTR_DDR_CS(i) << 8) | - TARGET_DDR); - orion5x_clrbits(ETH_WIN_EN, 1 << i); - orion5x_setbits(ETH_WIN_PROT, 0x3 << (i * 2)); - } - } -} diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 0ecff5a61972..da6e5deab073 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -190,6 +190,10 @@ static struct platform_device orion5x_ehci1 = { * (The Orion and Discovery (MV643xx) families use the same Ethernet driver) ****************************************************************************/ +struct mv643xx_eth_shared_platform_data orion5x_eth_shared_data = { + .dram = &orion5x_mbus_dram_info, +}; + static struct resource orion5x_eth_shared_resources[] = { { .start = ORION5X_ETH_PHYS_BASE + 0x2000, @@ -201,6 +205,9 @@ static struct resource orion5x_eth_shared_resources[] = { static struct platform_device orion5x_eth_shared = { .name = MV643XX_ETH_SHARED_NAME, .id = 0, + .dev = { + .platform_data = &orion5x_eth_shared_data, + }, .num_resources = 1, .resource = orion5x_eth_shared_resources, }; @@ -362,7 +369,6 @@ void __init orion5x_init(void) * Setup Orion address map */ orion5x_setup_cpu_mbus_bridge(); - orion5x_setup_eth_wins(); /* * Register devices. diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h index 14adf8d1a54a..bd0f05de6e18 100644 --- a/arch/arm/mach-orion5x/common.h +++ b/arch/arm/mach-orion5x/common.h @@ -22,7 +22,6 @@ void orion5x_setup_dev0_win(u32 base, u32 size); void orion5x_setup_dev1_win(u32 base, u32 size); void orion5x_setup_dev2_win(u32 base, u32 size); void orion5x_setup_pcie_wa_win(u32 base, u32 size); -void orion5x_setup_eth_wins(void); /* * Shared code used internally by other Orion core functions. -- cgit v1.2.3 From b8c15a6084e84497e31e75c9cededb73af768632 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 26 Apr 2008 14:48:11 -0400 Subject: [ARM] Orion: pass proper t_clk into mv643xx_eth Pass the Orion TCLK tick rate into the ethernet driver. Signed-off-by: Lennert Buytenhek Signed-off-by: Nicolas Pitre --- arch/arm/mach-orion5x/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index da6e5deab073..4f13fd037f04 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -192,6 +192,7 @@ static struct platform_device orion5x_ehci1 = { struct mv643xx_eth_shared_platform_data orion5x_eth_shared_data = { .dram = &orion5x_mbus_dram_info, + .t_clk = ORION5X_TCLK, }; static struct resource orion5x_eth_shared_resources[] = { -- cgit v1.2.3 From da109897a142dd017172c0ce7abf0be8646f7109 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 26 Apr 2008 14:48:11 -0400 Subject: [ARM] Orion: clean up addr-map.c after window setting code purge This patch cleans up Orion's addr-map.c a bit after all peripheral window programming code has been moved out into the relevant drivers. Signed-off-by: Lennert Buytenhek Reviewed-by: Tzachi Perelstein Acked-by: Russell King Signed-off-by: Nicolas Pitre --- arch/arm/mach-orion5x/addr-map.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-orion5x/addr-map.c b/arch/arm/mach-orion5x/addr-map.c index 186f51ce7c11..e63fb05dc893 100644 --- a/arch/arm/mach-orion5x/addr-map.c +++ b/arch/arm/mach-orion5x/addr-map.c @@ -34,11 +34,7 @@ * Non-CPU Masters address decoding -- * Unlike the CPU, we setup the access from Orion's master interfaces to DDR * banks only (the typical use case). - * Setup access for each master to DDR is issued by common.c. - * - * Note: although orion_setbits() and orion_clrbits() are not atomic - * no locking is necessary here since code in this file is only called - * at boot time when there is no concurrency issues. + * Setup access for each master to DDR is issued by platform device setup. */ /* @@ -48,10 +44,6 @@ #define TARGET_DEV_BUS 1 #define TARGET_PCI 3 #define TARGET_PCIE 4 -#define ATTR_DDR_CS(n) (((n) ==0) ? 0xe : \ - ((n) == 1) ? 0xd : \ - ((n) == 2) ? 0xb : \ - ((n) == 3) ? 0x7 : 0xf) #define ATTR_PCIE_MEM 0x59 #define ATTR_PCIE_IO 0x51 #define ATTR_PCIE_WA 0x79 @@ -61,17 +53,12 @@ #define ATTR_DEV_CS1 0x1d #define ATTR_DEV_CS2 0x1b #define ATTR_DEV_BOOT 0xf -#define WIN_EN 1 /* * Helpers to get DDR bank info */ -#define DDR_BASE_CS(n) ORION5X_DDR_REG(0x1500 + ((n) * 8)) -#define DDR_SIZE_CS(n) ORION5X_DDR_REG(0x1504 + ((n) * 8)) -#define DDR_MAX_CS 4 -#define DDR_REG_TO_SIZE(reg) (((reg) | 0xffffff) + 1) -#define DDR_REG_TO_BASE(reg) ((reg) & 0xff000000) -#define DDR_BANK_EN 1 +#define DDR_BASE_CS(n) ORION5X_DDR_REG(0x1500 + ((n) << 3)) +#define DDR_SIZE_CS(n) ORION5X_DDR_REG(0x1504 + ((n) << 3)) /* * CPU Address Decode Windows registers -- cgit v1.2.3