diff options
Diffstat (limited to 'arch/s390')
30 files changed, 829 insertions, 157 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild new file mode 100644 index 000000000000..ae4b01060edd --- /dev/null +++ b/arch/s390/Kbuild @@ -0,0 +1,6 @@ +obj-y += kernel/ +obj-y += mm/ +obj-y += crypto/ +obj-y += appldata/ +obj-y += hypfs/ +obj-y += kvm/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f0777a47e3a5..409abfb9541d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -1,8 +1,3 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. -# - config SCHED_MC def_bool y depends on SMP @@ -75,8 +70,6 @@ config VIRT_CPU_ACCOUNTING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -mainmenu "Linux Kernel Configuration" - config S390 def_bool y select USE_GENERIC_SMP_HELPERS if SMP @@ -100,6 +93,7 @@ config S390 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO + select HAVE_GET_USER_PAGES_FAST select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -198,6 +192,13 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config SCHED_BOOK + bool "Book scheduler support" + depends on SMP + help + Book scheduler support improves the CPU scheduler's decision making + when dealing with machines that have several books. + config MATHEMU bool "IEEE FPU emulation" depends on MARCH_G5 diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0c9e6c6d2a64..9d318476f81f 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -94,8 +94,8 @@ head-y := arch/s390/kernel/head.o head-y += arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o) head-y += arch/s390/kernel/init_task.o -core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ +# See arch/s390/Kbuild for content of core part of the kernel +core-y += arch/s390/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 42e512ba8b43..287d7bbb6d36 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -5,6 +5,7 @@ header-y += chsc.h header-y += cmb.h header-y += dasd.h header-y += debug.h +header-y += kvm_virtio.h header-y += monwriter.h header-y += qeth.h header-y += schid.h diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index f3ba0fa98de6..e8501115eca8 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -92,6 +92,16 @@ struct ccw_device { }; /* + * Possible events used by the path_event notifier. + */ +#define PE_NONE 0x0 +#define PE_PATH_GONE 0x1 /* A path is no longer available. */ +#define PE_PATH_AVAILABLE 0x2 /* A path has become available and + was successfully verified. */ +#define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had + to be established again. */ + +/* * Possible CIO actions triggered by the unit check handler. */ enum uc_todo { @@ -109,6 +119,7 @@ enum uc_todo { * @set_online: called when setting device online * @set_offline: called when setting device offline * @notify: notify driver of device state changes + * @path_event: notify driver of channel path events * @shutdown: called at device shutdown * @prepare: prepare for pm state transition * @complete: undo work done in @prepare @@ -127,6 +138,7 @@ struct ccw_driver { int (*set_online) (struct ccw_device *); int (*set_offline) (struct ccw_device *); int (*notify) (struct ccw_device *, int); + void (*path_event) (struct ccw_device *, int *); void (*shutdown) (struct ccw_device *); int (*prepare) (struct ccw_device *); void (*complete) (struct ccw_device *); diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 498bc3892385..881d94590aeb 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -12,10 +12,6 @@ #ifndef __ASM_HARDIRQ_H #define __ASM_HARDIRQ_H -#include <linux/threads.h> -#include <linux/sched.h> -#include <linux/cache.h> -#include <linux/interrupt.h> #include <asm/lowcore.h> #define local_softirq_pending() (S390_lowcore.softirq_pending) diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h index acdfdff26611..72f614181eff 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/asm/kvm_virtio.h @@ -54,4 +54,11 @@ struct kvm_vqconfig { * This is pagesize for historical reasons. */ #define KVM_S390_VIRTIO_RING_ALIGN 4096 + +/* These values are supposed to be in ext_params on an interrupt */ +#define VIRTIO_PARAM_MASK 0xff +#define VIRTIO_PARAM_VRING_INTERRUPT 0x0 +#define VIRTIO_PARAM_CONFIG_CHANGED 0x1 +#define VIRTIO_PARAM_DEV_ADD 0x2 + #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index af650fb47206..a8729ea7e9ac 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -108,9 +108,13 @@ typedef pte_t *pgtable_t; #define __pgprot(x) ((pgprot_t) { (x) } ) static inline void -page_set_storage_key(unsigned long addr, unsigned int skey) +page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) { - asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); + if (!mapped) + asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" + : : "d" (skey), "a" (addr)); + else + asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } static inline unsigned int diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 68940d0bad91..082eb4e50e8b 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -21,9 +21,11 @@ unsigned long *crst_table_alloc(struct mm_struct *, int); void crst_table_free(struct mm_struct *, unsigned long *); +void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); +void page_table_free_rcu(struct mm_struct *, unsigned long *); void disable_noexec(struct mm_struct *, struct task_struct *); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) @@ -176,4 +178,6 @@ static inline void pmd_populate(struct mm_struct *mm, #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) +extern void rcu_table_freelist_finish(void); + #endif /* _S390_PGALLOC_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3157441ee1da..f79e7bb9ae1e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -38,6 +38,7 @@ extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); extern void vmem_map_init(void); +extern void fault_init(void); /* * The S390 doesn't have any external MMU info: the kernel page @@ -46,11 +47,27 @@ extern void vmem_map_init(void); #define update_mmu_cache(vma, address, ptep) do { } while (0) /* - * ZERO_PAGE is a global shared page that is always zero: used + * ZERO_PAGE is a global shared page that is always zero; used * for zero-mapped memory areas etc.. */ -extern char empty_zero_page[PAGE_SIZE]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +extern unsigned long empty_zero_page; +extern unsigned long zero_page_mask; + +#define ZERO_PAGE(vaddr) \ + (virt_to_page((void *)(empty_zero_page + \ + (((unsigned long)(vaddr)) &zero_page_mask)))) + +#define is_zero_pfn is_zero_pfn +static inline int is_zero_pfn(unsigned long pfn) +{ + extern unsigned long zero_pfn; + unsigned long offset_from_zero_pfn = pfn - zero_pfn; + return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); +} + +#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) + #endif /* !__ASSEMBLY__ */ /* @@ -300,6 +317,7 @@ extern unsigned long VMALLOC_START; /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ +#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ #define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ @@ -572,7 +590,7 @@ static inline void rcp_unlock(pte_t *ptep) } /* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page); +static inline void page_clear_dirty(struct page *page, int mapped); #include <linux/page-flags.h> static inline void ptep_rcp_copy(pte_t *ptep) @@ -782,7 +800,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, } dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); if (skey & _PAGE_CHANGED) - page_clear_dirty(page); + page_clear_dirty(page, 1); rcp_unlock(ptep); return dirty; } @@ -957,9 +975,9 @@ static inline int page_test_dirty(struct page *page) } #define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page) +static inline void page_clear_dirty(struct page *page, int mapped) { - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY); + page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); } /* diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 2ba630276295..46e96bc1f5a1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -360,6 +360,7 @@ struct qdio_initialize { unsigned int no_output_qs; qdio_handler_t *input_handler; qdio_handler_t *output_handler; + void (*queue_start_poll) (struct ccw_device *, int, unsigned long); unsigned long int_parm; void **input_sbal_addr_array; void **output_sbal_addr_array; @@ -377,11 +378,13 @@ struct qdio_initialize { extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); - -extern int do_QDIO(struct ccw_device *cdev, unsigned int callflags, - int q_nr, unsigned int bufnr, unsigned int count); -extern int qdio_shutdown(struct ccw_device*, int); +extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, + unsigned int); +extern int qdio_start_irq(struct ccw_device *, int); +extern int qdio_stop_irq(struct ccw_device *, int); +extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); +extern int qdio_shutdown(struct ccw_device *, int); extern int qdio_free(struct ccw_device *); -extern int qdio_get_ssqd_desc(struct ccw_device *dev, struct qdio_ssqd_desc*); +extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); #endif /* __QDIO_H__ */ diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h index 35d786fe93ae..6d45ef6c12a7 100644 --- a/arch/s390/include/asm/scatterlist.h +++ b/arch/s390/include/asm/scatterlist.h @@ -1 +1,3 @@ #include <asm-generic/scatterlist.h> + +#define ARCH_HAS_SG_CHAIN diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 22bdb2a0ee5f..c2ece4dd6c0a 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -15,7 +15,10 @@ #define __ASM_S390_SYSINFO_H struct sysinfo_1_1_1 { - char reserved_0[32]; + unsigned short :16; + unsigned char ccr; + unsigned char cai; + char reserved_0[28]; char manufacturer[16]; char type[4]; char reserved_1[12]; diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index cef66210c846..c3932d2d66aa 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -85,14 +85,16 @@ static inline void restore_access_regs(unsigned int *acrs) asm volatile("lam 0,15,%0" : : "Q" (*acrs)); } -#define switch_to(prev,next,last) do { \ - if (prev == next) \ - break; \ - save_fp_regs(&prev->thread.fp_regs); \ - restore_fp_regs(&next->thread.fp_regs); \ - save_access_regs(&prev->thread.acrs[0]); \ - restore_access_regs(&next->thread.acrs[0]); \ - prev = __switch_to(prev,next); \ +#define switch_to(prev,next,last) do { \ + if (prev->mm) { \ + save_fp_regs(&prev->thread.fp_regs); \ + save_access_regs(&prev->thread.acrs[0]); \ + } \ + if (next->mm) { \ + restore_fp_regs(&next->thread.fp_regs); \ + restore_access_regs(&next->thread.acrs[0]); \ + } \ + prev = __switch_to(prev,next); \ } while (0) extern void account_vtime(struct task_struct *, struct task_struct *); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index fd1c00d08bf5..f1f644f2240a 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -64,10 +64,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb, if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < TLB_NR_PTRS)) __tlb_flush_mm(tlb->mm); while (tlb->nr_ptes > 0) - pte_free(tlb->mm, tlb->array[--tlb->nr_ptes]); + page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); while (tlb->nr_pxds < TLB_NR_PTRS) - /* pgd_free frees the pointer as region or segment table */ - pgd_free(tlb->mm, tlb->array[tlb->nr_pxds++]); + crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); } static inline void tlb_finish_mmu(struct mmu_gather *tlb, @@ -75,6 +74,8 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb, { tlb_flush_mmu(tlb, start, end); + rcu_table_freelist_finish(); + /* keep the page table cache within bounds */ check_pgt_cache(); @@ -103,7 +104,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pte_free(tlb->mm, pte); + page_table_free(tlb->mm, (unsigned long *) pte); } /* @@ -124,7 +125,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pmd_free(tlb->mm, pmd); + crst_table_free(tlb->mm, (unsigned long *) pmd); #endif } @@ -146,7 +147,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pud_free(tlb->mm, pud); + crst_table_free(tlb->mm, (unsigned long *) pud); #endif } diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 831bd033ea77..051107a2c5e2 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -3,15 +3,32 @@ #include <linux/cpumask.h> -#define mc_capable() (1) - -const struct cpumask *cpu_coregroup_mask(unsigned int cpu); - extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; +static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + #define topology_core_id(cpu) (cpu_core_id[cpu]) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define mc_capable() (1) + +#ifdef CONFIG_SCHED_BOOK + +extern unsigned char cpu_book_id[NR_CPUS]; +extern cpumask_t cpu_book_map[NR_CPUS]; + +static inline const struct cpumask *cpu_book_mask(unsigned int cpu) +{ + return &cpu_book_map[cpu]; +} + +#define topology_book_id(cpu) (cpu_book_id[cpu]) +#define topology_book_cpumask(cpu) (&cpu_book_map[cpu]) + +#endif /* CONFIG_SCHED_BOOK */ int topology_set_cpu_management(int fc); void topology_schedule_update(void); @@ -30,6 +47,8 @@ static inline void s390_init_cpu_topology(void) }; #endif +#define SD_BOOK_INIT SD_CPU_INIT + #include <asm-generic/topology.h> #endif /* _ASM_S390_TOPOLOGY_H */ diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index b39b27d68b45..2b1c254542dd 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -113,7 +113,7 @@ enum { INSTR_INVALID, INSTR_E, INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, - INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, + INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU, INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, @@ -122,13 +122,14 @@ enum { INSTR_RRE_RR, INSTR_RRE_RR_OPT, INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_M0RR, INSTR_RRF_R0RR, - INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, - INSTR_RRF_UUFF, INSTR_RRR_F0FF, INSTR_RRS_RRRDU, + INSTR_RRF_R0RR2, INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, + INSTR_RRF_U0RR, INSTR_RRF_UUFF, INSTR_RRR_F0FF, INSTR_RRS_RRRDU, INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, INSTR_RSI_RRP, INSTR_RSL_R0RD, INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, + INSTR_RSY_RDRM, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, INSTR_RS_RURD, INSTR_RXE_FRRD, INSTR_RXE_RRRD, @@ -139,7 +140,7 @@ enum { INSTR_SIY_IRD, INSTR_SIY_URD, INSTR_SI_URD, INSTR_SSE_RDRD, - INSTR_SSF_RRDRD, + INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, INSTR_S_00, INSTR_S_RD, @@ -152,7 +153,7 @@ struct operand { }; struct insn { - const char name[6]; + const char name[5]; unsigned char opfrag; unsigned char format; }; @@ -217,6 +218,7 @@ static const unsigned char formats[][7] = { [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, + [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, @@ -248,6 +250,7 @@ static const unsigned char formats[][7] = { [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, + [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, @@ -269,6 +272,7 @@ static const unsigned char formats[][7] = { [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, @@ -290,6 +294,7 @@ static const unsigned char formats[][7] = { [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 }, + [INSTR_SSF_RRDRD2]= { 0x00, R_8,D_20,B_16,D_36,B_32,0 }, [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, @@ -300,6 +305,36 @@ static const unsigned char formats[][7] = { [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, }; +enum { + LONG_INSN_ALGHSIK, + LONG_INSN_ALHSIK, + LONG_INSN_CLFHSI, + LONG_INSN_CLGFRL, + LONG_INSN_CLGHRL, + LONG_INSN_CLGHSI, + LONG_INSN_CLHHSI, + LONG_INSN_LLGFRL, + LONG_INSN_LLGHRL, + LONG_INSN_POPCNT, + LONG_INSN_RISBHG, + LONG_INSN_RISBLG, +}; + +static char *long_insn_name[] = { + [LONG_INSN_ALGHSIK] = "alghsik", + [LONG_INSN_ALHSIK] = "alhsik", + [LONG_INSN_CLFHSI] = "clfhsi", + [LONG_INSN_CLGFRL] = "clgfrl", + [LONG_INSN_CLGHRL] = "clghrl", + [LONG_INSN_CLGHSI] = "clghsi", + [LONG_INSN_CLHHSI] = "clhhsi", + [LONG_INSN_LLGFRL] = "llgfrl", + [LONG_INSN_LLGHRL] = "llghrl", + [LONG_INSN_POPCNT] = "popcnt", + [LONG_INSN_RISBHG] = "risbhg", + [LONG_INSN_RISBLG] = "risblk", +}; + static struct insn opcode[] = { #ifdef CONFIG_64BIT { "lmd", 0xef, INSTR_SS_RRRDRD3 }, @@ -881,6 +916,35 @@ static struct insn opcode_b9[] = { { "pfmf", 0xaf, INSTR_RRE_RR }, { "trte", 0xbf, INSTR_RRF_M0RR }, { "trtre", 0xbd, INSTR_RRF_M0RR }, + { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, + { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, + { "alhhh", 0xca, INSTR_RRF_R0RR2 }, + { "alhhl", 0xca, INSTR_RRF_R0RR2 }, + { "slhhh", 0xcb, INSTR_RRF_R0RR2 }, + { "chhr ", 0xcd, INSTR_RRE_RR }, + { "clhhr", 0xcf, INSTR_RRE_RR }, + { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, + { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, + { "slhhl", 0xdb, INSTR_RRF_R0RR2 }, + { "chlr", 0xdd, INSTR_RRE_RR }, + { "clhlr", 0xdf, INSTR_RRE_RR }, + { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, + { "lgroc", 0xe2, INSTR_RRF_M0RR }, + { "ngrk", 0xe4, INSTR_RRF_R0RR2 }, + { "ogrk", 0xe6, INSTR_RRF_R0RR2 }, + { "xgrk", 0xe7, INSTR_RRF_R0RR2 }, + { "agrk", 0xe8, INSTR_RRF_R0RR2 }, + { "sgrk", 0xe9, INSTR_RRF_R0RR2 }, + { "algrk", 0xea, INSTR_RRF_R0RR2 }, + { "slgrk", 0xeb, INSTR_RRF_R0RR2 }, + { "lroc", 0xf2, INSTR_RRF_M0RR }, + { "nrk", 0xf4, INSTR_RRF_R0RR2 }, + { "ork", 0xf6, INSTR_RRF_R0RR2 }, + { "xrk", 0xf7, INSTR_RRF_R0RR2 }, + { "ark", 0xf8, INSTR_RRF_R0RR2 }, + { "srk", 0xf9, INSTR_RRF_R0RR2 }, + { "alrk", 0xfa, INSTR_RRF_R0RR2 }, + { "slrk", 0xfb, INSTR_RRF_R0RR2 }, #endif { "kmac", 0x1e, INSTR_RRE_RR }, { "lrvr", 0x1f, INSTR_RRE_RR }, @@ -949,9 +1013,9 @@ static struct insn opcode_c4[] = { { "lgfrl", 0x0c, INSTR_RIL_RP }, { "lhrl", 0x05, INSTR_RIL_RP }, { "lghrl", 0x04, INSTR_RIL_RP }, - { "llgfrl", 0x0e, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, { "llhrl", 0x02, INSTR_RIL_RP }, - { "llghrl", 0x06, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, { "strl", 0x0f, INSTR_RIL_RP }, { "stgrl", 0x0b, INSTR_RIL_RP }, { "sthrl", 0x07, INSTR_RIL_RP }, @@ -968,9 +1032,9 @@ static struct insn opcode_c6[] = { { "cghrl", 0x04, INSTR_RIL_RP }, { "clrl", 0x0f, INSTR_RIL_RP }, { "clgrl", 0x0a, INSTR_RIL_RP }, - { "clgfrl", 0x0e, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, { "clhrl", 0x07, INSTR_RIL_RP }, - { "clghrl", 0x06, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, { "pfdrl", 0x02, INSTR_RIL_UP }, { "exrl", 0x00, INSTR_RIL_RP }, #endif @@ -982,6 +1046,20 @@ static struct insn opcode_c8[] = { { "mvcos", 0x00, INSTR_SSF_RRDRD }, { "ectg", 0x01, INSTR_SSF_RRDRD }, { "csst", 0x02, INSTR_SSF_RRDRD }, + { "lpd", 0x04, INSTR_SSF_RRDRD2 }, + { "lpdg ", 0x05, INSTR_SSF_RRDRD2 }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_cc[] = { +#ifdef CONFIG_64BIT + { "brcth", 0x06, INSTR_RIL_RP }, + { "aih", 0x08, INSTR_RIL_RI }, + { "alsih", 0x0a, INSTR_RIL_RI }, + { "alsih", 0x0b, INSTR_RIL_RI }, + { "cih", 0x0d, INSTR_RIL_RI }, + { "clih ", 0x0f, INSTR_RIL_RI }, #endif { "", 0, INSTR_INVALID } }; @@ -1063,6 +1141,16 @@ static struct insn opcode_e3[] = { { "mfy", 0x5c, INSTR_RXY_RRRD }, { "mhy", 0x7c, INSTR_RXY_RRRD }, { "pfd", 0x36, INSTR_RXY_URRD }, + { "lbh", 0xc0, INSTR_RXY_RRRD }, + { "llch", 0xc2, INSTR_RXY_RRRD }, + { "stch", 0xc3, INSTR_RXY_RRRD }, + { "lhh", 0xc4, INSTR_RXY_RRRD }, + { "llhh", 0xc6, INSTR_RXY_RRRD }, + { "sthh", 0xc7, INSTR_RXY_RRRD }, + { "lfh", 0xca, INSTR_RXY_RRRD }, + { "stfh", 0xcb, INSTR_RXY_RRRD }, + { "chf", 0xcd, INSTR_RXY_RRRD }, + { "clhf", 0xcf, INSTR_RXY_RRRD }, #endif { "lrv", 0x1e, INSTR_RXY_RRRD }, { "lrvh", 0x1f, INSTR_RXY_RRRD }, @@ -1080,9 +1168,9 @@ static struct insn opcode_e5[] = { { "chhsi", 0x54, INSTR_SIL_RDI }, { "chsi", 0x5c, INSTR_SIL_RDI }, { "cghsi", 0x58, INSTR_SIL_RDI }, - { "clhhsi", 0x55, INSTR_SIL_RDU }, - { "clfhsi", 0x5d, INSTR_SIL_RDU }, - { "clghsi", 0x59, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, { "mvhhi", 0x44, INSTR_SIL_RDI }, { "mvhi", 0x4c, INSTR_SIL_RDI }, { "mvghi", 0x48, INSTR_SIL_RDI }, @@ -1137,6 +1225,24 @@ static struct insn opcode_eb[] = { { "alsi", 0x6e, INSTR_SIY_IRD }, { "algsi", 0x7e, INSTR_SIY_IRD }, { "ecag", 0x4c, INSTR_RSY_RRRD }, + { "srak", 0xdc, INSTR_RSY_RRRD }, + { "slak", 0xdd, INSTR_RSY_RRRD }, + { "srlk", 0xde, INSTR_RSY_RRRD }, + { "sllk", 0xdf, INSTR_RSY_RRRD }, + { "lgoc", 0xe2, INSTR_RSY_RDRM }, + { "stgoc", 0xe3, INSTR_RSY_RDRM }, + { "lang", 0xe4, INSTR_RSY_RRRD }, + { "laog", 0xe6, INSTR_RSY_RRRD }, + { "laxg", 0xe7, INSTR_RSY_RRRD }, + { "laag", 0xe8, INSTR_RSY_RRRD }, + { "laalg", 0xea, INSTR_RSY_RRRD }, + { "loc", 0xf2, INSTR_RSY_RDRM }, + { "stoc", 0xf3, INSTR_RSY_RDRM }, + { "lan", 0xf4, INSTR_RSY_RRRD }, + { "lao", 0xf6, INSTR_RSY_RRRD }, + { "lax", 0xf7, INSTR_RSY_RRRD }, + { "laa", 0xf8, INSTR_RSY_RRRD }, + { "laal", 0xfa, INSTR_RSY_RRRD }, #endif { "rll", 0x1d, INSTR_RSY_RRRD }, { "mvclu", 0x8e, INSTR_RSY_RRRD }, @@ -1172,6 +1278,12 @@ static struct insn opcode_ec[] = { { "rxsbg", 0x57, INSTR_RIE_RRUUU }, { "rosbg", 0x56, INSTR_RIE_RRUUU }, { "risbg", 0x55, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, + { "ahik", 0xd8, INSTR_RIE_RRI0 }, + { "aghik", 0xd9, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, #endif { "", 0, INSTR_INVALID } }; @@ -1321,6 +1433,9 @@ static struct insn *find_insn(unsigned char *code) case 0xc8: table = opcode_c8; break; + case 0xcc: + table = opcode_cc; + break; case 0xe3: table = opcode_e3; opfrag = code[5]; @@ -1367,7 +1482,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr = buffer; insn = find_insn(code); if (insn) { - ptr += sprintf(ptr, "%.5s\t", insn->name); + if (insn->name[0] == '\0') + ptr += sprintf(ptr, "%s\t", + long_insn_name[(int) insn->name[1]]); + else + ptr += sprintf(ptr, "%.5s\t", insn->name); /* Extract the operands. */ separator = 0; for (ops = formats[insn->format] + 1, i = 0; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index c00856ad4e5a..0badc6344eb4 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -208,7 +208,8 @@ static noinline __init void init_kernel_storage_key(void) end_pfn = PFN_UP(__pa(&_end)); for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); + page_set_storage_key(init_pfn << PAGE_SHIFT, + PAGE_DEFAULT_KEY, 0); } static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 22cfd634c355..f7167ee4604c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr, { vfree(me->arch.syminfo); me->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index d3a2d1c6438e..ec2e03b22ead 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -76,17 +76,17 @@ unsigned long thread_saved_pc(struct task_struct *tsk) static void default_idle(void) { /* CPU is going idle. */ - local_irq_disable(); - if (need_resched()) { - local_irq_enable(); - return; - } #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) { preempt_enable_no_resched(); cpu_die(); } #endif + local_irq_disable(); + if (need_resched()) { + local_irq_enable(); + return; + } local_mcck_disable(); if (test_thread_flag(TIF_MCCK_PENDING)) { local_mcck_enable(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c8e8e1354e1d..9071e984dcf1 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -627,7 +627,8 @@ setup_memory(void) add_active_range(0, start_chunk, end_chunk); pfn = max(start_chunk, start_pfn); for (; pfn < end_chunk; pfn++) - page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY); + page_set_storage_key(PFN_PHYS(pfn), + PAGE_DEFAULT_KEY, 0); } psw_set_key(PAGE_DEFAULT_KEY); diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index a0ffc7717ed6..a91274b4eb8f 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -74,6 +74,13 @@ static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) "Model Temp. Capacity: %-16.16s %08u\n", info->model_temp_cap, *(u32 *) info->model_temp_cap_rating); + if (info->cai) { + len += sprintf(page + len, + "Capacity Adj. Ind.: %d\n", + info->cai); + len += sprintf(page + len, "Capacity Ch. Reason: %d\n", + info->ccr); + } return len; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bcef00766a64..13559c993847 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -57,8 +57,8 @@ struct tl_info { union tl_entry tle[0]; }; -struct core_info { - struct core_info *next; +struct mask_info { + struct mask_info *next; unsigned char id; cpumask_t mask; }; @@ -66,7 +66,6 @@ struct core_info { static int topology_enabled; static void topology_work_fn(struct work_struct *work); static struct tl_info *tl_info; -static struct core_info core_info; static int machine_has_topology; static struct timer_list topology_timer; static void set_topology_timer(void); @@ -74,38 +73,37 @@ static DECLARE_WORK(topology_work, topology_work_fn); /* topology_lock protects the core linked list */ static DEFINE_SPINLOCK(topology_lock); +static struct mask_info core_info; cpumask_t cpu_core_map[NR_CPUS]; unsigned char cpu_core_id[NR_CPUS]; -static cpumask_t cpu_coregroup_map(unsigned int cpu) +#ifdef CONFIG_SCHED_BOOK +static struct mask_info book_info; +cpumask_t cpu_book_map[NR_CPUS]; +unsigned char cpu_book_id[NR_CPUS]; +#endif + +static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { - struct core_info *core = &core_info; - unsigned long flags; cpumask_t mask; cpus_clear(mask); if (!topology_enabled || !machine_has_topology) return cpu_possible_map; - spin_lock_irqsave(&topology_lock, flags); - while (core) { - if (cpu_isset(cpu, core->mask)) { - mask = core->mask; + while (info) { + if (cpu_isset(cpu, info->mask)) { + mask = info->mask; break; } - core = core->next; + info = info->next; } - spin_unlock_irqrestore(&topology_lock, flags); if (cpus_empty(mask)) mask = cpumask_of_cpu(cpu); return mask; } -const struct cpumask *cpu_coregroup_mask(unsigned int cpu) -{ - return &cpu_core_map[cpu]; -} - -static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) +static void add_cpus_to_mask(struct tl_cpu *tl_cpu, struct mask_info *book, + struct mask_info *core) { unsigned int cpu; @@ -117,23 +115,35 @@ static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin; for_each_present_cpu(lcpu) { - if (cpu_logical_map(lcpu) == rcpu) { - cpu_set(lcpu, core->mask); - cpu_core_id[lcpu] = core->id; - smp_cpu_polarization[lcpu] = tl_cpu->pp; - } + if (cpu_logical_map(lcpu) != rcpu) + continue; +#ifdef CONFIG_SCHED_BOOK + cpu_set(lcpu, book->mask); + cpu_book_id[lcpu] = book->id; +#endif + cpu_set(lcpu, core->mask); + cpu_core_id[lcpu] = core->id; + smp_cpu_polarization[lcpu] = tl_cpu->pp; } } } -static void clear_cores(void) +static void clear_masks(void) { - struct core_info *core = &core_info; + struct mask_info *info; - while (core) { - cpus_clear(core->mask); - core = core->next; + info = &core_info; + while (info) { + cpus_clear(info->mask); + info = info->next; + } +#ifdef CONFIG_SCHED_BOOK + info = &book_info; + while (info) { + cpus_clear(info->mask); + info = info->next; } +#endif } static union tl_entry *next_tle(union tl_entry *tle) @@ -146,29 +156,36 @@ static union tl_entry *next_tle(union tl_entry *tle) static void tl_to_cores(struct tl_info *info) { +#ifdef CONFIG_SCHED_BOOK + struct mask_info *book = &book_info; +#else + struct mask_info *book = NULL; +#endif + struct mask_info *core = &core_info; union tl_entry *tle, *end; - struct core_info *core = &core_info; + spin_lock_irq(&topology_lock); - clear_cores(); + clear_masks(); tle = info->tle; end = (union tl_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { - case 5: - case 4: - case 3: +#ifdef CONFIG_SCHED_BOOK case 2: + book = book->next; + book->id = tle->container.id; break; +#endif case 1: core = core->next; core->id = tle->container.id; break; case 0: - add_cpus_to_core(&tle->cpu, core); + add_cpus_to_mask(&tle->cpu, book, core); break; default: - clear_cores(); + clear_masks(); machine_has_topology = 0; goto out; } @@ -221,10 +238,29 @@ int topology_set_cpu_management(int fc) static void update_cpu_core_map(void) { + unsigned long flags; int cpu; - for_each_possible_cpu(cpu) - cpu_core_map[cpu] = cpu_coregroup_map(cpu); + spin_lock_irqsave(&topology_lock, flags); + for_each_possible_cpu(cpu) { + cpu_core_map[cpu] = cpu_group_map(&core_info, cpu); +#ifdef CONFIG_SCHED_BOOK + cpu_book_map[cpu] = cpu_group_map(&book_info, cpu); +#endif + } + spin_unlock_irqrestore(&topology_lock, flags); +} + +static void store_topology(struct tl_info *info) +{ +#ifdef CONFIG_SCHED_BOOK + int rc; + + rc = stsi(info, 15, 1, 3); + if (rc != -ENOSYS) + return; +#endif + stsi(info, 15, 1, 2); } int arch_update_cpu_topology(void) @@ -238,7 +274,7 @@ int arch_update_cpu_topology(void) topology_update_polarization_simple(); return 0; } - stsi(info, 15, 1, 2); + store_topology(info); tl_to_cores(info); update_cpu_core_map(); for_each_online_cpu(cpu) { @@ -299,12 +335,24 @@ out: } __initcall(init_topology_update); +static void alloc_masks(struct tl_info *info, struct mask_info *mask, int offset) +{ + int i, nr_masks; + + nr_masks = info->mag[NR_MAG - offset]; + for (i = 0; i < info->mnest - offset; i++) + nr_masks *= info->mag[NR_MAG - offset - 1 - i]; + nr_masks = max(nr_masks, 1); + for (i = 0; i < nr_masks; i++) { + mask->next = alloc_bootmem(sizeof(struct mask_info)); + mask = mask->next; + } +} + void __init s390_init_cpu_topology(void) { unsigned long long facility_bits; struct tl_info *info; - struct core_info *core; - int nr_cores; int i; if (stfle(&facility_bits, 1) <= 0) @@ -315,25 +363,13 @@ void __init s390_init_cpu_topology(void) tl_info = alloc_bootmem_pages(PAGE_SIZE); info = tl_info; - stsi(info, 15, 1, 2); - - nr_cores = info->mag[NR_MAG - 2]; - for (i = 0; i < info->mnest - 2; i++) - nr_cores *= info->mag[NR_MAG - 3 - i]; - + store_topology(info); pr_info("The CPU configuration topology of the machine is:"); for (i = 0; i < NR_MAG; i++) printk(" %d", info->mag[i]); printk(" / %d\n", info->mnest); - - core = &core_info; - for (i = 0; i < nr_cores; i++) { - core->next = alloc_bootmem(sizeof(struct core_info)); - core = core->next; - if (!core) - goto error; - } - return; -error: - machine_has_topology = 0; + alloc_masks(info, &core_info, 2); +#ifdef CONFIG_SCHED_BOOK + alloc_masks(info, &book_info, 3); +#endif } diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index eec054484419..6fbc6f3fbdf2 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,6 +3,6 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ - page-states.o + page-states.o gup.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index a9550dca3e4b..c66ffd8dbbb7 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -23,7 +23,10 @@ #include <asm/pgalloc.h> #include <asm/diag.h> -static char *sender = "VMRMSVM"; +#ifdef CONFIG_CMM_IUCV +static char *cmm_default_sender = "VMRMSVM"; +#endif +static char *sender; module_param(sender, charp, 0400); MODULE_PARM_DESC(sender, "Guest name that may send SMSG messages (default VMRMSVM)"); @@ -440,6 +443,8 @@ static int __init cmm_init(void) int len = strlen(sender); while (len--) sender[len] = toupper(sender[len]); + } else { + sender = cmm_default_sender; } rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2505b2ea0ef1..bae2c282221c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -52,6 +52,19 @@ #define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADACCESS 0x040000 +static unsigned long store_indication; + +void fault_init(void) +{ + unsigned long long facility_list[2]; + + if (stfle(facility_list, 2) < 2) + return; + if ((facility_list[0] & (1ULL << 61)) && + (facility_list[1] & (1ULL << 52))) + store_indication = 0xc00; +} + static inline int notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -199,14 +212,21 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code, unsigned long trans_exc_code) { struct task_struct *tsk = current; + unsigned long address; + struct siginfo si; /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK; + address = trans_exc_code & __FAIL_ADDR_MASK; + tsk->thread.prot_addr = address; tsk->thread.trap_no = int_code; - force_sig(SIGBUS, tsk); + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRERR; + si.si_addr = (void __user *) address; + force_sig_info(SIGBUS, &si, tsk); } #ifdef CONFIG_S390_EXEC_PROTECT @@ -266,10 +286,11 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, if (fault & VM_FAULT_OOM) pagefault_out_of_memory(); else if (fault & VM_FAULT_SIGBUS) { - do_sigbus(regs, int_code, trans_exc_code); /* Kernel mode? Handle exceptions or die */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) do_no_context(regs, int_code, trans_exc_code); + else + do_sigbus(regs, int_code, trans_exc_code); } else BUG(); break; @@ -294,7 +315,7 @@ static inline int do_exception(struct pt_regs *regs, int access, struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int fault; + int fault, write; if (notify_page_fault(regs)) return 0; @@ -348,8 +369,10 @@ static inline int do_exception(struct pt_regs *regs, int access, * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, - (access == VM_WRITE) ? FAULT_FLAG_WRITE : 0); + write = (access == VM_WRITE || + (trans_exc_code & store_indication) == 0x400) ? + FAULT_FLAG_WRITE : 0; + fault = handle_mm_fault(mm, vma, address, write); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c new file mode 100644 index 000000000000..38e641cdd977 --- /dev/null +++ b/arch/s390/mm/gup.c @@ -0,0 +1,225 @@ +/* + * Lockless get_user_pages_fast for s390 + * + * Copyright IBM Corp. 2010 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/vmstat.h> +#include <linux/pagemap.h> +#include <linux/rwsem.h> +#include <asm/pgtable.h> + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep, pte; + struct page *page; + + result = write ? 0 : _PAGE_RO; + mask = result | _PAGE_INVALID | _PAGE_SPECIAL; + + ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); + do { + pte = *ptep; + barrier(); + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + if (!page_cache_get_speculative(page)) + return 0; + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + put_page(page); + return 0; + } + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + struct page *head, *page; + int refs; + + result = write ? 0 : _SEGMENT_ENTRY_RO; + mask = result | _SEGMENT_ENTRY_INV; + if ((pmd_val(pmd) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); + + refs = 0; + head = pmd_page(pmd); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { + *nr -= refs; + while (refs--) + put_page(head); + } + + return 1; +} + + +static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp, pmd; + + pmdp = (pmd_t *) pudp; +#ifdef CONFIG_64BIT + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pmdp = (pmd_t *) pud_deref(pud); + pmdp += pmd_index(addr); +#endif + do { + pmd = *pmdp; + barrier(); + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (unlikely(pmd_huge(pmd))) { + if (!gup_huge_pmd(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp, pud; + + pudp = (pud_t *) pgdp; +#ifdef CONFIG_64BIT + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pudp = (pud_t *) pgd_deref(pgd); + pudp += pud_index(addr); +#endif + do { + pud = *pudp; + barrier(); + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp, pgd; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (end < start) + goto slow_irqon; + + /* + * local_irq_disable() doesn't prevent pagetable teardown, but does + * prevent the pagetables from being freed on s390. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. + */ + local_irq_disable(); + pgdp = pgd_offset(mm, addr); + do { + pgd = *pgdp; + barrier(); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; +slow: + local_irq_enable(); +slow_irqon: + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index f28c43d2f61d..639cd21f2218 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -68,7 +68,7 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; - pte_free(&init_mm, ptep); + page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 30eb6d02ddb8..33f7a08646e0 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -38,13 +38,54 @@ #include <asm/tlbflush.h> #include <asm/sections.h> -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); -char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); +unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); +static unsigned long setup_zero_pages(void) +{ + struct cpuid cpu_id; + unsigned int order; + unsigned long size; + struct page *page; + int i; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x9672: /* g5 */ + case 0x2064: /* z900 */ + case 0x2066: /* z900 */ + case 0x2084: /* z990 */ + case 0x2086: /* z990 */ + case 0x2094: /* z9-109 */ + case 0x2096: /* z9-109 */ + order = 0; + break; + case 0x2097: /* z10 */ + case 0x2098: /* z10 */ + default: + order = 2; + break; + } + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Out of memory in setup_zero_pages"); + + page = virt_to_page((void *) empty_zero_page); + split_page(page, order); + for (i = 1 << order; i > 0; i--) { + SetPageReserved(page); + page++; + } + + size = PAGE_SIZE << order; + zero_page_mask = (size - 1) & PAGE_MASK; + + return 1UL << order; +} + /* * paging_init() sets up the page tables */ @@ -84,6 +125,7 @@ void __init paging_init(void) #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); + fault_init(); } void __init mem_init(void) @@ -93,14 +135,12 @@ void __init mem_init(void) max_mapnr = num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); - /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); + totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ reservedpages = 0; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 8d999249d357..19338d228c9b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/quicklist.h> +#include <linux/rcupdate.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -23,6 +24,67 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> +struct rcu_table_freelist { + struct rcu_head rcu; + struct mm_struct *mm; + unsigned int pgt_index; + unsigned int crst_index; + unsigned long *table[0]; +}; + +#define RCU_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ + / sizeof(unsigned long)) + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); + +static void __page_table_free(struct mm_struct *mm, unsigned long *table); +static void __crst_table_free(struct mm_struct *mm, unsigned long *table); + +static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) +{ + struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); + struct rcu_table_freelist *batch = *batchp; + + if (batch) + return batch; + batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); + if (batch) { + batch->mm = mm; + batch->pgt_index = 0; + batch->crst_index = RCU_FREELIST_SIZE; + *batchp = batch; + } + return batch; +} + +static void rcu_table_freelist_callback(struct rcu_head *head) +{ + struct rcu_table_freelist *batch = + container_of(head, struct rcu_table_freelist, rcu); + + while (batch->pgt_index > 0) + __page_table_free(batch->mm, batch->table[--batch->pgt_index]); + while (batch->crst_index < RCU_FREELIST_SIZE) + __crst_table_free(batch->mm, batch->table[batch->crst_index++]); + free_page((unsigned long) batch); +} + +void rcu_table_freelist_finish(void) +{ + struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist); + + if (!batch) + return; + call_rcu(&batch->rcu, rcu_table_freelist_callback); + __get_cpu_var(rcu_table_freelist) = NULL; +} + +static void smp_sync(void *arg) +{ +} + #ifndef CONFIG_64BIT #define ALLOC_ORDER 1 #define TABLES_PER_PAGE 4 @@ -78,25 +140,55 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) } page->index = page_to_phys(shadow); } - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.crst_list); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } -void crst_table_free(struct mm_struct *mm, unsigned long *table) +static void __crst_table_free(struct mm_struct *mm, unsigned long *table) { unsigned long *shadow = get_shadow_table(table); - struct page *page = virt_to_page(table); - spin_lock(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock(&mm->context.list_lock); if (shadow) free_pages((unsigned long) shadow, ALLOC_ORDER); free_pages((unsigned long) table, ALLOC_ORDER); } +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page = virt_to_page(table); + + spin_lock_bh(&mm->context.list_lock); + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + __crst_table_free(mm, table); +} + +void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) +{ + struct rcu_table_freelist *batch; + struct page *page = virt_to_page(table); + + spin_lock_bh(&mm->context.list_lock); + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + if (atomic_read(&mm->mm_users) < 2 && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + __crst_table_free(mm, table); + return; + } + batch = rcu_table_freelist_get(mm); + if (!batch) { + smp_call_function(smp_sync, NULL, 1); + __crst_table_free(mm, table); + return; + } + batch->table[--batch->crst_index] = table; + if (batch->pgt_index >= batch->crst_index) + rcu_table_freelist_finish(); +} + #ifdef CONFIG_64BIT int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) { @@ -108,7 +200,7 @@ repeat: table = crst_table_alloc(mm, mm->context.noexec); if (!table) return -ENOMEM; - spin_lock(&mm->page_table_lock); + spin_lock_bh(&mm->page_table_lock); if (mm->context.asce_limit < limit) { pgd = (unsigned long *) mm->pgd; if (mm->context.asce_limit <= (1UL << 31)) { @@ -130,7 +222,7 @@ repeat: mm->task_size = mm->context.asce_limit; table = NULL; } - spin_unlock(&mm->page_table_lock); + spin_unlock_bh(&mm->page_table_lock); if (table) crst_table_free(mm, table); if (mm->context.asce_limit < limit) @@ -182,7 +274,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long bits; bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, @@ -191,7 +283,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page = NULL; } if (!page) { - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -202,7 +294,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } table = (unsigned long *) page_to_phys(page); @@ -213,10 +305,25 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page->flags |= bits; if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) list_move_tail(&page->lru, &mm->context.pgtable_list); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); return table; } +static void __page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned long bits; + + bits = ((unsigned long) table) & 15; + table = (unsigned long *)(((unsigned long) table) ^ bits); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + page->flags ^= bits; + if (!(page->flags & FRAG_MASK)) { + pgtable_page_dtor(page); + __free_page(page); + } +} + void page_table_free(struct mm_struct *mm, unsigned long *table) { struct page *page; @@ -225,7 +332,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); page->flags ^= bits; if (page->flags & FRAG_MASK) { /* Page now has some free pgtable fragments. */ @@ -234,18 +341,48 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } else /* All fragments of the 4K page have been freed. */ list_del(&page->lru); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); if (page) { pgtable_page_dtor(page); __free_page(page); } } +void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) +{ + struct rcu_table_freelist *batch; + struct page *page; + unsigned long bits; + + if (atomic_read(&mm->mm_users) < 2 && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + page_table_free(mm, table); + return; + } + batch = rcu_table_freelist_get(mm); + if (!batch) { + smp_call_function(smp_sync, NULL, 1); + page_table_free(mm, table); + return; + } + bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + spin_lock_bh(&mm->context.list_lock); + /* Delayed freeing with rcu prevents reuse of pgtable fragments */ + list_del_init(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *)(((unsigned long) table) | bits); + batch->table[batch->pgt_index++] = table; + if (batch->pgt_index >= batch->crst_index) + rcu_table_freelist_finish(); +} + void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) { struct page *page; - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); /* Free shadow region and segment tables. */ list_for_each_entry(page, &mm->context.crst_list, lru) if (page->index) { @@ -255,7 +392,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) /* "Free" second halves of page tables. */ list_for_each_entry(page, &mm->context.pgtable_list, lru) page->flags &= ~SECOND_HALVES; - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); mm->context.noexec = 0; update_mm(mm, tsk); } |