diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2013-07-19 13:01:49 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2013-07-19 13:01:49 +1000 |
commit | a25f6f2e88b6f2bb492f22b8b91028604d51bfb2 (patch) | |
tree | 2b6744bfab0871da7942bb9864458d989e89df1e | |
parent | d03792f9db9b892f494d3aa19d767ddf0365d1ff (diff) | |
parent | 1f02e5b30607cbef3c8b4d1376cba3689c399288 (diff) |
Merge branch 'akpm-current/current'
131 files changed, 4218 insertions, 1422 deletions
diff --git a/.gitignore b/.gitignore index 3b8b9b33be38..7e9932e55475 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ modules.builtin *.bz2 *.lzma *.xz +*.lz4 *.lzo *.patch *.gcno diff --git a/Documentation/development-process/2.Process b/Documentation/development-process/2.Process index 4823577c6509..2e0617936e8f 100644 --- a/Documentation/development-process/2.Process +++ b/Documentation/development-process/2.Process @@ -276,7 +276,7 @@ mainline get there via -mm. The current -mm patch is available in the "mmotm" (-mm of the moment) directory at: - http://userweb.kernel.org/~akpm/mmotm/ + http://www.ozlabs.org/~akpm/mmotm/ Use of the MMOTM tree is likely to be a frustrating experience, though; there is a definite chance that it will not even compile. @@ -287,7 +287,7 @@ the mainline is expected to look like after the next merge window closes. Linux-next trees are announced on the linux-kernel and linux-next mailing lists when they are assembled; they can be downloaded from: - http://www.kernel.org/pub/linux/kernel/people/sfr/linux-next/ + http://www.kernel.org/pub/linux/kernel/next/ Some information about linux-next has been gathered at: diff --git a/MAINTAINERS b/MAINTAINERS index 5d3facfd7899..3034c1fdd4e8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2871,7 +2871,7 @@ F: drivers/media/usb/dvb-usb-v2/dvb_usb* F: drivers/media/usb/dvb-usb-v2/usb_urb.c DYNAMIC DEBUG -M: Jason Baron <jbaron@redhat.com> +M: Jason Baron <jbaron@akamai.com> S: Maintained F: lib/dynamic_debug.c F: include/linux/dynamic_debug.h @@ -7642,6 +7642,11 @@ W: http://tifmxx.berlios.de/ S: Maintained F: drivers/memstick/host/tifm_ms.c +SONY MEMORYSTICK STANDARD SUPPORT +M: Maxim Levitsky <maximlevitsky@gmail.com> +S: Maintained +F: drivers/memstick/core/ms_block.* + SOUND M: Jaroslav Kysela <perex@perex.cz> M: Takashi Iwai <tiwai@suse.de> diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c index 40736da9bea8..ffb19b7da999 100644 --- a/arch/alpha/lib/csum_partial_copy.c +++ b/arch/alpha/lib/csum_partial_copy.c @@ -338,6 +338,11 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, unsigned long doff = 7 & (unsigned long) dst; if (len) { + if (!access_ok(VERIFY_READ, src, len)) { + *errp = -EFAULT; + memset(dst, 0, len); + return sum; + } if (!doff) { if (!soff) checksum = csum_partial_cfu_aligned( diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 3d0ddbc005fe..71368850dfc0 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -169,10 +169,10 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig, new_ka.ka_restorer = restorer; ret = get_user(u_handler, &act->sa_handler); new_ka.sa.sa_handler = compat_ptr(u_handler); - ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); + ret |= copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); sigset_from_compat(&new_ka.sa.sa_mask, &set32); - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - ret |= __get_user(u_restorer, &act->sa_restorer); + ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= get_user(u_restorer, &act->sa_restorer); new_ka.sa.sa_restorer = compat_ptr(u_restorer); if (ret) return -EFAULT; @@ -183,9 +183,9 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig, if (!ret && oact) { sigset_to_compat(&set32, &old_ka.sa.sa_mask); ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); - ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - ret |= __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); + ret |= copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); + ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); if (ret) ret = -EFAULT; } diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index cccd07fa5e3a..b8e9224f0b45 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -17,6 +17,8 @@ extern unsigned long pci_mem_start; extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void e820_add_region(u64 start, u64 size, int type); +extern void e820_add_limit_region(u64 start, u64 size, int type); +extern void e820_adjust_region(u64 *start, u64 *size); extern void e820_print_map(char *who); extern int sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d32abeabbda5..0d5bb689649a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -47,6 +47,7 @@ unsigned long pci_mem_start = 0xaeedbabe; #ifdef CONFIG_PCI EXPORT_SYMBOL(pci_mem_start); #endif +static u64 mem_limit = ~0ULL; /* * This function checks if any part of the range <start,end> is mapped @@ -108,7 +109,7 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type) * Add a memory region to the kernel e820 map. */ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, - int type) + int type, bool limited) { int x = e820x->nr_map; @@ -119,6 +120,22 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, return; } + if (limited) { + if (start >= mem_limit) { + printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n", + (unsigned long long)start, + (unsigned long long)(start + size - 1)); + return; + } + + if (mem_limit - start < size) { + printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n", + (unsigned long long)mem_limit, + (unsigned long long)(start + size - 1)); + size = mem_limit - start; + } + } + e820x->map[x].addr = start; e820x->map[x].size = size; e820x->map[x].type = type; @@ -127,7 +144,37 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, void __init e820_add_region(u64 start, u64 size, int type) { - __e820_add_region(&e820, start, size, type); + __e820_add_region(&e820, start, size, type, false); +} + +/* + * do_add_efi_memmap() calls this function(). + * + * Note: BOOT_SERVICES_{CODE,DATA} regions on some efi machines are marked + * as E820_RAM, and they are needed to be mapped. Please use e820_add_region() + * to add BOOT_SERVICES_{CODE,DATA} regions. + */ +void __init e820_add_limit_region(u64 start, u64 size, int type) +{ + /* + * efi_init() is called after finish_e820_parsing(), so we should + * check whether [start, start + size) contains address above + * mem_limit if the type is E820_RAM. + */ + __e820_add_region(&e820, start, size, type, type == E820_RAM); +} + +void __init e820_adjust_region(u64 *start, u64 *size) +{ + if (*start >= mem_limit) { + *size = 0; + return; + } + + if (mem_limit - *start < *size) + *size = mem_limit - *start; + + return; } static void __init e820_print_type(u32 type) @@ -455,8 +502,9 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, /* new range is totally covered? */ if (ei->addr < start && ei_end > end) { - __e820_add_region(e820x, start, size, new_type); - __e820_add_region(e820x, end, ei_end - end, ei->type); + __e820_add_region(e820x, start, size, new_type, false); + __e820_add_region(e820x, end, ei_end - end, ei->type, + false); ei->size = start - ei->addr; real_updated_size += size; continue; @@ -469,7 +517,7 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, continue; __e820_add_region(e820x, final_start, final_end - final_start, - new_type); + new_type, false); real_updated_size += final_end - final_start; @@ -809,7 +857,7 @@ static int userdef __initdata; /* "mem=nopentium" disables the 4MB page tables. */ static int __init parse_memopt(char *p) { - u64 mem_size; + char *oldp; if (!p) return -EINVAL; @@ -825,11 +873,11 @@ static int __init parse_memopt(char *p) } userdef = 1; - mem_size = memparse(p, &p); + oldp = p; + mem_limit = memparse(p, &p); /* don't remove all of memory when handling "mem={invalid}" param */ - if (mem_size == 0) + if (mem_limit == 0 || p == oldp) return -EINVAL; - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); return 0; } @@ -895,6 +943,12 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { + if (mem_limit != ~0ULL) { + userdef = 1; + e820_remove_range(mem_limit, ULLONG_MAX - mem_limit, + E820_RAM, 1); + } + if (userdef) { u32 nr = e820.nr_map; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 282375f13c7e..f030cbe669a5 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -103,6 +103,7 @@ static void flush_tlb_func(void *info) if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; + count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_end == TLB_FLUSH_ALL) local_flush_tlb(); @@ -130,6 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, info.flush_start = start; info.flush_end = end; + count_vm_event(NR_TLB_REMOTE_FLUSH); if (is_uv_system()) { unsigned int cpu; @@ -149,6 +151,7 @@ void flush_tlb_current_task(void) preempt_disable(); + count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); @@ -211,16 +214,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; /* tlb_flushall_shift is on balance point, details in commit log */ - if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) + if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { + count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); - else { + } else { if (has_large_page(mm, start, end)) { local_flush_tlb(); goto flush_all; } /* flush range by one by one 'invlpg' */ - for (addr = start; addr < end; addr += PAGE_SIZE) + for (addr = start; addr < end; addr += PAGE_SIZE) { + count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); + } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) @@ -256,6 +262,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) static void do_flush_tlb_all(void *info) { + count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); __flush_tlb_all(); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(smp_processor_id()); @@ -263,6 +270,7 @@ static void do_flush_tlb_all(void *info) void flush_tlb_all(void) { + count_vm_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } @@ -272,8 +280,10 @@ static void do_kernel_range_flush(void *info) unsigned long addr; /* flush range by one by one 'invlpg' */ - for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE) + for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE) { + count_vm_event(NR_TLB_LOCAL_FLUSH_ONE_KERNEL); __flush_tlb_single(addr); + } } void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 643b8b5eee86..8244f5ec2f4c 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/irq.h> #include <linux/module.h> +#include <linux/reboot.h> #include <linux/serial_reg.h> #include <linux/serial_8250.h> #include <linux/reboot.h> diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 90f6ed127096..f0d732ce9134 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -343,10 +343,17 @@ static void __init do_add_efi_memmap(void) int e820_type; switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: + /* EFI_BOOT_SERVICES_{CODE,DATA} needs to be mapped */ + if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_RAM; + else + e820_type = E820_RESERVED; + e820_add_region(start, size, e820_type); + continue; + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: case EFI_CONVENTIONAL_MEMORY: if (md->attribute & EFI_MEMORY_WB) e820_type = E820_RAM; @@ -371,7 +378,7 @@ static void __init do_add_efi_memmap(void) e820_type = E820_RESERVED; break; } - e820_add_region(start, size, e820_type); + e820_add_limit_region(start, size, e820_type); } sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } @@ -479,6 +486,8 @@ void __init efi_free_boot_services(void) md->type != EFI_BOOT_SERVICES_DATA) continue; + e820_adjust_region(&start, &size); + /* Could not reserve boot area */ if (!size) continue; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 7e5d474dc6ba..fbd5a67cb773 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -70,7 +70,7 @@ static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev, return ret; ret = copy_to_user(ugeo, &geo, 4); - ret |= __put_user(geo.start, &ugeo->start); + ret |= put_user(geo.start, &ugeo->start); if (ret) ret = -EFAULT; diff --git a/block/genhd.c b/block/genhd.c index dadf42b454a3..01b481363711 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || (!disk_max_parts(sgp) && + if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) && (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c index d21167bfc865..dc34a5b8bcee 100644 --- a/drivers/accessibility/braille/braille_console.c +++ b/drivers/accessibility/braille/braille_console.c @@ -359,6 +359,9 @@ int braille_register_console(struct console *console, int index, char *console_options, char *braille_options) { int ret; + + if (!(console->flags & CON_BRL)) + return 0; if (!console_options) /* Only support VisioBraille for now */ console_options = "57600o8"; @@ -374,15 +377,17 @@ int braille_register_console(struct console *console, int index, braille_co = console; register_keyboard_notifier(&keyboard_notifier_block); register_vt_notifier(&vt_notifier_block); - return 0; + return 1; } int braille_unregister_console(struct console *console) { if (braille_co != console) return -EINVAL; + if (!(console->flags & CON_BRL)) + return 0; unregister_keyboard_notifier(&keyboard_notifier_block); unregister_vt_notifier(&vt_notifier_block); braille_co = NULL; - return 0; + return 1; } diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 507362a76a73..80f9743f596a 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -2872,15 +2872,4 @@ static struct pci_driver he_driver = { .id_table = he_pci_tbl, }; -static int __init he_init(void) -{ - return pci_register_driver(&he_driver); -} - -static void __exit he_cleanup(void) -{ - pci_unregister_driver(&he_driver); -} - -module_init(he_init); -module_exit(he_cleanup); +module_pci_driver(he_driver); diff --git a/drivers/base/node.c b/drivers/base/node.c index 7616a77ca322..bc9f43bf7e29 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -125,13 +125,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(nid, NR_WRITEBACK)), nid, K(node_page_state(nid, NR_FILE_PAGES)), nid, K(node_page_state(nid, NR_FILE_MAPPED)), -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - nid, K(node_page_state(nid, NR_ANON_PAGES) - + node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * - HPAGE_PMD_NR), -#else nid, K(node_page_state(nid, NR_ANON_PAGES)), -#endif nid, K(node_page_state(nid, NR_SHMEM)), nid, node_page_state(nid, NR_KERNEL_STACK) * THREAD_SIZE / 1024, diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index eb760a218da4..fa0affb699b4 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -14,7 +14,7 @@ * of and an antecedent to, SMBIOS, which stands for System * Management BIOS. See further: http://www.dmtf.org/standards */ -static char dmi_empty_string[] = " "; +static const char dmi_empty_string[] = " "; static u16 __initdata dmi_ver; /* @@ -49,7 +49,7 @@ static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s) return ""; } -static char * __init dmi_string(const struct dmi_header *dm, u8 s) +static const char * __init dmi_string(const struct dmi_header *dm, u8 s) { const char *bp = dmi_string_nosave(dm, s); char *str; @@ -62,8 +62,6 @@ static char * __init dmi_string(const struct dmi_header *dm, u8 s) str = dmi_alloc(len); if (str != NULL) strcpy(str, bp); - else - printk(KERN_ERR "dmi_string: cannot allocate %Zu bytes.\n", len); return str; } @@ -133,17 +131,18 @@ static int __init dmi_checksum(const u8 *buf, u8 len) return sum == 0; } -static char *dmi_ident[DMI_STRING_MAX]; +static const char *dmi_ident[DMI_STRING_MAX]; static LIST_HEAD(dmi_devices); int dmi_available; /* * Save a DMI string */ -static void __init dmi_save_ident(const struct dmi_header *dm, int slot, int string) +static void __init dmi_save_ident(const struct dmi_header *dm, int slot, + int string) { - const char *d = (const char*) dm; - char *p; + const char *d = (const char *) dm; + const char *p; if (dmi_ident[slot]) return; @@ -155,9 +154,10 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot, int str dmi_ident[slot] = p; } -static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int index) +static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, + int index) { - const u8 *d = (u8*) dm + index; + const u8 *d = (u8 *) dm + index; char *s; int is_ff = 1, is_00 = 1, i; @@ -188,12 +188,13 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde else sprintf(s, "%pUB", d); - dmi_ident[slot] = s; + dmi_ident[slot] = s; } -static void __init dmi_save_type(const struct dmi_header *dm, int slot, int index) +static void __init dmi_save_type(const struct dmi_header *dm, int slot, + int index) { - const u8 *d = (u8*) dm + index; + const u8 *d = (u8 *) dm + index; char *s; if (dmi_ident[slot]) @@ -216,10 +217,8 @@ static void __init dmi_save_one_device(int type, const char *name) return; dev = dmi_alloc(sizeof(*dev) + strlen(name) + 1); - if (!dev) { - printk(KERN_ERR "dmi_save_one_device: out of memory.\n"); + if (!dev) return; - } dev->type = type; strcpy((char *)(dev + 1), name); @@ -249,17 +248,14 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm) struct dmi_device *dev; for (i = 1; i <= count; i++) { - char *devname = dmi_string(dm, i); + const char *devname = dmi_string(dm, i); if (devname == dmi_empty_string) continue; dev = dmi_alloc(sizeof(*dev)); - if (!dev) { - printk(KERN_ERR - "dmi_save_oem_strings_devices: out of memory.\n"); + if (!dev) break; - } dev->type = DMI_DEV_TYPE_OEM_STRING; dev->name = devname; @@ -272,21 +268,17 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm) static void __init dmi_save_ipmi_device(const struct dmi_header *dm) { struct dmi_device *dev; - void * data; + void *data; data = dmi_alloc(dm->length); - if (data == NULL) { - printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); + if (data == NULL) return; - } memcpy(data, dm, dm->length); dev = dmi_alloc(sizeof(*dev)); - if (!dev) { - printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); + if (!dev) return; - } dev->type = DMI_DEV_TYPE_IPMI; dev->name = "IPMI controller"; @@ -301,10 +293,9 @@ static void __init dmi_save_dev_onboard(int instance, int segment, int bus, struct dmi_dev_onboard *onboard_dev; onboard_dev = dmi_alloc(sizeof(*onboard_dev) + strlen(name) + 1); - if (!onboard_dev) { - printk(KERN_ERR "dmi_save_dev_onboard: out of memory.\n"); + if (!onboard_dev) return; - } + onboard_dev->instance = instance; onboard_dev->segment = segment; onboard_dev->bus = bus; @@ -320,7 +311,7 @@ static void __init dmi_save_dev_onboard(int instance, int segment, int bus, static void __init dmi_save_extended_devices(const struct dmi_header *dm) { - const u8 *d = (u8*) dm + 5; + const u8 *d = (u8 *) dm + 5; /* Skip disabled device */ if ((*d & 0x80) == 0) @@ -338,7 +329,7 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm) */ static void __init dmi_decode(const struct dmi_header *dm, void *dummy) { - switch(dm->type) { + switch (dm->type) { case 0: /* BIOS Information */ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); dmi_save_ident(dm, DMI_BIOS_VERSION, 5); @@ -419,6 +410,13 @@ static void __init dmi_format_ids(char *buf, size_t len) dmi_get_system_info(DMI_BIOS_DATE)); } +/* + * Check for DMI/SMBIOS headers in the system firmware image. Any + * SMBIOS header must start 16 bytes before the DMI header, so take a + * 32 byte buffer and check for DMI at offset 16 and SMBIOS at offset + * 0. If the DMI header is present, set dmi_ver accordingly (SMBIOS + * takes precedence) and return 0. Otherwise return 1. + */ static int __init dmi_present(const u8 *buf) { int smbios_ver; @@ -495,17 +493,18 @@ void __init dmi_scan_machine(void) dmi_available = 1; goto out; } - } - else { - /* - * no iounmap() for that ioremap(); it would be a no-op, but - * it's so early in setup that sucker gets confused into doing - * what it shouldn't if we actually call it. - */ + } else { p = dmi_ioremap(0xF0000, 0x10000); if (p == NULL) goto error; + /* + * Iterate over all possible DMI header addresses q. + * Maintain the 32 bytes around q in buf. On the + * first iteration, substitute zero for the + * out-of-range bytes so there is no chance of falsely + * detecting an SMBIOS header. + */ memset(buf, 0, 16); for (q = p; q < p + 0x10000; q += 16) { memcpy_fromio(buf + 16, q, 16); @@ -519,7 +518,7 @@ void __init dmi_scan_machine(void) dmi_iounmap(p, 0x10000); } error: - printk(KERN_INFO "DMI not present or invalid.\n"); + pr_info("DMI not present or invalid.\n"); out: dmi_initialized = 1; } @@ -655,7 +654,7 @@ int dmi_name_in_serial(const char *str) /** * dmi_name_in_vendors - Check if string is in the DMI system or board vendor name - * @str: Case sensitive Name + * @str: Case sensitive Name */ int dmi_name_in_vendors(const char *str) { @@ -682,13 +681,13 @@ EXPORT_SYMBOL(dmi_name_in_vendors); * A new search is initiated by passing %NULL as the @from argument. * If @from is not %NULL, searches continue from next device. */ -const struct dmi_device * dmi_find_device(int type, const char *name, +const struct dmi_device *dmi_find_device(int type, const char *name, const struct dmi_device *from) { const struct list_head *head = from ? &from->list : &dmi_devices; struct list_head *d; - for(d = head->next; d != &dmi_devices; d = d->next) { + for (d = head->next; d != &dmi_devices; d = d->next) { const struct dmi_device *dev = list_entry(d, struct dmi_device, list); diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c index 60685b21cc36..379a47ea99f6 100644 --- a/drivers/gpu/drm/cirrus/cirrus_mode.c +++ b/drivers/gpu/drm/cirrus/cirrus_mode.c @@ -273,8 +273,8 @@ static int cirrus_crtc_mode_set(struct drm_crtc *crtc, sr07 |= 0x11; break; case 16: - sr07 |= 0xc1; - hdr = 0xc0; + sr07 |= 0x17; + hdr = 0xc1; break; case 24: sr07 |= 0x15; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3d13ca6e257f..f6f6cc7fc133 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -416,6 +416,14 @@ static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode) return; /* + * fbdev->blank can be called from irq context in case of a panic. + * Since we already have our own special panic handler which will + * restore the fbdev console mode completely, just bail out early. + */ + if (oops_in_progress) + return; + + /* * For each CRTC in this fb, turn the connectors on/off. */ drm_modeset_lock_all(dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index d97f20069d3e..4d70fb73d41b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -46,6 +46,7 @@ bool nouveau_is_v1_dsm(void) { #define NOUVEAU_DSM_HAS_MUX 0x1 #define NOUVEAU_DSM_HAS_OPT 0x2 +#ifdef CONFIG_VGA_SWITCHEROO static const char nouveau_dsm_muid[] = { 0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D, 0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4, @@ -337,6 +338,10 @@ void nouveau_unregister_dsm_handler(void) if (nouveau_dsm_priv.optimus_detected || nouveau_dsm_priv.dsm_detected) vga_switcheroo_unregister_handler(); } +#else +void nouveau_register_dsm_handler(void) {} +void nouveau_unregister_dsm_handler(void) {} +#endif /* retrieve the ROM in 4k blocks */ static int nouveau_rom_call(acpi_handle rom_handle, uint8_t *bios, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 784b97cb05b0..f2ef7ef0f36f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -383,14 +383,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; int id; - static int next_id; idr_preload(GFP_KERNEL); spin_lock_irqsave(&cm.lock, flags); - id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT); - if (id >= 0) - next_id = max(id + 1, 0); + id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); spin_unlock_irqrestore(&cm.lock, flags); idr_preload_end(); diff --git a/drivers/isdn/hisax/amd7930_fn.c b/drivers/isdn/hisax/amd7930_fn.c index 1063babe1d3a..36817e0a0b94 100644 --- a/drivers/isdn/hisax/amd7930_fn.c +++ b/drivers/isdn/hisax/amd7930_fn.c @@ -314,7 +314,7 @@ Amd7930_empty_Dfifo(struct IsdnCardState *cs, int flag) t += sprintf(t, "Amd7930: empty_Dfifo cnt: %d |", cs->rcvidx); QuickHex(t, cs->rcvbuf, cs->rcvidx); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } /* moves received data in sk-buffer */ memcpy(skb_put(skb, cs->rcvidx), cs->rcvbuf, cs->rcvidx); @@ -406,7 +406,7 @@ Amd7930_fill_Dfifo(struct IsdnCardState *cs) t += sprintf(t, "Amd7930: fill_Dfifo cnt: %d |", count); QuickHex(t, deb_ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } /* AMD interrupts on */ AmdIrqOn(cs); diff --git a/drivers/isdn/hisax/avm_pci.c b/drivers/isdn/hisax/avm_pci.c index ee9b9a03cffa..d1427bd6452d 100644 --- a/drivers/isdn/hisax/avm_pci.c +++ b/drivers/isdn/hisax/avm_pci.c @@ -285,7 +285,7 @@ hdlc_empty_fifo(struct BCState *bcs, int count) t += sprintf(t, "hdlc_empty_fifo %c cnt %d", bcs->channel ? 'B' : 'A', count); QuickHex(t, p, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } @@ -345,7 +345,7 @@ hdlc_fill_fifo(struct BCState *bcs) t += sprintf(t, "hdlc_fill_fifo %c cnt %d", bcs->channel ? 'B' : 'A', count); QuickHex(t, p, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index bf04d2a3cf4a..b33f53b3ca93 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -1896,7 +1896,7 @@ static void EChannel_proc_rcv(struct hisax_d_if *d_if) ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, cs->dlog); + HiSax_putstatus(cs, NULL, "%s", cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", diff --git a/drivers/isdn/hisax/diva.c b/drivers/isdn/hisax/diva.c index 8d0cf6e4dc00..4fc90de68d18 100644 --- a/drivers/isdn/hisax/diva.c +++ b/drivers/isdn/hisax/diva.c @@ -427,7 +427,7 @@ Memhscx_empty_fifo(struct BCState *bcs, int count) t += sprintf(t, "hscx_empty_fifo %c cnt %d", bcs->hw.hscx.hscx ? 'B' : 'A', count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } @@ -469,7 +469,7 @@ Memhscx_fill_fifo(struct BCState *bcs) t += sprintf(t, "hscx_fill_fifo %c cnt %d", bcs->hw.hscx.hscx ? 'B' : 'A', count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } diff --git a/drivers/isdn/hisax/elsa.c b/drivers/isdn/hisax/elsa.c index 1df6f9a56ca2..2be1c8a3bb5f 100644 --- a/drivers/isdn/hisax/elsa.c +++ b/drivers/isdn/hisax/elsa.c @@ -535,7 +535,7 @@ check_arcofi(struct IsdnCardState *cs) t = tmp; t += sprintf(tmp, "Arcofi data"); QuickHex(t, p, cs->dc.isac.mon_rxp); - debugl1(cs, tmp); + debugl1(cs, "%s", tmp); if ((cs->dc.isac.mon_rxp == 2) && (cs->dc.isac.mon_rx[0] == 0xa0)) { switch (cs->dc.isac.mon_rx[1]) { case 0x80: diff --git a/drivers/isdn/hisax/elsa_ser.c b/drivers/isdn/hisax/elsa_ser.c index d4c98d330bfe..3f84dd8f1757 100644 --- a/drivers/isdn/hisax/elsa_ser.c +++ b/drivers/isdn/hisax/elsa_ser.c @@ -344,7 +344,7 @@ static inline void receive_chars(struct IsdnCardState *cs, t += sprintf(t, "modem read cnt %d", cs->hw.elsa.rcvcnt); QuickHex(t, cs->hw.elsa.rcvbuf, cs->hw.elsa.rcvcnt); - debugl1(cs, tmp); + debugl1(cs, "%s", tmp); } cs->hw.elsa.rcvcnt = 0; } diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 3ccd724ff8c2..497bd026c237 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -901,7 +901,7 @@ Begin: ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, cs->dlog); + HiSax_putstatus(cs, NULL, "%s", cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", total - 3); } diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c index dc4574f735ef..fa1fefd711cd 100644 --- a/drivers/isdn/hisax/hfc_sx.c +++ b/drivers/isdn/hisax/hfc_sx.c @@ -674,7 +674,7 @@ receive_emsg(struct IsdnCardState *cs) ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, cs->dlog); + HiSax_putstatus(cs, NULL, "%s", cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", skb->len); } diff --git a/drivers/isdn/hisax/hscx_irq.c b/drivers/isdn/hisax/hscx_irq.c index f398d4838937..a8d6188402c6 100644 --- a/drivers/isdn/hisax/hscx_irq.c +++ b/drivers/isdn/hisax/hscx_irq.c @@ -75,7 +75,7 @@ hscx_empty_fifo(struct BCState *bcs, int count) t += sprintf(t, "hscx_empty_fifo %c cnt %d", bcs->hw.hscx.hscx ? 'B' : 'A', count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } @@ -115,7 +115,7 @@ hscx_fill_fifo(struct BCState *bcs) t += sprintf(t, "hscx_fill_fifo %c cnt %d", bcs->hw.hscx.hscx ? 'B' : 'A', count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } diff --git a/drivers/isdn/hisax/icc.c b/drivers/isdn/hisax/icc.c index db5321f6379b..51dae9167238 100644 --- a/drivers/isdn/hisax/icc.c +++ b/drivers/isdn/hisax/icc.c @@ -134,7 +134,7 @@ icc_empty_fifo(struct IsdnCardState *cs, int count) t += sprintf(t, "icc_empty_fifo cnt %d", count); QuickHex(t, ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } } @@ -176,7 +176,7 @@ icc_fill_fifo(struct IsdnCardState *cs) t += sprintf(t, "icc_fill_fifo cnt %d", count); QuickHex(t, ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } } diff --git a/drivers/isdn/hisax/ipacx.c b/drivers/isdn/hisax/ipacx.c index 74feb5c83067..5faa5de24305 100644 --- a/drivers/isdn/hisax/ipacx.c +++ b/drivers/isdn/hisax/ipacx.c @@ -260,7 +260,7 @@ dch_empty_fifo(struct IsdnCardState *cs, int count) t += sprintf(t, "dch_empty_fifo() cnt %d", count); QuickHex(t, ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } } @@ -307,7 +307,7 @@ dch_fill_fifo(struct IsdnCardState *cs) t += sprintf(t, "dch_fill_fifo() cnt %d", count); QuickHex(t, ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } } @@ -539,7 +539,7 @@ bch_empty_fifo(struct BCState *bcs, int count) t += sprintf(t, "bch_empty_fifo() B-%d cnt %d", hscx, count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } @@ -582,7 +582,7 @@ bch_fill_fifo(struct BCState *bcs) t += sprintf(t, "chb_fill_fifo() B-%d cnt %d", hscx, count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } diff --git a/drivers/isdn/hisax/isac.c b/drivers/isdn/hisax/isac.c index a365ccc1c99c..7fdf78f46433 100644 --- a/drivers/isdn/hisax/isac.c +++ b/drivers/isdn/hisax/isac.c @@ -137,7 +137,7 @@ isac_empty_fifo(struct IsdnCardState *cs, int count) t += sprintf(t, "isac_empty_fifo cnt %d", count); QuickHex(t, ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } } @@ -179,7 +179,7 @@ isac_fill_fifo(struct IsdnCardState *cs) t += sprintf(t, "isac_fill_fifo cnt %d", count); QuickHex(t, ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } } diff --git a/drivers/isdn/hisax/isar.c b/drivers/isdn/hisax/isar.c index 7fdf34704fe5..f4956c73aa11 100644 --- a/drivers/isdn/hisax/isar.c +++ b/drivers/isdn/hisax/isar.c @@ -74,7 +74,7 @@ sendmsg(struct IsdnCardState *cs, u_char his, u_char creg, u_char len, t = tmp; t += sprintf(t, "sendmbox cnt %d", len); QuickHex(t, &msg[len-i], (i > 64) ? 64 : i); - debugl1(cs, tmp); + debugl1(cs, "%s", tmp); i -= 64; } } @@ -105,7 +105,7 @@ rcv_mbox(struct IsdnCardState *cs, struct isar_reg *ireg, u_char *msg) t = tmp; t += sprintf(t, "rcv_mbox cnt %d", ireg->clsb); QuickHex(t, &msg[ireg->clsb - i], (i > 64) ? 64 : i); - debugl1(cs, tmp); + debugl1(cs, "%s", tmp); i -= 64; } } @@ -1248,7 +1248,7 @@ isar_int_main(struct IsdnCardState *cs) tp += sprintf(debbuf, "msg iis(%x) msb(%x)", ireg->iis, ireg->cmsb); QuickHex(tp, (u_char *)ireg->par, ireg->clsb); - debugl1(cs, debbuf); + debugl1(cs, "%s", debbuf); } break; case ISAR_IIS_INVMSG: diff --git a/drivers/isdn/hisax/jade.c b/drivers/isdn/hisax/jade.c index f946c58d8ab1..e2ae7871a209 100644 --- a/drivers/isdn/hisax/jade.c +++ b/drivers/isdn/hisax/jade.c @@ -81,10 +81,7 @@ modejade(struct BCState *bcs, int mode, int bc) int jade = bcs->hw.hscx.hscx; if (cs->debug & L1_DEB_HSCX) { - char tmp[40]; - sprintf(tmp, "jade %c mode %d ichan %d", - 'A' + jade, mode, bc); - debugl1(cs, tmp); + debugl1(cs, "jade %c mode %d ichan %d", 'A' + jade, mode, bc); } bcs->mode = mode; bcs->channel = bc; @@ -257,23 +254,18 @@ void clear_pending_jade_ints(struct IsdnCardState *cs) { int val; - char tmp[64]; cs->BC_Write_Reg(cs, 0, jade_HDLC_IMR, 0x00); cs->BC_Write_Reg(cs, 1, jade_HDLC_IMR, 0x00); val = cs->BC_Read_Reg(cs, 1, jade_HDLC_ISR); - sprintf(tmp, "jade B ISTA %x", val); - debugl1(cs, tmp); + debugl1(cs, "jade B ISTA %x", val); val = cs->BC_Read_Reg(cs, 0, jade_HDLC_ISR); - sprintf(tmp, "jade A ISTA %x", val); - debugl1(cs, tmp); + debugl1(cs, "jade A ISTA %x", val); val = cs->BC_Read_Reg(cs, 1, jade_HDLC_STAR); - sprintf(tmp, "jade B STAR %x", val); - debugl1(cs, tmp); + debugl1(cs, "jade B STAR %x", val); val = cs->BC_Read_Reg(cs, 0, jade_HDLC_STAR); - sprintf(tmp, "jade A STAR %x", val); - debugl1(cs, tmp); + debugl1(cs, "jade A STAR %x", val); /* Unmask ints */ cs->BC_Write_Reg(cs, 0, jade_HDLC_IMR, 0xF8); cs->BC_Write_Reg(cs, 1, jade_HDLC_IMR, 0xF8); diff --git a/drivers/isdn/hisax/jade_irq.c b/drivers/isdn/hisax/jade_irq.c index f521fc83dc76..b930da9b5aa6 100644 --- a/drivers/isdn/hisax/jade_irq.c +++ b/drivers/isdn/hisax/jade_irq.c @@ -65,7 +65,7 @@ jade_empty_fifo(struct BCState *bcs, int count) t += sprintf(t, "jade_empty_fifo %c cnt %d", bcs->hw.hscx.hscx ? 'B' : 'A', count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } @@ -105,7 +105,7 @@ jade_fill_fifo(struct BCState *bcs) t += sprintf(t, "jade_fill_fifo %c cnt %d", bcs->hw.hscx.hscx ? 'B' : 'A', count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } diff --git a/drivers/isdn/hisax/l3_1tr6.c b/drivers/isdn/hisax/l3_1tr6.c index 4c1bca5caa1d..875402e76d0a 100644 --- a/drivers/isdn/hisax/l3_1tr6.c +++ b/drivers/isdn/hisax/l3_1tr6.c @@ -63,7 +63,7 @@ l3_1tr6_error(struct l3_process *pc, u_char *msg, struct sk_buff *skb) { dev_kfree_skb(skb); if (pc->st->l3.debug & L3_DEB_WARN) - l3_debug(pc->st, msg); + l3_debug(pc->st, "%s", msg); l3_1tr6_release_req(pc, 0, NULL); } @@ -161,7 +161,6 @@ l3_1tr6_setup(struct l3_process *pc, u_char pr, void *arg) { u_char *p; int bcfound = 0; - char tmp[80]; struct sk_buff *skb = arg; /* Channel Identification */ @@ -214,10 +213,9 @@ l3_1tr6_setup(struct l3_process *pc, u_char pr, void *arg) /* Signal all services, linklevel takes care of Service-Indicator */ if (bcfound) { if ((pc->para.setup.si1 != 7) && (pc->st->l3.debug & L3_DEB_WARN)) { - sprintf(tmp, "non-digital call: %s -> %s", + l3_debug(pc->st, "non-digital call: %s -> %s", pc->para.setup.phone, pc->para.setup.eazmsn); - l3_debug(pc->st, tmp); } newl3state(pc, 6); pc->st->l3.l3l4(pc->st, CC_SETUP | INDICATION, pc); @@ -301,7 +299,7 @@ l3_1tr6_info(struct l3_process *pc, u_char pr, void *arg) { u_char *p; int i, tmpcharge = 0; - char a_charge[8], tmp[32]; + char a_charge[8]; struct sk_buff *skb = arg; p = skb->data; @@ -316,8 +314,8 @@ l3_1tr6_info(struct l3_process *pc, u_char pr, void *arg) pc->st->l3.l3l4(pc->st, CC_CHARGE | INDICATION, pc); } if (pc->st->l3.debug & L3_DEB_CHARGE) { - sprintf(tmp, "charging info %d", pc->para.chargeinfo); - l3_debug(pc->st, tmp); + l3_debug(pc->st, "charging info %d", + pc->para.chargeinfo); } } else if (pc->st->l3.debug & L3_DEB_CHARGE) l3_debug(pc->st, "charging info not found"); @@ -399,7 +397,7 @@ l3_1tr6_disc(struct l3_process *pc, u_char pr, void *arg) struct sk_buff *skb = arg; u_char *p; int i, tmpcharge = 0; - char a_charge[8], tmp[32]; + char a_charge[8]; StopAllL3Timer(pc); p = skb->data; @@ -414,8 +412,8 @@ l3_1tr6_disc(struct l3_process *pc, u_char pr, void *arg) pc->st->l3.l3l4(pc->st, CC_CHARGE | INDICATION, pc); } if (pc->st->l3.debug & L3_DEB_CHARGE) { - sprintf(tmp, "charging info %d", pc->para.chargeinfo); - l3_debug(pc->st, tmp); + l3_debug(pc->st, "charging info %d", + pc->para.chargeinfo); } } else if (pc->st->l3.debug & L3_DEB_CHARGE) l3_debug(pc->st, "charging info not found"); @@ -746,7 +744,6 @@ up1tr6(struct PStack *st, int pr, void *arg) int i, mt, cr; struct l3_process *proc; struct sk_buff *skb = arg; - char tmp[80]; switch (pr) { case (DL_DATA | INDICATION): @@ -762,26 +759,23 @@ up1tr6(struct PStack *st, int pr, void *arg) } if (skb->len < 4) { if (st->l3.debug & L3_DEB_PROTERR) { - sprintf(tmp, "up1tr6 len only %d", skb->len); - l3_debug(st, tmp); + l3_debug(st, "up1tr6 len only %d", skb->len); } dev_kfree_skb(skb); return; } if ((skb->data[0] & 0xfe) != PROTO_DIS_N0) { if (st->l3.debug & L3_DEB_PROTERR) { - sprintf(tmp, "up1tr6%sunexpected discriminator %x message len %d", + l3_debug(st, "up1tr6%sunexpected discriminator %x message len %d", (pr == (DL_DATA | INDICATION)) ? " " : "(broadcast) ", skb->data[0], skb->len); - l3_debug(st, tmp); } dev_kfree_skb(skb); return; } if (skb->data[1] != 1) { if (st->l3.debug & L3_DEB_PROTERR) { - sprintf(tmp, "up1tr6 CR len not 1"); - l3_debug(st, tmp); + l3_debug(st, "up1tr6 CR len not 1"); } dev_kfree_skb(skb); return; @@ -791,9 +785,8 @@ up1tr6(struct PStack *st, int pr, void *arg) if (skb->data[0] == PROTO_DIS_N0) { dev_kfree_skb(skb); if (st->l3.debug & L3_DEB_STATE) { - sprintf(tmp, "up1tr6%s N0 mt %x unhandled", + l3_debug(st, "up1tr6%s N0 mt %x unhandled", (pr == (DL_DATA | INDICATION)) ? " " : "(broadcast) ", mt); - l3_debug(st, tmp); } } else if (skb->data[0] == PROTO_DIS_N1) { if (!(proc = getl3proc(st, cr))) { @@ -801,8 +794,7 @@ up1tr6(struct PStack *st, int pr, void *arg) if (cr < 128) { if (!(proc = new_l3_process(st, cr))) { if (st->l3.debug & L3_DEB_PROTERR) { - sprintf(tmp, "up1tr6 no roc mem"); - l3_debug(st, tmp); + l3_debug(st, "up1tr6 no roc mem"); } dev_kfree_skb(skb); return; @@ -821,8 +813,7 @@ up1tr6(struct PStack *st, int pr, void *arg) } else { if (!(proc = new_l3_process(st, cr))) { if (st->l3.debug & L3_DEB_PROTERR) { - sprintf(tmp, "up1tr6 no roc mem"); - l3_debug(st, tmp); + l3_debug(st, "up1tr6 no roc mem"); } dev_kfree_skb(skb); return; @@ -837,18 +828,16 @@ up1tr6(struct PStack *st, int pr, void *arg) if (i == ARRAY_SIZE(datastln1)) { dev_kfree_skb(skb); if (st->l3.debug & L3_DEB_STATE) { - sprintf(tmp, "up1tr6%sstate %d mt %x unhandled", + l3_debug(st, "up1tr6%sstate %d mt %x unhandled", (pr == (DL_DATA | INDICATION)) ? " " : "(broadcast) ", proc->state, mt); - l3_debug(st, tmp); } return; } else { if (st->l3.debug & L3_DEB_STATE) { - sprintf(tmp, "up1tr6%sstate %d mt %x", + l3_debug(st, "up1tr6%sstate %d mt %x", (pr == (DL_DATA | INDICATION)) ? " " : "(broadcast) ", proc->state, mt); - l3_debug(st, tmp); } datastln1[i].rout(proc, pr, skb); } @@ -861,7 +850,6 @@ down1tr6(struct PStack *st, int pr, void *arg) int i, cr; struct l3_process *proc; struct Channel *chan; - char tmp[80]; if ((DL_ESTABLISH | REQUEST) == pr) { l3_msg(st, pr, NULL); @@ -888,15 +876,13 @@ down1tr6(struct PStack *st, int pr, void *arg) break; if (i == ARRAY_SIZE(downstl)) { if (st->l3.debug & L3_DEB_STATE) { - sprintf(tmp, "down1tr6 state %d prim %d unhandled", + l3_debug(st, "down1tr6 state %d prim %d unhandled", proc->state, pr); - l3_debug(st, tmp); } } else { if (st->l3.debug & L3_DEB_STATE) { - sprintf(tmp, "down1tr6 state %d prim %d", + l3_debug(st, "down1tr6 state %d prim %d", proc->state, pr); - l3_debug(st, tmp); } downstl[i].rout(proc, pr, arg); } diff --git a/drivers/isdn/hisax/netjet.c b/drivers/isdn/hisax/netjet.c index b646eed379df..233e432e06f6 100644 --- a/drivers/isdn/hisax/netjet.c +++ b/drivers/isdn/hisax/netjet.c @@ -176,7 +176,7 @@ static void printframe(struct IsdnCardState *cs, u_char *buf, int count, char *s else j = i; QuickHex(t, p, j); - debugl1(cs, tmp); + debugl1(cs, "%s", tmp); p += j; i -= j; t = tmp; diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c index 041bf52d9d0a..af1b020a81f1 100644 --- a/drivers/isdn/hisax/q931.c +++ b/drivers/isdn/hisax/q931.c @@ -1179,7 +1179,7 @@ LogFrame(struct IsdnCardState *cs, u_char *buf, int size) dp--; *dp++ = '\n'; *dp = 0; - HiSax_putstatus(cs, NULL, cs->dlog); + HiSax_putstatus(cs, NULL, "%s", cs->dlog); } else HiSax_putstatus(cs, "LogFrame: ", "warning Frame too big (%d)", size); } @@ -1246,7 +1246,7 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir) } if (finish) { *dp = 0; - HiSax_putstatus(cs, NULL, cs->dlog); + HiSax_putstatus(cs, NULL, "%s", cs->dlog); return; } if ((0xfe & buf[0]) == PROTO_DIS_N0) { /* 1TR6 */ @@ -1509,5 +1509,5 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir) dp += sprintf(dp, "Unknown protocol %x!", buf[0]); } *dp = 0; - HiSax_putstatus(cs, NULL, cs->dlog); + HiSax_putstatus(cs, NULL, "%s", cs->dlog); } diff --git a/drivers/isdn/hisax/w6692.c b/drivers/isdn/hisax/w6692.c index d8cac6935818..a85895585d90 100644 --- a/drivers/isdn/hisax/w6692.c +++ b/drivers/isdn/hisax/w6692.c @@ -154,7 +154,7 @@ W6692_empty_fifo(struct IsdnCardState *cs, int count) t += sprintf(t, "W6692_empty_fifo cnt %d", count); QuickHex(t, ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } } @@ -196,7 +196,7 @@ W6692_fill_fifo(struct IsdnCardState *cs) t += sprintf(t, "W6692_fill_fifo cnt %d", count); QuickHex(t, ptr, count); - debugl1(cs, cs->dlog); + debugl1(cs, "%s", cs->dlog); } } @@ -226,7 +226,7 @@ W6692B_empty_fifo(struct BCState *bcs, int count) t += sprintf(t, "W6692B_empty_fifo %c cnt %d", bcs->channel + '1', count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } @@ -264,7 +264,7 @@ W6692B_fill_fifo(struct BCState *bcs) t += sprintf(t, "W6692B_fill_fifo %c cnt %d", bcs->channel + '1', count); QuickHex(t, ptr, count); - debugl1(cs, bcs->blog); + debugl1(cs, "%s", bcs->blog); } } diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index da30c5cb9609..174aa3f17afb 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -21,10 +21,14 @@ #include "core.h" static u_int debug; +static u_int gid; +kgid_t misdn_permitted_gid; MODULE_AUTHOR("Karsten Keil"); MODULE_LICENSE("GPL"); module_param(debug, uint, S_IRUGO | S_IWUSR); +module_param(gid, uint, 0); +MODULE_PARM_DESC(gid, "Unix group for accessing misdn socket (default 0)"); static u64 device_ids; #define MAX_DEVICE_ID 63 @@ -372,6 +376,8 @@ mISDNInit(void) { int err; + misdn_permitted_gid = make_kgid(current_user_ns(), gid); + printk(KERN_INFO "Modular ISDN core version %d.%d.%d\n", MISDN_MAJOR_VERSION, MISDN_MINOR_VERSION, MISDN_RELEASE); mISDN_init_clock(&debug); diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h index 52695bb81ee7..5f509bf93c00 100644 --- a/drivers/isdn/mISDN/core.h +++ b/drivers/isdn/mISDN/core.h @@ -17,6 +17,7 @@ extern struct mISDNdevice *get_mdevice(u_int); extern int get_mdevice_count(void); +extern kgid_t misdn_permitted_gid; /* stack status flag */ #define mISDN_STACK_ACTION_MASK 0x0000ffff diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index e47dcb9d1e91..8dcef368e9da 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -612,6 +612,11 @@ data_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; + if (!capable(CAP_SYS_ADMIN) && + !gid_eq(misdn_permitted_gid, current_gid()) && + !in_group_p(misdn_permitted_gid)) + return -EPERM; + if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; @@ -694,6 +699,10 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case IMSETDEVNAME: { struct mISDN_devrename dn; + if (!capable(CAP_SYS_ADMIN) && + !gid_eq(misdn_permitted_gid, current_gid()) && + !in_group_p(misdn_permitted_gid)) + return -EPERM; if (copy_from_user(&dn, (void __user *)arg, sizeof(dn))) { err = -EFAULT; diff --git a/drivers/memstick/core/Kconfig b/drivers/memstick/core/Kconfig index 95f1814b5368..1d389491d5fd 100644 --- a/drivers/memstick/core/Kconfig +++ b/drivers/memstick/core/Kconfig @@ -24,3 +24,15 @@ config MSPRO_BLOCK support. This provides a block device driver, which you can use to mount the filesystem. Almost everyone wishing MemoryStick support should say Y or M here. + +config MS_BLOCK + tristate "MemoryStick Standard device driver" + depends on BLOCK + help + Say Y here to enable the MemoryStick Standard device driver + support. This provides a block device driver, which you can use + to mount the filesystem. + This driver works with old (bulky) MemoryStick and MemoryStick Duo + but not PRO. Say Y if you have such card. + Driver is new and not yet well tested, thus it can damage your card + (even permanently) diff --git a/drivers/memstick/core/Makefile b/drivers/memstick/core/Makefile index ecd029937738..0d7f90c0ff25 100644 --- a/drivers/memstick/core/Makefile +++ b/drivers/memstick/core/Makefile @@ -3,5 +3,5 @@ # obj-$(CONFIG_MEMSTICK) += memstick.o - +obj-$(CONFIG_MS_BLOCK) += ms_block.o obj-$(CONFIG_MSPRO_BLOCK) += mspro_block.o diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c new file mode 100644 index 000000000000..08e70232062f --- /dev/null +++ b/drivers/memstick/core/ms_block.c @@ -0,0 +1,2385 @@ +/* + * ms_block.c - Sony MemoryStick (legacy) storage support + + * Copyright (C) 2013 Maxim Levitsky <maximlevitsky@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Minor portions of the driver were copied from mspro_block.c which is + * Copyright (C) 2007 Alex Dubov <oakad@yahoo.com> + * + */ +#define DRIVER_NAME "ms_block" +#define pr_fmt(fmt) DRIVER_NAME ": " fmt + +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/memstick.h> +#include <linux/idr.h> +#include <linux/hdreg.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/bitmap.h> +#include <linux/scatterlist.h> +#include <linux/jiffies.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include "ms_block.h" + +static int debug; +static int cache_flush_timeout = 1000; +static bool verify_writes; + +/* + * Copies section of 'sg_from' starting from offset 'offset' and with length + * 'len' To another scatterlist of to_nents enties + */ +static size_t msb_sg_copy(struct scatterlist *sg_from, + struct scatterlist *sg_to, int to_nents, size_t offset, size_t len) +{ + size_t copied = 0; + + while (offset > 0) { + if (offset >= sg_from->length) { + if (sg_is_last(sg_from)) + return 0; + + offset -= sg_from->length; + sg_from = sg_next(sg_from); + continue; + } + + copied = min(len, sg_from->length - offset); + sg_set_page(sg_to, sg_page(sg_from), + copied, sg_from->offset + offset); + + len -= copied; + offset = 0; + + if (sg_is_last(sg_from) || !len) + goto out; + + sg_to = sg_next(sg_to); + to_nents--; + sg_from = sg_next(sg_from); + } + + while (len > sg_from->length && to_nents--) { + len -= sg_from->length; + copied += sg_from->length; + + sg_set_page(sg_to, sg_page(sg_from), + sg_from->length, sg_from->offset); + + if (sg_is_last(sg_from) || !len) + goto out; + + sg_from = sg_next(sg_from); + sg_to = sg_next(sg_to); + } + + if (len && to_nents) { + sg_set_page(sg_to, sg_page(sg_from), len, sg_from->offset); + copied += len; + } +out: + sg_mark_end(sg_to); + return copied; +} + +/* + * Compares section of 'sg' starting from offset 'offset' and with length 'len' + * to linear buffer of length 'len' at address 'buffer' + * Returns 0 if equal and -1 otherwice + */ +static int msb_sg_compare_to_buffer(struct scatterlist *sg, + size_t offset, u8 *buffer, size_t len) +{ + int retval = 0, cmplen; + struct sg_mapping_iter miter; + + sg_miter_start(&miter, sg, sg_nents(sg), + SG_MITER_ATOMIC | SG_MITER_FROM_SG); + + while (sg_miter_next(&miter) && len > 0) { + if (offset >= miter.length) { + offset -= miter.length; + continue; + } + + cmplen = min(miter.length - offset, len); + retval = memcmp(miter.addr + offset, buffer, cmplen) ? -1 : 0; + if (retval) + break; + + buffer += cmplen; + len -= cmplen; + offset = 0; + } + + if (!retval && len) + retval = -1; + + sg_miter_stop(&miter); + return retval; +} + + +/* Get zone at which block with logical address 'lba' lives + * Flash is broken into zones. + * Each zone consists of 512 eraseblocks, out of which in first + * zone 494 are used and 496 are for all following zones. + * Therefore zone #0 hosts blocks 0-493, zone #1 blocks 494-988, etc... +*/ +static int msb_get_zone_from_lba(int lba) +{ + if (lba < 494) + return 0; + return ((lba - 494) / 496) + 1; +} + +/* Get zone of physical block. Trivial */ +static int msb_get_zone_from_pba(int pba) +{ + return pba / MS_BLOCKS_IN_ZONE; +} + +/* Debug test to validate free block counts */ +static int msb_validate_used_block_bitmap(struct msb_data *msb) +{ + int total_free_blocks = 0; + int i; + + if (!debug) + return 0; + + for (i = 0; i < msb->zone_count; i++) + total_free_blocks += msb->free_block_count[i]; + + if (msb->block_count - bitmap_weight(msb->used_blocks_bitmap, + msb->block_count) == total_free_blocks) + return 0; + + pr_err("BUG: free block counts don't match the bitmap"); + msb->read_only = true; + return -EINVAL; +} + +/* Mark physical block as used */ +static void msb_mark_block_used(struct msb_data *msb, int pba) +{ + int zone = msb_get_zone_from_pba(pba); + + if (test_bit(pba, msb->used_blocks_bitmap)) { + pr_err( + "BUG: attempt to mark already used pba %d as used", pba); + msb->read_only = true; + return; + } + + if (msb_validate_used_block_bitmap(msb)) + return; + + /* No races because all IO is single threaded */ + __set_bit(pba, msb->used_blocks_bitmap); + msb->free_block_count[zone]--; +} + +/* Mark physical block as free */ +static void msb_mark_block_unused(struct msb_data *msb, int pba) +{ + int zone = msb_get_zone_from_pba(pba); + + if (!test_bit(pba, msb->used_blocks_bitmap)) { + pr_err("BUG: attempt to mark already unused pba %d as unused" , pba); + msb->read_only = true; + return; + } + + if (msb_validate_used_block_bitmap(msb)) + return; + + /* No races because all IO is single threaded */ + __clear_bit(pba, msb->used_blocks_bitmap); + msb->free_block_count[zone]++; +} + +/* Invalidate current register window */ +static void msb_invalidate_reg_window(struct msb_data *msb) +{ + msb->reg_addr.w_offset = offsetof(struct ms_register, id); + msb->reg_addr.w_length = sizeof(struct ms_id_register); + msb->reg_addr.r_offset = offsetof(struct ms_register, id); + msb->reg_addr.r_length = sizeof(struct ms_id_register); + msb->addr_valid = false; +} + +/* Start a state machine */ +static int msb_run_state_machine(struct msb_data *msb, int (*state_func) + (struct memstick_dev *card, struct memstick_request **req)) +{ + struct memstick_dev *card = msb->card; + + WARN_ON(msb->state != -1); + msb->int_polling = false; + msb->state = 0; + msb->exit_error = 0; + + memset(&card->current_mrq, 0, sizeof(card->current_mrq)); + + card->next_request = state_func; + memstick_new_req(card->host); + wait_for_completion(&card->mrq_complete); + + WARN_ON(msb->state != -1); + return msb->exit_error; +} + +/* State machines call that to exit */ +static int msb_exit_state_machine(struct msb_data *msb, int error) +{ + WARN_ON(msb->state == -1); + + msb->state = -1; + msb->exit_error = error; + msb->card->next_request = h_msb_default_bad; + + /* Invalidate reg window on errors */ + if (error) + msb_invalidate_reg_window(msb); + + complete(&msb->card->mrq_complete); + return -ENXIO; +} + +/* read INT register */ +static int msb_read_int_reg(struct msb_data *msb, long timeout) +{ + struct memstick_request *mrq = &msb->card->current_mrq; + + WARN_ON(msb->state == -1); + + if (!msb->int_polling) { + msb->int_timeout = jiffies + + msecs_to_jiffies(timeout == -1 ? 500 : timeout); + msb->int_polling = true; + } else if (time_after(jiffies, msb->int_timeout)) { + mrq->data[0] = MEMSTICK_INT_CMDNAK; + return 0; + } + + if ((msb->caps & MEMSTICK_CAP_AUTO_GET_INT) && + mrq->need_card_int && !mrq->error) { + mrq->data[0] = mrq->int_reg; + mrq->need_card_int = false; + return 0; + } else { + memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1); + return 1; + } +} + +/* Read a register */ +static int msb_read_regs(struct msb_data *msb, int offset, int len) +{ + struct memstick_request *req = &msb->card->current_mrq; + + if (msb->reg_addr.r_offset != offset || + msb->reg_addr.r_length != len || !msb->addr_valid) { + + msb->reg_addr.r_offset = offset; + msb->reg_addr.r_length = len; + msb->addr_valid = true; + + memstick_init_req(req, MS_TPC_SET_RW_REG_ADRS, + &msb->reg_addr, sizeof(msb->reg_addr)); + return 0; + } + + memstick_init_req(req, MS_TPC_READ_REG, NULL, len); + return 1; +} + +/* Write a card register */ +static int msb_write_regs(struct msb_data *msb, int offset, int len, void *buf) +{ + struct memstick_request *req = &msb->card->current_mrq; + + if (msb->reg_addr.w_offset != offset || + msb->reg_addr.w_length != len || !msb->addr_valid) { + + msb->reg_addr.w_offset = offset; + msb->reg_addr.w_length = len; + msb->addr_valid = true; + + memstick_init_req(req, MS_TPC_SET_RW_REG_ADRS, + &msb->reg_addr, sizeof(msb->reg_addr)); + return 0; + } + + memstick_init_req(req, MS_TPC_WRITE_REG, buf, len); + return 1; +} + +/* Handler for absence of IO */ +static int h_msb_default_bad(struct memstick_dev *card, + struct memstick_request **mrq) +{ + return -ENXIO; +} + +/* + * This function is a handler for reads of one page from device. + * Writes output to msb->current_sg, takes sector address from msb->reg.param + * Can also be used to read extra data only. Set params accordintly. + */ +static int h_msb_read_page(struct memstick_dev *card, + struct memstick_request **out_mrq) +{ + struct msb_data *msb = memstick_get_drvdata(card); + struct memstick_request *mrq = *out_mrq = &card->current_mrq; + struct scatterlist sg[2]; + u8 command, intreg; + + if (mrq->error) { + dbg("read_page, unknown error"); + return msb_exit_state_machine(msb, mrq->error); + } +again: + switch (msb->state) { + case MSB_RP_SEND_BLOCK_ADDRESS: + /* msb_write_regs sometimes "fails" because it needs to update + the reg window, and thus it returns request for that. + Then we stay in this state and retry */ + if (!msb_write_regs(msb, + offsetof(struct ms_register, param), + sizeof(struct ms_param_register), + (unsigned char *)&msb->regs.param)) + return 0; + + msb->state = MSB_RP_SEND_READ_COMMAND; + return 0; + + case MSB_RP_SEND_READ_COMMAND: + command = MS_CMD_BLOCK_READ; + memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1); + msb->state = MSB_RP_SEND_INT_REQ; + return 0; + + case MSB_RP_SEND_INT_REQ: + msb->state = MSB_RP_RECEIVE_INT_REQ_RESULT; + /* If dont actually need to send the int read request (only in + serial mode), then just fall through */ + if (msb_read_int_reg(msb, -1)) + return 0; + /* fallthrough */ + + case MSB_RP_RECEIVE_INT_REQ_RESULT: + intreg = mrq->data[0]; + msb->regs.status.interrupt = intreg; + + if (intreg & MEMSTICK_INT_CMDNAK) + return msb_exit_state_machine(msb, -EIO); + + if (!(intreg & MEMSTICK_INT_CED)) { + msb->state = MSB_RP_SEND_INT_REQ; + goto again; + } + + msb->int_polling = false; + msb->state = (intreg & MEMSTICK_INT_ERR) ? + MSB_RP_SEND_READ_STATUS_REG : MSB_RP_SEND_OOB_READ; + goto again; + + case MSB_RP_SEND_READ_STATUS_REG: + /* read the status register to understand source of the INT_ERR */ + if (!msb_read_regs(msb, + offsetof(struct ms_register, status), + sizeof(struct ms_status_register))) + return 0; + + msb->state = MSB_RP_RECEIVE_OOB_READ; + return 0; + + case MSB_RP_RECIVE_STATUS_REG: + msb->regs.status = *(struct ms_status_register *)mrq->data; + msb->state = MSB_RP_SEND_OOB_READ; + /* fallthrough */ + + case MSB_RP_SEND_OOB_READ: + if (!msb_read_regs(msb, + offsetof(struct ms_register, extra_data), + sizeof(struct ms_extra_data_register))) + return 0; + + msb->state = MSB_RP_RECEIVE_OOB_READ; + return 0; + + case MSB_RP_RECEIVE_OOB_READ: + msb->regs.extra_data = + *(struct ms_extra_data_register *) mrq->data; + msb->state = MSB_RP_SEND_READ_DATA; + /* fallthrough */ + + case MSB_RP_SEND_READ_DATA: + /* Skip that state if we only read the oob */ + if (msb->regs.param.cp == MEMSTICK_CP_EXTRA) { + msb->state = MSB_RP_RECEIVE_READ_DATA; + goto again; + } + + sg_init_table(sg, ARRAY_SIZE(sg)); + msb_sg_copy(msb->current_sg, sg, ARRAY_SIZE(sg), + msb->current_sg_offset, + msb->page_size); + + memstick_init_req_sg(mrq, MS_TPC_READ_LONG_DATA, sg); + msb->state = MSB_RP_RECEIVE_READ_DATA; + return 0; + + case MSB_RP_RECEIVE_READ_DATA: + if (!(msb->regs.status.interrupt & MEMSTICK_INT_ERR)) { + msb->current_sg_offset += msb->page_size; + return msb_exit_state_machine(msb, 0); + } + + if (msb->regs.status.status1 & MEMSTICK_UNCORR_ERROR) { + dbg("read_page: uncorrectable error"); + return msb_exit_state_machine(msb, -EBADMSG); + } + + if (msb->regs.status.status1 & MEMSTICK_CORR_ERROR) { + dbg("read_page: correctable error"); + msb->current_sg_offset += msb->page_size; + return msb_exit_state_machine(msb, -EUCLEAN); + } else { + dbg("read_page: INT error, but no status error bits"); + return msb_exit_state_machine(msb, -EIO); + } + } + + BUG(); +} + +/* + * Handler of writes of exactly one block. + * Takes address from msb->regs.param. + * Writes same extra data to blocks, also taken + * from msb->regs.extra + * Returns -EBADMSG if write fails due to uncorrectable error, or -EIO if + * device refuses to take the command or something else + */ +static int h_msb_write_block(struct memstick_dev *card, + struct memstick_request **out_mrq) +{ + struct msb_data *msb = memstick_get_drvdata(card); + struct memstick_request *mrq = *out_mrq = &card->current_mrq; + struct scatterlist sg[2]; + u8 intreg, command; + + if (mrq->error) + return msb_exit_state_machine(msb, mrq->error); + +again: + switch (msb->state) { + + /* HACK: Jmicon handling of TPCs between 8 and + * sizeof(memstick_request.data) is broken due to hardware + * bug in PIO mode that is used for these TPCs + * Therefore split the write + */ + + case MSB_WB_SEND_WRITE_PARAMS: + if (!msb_write_regs(msb, + offsetof(struct ms_register, param), + sizeof(struct ms_param_register), + &msb->regs.param)) + return 0; + + msb->state = MSB_WB_SEND_WRITE_OOB; + return 0; + + case MSB_WB_SEND_WRITE_OOB: + if (!msb_write_regs(msb, + offsetof(struct ms_register, extra_data), + sizeof(struct ms_extra_data_register), + &msb->regs.extra_data)) + return 0; + msb->state = MSB_WB_SEND_WRITE_COMMAND; + return 0; + + + case MSB_WB_SEND_WRITE_COMMAND: + command = MS_CMD_BLOCK_WRITE; + memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1); + msb->state = MSB_WB_SEND_INT_REQ; + return 0; + + case MSB_WB_SEND_INT_REQ: + msb->state = MSB_WB_RECEIVE_INT_REQ; + if (msb_read_int_reg(msb, -1)) + return 0; + /* fallthrough */ + + case MSB_WB_RECEIVE_INT_REQ: + intreg = mrq->data[0]; + msb->regs.status.interrupt = intreg; + + /* errors mean out of here, and fast... */ + if (intreg & (MEMSTICK_INT_CMDNAK)) + return msb_exit_state_machine(msb, -EIO); + + if (intreg & MEMSTICK_INT_ERR) + return msb_exit_state_machine(msb, -EBADMSG); + + + /* for last page we need to poll CED */ + if (msb->current_page == msb->pages_in_block) { + if (intreg & MEMSTICK_INT_CED) + return msb_exit_state_machine(msb, 0); + msb->state = MSB_WB_SEND_INT_REQ; + goto again; + + } + + /* for non-last page we need BREQ before writing next chunk */ + if (!(intreg & MEMSTICK_INT_BREQ)) { + msb->state = MSB_WB_SEND_INT_REQ; + goto again; + } + + msb->int_polling = false; + msb->state = MSB_WB_SEND_WRITE_DATA; + /* fallthrough */ + + case MSB_WB_SEND_WRITE_DATA: + sg_init_table(sg, ARRAY_SIZE(sg)); + + if (msb_sg_copy(msb->current_sg, sg, ARRAY_SIZE(sg), + msb->current_sg_offset, + msb->page_size) < msb->page_size) + return msb_exit_state_machine(msb, -EIO); + + memstick_init_req_sg(mrq, MS_TPC_WRITE_LONG_DATA, sg); + mrq->need_card_int = 1; + msb->state = MSB_WB_RECEIVE_WRITE_CONFIRMATION; + return 0; + + case MSB_WB_RECEIVE_WRITE_CONFIRMATION: + msb->current_page++; + msb->current_sg_offset += msb->page_size; + msb->state = MSB_WB_SEND_INT_REQ; + goto again; + default: + BUG(); + } + + return 0; +} + +/* + * This function is used to send simple IO requests to device that consist + * of register write + command + */ +static int h_msb_send_command(struct memstick_dev *card, + struct memstick_request **out_mrq) +{ + struct msb_data *msb = memstick_get_drvdata(card); + struct memstick_request *mrq = *out_mrq = &card->current_mrq; + u8 intreg; + + if (mrq->error) { + dbg("send_command: unknown error"); + return msb_exit_state_machine(msb, mrq->error); + } +again: + switch (msb->state) { + + /* HACK: see h_msb_write_block */ + case MSB_SC_SEND_WRITE_PARAMS: /* write param register*/ + if (!msb_write_regs(msb, + offsetof(struct ms_register, param), + sizeof(struct ms_param_register), + &msb->regs.param)) + return 0; + msb->state = MSB_SC_SEND_WRITE_OOB; + return 0; + + case MSB_SC_SEND_WRITE_OOB: + if (!msb->command_need_oob) { + msb->state = MSB_SC_SEND_COMMAND; + goto again; + } + + if (!msb_write_regs(msb, + offsetof(struct ms_register, extra_data), + sizeof(struct ms_extra_data_register), + &msb->regs.extra_data)) + return 0; + + msb->state = MSB_SC_SEND_COMMAND; + return 0; + + case MSB_SC_SEND_COMMAND: + memstick_init_req(mrq, MS_TPC_SET_CMD, &msb->command_value, 1); + msb->state = MSB_SC_SEND_INT_REQ; + return 0; + + case MSB_SC_SEND_INT_REQ: + msb->state = MSB_SC_RECEIVE_INT_REQ; + if (msb_read_int_reg(msb, -1)) + return 0; + /* fallthrough */ + + case MSB_SC_RECEIVE_INT_REQ: + intreg = mrq->data[0]; + + if (intreg & MEMSTICK_INT_CMDNAK) + return msb_exit_state_machine(msb, -EIO); + if (intreg & MEMSTICK_INT_ERR) + return msb_exit_state_machine(msb, -EBADMSG); + + if (!(intreg & MEMSTICK_INT_CED)) { + msb->state = MSB_SC_SEND_INT_REQ; + goto again; + } + + return msb_exit_state_machine(msb, 0); + } + + BUG(); +} + +/* Small handler for card reset */ +static int h_msb_reset(struct memstick_dev *card, + struct memstick_request **out_mrq) +{ + u8 command = MS_CMD_RESET; + struct msb_data *msb = memstick_get_drvdata(card); + struct memstick_request *mrq = *out_mrq = &card->current_mrq; + + if (mrq->error) + return msb_exit_state_machine(msb, mrq->error); + + switch (msb->state) { + case MSB_RS_SEND: + memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1); + mrq->need_card_int = 0; + msb->state = MSB_RS_CONFIRM; + return 0; + case MSB_RS_CONFIRM: + return msb_exit_state_machine(msb, 0); + } + BUG(); +} + +/* This handler is used to do serial->parallel switch */ +static int h_msb_parallel_switch(struct memstick_dev *card, + struct memstick_request **out_mrq) +{ + struct msb_data *msb = memstick_get_drvdata(card); + struct memstick_request *mrq = *out_mrq = &card->current_mrq; + struct memstick_host *host = card->host; + + if (mrq->error) { + dbg("parallel_switch: error"); + msb->regs.param.system &= ~MEMSTICK_SYS_PAM; + return msb_exit_state_machine(msb, mrq->error); + } + + switch (msb->state) { + case MSB_PS_SEND_SWITCH_COMMAND: + /* Set the parallel interface on memstick side */ + msb->regs.param.system |= MEMSTICK_SYS_PAM; + + if (!msb_write_regs(msb, + offsetof(struct ms_register, param), + 1, + (unsigned char *)&msb->regs.param)) + return 0; + + msb->state = MSB_PS_SWICH_HOST; + return 0; + + case MSB_PS_SWICH_HOST: + /* Set parallel interface on our side + send a dummy request + to see if card responds */ + host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_PAR4); + memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1); + msb->state = MSB_PS_CONFIRM; + return 0; + + case MSB_PS_CONFIRM: + return msb_exit_state_machine(msb, 0); + } + + BUG(); +} + +static int msb_switch_to_parallel(struct msb_data *msb); + +/* Reset the card, to guard against hw errors beeing treated as bad blocks */ +static int msb_reset(struct msb_data *msb, bool full) +{ + + bool was_parallel = msb->regs.param.system & MEMSTICK_SYS_PAM; + struct memstick_dev *card = msb->card; + struct memstick_host *host = card->host; + int error; + + /* Reset the card */ + msb->regs.param.system = MEMSTICK_SYS_BAMD; + + if (full) { + error = host->set_param(host, + MEMSTICK_POWER, MEMSTICK_POWER_OFF); + if (error) + goto out_error; + + msb_invalidate_reg_window(msb); + + error = host->set_param(host, + MEMSTICK_POWER, MEMSTICK_POWER_ON); + if (error) + goto out_error; + + error = host->set_param(host, + MEMSTICK_INTERFACE, MEMSTICK_SERIAL); + if (error) { +out_error: + dbg("Failed to reset the host controller"); + msb->read_only = true; + return -EFAULT; + } + } + + error = msb_run_state_machine(msb, h_msb_reset); + if (error) { + dbg("Failed to reset the card"); + msb->read_only = true; + return -ENODEV; + } + + /* Set parallel mode */ + if (was_parallel) + msb_switch_to_parallel(msb); + return 0; +} + +/* Attempts to switch interface to parallel mode */ +static int msb_switch_to_parallel(struct msb_data *msb) +{ + int error; + + error = msb_run_state_machine(msb, h_msb_parallel_switch); + if (error) { + pr_err("Switch to parallel failed"); + msb->regs.param.system &= ~MEMSTICK_SYS_PAM; + msb_reset(msb, true); + return -EFAULT; + } + + msb->caps |= MEMSTICK_CAP_AUTO_GET_INT; + return 0; +} + +/* Changes overwrite flag on a page */ +static int msb_set_overwrite_flag(struct msb_data *msb, + u16 pba, u8 page, u8 flag) +{ + if (msb->read_only) + return -EROFS; + + msb->regs.param.block_address = cpu_to_be16(pba); + msb->regs.param.page_address = page; + msb->regs.param.cp = MEMSTICK_CP_OVERWRITE; + msb->regs.extra_data.overwrite_flag = flag; + msb->command_value = MS_CMD_BLOCK_WRITE; + msb->command_need_oob = true; + + dbg_verbose("changing overwrite flag to %02x for sector %d, page %d", + flag, pba, page); + return msb_run_state_machine(msb, h_msb_send_command); +} + +static int msb_mark_bad(struct msb_data *msb, int pba) +{ + pr_notice("marking pba %d as bad", pba); + msb_reset(msb, true); + return msb_set_overwrite_flag( + msb, pba, 0, 0xFF & ~MEMSTICK_OVERWRITE_BKST); +} + +static int msb_mark_page_bad(struct msb_data *msb, int pba, int page) +{ + dbg("marking page %d of pba %d as bad", page, pba); + msb_reset(msb, true); + return msb_set_overwrite_flag(msb, + pba, page, ~MEMSTICK_OVERWRITE_PGST0); +} + +/* Erases one physical block */ +static int msb_erase_block(struct msb_data *msb, u16 pba) +{ + int error, try; + if (msb->read_only) + return -EROFS; + + dbg_verbose("erasing pba %d", pba); + + for (try = 1; try < 3; try++) { + msb->regs.param.block_address = cpu_to_be16(pba); + msb->regs.param.page_address = 0; + msb->regs.param.cp = MEMSTICK_CP_BLOCK; + msb->command_value = MS_CMD_BLOCK_ERASE; + msb->command_need_oob = false; + + + error = msb_run_state_machine(msb, h_msb_send_command); + if (!error || msb_reset(msb, true)) + break; + } + + if (error) { + pr_err("erase failed, marking pba %d as bad", pba); + msb_mark_bad(msb, pba); + } + + dbg_verbose("erase success, marking pba %d as unused", pba); + msb_mark_block_unused(msb, pba); + __set_bit(pba, msb->erased_blocks_bitmap); + return error; +} + +/* Reads one page from device */ +static int msb_read_page(struct msb_data *msb, + u16 pba, u8 page, struct ms_extra_data_register *extra, + struct scatterlist *sg, int offset) +{ + int try, error; + + if (pba == MS_BLOCK_INVALID) { + unsigned long flags; + struct sg_mapping_iter miter; + size_t len = msb->page_size; + + dbg_verbose("read unmapped sector. returning 0xFF"); + + local_irq_save(flags); + sg_miter_start(&miter, sg, sg_nents(sg), + SG_MITER_ATOMIC | SG_MITER_TO_SG); + + while (sg_miter_next(&miter) && len > 0) { + + int chunklen; + + if (offset && offset >= miter.length) { + offset -= miter.length; + continue; + } + + chunklen = min(miter.length - offset, len); + memset(miter.addr + offset, 0xFF, chunklen); + len -= chunklen; + offset = 0; + } + + sg_miter_stop(&miter); + local_irq_restore(flags); + + if (offset) + return -EFAULT; + + if (extra) + memset(extra, 0xFF, sizeof(*extra)); + return 0; + } + + if (pba >= msb->block_count) { + pr_err("BUG: attempt to read beyond the end of the card at pba %d", pba); + return -EINVAL; + } + + for (try = 1; try < 3; try++) { + msb->regs.param.block_address = cpu_to_be16(pba); + msb->regs.param.page_address = page; + msb->regs.param.cp = MEMSTICK_CP_PAGE; + + msb->current_sg = sg; + msb->current_sg_offset = offset; + error = msb_run_state_machine(msb, h_msb_read_page); + + + if (error == -EUCLEAN) { + pr_notice("correctable error on pba %d, page %d", + pba, page); + error = 0; + } + + if (!error && extra) + *extra = msb->regs.extra_data; + + if (!error || msb_reset(msb, true)) + break; + + } + + /* Mark bad pages */ + if (error == -EBADMSG) { + pr_err("uncorrectable error on read of pba %d, page %d", + pba, page); + + if (msb->regs.extra_data.overwrite_flag & + MEMSTICK_OVERWRITE_PGST0) + msb_mark_page_bad(msb, pba, page); + return -EBADMSG; + } + + if (error) + pr_err("read of pba %d, page %d failed with error %d", + pba, page, error); + return error; +} + +/* Reads oob of page only */ +static int msb_read_oob(struct msb_data *msb, u16 pba, u16 page, + struct ms_extra_data_register *extra) +{ + int error; + + BUG_ON(!extra); + msb->regs.param.block_address = cpu_to_be16(pba); + msb->regs.param.page_address = page; + msb->regs.param.cp = MEMSTICK_CP_EXTRA; + + if (pba > msb->block_count) { + pr_err("BUG: attempt to read beyond the end of card at pba %d", pba); + return -EINVAL; + } + + error = msb_run_state_machine(msb, h_msb_read_page); + *extra = msb->regs.extra_data; + + if (error == -EUCLEAN) { + pr_notice("correctable error on pba %d, page %d", + pba, page); + return 0; + } + + return error; +} + +/* Reads a block and compares it with data contained in scatterlist orig_sg */ +static int msb_verify_block(struct msb_data *msb, u16 pba, + struct scatterlist *orig_sg, int offset) +{ + struct scatterlist sg; + int page = 0, error; + + sg_init_one(&sg, msb->block_buffer, msb->block_size); + + while (page < msb->pages_in_block) { + + error = msb_read_page(msb, pba, page, + NULL, &sg, page * msb->page_size); + if (error) + return error; + page++; + } + + if (msb_sg_compare_to_buffer(orig_sg, offset, + msb->block_buffer, msb->block_size)) + return -EIO; + return 0; +} + +/* Writes exectly one block + oob */ +static int msb_write_block(struct msb_data *msb, + u16 pba, u32 lba, struct scatterlist *sg, int offset) +{ + int error, current_try = 1; + BUG_ON(sg->length < msb->page_size); + + if (msb->read_only) + return -EROFS; + + if (pba == MS_BLOCK_INVALID) { + pr_err( + "BUG: write: attempt to write MS_BLOCK_INVALID block"); + return -EINVAL; + } + + if (pba >= msb->block_count || lba >= msb->logical_block_count) { + pr_err( + "BUG: write: attempt to write beyond the end of device"); + return -EINVAL; + } + + if (msb_get_zone_from_lba(lba) != msb_get_zone_from_pba(pba)) { + pr_err("BUG: write: lba zone mismatch"); + return -EINVAL; + } + + if (pba == msb->boot_block_locations[0] || + pba == msb->boot_block_locations[1]) { + pr_err("BUG: write: attempt to write to boot blocks!"); + return -EINVAL; + } + + while (1) { + + if (msb->read_only) + return -EROFS; + + msb->regs.param.cp = MEMSTICK_CP_BLOCK; + msb->regs.param.page_address = 0; + msb->regs.param.block_address = cpu_to_be16(pba); + + msb->regs.extra_data.management_flag = 0xFF; + msb->regs.extra_data.overwrite_flag = 0xF8; + msb->regs.extra_data.logical_address = cpu_to_be16(lba); + + msb->current_sg = sg; + msb->current_sg_offset = offset; + msb->current_page = 0; + + error = msb_run_state_machine(msb, h_msb_write_block); + + /* Sector we just wrote to is assumed erased since its pba + was erased. If it wasn't erased, write will succeed + and will just clear the bits that were set in the block + thus test that what we have written, + matches what we expect. + We do trust the blocks that we erased */ + if (!error && (verify_writes || + !test_bit(pba, msb->erased_blocks_bitmap))) + error = msb_verify_block(msb, pba, sg, offset); + + if (!error) + break; + + if (current_try > 1 || msb_reset(msb, true)) + break; + + pr_err("write failed, trying to erase the pba %d", pba); + error = msb_erase_block(msb, pba); + if (error) + break; + + current_try++; + } + return error; +} + +/* Finds a free block for write replacement */ +static u16 msb_get_free_block(struct msb_data *msb, int zone) +{ + u16 pos; + int pba = zone * MS_BLOCKS_IN_ZONE; + int i; + + get_random_bytes(&pos, sizeof(pos)); + + if (!msb->free_block_count[zone]) { + pr_err("NO free blocks in the zone %d, to use for a write, (media is WORN out) switching to RO mode", zone); + msb->read_only = true; + return MS_BLOCK_INVALID; + } + + pos %= msb->free_block_count[zone]; + + dbg_verbose("have %d choices for a free block, selected randomally: %d", + msb->free_block_count[zone], pos); + + pba = find_next_zero_bit(msb->used_blocks_bitmap, + msb->block_count, pba); + for (i = 0; i < pos; ++i) + pba = find_next_zero_bit(msb->used_blocks_bitmap, + msb->block_count, pba + 1); + + dbg_verbose("result of the free blocks scan: pba %d", pba); + + if (pba == msb->block_count || (msb_get_zone_from_pba(pba)) != zone) { + pr_err("BUG: cant get a free block"); + msb->read_only = true; + return MS_BLOCK_INVALID; + } + + msb_mark_block_used(msb, pba); + return pba; +} + +static int msb_update_block(struct msb_data *msb, u16 lba, + struct scatterlist *sg, int offset) +{ + u16 pba, new_pba; + int error, try; + + pba = msb->lba_to_pba_table[lba]; + dbg_verbose("start of a block update at lba %d, pba %d", lba, pba); + + if (pba != MS_BLOCK_INVALID) { + dbg_verbose("setting the update flag on the block"); + msb_set_overwrite_flag(msb, pba, 0, + 0xFF & ~MEMSTICK_OVERWRITE_UDST); + } + + for (try = 0; try < 3; try++) { + new_pba = msb_get_free_block(msb, + msb_get_zone_from_lba(lba)); + + if (new_pba == MS_BLOCK_INVALID) { + error = -EIO; + goto out; + } + + dbg_verbose("block update: writing updated block to the pba %d", + new_pba); + error = msb_write_block(msb, new_pba, lba, sg, offset); + if (error == -EBADMSG) { + msb_mark_bad(msb, new_pba); + continue; + } + + if (error) + goto out; + + dbg_verbose("block update: erasing the old block"); + msb_erase_block(msb, pba); + msb->lba_to_pba_table[lba] = new_pba; + return 0; + } +out: + if (error) { + pr_err("block update error after %d tries, switching to r/o mode", try); + msb->read_only = true; + } + return error; +} + +/* Converts endiannes in the boot block for easy use */ +static void msb_fix_boot_page_endianness(struct ms_boot_page *p) +{ + p->header.block_id = be16_to_cpu(p->header.block_id); + p->header.format_reserved = be16_to_cpu(p->header.format_reserved); + p->entry.disabled_block.start_addr + = be32_to_cpu(p->entry.disabled_block.start_addr); + p->entry.disabled_block.data_size + = be32_to_cpu(p->entry.disabled_block.data_size); + p->entry.cis_idi.start_addr + = be32_to_cpu(p->entry.cis_idi.start_addr); + p->entry.cis_idi.data_size + = be32_to_cpu(p->entry.cis_idi.data_size); + p->attr.block_size = be16_to_cpu(p->attr.block_size); + p->attr.number_of_blocks = be16_to_cpu(p->attr.number_of_blocks); + p->attr.number_of_effective_blocks + = be16_to_cpu(p->attr.number_of_effective_blocks); + p->attr.page_size = be16_to_cpu(p->attr.page_size); + p->attr.memory_manufacturer_code + = be16_to_cpu(p->attr.memory_manufacturer_code); + p->attr.memory_device_code = be16_to_cpu(p->attr.memory_device_code); + p->attr.implemented_capacity + = be16_to_cpu(p->attr.implemented_capacity); + p->attr.controller_number = be16_to_cpu(p->attr.controller_number); + p->attr.controller_function = be16_to_cpu(p->attr.controller_function); +} + +static int msb_read_boot_blocks(struct msb_data *msb) +{ + int pba = 0; + struct scatterlist sg; + struct ms_extra_data_register extra; + struct ms_boot_page *page; + + msb->boot_block_locations[0] = MS_BLOCK_INVALID; + msb->boot_block_locations[1] = MS_BLOCK_INVALID; + msb->boot_block_count = 0; + + dbg_verbose("Start of a scan for the boot blocks"); + + if (!msb->boot_page) { + page = kmalloc(sizeof(struct ms_boot_page)*2, GFP_KERNEL); + if (!page) + return -ENOMEM; + + msb->boot_page = page; + } else + page = msb->boot_page; + + msb->block_count = MS_BLOCK_MAX_BOOT_ADDR; + + for (pba = 0; pba < MS_BLOCK_MAX_BOOT_ADDR; pba++) { + + sg_init_one(&sg, page, sizeof(*page)); + if (msb_read_page(msb, pba, 0, &extra, &sg, 0)) { + dbg("boot scan: can't read pba %d", pba); + continue; + } + + if (extra.management_flag & MEMSTICK_MANAGEMENT_SYSFLG) { + dbg("managment flag doesn't indicate boot block %d", + pba); + continue; + } + + if (be16_to_cpu(page->header.block_id) != MS_BLOCK_BOOT_ID) { + dbg("the pba at %d doesn' contain boot block ID", pba); + continue; + } + + msb_fix_boot_page_endianness(page); + msb->boot_block_locations[msb->boot_block_count] = pba; + + page++; + msb->boot_block_count++; + + if (msb->boot_block_count == 2) + break; + } + + if (!msb->boot_block_count) { + pr_err("media doesn't contain master page, aborting"); + return -EIO; + } + + dbg_verbose("End of scan for boot blocks"); + return 0; +} + +static int msb_read_bad_block_table(struct msb_data *msb, int block_nr) +{ + struct ms_boot_page *boot_block; + struct scatterlist sg; + u16 *buffer = NULL; + int offset = 0; + int i, error = 0; + int data_size, data_offset, page, page_offset, size_to_read; + u16 pba; + + BUG_ON(block_nr > 1); + boot_block = &msb->boot_page[block_nr]; + pba = msb->boot_block_locations[block_nr]; + + if (msb->boot_block_locations[block_nr] == MS_BLOCK_INVALID) + return -EINVAL; + + data_size = boot_block->entry.disabled_block.data_size; + data_offset = sizeof(struct ms_boot_page) + + boot_block->entry.disabled_block.start_addr; + if (!data_size) + return 0; + + page = data_offset / msb->page_size; + page_offset = data_offset % msb->page_size; + size_to_read = + DIV_ROUND_UP(data_size + page_offset, msb->page_size) * + msb->page_size; + + dbg("reading bad block of boot block at pba %d, offset %d len %d", + pba, data_offset, data_size); + + buffer = kzalloc(size_to_read, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + /* Read the buffer */ + sg_init_one(&sg, buffer, size_to_read); + + while (offset < size_to_read) { + error = msb_read_page(msb, pba, page, NULL, &sg, offset); + if (error) + goto out; + + page++; + offset += msb->page_size; + + if (page == msb->pages_in_block) { + pr_err( + "bad block table extends beyond the boot block"); + break; + } + } + + /* Process the bad block table */ + for (i = page_offset; i < data_size / sizeof(u16); i++) { + + u16 bad_block = be16_to_cpu(buffer[i]); + + if (bad_block >= msb->block_count) { + dbg("bad block table contains invalid block %d", + bad_block); + continue; + } + + if (test_bit(bad_block, msb->used_blocks_bitmap)) { + dbg("duplicate bad block %d in the table", + bad_block); + continue; + } + + dbg("block %d is marked as factory bad", bad_block); + msb_mark_block_used(msb, bad_block); + } +out: + kfree(buffer); + return error; +} + +static int msb_ftl_initialize(struct msb_data *msb) +{ + int i; + + if (msb->ftl_initialized) + return 0; + + msb->zone_count = msb->block_count / MS_BLOCKS_IN_ZONE; + msb->logical_block_count = msb->zone_count * 496 - 2; + + msb->used_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL); + msb->erased_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL); + msb->lba_to_pba_table = + kmalloc(msb->logical_block_count * sizeof(u16), GFP_KERNEL); + + if (!msb->used_blocks_bitmap || !msb->lba_to_pba_table || + !msb->erased_blocks_bitmap) { + kfree(msb->used_blocks_bitmap); + kfree(msb->lba_to_pba_table); + kfree(msb->erased_blocks_bitmap); + return -ENOMEM; + } + + for (i = 0; i < msb->zone_count; i++) + msb->free_block_count[i] = MS_BLOCKS_IN_ZONE; + + memset(msb->lba_to_pba_table, MS_BLOCK_INVALID, + msb->logical_block_count * sizeof(u16)); + + dbg("initial FTL tables created. Zone count = %d, Logical block count = %d", + msb->zone_count, msb->logical_block_count); + + msb->ftl_initialized = true; + return 0; +} + +static int msb_ftl_scan(struct msb_data *msb) +{ + u16 pba, lba, other_block; + u8 overwrite_flag, managment_flag, other_overwrite_flag; + int error; + struct ms_extra_data_register extra; + u8 *overwrite_flags = kzalloc(msb->block_count, GFP_KERNEL); + + if (!overwrite_flags) + return -ENOMEM; + + dbg("Start of media scanning"); + for (pba = 0; pba < msb->block_count; pba++) { + + if (pba == msb->boot_block_locations[0] || + pba == msb->boot_block_locations[1]) { + dbg_verbose("pba %05d -> [boot block]", pba); + msb_mark_block_used(msb, pba); + continue; + } + + if (test_bit(pba, msb->used_blocks_bitmap)) { + dbg_verbose("pba %05d -> [factory bad]", pba); + continue; + } + + memset(&extra, 0, sizeof(extra)); + error = msb_read_oob(msb, pba, 0, &extra); + + /* can't trust the page if we can't read the oob */ + if (error == -EBADMSG) { + pr_notice( + "oob of pba %d damaged, will try to erase it", pba); + msb_mark_block_used(msb, pba); + msb_erase_block(msb, pba); + continue; + } else if (error) { + pr_err("unknown error %d on read of oob of pba %d - aborting", + error, pba); + + kfree(overwrite_flags); + return error; + } + + lba = be16_to_cpu(extra.logical_address); + managment_flag = extra.management_flag; + overwrite_flag = extra.overwrite_flag; + overwrite_flags[pba] = overwrite_flag; + + /* Skip bad blocks */ + if (!(overwrite_flag & MEMSTICK_OVERWRITE_BKST)) { + dbg("pba %05d -> [BAD]", pba); + msb_mark_block_used(msb, pba); + continue; + } + + /* Skip system/drm blocks */ + if ((managment_flag & MEMSTICK_MANAGMENT_FLAG_NORMAL) != + MEMSTICK_MANAGMENT_FLAG_NORMAL) { + dbg("pba %05d -> [reserved managment flag %02x]", + pba, managment_flag); + msb_mark_block_used(msb, pba); + continue; + } + + /* Erase temporary tables */ + if (!(managment_flag & MEMSTICK_MANAGEMENT_ATFLG)) { + dbg("pba %05d -> [temp table] - will erase", pba); + + msb_mark_block_used(msb, pba); + msb_erase_block(msb, pba); + continue; + } + + if (lba == MS_BLOCK_INVALID) { + dbg_verbose("pba %05d -> [free]", pba); + continue; + } + + msb_mark_block_used(msb, pba); + + /* Block has LBA not according to zoning*/ + if (msb_get_zone_from_lba(lba) != msb_get_zone_from_pba(pba)) { + pr_notice("pba %05d -> [bad lba %05d] - will erase", + pba, lba); + msb_erase_block(msb, pba); + continue; + } + + /* No collisions - great */ + if (msb->lba_to_pba_table[lba] == MS_BLOCK_INVALID) { + dbg_verbose("pba %05d -> [lba %05d]", pba, lba); + msb->lba_to_pba_table[lba] = pba; + continue; + } + + other_block = msb->lba_to_pba_table[lba]; + other_overwrite_flag = overwrite_flags[other_block]; + + pr_notice("Collision between pba %d and pba %d", + pba, other_block); + + if (!(overwrite_flag & MEMSTICK_OVERWRITE_UDST)) { + pr_notice("pba %d is marked as stable, use it", pba); + msb_erase_block(msb, other_block); + msb->lba_to_pba_table[lba] = pba; + continue; + } + + if (!(other_overwrite_flag & MEMSTICK_OVERWRITE_UDST)) { + pr_notice("pba %d is marked as stable, use it", + other_block); + msb_erase_block(msb, pba); + continue; + } + + pr_notice("collision between blocks %d and %d, without stable flag set on both, erasing pba %d", + pba, other_block, other_block); + + msb_erase_block(msb, other_block); + msb->lba_to_pba_table[lba] = pba; + } + + dbg("End of media scanning"); + kfree(overwrite_flags); + return 0; +} + +static void msb_cache_flush_timer(unsigned long data) +{ + struct msb_data *msb = (struct msb_data *)data; + msb->need_flush_cache = true; + queue_work(msb->io_queue, &msb->io_work); +} + + +static void msb_cache_discard(struct msb_data *msb) +{ + if (msb->cache_block_lba == MS_BLOCK_INVALID) + return; + + del_timer_sync(&msb->cache_flush_timer); + + dbg_verbose("Discarding the write cache"); + msb->cache_block_lba = MS_BLOCK_INVALID; + bitmap_zero(&msb->valid_cache_bitmap, msb->pages_in_block); +} + +static int msb_cache_init(struct msb_data *msb) +{ + setup_timer(&msb->cache_flush_timer, msb_cache_flush_timer, + (unsigned long)msb); + + if (!msb->cache) + msb->cache = kzalloc(msb->block_size, GFP_KERNEL); + if (!msb->cache) + return -ENOMEM; + + msb_cache_discard(msb); + return 0; +} + +static int msb_cache_flush(struct msb_data *msb) +{ + struct scatterlist sg; + struct ms_extra_data_register extra; + int page, offset, error; + u16 pba, lba; + + if (msb->read_only) + return -EROFS; + + if (msb->cache_block_lba == MS_BLOCK_INVALID) + return 0; + + lba = msb->cache_block_lba; + pba = msb->lba_to_pba_table[lba]; + + dbg_verbose("Flushing the write cache of pba %d (LBA %d)", + pba, msb->cache_block_lba); + + sg_init_one(&sg, msb->cache , msb->block_size); + + /* Read all missing pages in cache */ + for (page = 0; page < msb->pages_in_block; page++) { + + if (test_bit(page, &msb->valid_cache_bitmap)) + continue; + + offset = page * msb->page_size; + + dbg_verbose("reading non-present sector %d of cache block %d", + page, lba); + error = msb_read_page(msb, pba, page, &extra, &sg, offset); + + /* Bad pages are copied with 00 page status */ + if (error == -EBADMSG) { + pr_err("read error on sector %d, contents probably damaged", page); + continue; + } + + if (error) + return error; + + if ((extra.overwrite_flag & MEMSTICK_OV_PG_NORMAL) != + MEMSTICK_OV_PG_NORMAL) { + dbg("page %d is marked as bad", page); + continue; + } + + set_bit(page, &msb->valid_cache_bitmap); + } + + /* Write the cache now */ + error = msb_update_block(msb, msb->cache_block_lba, &sg, 0); + pba = msb->lba_to_pba_table[msb->cache_block_lba]; + + /* Mark invalid pages */ + if (!error) { + for (page = 0; page < msb->pages_in_block; page++) { + + if (test_bit(page, &msb->valid_cache_bitmap)) + continue; + + dbg("marking page %d as containing damaged data", + page); + msb_set_overwrite_flag(msb, + pba , page, 0xFF & ~MEMSTICK_OV_PG_NORMAL); + } + } + + msb_cache_discard(msb); + return error; +} + +static int msb_cache_write(struct msb_data *msb, int lba, + int page, bool add_to_cache_only, struct scatterlist *sg, int offset) +{ + int error; + struct scatterlist sg_tmp[10]; + + if (msb->read_only) + return -EROFS; + + if (msb->cache_block_lba == MS_BLOCK_INVALID || + lba != msb->cache_block_lba) + if (add_to_cache_only) + return 0; + + /* If we need to write different block */ + if (msb->cache_block_lba != MS_BLOCK_INVALID && + lba != msb->cache_block_lba) { + dbg_verbose("first flush the cache"); + error = msb_cache_flush(msb); + if (error) + return error; + } + + if (msb->cache_block_lba == MS_BLOCK_INVALID) { + msb->cache_block_lba = lba; + mod_timer(&msb->cache_flush_timer, + jiffies + msecs_to_jiffies(cache_flush_timeout)); + } + + dbg_verbose("Write of LBA %d page %d to cache ", lba, page); + + sg_init_table(sg_tmp, ARRAY_SIZE(sg_tmp)); + msb_sg_copy(sg, sg_tmp, ARRAY_SIZE(sg_tmp), offset, msb->page_size); + + sg_copy_to_buffer(sg_tmp, sg_nents(sg_tmp), + msb->cache + page * msb->page_size, msb->page_size); + + set_bit(page, &msb->valid_cache_bitmap); + return 0; +} + +static int msb_cache_read(struct msb_data *msb, int lba, + int page, struct scatterlist *sg, int offset) +{ + int pba = msb->lba_to_pba_table[lba]; + struct scatterlist sg_tmp[10]; + int error = 0; + + if (lba == msb->cache_block_lba && + test_bit(page, &msb->valid_cache_bitmap)) { + + dbg_verbose("Read of LBA %d (pba %d) sector %d from cache", + lba, pba, page); + + sg_init_table(sg_tmp, ARRAY_SIZE(sg_tmp)); + msb_sg_copy(sg, sg_tmp, ARRAY_SIZE(sg_tmp), + offset, msb->page_size); + sg_copy_from_buffer(sg_tmp, sg_nents(sg_tmp), + msb->cache + msb->page_size * page, + msb->page_size); + } else { + dbg_verbose("Read of LBA %d (pba %d) sector %d from device", + lba, pba, page); + + error = msb_read_page(msb, pba, page, NULL, sg, offset); + if (error) + return error; + + msb_cache_write(msb, lba, page, true, sg, offset); + } + return error; +} + +/* Emulated geometry table + * This table content isn't that importaint, + * One could put here different values, providing that they still + * cover whole disk. + * 64 MB entry is what windows reports for my 64M memstick */ + +static const struct chs_entry chs_table[] = { +/* size sectors cylynders heads */ + { 4, 16, 247, 2 }, + { 8, 16, 495, 2 }, + { 16, 16, 495, 4 }, + { 32, 16, 991, 4 }, + { 64, 16, 991, 8 }, + {128, 16, 991, 16 }, + { 0 } +}; + +/* Load information about the card */ +static int msb_init_card(struct memstick_dev *card) +{ + struct msb_data *msb = memstick_get_drvdata(card); + struct memstick_host *host = card->host; + struct ms_boot_page *boot_block; + int error = 0, i, raw_size_in_megs; + + msb->caps = 0; + + if (card->id.class >= MEMSTICK_CLASS_ROM && + card->id.class <= MEMSTICK_CLASS_ROM) + msb->read_only = true; + + msb->state = -1; + error = msb_reset(msb, false); + if (error) + return error; + + /* Due to a bug in Jmicron driver written by Alex Dubov, + its serial mode barely works, + so we switch to parallel mode right away */ + if (host->caps & MEMSTICK_CAP_PAR4) + msb_switch_to_parallel(msb); + + msb->page_size = sizeof(struct ms_boot_page); + + /* Read the boot page */ + error = msb_read_boot_blocks(msb); + if (error) + return -EIO; + + boot_block = &msb->boot_page[0]; + + /* Save intersting attributes from boot page */ + msb->block_count = boot_block->attr.number_of_blocks; + msb->page_size = boot_block->attr.page_size; + + msb->pages_in_block = boot_block->attr.block_size * 2; + msb->block_size = msb->page_size * msb->pages_in_block; + + if (msb->page_size > PAGE_SIZE) { + /* this isn't supported by linux at all, anyway*/ + dbg("device page %d size isn't supported", msb->page_size); + return -EINVAL; + } + + msb->block_buffer = kzalloc(msb->block_size, GFP_KERNEL); + if (!msb->block_buffer) + return -ENOMEM; + + raw_size_in_megs = (msb->block_size * msb->block_count) >> 20; + + for (i = 0; chs_table[i].size; i++) { + + if (chs_table[i].size != raw_size_in_megs) + continue; + + msb->geometry.cylinders = chs_table[i].cyl; + msb->geometry.heads = chs_table[i].head; + msb->geometry.sectors = chs_table[i].sec; + break; + } + + if (boot_block->attr.transfer_supporting == 1) + msb->caps |= MEMSTICK_CAP_PAR4; + + if (boot_block->attr.device_type & 0x03) + msb->read_only = true; + + dbg("Total block count = %d", msb->block_count); + dbg("Each block consists of %d pages", msb->pages_in_block); + dbg("Page size = %d bytes", msb->page_size); + dbg("Parallel mode supported: %d", !!(msb->caps & MEMSTICK_CAP_PAR4)); + dbg("Read only: %d", msb->read_only); + +#if 0 + /* Now we can switch the interface */ + if (host->caps & msb->caps & MEMSTICK_CAP_PAR4) + msb_switch_to_parallel(msb); +#endif + + error = msb_cache_init(msb); + if (error) + return error; + + error = msb_ftl_initialize(msb); + if (error) + return error; + + + /* Read the bad block table */ + error = msb_read_bad_block_table(msb, 0); + + if (error && error != -ENOMEM) { + dbg("failed to read bad block table from primary boot block, trying from backup"); + error = msb_read_bad_block_table(msb, 1); + } + + if (error) + return error; + + /* *drum roll* Scan the media */ + error = msb_ftl_scan(msb); + if (error) { + pr_err("Scan of media failed"); + return error; + } + + return 0; + +} + +static int msb_do_write_request(struct msb_data *msb, int lba, + int page, struct scatterlist *sg, size_t len, int *sucessfuly_written) +{ + int error = 0; + off_t offset = 0; + *sucessfuly_written = 0; + + while (offset < len) { + if (page == 0 && len - offset >= msb->block_size) { + + if (msb->cache_block_lba == lba) + msb_cache_discard(msb); + + dbg_verbose("Writing whole lba %d", lba); + error = msb_update_block(msb, lba, sg, offset); + if (error) + return error; + + offset += msb->block_size; + *sucessfuly_written += msb->block_size; + lba++; + continue; + } + + error = msb_cache_write(msb, lba, page, false, sg, offset); + if (error) + return error; + + offset += msb->page_size; + *sucessfuly_written += msb->page_size; + + page++; + if (page == msb->pages_in_block) { + page = 0; + lba++; + } + } + return 0; +} + +static int msb_do_read_request(struct msb_data *msb, int lba, + int page, struct scatterlist *sg, int len, int *sucessfuly_read) +{ + int error = 0; + int offset = 0; + *sucessfuly_read = 0; + + while (offset < len) { + + error = msb_cache_read(msb, lba, page, sg, offset); + if (error) + return error; + + offset += msb->page_size; + *sucessfuly_read += msb->page_size; + + page++; + if (page == msb->pages_in_block) { + page = 0; + lba++; + } + } + return 0; +} + +static void msb_io_work(struct work_struct *work) +{ + struct msb_data *msb = container_of(work, struct msb_data, io_work); + int page, error, len; + sector_t lba; + unsigned long flags; + struct scatterlist *sg = msb->prealloc_sg; + + dbg_verbose("IO: work started"); + + while (1) { + spin_lock_irqsave(&msb->q_lock, flags); + + if (msb->need_flush_cache) { + msb->need_flush_cache = false; + spin_unlock_irqrestore(&msb->q_lock, flags); + msb_cache_flush(msb); + continue; + } + + if (!msb->req) { + msb->req = blk_fetch_request(msb->queue); + if (!msb->req) { + dbg_verbose("IO: no more requests exiting"); + spin_unlock_irqrestore(&msb->q_lock, flags); + return; + } + } + + spin_unlock_irqrestore(&msb->q_lock, flags); + + /* If card was removed meanwhile */ + if (!msb->req) + return; + + /* process the request */ + dbg_verbose("IO: processing new request"); + blk_rq_map_sg(msb->queue, msb->req, sg); + + lba = blk_rq_pos(msb->req); + + sector_div(lba, msb->page_size / 512); + page = do_div(lba, msb->pages_in_block); + + if (rq_data_dir(msb->req) == READ) + error = msb_do_read_request(msb, lba, page, sg, + blk_rq_bytes(msb->req), &len); + else + error = msb_do_write_request(msb, lba, page, sg, + blk_rq_bytes(msb->req), &len); + + spin_lock_irqsave(&msb->q_lock, flags); + + if (len) + if (!__blk_end_request(msb->req, 0, len)) + msb->req = NULL; + + if (error && msb->req) { + dbg_verbose("IO: ending one sector of the request with error"); + if (!__blk_end_request(msb->req, error, msb->page_size)) + msb->req = NULL; + } + + if (msb->req) + dbg_verbose("IO: request still pending"); + + spin_unlock_irqrestore(&msb->q_lock, flags); + } +} + +static DEFINE_IDR(msb_disk_idr); /*set of used disk numbers */ +static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */ + +static int msb_bd_open(struct block_device *bdev, fmode_t mode) +{ + struct gendisk *disk = bdev->bd_disk; + struct msb_data *msb = disk->private_data; + + dbg_verbose("block device open"); + + mutex_lock(&msb_disk_lock); + + if (msb && msb->card) + msb->usage_count++; + + mutex_unlock(&msb_disk_lock); + return 0; +} + +static void msb_data_clear(struct msb_data *msb) +{ + kfree(msb->boot_page); + kfree(msb->used_blocks_bitmap); + kfree(msb->lba_to_pba_table); + kfree(msb->cache); + msb->card = NULL; +} + +static int msb_disk_release(struct gendisk *disk) +{ + struct msb_data *msb = disk->private_data; + + dbg_verbose("block device release"); + mutex_lock(&msb_disk_lock); + + if (msb) { + if (msb->usage_count) + msb->usage_count--; + + if (!msb->usage_count) { + disk->private_data = NULL; + idr_remove(&msb_disk_idr, msb->disk_id); + put_disk(disk); + kfree(msb); + } + } + mutex_unlock(&msb_disk_lock); + return 0; +} + +static void msb_bd_release(struct gendisk *disk, fmode_t mode) +{ + msb_disk_release(disk); +} + +static int msb_bd_getgeo(struct block_device *bdev, + struct hd_geometry *geo) +{ + struct msb_data *msb = bdev->bd_disk->private_data; + *geo = msb->geometry; + return 0; +} + +static int msb_prepare_req(struct request_queue *q, struct request *req) +{ + if (req->cmd_type != REQ_TYPE_FS && + req->cmd_type != REQ_TYPE_BLOCK_PC) { + blk_dump_rq_flags(req, "MS unsupported request"); + return BLKPREP_KILL; + } + req->cmd_flags |= REQ_DONTPREP; + return BLKPREP_OK; +} + +static void msb_submit_req(struct request_queue *q) +{ + struct memstick_dev *card = q->queuedata; + struct msb_data *msb = memstick_get_drvdata(card); + struct request *req = NULL; + + dbg_verbose("Submit request"); + + if (msb->card_dead) { + dbg("Refusing requests on removed card"); + + WARN_ON(!msb->io_queue_stopped); + + while ((req = blk_fetch_request(q)) != NULL) + __blk_end_request_all(req, -ENODEV); + return; + } + + if (msb->req) + return; + + if (!msb->io_queue_stopped) + queue_work(msb->io_queue, &msb->io_work); +} + +static int msb_check_card(struct memstick_dev *card) +{ + struct msb_data *msb = memstick_get_drvdata(card); + return (msb->card_dead == 0); +} + +static void msb_stop(struct memstick_dev *card) +{ + struct msb_data *msb = memstick_get_drvdata(card); + unsigned long flags; + + dbg("Stopping all msblock IO"); + + spin_lock_irqsave(&msb->q_lock, flags); + blk_stop_queue(msb->queue); + msb->io_queue_stopped = true; + spin_unlock_irqrestore(&msb->q_lock, flags); + + del_timer_sync(&msb->cache_flush_timer); + flush_workqueue(msb->io_queue); + + if (msb->req) { + spin_lock_irqsave(&msb->q_lock, flags); + blk_requeue_request(msb->queue, msb->req); + msb->req = NULL; + spin_unlock_irqrestore(&msb->q_lock, flags); + } + +} + +static void msb_start(struct memstick_dev *card) +{ + struct msb_data *msb = memstick_get_drvdata(card); + unsigned long flags; + + dbg("Resuming IO from msblock"); + + msb_invalidate_reg_window(msb); + + spin_lock_irqsave(&msb->q_lock, flags); + if (!msb->io_queue_stopped || msb->card_dead) { + spin_unlock_irqrestore(&msb->q_lock, flags); + return; + } + spin_unlock_irqrestore(&msb->q_lock, flags); + + /* Kick cache flush anyway, its harmless */ + msb->need_flush_cache = true; + msb->io_queue_stopped = false; + + spin_lock_irqsave(&msb->q_lock, flags); + blk_start_queue(msb->queue); + spin_unlock_irqrestore(&msb->q_lock, flags); + + queue_work(msb->io_queue, &msb->io_work); + +} + +static const struct block_device_operations msb_bdops = { + .open = msb_bd_open, + .release = msb_bd_release, + .getgeo = msb_bd_getgeo, + .owner = THIS_MODULE +}; + +/* Registers the block device */ +static int msb_init_disk(struct memstick_dev *card) +{ + struct msb_data *msb = memstick_get_drvdata(card); + struct memstick_host *host = card->host; + int rc; + u64 limit = BLK_BOUNCE_HIGH; + unsigned long capacity; + + if (host->dev.dma_mask && *(host->dev.dma_mask)) + limit = *(host->dev.dma_mask); + + mutex_lock(&msb_disk_lock); + msb->disk_id = idr_alloc(&msb_disk_idr, card, 0, 256, GFP_KERNEL); + mutex_unlock(&msb_disk_lock); + + if (msb->disk_id < 0) + return msb->disk_id; + + msb->disk = alloc_disk(0); + if (!msb->disk) { + rc = -ENOMEM; + goto out_release_id; + } + + msb->queue = blk_init_queue(msb_submit_req, &msb->q_lock); + if (!msb->queue) { + rc = -ENOMEM; + goto out_put_disk; + } + + msb->queue->queuedata = card; + blk_queue_prep_rq(msb->queue, msb_prepare_req); + + blk_queue_bounce_limit(msb->queue, limit); + blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES); + blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS); + blk_queue_max_segment_size(msb->queue, + MS_BLOCK_MAX_PAGES * msb->page_size); + blk_queue_logical_block_size(msb->queue, msb->page_size); + + sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id); + msb->disk->fops = &msb_bdops; + msb->disk->private_data = msb; + msb->disk->queue = msb->queue; + msb->disk->driverfs_dev = &card->dev; + msb->disk->flags |= GENHD_FL_EXT_DEVT; + + capacity = msb->pages_in_block * msb->logical_block_count; + capacity *= (msb->page_size / 512); + set_capacity(msb->disk, capacity); + dbg("Set total disk size to %lu sectors", capacity); + + msb->usage_count = 1; + msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM); + INIT_WORK(&msb->io_work, msb_io_work); + sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1); + + if (msb->read_only) + set_disk_ro(msb->disk, 1); + + msb_start(card); + add_disk(msb->disk); + dbg("Disk added"); + return 0; + +out_put_disk: + put_disk(msb->disk); +out_release_id: + mutex_lock(&msb_disk_lock); + idr_remove(&msb_disk_idr, msb->disk_id); + mutex_unlock(&msb_disk_lock); + return rc; +} + +static int msb_probe(struct memstick_dev *card) +{ + struct msb_data *msb; + int rc = 0; + + msb = kzalloc(sizeof(struct msb_data), GFP_KERNEL); + if (!msb) + return -ENOMEM; + memstick_set_drvdata(card, msb); + msb->card = card; + spin_lock_init(&msb->q_lock); + + rc = msb_init_card(card); + if (rc) + goto out_free; + + rc = msb_init_disk(card); + if (!rc) { + card->check = msb_check_card; + card->stop = msb_stop; + card->start = msb_start; + return 0; + } +out_free: + memstick_set_drvdata(card, NULL); + msb_data_clear(msb); + kfree(msb); + return rc; +} + +static void msb_remove(struct memstick_dev *card) +{ + struct msb_data *msb = memstick_get_drvdata(card); + unsigned long flags; + + if (!msb->io_queue_stopped) + msb_stop(card); + + dbg("Removing the disk device"); + + /* Take care of unhandled + new requests from now on */ + spin_lock_irqsave(&msb->q_lock, flags); + msb->card_dead = true; + blk_start_queue(msb->queue); + spin_unlock_irqrestore(&msb->q_lock, flags); + + /* Remove the disk */ + del_gendisk(msb->disk); + blk_cleanup_queue(msb->queue); + msb->queue = NULL; + + mutex_lock(&msb_disk_lock); + msb_data_clear(msb); + mutex_unlock(&msb_disk_lock); + + msb_disk_release(msb->disk); + memstick_set_drvdata(card, NULL); +} + +#ifdef CONFIG_PM + +static int msb_suspend(struct memstick_dev *card, pm_message_t state) +{ + msb_stop(card); + return 0; +} + +static int msb_resume(struct memstick_dev *card) +{ + struct msb_data *msb = memstick_get_drvdata(card); + struct msb_data *new_msb = NULL; + bool card_dead = true; + +#ifndef CONFIG_MEMSTICK_UNSAFE_RESUME + msb->card_dead = true; + return 0; +#endif + mutex_lock(&card->host->lock); + + new_msb = kzalloc(sizeof(struct msb_data), GFP_KERNEL); + if (!new_msb) + goto out; + + new_msb->card = card; + memstick_set_drvdata(card, new_msb); + spin_lock_init(&new_msb->q_lock); + sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1); + + if (msb_init_card(card)) + goto out; + + if (msb->block_size != new_msb->block_size) + goto out; + + if (memcmp(msb->boot_page, new_msb->boot_page, + sizeof(struct ms_boot_page))) + goto out; + + if (msb->logical_block_count != new_msb->logical_block_count || + memcmp(msb->lba_to_pba_table, new_msb->lba_to_pba_table, + msb->logical_block_count)) + goto out; + + if (msb->block_count != new_msb->block_count || + memcmp(msb->used_blocks_bitmap, new_msb->used_blocks_bitmap, + msb->block_count / 8)) + goto out; + + card_dead = false; +out: + if (card_dead) + dbg("Card was removed/replaced during suspend"); + + msb->card_dead = card_dead; + memstick_set_drvdata(card, msb); + + if (new_msb) { + msb_data_clear(new_msb); + kfree(new_msb); + } + + msb_start(card); + mutex_unlock(&card->host->lock); + return 0; +} +#else + +#define msb_suspend NULL +#define msb_resume NULL + +#endif /* CONFIG_PM */ + +static struct memstick_device_id msb_id_tbl[] = { + {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, + MEMSTICK_CLASS_FLASH}, + + {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, + MEMSTICK_CLASS_ROM}, + + {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, + MEMSTICK_CLASS_RO}, + + {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, + MEMSTICK_CLASS_WP}, + + {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_DUO, MEMSTICK_CATEGORY_STORAGE_DUO, + MEMSTICK_CLASS_DUO}, + {} +}; +MODULE_DEVICE_TABLE(memstick, msb_id_tbl); + + +static struct memstick_driver msb_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE + }, + .id_table = msb_id_tbl, + .probe = msb_probe, + .remove = msb_remove, + .suspend = msb_suspend, + .resume = msb_resume +}; + +static int major; + +static int __init msb_init(void) +{ + int rc = register_blkdev(0, DRIVER_NAME); + + if (rc < 0) { + pr_err("failed to register major (error %d)\n", rc); + return rc; + } + + major = rc; + rc = memstick_register_driver(&msb_driver); + if (rc) { + unregister_blkdev(major, DRIVER_NAME); + pr_err("failed to register memstick driver (error %d)\n", rc); + } + + return rc; +} + +static void __exit msb_exit(void) +{ + memstick_unregister_driver(&msb_driver); + unregister_blkdev(major, DRIVER_NAME); + idr_destroy(&msb_disk_idr); +} + +module_init(msb_init); +module_exit(msb_exit); + +module_param(cache_flush_timeout, int, S_IRUGO); +MODULE_PARM_DESC(cache_flush_timeout, + "Cache flush timeout in msec (1000 default)"); +module_param(debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "Debug level (0-2)"); + +module_param(verify_writes, bool, S_IRUGO); +MODULE_PARM_DESC(verify_writes, "Read back and check all data that is written"); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Maxim Levitsky"); +MODULE_DESCRIPTION("Sony MemoryStick block device driver"); diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h new file mode 100644 index 000000000000..96e637550988 --- /dev/null +++ b/drivers/memstick/core/ms_block.h @@ -0,0 +1,290 @@ +/* + * ms_block.h - Sony MemoryStick (legacy) storage support + + * Copyright (C) 2013 Maxim Levitsky <maximlevitsky@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Minor portions of the driver are copied from mspro_block.c which is + * Copyright (C) 2007 Alex Dubov <oakad@yahoo.com> + * + * Also ms structures were copied from old broken driver by same author + * These probably come from MS spec + * + */ + +#ifndef MS_BLOCK_NEW_H +#define MS_BLOCK_NEW_H + +#define MS_BLOCK_MAX_SEGS 32 +#define MS_BLOCK_MAX_PAGES ((2 << 16) - 1) + +#define MS_BLOCK_MAX_BOOT_ADDR 0x000c +#define MS_BLOCK_BOOT_ID 0x0001 +#define MS_BLOCK_INVALID 0xffff +#define MS_MAX_ZONES 16 +#define MS_BLOCKS_IN_ZONE 512 + +#define MS_BLOCK_MAP_LINE_SZ 16 +#define MS_BLOCK_PART_SHIFT 3 + + +#define MEMSTICK_UNCORR_ERROR (MEMSTICK_STATUS1_UCFG | \ + MEMSTICK_STATUS1_UCEX | MEMSTICK_STATUS1_UCDT) + +#define MEMSTICK_CORR_ERROR (MEMSTICK_STATUS1_FGER | MEMSTICK_STATUS1_EXER | \ + MEMSTICK_STATUS1_DTER) + +#define MEMSTICK_INT_ERROR (MEMSTICK_INT_CMDNAK | MEMSTICK_INT_ERR) + +#define MEMSTICK_OVERWRITE_FLAG_NORMAL \ + (MEMSTICK_OVERWRITE_PGST1 | \ + MEMSTICK_OVERWRITE_PGST0 | \ + MEMSTICK_OVERWRITE_BKST) + +#define MEMSTICK_OV_PG_NORMAL \ + (MEMSTICK_OVERWRITE_PGST1 | MEMSTICK_OVERWRITE_PGST0) + +#define MEMSTICK_MANAGMENT_FLAG_NORMAL \ + (MEMSTICK_MANAGEMENT_SYSFLG | \ + MEMSTICK_MANAGEMENT_SCMS1 | \ + MEMSTICK_MANAGEMENT_SCMS0) \ + +struct ms_boot_header { + unsigned short block_id; + unsigned short format_reserved; + unsigned char reserved0[184]; + unsigned char data_entry; + unsigned char reserved1[179]; +} __packed; + + +struct ms_system_item { + unsigned int start_addr; + unsigned int data_size; + unsigned char data_type_id; + unsigned char reserved[3]; +} __packed; + +struct ms_system_entry { + struct ms_system_item disabled_block; + struct ms_system_item cis_idi; + unsigned char reserved[24]; +} __packed; + +struct ms_boot_attr_info { + unsigned char memorystick_class; + unsigned char format_unique_value1; + unsigned short block_size; + unsigned short number_of_blocks; + unsigned short number_of_effective_blocks; + unsigned short page_size; + unsigned char extra_data_size; + unsigned char format_unique_value2; + unsigned char assembly_time[8]; + unsigned char format_unique_value3; + unsigned char serial_number[3]; + unsigned char assembly_manufacturer_code; + unsigned char assembly_model_code[3]; + unsigned short memory_manufacturer_code; + unsigned short memory_device_code; + unsigned short implemented_capacity; + unsigned char format_unique_value4[2]; + unsigned char vcc; + unsigned char vpp; + unsigned short controller_number; + unsigned short controller_function; + unsigned char reserved0[9]; + unsigned char transfer_supporting; + unsigned short format_unique_value5; + unsigned char format_type; + unsigned char memorystick_application; + unsigned char device_type; + unsigned char reserved1[22]; + unsigned char format_uniqure_value6[2]; + unsigned char reserved2[15]; +} __packed; + +struct ms_cis_idi { + unsigned short general_config; + unsigned short logical_cylinders; + unsigned short reserved0; + unsigned short logical_heads; + unsigned short track_size; + unsigned short page_size; + unsigned short pages_per_track; + unsigned short msw; + unsigned short lsw; + unsigned short reserved1; + unsigned char serial_number[20]; + unsigned short buffer_type; + unsigned short buffer_size_increments; + unsigned short long_command_ecc; + unsigned char firmware_version[28]; + unsigned char model_name[18]; + unsigned short reserved2[5]; + unsigned short pio_mode_number; + unsigned short dma_mode_number; + unsigned short field_validity; + unsigned short current_logical_cylinders; + unsigned short current_logical_heads; + unsigned short current_pages_per_track; + unsigned int current_page_capacity; + unsigned short mutiple_page_setting; + unsigned int addressable_pages; + unsigned short single_word_dma; + unsigned short multi_word_dma; + unsigned char reserved3[128]; +} __packed; + + +struct ms_boot_page { + struct ms_boot_header header; + struct ms_system_entry entry; + struct ms_boot_attr_info attr; +} __packed; + +struct msb_data { + unsigned int usage_count; + struct memstick_dev *card; + struct gendisk *disk; + struct request_queue *queue; + spinlock_t q_lock; + struct hd_geometry geometry; + struct attribute_group attr_group; + struct request *req; + int caps; + int disk_id; + + /* IO */ + struct workqueue_struct *io_queue; + bool io_queue_stopped; + struct work_struct io_work; + bool card_dead; + + /* Media properties */ + struct ms_boot_page *boot_page; + u16 boot_block_locations[2]; + int boot_block_count; + + bool read_only; + unsigned short page_size; + int block_size; + int pages_in_block; + int zone_count; + int block_count; + int logical_block_count; + + /* FTL tables */ + unsigned long *used_blocks_bitmap; + unsigned long *erased_blocks_bitmap; + u16 *lba_to_pba_table; + int free_block_count[MS_MAX_ZONES]; + bool ftl_initialized; + + /* Cache */ + unsigned char *cache; + unsigned long valid_cache_bitmap; + int cache_block_lba; + bool need_flush_cache; + struct timer_list cache_flush_timer; + + /* Preallocated buffers */ + unsigned char *block_buffer; + struct scatterlist prealloc_sg[MS_BLOCK_MAX_SEGS+1]; + + + /* handler's local data */ + struct ms_register_addr reg_addr; + bool addr_valid; + + u8 command_value; + bool command_need_oob; + struct scatterlist *current_sg; + int current_sg_offset; + + struct ms_register regs; + int current_page; + + int state; + int exit_error; + bool int_polling; + unsigned long int_timeout; + +}; + +enum msb_readpage_states { + MSB_RP_SEND_BLOCK_ADDRESS = 0, + MSB_RP_SEND_READ_COMMAND, + + MSB_RP_SEND_INT_REQ, + MSB_RP_RECEIVE_INT_REQ_RESULT, + + MSB_RP_SEND_READ_STATUS_REG, + MSB_RP_RECIVE_STATUS_REG, + + MSB_RP_SEND_OOB_READ, + MSB_RP_RECEIVE_OOB_READ, + + MSB_RP_SEND_READ_DATA, + MSB_RP_RECEIVE_READ_DATA, +}; + +enum msb_write_block_states { + MSB_WB_SEND_WRITE_PARAMS = 0, + MSB_WB_SEND_WRITE_OOB, + MSB_WB_SEND_WRITE_COMMAND, + + MSB_WB_SEND_INT_REQ, + MSB_WB_RECEIVE_INT_REQ, + + MSB_WB_SEND_WRITE_DATA, + MSB_WB_RECEIVE_WRITE_CONFIRMATION, +}; + +enum msb_send_command_states { + MSB_SC_SEND_WRITE_PARAMS, + MSB_SC_SEND_WRITE_OOB, + MSB_SC_SEND_COMMAND, + + MSB_SC_SEND_INT_REQ, + MSB_SC_RECEIVE_INT_REQ, + +}; + +enum msb_reset_states { + MSB_RS_SEND, + MSB_RS_CONFIRM, +}; + +enum msb_par_switch_states { + MSB_PS_SEND_SWITCH_COMMAND, + MSB_PS_SWICH_HOST, + MSB_PS_CONFIRM, +}; + +struct chs_entry { + unsigned long size; + unsigned char sec; + unsigned short cyl; + unsigned char head; +}; + +static int msb_reset(struct msb_data *msb, bool full); + +static int h_msb_default_bad(struct memstick_dev *card, + struct memstick_request **mrq); + +#define __dbg(level, format, ...) \ + do { \ + if (debug >= level) \ + pr_err(format "\n", ## __VA_ARGS__); \ + } while (0) + + +#define dbg(format, ...) __dbg(1, format, ## __VA_ARGS__) +#define dbg_verbose(format, ...) __dbg(2, format, ## __VA_ARGS__) + +#endif diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c index 74dbb6bcf488..ffb36ba8a6e0 100644 --- a/drivers/mtd/chips/gen_probe.c +++ b/drivers/mtd/chips/gen_probe.c @@ -211,9 +211,7 @@ static inline struct mtd_info *cfi_cmdset_unknown(struct map_info *map, probe_function = __symbol_get(probename); if (!probe_function) { - char modname[sizeof("cfi_cmdset_%4.4X")]; - sprintf(modname, "cfi_cmdset_%4.4X", type); - request_module(modname); + request_module("cfi_cmdset_%4.4X", type); probe_function = __symbol_get(probename); } diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 35853b43d66e..741f11edf341 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -102,6 +102,19 @@ static int ehea_probe_adapter(struct platform_device *dev); static int ehea_remove(struct platform_device *dev); +static struct of_device_id ehea_module_device_table[] = { + { + .name = "lhea", + .compatible = "IBM,lhea", + }, + { + .type = "network", + .compatible = "IBM,lhea-ethernet", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehea_module_device_table); + static struct of_device_id ehea_device_table[] = { { .name = "lhea", @@ -109,7 +122,6 @@ static struct of_device_id ehea_device_table[] = { }, {}, }; -MODULE_DEVICE_TABLE(of, ehea_device_table); static struct platform_driver ehea_driver = { .driver = { diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c index 510b9c8d23a9..c6bfc4ad8b50 100644 --- a/drivers/net/irda/donauboe.c +++ b/drivers/net/irda/donauboe.c @@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = { .resume = toshoboe_wakeup }; -static int __init -donauboe_init (void) -{ - return pci_register_driver(&donauboe_pci_driver); -} - -static void __exit -donauboe_cleanup (void) -{ - pci_unregister_driver(&donauboe_pci_driver); -} - -module_init(donauboe_init); -module_exit(donauboe_cleanup); +module_pci_driver(donauboe_pci_driver); diff --git a/drivers/pcmcia/pd6729.c b/drivers/pcmcia/pd6729.c index a4c16ee5c718..622dd6fe7347 100644 --- a/drivers/pcmcia/pd6729.c +++ b/drivers/pcmcia/pd6729.c @@ -777,15 +777,4 @@ static struct pci_driver pd6729_pci_driver = { .remove = pd6729_pci_remove, }; -static int pd6729_module_init(void) -{ - return pci_register_driver(&pd6729_pci_driver); -} - -static void pd6729_module_exit(void) -{ - pci_unregister_driver(&pd6729_pci_driver); -} - -module_init(pd6729_module_init); -module_exit(pd6729_module_exit); +module_pci_driver(pd6729_pci_driver); diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 6b4ff099fb13..dc18a3a5e010 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -1439,20 +1439,6 @@ static struct pci_driver yenta_cardbus_driver = { .driver.pm = YENTA_PM_OPS, }; - -static int __init yenta_socket_init(void) -{ - return pci_register_driver(¥ta_cardbus_driver); -} - - -static void __exit yenta_socket_exit(void) -{ - pci_unregister_driver(¥ta_cardbus_driver); -} - - -module_init(yenta_socket_init); -module_exit(yenta_socket_exit); +module_pci_driver(yenta_cardbus_driver); MODULE_LICENSE("GPL"); diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index f4f30af2df68..2e8a20cac588 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1715,11 +1715,13 @@ int rio_unregister_scan(int mport_id, struct rio_scan *scan_ops) (mport_id == RIO_MPORT_ANY && port->nscan == scan_ops)) port->nscan = NULL; - list_for_each_entry(scan, &rio_scans, node) + list_for_each_entry(scan, &rio_scans, node) { if (scan->mport_id == mport_id) { list_del(&scan->node); kfree(scan); + break; } + } mutex_unlock(&rio_mport_list_lock); diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 7273b0139e5c..b5a2874b15ef 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -23,10 +23,6 @@ #include <linux/iio/iio.h> #include <linux/rtc.h> -/* Format: HID-SENSOR-usage_id_in_hex */ -/* Usage ID from spec for Time: 0x2000A0 */ -#define DRIVER_NAME "HID-SENSOR-2000a0" /* must be lowercase */ - enum hid_time_channel { CHANNEL_SCAN_INDEX_YEAR, CHANNEL_SCAN_INDEX_MONTH, @@ -300,9 +296,19 @@ static int hid_time_remove(struct platform_device *pdev) return 0; } +static struct platform_device_id hid_time_ids[] = { + { + /* Format: HID-SENSOR-usage_id_in_hex_lowercase */ + .name = "HID-SENSOR-2000a0", + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(platform, hid_time_ids); + static struct platform_driver hid_time_platform_driver = { + .id_table = hid_time_ids, .driver = { - .name = DRIVER_NAME, + .name = KBUILD_MODNAME, .owner = THIS_MODULE, }, .probe = hid_time_probe, diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c index 0163457c12bb..db3710ff1b68 100644 --- a/drivers/scsi/a100u2w.c +++ b/drivers/scsi/a100u2w.c @@ -1227,19 +1227,9 @@ static struct pci_driver inia100_pci_driver = { .remove = inia100_remove_one, }; -static int __init inia100_init(void) -{ - return pci_register_driver(&inia100_pci_driver); -} - -static void __exit inia100_exit(void) -{ - pci_unregister_driver(&inia100_pci_driver); -} +module_pci_driver(inia100_pci_driver); MODULE_DESCRIPTION("Initio A100U2W SCSI driver"); MODULE_AUTHOR("Initio Corporation"); MODULE_LICENSE("Dual BSD/GPL"); -module_init(inia100_init); -module_exit(inia100_exit); diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 694e13c45dfd..e73445bd53a8 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -4882,29 +4882,7 @@ static struct pci_driver dc395x_driver = { .remove = dc395x_remove_one, }; - -/** - * dc395x_module_init - Module initialization function - * - * Used by both module and built-in driver to initialise this driver. - **/ -static int __init dc395x_module_init(void) -{ - return pci_register_driver(&dc395x_driver); -} - - -/** - * dc395x_module_exit - Module cleanup function. - **/ -static void __exit dc395x_module_exit(void) -{ - pci_unregister_driver(&dc395x_driver); -} - - -module_init(dc395x_module_init); -module_exit(dc395x_module_exit); +module_pci_driver(dc395x_driver); MODULE_AUTHOR("C.L. Huang / Erich Chen / Kurt Garloff"); MODULE_DESCRIPTION("SCSI host adapter driver for Tekram TRM-S1040 based adapters: Tekram DC395 and DC315 series"); diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c index 4b0dd8c56707..bb28062a3237 100644 --- a/drivers/scsi/dmx3191d.c +++ b/drivers/scsi/dmx3191d.c @@ -153,18 +153,7 @@ static struct pci_driver dmx3191d_pci_driver = { .remove = dmx3191d_remove_one, }; -static int __init dmx3191d_init(void) -{ - return pci_register_driver(&dmx3191d_pci_driver); -} - -static void __exit dmx3191d_exit(void) -{ - pci_unregister_driver(&dmx3191d_pci_driver); -} - -module_init(dmx3191d_init); -module_exit(dmx3191d_exit); +module_pci_driver(dmx3191d_pci_driver); MODULE_AUTHOR("Massimo Piccioni <dafastidio@libero.it>"); MODULE_DESCRIPTION("Domex DMX3191D SCSI driver"); diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c index 280d5af113d1..1befc26fd95e 100644 --- a/drivers/scsi/initio.c +++ b/drivers/scsi/initio.c @@ -2995,19 +2995,8 @@ static struct pci_driver initio_pci_driver = { .remove = initio_remove_one, }; -static int __init initio_init_driver(void) -{ - return pci_register_driver(&initio_pci_driver); -} - -static void __exit initio_exit_driver(void) -{ - pci_unregister_driver(&initio_pci_driver); -} +module_pci_driver(initio_pci_driver); MODULE_DESCRIPTION("Initio INI-9X00U/UW SCSI device driver"); MODULE_AUTHOR("Initio Corporation"); MODULE_LICENSE("GPL"); - -module_init(initio_init_driver); -module_exit(initio_exit_driver); diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c index c3601b57a80c..d0b6a0320d94 100644 --- a/drivers/scsi/mvumi.c +++ b/drivers/scsi/mvumi.c @@ -2735,22 +2735,4 @@ static struct pci_driver mvumi_pci_driver = { #endif }; -/** - * mvumi_init - Driver load entry point - */ -static int __init mvumi_init(void) -{ - return pci_register_driver(&mvumi_pci_driver); -} - -/** - * mvumi_exit - Driver unload entry point - */ -static void __exit mvumi_exit(void) -{ - - pci_unregister_driver(&mvumi_pci_driver); -} - -module_init(mvumi_init); -module_exit(mvumi_exit); +module_pci_driver(mvumi_pci_driver); diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h index 042a2bc432be..2af15d41e77a 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h @@ -63,9 +63,9 @@ #if BITS_PER_LONG == 32 /* limit to lowmem on 32-bit systems */ #define NUM_CACHEPAGES \ - min(num_physpages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4) + min(totalram_pages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4) #else -#define NUM_CACHEPAGES num_physpages +#define NUM_CACHEPAGES totalram_pages #endif /* @@ -79,42 +79,4 @@ do { __oldfs = get_fs(); set_fs(get_ds());} while(0) #define MMSPACE_CLOSE set_fs(__oldfs) -/* - * Shrinker - */ - -# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \ - struct shrinker *shrinker, \ - struct shrink_control *sc -# define shrink_param(sc, var) ((sc)->var) - -typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)); - -static inline -struct shrinker *set_shrinker(int seek, shrinker_t func) -{ - struct shrinker *s; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return (NULL); - - s->shrink = func; - s->seeks = seek; - - register_shrinker(s); - - return s; -} - -static inline -void remove_shrinker(struct shrinker *shrinker) -{ - if (shrinker == NULL) - return; - - unregister_shrinker(shrinker); - kfree(shrinker); -} - #endif /* __LINUX_CFS_MEM_H__ */ diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c index 42df53072dc3..aace5342b76a 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c @@ -339,8 +339,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) cli->cl_avail_grant = 0; /* FIXME: Should limit this for the sum of all cl_dirty_max. */ cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024; - if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > num_physpages / 8) - cli->cl_dirty_max = num_physpages << (PAGE_CACHE_SHIFT - 3); + if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8) + cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3); INIT_LIST_HEAD(&cli->cl_cache_waiters); INIT_LIST_HEAD(&cli->cl_loi_ready_list); INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); @@ -388,11 +388,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) if (!strcmp(name, LUSTRE_MDC_NAME)) { cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT; - } else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) { + } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) { cli->cl_max_rpcs_in_flight = 2; - } else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) { + } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) { cli->cl_max_rpcs_in_flight = 3; - } else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) { + } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) { cli->cl_max_rpcs_in_flight = 4; } else { if (osc_on_mdt(obddev->obd_name)) diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index b3b60288e5f5..4c41e02604f5 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -524,7 +524,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { struct ldlm_namespace *ns; - int canceled = 0, unused; + int unused; ns = ldlm_pl2ns(pl); @@ -543,14 +543,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, unused = ns->ns_nr_unused; spin_unlock(&ns->ns_lock); - if (nr) { - canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC, - LDLM_CANCEL_SHRINK); - } - /* - * Return the number of potentially reclaimable locks. - */ - return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure; + if (nr == 0) + return (unused / 100) * sysctl_vfs_cache_pressure; + else + return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK); } struct ldlm_pool_ops ldlm_srv_pool_ops = { @@ -605,9 +601,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl) } EXPORT_SYMBOL(ldlm_pool_recalc); -/** +/* * Pool shrink wrapper. Will call either client or server pool recalc callback - * depending what pool \a pl is used. + * depending what pool pl is used. When nr == 0, just return the number of + * freeable locks. Otherwise, return the number of canceled locks. */ int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) @@ -1025,28 +1022,23 @@ static int ldlm_pool_granted(struct ldlm_pool *pl) } static struct ptlrpc_thread *ldlm_pools_thread; -static struct shrinker *ldlm_pools_srv_shrinker; -static struct shrinker *ldlm_pools_cli_shrinker; static struct completion ldlm_pools_comp; /* - * Cancel \a nr locks from all namespaces (if possible). Returns number of - * cached locks after shrink is finished. All namespaces are asked to - * cancel approximately equal amount of locks to keep balancing. + * count locks from all namespaces (if possible). Returns number of + * cached locks. */ -static int ldlm_pools_shrink(ldlm_side_t client, int nr, - unsigned int gfp_mask) +static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask) { - int total = 0, cached = 0, nr_ns; + unsigned long total = 0, nr_ns; struct ldlm_namespace *ns; void *cookie; - if (client == LDLM_NAMESPACE_CLIENT && nr != 0 && - !(gfp_mask & __GFP_FS)) - return -1; + if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) + return 0; - CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n", - nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); + CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n", + client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); cookie = cl_env_reenter(); @@ -1070,16 +1062,26 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, ldlm_namespace_put(ns); } - if (nr == 0 || total == 0) { - cl_env_reexit(cookie); - return total; - } + cl_env_reexit(cookie); + return total; +} + +static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask) +{ + unsigned long freed = 0; + int tmp, nr_ns; + struct ldlm_namespace *ns; + void *cookie; + if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) + return -1; + + cookie = cl_env_reenter(); /* * Shrink at least ldlm_namespace_nr(client) namespaces. */ - for (nr_ns = atomic_read(ldlm_namespace_nr(client)); - nr_ns > 0; nr_ns--) + for (tmp = nr_ns = atomic_read(ldlm_namespace_nr(client)); + tmp > 0; tmp--) { int cancel, nr_locks; @@ -1089,12 +1091,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, mutex_lock(ldlm_namespace_lock(client)); if (list_empty(ldlm_namespace_list(client))) { mutex_unlock(ldlm_namespace_lock(client)); - /* - * If list is empty, we can't return any @cached > 0, - * that probably would cause needless shrinker - * call. - */ - cached = 0; break; } ns = ldlm_namespace_first_locked(client); @@ -1103,29 +1099,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, mutex_unlock(ldlm_namespace_lock(client)); nr_locks = ldlm_pool_granted(&ns->ns_pool); - cancel = 1 + nr_locks * nr / total; - ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); - cached += ldlm_pool_granted(&ns->ns_pool); + /* + * We use to shrink propotionally but with new shrinker API, + * we lost the total number of freeable locks. + */ + cancel = 1 + min_t(int, nr_locks, nr / nr_ns); + freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); ldlm_namespace_put(ns); } cl_env_reexit(cookie); - /* we only decrease the SLV in server pools shrinker, return -1 to - * kernel to avoid needless loop. LU-1128 */ - return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached; + /* + * we only decrease the SLV in server pools shrinker, return + * SHRINK_STOP to kernel to avoid needless loop. LU-1128 + */ + return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed; +} + +static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc) +{ + return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask); } -static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc) { - return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, - shrink_param(sc, nr_to_scan), - shrink_param(sc, gfp_mask)); + return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan, + sc->gfp_mask); } -static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc) { - return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, - shrink_param(sc, nr_to_scan), - shrink_param(sc, gfp_mask)); + return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask); +} + +static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc) +{ + return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan, + sc->gfp_mask); } void ldlm_pools_recalc(ldlm_side_t client) @@ -1351,6 +1360,18 @@ static void ldlm_pools_thread_stop(void) EXIT; } +static struct shrinker ldlm_pools_srv_shrinker = { + .count_objects = ldlm_pools_srv_count, + .scan_objects = ldlm_pools_srv_scan, + .seeks = DEFAULT_SEEKS, +}; + +static struct shrinker ldlm_pools_cli_shrinker = { + .count_objects = ldlm_pools_cli_count, + .scan_objects = ldlm_pools_cli_scan, + .seeks = DEFAULT_SEEKS, +}; + int ldlm_pools_init(void) { int rc; @@ -1358,12 +1379,8 @@ int ldlm_pools_init(void) rc = ldlm_pools_thread_start(); if (rc == 0) { - ldlm_pools_srv_shrinker = - set_shrinker(DEFAULT_SEEKS, - ldlm_pools_srv_shrink); - ldlm_pools_cli_shrinker = - set_shrinker(DEFAULT_SEEKS, - ldlm_pools_cli_shrink); + register_shrinker(&ldlm_pools_srv_shrinker); + register_shrinker(&ldlm_pools_cli_shrinker); } RETURN(rc); } @@ -1371,14 +1388,8 @@ EXPORT_SYMBOL(ldlm_pools_init); void ldlm_pools_fini(void) { - if (ldlm_pools_srv_shrinker != NULL) { - remove_shrinker(ldlm_pools_srv_shrinker); - ldlm_pools_srv_shrinker = NULL; - } - if (ldlm_pools_cli_shrinker != NULL) { - remove_shrinker(ldlm_pools_cli_shrinker); - ldlm_pools_cli_shrinker = NULL; - } + unregister_shrinker(&ldlm_pools_srv_shrinker); + unregister_shrinker(&ldlm_pools_cli_shrinker); ldlm_pools_thread_stop(); } EXPORT_SYMBOL(ldlm_pools_fini); diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c index 6f563436a255..a500a0b5ba9f 100644 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c +++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c @@ -269,7 +269,7 @@ void cfs_print_to_console(struct ptldebug_header *hdr, int mask, int cfs_trace_max_debug_mb(void) { - int total_mb = (num_physpages >> (20 - PAGE_SHIFT)); + int total_mb = (totalram_pages >> (20 - PAGE_SHIFT)); return MAX(512, (total_mb * 80)/100); } diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 6a82505c7933..a30c411b583b 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -243,9 +243,9 @@ static ssize_t ll_max_readahead_mb_seq_write(struct file *file, const char *buff if (rc) return rc; - if (pages_number < 0 || pages_number > num_physpages / 2) { + if (pages_number < 0 || pages_number > totalram_pages / 2) { CERROR("can't set file readahead more than %lu MB\n", - num_physpages >> (20 - PAGE_CACHE_SHIFT + 1)); /*1/2 of RAM*/ + totalram_pages >> (20 - PAGE_CACHE_SHIFT + 1)); /*1/2 of RAM*/ return -ERANGE; } @@ -388,10 +388,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, const char *buffer, if (rc) RETURN(rc); - if (pages_number < 0 || pages_number > num_physpages) { + if (pages_number < 0 || pages_number > totalram_pages) { CERROR("%s: can't set max cache more than %lu MB\n", ll_get_fsname(sb, NULL, 0), - num_physpages >> (20 - PAGE_CACHE_SHIFT)); + totalram_pages >> (20 - PAGE_CACHE_SHIFT)); RETURN(-ERANGE); } diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c index af1c2d09c47b..0715cf2af3ce 100644 --- a/drivers/staging/lustre/lustre/obdclass/class_obd.c +++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c @@ -558,10 +558,10 @@ static int __init init_obdclass(void) /* Default the dirty page cache cap to 1/2 of system memory. * For clients with less memory, a larger fraction is needed * for other purposes (mostly for BGL). */ - if (num_physpages <= 512 << (20 - PAGE_CACHE_SHIFT)) - obd_max_dirty_pages = num_physpages / 4; + if (totalram_pages <= 512 << (20 - PAGE_CACHE_SHIFT)) + obd_max_dirty_pages = totalram_pages / 4; else - obd_max_dirty_pages = num_physpages / 2; + obd_max_dirty_pages = totalram_pages / 2; err = obd_init_caches(); if (err) diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c index 46aad6813cab..7b94cb7b58e0 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c @@ -202,12 +202,12 @@ int LL_PROC_PROTO(proc_max_dirty_pages_in_mb) 1 << (20 - PAGE_CACHE_SHIFT)); /* Don't allow them to let dirty pages exceed 90% of system * memory and set a hard minimum of 4MB. */ - if (obd_max_dirty_pages > ((num_physpages / 10) * 9)) { + if (obd_max_dirty_pages > ((totalram_pages / 10) * 9)) { CERROR("Refusing to set max dirty pages to %u, which " "is more than 90%% of available RAM; setting " "to %lu\n", obd_max_dirty_pages, - ((num_physpages / 10) * 9)); - obd_max_dirty_pages = ((num_physpages / 10) * 9); + ((totalram_pages / 10) * 9)); + obd_max_dirty_pages = ((totalram_pages / 10) * 9); } else if (obd_max_dirty_pages < 4 << (20 - PAGE_CACHE_SHIFT)) { obd_max_dirty_pages = 4 << (20 - PAGE_CACHE_SHIFT); } diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index fdf0ed367693..1a37c2c60add 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -849,7 +849,7 @@ static int lu_htable_order(void) * * Size of lu_object is (arbitrary) taken as 1K (together with inode). */ - cache_size = num_physpages; + cache_size = totalram_pages; #if BITS_PER_LONG == 32 /* limit hashtable size for lowmem systems to low RAM */ @@ -1783,7 +1783,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, } EXPORT_SYMBOL(lu_env_refill_by_tags); -static struct shrinker *lu_site_shrinker = NULL; typedef struct lu_site_stats{ unsigned lss_populated; @@ -1839,61 +1838,68 @@ static void lu_site_stats_get(cfs_hash_t *hs, * objects without taking the lu_sites_guard lock, but this is not * possible in the current implementation. */ -static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long lu_cache_shrink_count(struct shrinker *sk, + struct shrink_control *sc) { lu_site_stats_t stats; struct lu_site *s; struct lu_site *tmp; - int cached = 0; - int remain = shrink_param(sc, nr_to_scan); - LIST_HEAD(splice); - - if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) { - if (remain != 0) - return -1; - else - /* We must not take the lu_sites_guard lock when - * __GFP_FS is *not* set because of the deadlock - * possibility detailed above. Additionally, - * since we cannot determine the number of - * objects in the cache without taking this - * lock, we're in a particularly tough spot. As - * a result, we'll just lie and say our cache is - * empty. This _should_ be ok, as we can't - * reclaim objects when __GFP_FS is *not* set - * anyways. - */ - return 0; - } + unsigned long cached = 0; - CDEBUG(D_INODE, "Shrink %d objects\n", remain); + if (!sc->gfp_mask & __GFP_FS) + return 0; mutex_lock(&lu_sites_guard); list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { - if (shrink_param(sc, nr_to_scan) != 0) { - remain = lu_site_purge(&lu_shrink_env, s, remain); - /* - * Move just shrunk site to the tail of site list to - * assure shrinking fairness. - */ - list_move_tail(&s->ls_linkage, &splice); - } - memset(&stats, 0, sizeof(stats)); lu_site_stats_get(s->ls_obj_hash, &stats, 0); cached += stats.lss_total - stats.lss_busy; - if (shrink_param(sc, nr_to_scan) && remain <= 0) - break; } - list_splice(&splice, lu_sites.prev); mutex_unlock(&lu_sites_guard); cached = (cached / 100) * sysctl_vfs_cache_pressure; - if (shrink_param(sc, nr_to_scan) == 0) - CDEBUG(D_INODE, "%d objects cached\n", cached); + CDEBUG(D_INODE, "%ld objects cached\n", cached); return cached; } +static unsigned long lu_cache_shrink_scan(struct shrinker *sk, + struct shrink_control *sc) +{ + struct lu_site *s; + struct lu_site *tmp; + unsigned long remain = sc->nr_to_scan, freed = 0; + LIST_HEAD(splice); + + if (!sc->gfp_mask & __GFP_FS) + /* We must not take the lu_sites_guard lock when + * __GFP_FS is *not* set because of the deadlock + * possibility detailed above. Additionally, + * since we cannot determine the number of + * objects in the cache without taking this + * lock, we're in a particularly tough spot. As + * a result, we'll just lie and say our cache is + * empty. This _should_ be ok, as we can't + * reclaim objects when __GFP_FS is *not* set + * anyways. + */ + return SHRINK_STOP; + + mutex_lock(&lu_sites_guard); + list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { + freed = lu_site_purge(&lu_shrink_env, s, remain); + remain -= freed; + /* + * Move just shrunk site to the tail of site list to + * assure shrinking fairness. + */ + list_move_tail(&s->ls_linkage, &splice); + } + list_splice(&splice, lu_sites.prev); + mutex_unlock(&lu_sites_guard); + + return sc->nr_to_scan - remain; +} + /* * Debugging stuff. */ @@ -1917,6 +1923,12 @@ int lu_printk_printer(const struct lu_env *env, return 0; } +static struct shrinker lu_site_shrinker = { + .count_objects = lu_cache_shrink_count, + .scan_objects = lu_cache_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; + /** * Initialization of global lu_* data. */ @@ -1951,9 +1963,7 @@ int lu_global_init(void) * inode, one for ea. Unfortunately setting this high value results in * lu_object/inode cache consuming all the memory. */ - lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink); - if (lu_site_shrinker == NULL) - return -ENOMEM; + register_shrinker(&lu_site_shrinker); return result; } @@ -1963,11 +1973,7 @@ int lu_global_init(void) */ void lu_global_fini(void) { - if (lu_site_shrinker != NULL) { - remove_shrinker(lu_site_shrinker); - lu_site_shrinker = NULL; - } - + unregister_shrinker(&lu_site_shrinker); lu_context_key_degister(&lu_global_key); /* diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c index 198cf3ba1374..d2f219877927 100644 --- a/drivers/staging/lustre/lustre/osc/lproc_osc.c +++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c @@ -146,7 +146,7 @@ static ssize_t osc_max_dirty_mb_seq_write(struct file *file, const char *buffer, if (pages_number <= 0 || pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_CACHE_SHIFT) || - pages_number > num_physpages / 4) /* 1/4 of RAM */ + pages_number > totalram_pages / 4) /* 1/4 of RAM */ return -ERANGE; client_obd_list_lock(&cli->cl_loi_list_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c index 3e7325499d01..2bd0d985bf1d 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c @@ -302,7 +302,7 @@ ptlrpc_lprocfs_req_history_max_seq_write(struct file *file, const char *buffer, * hose a kernel by allowing the request history to grow too * far. */ bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (val > num_physpages/(2 * bufpages)) + if (val > totalram_pages / (2 * bufpages)) return -ERANGE; spin_lock(&svc->srv_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index bf53f1bc1742..e90c8fb7da6a 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -121,13 +121,6 @@ static struct ptlrpc_enc_page_pool { } page_pools; /* - * memory shrinker - */ -const int pools_shrinker_seeks = DEFAULT_SEEKS; -static struct shrinker *pools_shrinker = NULL; - - -/* * /proc/fs/lustre/sptlrpc/encrypt_page_pools */ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) @@ -156,7 +149,7 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) "max waitqueue depth: %u\n" "max wait time: "CFS_TIME_T"/%u\n" , - num_physpages, + totalram_pages, PAGES_PER_POOL, page_pools.epp_max_pages, page_pools.epp_max_pools, @@ -226,30 +219,46 @@ static void enc_pools_release_free_pages(long npages) } /* - * could be called frequently for query (@nr_to_scan == 0). * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. */ -static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long enc_pools_shrink_count(struct shrinker *s, + struct shrink_control *sc) { - if (unlikely(shrink_param(sc, nr_to_scan) != 0)) { + /* + * if no pool access for a long time, we consider it's fully idle. + * a little race here is fine. + */ + if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access > + CACHE_QUIESCENT_PERIOD)) { spin_lock(&page_pools.epp_lock); - shrink_param(sc, nr_to_scan) = min_t(unsigned long, - shrink_param(sc, nr_to_scan), - page_pools.epp_free_pages - - PTLRPC_MAX_BRW_PAGES); - if (shrink_param(sc, nr_to_scan) > 0) { - enc_pools_release_free_pages(shrink_param(sc, - nr_to_scan)); - CDEBUG(D_SEC, "released %ld pages, %ld left\n", - (long)shrink_param(sc, nr_to_scan), - page_pools.epp_free_pages); - - page_pools.epp_st_shrinks++; - page_pools.epp_last_shrink = cfs_time_current_sec(); - } + page_pools.epp_idle_idx = IDLE_IDX_MAX; spin_unlock(&page_pools.epp_lock); } + LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); + return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * + (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; +} + +/* + * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. + */ +static unsigned long enc_pools_shrink_scan(struct shrinker *s, + struct shrink_control *sc) +{ + spin_lock(&page_pools.epp_lock); + sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, + page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); + if (sc->nr_to_scan > 0) { + enc_pools_release_free_pages(sc->nr_to_scan); + CDEBUG(D_SEC, "released %ld pages, %ld left\n", + (long)sc->nr_to_scan, page_pools.epp_free_pages); + + page_pools.epp_st_shrinks++; + page_pools.epp_last_shrink = cfs_time_current_sec(); + } + spin_unlock(&page_pools.epp_lock); + /* * if no pool access for a long time, we consider it's fully idle. * a little race here is fine. @@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) } LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); - return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * - (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; + return sc->nr_to_scan; } static inline @@ -699,13 +707,19 @@ static inline void enc_pools_free(void) sizeof(*page_pools.epp_pools)); } +static struct shrinker pools_shrinker = { + .count_objects = enc_pools_shrink_count, + .scan_objects = enc_pools_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; + int sptlrpc_enc_pool_init(void) { /* * maximum capacity is 1/8 of total physical memory. * is the 1/8 a good number? */ - page_pools.epp_max_pages = num_physpages / 8; + page_pools.epp_max_pages = totalram_pages / 8; page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages); init_waitqueue_head(&page_pools.epp_waitq); @@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void) if (page_pools.epp_pools == NULL) return -ENOMEM; - pools_shrinker = set_shrinker(pools_shrinker_seeks, - enc_pools_shrink); - if (pools_shrinker == NULL) { - enc_pools_free(); - return -ENOMEM; - } + register_shrinker(&pools_shrinker); return 0; } @@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void) { unsigned long cleaned, npools; - LASSERT(pools_shrinker); LASSERT(page_pools.epp_pools); LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); - remove_shrinker(pools_shrinker); + unregister_shrinker(&pools_shrinker); npools = npages_to_npools(page_pools.epp_total_pages); cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 810143a6aa33..f36950e4134f 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -599,7 +599,6 @@ static int __init pkg_temp_thermal_init(void) return 0; err_ret: - get_online_cpus(); for_each_online_cpu(i) put_core_offline(i); put_online_cpus(); diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c index 6488a7351a60..7e8346ec9cdc 100644 --- a/drivers/video/acornfb.c +++ b/drivers/video/acornfb.c @@ -38,14 +38,6 @@ #include "acornfb.h" /* - * VIDC machines can't do 16 or 32BPP modes. - */ -#ifdef HAS_VIDC -#undef FBCON_HAS_CFB16 -#undef FBCON_HAS_CFB32 -#endif - -/* * Default resolution. * NOTE that it has to be supported in the table towards * the end of this file. @@ -106,238 +98,6 @@ static struct vidc_timing current_vidc; extern unsigned int vram_size; /* set by setup.c */ -#ifdef HAS_VIDC - -#define MAX_SIZE 480*1024 - -/* CTL VIDC Actual - * 24.000 0 8.000 - * 25.175 0 8.392 - * 36.000 0 12.000 - * 24.000 1 12.000 - * 25.175 1 12.588 - * 24.000 2 16.000 - * 25.175 2 16.783 - * 36.000 1 18.000 - * 24.000 3 24.000 - * 36.000 2 24.000 - * 25.175 3 25.175 - * 36.000 3 36.000 - */ -struct pixclock { - u_long min_clock; - u_long max_clock; - u_int vidc_ctl; - u_int vid_ctl; -}; - -static struct pixclock arc_clocks[] = { - /* we allow +/-1% on these */ - { 123750, 126250, VIDC_CTRL_DIV3, VID_CTL_24MHz }, /* 8.000MHz */ - { 82500, 84167, VIDC_CTRL_DIV2, VID_CTL_24MHz }, /* 12.000MHz */ - { 61875, 63125, VIDC_CTRL_DIV1_5, VID_CTL_24MHz }, /* 16.000MHz */ - { 41250, 42083, VIDC_CTRL_DIV1, VID_CTL_24MHz }, /* 24.000MHz */ -}; - -static struct pixclock * -acornfb_valid_pixrate(struct fb_var_screeninfo *var) -{ - u_long pixclock = var->pixclock; - u_int i; - - if (!var->pixclock) - return NULL; - - for (i = 0; i < ARRAY_SIZE(arc_clocks); i++) - if (pixclock > arc_clocks[i].min_clock && - pixclock < arc_clocks[i].max_clock) - return arc_clocks + i; - - return NULL; -} - -/* VIDC Rules: - * hcr : must be even (interlace, hcr/2 must be even) - * hswr : must be even - * hdsr : must be odd - * hder : must be odd - * - * vcr : must be odd - * vswr : >= 1 - * vdsr : >= 1 - * vder : >= vdsr - * if interlaced, then hcr/2 must be even - */ -static void -acornfb_set_timing(struct fb_var_screeninfo *var) -{ - struct pixclock *pclk; - struct vidc_timing vidc; - u_int horiz_correction; - u_int sync_len, display_start, display_end, cycle; - u_int is_interlaced; - u_int vid_ctl, vidc_ctl; - u_int bandwidth; - - memset(&vidc, 0, sizeof(vidc)); - - pclk = acornfb_valid_pixrate(var); - vidc_ctl = pclk->vidc_ctl; - vid_ctl = pclk->vid_ctl; - - bandwidth = var->pixclock * 8 / var->bits_per_pixel; - /* 25.175, 4bpp = 79.444ns per byte, 317.776ns per word: fifo = 2,6 */ - if (bandwidth > 143500) - vidc_ctl |= VIDC_CTRL_FIFO_3_7; - else if (bandwidth > 71750) - vidc_ctl |= VIDC_CTRL_FIFO_2_6; - else if (bandwidth > 35875) - vidc_ctl |= VIDC_CTRL_FIFO_1_5; - else - vidc_ctl |= VIDC_CTRL_FIFO_0_4; - - switch (var->bits_per_pixel) { - case 1: - horiz_correction = 19; - vidc_ctl |= VIDC_CTRL_1BPP; - break; - - case 2: - horiz_correction = 11; - vidc_ctl |= VIDC_CTRL_2BPP; - break; - - case 4: - horiz_correction = 7; - vidc_ctl |= VIDC_CTRL_4BPP; - break; - - default: - case 8: - horiz_correction = 5; - vidc_ctl |= VIDC_CTRL_8BPP; - break; - } - - if (var->sync & FB_SYNC_COMP_HIGH_ACT) /* should be FB_SYNC_COMP */ - vidc_ctl |= VIDC_CTRL_CSYNC; - else { - if (!(var->sync & FB_SYNC_HOR_HIGH_ACT)) - vid_ctl |= VID_CTL_HS_NHSYNC; - - if (!(var->sync & FB_SYNC_VERT_HIGH_ACT)) - vid_ctl |= VID_CTL_VS_NVSYNC; - } - - sync_len = var->hsync_len; - display_start = sync_len + var->left_margin; - display_end = display_start + var->xres; - cycle = display_end + var->right_margin; - - /* if interlaced, then hcr/2 must be even */ - is_interlaced = (var->vmode & FB_VMODE_MASK) == FB_VMODE_INTERLACED; - - if (is_interlaced) { - vidc_ctl |= VIDC_CTRL_INTERLACE; - if (cycle & 2) { - cycle += 2; - var->right_margin += 2; - } - } - - vidc.h_cycle = (cycle - 2) / 2; - vidc.h_sync_width = (sync_len - 2) / 2; - vidc.h_border_start = (display_start - 1) / 2; - vidc.h_display_start = (display_start - horiz_correction) / 2; - vidc.h_display_end = (display_end - horiz_correction) / 2; - vidc.h_border_end = (display_end - 1) / 2; - vidc.h_interlace = (vidc.h_cycle + 1) / 2; - - sync_len = var->vsync_len; - display_start = sync_len + var->upper_margin; - display_end = display_start + var->yres; - cycle = display_end + var->lower_margin; - - if (is_interlaced) - cycle = (cycle - 3) / 2; - else - cycle = cycle - 1; - - vidc.v_cycle = cycle; - vidc.v_sync_width = sync_len - 1; - vidc.v_border_start = display_start - 1; - vidc.v_display_start = vidc.v_border_start; - vidc.v_display_end = display_end - 1; - vidc.v_border_end = vidc.v_display_end; - - if (machine_is_a5k()) - __raw_writeb(vid_ctl, IOEB_VID_CTL); - - if (memcmp(¤t_vidc, &vidc, sizeof(vidc))) { - current_vidc = vidc; - - vidc_writel(0xe0000000 | vidc_ctl); - vidc_writel(0x80000000 | (vidc.h_cycle << 14)); - vidc_writel(0x84000000 | (vidc.h_sync_width << 14)); - vidc_writel(0x88000000 | (vidc.h_border_start << 14)); - vidc_writel(0x8c000000 | (vidc.h_display_start << 14)); - vidc_writel(0x90000000 | (vidc.h_display_end << 14)); - vidc_writel(0x94000000 | (vidc.h_border_end << 14)); - vidc_writel(0x98000000); - vidc_writel(0x9c000000 | (vidc.h_interlace << 14)); - vidc_writel(0xa0000000 | (vidc.v_cycle << 14)); - vidc_writel(0xa4000000 | (vidc.v_sync_width << 14)); - vidc_writel(0xa8000000 | (vidc.v_border_start << 14)); - vidc_writel(0xac000000 | (vidc.v_display_start << 14)); - vidc_writel(0xb0000000 | (vidc.v_display_end << 14)); - vidc_writel(0xb4000000 | (vidc.v_border_end << 14)); - vidc_writel(0xb8000000); - vidc_writel(0xbc000000); - } -#ifdef DEBUG_MODE_SELECTION - printk(KERN_DEBUG "VIDC registers for %dx%dx%d:\n", var->xres, - var->yres, var->bits_per_pixel); - printk(KERN_DEBUG " H-cycle : %d\n", vidc.h_cycle); - printk(KERN_DEBUG " H-sync-width : %d\n", vidc.h_sync_width); - printk(KERN_DEBUG " H-border-start : %d\n", vidc.h_border_start); - printk(KERN_DEBUG " H-display-start : %d\n", vidc.h_display_start); - printk(KERN_DEBUG " H-display-end : %d\n", vidc.h_display_end); - printk(KERN_DEBUG " H-border-end : %d\n", vidc.h_border_end); - printk(KERN_DEBUG " H-interlace : %d\n", vidc.h_interlace); - printk(KERN_DEBUG " V-cycle : %d\n", vidc.v_cycle); - printk(KERN_DEBUG " V-sync-width : %d\n", vidc.v_sync_width); - printk(KERN_DEBUG " V-border-start : %d\n", vidc.v_border_start); - printk(KERN_DEBUG " V-display-start : %d\n", vidc.v_display_start); - printk(KERN_DEBUG " V-display-end : %d\n", vidc.v_display_end); - printk(KERN_DEBUG " V-border-end : %d\n", vidc.v_border_end); - printk(KERN_DEBUG " VIDC Ctrl (E) : 0x%08X\n", vidc_ctl); - printk(KERN_DEBUG " IOEB Ctrl : 0x%08X\n", vid_ctl); -#endif -} - -static int -acornfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, - u_int trans, struct fb_info *info) -{ - union palette pal; - - if (regno >= current_par.palette_size) - return 1; - - pal.p = 0; - pal.vidc.reg = regno; - pal.vidc.red = red >> 12; - pal.vidc.green = green >> 12; - pal.vidc.blue = blue >> 12; - - current_par.palette[regno] = pal; - - vidc_writel(pal.p); - - return 0; -} -#endif - #ifdef HAS_VIDC20 #include <mach/acornfb.h> @@ -634,16 +394,7 @@ acornfb_adjust_timing(struct fb_info *info, struct fb_var_screeninfo *var, u_int /* hsync_len must be even */ var->hsync_len = (var->hsync_len + 1) & ~1; -#ifdef HAS_VIDC - /* left_margin must be odd */ - if ((var->left_margin & 1) == 0) { - var->left_margin -= 1; - var->right_margin += 1; - } - - /* right_margin must be odd */ - var->right_margin |= 1; -#elif defined(HAS_VIDC20) +#if defined(HAS_VIDC20) /* left_margin must be even */ if (var->left_margin & 1) { var->left_margin += 1; @@ -787,11 +538,7 @@ static int acornfb_set_par(struct fb_info *info) break; case 8: current_par.palette_size = VIDC_PALETTE_SIZE; -#ifdef HAS_VIDC - info->fix.visual = FB_VISUAL_STATIC_PSEUDOCOLOR; -#else info->fix.visual = FB_VISUAL_PSEUDOCOLOR; -#endif break; #ifdef HAS_VIDC20 case 16: @@ -971,9 +718,6 @@ static void acornfb_init_fbinfo(void) #if defined(HAS_VIDC20) fb_info.var.red.length = 8; fb_info.var.transp.length = 4; -#elif defined(HAS_VIDC) - fb_info.var.red.length = 4; - fb_info.var.transp.length = 1; #endif fb_info.var.green = fb_info.var.red; fb_info.var.blue = fb_info.var.red; @@ -1310,14 +1054,6 @@ static int acornfb_probe(struct platform_device *dev) fb_info.fix.smem_start = handle; } #endif -#if defined(HAS_VIDC) - /* - * Archimedes/A5000 machines use a fixed address for their - * framebuffers. Free unused pages - */ - free_unused_pages(PAGE_OFFSET + size, PAGE_OFFSET + MAX_SIZE); -#endif - fb_info.fix.smem_len = size; current_par.palette_size = VIDC_PALETTE_SIZE; diff --git a/drivers/video/acornfb.h b/drivers/video/acornfb.h index fb2a7fffe506..175c8ff3367c 100644 --- a/drivers/video/acornfb.h +++ b/drivers/video/acornfb.h @@ -13,10 +13,6 @@ #include <asm/hardware/iomd.h> #define VIDC_PALETTE_SIZE 256 #define VIDC_NAME "VIDC20" -#elif defined(HAS_VIDC) -#include <asm/hardware/memc.h> -#define VIDC_PALETTE_SIZE 16 -#define VIDC_NAME "VIDC" #endif #define EXTEND8(x) ((x)|(x)<<8) @@ -101,31 +97,6 @@ struct modex_params { const struct modey_params *modey; }; -#ifdef HAS_VIDC - -#define VID_CTL_VS_NVSYNC (1 << 3) -#define VID_CTL_HS_NHSYNC (1 << 2) -#define VID_CTL_24MHz (0) -#define VID_CTL_25MHz (1) -#define VID_CTL_36MHz (2) - -#define VIDC_CTRL_CSYNC (1 << 7) -#define VIDC_CTRL_INTERLACE (1 << 6) -#define VIDC_CTRL_FIFO_0_4 (0 << 4) -#define VIDC_CTRL_FIFO_1_5 (1 << 4) -#define VIDC_CTRL_FIFO_2_6 (2 << 4) -#define VIDC_CTRL_FIFO_3_7 (3 << 4) -#define VIDC_CTRL_1BPP (0 << 2) -#define VIDC_CTRL_2BPP (1 << 2) -#define VIDC_CTRL_4BPP (2 << 2) -#define VIDC_CTRL_8BPP (3 << 2) -#define VIDC_CTRL_DIV3 (0 << 0) -#define VIDC_CTRL_DIV2 (1 << 0) -#define VIDC_CTRL_DIV1_5 (2 << 0) -#define VIDC_CTRL_DIV1 (3 << 0) - -#endif - #ifdef HAS_VIDC20 /* * VIDC20 registers diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c index 57886787ead0..e78d9f2233b8 100644 --- a/drivers/video/cyber2000fb.c +++ b/drivers/video/cyber2000fb.c @@ -518,6 +518,9 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw) cyber2000_grphw(0xb9, 0x00, cfb); spin_unlock(&cfb->reg_b0_lock); + /* wait (for the PLL?) to avoid palette corruption at higher clocks */ + msleep(1000); + cfb->ramdac_ctrl = hw->ramdac; cyber2000fb_write_ramdac_ctrl(cfb); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3f1128b37e46..16d3288c808d 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -104,7 +104,7 @@ struct autofs_sb_info { u32 magic; int pipefd; struct file *pipe; - pid_t oz_pgrp; + struct pid *oz_pgrp; int catatonic; int version; int sub_version; @@ -139,7 +139,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) filesystem without "magic".) */ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { - return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp; + return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; } /* Does a dentry have some pending activity? */ diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 743c7c2c949d..91838211b66d 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -346,6 +346,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, { int pipefd; int err = 0; + struct pid *new_pid = NULL; if (param->setpipefd.pipefd == -1) return -EINVAL; @@ -357,7 +358,17 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, mutex_unlock(&sbi->wq_mutex); return -EBUSY; } else { - struct file *pipe = fget(pipefd); + struct file *pipe; + + new_pid = get_task_pid(current, PIDTYPE_PGID); + + if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) { + AUTOFS_WARN("Not allowed to change PID namespace"); + err = -EINVAL; + goto out; + } + + pipe = fget(pipefd); if (!pipe) { err = -EBADF; goto out; @@ -367,12 +378,13 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, fput(pipe); goto out; } - sbi->oz_pgrp = task_pgrp_nr(current); + swap(sbi->oz_pgrp, new_pid); sbi->pipefd = pipefd; sbi->pipe = pipe; sbi->catatonic = 0; } out: + put_pid(new_pid); mutex_unlock(&sbi->wq_mutex); return err; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index b104726e2d0a..1b045ecfcea2 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -62,6 +62,8 @@ void autofs4_kill_sb(struct super_block *sb) /* Free wait queues, close pipe */ autofs4_catatonic_mode(sbi); + put_pid(sbi->oz_pgrp); + sb->s_fs_info = NULL; kfree(sbi); @@ -85,7 +87,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, root_inode->i_gid)); - seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); + seq_printf(m, ",pgrp=%d", pid_vnr(sbi->oz_pgrp)); seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); @@ -129,7 +131,8 @@ static const match_table_t tokens = { }; static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, - pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) + int *pgrp, bool *pgrp_set, unsigned int *type, + int *minproto, int *maxproto) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -137,7 +140,6 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, *uid = current_uid(); *gid = current_gid(); - *pgrp = task_pgrp_nr(current); *minproto = AUTOFS_MIN_PROTO_VERSION; *maxproto = AUTOFS_MAX_PROTO_VERSION; @@ -176,6 +178,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, if (match_int(args, &option)) return 1; *pgrp = option; + *pgrp_set = true; break; case Opt_minproto: if (match_int(args, &option)) @@ -211,6 +214,8 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) int pipefd; struct autofs_sb_info *sbi; struct autofs_info *ino; + int pgrp; + bool pgrp_set = false; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -223,7 +228,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->pipe = NULL; sbi->catatonic = 1; sbi->exp_timeout = 0; - sbi->oz_pgrp = task_pgrp_nr(current); + sbi->oz_pgrp = NULL; sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; @@ -260,12 +265,23 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) /* Can this call block? */ if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, - &sbi->oz_pgrp, &sbi->type, &sbi->min_proto, - &sbi->max_proto)) { + &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto, + &sbi->max_proto)) { printk("autofs: called with bogus options\n"); goto fail_dput; } + if (pgrp_set) { + sbi->oz_pgrp = find_get_pid(pgrp); + if (!sbi->oz_pgrp) { + pr_warn("autofs: could not find process group %d\n", + pgrp); + goto fail_dput; + } + } else { + sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID); + } + if (autofs_type_trigger(sbi->type)) __managed_dentry_set_managed(root); @@ -289,9 +305,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->version = sbi->max_proto; sbi->sub_version = AUTOFS_PROTO_SUBVERSION; - DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp); + DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp)); pipe = fget(pipefd); - + if (!pipe) { printk("autofs: could not open pipe file descriptor\n"); goto fail_dput; @@ -321,6 +337,7 @@ fail_dput: fail_ino: kfree(ino); fail_free: + put_pid(sbi->oz_pgrp); kfree(sbi); s->s_fs_info = NULL; fail_unlock: diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 3db70dae40d3..309ca6bcbb09 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -353,11 +353,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, struct qstr qstr; char *name; int status, ret, type; + pid_t pid; + pid_t tgid; /* In catatonic mode, we don't wait for nobody */ if (sbi->catatonic) return -ENOENT; + /* + * Try translating pids to the namespace of the daemon. + * + * Zero means failure: we are in an unrelated pid namespace. + */ + pid = task_pid_nr_ns(current, ns_of_pid(sbi->oz_pgrp)); + tgid = task_tgid_nr_ns(current, ns_of_pid(sbi->oz_pgrp)); + if (pid == 0 || tgid == 0) + return -ENOENT; + if (!dentry->d_inode) { /* * A wait for a negative dentry is invalid for certain @@ -423,8 +435,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->ino = autofs4_get_ino(sbi); wq->uid = current_uid(); wq->gid = current_gid(); - wq->pid = current->pid; - wq->tgid = current->tgid; + wq->pid = pid; + wq->tgid = tgid; wq->status = -EINTR; /* Status return if interrupted */ wq->wait_ctr = 2; mutex_unlock(&sbi->wq_mutex); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7d863a4de5a5..dc8227940b64 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -140,6 +140,25 @@ static int padzero(unsigned long elf_bss) #define ELF_BASE_PLATFORM NULL #endif +/* + * Use get_random_int() to implement AT_RANDOM while avoiding depletion + * of the entropy pool. + */ +static void get_atrandom_bytes(unsigned char *buf, size_t nbytes) +{ + unsigned char *p = buf; + + while (nbytes) { + unsigned int random_variable; + size_t chunk = min(nbytes, sizeof(random_variable)); + + random_variable = get_random_int(); + memcpy(p, &random_variable, chunk); + p += chunk; + nbytes -= chunk; + } +} + static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned long load_addr, unsigned long interp_load_addr) @@ -201,7 +220,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, /* * Generate 16 random bytes for userspace PRNG seeding. */ - get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes)); + get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes)); u_rand_bytes = (elf_addr_t __user *) STACK_ALLOC(p, sizeof(k_rand_bytes)); if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes))) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9ad17b15b454..d5e7de07901f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1966,23 +1966,23 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, size_t, sigsetsize) { int error; - sigset_t ksigmask, sigsaved; - /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ if (sigmask) { + sigset_t ksigmask; + if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) return -EFAULT; - sigsaved = current->blocked; + + current->saved_sigmask = current->blocked; set_current_blocked(&ksigmask); } error = sys_epoll_wait(epfd, events, maxevents, timeout); - /* * If we changed the signal mask, we need to restore the original one. * In case we've got a signal while waiting, we do not restore the @@ -1990,12 +1990,10 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, * the way back to userspace, before the signal mask is restored. */ if (sigmask) { - if (error == -EINTR) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); + if (error == -EINTR) set_restore_sigmask(); - } else - set_current_blocked(&sigsaved); + else + __set_current_blocked(¤t->saved_sigmask); } return error; @@ -2009,25 +2007,25 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, compat_size_t, sigsetsize) { long err; - compat_sigset_t csigmask; - sigset_t ksigmask, sigsaved; - /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ if (sigmask) { + compat_sigset_t csigmask; + sigset_t ksigmask; + if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) return -EFAULT; sigset_from_compat(&ksigmask, &csigmask); - sigsaved = current->blocked; + + current->saved_sigmask = current->blocked; set_current_blocked(&ksigmask); } err = sys_epoll_wait(epfd, events, maxevents, timeout); - /* * If we changed the signal mask, we need to restore the original one. * In case we've got a signal while waiting, we do not restore the @@ -2035,12 +2033,10 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, * the way back to userspace, before the signal mask is restored. */ if (sigmask) { - if (err == -EINTR) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); + if (err == -EINTR) set_restore_sigmask(); - } else - set_current_blocked(&sigsaved); + else + __set_current_blocked(¤t->saved_sigmask); } return err; diff --git a/fs/fat/file.c b/fs/fat/file.c index 9b104f543056..00b5810212da 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -17,8 +17,11 @@ #include <linux/blkdev.h> #include <linux/fsnotify.h> #include <linux/security.h> +#include <linux/falloc.h> #include "fat.h" +static long fat_fallocate(struct file *file, int mode, + loff_t offset, loff_t len); static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) { u32 attr; @@ -148,6 +151,22 @@ static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd, static int fat_file_release(struct inode *inode, struct file *filp) { + + struct super_block *sb = inode->i_sb; + loff_t mmu_private_ideal; + + /* + * Release unwritten fallocated blocks on file release. + * Do this only when the last open file descriptor is closed. + */ + mutex_lock(&inode->i_mutex); + mmu_private_ideal = round_up(inode->i_size, sb->s_blocksize); + + if (mmu_private_ideal < MSDOS_I(inode)->mmu_private && + filp->f_dentry->d_count == 1) + fat_truncate_blocks(inode, inode->i_size); + mutex_unlock(&inode->i_mutex); + if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); @@ -182,6 +201,7 @@ const struct file_operations fat_file_operations = { #endif .fsync = fat_file_fsync, .splice_read = generic_file_splice_read, + .fallocate = fat_fallocate, }; static int fat_cont_expand(struct inode *inode, loff_t size) @@ -220,6 +240,88 @@ out: return err; } +/* + * Preallocate space for a file. This implements fat's fallocate file + * operation, which gets called from sys_fallocate system call. User + * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set + * we just allocate clusters without zeroing them out. Otherwise we + * allocate and zero out clusters via an expanding truncate. The + * allocated clusters are freed in fat_file_release(). + */ +static long fat_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + int cluster, fclus, dclus; + int nr_cluster; /* Number of clusters to be allocated */ + loff_t nr_bytes; /* Number of bytes to be allocated*/ + loff_t free_bytes; /* Unused bytes in the last cluster of file*/ + struct inode *inode = file->f_mapping->host; + struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + int err = 0; + + /* No support for hole punch or other fallocate flags. */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + mutex_lock(&inode->i_mutex); + if ((offset + len) <= MSDOS_I(inode)->mmu_private) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): Blocks already allocated"); + err = -EINVAL; + goto error; + } + + if (mode & FALLOC_FL_KEEP_SIZE) { + /* First compute the number of clusters to be allocated */ + if (inode->i_size > 0) { + err = fat_get_cluster(inode, FAT_ENT_EOF, + &fclus, &dclus); + if (err < 0) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): fat_get_cluster() error"); + goto error; + } + free_bytes = ((fclus + 1) << sbi->cluster_bits) - + inode->i_size; + nr_bytes = offset + len - inode->i_size - free_bytes; + MSDOS_I(inode)->mmu_private = (fclus + 1) << + sbi->cluster_bits; + } else + nr_bytes = offset + len - inode->i_size; + + nr_cluster = (nr_bytes + (sbi->cluster_size - 1)) >> + sbi->cluster_bits; + + /* Start the allocation.We are not zeroing out the clusters */ + while (nr_cluster-- > 0) { + err = fat_alloc_clusters(inode, &cluster, 1); + if (err) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): fat_alloc_clusters() error"); + goto error; + } + err = fat_chain_add(inode, cluster, 1); + if (err) { + fat_free_clusters(inode, cluster); + goto error; + } + MSDOS_I(inode)->mmu_private += sbi->cluster_size; + } + } else { + /* This is just an expanding truncate */ + err = fat_cont_expand(inode, (offset + len)); + if (err) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): fat_cont_expand() error"); + } + } + +error: + mutex_unlock(&inode->i_mutex); + return err; +} + /* Free all clusters after the skip'th cluster. */ static int fat_free(struct inode *inode, int skip) { @@ -386,6 +488,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) struct inode *inode = dentry->d_inode; unsigned int ia_valid; int error; + loff_t mmu_private_ideal; + + mmu_private_ideal = round_up(inode->i_size, dentry->d_sb->s_blocksize); /* Check for setting the inode time. */ ia_valid = attr->ia_valid; @@ -411,7 +516,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { inode_dio_wait(inode); - if (attr->ia_size > inode->i_size) { + if (attr->ia_size > inode->i_size && + MSDOS_I(inode)->mmu_private <= mmu_private_ideal) { error = fat_cont_expand(inode, attr->ia_size); if (error || attr->ia_valid == ATTR_SIZE) goto out; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 11b51bb55b42..fc6b37d66533 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -152,11 +152,65 @@ static void fat_write_failed(struct address_space *mapping, loff_t to) } } +static int fat_zero_falloc_area(struct file *file, + struct address_space *mapping, loff_t pos) +{ + struct page *page; + struct inode *inode = mapping->host; + loff_t curpos = i_size_read(inode); + size_t count = pos - curpos; + int err; + + do { + unsigned offset; + size_t bytes; + void *fsdata; + + offset = (curpos & (PAGE_CACHE_SIZE - 1)); + bytes = PAGE_CACHE_SIZE - offset; + bytes = min(bytes, count); + + err = pagecache_write_begin(NULL, mapping, curpos, bytes, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (err) + break; + + zero_user(page, offset, bytes); + + err = pagecache_write_end(NULL, mapping, curpos, bytes, bytes, + page, fsdata); + if (err < 0) + break; + curpos += bytes; + count -= bytes; + err = 0; + } while (count); + + return err; +} + static int fat_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { int err; + loff_t mmu_private_ideal, mmu_private_actual; + loff_t size; + struct inode *inode = mapping->host; + struct super_block *sb = inode->i_sb; + + size = i_size_read(inode); + mmu_private_actual = MSDOS_I(inode)->mmu_private; + mmu_private_ideal = round_up(size, sb->s_blocksize); + if ((mmu_private_actual > mmu_private_ideal) && (pos > size)) { + err = fat_zero_falloc_area(file, mapping, pos); + if (err) { + fat_msg(sb, KERN_ERR, + "Error (%d) zeroing fallocated area", err); + return err; + } + } *pagep = NULL; err = cont_write_begin(file, mapping, pos, len, flags, diff --git a/fs/file_table.c b/fs/file_table.c index b44e4c559786..7528930dab1c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -311,8 +311,7 @@ void fput(struct file *file) return; /* * After this task has run exit_task_work(), - * task_work_add() will fail. free_ipc_ns()-> - * shm_destroy() can do this. Fall through to delayed + * task_work_add() will fail. Fall through to delayed * fput to avoid leaking *file. */ } diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 17e6bdde96c5..2fc6779655e6 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7126,7 +7126,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, if (end > i_size_read(inode)) end = i_size_read(inode); - BUG_ON(start >= end); + BUG_ON(start > end); if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) || !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) || diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 41000f223ca4..b0415c2a887b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -474,11 +474,6 @@ static int ocfs2_truncate_file(struct inode *inode, goto bail; } - /* lets handle the simple truncate cases before doing any more - * cluster locking. */ - if (new_i_size == le64_to_cpu(fe->i_size)) - goto bail; - down_write(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_resv_discard(&osb->osb_la_resmap, @@ -723,7 +718,8 @@ leave: * While a write will already be ordering the data, a truncate will not. * Thus, we need to explicitly order the zeroed pages. */ -static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) +static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode, + struct buffer_head *di_bh) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); handle_t *handle = NULL; @@ -740,7 +736,14 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) } ret = ocfs2_jbd2_file_inode(handle, inode); - if (ret < 0) + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) mlog_errno(ret); out: @@ -756,7 +759,7 @@ out: * to be too fragile to do exactly what we need without us having to * worry about recursive locking in ->write_begin() and ->write_end(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, - u64 abs_to) + u64 abs_to, struct buffer_head *di_bh) { struct address_space *mapping = inode->i_mapping; struct page *page; @@ -764,6 +767,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, handle_t *handle = NULL; int ret = 0; unsigned zero_from, zero_to, block_start, block_end; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; BUG_ON(abs_from >= abs_to); BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); @@ -806,7 +810,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, } if (!handle) { - handle = ocfs2_zero_start_ordered_transaction(inode); + handle = ocfs2_zero_start_ordered_transaction(inode, + di_bh); if (IS_ERR(handle)) { ret = PTR_ERR(handle); handle = NULL; @@ -823,8 +828,22 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, ret = 0; } - if (handle) + if (handle) { + /* + * fs-writeback will release the dirty pages without page lock + * whose offset are over inode size, the release happens at + * block_write_full_page_endio(). + */ + i_size_write(inode, abs_to); + inode->i_blocks = ocfs2_inode_sector_count(inode); + di->i_size = cpu_to_le64((u64)i_size_read(inode)); + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + di->i_mtime_nsec = di->i_ctime_nsec; + ocfs2_journal_dirty(handle, di_bh); ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); + } out_unlock: unlock_page(page); @@ -920,7 +939,7 @@ out: * has made sure that the entire range needs zeroing. */ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, - u64 range_end) + u64 range_end, struct buffer_head *di_bh) { int rc = 0; u64 next_pos; @@ -936,7 +955,7 @@ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; if (next_pos > range_end) next_pos = range_end; - rc = ocfs2_write_zero_page(inode, zero_pos, next_pos); + rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh); if (rc < 0) { mlog_errno(rc); break; @@ -982,7 +1001,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, range_end = zero_to_size; ret = ocfs2_zero_extend_range(inode, range_start, - range_end); + range_end, di_bh); if (ret) { mlog_errno(ret); break; @@ -1150,14 +1169,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) goto bail_unlock_rw; } - if (size_change && attr->ia_size != i_size_read(inode)) { + if (size_change) { status = inode_newsize_ok(inode, attr->ia_size); if (status) goto bail_unlock; inode_dio_wait(inode); - if (i_size_read(inode) > attr->ia_size) { + if (i_size_read(inode) >= attr->ia_size) { if (ocfs2_should_order_data(inode)) { status = ocfs2_begin_ordered_truncate(inode, attr->ia_size); @@ -2626,7 +2645,16 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence) case SEEK_SET: break; case SEEK_END: - offset += inode->i_size; + /* SEEK_END requires the OCFS2 inode lock for the file + * because it references the file's size. + */ + ret = ocfs2_inode_lock(inode, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + offset += i_size_read(inode); + ocfs2_inode_unlock(inode, 0); break; case SEEK_CUR: if (offset == 0) { diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index be3f8676a438..cec861d9d480 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2101,17 +2101,17 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, goto leave; } - /* remove it from the orphan directory */ - status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup); + status = ocfs2_journal_access_di(handle, + INODE_CACHE(orphan_dir_inode), + orphan_dir_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; } - status = ocfs2_journal_access_di(handle, - INODE_CACHE(orphan_dir_inode), - orphan_dir_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + /* remove it from the orphan directory */ + status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup); if (status < 0) { mlog_errno(status); goto leave; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 998b17eda09d..9f6b96a09615 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2965,6 +2965,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, to = map_end & (PAGE_CACHE_SIZE - 1); page = find_or_create_page(mapping, page_index, GFP_NOFS); + if (!page) { + ret = -ENOMEM; + mlog_errno(ret); + break; + } /* * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5aa847a603c0..59d85d608898 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -132,13 +132,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freeswap), K(global_page_state(NR_FILE_DIRTY)), K(global_page_state(NR_WRITEBACK)), -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - K(global_page_state(NR_ANON_PAGES) - + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * - HPAGE_PMD_NR), -#else K(global_page_state(NR_ANON_PAGES)), -#endif K(global_page_state(NR_FILE_MAPPED)), K(global_page_state(NR_SHMEM)), K(global_page_state(NR_SLAB_RECLAIMABLE) + diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 05bcc0903766..ecc8e01f492a 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -94,6 +94,11 @@ */ #define in_nmi() (preempt_count() & NMI_MASK) +/* + * Are we in nmi,irq context, or softirq context? + */ +#define in_serving_irq() (in_nmi() || in_irq() || in_serving_softirq()) + #if defined(CONFIG_PREEMPT_COUNT) # define PREEMPT_CHECK_OFFSET 1 #else diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b60de92e2edc..3935428c57cf 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -96,9 +96,6 @@ extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end); -extern int handle_pte_fault(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - pte_t *pte, pmd_t *pmd, unsigned int flags); extern int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5fa5afeeb759..1a311e08b5ef 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -344,16 +344,6 @@ static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long /* IRQ wakeup (PM) control: */ extern int irq_set_irq_wake(unsigned int irq, unsigned int on); -static inline int enable_irq_wake(unsigned int irq) -{ - return irq_set_irq_wake(irq, 1); -} - -static inline int disable_irq_wake(unsigned int irq) -{ - return irq_set_irq_wake(irq, 0); -} - #else /* !CONFIG_GENERIC_HARDIRQS */ /* * NOTE: non-genirq architectures, if they want to support the lock @@ -370,16 +360,23 @@ static inline int disable_irq_wake(unsigned int irq) enable_irq(irq) # endif -static inline int enable_irq_wake(unsigned int irq) +/* IRQ wakeup (PM) control: */ +static inline int irq_set_irq_wake(unsigned int irq, unsigned int on) { return 0; } +#endif /* CONFIG_GENERIC_HARDIRQS */ + +static inline int enable_irq_wake(unsigned int irq) +{ + return irq_set_irq_wake(irq, 1); +} + static inline int disable_irq_wake(unsigned int irq) { - return 0; + return irq_set_irq_wake(irq, 0); } -#endif /* CONFIG_GENERIC_HARDIRQS */ #ifdef CONFIG_IRQ_FORCED_THREADING diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index c4d870b0d5e6..19c19a5eee29 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -22,7 +22,7 @@ struct ipc_ids { int in_use; unsigned short seq; unsigned short seq_max; - struct rw_semaphore rw_mutex; + struct rw_semaphore rwsem; struct idr ipcs_idr; int next_id; }; diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0d7df39a5885..b2f897789838 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -91,7 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol) } #define vma_policy(vma) ((vma)->vm_policy) -#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol)) static inline void mpol_get(struct mempolicy *pol) { @@ -126,6 +125,7 @@ struct shared_policy { spinlock_t lock; }; +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); int mpol_set_shared_policy(struct shared_policy *info, struct vm_area_struct *vma, @@ -240,7 +240,12 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) } #define vma_policy(vma) NULL -#define vma_set_policy(vma, pol) do {} while(0) + +static inline int +vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) +{ + return 0; +} static inline void numa_policy_init(void) { diff --git a/include/linux/mm.h b/include/linux/mm.h index f0224608d15e..d5c82dc33805 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -884,11 +884,12 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ +#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ - VM_FAULT_HWPOISON_LARGE) + VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE) /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6d53675c2b54..f1a5b5937be4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) PAGEFLAG(MappedToDisk, mappedtodisk) -/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +/* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) -PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ +PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) #ifdef CONFIG_HIGHMEM /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 50d04b92ceda..fc09d217d870 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2167,15 +2167,15 @@ static inline bool thread_group_leader(struct task_struct *p) * all we care about is that we have a task with the appropriate * pid, we don't actually care if we have the right task. */ -static inline int has_group_leader_pid(struct task_struct *p) +static inline bool has_group_leader_pid(struct task_struct *p) { - return p->pid == p->tgid; + return task_pid(p) == p->signal->leader_pid; } static inline -int same_thread_group(struct task_struct *p1, struct task_struct *p2) +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) { - return p1->tgid == p2->tgid; + return p1->signal == p2->signal; } static inline struct task_struct *next_thread(const struct task_struct *p) diff --git a/include/linux/smp.h b/include/linux/smp.h index c181399f2c20..c8488763277f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/cpumask.h> #include <linux/init.h> +#include <linux/irqflags.h> extern void cpu_idle(void); @@ -139,14 +140,17 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info) } #define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) -#define on_each_cpu(func, info, wait) \ - ({ \ - unsigned long __flags; \ - local_irq_save(__flags); \ - func(info); \ - local_irq_restore(__flags); \ - 0; \ - }) + +static inline int on_each_cpu(smp_call_func_t func, void *info, int wait) +{ + unsigned long flags; + + local_irq_save(flags); + func(info); + local_irq_restore(flags); + return 0; +} + /* * Note we still need to test the mask even for UP * because we actually can get an empty mask from diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index bd6cf61142be..dc2cdf07ac14 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -70,6 +70,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif + NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ + NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ + NR_TLB_LOCAL_FLUSH_ALL, + NR_TLB_LOCAL_FLUSH_ONE, + NR_TLB_LOCAL_FLUSH_ONE_KERNEL, NR_VM_EVENT_ITEMS }; diff --git a/ipc/msg.c b/ipc/msg.c index bd60d7e159e8..14d64f8023f2 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -70,8 +70,6 @@ struct msg_sender { #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) -#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) - static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); static int newque(struct ipc_namespace *, struct ipc_params *); #ifdef CONFIG_PROC_FS @@ -172,7 +170,7 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) * @ns: namespace * @params: ptr to the structure that contains the key and msgflg * - * Called with msg_ids.rw_mutex held (writer) + * Called with msg_ids.rwsem held (writer) */ static int newque(struct ipc_namespace *ns, struct ipc_params *params) { @@ -259,8 +257,8 @@ static void expunge_all(struct msg_queue *msq, int res) * removes the message queue from message queue ID IDR, and cleans up all the * messages associated with this queue. * - * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held - * before freeque() is called. msg_ids.rw_mutex remains locked on exit. + * msg_ids.rwsem (writer) and the spinlock for this message queue are held + * before freeque() is called. msg_ids.rwsem remains locked on exit. */ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { @@ -270,7 +268,8 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) expunge_all(msq, -EIDRM); ss_wakeup(&msq->q_senders, 1); msg_rmid(ns, msq); - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { atomic_dec(&ns->msg_hdrs); @@ -282,7 +281,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) } /* - * Called with msg_ids.rw_mutex and ipcp locked. + * Called with msg_ids.rwsem and ipcp locked. */ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) { @@ -386,9 +385,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) } /* - * This function handles some msgctl commands which require the rw_mutex + * This function handles some msgctl commands which require the rwsem * to be held in write mode. - * NOTE: no locks must be held, the rw_mutex is taken inside this function. + * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, struct msqid_ds __user *buf, int version) @@ -403,7 +402,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, return -EFAULT; } - down_write(&msg_ids(ns).rw_mutex); + down_write(&msg_ids(ns).rwsem); rcu_read_lock(); ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, @@ -459,7 +458,7 @@ out_unlock0: out_unlock1: rcu_read_unlock(); out_up: - up_write(&msg_ids(ns).rw_mutex); + up_write(&msg_ids(ns).rwsem); return err; } @@ -494,7 +493,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid, msginfo.msgmnb = ns->msg_ctlmnb; msginfo.msgssz = MSGSSZ; msginfo.msgseg = MSGSEG; - down_read(&msg_ids(ns).rw_mutex); + down_read(&msg_ids(ns).rwsem); if (cmd == MSG_INFO) { msginfo.msgpool = msg_ids(ns).in_use; msginfo.msgmap = atomic_read(&ns->msg_hdrs); @@ -505,7 +504,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid, msginfo.msgtql = MSGTQL; } max_id = ipc_get_maxid(&msg_ids(ns)); - up_read(&msg_ids(ns).rw_mutex); + up_read(&msg_ids(ns).rwsem); if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) return -EFAULT; return (max_id < 0) ? 0 : max_id; diff --git a/ipc/namespace.c b/ipc/namespace.c index 7ee61bf44933..67dc744974c6 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, int next_id; int total, in_use; - down_write(&ids->rw_mutex); + down_write(&ids->rwsem); in_use = ids->in_use; @@ -93,7 +93,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, free(ns, perm); total++; } - up_write(&ids->rw_mutex); + up_write(&ids->rwsem); } static void free_ipc_ns(struct ipc_namespace *ns) diff --git a/ipc/sem.c b/ipc/sem.c index 41088899783d..69b6a21f3844 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -322,7 +322,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum) } /* - * sem_lock_(check_) routines are called in the paths where the rw_mutex + * sem_lock_(check_) routines are called in the paths where the rwsem * is not held. * * The caller holds the RCU read lock. @@ -426,7 +426,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * @ns: namespace * @params: ptr to the structure that contains key, semflg and nsems * - * Called with sem_ids.rw_mutex held (as a writer) + * Called with sem_ids.rwsem held (as a writer) */ static int newary(struct ipc_namespace *ns, struct ipc_params *params) @@ -492,7 +492,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) /* - * Called with sem_ids.rw_mutex and ipcp locked. + * Called with sem_ids.rwsem and ipcp locked. */ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) { @@ -503,7 +503,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) } /* - * Called with sem_ids.rw_mutex and ipcp locked. + * Called with sem_ids.rwsem and ipcp locked. */ static inline int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) @@ -994,8 +994,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) return semzcnt; } -/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked - * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex +/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked + * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem * remains locked on exit. */ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) @@ -1116,7 +1116,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, seminfo.semmnu = SEMMNU; seminfo.semmap = SEMMAP; seminfo.semume = SEMUME; - down_read(&sem_ids(ns).rw_mutex); + down_read(&sem_ids(ns).rwsem); if (cmd == SEM_INFO) { seminfo.semusz = sem_ids(ns).in_use; seminfo.semaem = ns->used_sems; @@ -1125,7 +1125,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, seminfo.semaem = SEMAEM; } max_id = ipc_get_maxid(&sem_ids(ns)); - up_read(&sem_ids(ns).rw_mutex); + up_read(&sem_ids(ns).rwsem); if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) return -EFAULT; return (max_id < 0) ? 0: max_id; @@ -1431,9 +1431,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) } /* - * This function handles some semctl commands which require the rw_mutex + * This function handles some semctl commands which require the rwsem * to be held in write mode. - * NOTE: no locks must be held, the rw_mutex is taken inside this function. + * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int semctl_down(struct ipc_namespace *ns, int semid, int cmd, int version, void __user *p) @@ -1448,7 +1448,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, return -EFAULT; } - down_write(&sem_ids(ns).rw_mutex); + down_write(&sem_ids(ns).rwsem); rcu_read_lock(); ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, @@ -1487,7 +1487,7 @@ out_unlock0: out_unlock1: rcu_read_unlock(); out_up: - up_write(&sem_ids(ns).rw_mutex); + up_write(&sem_ids(ns).rwsem); return err; } diff --git a/ipc/shm.c b/ipc/shm.c index c6b4ad5ce3b7..59f2194481ce 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -19,6 +19,9 @@ * namespaces support * OpenVZ, SWsoft Inc. * Pavel Emelianov <xemul@openvz.org> + * + * Better ipc lock (kern_ipc_perm.lock) handling + * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. */ #include <linux/slab.h> @@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns) } /* - * Called with shm_ids.rw_mutex (writer) and the shp structure locked. - * Only shm_ids.rw_mutex remains locked on exit. + * Called with shm_ids.rwsem (writer) and the shp structure locked. + * Only shm_ids.rwsem remains locked on exit. */ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { @@ -124,8 +127,28 @@ void __init shm_init (void) IPC_SHM_IDS, sysvipc_shm_proc_show); } +static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) +{ + struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id); + + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); + + return container_of(ipcp, struct shmid_kernel, shm_perm); +} + +static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) +{ + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); + + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); + + return container_of(ipcp, struct shmid_kernel, shm_perm); +} + /* - * shm_lock_(check_) routines are called in the paths where the rw_mutex + * shm_lock_(check_) routines are called in the paths where the rwsem * is not necessarily held. */ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) @@ -182,7 +205,7 @@ static void shm_open(struct vm_area_struct *vma) * @ns: namespace * @shp: struct to free * - * It has to be called with shp and shm_ids.rw_mutex (writer) locked, + * It has to be called with shp and shm_ids.rwsem (writer) locked, * but returns with shp unlocked and freed. */ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) @@ -230,7 +253,7 @@ static void shm_close(struct vm_area_struct *vma) struct shmid_kernel *shp; struct ipc_namespace *ns = sfd->ns; - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */ shp = shm_lock(ns, sfd->id); BUG_ON(IS_ERR(shp)); @@ -241,10 +264,10 @@ static void shm_close(struct vm_area_struct *vma) shm_destroy(ns, shp); else shm_unlock(shp); - up_write(&shm_ids(ns).rw_mutex); + up_write(&shm_ids(ns).rwsem); } -/* Called with ns->shm_ids(ns).rw_mutex locked */ +/* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_current(int id, void *p, void *data) { struct ipc_namespace *ns = data; @@ -275,7 +298,7 @@ static int shm_try_destroy_current(int id, void *p, void *data) return 0; } -/* Called with ns->shm_ids(ns).rw_mutex locked */ +/* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_orphaned(int id, void *p, void *data) { struct ipc_namespace *ns = data; @@ -286,7 +309,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) * We want to destroy segments without users and with already * exit'ed originating process. * - * As shp->* are changed under rw_mutex, it's safe to skip shp locking. + * As shp->* are changed under rwsem, it's safe to skip shp locking. */ if (shp->shm_creator != NULL) return 0; @@ -300,10 +323,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); - up_write(&shm_ids(ns).rw_mutex); + up_write(&shm_ids(ns).rwsem); } @@ -315,10 +338,10 @@ void exit_shm(struct task_struct *task) return; /* Destroy all already created segments, but not mapped yet */ - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); - up_write(&shm_ids(ns).rw_mutex); + up_write(&shm_ids(ns).rwsem); } static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -452,7 +475,7 @@ static const struct vm_operations_struct shm_vm_ops = { * @ns: namespace * @params: ptr to the structure that contains key, size and shmflg * - * Called with shm_ids.rw_mutex held as a writer. + * Called with shm_ids.rwsem held as a writer. */ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) @@ -560,7 +583,7 @@ no_file: } /* - * Called with shm_ids.rw_mutex and ipcp locked. + * Called with shm_ids.rwsem and ipcp locked. */ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) { @@ -571,7 +594,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) } /* - * Called with shm_ids.rw_mutex and ipcp locked. + * Called with shm_ids.rwsem and ipcp locked. */ static inline int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) @@ -684,7 +707,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf /* * Calculate and add used RSS and swap pages of a shm. - * Called with shm_ids.rw_mutex held as a reader + * Called with shm_ids.rwsem held as a reader */ static void shm_add_rss_swap(struct shmid_kernel *shp, unsigned long *rss_add, unsigned long *swp_add) @@ -711,7 +734,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp, } /* - * Called with shm_ids.rw_mutex held as a reader + * Called with shm_ids.rwsem held as a reader */ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, unsigned long *swp) @@ -740,9 +763,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, } /* - * This function handles some shmctl commands which require the rw_mutex + * This function handles some shmctl commands which require the rwsem * to be held in write mode. - * NOTE: no locks must be held, the rw_mutex is taken inside this function. + * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, struct shmid_ds __user *buf, int version) @@ -757,14 +780,13 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, return -EFAULT; } - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); rcu_read_lock(); - ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, - &shmid64.shm_perm, 0); + ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd, + &shmid64.shm_perm, 0); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); - /* the ipc lock is not held upon failure */ goto out_unlock1; } @@ -772,14 +794,16 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, err = security_shm_shmctl(shp, cmd); if (err) - goto out_unlock0; + goto out_unlock1; switch (cmd) { case IPC_RMID: + ipc_lock_object(&shp->shm_perm); /* do_shm_rmid unlocks the ipc object and rcu */ do_shm_rmid(ns, ipcp); goto out_up; case IPC_SET: + ipc_lock_object(&shp->shm_perm); err = ipc_update_perm(&shmid64.shm_perm, ipcp); if (err) goto out_unlock0; @@ -787,6 +811,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, break; default: err = -EINVAL; + goto out_unlock1; } out_unlock0: @@ -794,33 +819,28 @@ out_unlock0: out_unlock1: rcu_read_unlock(); out_up: - up_write(&shm_ids(ns).rw_mutex); + up_write(&shm_ids(ns).rwsem); return err; } -SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +static int shmctl_nolock(struct ipc_namespace *ns, int shmid, + int cmd, int version, void __user *buf) { + int err; struct shmid_kernel *shp; - int err, version; - struct ipc_namespace *ns; - if (cmd < 0 || shmid < 0) { - err = -EINVAL; - goto out; + /* preliminary security checks for *_INFO */ + if (cmd == IPC_INFO || cmd == SHM_INFO) { + err = security_shm_shmctl(NULL, cmd); + if (err) + return err; } - version = ipc_parse_version(&cmd); - ns = current->nsproxy->ipc_ns; - - switch (cmd) { /* replace with proc interface ? */ + switch (cmd) { case IPC_INFO: { struct shminfo64 shminfo; - err = security_shm_shmctl(NULL, cmd); - if (err) - return err; - memset(&shminfo, 0, sizeof(shminfo)); shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; shminfo.shmmax = ns->shm_ctlmax; @@ -830,9 +850,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) if(copy_shminfo_to_user (buf, &shminfo, version)) return -EFAULT; - down_read(&shm_ids(ns).rw_mutex); + down_read(&shm_ids(ns).rwsem); err = ipc_get_maxid(&shm_ids(ns)); - up_read(&shm_ids(ns).rw_mutex); + up_read(&shm_ids(ns).rwsem); if(err<0) err = 0; @@ -842,19 +862,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) { struct shm_info shm_info; - err = security_shm_shmctl(NULL, cmd); - if (err) - return err; - memset(&shm_info, 0, sizeof(shm_info)); - down_read(&shm_ids(ns).rw_mutex); + down_read(&shm_ids(ns).rwsem); shm_info.used_ids = shm_ids(ns).in_use; shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); shm_info.shm_tot = ns->shm_tot; shm_info.swap_attempts = 0; shm_info.swap_successes = 0; err = ipc_get_maxid(&shm_ids(ns)); - up_read(&shm_ids(ns).rw_mutex); + up_read(&shm_ids(ns).rwsem); if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { err = -EFAULT; goto out; @@ -869,27 +885,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) struct shmid64_ds tbuf; int result; + rcu_read_lock(); if (cmd == SHM_STAT) { - shp = shm_lock(ns, shmid); + shp = shm_obtain_object(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); - goto out; + goto out_unlock; } result = shp->shm_perm.id; } else { - shp = shm_lock_check(ns, shmid); + shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); - goto out; + goto out_unlock; } result = 0; } + err = -EACCES; if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) goto out_unlock; + err = security_shm_shmctl(shp, cmd); if (err) goto out_unlock; + memset(&tbuf, 0, sizeof(tbuf)); kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); tbuf.shm_segsz = shp->shm_segsz; @@ -899,43 +919,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) tbuf.shm_cpid = shp->shm_cprid; tbuf.shm_lpid = shp->shm_lprid; tbuf.shm_nattch = shp->shm_nattch; - shm_unlock(shp); - if(copy_shmid_to_user (buf, &tbuf, version)) + rcu_read_unlock(); + + if (copy_shmid_to_user(buf, &tbuf, version)) err = -EFAULT; else err = result; goto out; } + default: + return -EINVAL; + } + +out_unlock: + rcu_read_unlock(); +out: + return err; +} + +SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +{ + struct shmid_kernel *shp; + int err, version; + struct ipc_namespace *ns; + + if (cmd < 0 || shmid < 0) + return -EINVAL; + + version = ipc_parse_version(&cmd); + ns = current->nsproxy->ipc_ns; + + switch (cmd) { + case IPC_INFO: + case SHM_INFO: + case SHM_STAT: + case IPC_STAT: + return shmctl_nolock(ns, shmid, cmd, version, buf); + case IPC_RMID: + case IPC_SET: + return shmctl_down(ns, shmid, cmd, buf, version); case SHM_LOCK: case SHM_UNLOCK: { struct file *shm_file; - shp = shm_lock_check(ns, shmid); + rcu_read_lock(); + shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); - goto out; + goto out_unlock1; } audit_ipc_obj(&(shp->shm_perm)); + err = security_shm_shmctl(shp, cmd); + if (err) + goto out_unlock1; + ipc_lock_object(&shp->shm_perm); if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { kuid_t euid = current_euid(); err = -EPERM; if (!uid_eq(euid, shp->shm_perm.uid) && !uid_eq(euid, shp->shm_perm.cuid)) - goto out_unlock; + goto out_unlock0; if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) - goto out_unlock; + goto out_unlock0; } - err = security_shm_shmctl(shp, cmd); - if (err) - goto out_unlock; - shm_file = shp->shm_file; if (is_file_hugepages(shm_file)) - goto out_unlock; + goto out_unlock0; if (cmd == SHM_LOCK) { struct user_struct *user = current_user(); @@ -944,32 +997,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) shp->shm_perm.mode |= SHM_LOCKED; shp->mlock_user = user; } - goto out_unlock; + goto out_unlock0; } /* SHM_UNLOCK */ if (!(shp->shm_perm.mode & SHM_LOCKED)) - goto out_unlock; + goto out_unlock0; shmem_lock(shm_file, 0, shp->mlock_user); shp->shm_perm.mode &= ~SHM_LOCKED; shp->mlock_user = NULL; get_file(shm_file); - shm_unlock(shp); + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); shmem_unlock_mapping(shm_file->f_mapping); + fput(shm_file); - goto out; - } - case IPC_RMID: - case IPC_SET: - err = shmctl_down(ns, shmid, cmd, buf, version); return err; + } default: return -EINVAL; } -out_unlock: - shm_unlock(shp); -out: +out_unlock0: + ipc_unlock_object(&shp->shm_perm); +out_unlock1: + rcu_read_unlock(); return err; } @@ -1037,7 +1089,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, * additional creator id... */ ns = current->nsproxy->ipc_ns; - shp = shm_lock_check(ns, shmid); + rcu_read_lock(); + shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); goto out; @@ -1051,24 +1104,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, if (err) goto out_unlock; + ipc_lock_object(&shp->shm_perm); path = shp->shm_file->f_path; path_get(&path); shp->shm_nattch++; size = i_size_read(path.dentry->d_inode); - shm_unlock(shp); + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); err = -ENOMEM; sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); - if (!sfd) - goto out_put_dentry; + if (!sfd) { + path_put(&path); + goto out_nattch; + } file = alloc_file(&path, f_mode, is_file_hugepages(shp->shm_file) ? &shm_file_operations_huge : &shm_file_operations); err = PTR_ERR(file); - if (IS_ERR(file)) - goto out_free; + if (IS_ERR(file)) { + kfree(sfd); + path_put(&path); + goto out_nattch; + } file->private_data = sfd; file->f_mapping = shp->shm_file->f_mapping; @@ -1094,7 +1154,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, addr > current->mm->start_stack - size - PAGE_SIZE * 5) goto invalid; } - + addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); *raddr = addr; err = 0; @@ -1109,7 +1169,7 @@ out_fput: fput(file); out_nattch: - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; @@ -1117,20 +1177,13 @@ out_nattch: shm_destroy(ns, shp); else shm_unlock(shp); - up_write(&shm_ids(ns).rw_mutex); - -out: + up_write(&shm_ids(ns).rwsem); return err; out_unlock: - shm_unlock(shp); - goto out; - -out_free: - kfree(sfd); -out_put_dentry: - path_put(&path); - goto out_nattch; + rcu_read_unlock(); +out: + return err; } SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) diff --git a/ipc/util.c b/ipc/util.c index 4704223bfad4..1ddadcf9a2ab 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -15,6 +15,14 @@ * Jun 2006 - namespaces ssupport * OpenVZ, SWsoft Inc. * Pavel Emelianov <xemul@openvz.org> + * + * General sysv ipc locking scheme: + * when doing ipc id lookups, take the ids->rwsem + * rcu_read_lock() + * obtain the ipc object (kern_ipc_perm) + * perform security, capabilities, auditing and permission checks, etc. + * acquire the ipc lock (kern_ipc_perm.lock) throught ipc_lock_object() + * perform data updates (ie: SET, RMID, LOCK/UNLOCK commands) */ #include <linux/mm.h> @@ -119,7 +127,7 @@ __initcall(ipc_init); void ipc_init_ids(struct ipc_ids *ids) { - init_rwsem(&ids->rw_mutex); + init_rwsem(&ids->rwsem); ids->in_use = 0; ids->seq = 0; @@ -174,7 +182,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header, * @ids: Identifier set * @key: The key to find * - * Requires ipc_ids.rw_mutex locked. + * Requires ipc_ids.rwsem locked. * Returns the LOCKED pointer to the ipc structure if found or NULL * if not. * If key is found ipc points to the owning ipc structure @@ -208,7 +216,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) * ipc_get_maxid - get the last assigned id * @ids: IPC identifier set * - * Called with ipc_ids.rw_mutex held. + * Called with ipc_ids.rwsem held. */ int ipc_get_maxid(struct ipc_ids *ids) @@ -246,7 +254,7 @@ int ipc_get_maxid(struct ipc_ids *ids) * is returned. The 'new' entry is returned in a locked state on success. * On failure the entry is not locked and a negative err-code is returned. * - * Called with writer ipc_ids.rw_mutex held. + * Called with writer ipc_ids.rwsem held. */ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) { @@ -312,9 +320,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, { int err; - down_write(&ids->rw_mutex); + down_write(&ids->rwsem); err = ops->getnew(ns, params); - up_write(&ids->rw_mutex); + up_write(&ids->rwsem); return err; } @@ -331,7 +339,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, * * On success, the IPC id is returned. * - * It is called with ipc_ids.rw_mutex and ipcp->lock held. + * It is called with ipc_ids.rwsem and ipcp->lock held. */ static int ipc_check_perms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, @@ -376,7 +384,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, * Take the lock as a writer since we are potentially going to add * a new entry + read locks are not "upgradable" */ - down_write(&ids->rw_mutex); + down_write(&ids->rwsem); ipcp = ipc_findkey(ids, params->key); if (ipcp == NULL) { /* key not used */ @@ -402,7 +410,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, } ipc_unlock(ipcp); } - up_write(&ids->rw_mutex); + up_write(&ids->rwsem); return err; } @@ -413,7 +421,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, * @ids: IPC identifier set * @ipcp: ipc perm structure containing the identifier to remove * - * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held * before this function is called, and remain locked on the exit. */ @@ -621,7 +629,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id) } /** - * ipc_lock - Lock an ipc structure without rw_mutex held + * ipc_lock - Lock an ipc structure without rwsem held * @ids: IPC identifier set * @id: ipc id to look for * @@ -733,7 +741,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) } /** - * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd + * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd * @ns: the ipc namespace * @ids: the table of ids where to look for the ipc * @id: the id of the ipc to retrieve @@ -746,29 +754,13 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) * It must be called without any lock held and * - retrieves the ipc with the given id in the given table. * - performs some audit and permission check, depending on the given cmd - * - returns the ipc with the ipc lock held in case of success - * or an err-code without any lock held otherwise. + * - returns a pointer to the ipc object or otherwise, the corresponding error. * - * Call holding the both the rw_mutex and the rcu read lock. + * Call holding the both the rwsem and the rcu read lock. */ -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, - struct ipc_ids *ids, int id, int cmd, - struct ipc64_perm *perm, int extra_perm) -{ - struct kern_ipc_perm *ipcp; - - ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm); - if (IS_ERR(ipcp)) - goto out; - - spin_lock(&ipcp->lock); -out: - return ipcp; -} - struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, - struct ipc_ids *ids, int id, int cmd, - struct ipc64_perm *perm, int extra_perm) + struct ipc_ids *ids, int id, int cmd, + struct ipc64_perm *perm, int extra_perm) { kuid_t euid; int err = -EPERM; @@ -884,7 +876,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) * Take the lock - this will be released by the corresponding * call to stop(). */ - down_read(&ids->rw_mutex); + down_read(&ids->rwsem); /* pos < 0 is invalid */ if (*pos < 0) @@ -911,7 +903,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it) ids = &iter->ns->ids[iface->ids]; /* Release the lock we took in start() */ - up_read(&ids->rw_mutex); + up_read(&ids->rwsem); } static int sysvipc_proc_show(struct seq_file *s, void *it) diff --git a/ipc/util.h b/ipc/util.h index b6a6a88f3002..0a362ffca972 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -94,10 +94,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header, #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) -/* must be called with ids->rw_mutex acquired for writing */ +/* must be called with ids->rwsem acquired for writing */ int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); -/* must be called with ids->rw_mutex acquired for reading */ +/* must be called with ids->rwsem acquired for reading */ int ipc_get_maxid(struct ipc_ids *); /* must be called with both locks acquired. */ @@ -131,9 +131,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm); -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, - struct ipc_ids *ids, int id, int cmd, - struct ipc64_perm *perm, int extra_perm); #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION /* On IA-64, we always use the "64-bit version" of the IPC structures. */ diff --git a/kernel/Makefile b/kernel/Makefile index 470839d1a30e..35ef1185e359 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y = fork.o exec_domain.o panic.o printk.o \ +obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ @@ -24,6 +24,7 @@ endif obj-y += sched/ obj-y += power/ +obj-y += printk/ obj-y += cpu/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o diff --git a/kernel/fork.c b/kernel/fork.c index 403d2bb8a968..9d1a5af6f132 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -351,7 +351,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; - struct mempolicy *pol; uprobe_start_dup_mmap(); down_write(&oldmm->mmap_sem); @@ -400,11 +399,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem; *tmp = *mpnt; INIT_LIST_HEAD(&tmp->anon_vma_chain); - pol = mpol_dup(vma_policy(mpnt)); - retval = PTR_ERR(pol); - if (IS_ERR(pol)) + retval = vma_dup_policy(mpnt, tmp); + if (retval) goto fail_nomem_policy; - vma_set_policy(tmp, pol); tmp->vm_mm = mm; if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; @@ -472,7 +469,7 @@ out: uprobe_end_dup_mmap(); return retval; fail_nomem_anon_vma_fork: - mpol_put(pol); + mpol_put(vma_policy(tmp)); fail_nomem_policy: kmem_cache_free(vm_area_cachep, tmp); fail_nomem: diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 383319bae3f7..deb3ad944fd1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1399,6 +1399,8 @@ retry: expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); else expires_next = ktime_add(now, delta); + if (expires_next.tv64 < 0) + expires_next.tv64 = KTIME_MAX; tick_program_event(expires_next, 1); printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile new file mode 100644 index 000000000000..85405bdcf2b3 --- /dev/null +++ b/kernel/printk/Makefile @@ -0,0 +1,2 @@ +obj-y = printk.o +obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c new file mode 100644 index 000000000000..b51087fb9ace --- /dev/null +++ b/kernel/printk/braille.c @@ -0,0 +1,48 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/console.h> +#include <linux/string.h> + +#include "console_cmdline.h" +#include "braille.h" + +char *_braille_console_setup(char **str, char **brl_options) +{ + if (!memcmp(*str, "brl,", 4)) { + *brl_options = ""; + *str += 4; + } else if (!memcmp(str, "brl=", 4)) { + *brl_options = *str + 4; + *str = strchr(*brl_options, ','); + if (!*str) + pr_err("need port name after brl=\n"); + else + *((*str)++) = 0; + } + + return *str; +} + +int +_braille_register_console(struct console *console, struct console_cmdline *c) +{ + int rtn = 0; + + if (c->brl_options) { + console->flags |= CON_BRL; + rtn = braille_register_console(console, c->index, c->options, + c->brl_options); + } + + return rtn; +} + +int +_braille_unregister_console(struct console *console) +{ + if (console->flags & CON_BRL) + return braille_unregister_console(console); + + return 0; +} diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h new file mode 100644 index 000000000000..769d771145c8 --- /dev/null +++ b/kernel/printk/braille.h @@ -0,0 +1,48 @@ +#ifndef _PRINTK_BRAILLE_H +#define _PRINTK_BRAILLE_H + +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + +static inline void +braille_set_options(struct console_cmdline *c, char *brl_options) +{ + c->brl_options = brl_options; +} + +char * +_braille_console_setup(char **str, char **brl_options); + +int +_braille_register_console(struct console *console, struct console_cmdline *c); + +int +_braille_unregister_console(struct console *console); + +#else + +static inline void +braille_set_options(struct console_cmdline *c, char *brl_options) +{ +} + +static inline char * +_braille_console_setup(char **str, char **brl_options) +{ + return NULL; +} + +static inline int +_braille_register_console(struct console *console, struct console_cmdline *c) +{ + return 0; +} + +static inline int +_braille_unregister_console(struct console *console) +{ + return 0; +} + +#endif + +#endif diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h new file mode 100644 index 000000000000..cbd69d842341 --- /dev/null +++ b/kernel/printk/console_cmdline.h @@ -0,0 +1,14 @@ +#ifndef _CONSOLE_CMDLINE_H +#define _CONSOLE_CMDLINE_H + +struct console_cmdline +{ + char name[8]; /* Name of the driver */ + int index; /* Minor dev. to use */ + char *options; /* Options for the driver */ +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + char *brl_options; /* Options for braille driver */ +#endif +}; + +#endif diff --git a/kernel/printk.c b/kernel/printk/printk.c index a9cb8976e8c1..49739db37b92 100644 --- a/kernel/printk.c +++ b/kernel/printk/printk.c @@ -51,6 +51,9 @@ #define CREATE_TRACE_POINTS #include <trace/events/printk.h> +#include "console_cmdline.h" +#include "braille.h" + /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL @@ -105,19 +108,11 @@ static struct console *exclusive_console; /* * Array of consoles built from command line options (console=) */ -struct console_cmdline -{ - char name[8]; /* Name of the driver */ - int index; /* Minor dev. to use */ - char *options; /* Options for the driver */ -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - char *brl_options; /* Options for braille driver */ -#endif -}; #define MAX_CMDLINECONSOLES 8 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; + static int selected_console = -1; static int preferred_console = -1; int console_set_on_cmdline; @@ -178,7 +173,7 @@ static int console_may_schedule; * 67 "g" * 0032 00 00 00 padding to next message header * - * The 'struct log' buffer header must never be directly exported to + * The 'struct printk_log' buffer header must never be directly exported to * userspace, it is a kernel-private implementation detail that might * need to be changed in the future, when the requirements change. * @@ -200,7 +195,7 @@ enum log_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; -struct log { +struct printk_log { u64 ts_nsec; /* timestamp in nanoseconds */ u16 len; /* length of entire record */ u16 text_len; /* length of text buffer */ @@ -248,7 +243,7 @@ static u32 clear_idx; #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) #define LOG_ALIGN 4 #else -#define LOG_ALIGN __alignof__(struct log) +#define LOG_ALIGN __alignof__(struct printk_log) #endif #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); @@ -259,35 +254,35 @@ static u32 log_buf_len = __LOG_BUF_LEN; static volatile unsigned int logbuf_cpu = UINT_MAX; /* human readable text of the record */ -static char *log_text(const struct log *msg) +static char *log_text(const struct printk_log *msg) { - return (char *)msg + sizeof(struct log); + return (char *)msg + sizeof(struct printk_log); } /* optional key/value pair dictionary attached to the record */ -static char *log_dict(const struct log *msg) +static char *log_dict(const struct printk_log *msg) { - return (char *)msg + sizeof(struct log) + msg->text_len; + return (char *)msg + sizeof(struct printk_log) + msg->text_len; } /* get record by index; idx must point to valid msg */ -static struct log *log_from_idx(u32 idx) +static struct printk_log *log_from_idx(u32 idx) { - struct log *msg = (struct log *)(log_buf + idx); + struct printk_log *msg = (struct printk_log *)(log_buf + idx); /* * A length == 0 record is the end of buffer marker. Wrap around and * read the message at the start of the buffer. */ if (!msg->len) - return (struct log *)log_buf; + return (struct printk_log *)log_buf; return msg; } /* get next record; idx must point to valid msg */ static u32 log_next(u32 idx) { - struct log *msg = (struct log *)(log_buf + idx); + struct printk_log *msg = (struct printk_log *)(log_buf + idx); /* length == 0 indicates the end of the buffer; wrap */ /* @@ -296,7 +291,7 @@ static u32 log_next(u32 idx) * return the one after that. */ if (!msg->len) { - msg = (struct log *)log_buf; + msg = (struct printk_log *)log_buf; return msg->len; } return idx + msg->len; @@ -308,11 +303,11 @@ static void log_store(int facility, int level, const char *dict, u16 dict_len, const char *text, u16 text_len) { - struct log *msg; + struct printk_log *msg; u32 size, pad_len; /* number of '\0' padding bytes to next message */ - size = sizeof(struct log) + text_len + dict_len; + size = sizeof(struct printk_log) + text_len + dict_len; pad_len = (-size) & (LOG_ALIGN - 1); size += pad_len; @@ -324,7 +319,7 @@ static void log_store(int facility, int level, else free = log_first_idx - log_next_idx; - if (free > size + sizeof(struct log)) + if (free > size + sizeof(struct printk_log)) break; /* drop old messages until we have enough contiuous space */ @@ -332,18 +327,18 @@ static void log_store(int facility, int level, log_first_seq++; } - if (log_next_idx + size + sizeof(struct log) >= log_buf_len) { + if (log_next_idx + size + sizeof(struct printk_log) >= log_buf_len) { /* * This message + an additional empty header does not fit * at the end of the buffer. Add an empty header with len == 0 * to signify a wrap around. */ - memset(log_buf + log_next_idx, 0, sizeof(struct log)); + memset(log_buf + log_next_idx, 0, sizeof(struct printk_log)); log_next_idx = 0; } /* fill message */ - msg = (struct log *)(log_buf + log_next_idx); + msg = (struct printk_log *)(log_buf + log_next_idx); memcpy(log_text(msg), text, text_len); msg->text_len = text_len; memcpy(log_dict(msg), dict, dict_len); @@ -356,7 +351,7 @@ static void log_store(int facility, int level, else msg->ts_nsec = local_clock(); memset(log_dict(msg) + dict_len, 0, pad_len); - msg->len = sizeof(struct log) + text_len + dict_len + pad_len; + msg->len = sizeof(struct printk_log) + text_len + dict_len + pad_len; /* insert message */ log_next_idx += msg->len; @@ -479,7 +474,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct devkmsg_user *user = file->private_data; - struct log *msg; + struct printk_log *msg; u64 ts_usec; size_t i; char cont = '-'; @@ -724,14 +719,14 @@ void log_buf_kexec_setup(void) VMCOREINFO_SYMBOL(log_first_idx); VMCOREINFO_SYMBOL(log_next_idx); /* - * Export struct log size and field offsets. User space tools can + * Export struct printk_log size and field offsets. User space tools can * parse it and detect any changes to structure down the line. */ - VMCOREINFO_STRUCT_SIZE(log); - VMCOREINFO_OFFSET(log, ts_nsec); - VMCOREINFO_OFFSET(log, len); - VMCOREINFO_OFFSET(log, text_len); - VMCOREINFO_OFFSET(log, dict_len); + VMCOREINFO_STRUCT_SIZE(printk_log); + VMCOREINFO_OFFSET(printk_log, ts_nsec); + VMCOREINFO_OFFSET(printk_log, len); + VMCOREINFO_OFFSET(printk_log, text_len); + VMCOREINFO_OFFSET(printk_log, dict_len); } #endif @@ -884,7 +879,7 @@ static size_t print_time(u64 ts, char *buf) (unsigned long)ts, rem_nsec / 1000); } -static size_t print_prefix(const struct log *msg, bool syslog, char *buf) +static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf) { size_t len = 0; unsigned int prefix = (msg->facility << 3) | msg->level; @@ -907,7 +902,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf) return len; } -static size_t msg_print_text(const struct log *msg, enum log_flags prev, +static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, bool syslog, char *buf, size_t size) { const char *text = log_text(msg); @@ -969,7 +964,7 @@ static size_t msg_print_text(const struct log *msg, enum log_flags prev, static int syslog_print(char __user *buf, int size) { char *text; - struct log *msg; + struct printk_log *msg; int len = 0; text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); @@ -1060,7 +1055,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) idx = clear_idx; prev = 0; while (seq < log_next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); len += msg_print_text(msg, prev, true, NULL, 0); prev = msg->flags; @@ -1073,7 +1068,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) idx = clear_idx; prev = 0; while (len > size && seq < log_next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); len -= msg_print_text(msg, prev, true, NULL, 0); prev = msg->flags; @@ -1087,7 +1082,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) len = 0; prev = 0; while (len >= 0 && seq < next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); int textlen; textlen = msg_print_text(msg, prev, true, text, @@ -1233,7 +1228,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) error = 0; while (seq < log_next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); error += msg_print_text(msg, prev, true, NULL, 0); idx = log_next(idx); @@ -1720,10 +1715,10 @@ static struct cont { u8 level; bool flushed:1; } cont; -static struct log *log_from_idx(u32 idx) { return NULL; } +static struct printk_log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } static void call_console_drivers(int level, const char *text, size_t len) {} -static size_t msg_print_text(const struct log *msg, enum log_flags prev, +static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, bool syslog, char *buf, size_t size) { return 0; } static size_t cont_print_text(char *text, size_t size) { return 0; } @@ -1762,23 +1757,23 @@ static int __add_preferred_console(char *name, int idx, char *options, * See if this tty is not yet registered, and * if we have a slot free. */ - for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) - if (strcmp(console_cmdline[i].name, name) == 0 && - console_cmdline[i].index == idx) { - if (!brl_options) - selected_console = i; - return 0; + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { + if (strcmp(c->name, name) == 0 && c->index == idx) { + if (!brl_options) + selected_console = i; + return 0; } + } if (i == MAX_CMDLINECONSOLES) return -E2BIG; if (!brl_options) selected_console = i; - c = &console_cmdline[i]; strlcpy(c->name, name, sizeof(c->name)); c->options = options; -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - c->brl_options = brl_options; -#endif + braille_set_options(c, brl_options); + c->index = idx; return 0; } @@ -1791,20 +1786,8 @@ static int __init console_setup(char *str) char *s, *options, *brl_options = NULL; int idx; -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - if (!memcmp(str, "brl,", 4)) { - brl_options = ""; - str += 4; - } else if (!memcmp(str, "brl=", 4)) { - brl_options = str + 4; - str = strchr(brl_options, ','); - if (!str) { - printk(KERN_ERR "need port name after brl=\n"); - return 1; - } - *(str++) = 0; - } -#endif + if (_braille_console_setup(&str, &brl_options)) + return 1; /* * Decode str into name, index, options. @@ -1859,15 +1842,15 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha struct console_cmdline *c; int i; - for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) - if (strcmp(console_cmdline[i].name, name) == 0 && - console_cmdline[i].index == idx) { - c = &console_cmdline[i]; - strlcpy(c->name, name_new, sizeof(c->name)); - c->name[sizeof(c->name) - 1] = 0; - c->options = options; - c->index = idx_new; - return i; + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) + if (strcmp(c->name, name) == 0 && c->index == idx) { + strlcpy(c->name, name_new, sizeof(c->name)); + c->name[sizeof(c->name) - 1] = 0; + c->options = options; + c->index = idx_new; + return i; } /* not found */ return -1; @@ -2047,7 +2030,7 @@ void console_unlock(void) console_cont_flush(text, sizeof(text)); again: for (;;) { - struct log *msg; + struct printk_log *msg; size_t len; int level; @@ -2242,6 +2225,7 @@ void register_console(struct console *newcon) int i; unsigned long flags; struct console *bcon = NULL; + struct console_cmdline *c; /* * before we register a new CON_BOOT console, make sure we don't @@ -2289,30 +2273,25 @@ void register_console(struct console *newcon) * See if this console matches one we selected on * the command line. */ - for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; - i++) { - if (strcmp(console_cmdline[i].name, newcon->name) != 0) + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { + if (strcmp(c->name, newcon->name) != 0) continue; if (newcon->index >= 0 && - newcon->index != console_cmdline[i].index) + newcon->index != c->index) continue; if (newcon->index < 0) - newcon->index = console_cmdline[i].index; -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - if (console_cmdline[i].brl_options) { - newcon->flags |= CON_BRL; - braille_register_console(newcon, - console_cmdline[i].index, - console_cmdline[i].options, - console_cmdline[i].brl_options); + newcon->index = c->index; + + if (_braille_register_console(newcon, c)) return; - } -#endif + if (newcon->setup && newcon->setup(newcon, console_cmdline[i].options) != 0) break; newcon->flags |= CON_ENABLED; - newcon->index = console_cmdline[i].index; + newcon->index = c->index; if (i == selected_console) { newcon->flags |= CON_CONSDEV; preferred_console = selected_console; @@ -2395,13 +2374,13 @@ EXPORT_SYMBOL(register_console); int unregister_console(struct console *console) { struct console *a, *b; - int res = 1; + int res; -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - if (console->flags & CON_BRL) - return braille_unregister_console(console); -#endif + res = _braille_unregister_console(console); + if (res) + return res; + res = 1; console_lock(); if (console_drivers == console) { console_drivers=console->next; @@ -2667,7 +2646,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len) { - struct log *msg; + struct printk_log *msg; size_t l = 0; bool ret = false; @@ -2779,7 +2758,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, idx = dumper->cur_idx; prev = 0; while (seq < dumper->next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); l += msg_print_text(msg, prev, true, NULL, 0); idx = log_next(idx); @@ -2792,7 +2771,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, idx = dumper->cur_idx; prev = 0; while (l > size && seq < dumper->next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); l -= msg_print_text(msg, prev, true, NULL, 0); idx = log_next(idx); @@ -2807,7 +2786,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, l = 0; prev = 0; while (seq < dumper->next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); l += msg_print_text(msg, prev, syslog, buf + l, size - l); idx = log_next(idx); diff --git a/kernel/relay.c b/kernel/relay.c index 5001c9887db1..08148fe25b13 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -339,6 +339,10 @@ static void wakeup_readers(unsigned long data) { struct rchan_buf *buf = (struct rchan_buf *)data; wake_up_interruptible(&buf->read_wait); + /* + * Stupid polling for now: + */ + mod_timer(&buf->timer, HZ / 10); } /** @@ -356,6 +360,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) init_waitqueue_head(&buf->read_wait); kref_init(&buf->kref); setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); + mod_timer(&buf->timer, HZ / 10); } else del_timer_sync(&buf->timer); @@ -739,15 +744,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) else buf->early_bytes += buf->chan->subbuf_size - buf->padding[old_subbuf]; - smp_mb(); - if (waitqueue_active(&buf->read_wait)) - /* - * Calling wake_up_interruptible() from here - * will deadlock if we happen to be logging - * from the scheduler (trying to re-grab - * rq->lock), so defer it. - */ - mod_timer(&buf->timer, jiffies + 1); } old = buf->data; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bb456f44b7b1..9565645e3202 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -851,7 +851,7 @@ void task_numa_fault(int node, int pages, bool migrated) { struct task_struct *p = current; - if (!sched_feat_numa(NUMA)) + if (!numabalancing_enabled) return; /* FIXME: Allocate task-specific structure for placement policy here */ @@ -5786,7 +5786,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) entity_tick(cfs_rq, se, queued); } - if (sched_feat_numa(NUMA)) + if (numabalancing_enabled) task_tick_numa(rq, curr); update_rq_runnable_avg(rq, 1); diff --git a/kernel/signal.c b/kernel/signal.c index 50e41075ac77..ded28b91fa53 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3394,7 +3394,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, new_ka.sa.sa_restorer = compat_ptr(restorer); #endif ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask)); - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) return -EFAULT; sigset_from_compat(&new_ka.sa.sa_mask, &mask); @@ -3406,7 +3406,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask)); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); #ifdef __ARCH_HAS_SA_RESTORER ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); diff --git a/kernel/smp.c b/kernel/smp.c index fe9f773d7114..2a3a01784bc8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -12,6 +12,7 @@ #include <linux/gfp.h> #include <linux/smp.h> #include <linux/cpu.h> +#include <linux/hardirq.h> #include "smpboot.h" @@ -48,10 +49,13 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) cpu_to_node(cpu))) return notifier_from_errno(-ENOMEM); if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, - cpu_to_node(cpu))) + cpu_to_node(cpu))) { + free_cpumask_var(cfd->cpumask); return notifier_from_errno(-ENOMEM); + } cfd->csd = alloc_percpu(struct call_single_data); if (!cfd->csd) { + free_cpumask_var(cfd->cpumask_ipi); free_cpumask_var(cfd->cpumask); return notifier_from_errno(-ENOMEM); } @@ -240,8 +244,9 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ - WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() - && !oops_in_progress); + WARN_ON_ONCE(cpu_online(this_cpu) + && (irqs_disabled() || in_serving_irq()) + && !oops_in_progress); if (cpu == this_cpu) { local_irq_save(flags); @@ -378,8 +383,9 @@ void smp_call_function_many(const struct cpumask *mask, * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ - WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() - && !oops_in_progress && !early_boot_irqs_disabled); + WARN_ON_ONCE(cpu_online(this_cpu) + && (irqs_disabled() || in_serving_irq()) + && !oops_in_progress && !early_boot_irqs_disabled); /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */ cpu = cpumask_first_and(mask, cpu_online_mask); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 1241d8c91d5e..d24c4bbb2d7c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -239,10 +239,12 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; - if (hardlockup_panic) + if (hardlockup_panic) { + trigger_all_cpu_backtrace(); panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu); - else + } else { WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); + } __this_cpu_write(hard_watchdog_warn, true); return; @@ -323,8 +325,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) else dump_stack(); - if (softlockup_panic) + if (softlockup_panic) { + trigger_all_cpu_backtrace(); panic("softlockup: hung tasks"); + } __this_cpu_write(soft_watchdog_warn, true); } else __this_cpu_write(soft_watchdog_warn, false); diff --git a/mm/filemap.c b/mm/filemap.c index 4b51ac1acae7..a6981feed8e7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -467,32 +467,34 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); if (error) - goto out; + return error; error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); - if (error == 0) { - page_cache_get(page); - page->mapping = mapping; - page->index = offset; - - spin_lock_irq(&mapping->tree_lock); - error = radix_tree_insert(&mapping->page_tree, offset, page); - if (likely(!error)) { - mapping->nrpages++; - __inc_zone_page_state(page, NR_FILE_PAGES); - spin_unlock_irq(&mapping->tree_lock); - trace_mm_filemap_add_to_page_cache(page); - } else { - page->mapping = NULL; - /* Leave page->index set: truncation relies upon it */ - spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); - page_cache_release(page); - } - radix_tree_preload_end(); - } else + if (error) { mem_cgroup_uncharge_cache_page(page); -out: + return error; + } + + page_cache_get(page); + page->mapping = mapping; + page->index = offset; + + spin_lock_irq(&mapping->tree_lock); + error = radix_tree_insert(&mapping->page_tree, offset, page); + radix_tree_preload_end(); + if (unlikely(error)) + goto err_insert; + mapping->nrpages++; + __inc_zone_page_state(page, NR_FILE_PAGES); + spin_unlock_irq(&mapping->tree_lock); + trace_mm_filemap_add_to_page_cache(page); + return 0; +err_insert: + page->mapping = NULL; + /* Leave page->index set: truncation relies upon it */ + spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); + page_cache_release(page); return error; } EXPORT_SYMBOL(add_to_page_cache_locked); @@ -637,6 +639,7 @@ void __lock_page(struct page *page) { DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); + VM_BUG_ON(PageTail(page)); __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page, TASK_UNINTERRUPTIBLE); } @@ -646,6 +649,7 @@ int __lock_page_killable(struct page *page) { DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); + VM_BUG_ON(PageTail(page)); return __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page_killable, TASK_KILLABLE); } @@ -1088,7 +1092,6 @@ static void shrink_readahead_size_eio(struct file *filp, * @filp: the file to read * @ppos: current file position * @desc: read_descriptor - * @actor: read method * * This is a generic file read routine, and uses the * mapping->a_ops->readpage() function for the actual low-level stuff. @@ -1097,7 +1100,7 @@ static void shrink_readahead_size_eio(struct file *filp, * of the logic when it comes to error handling etc. */ static void do_generic_file_read(struct file *filp, loff_t *ppos, - read_descriptor_t *desc, read_actor_t actor) + read_descriptor_t *desc) { struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; @@ -1198,13 +1201,14 @@ page_ok: * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... * - * The actor routine returns how many bytes were actually used.. + * The file_read_actor routine returns how many bytes were + * actually used.. * NOTE! This may not be the same as how much of a user buffer * we filled up (we may be padding etc), so we can only update * "pos" here (the actor routine has to update the user buffer * pointers and the remaining count). */ - ret = actor(desc, page, offset, nr); + ret = file_read_actor(desc, page, offset, nr); offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; @@ -1477,7 +1481,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, if (desc.count == 0) continue; desc.error = 0; - do_generic_file_read(filp, ppos, &desc, file_read_actor); + do_generic_file_read(filp, ppos, &desc); retval += desc.written; if (desc.error) { retval = retval ?: desc.error; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 243e710c6039..c3b8c9c16eae 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -690,11 +690,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) return pmd; } -static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma) +static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot) { pmd_t entry; - entry = mk_pmd(page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = mk_pmd(page, prot); entry = pmd_mkhuge(entry); return entry; } @@ -727,7 +726,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pte_free(mm, pgtable); } else { pmd_t entry; - entry = mk_huge_pmd(page, vma); + entry = mk_huge_pmd(page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); @@ -783,77 +783,55 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page; unsigned long haddr = address & HPAGE_PMD_MASK; - pte_t *pte; - if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) { - if (unlikely(anon_vma_prepare(vma))) - return VM_FAULT_OOM; - if (unlikely(khugepaged_enter(vma))) + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + return VM_FAULT_FALLBACK; + if (unlikely(anon_vma_prepare(vma))) + return VM_FAULT_OOM; + if (unlikely(khugepaged_enter(vma))) + return VM_FAULT_OOM; + if (!(flags & FAULT_FLAG_WRITE) && + transparent_hugepage_use_zero_page()) { + pgtable_t pgtable; + struct page *zero_page; + bool set; + pgtable = pte_alloc_one(mm, haddr); + if (unlikely(!pgtable)) return VM_FAULT_OOM; - if (!(flags & FAULT_FLAG_WRITE) && - transparent_hugepage_use_zero_page()) { - pgtable_t pgtable; - struct page *zero_page; - bool set; - pgtable = pte_alloc_one(mm, haddr); - if (unlikely(!pgtable)) - return VM_FAULT_OOM; - zero_page = get_huge_zero_page(); - if (unlikely(!zero_page)) { - pte_free(mm, pgtable); - count_vm_event(THP_FAULT_FALLBACK); - goto out; - } - spin_lock(&mm->page_table_lock); - set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, - zero_page); - spin_unlock(&mm->page_table_lock); - if (!set) { - pte_free(mm, pgtable); - put_huge_zero_page(); - } - return 0; - } - page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), - vma, haddr, numa_node_id(), 0); - if (unlikely(!page)) { + zero_page = get_huge_zero_page(); + if (unlikely(!zero_page)) { + pte_free(mm, pgtable); count_vm_event(THP_FAULT_FALLBACK); - goto out; - } - count_vm_event(THP_FAULT_ALLOC); - if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { - put_page(page); - goto out; + return VM_FAULT_FALLBACK; } - if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, - page))) { - mem_cgroup_uncharge_page(page); - put_page(page); - goto out; + spin_lock(&mm->page_table_lock); + set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, + zero_page); + spin_unlock(&mm->page_table_lock); + if (!set) { + pte_free(mm, pgtable); + put_huge_zero_page(); } - return 0; } -out: - /* - * Use __pte_alloc instead of pte_alloc_map, because we can't - * run pte_offset_map on the pmd, if an huge pmd could - * materialize from under us from a different thread. - */ - if (unlikely(pmd_none(*pmd)) && - unlikely(__pte_alloc(mm, vma, pmd, address))) - return VM_FAULT_OOM; - /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd))) - return 0; - /* - * A regular pmd is established and it can't morph into a huge pmd - * from under us anymore at this point because we hold the mmap_sem - * read mode and khugepaged takes it in write mode. So now it's - * safe to run pte_offset_map(). - */ - pte = pte_offset_map(pmd, address); - return handle_pte_fault(mm, vma, address, pte, pmd, flags); + page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), + vma, haddr, numa_node_id(), 0); + if (unlikely(!page)) { + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; + } + count_vm_event(THP_FAULT_ALLOC); + if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { + put_page(page); + return VM_FAULT_FALLBACK; + } + if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { + mem_cgroup_uncharge_page(page); + put_page(page); + return VM_FAULT_FALLBACK; + } + + return 0; } int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -1210,7 +1188,8 @@ alloc: goto out_mn; } else { pmd_t entry; - entry = mk_huge_pmd(new_page, vma); + entry = mk_huge_pmd(new_page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); pmdp_clear_flush(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); @@ -1620,7 +1599,9 @@ static void __split_huge_page_refcount(struct page *page, ((1L << PG_referenced) | (1L << PG_swapbacked) | (1L << PG_mlocked) | - (1L << PG_uptodate))); + (1L << PG_uptodate) | + (1L << PG_active) | + (1L << PG_unevictable))); page_tail->flags |= (1L << PG_dirty); /* clear PageTail before overwriting first_page */ @@ -1659,7 +1640,6 @@ static void __split_huge_page_refcount(struct page *page, BUG_ON(atomic_read(&page->_count) <= 0); __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1); - __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); ClearPageCompound(page); compound_unlock(page); @@ -2355,7 +2335,8 @@ static void collapse_huge_page(struct mm_struct *mm, __SetPageUptodate(new_page); pgtable = pmd_pgtable(_pmd); - _pmd = mk_huge_pmd(new_page, vma); + _pmd = mk_huge_pmd(new_page, vma->vm_page_prot); + _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); /* * spin_lock() below is not the equivalent of smp_wmb(), so diff --git a/mm/madvise.c b/mm/madvise.c index 7055883e6e25..936799f042cc 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -42,11 +42,11 @@ static int madvise_need_mmap_write(int behavior) * We can potentially split a vm area into separate * areas, each area with its own behavior. */ -static long madvise_behavior(struct vm_area_struct * vma, +static long madvise_behavior(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) { - struct mm_struct * mm = vma->vm_mm; + struct mm_struct *mm = vma->vm_mm; int error = 0; pgoff_t pgoff; unsigned long new_flags = vma->vm_flags; @@ -215,8 +215,8 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, /* * Schedule all required I/O operations. Do not wait for completion. */ -static long madvise_willneed(struct vm_area_struct * vma, - struct vm_area_struct ** prev, +static long madvise_willneed(struct vm_area_struct *vma, + struct vm_area_struct **prev, unsigned long start, unsigned long end) { struct file *file = vma->vm_file; @@ -270,8 +270,8 @@ static long madvise_willneed(struct vm_area_struct * vma, * An interface that causes the system to free clean pages and flush * dirty pages is already available as msync(MS_INVALIDATE). */ -static long madvise_dontneed(struct vm_area_struct * vma, - struct vm_area_struct ** prev, +static long madvise_dontneed(struct vm_area_struct *vma, + struct vm_area_struct **prev, unsigned long start, unsigned long end) { *prev = vma; @@ -459,7 +459,7 @@ madvise_behavior_valid(int behavior) SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { unsigned long end, tmp; - struct vm_area_struct * vma, *prev; + struct vm_area_struct *vma, *prev; int unmapped_error = 0; int error = -EINVAL; int write; diff --git a/mm/memory.c b/mm/memory.c index 1ce2e2a734fc..f2ab2a8b39b2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3693,7 +3693,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -int handle_pte_fault(struct mm_struct *mm, +static int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *pte, pmd_t *pmd, unsigned int flags) { @@ -3780,9 +3780,12 @@ retry: if (!pmd) return VM_FAULT_OOM; if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) { + int ret = VM_FAULT_FALLBACK; if (!vma->vm_ops) - return do_huge_pmd_anonymous_page(mm, vma, address, - pmd, flags); + ret = do_huge_pmd_anonymous_page(mm, vma, address, + pmd, flags); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; } else { pmd_t orig_pmd = *pmd; int ret; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 74310017296e..6b1d426731ae 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -732,7 +732,10 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (prev) { vma = prev; next = vma->vm_next; - continue; + if (mpol_equal(vma_policy(vma), new_pol)) + continue; + /* vma_merge() joined vma && vma->next, case 8 */ + goto replace; } if (vma->vm_start != vmstart) { err = split_vma(vma->vm_mm, vma, vmstart, 1); @@ -744,6 +747,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (err) goto out; } + replace: err = vma_replace_policy(vma, new_pol); if (err) goto out; @@ -2061,6 +2065,16 @@ retry_cpuset: } EXPORT_SYMBOL(alloc_pages_current); +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) +{ + struct mempolicy *pol = mpol_dup(vma_policy(src)); + + if (IS_ERR(pol)) + return PTR_ERR(pol); + dst->vm_policy = pol; + return 0; +} + /* * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it * rebinds the mempolicy its copying by calling mpol_rebind_policy() diff --git a/mm/mmap.c b/mm/mmap.c index fbad7b091090..e8ca6e484e52 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -865,7 +865,7 @@ again: remove_next = 1 + (end > next->vm_end); if (next->anon_vma) anon_vma_merge(vma, next); mm->map_count--; - vma_set_policy(vma, vma_policy(next)); + mpol_put(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); /* * In mprotect's case 6 (see comments on vma_merge), @@ -1202,7 +1202,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long *populate) { struct mm_struct * mm = current->mm; - struct inode *inode; vm_flags_t vm_flags; *populate = 0; @@ -1265,9 +1264,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return -EAGAIN; } - inode = file ? file_inode(file) : NULL; - if (file) { + struct inode *inode = file_inode(file); + switch (flags & MAP_TYPE) { case MAP_SHARED: if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) @@ -1327,6 +1326,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } + /* Only MAP_PRIVATE|MAP_ANONYMOUS can use MAP_GROWS */ + if ((vm_flags & VM_MAYSHARE) && (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))) + return -EINVAL; /* * Set 'VM_NORESERVE' if we should not account for the * memory use of this mapping. @@ -1476,11 +1478,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - int correct_wcount = 0; int error; struct rb_node **rb_link, *rb_parent; unsigned long charged = 0; - struct inode *inode = file ? file_inode(file) : NULL; /* Check against address space limit. */ if (!may_expand_vm(mm, len >> PAGE_SHIFT)) { @@ -1544,16 +1544,11 @@ munmap_back: vma->vm_pgoff = pgoff; INIT_LIST_HEAD(&vma->anon_vma_chain); - error = -EINVAL; /* when rejecting VM_GROWSDOWN|VM_GROWSUP */ - if (file) { - if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) - goto free_vma; if (vm_flags & VM_DENYWRITE) { error = deny_write_access(file); if (error) goto free_vma; - correct_wcount = 1; } vma->vm_file = get_file(file); error = file->f_op->mmap(file, vma); @@ -1573,8 +1568,6 @@ munmap_back: pgoff = vma->vm_pgoff; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { - if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP))) - goto free_vma; error = shmem_zero_setup(vma); if (error) goto free_vma; @@ -1596,11 +1589,10 @@ munmap_back: } vma_link(mm, vma, prev, rb_link, rb_parent); - file = vma->vm_file; - /* Once vma denies write, undo our temporary denial count */ - if (correct_wcount) - atomic_inc(&inode->i_writecount); + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); + file = vma->vm_file; out: perf_event_mmap(vma); @@ -1619,8 +1611,8 @@ out: return addr; unmap_and_free_vma: - if (correct_wcount) - atomic_inc(&inode->i_writecount); + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); vma->vm_file = NULL; fput(file); @@ -2380,7 +2372,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, int new_below) { - struct mempolicy *pol; struct vm_area_struct *new; int err = -ENOMEM; @@ -2404,12 +2395,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); } - pol = mpol_dup(vma_policy(vma)); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); + err = vma_dup_policy(vma, new); + if (err) goto out_free_vma; - } - vma_set_policy(new, pol); if (anon_vma_clone(new, vma)) goto out_free_mpol; @@ -2437,7 +2425,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, fput(new->vm_file); unlink_anon_vmas(new); out_free_mpol: - mpol_put(pol); + mpol_put(vma_policy(new)); out_free_vma: kmem_cache_free(vm_area_cachep, new); out_err: @@ -2780,7 +2768,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma, *prev; struct rb_node **rb_link, *rb_parent; - struct mempolicy *pol; bool faulted_in_anon_vma = true; /* @@ -2825,10 +2812,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff; - pol = mpol_dup(vma_policy(vma)); - if (IS_ERR(pol)) + if (vma_dup_policy(vma, new_vma)) goto out_free_vma; - vma_set_policy(new_vma, pol); INIT_LIST_HEAD(&new_vma->anon_vma_chain); if (anon_vma_clone(new_vma, vma)) goto out_free_mempol; @@ -2843,7 +2828,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return new_vma; out_free_mempol: - mpol_put(pol); + mpol_put(vma_policy(new_vma)); out_free_vma: kmem_cache_free(vm_area_cachep, new_vma); return NULL; diff --git a/mm/rmap.c b/mm/rmap.c index cd356df4f71a..706647014cc4 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1055,11 +1055,11 @@ void do_page_add_anon_rmap(struct page *page, { int first = atomic_inc_and_test(&page->_mapcount); if (first) { - if (!PageTransHuge(page)) - __inc_zone_page_state(page, NR_ANON_PAGES); - else + if (PageTransHuge(page)) __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, + hpage_nr_pages(page)); } if (unlikely(PageKsm(page))) return; @@ -1088,10 +1088,10 @@ void page_add_new_anon_rmap(struct page *page, VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); SetPageSwapBacked(page); atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ - if (!PageTransHuge(page)) - __inc_zone_page_state(page, NR_ANON_PAGES); - else + if (PageTransHuge(page)) __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, + hpage_nr_pages(page)); __page_set_anon_rmap(page, vma, address, 1); if (!mlocked_vma_newpage(vma, page)) { SetPageActive(page); @@ -1151,11 +1151,11 @@ void page_remove_rmap(struct page *page) goto out; if (anon) { mem_cgroup_uncharge_page(page); - if (!PageTransHuge(page)) - __dec_zone_page_state(page, NR_ANON_PAGES); - else + if (PageTransHuge(page)) __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, + hpage_nr_pages(page)); } else { __dec_zone_page_state(page, NR_FILE_MAPPED); mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED); diff --git a/mm/shmem.c b/mm/shmem.c index a87990cf9f94..eb6ff2f37de0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1464,7 +1464,7 @@ shmem_write_end(struct file *file, struct address_space *mapping, return copied; } -static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) +static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc) { struct inode *inode = file_inode(filp); struct address_space *mapping = inode->i_mapping; @@ -1546,13 +1546,14 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... * - * The actor routine returns how many bytes were actually used.. + * The file_read_actor routine returns how many bytes were actually + * used.. * NOTE! This may not be the same as how much of a user buffer * we filled up (we may be padding etc), so we can only update - * "pos" here (the actor routine has to update the user buffer + * "pos" here (file_read_actor has to update the user buffer * pointers and the remaining count). */ - ret = actor(desc, page, offset, nr); + ret = file_read_actor(desc, page, offset, nr); offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; @@ -1590,7 +1591,7 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, if (desc.count == 0) continue; desc.error = 0; - do_shmem_file_read(filp, ppos, &desc, file_read_actor); + do_shmem_file_read(filp, ppos, &desc); retval += desc.written; if (desc.error) { retval = retval ?: desc.error; diff --git a/mm/swap.c b/mm/swap.c index 4a1d0d2c52fa..62b78a6e224f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -512,12 +512,7 @@ EXPORT_SYMBOL(__lru_cache_add); */ void lru_cache_add(struct page *page) { - if (PageActive(page)) { - VM_BUG_ON(PageUnevictable(page)); - } else if (PageUnevictable(page)) { - VM_BUG_ON(PageActive(page)); - } - + VM_BUG_ON(PageActive(page) && PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); __lru_cache_add(page); } @@ -539,6 +534,7 @@ void add_page_to_unevictable_list(struct page *page) spin_lock_irq(&zone->lru_lock); lruvec = mem_cgroup_page_lruvec(page, zone); + ClearPageActive(page); SetPageUnevictable(page); SetPageLRU(page); add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); @@ -774,8 +770,6 @@ EXPORT_SYMBOL(__pagevec_release); void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *list) { - int uninitialized_var(active); - enum lru_list lru; const int file = 0; VM_BUG_ON(!PageHead(page)); @@ -787,20 +781,6 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, if (!list) SetPageLRU(page_tail); - if (page_evictable(page_tail)) { - if (PageActive(page)) { - SetPageActive(page_tail); - active = 1; - lru = LRU_ACTIVE_ANON; - } else { - active = 0; - lru = LRU_INACTIVE_ANON; - } - } else { - SetPageUnevictable(page_tail); - lru = LRU_UNEVICTABLE; - } - if (likely(PageLRU(page))) list_add_tail(&page_tail->lru, &page->lru); else if (list) { @@ -816,13 +796,13 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, * Use the standard add function to put page_tail on the list, * but then correct its position so they all end up in order. */ - add_page_to_lru_list(page_tail, lruvec, lru); + add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail)); list_head = page_tail->lru.prev; list_move_tail(&page_tail->lru, list_head); } if (!PageUnevictable(page)) - update_page_reclaim_stat(lruvec, file, active); + update_page_reclaim_stat(lruvec, file, PageActive(page_tail)); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -833,7 +813,6 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, int active = PageActive(page); enum lru_list lru = page_lru(page); - VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); diff --git a/mm/swap_state.c b/mm/swap_state.c index f24ab0dff554..9a22fc65ffa4 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void) return ret; } +static atomic_t swapin_readahead_hits = ATOMIC_INIT(4); + void show_swap_cache_info(void) { printk("%lu pages in swap cache\n", total_swapcache_pages()); @@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry) page = find_get_page(swap_address_space(entry), entry.val); - if (page) + if (page) { INC_CACHE_INFO(find_success); + if (TestClearPageReadahead(page)) + atomic_inc(&swapin_readahead_hits); + } INC_CACHE_INFO(find_total); return page; @@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, return found_page; } +static unsigned long swapin_nr_pages(unsigned long offset) +{ + static unsigned long prev_offset; + unsigned int pages, max_pages, last_ra; + static atomic_t last_readahead_pages; + + max_pages = 1 << ACCESS_ONCE(page_cluster); + if (max_pages <= 1) + return 1; + + /* + * This heuristic has been found to work well on both sequential and + * random loads, swapping to hard disk or to SSD: please don't ask + * what the "+ 2" means, it just happens to work well, that's all. + */ + pages = atomic_xchg(&swapin_readahead_hits, 0) + 2; + if (pages == 2) { + /* + * We can have no readahead hits to judge by: but must not get + * stuck here forever, so check for an adjacent offset instead + * (and don't even bother to check whether swap type is same). + */ + if (offset != prev_offset + 1 && offset != prev_offset - 1) + pages = 1; + prev_offset = offset; + } else { + unsigned int roundup = 4; + while (roundup < pages) + roundup <<= 1; + pages = roundup; + } + + if (pages > max_pages) + pages = max_pages; + + /* Don't shrink readahead too fast */ + last_ra = atomic_read(&last_readahead_pages) / 2; + if (pages < last_ra) + pages = last_ra; + atomic_set(&last_readahead_pages, pages); + + return pages; +} + /** * swapin_readahead - swap in pages in hope we need them soon * @entry: swap entry of this memory @@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr) { struct page *page; - unsigned long offset = swp_offset(entry); + unsigned long entry_offset = swp_offset(entry); + unsigned long offset = entry_offset; unsigned long start_offset, end_offset; - unsigned long mask = (1UL << page_cluster) - 1; + unsigned long mask; struct blk_plug plug; + mask = swapin_nr_pages(offset) - 1; + if (!mask) + goto skip; + /* Read a page_cluster sized and aligned cluster around offset. */ start_offset = offset & ~mask; end_offset = offset | mask; @@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, gfp_mask, vma, addr); if (!page) continue; + if (offset != entry_offset) + SetPageReadahead(page); page_cache_release(page); } blk_finish_plug(&plug); lru_add_drain(); /* Push any new pages onto the LRU now */ +skip: return read_swap_cache_async(entry, gfp_mask, vma, addr); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 36af6eeaa67e..4383db8ba8d0 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -527,16 +527,16 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry) return p; bad_free: - printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val); + pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val); goto out; bad_offset: - printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val); + pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val); goto out; bad_device: - printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val); + pr_err("swap_free: %s%08lx\n", Unused_file, entry.val); goto out; bad_nofile: - printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val); + pr_err("swap_free: %s%08lx\n", Bad_file, entry.val); out: return NULL; } @@ -1911,9 +1911,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p, int i; unsigned long maxpages; unsigned long swapfilepages; + unsigned long last_page; if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { - printk(KERN_ERR "Unable to find swap-space signature\n"); + pr_err("Unable to find swap-space signature\n"); return 0; } @@ -1927,9 +1928,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p, } /* Check the swap header's sub-version */ if (swap_header->info.version != 1) { - printk(KERN_WARNING - "Unable to handle swap header version %d\n", - swap_header->info.version); + pr_warn("Unable to handle swap header version %d\n", + swap_header->info.version); return 0; } @@ -1953,8 +1953,14 @@ static unsigned long read_swap_header(struct swap_info_struct *p, */ maxpages = swp_offset(pte_to_swp_entry( swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; - if (maxpages > swap_header->info.last_page) { - maxpages = swap_header->info.last_page + 1; + last_page = swap_header->info.last_page; + if (last_page > maxpages) { + pr_warn("Truncating oversized swap area, only using %luk out of %luk\n", + maxpages << (PAGE_SHIFT - 10), + last_page << (PAGE_SHIFT - 10)); + } + if (maxpages > last_page) { + maxpages = last_page + 1; /* p->max is an unsigned int: don't overflow it */ if ((unsigned int)maxpages == 0) maxpages = UINT_MAX; @@ -1965,8 +1971,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p, return 0; swapfilepages = i_size_read(inode) >> PAGE_SHIFT; if (swapfilepages && maxpages > swapfilepages) { - printk(KERN_WARNING - "Swap area shorter than signature indicates\n"); + pr_warn("Swap area shorter than signature indicates\n"); return 0; } if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) @@ -2009,7 +2014,7 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, nr_good_pages = p->pages; } if (!nr_good_pages) { - printk(KERN_WARNING "Empty swap-file\n"); + pr_warn("Empty swap-file\n"); return -EINVAL; } @@ -2163,8 +2168,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (p->flags & SWP_AREA_DISCARD) { int err = discard_swap(p); if (unlikely(err)) - printk(KERN_ERR - "swapon: discard_swap(%p): %d\n", + pr_err("swapon: discard_swap(%p): %d\n", p, err); } } @@ -2177,7 +2181,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; enable_swap_info(p, prio, swap_map, frontswap_map); - printk(KERN_INFO "Adding %uk swap on %s. " + pr_info("Adding %uk swap on %s. " "Priority:%d extents:%d across:%lluk %s%s%s%s%s\n", p->pages<<(PAGE_SHIFT-10), name->name, p->prio, nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), @@ -2311,7 +2315,7 @@ out: return err; bad_file: - printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val); + pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val); goto out; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 20c2ef4458fa..00382c53f582 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -817,6 +817,11 @@ const char * const vmstat_text[] = { "thp_zero_page_alloc", "thp_zero_page_alloc_failed", #endif + "nr_tlb_remote_flush", + "nr_tlb_remote_flush_received", + "nr_tlb_local_flush_all", + "nr_tlb_local_flush_one", + "nr_tlb_local_flush_one_kernel", #endif /* CONFIG_VM_EVENTS_COUNTERS */ }; diff --git a/mm/zbud.c b/mm/zbud.c index 9bb4710e3589..ad1e781284fd 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -257,7 +257,7 @@ int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp, if (size <= 0 || gfp & __GFP_HIGHMEM) return -EINVAL; - if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED) + if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) return -ENOSPC; chunks = size_to_chunks(size); spin_lock(&pool->lock); diff --git a/mm/zswap.c b/mm/zswap.c index deda2b671e12..efed4c8b7f5b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -409,7 +409,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry, struct page **retpage) { struct page *found_page, *new_page = NULL; - struct address_space *swapper_space = &swapper_spaces[swp_type(entry)]; + struct address_space *swapper_space = swap_address_space(entry); int err; *retpage = NULL; diff --git a/net/socket.c b/net/socket.c index 829b460acb87..c64d03f1772c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3072,12 +3072,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, uifmap32 = &uifr32->ifr_ifru.ifru_map; err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); - err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= __get_user(ifr.ifr_map.port, &uifmap32->port); + err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); + err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); + err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); + err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); + err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); + err |= get_user(ifr.ifr_map.port, &uifmap32->port); if (err) return -EFAULT; @@ -3088,12 +3088,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); - err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= __put_user(ifr.ifr_map.port, &uifmap32->port); + err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); + err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); + err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); + err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); + err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); + err |= put_user(ifr.ifr_map.port, &uifmap32->port); if (err) err = -EFAULT; } @@ -3167,25 +3167,25 @@ static int routing_ioctl(struct net *net, struct socket *sock, struct in6_rtmsg32 __user *ur6 = argp; ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3 * sizeof(struct in6_addr)); - ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); r = (void *) &r6; } else { /* ipv4 */ struct rtentry32 __user *ur4 = argp; ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3 * sizeof(struct sockaddr)); - ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); - ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); - ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); - ret |= __get_user(r4.rt_window, &(ur4->rt_window)); - ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); - ret |= __get_user(rtdev, &(ur4->rt_dev)); + ret |= get_user(r4.rt_flags, &(ur4->rt_flags)); + ret |= get_user(r4.rt_metric, &(ur4->rt_metric)); + ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu)); + ret |= get_user(r4.rt_window, &(ur4->rt_window)); + ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt)); + ret |= get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { ret |= copy_from_user(devname, compat_ptr(rtdev), 15); r4.rt_dev = (char __user __force *)devname; |