summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2015-01-08 15:31:13 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2015-01-08 15:31:13 +1100
commit7c6984c6bbd5b2d4bd13e964443f8948a5ffeb5b (patch)
treeb0a065b1aff42b2eb72cc83699ce46423cda9f7b
parentb4b04f8e6705143e5e17b2d29cc48fcfdcba2321 (diff)
parent1d9e76e2eb3f743709e3500fad0c456d05b3aea2 (diff)
Merge branch 'akpm-current/current'
-rw-r--r--.mailmap1
-rw-r--r--Documentation/cachetlb.txt8
-rw-r--r--Documentation/devicetree/bindings/rtc/isil,isl12057.txt78
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt16
-rw-r--r--Documentation/filesystems/fiemap.txt3
-rw-r--r--Documentation/filesystems/ocfs2.txt4
-rw-r--r--Documentation/filesystems/proc.txt6
-rw-r--r--Documentation/filesystems/vfat.txt10
-rw-r--r--Documentation/leds/leds-class.txt3
-rw-r--r--Documentation/printk-formats.txt6
-rw-r--r--Documentation/vm/pagemap.txt8
-rw-r--r--Documentation/vm/remap_file_pages.txt7
-rw-r--r--MAINTAINERS12
-rw-r--r--arch/alpha/include/asm/pgtable.h7
-rw-r--r--arch/alpha/include/asm/thread_info.h5
-rw-r--r--arch/alpha/include/uapi/asm/mman.h1
-rw-r--r--arch/alpha/kernel/signal.c2
-rw-r--r--arch/arc/include/asm/pgtable.h13
-rw-r--r--arch/arc/include/asm/thread_info.h4
-rw-r--r--arch/arc/kernel/signal.c2
-rw-r--r--arch/arm/boot/dts/armada-370-netgear-rn102.dts1
-rw-r--r--arch/arm/boot/dts/armada-370-netgear-rn104.dts1
-rw-r--r--arch/arm/boot/dts/armada-xp-netgear-rn2120.dts1
-rw-r--r--arch/arm/include/asm/pgtable-2level.h1
-rw-r--r--arch/arm/include/asm/pgtable-3level.h2
-rw-r--r--arch/arm/include/asm/pgtable-nommu.h2
-rw-r--r--arch/arm/include/asm/pgtable.h20
-rw-r--r--arch/arm/include/asm/thread_info.h4
-rw-r--r--arch/arm/kernel/signal.c4
-rw-r--r--arch/arm/mm/proc-macros.S2
-rw-r--r--arch/arm64/include/asm/pgtable.h24
-rw-r--r--arch/arm64/include/asm/thread_info.h4
-rw-r--r--arch/arm64/kernel/signal.c2
-rw-r--r--arch/arm64/kernel/signal32.c4
-rw-r--r--arch/avr32/include/asm/pgtable.h25
-rw-r--r--arch/avr32/include/asm/thread_info.h4
-rw-r--r--arch/avr32/kernel/asm-offsets.c1
-rw-r--r--arch/avr32/kernel/signal.c2
-rw-r--r--arch/blackfin/include/asm/pgtable.h5
-rw-r--r--arch/blackfin/include/asm/thread_info.h4
-rw-r--r--arch/blackfin/kernel/signal.c2
-rw-r--r--arch/blackfin/mach-bf533/boards/stamp.c1
-rw-r--r--arch/c6x/include/asm/pgtable.h5
-rw-r--r--arch/c6x/include/asm/thread_info.h4
-rw-r--r--arch/c6x/kernel/signal.c2
-rw-r--r--arch/cris/arch-v10/kernel/signal.c2
-rw-r--r--arch/cris/arch-v32/kernel/signal.c2
-rw-r--r--arch/cris/include/arch-v10/arch/mmu.h3
-rw-r--r--arch/cris/include/arch-v32/arch/mmu.h3
-rw-r--r--arch/cris/include/asm/pgtable.h4
-rw-r--r--arch/cris/include/asm/thread_info.h4
-rw-r--r--arch/frv/include/asm/pgtable.h27
-rw-r--r--arch/frv/include/asm/string.h1
-rw-r--r--arch/frv/include/asm/thread_info.h4
-rw-r--r--arch/frv/kernel/asm-offsets.c1
-rw-r--r--arch/frv/kernel/signal.c2
-rw-r--r--arch/frv/mm/extable.c23
-rw-r--r--arch/hexagon/include/asm/pgtable.h60
-rw-r--r--arch/hexagon/include/asm/thread_info.h4
-rw-r--r--arch/hexagon/kernel/signal.c2
-rw-r--r--arch/ia64/include/asm/pgtable.h25
-rw-r--r--arch/ia64/include/asm/thread_info.h4
-rw-r--r--arch/ia64/kernel/signal.c2
-rw-r--r--arch/m32r/include/asm/pgtable-2level.h4
-rw-r--r--arch/m32r/include/asm/pgtable.h11
-rw-r--r--arch/m32r/include/asm/thread_info.h5
-rw-r--r--arch/m32r/kernel/signal.c2
-rw-r--r--arch/m68k/include/asm/mcf_pgtable.h23
-rw-r--r--arch/m68k/include/asm/motorola_pgtable.h15
-rw-r--r--arch/m68k/include/asm/pgtable_no.h2
-rw-r--r--arch/m68k/include/asm/sun3_pgtable.h15
-rw-r--r--arch/m68k/include/asm/thread_info.h4
-rw-r--r--arch/m68k/kernel/signal.c4
-rw-r--r--arch/metag/include/asm/pgtable.h6
-rw-r--r--arch/metag/include/asm/thread_info.h6
-rw-r--r--arch/metag/kernel/signal.c2
-rw-r--r--arch/microblaze/include/asm/pgtable.h11
-rw-r--r--arch/microblaze/include/asm/thread_info.h4
-rw-r--r--arch/microblaze/kernel/signal.c2
-rw-r--r--arch/mips/include/asm/pgtable-32.h36
-rw-r--r--arch/mips/include/asm/pgtable-64.h9
-rw-r--r--arch/mips/include/asm/pgtable-bits.h9
-rw-r--r--arch/mips/include/asm/pgtable.h2
-rw-r--r--arch/mips/include/asm/thread_info.h4
-rw-r--r--arch/mips/include/uapi/asm/mman.h1
-rw-r--r--arch/mips/kernel/asm-offsets.c1
-rw-r--r--arch/mips/kernel/signal.c2
-rw-r--r--arch/mips/kernel/signal32.c2
-rw-r--r--arch/mn10300/include/asm/pgtable.h17
-rw-r--r--arch/mn10300/include/asm/thread_info.h4
-rw-r--r--arch/mn10300/kernel/asm-offsets.c1
-rw-r--r--arch/mn10300/kernel/signal.c2
-rw-r--r--arch/nios2/include/asm/pgtable-bits.h1
-rw-r--r--arch/nios2/include/asm/pgtable.h10
-rw-r--r--arch/openrisc/include/asm/pgtable.h8
-rw-r--r--arch/openrisc/include/asm/thread_info.h4
-rw-r--r--arch/openrisc/kernel/head.S5
-rw-r--r--arch/openrisc/kernel/signal.c2
-rw-r--r--arch/parisc/include/asm/pgtable.h10
-rw-r--r--arch/parisc/include/asm/thread_info.h4
-rw-r--r--arch/parisc/include/uapi/asm/mman.h1
-rw-r--r--arch/parisc/kernel/signal.c2
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc32.h9
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h7
-rw-r--r--arch/powerpc/include/asm/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/pte-40x.h1
-rw-r--r--arch/powerpc/include/asm/pte-44x.h5
-rw-r--r--arch/powerpc/include/asm/pte-8xx.h1
-rw-r--r--arch/powerpc/include/asm/pte-book3e.h1
-rw-r--r--arch/powerpc/include/asm/pte-fsl-booke.h3
-rw-r--r--arch/powerpc/include/asm/pte-hash32.h1
-rw-r--r--arch/powerpc/include/asm/pte-hash64.h1
-rw-r--r--arch/powerpc/include/asm/thread_info.h4
-rw-r--r--arch/powerpc/kernel/signal_32.c4
-rw-r--r--arch/powerpc/kernel/signal_64.c2
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
-rw-r--r--arch/s390/include/asm/pgtable.h29
-rw-r--r--arch/s390/include/asm/string.h1
-rw-r--r--arch/s390/include/asm/thread_info.h4
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/s390/kernel/signal.c2
-rw-r--r--arch/score/include/asm/pgtable-bits.h1
-rw-r--r--arch/score/include/asm/pgtable.h18
-rw-r--r--arch/score/include/asm/thread_info.h4
-rw-r--r--arch/score/kernel/asm-offsets.c1
-rw-r--r--arch/score/kernel/signal.c2
-rw-r--r--arch/sh/include/asm/pgtable_32.h30
-rw-r--r--arch/sh/include/asm/pgtable_64.h9
-rw-r--r--arch/sh/include/asm/thread_info.h4
-rw-r--r--arch/sh/kernel/asm-offsets.c1
-rw-r--r--arch/sh/kernel/signal_32.c4
-rw-r--r--arch/sh/kernel/signal_64.c4
-rw-r--r--arch/sparc/include/asm/pgtable_32.h24
-rw-r--r--arch/sparc/include/asm/pgtable_64.h49
-rw-r--r--arch/sparc/include/asm/pgtsrmmu.h14
-rw-r--r--arch/sparc/include/asm/thread_info_32.h6
-rw-r--r--arch/sparc/include/asm/thread_info_64.h12
-rw-r--r--arch/sparc/kernel/signal32.c4
-rw-r--r--arch/sparc/kernel/signal_32.c2
-rw-r--r--arch/sparc/kernel/signal_64.c2
-rw-r--r--arch/sparc/kernel/traps_64.c2
-rw-r--r--arch/tile/include/asm/pgtable.h11
-rw-r--r--arch/tile/include/asm/thread_info.h4
-rw-r--r--arch/tile/kernel/signal.c2
-rw-r--r--arch/tile/mm/homecache.c4
-rw-r--r--arch/um/include/asm/pgtable-2level.h9
-rw-r--r--arch/um/include/asm/pgtable-3level.h20
-rw-r--r--arch/um/include/asm/pgtable.h9
-rw-r--r--arch/um/include/asm/thread_info.h4
-rw-r--r--arch/unicore32/include/asm/pgtable-hwdef.h1
-rw-r--r--arch/unicore32/include/asm/pgtable.h14
-rw-r--r--arch/unicore32/include/asm/thread_info.h4
-rw-r--r--arch/unicore32/kernel/signal.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/pgtable-2level.h38
-rw-r--r--arch/x86/include/asm/pgtable-3level.h12
-rw-r--r--arch/x86/include/asm/pgtable.h25
-rw-r--r--arch/x86/include/asm/pgtable_64.h6
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/um/signal.c2
-rw-r--r--arch/xtensa/include/asm/pgtable.h10
-rw-r--r--arch/xtensa/include/asm/thread_info.h5
-rw-r--r--arch/xtensa/include/uapi/asm/mman.h1
-rw-r--r--arch/xtensa/kernel/signal.c2
-rw-r--r--block/genhd.c2
-rw-r--r--drivers/block/zram/zram_drv.c22
-rw-r--r--drivers/char/Kconfig9
-rw-r--r--drivers/char/mem.c18
-rw-r--r--drivers/gpu/drm/drm_vma_manager.c3
-rw-r--r--drivers/input/Kconfig9
-rw-r--r--drivers/input/Makefile3
-rw-r--r--drivers/input/input.c9
-rw-r--r--drivers/input/leds.c272
-rw-r--r--drivers/leds/Kconfig3
-rw-r--r--drivers/misc/ti-st/st_core.c2
-rw-r--r--drivers/rtc/class.c24
-rw-r--r--drivers/rtc/interface.c13
-rw-r--r--drivers/rtc/rtc-at91sam9.c2
-rw-r--r--drivers/rtc/rtc-imxdi.c50
-rw-r--r--drivers/rtc/rtc-isl12057.c348
-rw-r--r--drivers/rtc/rtc-pcf2123.c10
-rw-r--r--drivers/tty/Kconfig4
-rw-r--r--drivers/tty/vt/keyboard.c110
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/affs/amigaffs.c6
-rw-r--r--fs/affs/dir.c8
-rw-r--r--fs/affs/file.c42
-rw-r--r--fs/affs/inode.c5
-rw-r--r--fs/affs/namei.c18
-rw-r--r--fs/befs/linuxvfs.c6
-rw-r--r--fs/btrfs/file.c1
-rw-r--r--fs/ceph/addr.c1
-rw-r--r--fs/cifs/file.c1
-rw-r--r--fs/coda/dir.c138
-rw-r--r--fs/ext4/file.c1
-rw-r--r--fs/ext4/fsync.c5
-rw-r--r--fs/f2fs/file.c1
-rw-r--r--fs/fat/cache.c79
-rw-r--r--fs/fat/dir.c2
-rw-r--r--fs/fat/fat.h6
-rw-r--r--fs/fat/file.c61
-rw-r--r--fs/fat/inode.c77
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/fuse/file.c1
-rw-r--r--fs/gfs2/file.c1
-rw-r--r--fs/inode.c1
-rw-r--r--fs/ioctl.c5
-rw-r--r--fs/jffs2/scan.c5
-rw-r--r--fs/mpage.c23
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nilfs2/file.c1
-rw-r--r--fs/notify/fanotify/fanotify.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c35
-rw-r--r--fs/ocfs2/acl.c14
-rw-r--r--fs/ocfs2/aops.c258
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h12
-rw-r--r--fs/ocfs2/dlm/dlmast.c6
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c10
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c12
-rw-r--r--fs/ocfs2/dlm/dlmthread.c10
-rw-r--r--fs/ocfs2/file.c110
-rw-r--r--fs/ocfs2/file.h9
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/journal.c115
-rw-r--r--fs/ocfs2/journal.h5
-rw-r--r--fs/ocfs2/mmap.c1
-rw-r--r--fs/ocfs2/namei.c391
-rw-r--r--fs/ocfs2/namei.h5
-rw-r--r--fs/ocfs2/ocfs2.h17
-rw-r--r--fs/ocfs2/ocfs2_fs.h5
-rw-r--r--fs/ocfs2/ocfs2_trace.h3
-rw-r--r--fs/ocfs2/super.c19
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/page.c16
-rw-r--r--fs/proc/task_mmu.c32
-rw-r--r--fs/proc/vmcore.c8
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/select.c2
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ufs/super.c4
-rw-r--r--fs/xfs/xfs_file.c1
-rw-r--r--include/asm-generic/pgtable.h15
-rw-r--r--include/linux/bitmap.h52
-rw-r--r--include/linux/cpumask.h22
-rw-r--r--include/linux/crc64_ecma.h56
-rw-r--r--include/linux/cryptohash.h2
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/huge_mm.h16
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--include/linux/init_task.h3
-rw-r--r--include/linux/input.h28
-rw-r--r--include/linux/kexec.h4
-rw-r--r--include/linux/memcontrol.h19
-rw-r--r--include/linux/mm.h44
-rw-r--r--include/linux/mm_types.h20
-rw-r--r--include/linux/mmzone.h2
-rw-r--r--include/linux/nodemask.h26
-rw-r--r--include/linux/oom.h5
-rw-r--r--include/linux/printk.h6
-rw-r--r--include/linux/rbtree.h2
-rw-r--r--include/linux/rmap.h21
-rw-r--r--include/linux/rtc.h4
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/slab.h7
-rw-r--r--include/linux/string.h4
-rw-r--r--include/linux/string_helpers.h4
-rw-r--r--include/linux/swap.h15
-rw-r--r--include/linux/swapops.h4
-rw-r--r--include/linux/vm_event_item.h1
-rw-r--r--include/linux/writeback.h1
-rw-r--r--include/linux/zpool.h5
-rw-r--r--include/linux/zsmalloc.h2
-rw-r--r--include/uapi/asm-generic/fcntl.h2
-rw-r--r--include/uapi/asm-generic/mman-common.h1
-rw-r--r--include/uapi/linux/kernel-page-flags.h1
-rw-r--r--init/Kconfig16
-rw-r--r--init/main.c5
-rw-r--r--kernel/compat.c5
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/kexec.c6
-rw-r--r--kernel/params.c2
-rw-r--r--kernel/printk/printk.c12
-rw-r--r--kernel/ptrace.c1
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/time/alarmtimer.c2
-rw-r--r--kernel/time/hrtimer.c2
-rw-r--r--kernel/time/posix-cpu-timers.c3
-rw-r--r--lib/Kconfig7
-rw-r--r--lib/Kconfig.debug3
-rw-r--r--lib/Makefile5
-rw-r--r--lib/bitmap.c138
-rw-r--r--lib/crc64_ecma.c341
-rw-r--r--lib/dynamic_queue_limits.c4
-rw-r--r--lib/gen_crc32table.c6
-rw-r--r--lib/genalloc.c3
-rw-r--r--lib/halfmd4.c2
-rw-r--r--lib/hexdump.c105
-rw-r--r--lib/idr.c1
-rw-r--r--lib/interval_tree.c4
-rw-r--r--lib/kobject_uevent.c1
-rw-r--r--lib/lcm.c2
-rw-r--r--lib/list_sort.c7
-rw-r--r--lib/llist.c1
-rw-r--r--lib/md5.c2
-rw-r--r--lib/nlattr.c1
-rw-r--r--lib/percpu_ida.c3
-rw-r--r--lib/plist.c1
-rw-r--r--lib/radix-tree.c2
-rw-r--r--lib/show_mem.c1
-rw-r--r--lib/sort.c6
-rw-r--r--lib/stmp_device.c3
-rw-r--r--lib/string.c8
-rw-r--r--lib/string_helpers.c26
-rw-r--r--lib/strncpy_from_user.c3
-rw-r--r--lib/test-hexdump.c180
-rw-r--r--lib/vsprintf.c20
-rw-r--r--mm/Kconfig10
-rw-r--r--mm/Kconfig.debug9
-rw-r--r--mm/Makefile2
-rw-r--r--mm/debug.c1
-rw-r--r--mm/filemap.c1
-rw-r--r--mm/filemap_xip.c1
-rw-r--r--mm/fremap.c283
-rw-r--r--mm/gup.c2
-rw-r--r--mm/huge_memory.c42
-rw-r--r--mm/hugetlb.c2
-rw-r--r--mm/interval_tree.c34
-rw-r--r--mm/ksm.c2
-rw-r--r--mm/madvise.c163
-rw-r--r--mm/memcontrol.c269
-rw-r--r--mm/memory.c281
-rw-r--r--mm/memory_hotplug.c11
-rw-r--r--mm/migrate.c32
-rw-r--r--mm/mincore.c9
-rw-r--r--mm/mmap.c93
-rw-r--r--mm/mprotect.c2
-rw-r--r--mm/mremap.c2
-rw-r--r--mm/msync.c5
-rw-r--r--mm/nommu.c8
-rw-r--r--mm/oom_kill.c13
-rw-r--r--mm/page-writeback.c55
-rw-r--r--mm/page_alloc.c147
-rw-r--r--mm/page_isolation.c7
-rw-r--r--mm/rmap.c325
-rw-r--r--mm/shmem.c3
-rw-r--r--mm/slab.h4
-rw-r--r--mm/slab_common.c149
-rw-r--r--mm/swap.c4
-rw-r--r--mm/util.c30
-rw-r--r--mm/vmscan.c90
-rw-r--r--mm/vmstat.c1
-rw-r--r--mm/zbud.c3
-rw-r--r--mm/zpool.c6
-rw-r--r--mm/zsmalloc.c239
-rw-r--r--mm/zswap.c5
-rwxr-xr-xscripts/checkpatch.pl22
-rw-r--r--tools/vm/page-types.c1
365 files changed, 4449 insertions, 3040 deletions
diff --git a/.mailmap b/.mailmap
index ada8ad696b2e..d357e1bd2a43 100644
--- a/.mailmap
+++ b/.mailmap
@@ -51,6 +51,7 @@ Greg Kroah-Hartman <gregkh@suse.de>
Greg Kroah-Hartman <greg@kroah.com>
Henk Vergonet <Henk.Vergonet@gmail.com>
Henrik Kretzschmar <henne@nachtwindheim.de>
+Henrik Rydberg <rydberg@bitmath.org>
Herbert Xu <herbert@gondor.apana.org.au>
Jacob Shin <Jacob.Shin@amd.com>
James Bottomley <jejb@mulgrave.(none)>
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index d79b008e4a32..3f9f808b5119 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -317,10 +317,10 @@ maps this page at its virtual address.
about doing this.
The idea is, first at flush_dcache_page() time, if
- page->mapping->i_mmap is an empty tree and ->i_mmap_nonlinear
- an empty list, just mark the architecture private page flag bit.
- Later, in update_mmu_cache(), a check is made of this flag bit,
- and if set the flush is done and the flag bit is cleared.
+ page->mapping->i_mmap is an empty tree, just mark the architecture
+ private page flag bit. Later, in update_mmu_cache(), a check is
+ made of this flag bit, and if set the flush is done and the flag
+ bit is cleared.
IMPORTANT NOTE: It is often important, if you defer the flush,
that the actual flush occurs on the same CPU
diff --git a/Documentation/devicetree/bindings/rtc/isil,isl12057.txt b/Documentation/devicetree/bindings/rtc/isil,isl12057.txt
new file mode 100644
index 000000000000..501c39ceae79
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/isil,isl12057.txt
@@ -0,0 +1,78 @@
+Intersil ISL12057 I2C RTC/Alarm chip
+
+ISL12057 is a trivial I2C device (it has simple device tree bindings,
+consisting of a compatible field, an address and possibly an interrupt
+line).
+
+Nonetheless, it also supports an option boolean property
+("isil,irq2-can-wakeup-machine") to handle the specific use-case found
+on at least three in-tree users of the chip (NETGEAR ReadyNAS 102, 104
+and 2120 ARM-based NAS); On those devices, the IRQ#2 pin of the chip
+(associated with the alarm supported by the driver) is not connected
+to the SoC but to a PMIC. It allows the device to be powered up when
+RTC alarm rings. In order to mark the device has a wakeup source and
+get access to the 'wakealarm' sysfs entry, this specific property can
+be set when the IRQ#2 pin of the chip is not connected to the SoC but
+can wake up the device.
+
+Required properties supported by the device:
+
+ - "compatible": must be "isil,isl12057"
+ - "reg": I2C bus address of the device
+
+Optional properties:
+
+ - "isil,irq2-can-wakeup-machine": mark the chip as a wakeup source,
+ independently of the availability of an IRQ line connected to the
+ SoC.
+
+ - "interrupt-parent", "interrupts": for passing the interrupt line
+ of the SoC connected to IRQ#2 of the RTC chip.
+
+
+Example isl12057 node without IRQ#2 pin connected (no alarm support):
+
+ isl12057: isl12057@68 {
+ compatible = "isil,isl12057";
+ reg = <0x68>;
+ };
+
+
+Example isl12057 node with IRQ#2 pin connected to main SoC via MPP6 (note
+that the pinctrl-related properties below are given for completeness and
+may not be required or may be different depending on your system or
+SoC, and the main function of the MPP used as IRQ line, i.e.
+"interrupt-parent" and "interrupts" are usually sufficient):
+
+ pinctrl {
+ ...
+
+ rtc_alarm_pin: rtc_alarm_pin {
+ marvell,pins = "mpp6";
+ marvell,function = "gpio";
+ };
+
+ ...
+
+ };
+
+ ...
+
+ isl12057: isl12057@68 {
+ compatible = "isil,isl12057";
+ reg = <0x68>;
+ pinctrl-0 = <&rtc_alarm_pin>;
+ pinctrl-names = "default";
+ interrupt-parent = <&gpio0>;
+ interrupts = <6 IRQ_TYPE_EDGE_FALLING>;
+ };
+
+
+Example isl12057 node without IRQ#2 pin connected to the SoC but to a
+PMIC, allowing the device to be started based on configured alarm:
+
+ isl12057: isl12057@68 {
+ compatible = "isil,isl12057";
+ reg = <0x68>;
+ isil,irq2-can-wakeup-machine;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt b/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt
new file mode 100644
index 000000000000..5cbc0b145a61
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt
@@ -0,0 +1,16 @@
+NXP PCF2123 SPI Real Time Clock
+
+Required properties:
+- compatible: should be: "nxp,rtc-pcf2123"
+- reg: should be the SPI slave chipselect address
+
+Optional properties:
+- spi-cs-high: PCF2123 needs chipselect high
+
+Example:
+
+rtc: nxp,rtc-pcf2123@3 {
+ compatible = "nxp,rtc-pcf2123"
+ reg = <3>
+ spi-cs-high;
+};
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt
index 1b805a0efbb0..f6d9c99103a4 100644
--- a/Documentation/filesystems/fiemap.txt
+++ b/Documentation/filesystems/fiemap.txt
@@ -196,7 +196,8 @@ struct fiemap_extent_info {
};
It is intended that the file system should not need to access any of this
-structure directly.
+structure directly. Filesystem handlers should be tolerant to signals and return
+EINTR once fatal signal received.
Flag checking should be done at the beginning of the ->fiemap callback via the
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index 7618a287aa41..28f8c08201e2 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -100,3 +100,7 @@ coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode
coherency=buffered Allow concurrent O_DIRECT writes without EX lock among
nodes, which gains high performance at risk of getting
stale data on other nodes.
+journal_async_commit Commit block can be written to disk without waiting
+ for descriptor blocks. If enabled older kernels cannot
+ mount the device. This will enable 'journal_checksum'
+ internally.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index aae9dd13c91f..13b5809beba7 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -488,6 +488,10 @@ To clear the bits for the file mapped pages associated with the process
To clear the soft-dirty bit
> echo 4 > /proc/PID/clear_refs
+To reset the peak resident set size ("high water mark") to the process's
+current value:
+ > echo 5 > /proc/PID/clear_refs
+
Any other value written to /proc/PID/clear_refs will have no effect.
The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
@@ -811,7 +815,7 @@ MemAvailable: An estimate of how much memory is available for starting new
Buffers: Relatively temporary storage for raw disk blocks
shouldn't get tremendously large (20MB or so)
Cached: in-memory cache for files read from the disk (the
- pagecache). Doesn't include SwapCached
+ pagecache). Doesn't include SwapCached or Shmem.
SwapCached: Memory that once was swapped out, is swapped back in but
still also is in the swapfile (if memory is needed it
doesn't need to be swapped out AGAIN because it is already
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index ce1126aceed8..223c32171dcc 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -180,6 +180,16 @@ dos1xfloppy -- If set, use a fallback default BIOS Parameter Block
<bool>: 0,1,yes,no,true,false
+LIMITATION
+---------------------------------------------------------------------
+* The fallocated region of file is discarded at umount/evict time
+ when using fallocate with FALLOC_FL_KEEP_SIZE.
+ So, User should assume that fallocated region can be discarded at
+ last close if there is memory pressure resulting in eviction of
+ the inode from the memory. As a result, for any dependency on
+ the fallocated region, user should make sure to recheck fallocate
+ after reopening the file.
+
TODO
----------------------------------------------------------------------
* Need to get rid of the raw scanning stuff. Instead, always use
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt
index 79699c200766..62261c04060a 100644
--- a/Documentation/leds/leds-class.txt
+++ b/Documentation/leds/leds-class.txt
@@ -2,9 +2,6 @@
LED handling under Linux
========================
-If you're reading this and thinking about keyboard leds, these are
-handled by the input subsystem and the led class is *not* needed.
-
In its simplest form, the LED class just allows control of LEDs from
userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
LED is defined in max_brightness file. The brightness file will set the brightness
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 5a615c14f75d..8858db8f8805 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -216,6 +216,12 @@ dentry names:
equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
n last components. %pD does the same thing for struct file.
+task_struct comm name:
+
+ %pT
+
+ For printing task_struct->comm.
+
struct va_format:
%pV
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index 5948e455c4d2..6fbd55ef6b45 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -62,6 +62,8 @@ There are three components to pagemap:
20. NOPAGE
21. KSM
22. THP
+ 23. BALLOON
+ 24. ZERO_PAGE
Short descriptions to the page flags:
@@ -102,6 +104,12 @@ Short descriptions to the page flags:
22. THP
contiguous pages which construct transparent hugepages
+23. BALLOON
+ balloon compaction page
+
+24. ZERO_PAGE
+ zero page for pfn_zero or huge_zero page
+
[IO related page flags]
1. ERROR IO error occurred
3. UPTODATE page has up-to-date data
diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt
index 560e4363a55d..f609142f406a 100644
--- a/Documentation/vm/remap_file_pages.txt
+++ b/Documentation/vm/remap_file_pages.txt
@@ -18,10 +18,9 @@ on 32-bit systems to map files bigger than can linearly fit into 32-bit
virtual address space. This use-case is not critical anymore since 64-bit
systems are widely available.
-The plan is to deprecate the syscall and replace it with an emulation.
-The emulation will create new VMAs instead of nonlinear mappings. It's
-going to work slower for rare users of remap_file_pages() but ABI is
-preserved.
+The syscall is deprecated and replaced it with an emulation now. The
+emulation creates new VMAs instead of nonlinear mappings. It's going to
+work slower for rare users of remap_file_pages() but ABI is preserved.
One side effect of emulation (apart from performance) is that user can hit
vm.max_map_count limit more easily due to additional VMAs. See comment for
diff --git a/MAINTAINERS b/MAINTAINERS
index 6ae07437fff4..627aa83b3e35 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -724,15 +724,15 @@ F: include/uapi/linux/apm_bios.h
F: drivers/char/apm-emulation.c
APPLE BCM5974 MULTITOUCH DRIVER
-M: Henrik Rydberg <rydberg@euromail.se>
+M: Henrik Rydberg <rydberg@bitmath.org>
L: linux-input@vger.kernel.org
-S: Maintained
+S: Odd fixes
F: drivers/input/mouse/bcm5974.c
APPLE SMC DRIVER
-M: Henrik Rydberg <rydberg@euromail.se>
+M: Henrik Rydberg <rydberg@bitmath.org>
L: lm-sensors@lm-sensors.org
-S: Maintained
+S: Odd fixes
F: drivers/hwmon/applesmc.c
APPLETALK NETWORK LAYER
@@ -4934,10 +4934,10 @@ F: include/uapi/linux/input.h
F: include/linux/input/
INPUT MULTITOUCH (MT) PROTOCOL
-M: Henrik Rydberg <rydberg@euromail.se>
+M: Henrik Rydberg <rydberg@bitmath.org>
L: linux-input@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rydberg/input-mt.git
-S: Maintained
+S: Odd fixes
F: Documentation/input/multi-touch-protocol.txt
F: drivers/input/input-mt.c
K: \b(ABS|SYN)_MT_
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index d8f9b7e89234..fce22cf88ee9 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -73,7 +73,6 @@ struct vm_area_struct;
/* .. and these are ours ... */
#define _PAGE_DIRTY 0x20000
#define _PAGE_ACCESSED 0x40000
-#define _PAGE_FILE 0x80000 /* set:pagecache, unset:swap */
/*
* NOTE! The "accessed" bit isn't necessarily exact: it can be kept exactly
@@ -268,7 +267,6 @@ extern inline void pgd_clear(pgd_t * pgdp) { pgd_val(*pgdp) = 0; }
extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); }
extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-extern inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
extern inline int pte_special(pte_t pte) { return 0; }
extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; }
@@ -345,11 +343,6 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define pte_to_pgoff(pte) (pte_val(pte) >> 32)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE })
-
-#define PTE_FILE_MAX_BITS 32
-
#ifndef CONFIG_DISCONTIGMEM
#define kern_addr_valid(addr) (1)
#endif
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 48bbea6898b3..d5b98ab514bb 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -27,8 +27,6 @@ struct thread_info {
int bpt_nsaved;
unsigned long bpt_addr[2]; /* breakpoint handling */
unsigned int bpt_insn[2];
-
- struct restart_block restart_block;
};
/*
@@ -40,9 +38,6 @@ struct thread_info {
.exec_domain = &default_exec_domain, \
.addr_limit = KERNEL_DS, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 0086b472bc2b..836fbd44f65b 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -44,6 +44,7 @@
#define MADV_WILLNEED 3 /* will need these pages */
#define MADV_SPACEAVAIL 5 /* ensure resources are available */
#define MADV_DONTNEED 6 /* don't need these pages */
+#define MADV_FREE 7 /* free pages only if memory pressure */
/* common/generic parameters */
#define MADV_REMOVE 9 /* remove these pages & resources */
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 6cec2881acbf..8dbfb15f1745 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -150,7 +150,7 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
struct switch_stack *sw = (struct switch_stack *)regs - 1;
long i, err = __get_user(regs->pc, &sc->sc_pc);
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
sw->r26 = (unsigned long) ret_from_sys_call;
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 6b0b7f7ef783..bdc8ccaf390d 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -61,7 +61,6 @@
#define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */
#define _PAGE_READ (1<<5) /* Page has user read perm (H) */
#define _PAGE_MODIFIED (1<<6) /* Page modified (dirty) (S) */
-#define _PAGE_FILE (1<<7) /* page cache/ swap (S) */
#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
#define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */
@@ -73,7 +72,6 @@
#define _PAGE_READ (1<<3) /* Page has user read perm (H) */
#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */
#define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */
-#define _PAGE_FILE (1<<6) /* page cache/ swap (S) */
#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */
#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr
@@ -268,15 +266,6 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
pte; \
})
-/* TBD: Non linear mapping stuff */
-static inline int pte_file(pte_t pte)
-{
- return pte_val(pte) & _PAGE_FILE;
-}
-
-#define PTE_FILE_MAX_BITS 30
-#define pgoff_to_pte(x) __pte(x)
-#define pte_to_pgoff(x) (pte_val(x) >> 2)
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
#define pfn_pte(pfn, prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
@@ -364,7 +353,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* Encode swap {type,off} tuple into PTE
* We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
- * both PAGE_FILE and PAGE_PRESENT are zero in a PTE holding swap "identifier"
+ * PAGE_PRESENT is zero in a PTE holding swap "identifier"
*/
#define __swp_entry(type, off) ((swp_entry_t) { \
((type) & 0x1f) | ((off) << 13) })
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 02bc5ec0fb2e..1163a1838ac1 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -46,7 +46,6 @@ struct thread_info {
struct exec_domain *exec_domain;/* execution domain */
__u32 cpu; /* current CPU */
unsigned long thr_ptr; /* TLS ptr */
- struct restart_block restart_block;
};
/*
@@ -62,9 +61,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index cb3142a2d40b..114234e83caa 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -104,7 +104,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
struct pt_regs *regs = current_pt_regs();
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/* Since we stacked the signal on a word boundary,
* then 'sp' should be word aligned here. If it's
diff --git a/arch/arm/boot/dts/armada-370-netgear-rn102.dts b/arch/arm/boot/dts/armada-370-netgear-rn102.dts
index 4e24932c6e30..1c83b7ce0982 100644
--- a/arch/arm/boot/dts/armada-370-netgear-rn102.dts
+++ b/arch/arm/boot/dts/armada-370-netgear-rn102.dts
@@ -87,6 +87,7 @@
isl12057: isl12057@68 {
compatible = "isil,isl12057";
reg = <0x68>;
+ isil,irq2-can-wakeup-machine;
};
g762: g762@3e {
diff --git a/arch/arm/boot/dts/armada-370-netgear-rn104.dts b/arch/arm/boot/dts/armada-370-netgear-rn104.dts
index 30586e47986a..5fbfe02964dc 100644
--- a/arch/arm/boot/dts/armada-370-netgear-rn104.dts
+++ b/arch/arm/boot/dts/armada-370-netgear-rn104.dts
@@ -93,6 +93,7 @@
isl12057: isl12057@68 {
compatible = "isil,isl12057";
reg = <0x68>;
+ isil,irq2-can-wakeup-machine;
};
g762: g762@3e {
diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
index d81430aa4ab3..fc8bdfcd2348 100644
--- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
+++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
@@ -100,6 +100,7 @@
isl12057: isl12057@68 {
compatible = "isil,isl12057";
reg = <0x68>;
+ isil,irq2-can-wakeup-machine;
};
/* Controller for rear fan #1 of 3 (Protechnic
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index f0279411847d..bcc5e300413f 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -118,7 +118,6 @@
#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */
#define L_PTE_PRESENT (_AT(pteval_t, 1) << 0)
#define L_PTE_YOUNG (_AT(pteval_t, 1) << 1)
-#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */
#define L_PTE_DIRTY (_AT(pteval_t, 1) << 6)
#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7)
#define L_PTE_USER (_AT(pteval_t, 1) << 8)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index a31ecdad4b59..dd6dd555ad58 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -77,7 +77,6 @@
*/
#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */
#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */
-#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */
#define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */
@@ -249,6 +248,7 @@ PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING);
PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mkclean, &= ~L_PMD_SECT_DIRTY);
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index 0642228ff785..c35e53ee6663 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -54,8 +54,6 @@
typedef pte_t *pte_addr_t;
-static inline int pte_file(pte_t pte) { return 0; }
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index d5cac545ba33..f40354198bad 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -318,12 +318,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*
* 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- * <--------------- offset ----------------------> < type -> 0 0 0
+ * <--------------- offset ------------------------> < type -> 0 0
*
- * This gives us up to 31 swap files and 64GB per swap file. Note that
+ * This gives us up to 31 swap files and 128GB per swap file. Note that
* the offset field is always non-zero.
*/
-#define __SWP_TYPE_SHIFT 3
+#define __SWP_TYPE_SHIFT 2
#define __SWP_TYPE_BITS 5
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -342,20 +342,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
-/*
- * Encode and decode a file entry. File entries are stored in the Linux
- * page tables as follows:
- *
- * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
- * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- * <----------------------- offset ------------------------> 1 0 0
- */
-#define pte_file(pte) (pte_val(pte) & L_PTE_FILE)
-#define pte_to_pgoff(x) (pte_val(x) >> 3)
-#define pgoff_to_pte(x) __pte(((x) << 3) | L_PTE_FILE)
-
-#define PTE_FILE_MAX_BITS 29
-
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
/* FIXME: this is not correct */
#define kern_addr_valid(addr) (1)
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index d890e41f5520..72812a1f3d1c 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -68,7 +68,6 @@ struct thread_info {
#ifdef CONFIG_ARM_THUMBEE
unsigned long thumbee_state; /* ThumbEE Handler Base register */
#endif
- struct restart_block restart_block;
};
#define INIT_THREAD_INFO(tsk) \
@@ -81,9 +80,6 @@ struct thread_info {
.cpu_domain = domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT), \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 8aa6f1b87c9e..023ac905e4c3 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -191,7 +191,7 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs)
struct sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 64-bit boundary,
@@ -221,7 +221,7 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
struct rt_sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 64-bit boundary,
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index ba1196c968d8..082b9f2f7e90 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -98,7 +98,7 @@
#endif
#if !defined (CONFIG_ARM_LPAE) && \
(L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\
- L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED
+ L_PTE_PRESENT) > L_PTE_SHARED
#error Invalid Linux PTE bit settings
#endif
#endif /* CONFIG_MMU */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 210d632aa5ad..f15ba57d637a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -25,7 +25,6 @@
* Software defined PTE bits definition.
*/
#define PTE_VALID (_AT(pteval_t, 1) << 0)
-#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#define PTE_WRITE (_AT(pteval_t, 1) << 57)
@@ -281,10 +280,12 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
@@ -469,13 +470,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
- * bit 2: PTE_FILE
- * bits 3-8: swap type
- * bits 9-57: swap offset
+ * bits 2-7: swap type
+ * bits 8-57: swap offset
*/
-#define __SWP_TYPE_SHIFT 3
+#define __SWP_TYPE_SHIFT 2
#define __SWP_TYPE_BITS 6
-#define __SWP_OFFSET_BITS 49
+#define __SWP_OFFSET_BITS 50
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1)
@@ -493,18 +493,6 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
-/*
- * Encode and decode a file entry:
- * bits 0-1: present (must be zero)
- * bit 2: PTE_FILE
- * bits 3-57: file offset / PAGE_SIZE
- */
-#define pte_file(pte) (pte_val(pte) & PTE_FILE)
-#define pte_to_pgoff(x) (pte_val(x) >> 3)
-#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE)
-
-#define PTE_FILE_MAX_BITS 55
-
extern int kern_addr_valid(unsigned long addr);
#include <asm-generic/pgtable.h>
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 459bf8e53208..702e1e6a0d80 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -48,7 +48,6 @@ struct thread_info {
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
struct exec_domain *exec_domain; /* execution domain */
- struct restart_block restart_block;
int preempt_count; /* 0 => preemptable, <0 => bug */
int cpu; /* cpu */
};
@@ -60,9 +59,6 @@ struct thread_info {
.flags = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 6fa792137eda..660ccf9f7524 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -131,7 +131,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
struct rt_sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 128-bit boundary, then 'sp' should
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 5a1ba6e80d4e..64565c4ecbbc 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -347,7 +347,7 @@ asmlinkage int compat_sys_sigreturn(struct pt_regs *regs)
struct compat_sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 64-bit boundary,
@@ -381,7 +381,7 @@ asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs)
struct compat_rt_sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 64-bit boundary,
diff --git a/arch/avr32/include/asm/pgtable.h b/arch/avr32/include/asm/pgtable.h
index 4beff97e2033..ac7a817e2126 100644
--- a/arch/avr32/include/asm/pgtable.h
+++ b/arch/avr32/include/asm/pgtable.h
@@ -86,9 +86,6 @@ extern struct page *empty_zero_page;
#define _PAGE_BIT_PRESENT 10
#define _PAGE_BIT_ACCESSED 11 /* software: page was accessed */
-/* The following flags are only valid when !PRESENT */
-#define _PAGE_BIT_FILE 0 /* software: pagecache or swap? */
-
#define _PAGE_WT (1 << _PAGE_BIT_WT)
#define _PAGE_DIRTY (1 << _PAGE_BIT_DIRTY)
#define _PAGE_EXECUTE (1 << _PAGE_BIT_EXECUTE)
@@ -101,7 +98,6 @@ extern struct page *empty_zero_page;
/* Software flags */
#define _PAGE_ACCESSED (1 << _PAGE_BIT_ACCESSED)
#define _PAGE_PRESENT (1 << _PAGE_BIT_PRESENT)
-#define _PAGE_FILE (1 << _PAGE_BIT_FILE)
/*
* Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is
@@ -210,14 +206,6 @@ static inline int pte_special(pte_t pte)
return 0;
}
-/*
- * The following only work if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)
-{
- return pte_val(pte) & _PAGE_FILE;
-}
-
/* Mutator functions for PTE bits */
static inline pte_t pte_wrprotect(pte_t pte)
{
@@ -329,7 +317,6 @@ extern void update_mmu_cache(struct vm_area_struct * vma,
* Encode and decode a swap entry
*
* Constraints:
- * _PAGE_FILE at bit 0
* _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE)
* _PAGE_PRESENT at bit 10
*
@@ -346,18 +333,6 @@ extern void update_mmu_cache(struct vm_area_struct * vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-/*
- * Encode and decode a nonlinear file mapping entry. We have to
- * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't
- * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?)
- */
-#define PTE_FILE_MAX_BITS 30
-#define pte_to_pgoff(pte) (((pte_val(pte) >> 1) & 0x1ff) \
- | ((pte_val(pte) >> 11) << 9))
-#define pgoff_to_pte(off) ((pte_t) { ((((off) & 0x1ff) << 1) \
- | (((off) >> 9) << 11) \
- | _PAGE_FILE) })
-
typedef pte_t *pte_addr_t;
#define kern_addr_valid(addr) (1)
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index a978f3fe7c25..d56afa99a514 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -30,7 +30,6 @@ struct thread_info {
saved by debug handler
when setting up
trampoline */
- struct restart_block restart_block;
__u8 supervisor_stack[0];
};
@@ -41,9 +40,6 @@ struct thread_info {
.flags = 0, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall \
- } \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c
index d6a8193a1d2f..e41c84516e5d 100644
--- a/arch/avr32/kernel/asm-offsets.c
+++ b/arch/avr32/kernel/asm-offsets.c
@@ -18,7 +18,6 @@ void foo(void)
OFFSET(TI_preempt_count, thread_info, preempt_count);
OFFSET(TI_rar_saved, thread_info, rar_saved);
OFFSET(TI_rsr_saved, thread_info, rsr_saved);
- OFFSET(TI_restart_block, thread_info, restart_block);
BLANK();
OFFSET(TSK_active_mm, task_struct, active_mm);
BLANK();
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index d309fbcc3bd6..8f1c63b9b983 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -69,7 +69,7 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
sigset_t set;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
frame = (struct rt_sigframe __user *)regs->sp;
pr_debug("SIG return: frame = %p\n", frame);
diff --git a/arch/blackfin/include/asm/pgtable.h b/arch/blackfin/include/asm/pgtable.h
index 0b049019eba7..b88a1558b0b9 100644
--- a/arch/blackfin/include/asm/pgtable.h
+++ b/arch/blackfin/include/asm/pgtable.h
@@ -45,11 +45,6 @@ extern void paging_init(void);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_file(pte_t pte)
-{
- return 0;
-}
-
#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h
index 55f473bdad36..57c3a8bd583d 100644
--- a/arch/blackfin/include/asm/thread_info.h
+++ b/arch/blackfin/include/asm/thread_info.h
@@ -42,7 +42,6 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable, <0 => BUG */
mm_segment_t addr_limit; /* address limit */
- struct restart_block restart_block;
#ifndef CONFIG_SMP
struct l1_scratch_task_info l1_task_info;
#endif
@@ -58,9 +57,6 @@ struct thread_info {
.flags = 0, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index ef275571d885..f2a8b5493bd3 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -44,7 +44,7 @@ rt_restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *p
int err = 0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
#define RESTORE(x) err |= __get_user(regs->x, &sc->sc_##x)
diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index 6f4bac969bf7..23eada79439c 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c
@@ -7,6 +7,7 @@
*/
#include <linux/device.h>
+#include <linux/delay.h>
#include <linux/platform_device.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index c0eed5b18860..78d4483ba40c 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -50,11 +50,6 @@ extern void paging_init(void);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_file(pte_t pte)
-{
- return 0;
-}
-
#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h
index d4e9ef87076d..584e253f3217 100644
--- a/arch/c6x/include/asm/thread_info.h
+++ b/arch/c6x/include/asm/thread_info.h
@@ -45,7 +45,6 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 = preemptable, <0 = BUG */
mm_segment_t addr_limit; /* thread address space */
- struct restart_block restart_block;
};
/*
@@ -61,9 +60,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
index fe68226f6c4d..3c4bb5a5c382 100644
--- a/arch/c6x/kernel/signal.c
+++ b/arch/c6x/kernel/signal.c
@@ -68,7 +68,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs)
sigset_t set;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a dword boundary,
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c
index 9b32d338838b..74d7ba35120d 100644
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -67,7 +67,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
unsigned long old_usp;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/* restore the regs from &sc->regs (same as sc, since regs is first)
* (sc is already checked for VERIFY_READ since the sigframe was
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index 78ce3b1c9bcb..870e3e069318 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -59,7 +59,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
unsigned long old_usp;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Restore the registers from &sc->regs. sc is already checked
diff --git a/arch/cris/include/arch-v10/arch/mmu.h b/arch/cris/include/arch-v10/arch/mmu.h
index e829e5a37bbe..47a5dd21749d 100644
--- a/arch/cris/include/arch-v10/arch/mmu.h
+++ b/arch/cris/include/arch-v10/arch/mmu.h
@@ -58,7 +58,6 @@ typedef struct
/* Bits the HW doesn't care about but the kernel uses them in SW */
#define _PAGE_PRESENT (1<<4) /* page present in memory */
-#define _PAGE_FILE (1<<5) /* set: pagecache, unset: swap (when !PRESENT) */
#define _PAGE_ACCESSED (1<<5) /* simulated in software using valid bit */
#define _PAGE_MODIFIED (1<<6) /* simulated in software using we bit */
#define _PAGE_READ (1<<7) /* read-enabled */
@@ -105,6 +104,4 @@ typedef struct
#define __S110 PAGE_SHARED
#define __S111 PAGE_SHARED
-#define PTE_FILE_MAX_BITS 26
-
#endif
diff --git a/arch/cris/include/arch-v32/arch/mmu.h b/arch/cris/include/arch-v32/arch/mmu.h
index c1a13e05e963..e6db1616dee5 100644
--- a/arch/cris/include/arch-v32/arch/mmu.h
+++ b/arch/cris/include/arch-v32/arch/mmu.h
@@ -53,7 +53,6 @@ typedef struct
* software.
*/
#define _PAGE_PRESENT (1 << 5) /* Page is present in memory. */
-#define _PAGE_FILE (1 << 6) /* 1=pagecache, 0=swap (when !present) */
#define _PAGE_ACCESSED (1 << 6) /* Simulated in software using valid bit. */
#define _PAGE_MODIFIED (1 << 7) /* Simulated in software using we bit. */
#define _PAGE_READ (1 << 8) /* Read enabled. */
@@ -108,6 +107,4 @@ typedef struct
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
-#define PTE_FILE_MAX_BITS 25
-
#endif /* _ASM_CRIS_ARCH_MMU_H */
diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h
index 8b8c86793225..e824257971c4 100644
--- a/arch/cris/include/asm/pgtable.h
+++ b/arch/cris/include/asm/pgtable.h
@@ -114,7 +114,6 @@ extern unsigned long empty_zero_page;
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return 0; }
static inline pte_t pte_wrprotect(pte_t pte)
@@ -290,9 +289,6 @@ static inline void update_mmu_cache(struct vm_area_struct * vma,
*/
#define pgtable_cache_init() do { } while (0)
-#define pte_to_pgoff(x) (pte_val(x) >> 6)
-#define pgoff_to_pte(x) __pte(((x) << 6) | _PAGE_FILE)
-
typedef pte_t *pte_addr_t;
#endif /* __ASSEMBLY__ */
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 55dede18c032..7286db5ed90e 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -38,7 +38,6 @@ struct thread_info {
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
*/
- struct restart_block restart_block;
__u8 supervisor_stack[0];
};
@@ -56,9 +55,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h
index eb0110acd19b..c49699d5902d 100644
--- a/arch/frv/include/asm/pgtable.h
+++ b/arch/frv/include/asm/pgtable.h
@@ -62,10 +62,6 @@ typedef pte_t *pte_addr_t;
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#ifndef __ASSEMBLY__
-static inline int pte_file(pte_t pte) { return 0; }
-#endif
-
#define ZERO_PAGE(vaddr) ({ BUG(); NULL; })
#define swapper_pg_dir ((pgd_t *) NULL)
@@ -298,7 +294,6 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address)
#define _PAGE_RESERVED_MASK (xAMPRx_RESERVED8 | xAMPRx_RESERVED13)
-#define _PAGE_FILE 0x002 /* set:pagecache unset:swap */
#define _PAGE_PROTNONE 0x000 /* If not present */
#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -463,27 +458,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
* Handle swap and file entries
* - the PTE is encoded in the following format:
* bit 0: Must be 0 (!_PAGE_PRESENT)
- * bit 1: Type: 0 for swap, 1 for file (_PAGE_FILE)
- * bits 2-7: Swap type
- * bits 8-31: Swap offset
- * bits 2-31: File pgoff
+ * bits 1-6: Swap type
+ * bits 7-31: Swap offset
*/
-#define __swp_type(x) (((x).val >> 2) & 0x1f)
-#define __swp_offset(x) ((x).val >> 8)
-#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 2) | ((offset) << 8) })
+#define __swp_type(x) (((x).val >> 1) & 0x1f)
+#define __swp_offset(x) ((x).val >> 7)
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 7) })
#define __pte_to_swp_entry(_pte) ((swp_entry_t) { (_pte).pte })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_file(pte_t pte)
-{
- return pte.pte & _PAGE_FILE;
-}
-
-#define PTE_FILE_MAX_BITS 29
-
-#define pte_to_pgoff(PTE) ((PTE).pte >> 2)
-#define pgoff_to_pte(off) __pte((off) << 2 | _PAGE_FILE)
-
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
#define PageSkip(page) (0)
#define kern_addr_valid(addr) (1)
diff --git a/arch/frv/include/asm/string.h b/arch/frv/include/asm/string.h
index 5ed310f64b7e..1f6c35990439 100644
--- a/arch/frv/include/asm/string.h
+++ b/arch/frv/include/asm/string.h
@@ -33,7 +33,6 @@ extern void *memcpy(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_STRNCAT 1
#define __HAVE_ARCH_STRCMP 1
#define __HAVE_ARCH_STRNCMP 1
-#define __HAVE_ARCH_STRNICMP 1
#define __HAVE_ARCH_STRCHR 1
#define __HAVE_ARCH_STRRCHR 1
#define __HAVE_ARCH_STRSTR 1
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index af29e17c0181..6b917f1c2955 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -41,7 +41,6 @@ struct thread_info {
* 0-0xBFFFFFFF for user-thead
* 0-0xFFFFFFFF for kernel-thread
*/
- struct restart_block restart_block;
__u8 supervisor_stack[0];
};
@@ -65,9 +64,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/frv/kernel/asm-offsets.c b/arch/frv/kernel/asm-offsets.c
index 9de96843a278..446e89d500cc 100644
--- a/arch/frv/kernel/asm-offsets.c
+++ b/arch/frv/kernel/asm-offsets.c
@@ -40,7 +40,6 @@ void foo(void)
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PREEMPT_COUNT, thread_info, preempt_count);
OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit);
- OFFSET(TI_RESTART_BLOCK, thread_info, restart_block);
BLANK();
/* offsets into register file storage */
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index dc3d59de0870..336713ab4745 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -62,7 +62,7 @@ static int restore_sigcontext(struct sigcontext __user *sc, int *_gr8)
unsigned long tbr, psr;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
tbr = user->i.tbr;
psr = user->i.psr;
diff --git a/arch/frv/mm/extable.c b/arch/frv/mm/extable.c
index 2fb9b3ab57b9..8863d6c1df6e 100644
--- a/arch/frv/mm/extable.c
+++ b/arch/frv/mm/extable.c
@@ -10,29 +10,6 @@ extern const void __memset_end, __memset_user_error_lr, __memset_user_error_hand
extern const void __memcpy_end, __memcpy_user_error_lr, __memcpy_user_error_handler;
extern spinlock_t modlist_lock;
-/*****************************************************************************/
-/*
- *
- */
-static inline unsigned long search_one_table(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
- unsigned long value)
-{
- while (first <= last) {
- const struct exception_table_entry __attribute__((aligned(8))) *mid;
- long diff;
-
- mid = (last - first) / 2 + first;
- diff = mid->insn - value;
- if (diff == 0)
- return mid->fixup;
- else if (diff < 0)
- first = mid + 1;
- else
- last = mid - 1;
- }
- return 0;
-} /* end search_one_table() */
/*****************************************************************************/
/*
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index d8bd54fa431e..6e35e71d2aea 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -62,13 +62,6 @@ extern unsigned long zero_page_mask;
#define _PAGE_ACCESSED (1<<2)
/*
- * _PAGE_FILE is only meaningful if _PAGE_PRESENT is false, while
- * _PAGE_DIRTY is only meaningful if _PAGE_PRESENT is true.
- * So we can overload the bit...
- */
-#define _PAGE_FILE _PAGE_DIRTY /* set: pagecache, unset = swap */
-
-/*
* For now, let's say that Valid and Present are the same thing.
* Alternatively, we could say that it's the "or" of R, W, and X
* permissions.
@@ -456,57 +449,36 @@ static inline int pte_exec(pte_t pte)
#define pgtable_cache_init() do { } while (0)
/*
- * Swap/file PTE definitions. If _PAGE_PRESENT is zero, the rest of the
- * PTE is interpreted as swap information. Depending on the _PAGE_FILE
- * bit, the remaining free bits are eitehr interpreted as a file offset
- * or a swap type/offset tuple. Rather than have the TLB fill handler
- * test _PAGE_PRESENT, we're going to reserve the permissions bits
- * and set them to all zeros for swap entries, which speeds up the
- * miss handler at the cost of 3 bits of offset. That trade-off can
- * be revisited if necessary, but Hexagon processor architecture and
- * target applications suggest a lot of TLB misses and not much swap space.
+ * Swap/file PTE definitions. If _PAGE_PRESENT is zero, the rest of the PTE is
+ * interpreted as swap information. The remaining free bits are interpreted as
+ * swap type/offset tuple. Rather than have the TLB fill handler test
+ * _PAGE_PRESENT, we're going to reserve the permissions bits and set them to
+ * all zeros for swap entries, which speeds up the miss handler at the cost of
+ * 3 bits of offset. That trade-off can be revisited if necessary, but Hexagon
+ * processor architecture and target applications suggest a lot of TLB misses
+ * and not much swap space.
*
* Format of swap PTE:
* bit 0: Present (zero)
- * bit 1: _PAGE_FILE (zero)
- * bits 2-6: swap type (arch independent layer uses 5 bits max)
- * bits 7-9: bits 2:0 of offset
- * bits 10-12: effectively _PAGE_PROTNONE (all zero)
- * bits 13-31: bits 21:3 of swap offset
- *
- * Format of file PTE:
- * bit 0: Present (zero)
- * bit 1: _PAGE_FILE (zero)
- * bits 2-9: bits 7:0 of offset
- * bits 10-12: effectively _PAGE_PROTNONE (all zero)
- * bits 13-31: bits 26:8 of swap offset
+ * bits 1-5: swap type (arch independent layer uses 5 bits max)
+ * bits 6-9: bits 3:0 of offset
+ * bits 10-12: effectively _PAGE_PROTNONE (all zero)
+ * bits 13-31: bits 22:4 of swap offset
*
* The split offset makes some of the following macros a little gnarly,
* but there's plenty of precedent for this sort of thing.
*/
-#define PTE_FILE_MAX_BITS 27
/* Used for swap PTEs */
-#define __swp_type(swp_pte) (((swp_pte).val >> 2) & 0x1f)
+#define __swp_type(swp_pte) (((swp_pte).val >> 1) & 0x1f)
#define __swp_offset(swp_pte) \
- ((((swp_pte).val >> 7) & 0x7) | (((swp_pte).val >> 10) & 0x003ffff8))
+ ((((swp_pte).val >> 6) & 0xf) | (((swp_pte).val >> 9) & 0x7ffff0))
#define __swp_entry(type, offset) \
((swp_entry_t) { \
- ((type << 2) | \
- ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) })
-
-/* Used for file PTEs */
-#define pte_file(pte) \
- ((pte_val(pte) & (_PAGE_FILE | _PAGE_PRESENT)) == _PAGE_FILE)
-
-#define pte_to_pgoff(pte) \
- (((pte_val(pte) >> 2) & 0xff) | ((pte_val(pte) >> 5) & 0x07ffff00))
-
-#define pgoff_to_pte(off) \
- ((pte_t) { ((((off) & 0x7ffff00) << 5) | (((off) & 0xff) << 2)\
- | _PAGE_FILE) })
+ ((type << 1) | \
+ ((offset & 0x7ffff0) << 9) | ((offset & 0xf) << 6)) })
/* Oh boy. There are a lot of possible arch overrides found in this file. */
#include <asm-generic/pgtable.h>
diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h
index a59dad3b3695..bacd3d6030c5 100644
--- a/arch/hexagon/include/asm/thread_info.h
+++ b/arch/hexagon/include/asm/thread_info.h
@@ -56,7 +56,6 @@ struct thread_info {
* used for syscalls somehow;
* seems to have a function pointer and four arguments
*/
- struct restart_block restart_block;
/* Points to the current pt_regs frame */
struct pt_regs *regs;
/*
@@ -83,9 +82,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = 1, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
.sp = 0, \
.regs = NULL, \
}
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index eadd70e47e7e..b039a624c170 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -239,7 +239,7 @@ asmlinkage int sys_rt_sigreturn(void)
sigset_t blocked;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
frame = (struct rt_sigframe __user *)pt_psp(regs);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7935115398a6..2f07bb3dda91 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -57,9 +57,6 @@
#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */
#define _PAGE_PROTNONE (__IA64_UL(1) << 63)
-/* Valid only for a PTE with the present bit cleared: */
-#define _PAGE_FILE (1 << 1) /* see swap & file pte remarks below */
-
#define _PFN_MASK _PAGE_PPN_MASK
/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */
#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED)
@@ -300,7 +297,6 @@ extern unsigned long VMALLOC_END;
#define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0)
#define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0)
#define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0)
-#define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0)
#define pte_special(pte) 0
/*
@@ -472,27 +468,16 @@ extern void paging_init (void);
*
* Format of swap pte:
* bit 0 : present bit (must be zero)
- * bit 1 : _PAGE_FILE (must be zero)
- * bits 2- 8: swap-type
- * bits 9-62: swap offset
- * bit 63 : _PAGE_PROTNONE bit
- *
- * Format of file pte:
- * bit 0 : present bit (must be zero)
- * bit 1 : _PAGE_FILE (must be one)
- * bits 2-62: file_offset/PAGE_SIZE
+ * bits 1- 7: swap-type
+ * bits 8-62: swap offset
* bit 63 : _PAGE_PROTNONE bit
*/
-#define __swp_type(entry) (((entry).val >> 2) & 0x7f)
-#define __swp_offset(entry) (((entry).val << 1) >> 10)
-#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) })
+#define __swp_type(entry) (((entry).val >> 1) & 0x7f)
+#define __swp_offset(entry) (((entry).val << 1) >> 9)
+#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 1) | ((long) (offset) << 8) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define PTE_FILE_MAX_BITS 61
-#define pte_to_pgoff(pte) ((pte_val(pte) << 1) >> 3)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 2) | _PAGE_FILE })
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 5b17418b4223..c16f21a068ff 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -27,7 +27,6 @@ struct thread_info {
__u32 status; /* Thread synchronous flags */
mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
- struct restart_block restart_block;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
__u64 ac_stamp;
__u64 ac_leave;
@@ -46,9 +45,6 @@ struct thread_info {
.cpu = 0, \
.addr_limit = KERNEL_DS, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#ifndef ASM_OFFSETS_C
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 6d92170be457..b3a124da71e5 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -46,7 +46,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
long err;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/* restore scratch that always needs gets updated during signal delivery: */
err = __get_user(flags, &sc->sc_flags);
diff --git a/arch/m32r/include/asm/pgtable-2level.h b/arch/m32r/include/asm/pgtable-2level.h
index 9cdaf7350ef6..8fd8ee70266a 100644
--- a/arch/m32r/include/asm/pgtable-2level.h
+++ b/arch/m32r/include/asm/pgtable-2level.h
@@ -70,9 +70,5 @@ static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define PTE_FILE_MAX_BITS 29
-#define pte_to_pgoff(pte) (((pte_val(pte) >> 2) & 0x7f) | (((pte_val(pte) >> 10)) << 7))
-#define pgoff_to_pte(off) ((pte_t) { (((off) & 0x7f) << 2) | (((off) >> 7) << 10) | _PAGE_FILE })
-
#endif /* __KERNEL__ */
#endif /* _ASM_M32R_PGTABLE_2LEVEL_H */
diff --git a/arch/m32r/include/asm/pgtable.h b/arch/m32r/include/asm/pgtable.h
index 103ce6710f07..050f7a686e3d 100644
--- a/arch/m32r/include/asm/pgtable.h
+++ b/arch/m32r/include/asm/pgtable.h
@@ -80,8 +80,6 @@ extern unsigned long empty_zero_page[1024];
*/
#define _PAGE_BIT_DIRTY 0 /* software: page changed */
-#define _PAGE_BIT_FILE 0 /* when !present: nonlinear file
- mapping */
#define _PAGE_BIT_PRESENT 1 /* Valid: page is valid */
#define _PAGE_BIT_GLOBAL 2 /* Global */
#define _PAGE_BIT_LARGE 3 /* Large */
@@ -93,7 +91,6 @@ extern unsigned long empty_zero_page[1024];
#define _PAGE_BIT_PROTNONE 9 /* software: if not present */
#define _PAGE_DIRTY (1UL << _PAGE_BIT_DIRTY)
-#define _PAGE_FILE (1UL << _PAGE_BIT_FILE)
#define _PAGE_PRESENT (1UL << _PAGE_BIT_PRESENT)
#define _PAGE_GLOBAL (1UL << _PAGE_BIT_GLOBAL)
#define _PAGE_LARGE (1UL << _PAGE_BIT_LARGE)
@@ -206,14 +203,6 @@ static inline int pte_write(pte_t pte)
return pte_val(pte) & _PAGE_WRITE;
}
-/*
- * The following only works if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)
-{
- return pte_val(pte) & _PAGE_FILE;
-}
-
static inline int pte_special(pte_t pte)
{
return 0;
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index 00171703402f..32422d0211c3 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -34,7 +34,6 @@ struct thread_info {
0-0xBFFFFFFF for user-thread
0-0xFFFFFFFF for kernel-thread
*/
- struct restart_block restart_block;
__u8 supervisor_stack[0];
};
@@ -49,7 +48,6 @@ struct thread_info {
#define TI_CPU 0x00000010
#define TI_PRE_COUNT 0x00000014
#define TI_ADDR_LIMIT 0x00000018
-#define TI_RESTART_BLOCK 0x000001C
#endif
@@ -68,9 +66,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 95408b8f130a..7736c6660a15 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -48,7 +48,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
#define COPY(x) err |= __get_user(regs->x, &sc->sc_##x)
COPY(r4);
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 3c793682e5d9..2500ce04fcc4 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -35,7 +35,6 @@
* hitting hardware.
*/
#define CF_PAGE_DIRTY 0x00000001
-#define CF_PAGE_FILE 0x00000200
#define CF_PAGE_ACCESSED 0x00001000
#define _PAGE_CACHE040 0x020 /* 68040 cache mode, cachable, copyback */
@@ -243,11 +242,6 @@ static inline int pte_young(pte_t pte)
return pte_val(pte) & CF_PAGE_ACCESSED;
}
-static inline int pte_file(pte_t pte)
-{
- return pte_val(pte) & CF_PAGE_FILE;
-}
-
static inline int pte_special(pte_t pte)
{
return 0;
@@ -391,26 +385,13 @@ static inline void cache_page(void *vaddr)
*ptep = pte_mkcache(*ptep);
}
-#define PTE_FILE_MAX_BITS 21
-#define PTE_FILE_SHIFT 11
-
-static inline unsigned long pte_to_pgoff(pte_t pte)
-{
- return pte_val(pte) >> PTE_FILE_SHIFT;
-}
-
-static inline pte_t pgoff_to_pte(unsigned pgoff)
-{
- return __pte((pgoff << PTE_FILE_SHIFT) + CF_PAGE_FILE);
-}
-
/*
* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e))
*/
#define __swp_type(x) ((x).val & 0xFF)
-#define __swp_offset(x) ((x).val >> PTE_FILE_SHIFT)
+#define __swp_offset(x) ((x).val >> 11)
#define __swp_entry(typ, off) ((swp_entry_t) { (typ) | \
- (off << PTE_FILE_SHIFT) })
+ (off << 11) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) (__pte((x).val))
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index e0fdd4d08075..0085aab80e5a 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -28,7 +28,6 @@
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NOCACHE)
#define _PAGE_PROTNONE 0x004
-#define _PAGE_FILE 0x008 /* pagecache or swap? */
#ifndef __ASSEMBLY__
@@ -168,7 +167,6 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RONLY); }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return 0; }
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; }
@@ -266,19 +264,6 @@ static inline void cache_page(void *vaddr)
}
}
-#define PTE_FILE_MAX_BITS 28
-
-static inline unsigned long pte_to_pgoff(pte_t pte)
-{
- return pte.pte >> 4;
-}
-
-static inline pte_t pgoff_to_pte(unsigned off)
-{
- pte_t pte = { (off << 4) + _PAGE_FILE };
- return pte;
-}
-
/* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */
#define __swp_type(x) (((x).val >> 4) & 0xff)
#define __swp_offset(x) ((x).val >> 12)
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index 11859b86b1f9..ac7d87a02335 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -37,8 +37,6 @@ extern void paging_init(void);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_file(pte_t pte) { return 0; }
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index f55aa04161e8..48657f9fdece 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -38,8 +38,6 @@
#define _PAGE_PRESENT (SUN3_PAGE_VALID)
#define _PAGE_ACCESSED (SUN3_PAGE_ACCESSED)
-#define PTE_FILE_MAX_BITS 28
-
/* Compound page protection values. */
//todo: work out which ones *should* have SUN3_PAGE_NOCACHE and fix...
// is it just PAGE_KERNEL and PAGE_SHARED?
@@ -168,7 +166,6 @@ static inline void pgd_clear (pgd_t *pgdp) {}
static inline int pte_write(pte_t pte) { return pte_val(pte) & SUN3_PAGE_WRITEABLE; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; }
static inline int pte_special(pte_t pte) { return 0; }
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; }
@@ -202,18 +199,6 @@ static inline pmd_t *pmd_offset (pgd_t *pgd, unsigned long address)
return (pmd_t *) pgd;
}
-static inline unsigned long pte_to_pgoff(pte_t pte)
-{
- return pte.pte & SUN3_PAGE_PGNUM_MASK;
-}
-
-static inline pte_t pgoff_to_pte(unsigned off)
-{
- pte_t pte = { off + SUN3_PAGE_ACCESSED };
- return pte;
-}
-
-
/* Find an entry in the third-level pagetable. */
#define pte_index(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
#define pte_offset_kernel(pmd, address) ((pte_t *) __pmd_page(*pmd) + pte_index(address))
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 21a4784ca5a1..c54256e69e64 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -31,7 +31,6 @@ struct thread_info {
int preempt_count; /* 0 => preemptable, <0 => BUG */
__u32 cpu; /* should always be 0 on m68k */
unsigned long tp_value; /* thread pointer */
- struct restart_block restart_block;
};
#endif /* __ASSEMBLY__ */
@@ -41,9 +40,6 @@ struct thread_info {
.exec_domain = &default_exec_domain, \
.addr_limit = KERNEL_DS, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_stack (init_thread_union.stack)
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 967a8b7e1527..d7179281e74a 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -655,7 +655,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u
int err = 0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/* get previous context */
if (copy_from_user(&context, usc, sizeof(context)))
@@ -693,7 +693,7 @@ rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw,
int err;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
err = __get_user(temp, &uc->uc_mcontext.version);
if (temp != MCONTEXT_VERSION)
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
index 0d9dc5487296..d0604c0a8702 100644
--- a/arch/metag/include/asm/pgtable.h
+++ b/arch/metag/include/asm/pgtable.h
@@ -47,7 +47,6 @@
*/
#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1
#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2
-#define _PAGE_FILE _PAGE_ALWAYS_ZERO_3
/* Pages owned, and protected by, the kernel. */
#define _PAGE_KERNEL _PAGE_PRIV
@@ -219,7 +218,6 @@ extern unsigned long empty_zero_page;
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return 0; }
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= (~_PAGE_WRITE); return pte; }
@@ -327,10 +325,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define PTE_FILE_MAX_BITS 22
-#define pte_to_pgoff(x) (pte_val(x) >> 10)
-#define pgoff_to_pte(x) __pte(((x) << 10) | _PAGE_FILE)
-
#define kern_addr_valid(addr) (1)
/*
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
index 47711336119e..afb3ca4776d1 100644
--- a/arch/metag/include/asm/thread_info.h
+++ b/arch/metag/include/asm/thread_info.h
@@ -35,9 +35,8 @@ struct thread_info {
int preempt_count; /* 0 => preemptable, <0 => BUG */
mm_segment_t addr_limit; /* thread address space */
- struct restart_block restart_block;
- u8 supervisor_stack[0];
+ u8 supervisor_stack[0] __aligned(8);
};
#else /* !__ASSEMBLY__ */
@@ -74,9 +73,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c
index 0d100d5c1407..ce49d429c74a 100644
--- a/arch/metag/kernel/signal.c
+++ b/arch/metag/kernel/signal.c
@@ -48,7 +48,7 @@ static int restore_sigcontext(struct pt_regs *regs,
int err;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
err = metag_gp_regs_copyin(regs, 0, sizeof(struct user_gp_regs), NULL,
&sc->regs);
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index df19d0c47be8..91b9b46fbb5d 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -40,10 +40,6 @@ extern int mem_init_done;
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#ifndef __ASSEMBLY__
-static inline int pte_file(pte_t pte) { return 0; }
-#endif /* __ASSEMBLY__ */
-
#define ZERO_PAGE(vaddr) ({ BUG(); NULL; })
#define swapper_pg_dir ((pgd_t *) NULL)
@@ -207,7 +203,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
/* Definitions for MicroBlaze. */
#define _PAGE_GUARDED 0x001 /* G: page is guarded from prefetch */
-#define _PAGE_FILE 0x001 /* when !present: nonlinear file mapping */
#define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */
#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */
#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */
@@ -337,7 +332,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -499,11 +493,6 @@ static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
#define pte_unmap(pte) kunmap_atomic(pte)
-/* Encode and decode a nonlinear file mapping entry */
-#define PTE_FILE_MAX_BITS 29
-#define pte_to_pgoff(pte) (pte_val(pte) >> 3)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE })
-
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
/*
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index 8c9d36591a03..b699fbd7de4a 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -71,7 +71,6 @@ struct thread_info {
__u32 cpu; /* current CPU */
__s32 preempt_count; /* 0 => preemptable,< 0 => BUG*/
mm_segment_t addr_limit; /* thread address space */
- struct restart_block restart_block;
struct cpu_context cpu_context;
};
@@ -87,9 +86,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 235706055b7f..a1cbaf90e2ea 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -89,7 +89,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
int rval;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index 68984b612f9d..16aa9f23e17b 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -161,22 +161,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-/*
- * Encode and decode a nonlinear file mapping entry
- */
-#define pte_to_pgoff(_pte) ((((_pte).pte >> 1 ) & 0x07) | \
- (((_pte).pte >> 2 ) & 0x38) | \
- (((_pte).pte >> 10) << 6 ))
-
-#define pgoff_to_pte(off) ((pte_t) { (((off) & 0x07) << 1 ) | \
- (((off) & 0x38) << 2 ) | \
- (((off) >> 6 ) << 10) | \
- _PAGE_FILE })
-
-/*
- * Bits 0, 4, 8, and 9 are taken, split up 28 bits of offset into this range:
- */
-#define PTE_FILE_MAX_BITS 28
#else
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
@@ -188,13 +172,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_high })
#define __swp_entry_to_pte(x) ((pte_t) { 0, (x).val })
-/*
- * Bits 0 and 1 of pte_high are taken, use the rest for the page offset...
- */
-#define pte_to_pgoff(_pte) ((_pte).pte_high >> 2)
-#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) << 2 })
-
-#define PTE_FILE_MAX_BITS 30
#else
/*
* Constraints:
@@ -209,19 +186,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-/*
- * Encode and decode a nonlinear file mapping entry
- */
-#define pte_to_pgoff(_pte) ((((_pte).pte >> 1) & 0x7) | \
- (((_pte).pte >> 2) & 0x8) | \
- (((_pte).pte >> 8) << 4))
-
-#define pgoff_to_pte(off) ((pte_t) { (((off) & 0x7) << 1) | \
- (((off) & 0x8) << 2) | \
- (((off) >> 4) << 8) | \
- _PAGE_FILE })
-
-#define PTE_FILE_MAX_BITS 28
#endif /* defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) */
#endif /* defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) */
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index e1c49a96807d..1659bb91ae21 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -291,13 +291,4 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-/*
- * Bits 0, 4, 6, and 7 are taken. Let's leave bits 1, 2, 3, and 5 alone to
- * make things easier, and only use the upper 56 bits for the page offset...
- */
-#define PTE_FILE_MAX_BITS 56
-
-#define pte_to_pgoff(_pte) ((_pte).pte >> 8)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 8) | _PAGE_FILE })
-
#endif /* _ASM_PGTABLE_64_H */
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index ca11f14f40a3..fc807aa5ec8d 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -48,8 +48,6 @@
/*
* The following bits are implemented in software
- *
- * _PAGE_FILE semantics: set:pagecache unset:swap
*/
#define _PAGE_PRESENT_SHIFT (_CACHE_SHIFT + 3)
#define _PAGE_PRESENT (1 << _PAGE_PRESENT_SHIFT)
@@ -64,7 +62,6 @@
#define _PAGE_SILENT_READ _PAGE_VALID
#define _PAGE_SILENT_WRITE _PAGE_DIRTY
-#define _PAGE_FILE _PAGE_MODIFIED
#define _PFN_SHIFT (PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
@@ -72,8 +69,6 @@
/*
* The following are implemented by software
- *
- * _PAGE_FILE semantics: set:pagecache unset:swap
*/
#define _PAGE_PRESENT_SHIFT 0
#define _PAGE_PRESENT (1 << _PAGE_PRESENT_SHIFT)
@@ -85,8 +80,6 @@
#define _PAGE_ACCESSED (1 << _PAGE_ACCESSED_SHIFT)
#define _PAGE_MODIFIED_SHIFT 4
#define _PAGE_MODIFIED (1 << _PAGE_MODIFIED_SHIFT)
-#define _PAGE_FILE_SHIFT 4
-#define _PAGE_FILE (1 << _PAGE_FILE_SHIFT)
/*
* And these are the hardware TLB bits
@@ -116,7 +109,6 @@
* The following bits are implemented in software
*
* _PAGE_READ / _PAGE_READ_SHIFT should be unused if cpu_has_rixi.
- * _PAGE_FILE semantics: set:pagecache unset:swap
*/
#define _PAGE_PRESENT_SHIFT (0)
#define _PAGE_PRESENT (1 << _PAGE_PRESENT_SHIFT)
@@ -128,7 +120,6 @@
#define _PAGE_ACCESSED (1 << _PAGE_ACCESSED_SHIFT)
#define _PAGE_MODIFIED_SHIFT (_PAGE_ACCESSED_SHIFT + 1)
#define _PAGE_MODIFIED (1 << _PAGE_MODIFIED_SHIFT)
-#define _PAGE_FILE (_PAGE_MODIFIED)
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
/* huge tlb page */
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 62a6ba383d4f..583ff4215479 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -231,7 +231,6 @@ extern pgd_t swapper_pg_dir[];
static inline int pte_write(pte_t pte) { return pte.pte_low & _PAGE_WRITE; }
static inline int pte_dirty(pte_t pte) { return pte.pte_low & _PAGE_MODIFIED; }
static inline int pte_young(pte_t pte) { return pte.pte_low & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte.pte_low & _PAGE_FILE; }
static inline pte_t pte_wrprotect(pte_t pte)
{
@@ -287,7 +286,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline pte_t pte_wrprotect(pte_t pte)
{
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 99eea59604e9..75a8c55d3dc1 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -34,7 +34,6 @@ struct thread_info {
* 0x7fffffff for user-thead
* 0xffffffff for kernel-thread
*/
- struct restart_block restart_block;
struct pt_regs *regs;
};
@@ -49,9 +48,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index cfcb876cae6b..106e741aa7ee 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -67,6 +67,7 @@
#define MADV_SEQUENTIAL 2 /* expect sequential page references */
#define MADV_WILLNEED 3 /* will need these pages */
#define MADV_DONTNEED 4 /* don't need these pages */
+#define MADV_FREE 5 /* free pages only if memory pressure */
/* common parameters: try to keep these consistent across architectures */
#define MADV_REMOVE 9 /* remove these pages & resources */
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index b1d84bd4efb3..3b2dfdb4865f 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -98,7 +98,6 @@ void output_thread_info_defines(void)
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit);
- OFFSET(TI_RESTART_BLOCK, thread_info, restart_block);
OFFSET(TI_REGS, thread_info, regs);
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 545bf11bd2ed..6a28c792d862 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -243,7 +243,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
int i;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
err |= __get_user(regs->cp0_epc, &sc->sc_pc);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index d69179c0d49d..19a7705f2a01 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -220,7 +220,7 @@ static int restore_sigcontext32(struct pt_regs *regs,
int i;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
err |= __get_user(regs->cp0_epc, &sc->sc_pc);
err |= __get_user(regs->hi, &sc->sc_mdhi);
diff --git a/arch/mn10300/include/asm/pgtable.h b/arch/mn10300/include/asm/pgtable.h
index 2ddaa67e7983..629181ae111e 100644
--- a/arch/mn10300/include/asm/pgtable.h
+++ b/arch/mn10300/include/asm/pgtable.h
@@ -134,7 +134,6 @@ extern pte_t kernel_vmalloc_ptes[(VMALLOC_END - VMALLOC_START) / PAGE_SIZE];
#define _PAGE_NX 0 /* no-execute bit */
/* If _PAGE_VALID is clear, we use these: */
-#define _PAGE_FILE xPTEL2_C /* set:pagecache unset:swap */
#define _PAGE_PROTNONE 0x000 /* If not present */
#define __PAGE_PROT_UWAUX 0x010
@@ -241,11 +240,6 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return pte_val(pte) & __PAGE_PROT_WRITE; }
static inline int pte_special(pte_t pte){ return 0; }
-/*
- * The following only works if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
-
static inline pte_t pte_rdprotect(pte_t pte)
{
pte_val(pte) &= ~(__PAGE_PROT_USER|__PAGE_PROT_UWAUX); return pte;
@@ -338,16 +332,11 @@ static inline int pte_exec_kernel(pte_t pte)
return 1;
}
-#define PTE_FILE_MAX_BITS 30
-
-#define pte_to_pgoff(pte) (pte_val(pte) >> 2)
-#define pgoff_to_pte(off) __pte((off) << 2 | _PAGE_FILE)
-
/* Encode and de-code a swap entry */
-#define __swp_type(x) (((x).val >> 2) & 0x3f)
-#define __swp_offset(x) ((x).val >> 8)
+#define __swp_type(x) (((x).val >> 1) & 0x3f)
+#define __swp_offset(x) ((x).val >> 7)
#define __swp_entry(type, offset) \
- ((swp_entry_t) { ((type) << 2) | ((offset) << 8) })
+ ((swp_entry_t) { ((type) << 1) | ((offset) << 7) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) __pte((x).val)
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index bf280eaccd36..c1c374f0ec12 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -50,7 +50,6 @@ struct thread_info {
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
*/
- struct restart_block restart_block;
__u8 supervisor_stack[0];
};
@@ -80,9 +79,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/mn10300/kernel/asm-offsets.c b/arch/mn10300/kernel/asm-offsets.c
index 47b3bb0c04ff..d780670cbaf3 100644
--- a/arch/mn10300/kernel/asm-offsets.c
+++ b/arch/mn10300/kernel/asm-offsets.c
@@ -28,7 +28,6 @@ void foo(void)
OFFSET(TI_cpu, thread_info, cpu);
OFFSET(TI_preempt_count, thread_info, preempt_count);
OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_restart_block, thread_info, restart_block);
BLANK();
OFFSET(REG_D0, pt_regs, d0);
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index a6c0858592c3..8609845f12c5 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -40,7 +40,7 @@ static int restore_sigcontext(struct pt_regs *regs,
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (is_using_fpu(current))
fpu_kill_state(current);
diff --git a/arch/nios2/include/asm/pgtable-bits.h b/arch/nios2/include/asm/pgtable-bits.h
index ce9e7069aa96..bfddff383e89 100644
--- a/arch/nios2/include/asm/pgtable-bits.h
+++ b/arch/nios2/include/asm/pgtable-bits.h
@@ -30,6 +30,5 @@
#define _PAGE_PRESENT (1<<25) /* PTE contains a translation */
#define _PAGE_ACCESSED (1<<26) /* page referenced */
#define _PAGE_DIRTY (1<<27) /* dirty page */
-#define _PAGE_FILE (1<<28) /* PTE used for file mapping or swap */
#endif /* _ASM_NIOS2_PGTABLE_BITS_H */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index ccbaffd47671..7b292e3a3138 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -112,8 +112,6 @@ static inline int pte_dirty(pte_t pte) \
{ return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) \
{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) \
- { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return 0; }
#define pgprot_noncached pgprot_noncached
@@ -272,8 +270,7 @@ static inline void pte_clear(struct mm_struct *mm,
__FILE__, __LINE__, pgd_val(e))
/*
- * Encode and decode a swap entry (must be !pte_none(pte) && !pte_present(pte)
- * && !pte_file(pte)):
+ * Encode and decode a swap entry (must be !pte_none(pte) && !pte_present(pte):
*
* 31 30 29 28 27 26 25 24 23 22 21 20 19 18 ... 1 0
* 0 0 0 0 type. 0 0 0 0 0 0 offset.........
@@ -290,11 +287,6 @@ static inline void pte_clear(struct mm_struct *mm,
#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-/* Encode and decode a nonlinear file mapping entry */
-#define PTE_FILE_MAX_BITS 25
-#define pte_to_pgoff(pte) (pte_val(pte) & 0x1ffffff)
-#define pgoff_to_pte(off) __pte(((off) & 0x1ffffff) | _PAGE_FILE)
-
#define kern_addr_valid(addr) (1)
#include <asm-generic/pgtable.h>
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 37bf6a3ef8f4..18994ccb1185 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -125,7 +125,6 @@ extern void paging_init(void);
#define _PAGE_CC 0x001 /* software: pte contains a translation */
#define _PAGE_CI 0x002 /* cache inhibit */
#define _PAGE_WBC 0x004 /* write back cache */
-#define _PAGE_FILE 0x004 /* set: pagecache, unset: swap (when !PRESENT) */
#define _PAGE_WOM 0x008 /* weakly ordered memory */
#define _PAGE_A 0x010 /* accessed */
@@ -240,7 +239,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return 0; }
static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
@@ -438,12 +436,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-/* Encode and decode a nonlinear file mapping entry */
-
-#define PTE_FILE_MAX_BITS 26
-#define pte_to_pgoff(x) (pte_val(x) >> 6)
-#define pgoff_to_pte(x) __pte(((x) << 6) | _PAGE_FILE)
-
#define kern_addr_valid(addr) (1)
#include <asm-generic/pgtable.h>
diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h
index d797acc901e4..875f0845a707 100644
--- a/arch/openrisc/include/asm/thread_info.h
+++ b/arch/openrisc/include/asm/thread_info.h
@@ -57,7 +57,6 @@ struct thread_info {
0-0x7FFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
*/
- struct restart_block restart_block;
__u8 supervisor_stack[0];
/* saved context data */
@@ -79,9 +78,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = 1, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
.ksp = 0, \
}
diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index 1d3c9c28ac25..f14793306b03 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -754,11 +754,6 @@ _dc_enable:
/* ===============================================[ page table masks ]=== */
-/* bit 4 is used in hardware as write back cache bit. we never use this bit
- * explicitly, so we can reuse it as _PAGE_FILE bit and mask it out when
- * writing into hardware pte's
- */
-
#define DTLB_UP_CONVERT_MASK 0x3fa
#define ITLB_UP_CONVERT_MASK 0x3a
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 7d1b8235bf90..4112175bf803 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -46,7 +46,7 @@ static int restore_sigcontext(struct pt_regs *regs,
int err = 0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Restore the regs from &sc->regs.
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 22b89d1edba7..1d49a4a7749b 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -146,7 +146,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long);
#define _PAGE_GATEWAY_BIT 28 /* (0x008) privilege promotion allowed */
#define _PAGE_DMB_BIT 27 /* (0x010) Data Memory Break enable (B bit) */
#define _PAGE_DIRTY_BIT 26 /* (0x020) Page Dirty (D bit) */
-#define _PAGE_FILE_BIT _PAGE_DIRTY_BIT /* overload this bit */
#define _PAGE_REFTRAP_BIT 25 /* (0x040) Page Ref. Trap enable (T bit) */
#define _PAGE_NO_CACHE_BIT 24 /* (0x080) Uncached Page (U bit) */
#define _PAGE_ACCESSED_BIT 23 /* (0x100) Software: Page Accessed */
@@ -167,13 +166,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long);
/* PFN_PTE_SHIFT defines the shift of a PTE value to access the PFN field */
#define PFN_PTE_SHIFT 12
-
-/* this is how many bits may be used by the file functions */
-#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_SHIFT)
-
-#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << PTE_SHIFT) | _PAGE_FILE })
-
#define _PAGE_READ (1 << xlate_pabit(_PAGE_READ_BIT))
#define _PAGE_WRITE (1 << xlate_pabit(_PAGE_WRITE_BIT))
#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
@@ -186,7 +178,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long);
#define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT))
#define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT))
#define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT))
-#define _PAGE_FILE (1 << xlate_pabit(_PAGE_FILE_BIT))
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -344,7 +335,6 @@ static inline void pgd_clear(pgd_t * pgdp) { }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return 0; }
static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index a84611835549..fb13e3865563 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -14,7 +14,6 @@ struct thread_info {
mm_segment_t addr_limit; /* user-level address space limit */
__u32 cpu; /* current CPU */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
- struct restart_block restart_block;
};
#define INIT_THREAD_INFO(tsk) \
@@ -25,9 +24,6 @@ struct thread_info {
.cpu = 0, \
.addr_limit = KERNEL_DS, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall \
- } \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 294d251ca7b2..6cb8db76fd4e 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -40,6 +40,7 @@
#define MADV_SPACEAVAIL 5 /* insure that resources are reserved */
#define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */
#define MADV_VPS_INHERIT 7 /* Inherit parents page size */
+#define MADV_FREE 8 /* free pages only if memory pressure */
/* common/generic parameters */
#define MADV_REMOVE 9 /* remove these pages & resources */
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 012d4fa63d97..9b910a0251b8 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -99,7 +99,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
sigframe_size = PARISC_RT_SIGFRAME_SIZE32;
#endif
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/* Unwind the user stack to get the rt_sigframe structure. */
frame = (struct rt_sigframe __user *)
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index 234e07c47803..c718bfd58bcb 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -332,8 +332,8 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
/*
* Encode and decode a swap entry.
* Note that the bits we use in a PTE for representing a swap entry
- * must not include the _PAGE_PRESENT bit, the _PAGE_FILE bit, or the
- *_PAGE_HASHPTE bit (if used). -- paulus
+ * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
+ * -- paulus
*/
#define __swp_type(entry) ((entry).val & 0x1f)
#define __swp_offset(entry) ((entry).val >> 5)
@@ -341,11 +341,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-/* Encode and decode a nonlinear file mapping entry */
-#define PTE_FILE_MAX_BITS 29
-#define pte_to_pgoff(pte) (pte_val(pte) >> 3)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE })
-
/*
* No page table caches to initialise
*/
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index b9dcc936e2d1..3956c71b95b7 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -352,9 +352,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
-#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
-#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
@@ -389,7 +386,7 @@ void pgtable_cache_init(void);
* The last three bits are intentionally left to zero. This memory location
* are also used as normal page PTE pointers. So if we have any pointers
* left around while we collapse a hugepage, we need to make sure
- * _PAGE_PRESENT and _PAGE_FILE bits of that are zero when we look at them
+ * _PAGE_PRESENT bit of that is zero when we look at them
*/
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
@@ -494,9 +491,11 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index a8805fee0df9..48c9a50e1151 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -33,7 +33,6 @@ struct mm_struct;
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
diff --git a/arch/powerpc/include/asm/pte-40x.h b/arch/powerpc/include/asm/pte-40x.h
index ec0b0b0d1df9..486b1ef81338 100644
--- a/arch/powerpc/include/asm/pte-40x.h
+++ b/arch/powerpc/include/asm/pte-40x.h
@@ -38,7 +38,6 @@
*/
#define _PAGE_GUARDED 0x001 /* G: page is guarded from prefetch */
-#define _PAGE_FILE 0x001 /* when !present: nonlinear file mapping */
#define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */
#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */
#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */
diff --git a/arch/powerpc/include/asm/pte-44x.h b/arch/powerpc/include/asm/pte-44x.h
index 4192b9bad901..36f75fab23f5 100644
--- a/arch/powerpc/include/asm/pte-44x.h
+++ b/arch/powerpc/include/asm/pte-44x.h
@@ -44,9 +44,6 @@
* - PRESENT *must* be in the bottom three bits because swap cache
* entries use the top 29 bits for TLB2.
*
- * - FILE *must* be in the bottom three bits because swap cache
- * entries use the top 29 bits for TLB2.
- *
* - CACHE COHERENT bit (M) has no effect on original PPC440 cores,
* because it doesn't support SMP. However, some later 460 variants
* have -some- form of SMP support and so I keep the bit there for
@@ -68,7 +65,6 @@
*
* There are three protection bits available for SWAP entry:
* _PAGE_PRESENT
- * _PAGE_FILE
* _PAGE_HASHPTE (if HW has)
*
* So those three bits have to be inside of 0-2nd LSB of PTE.
@@ -77,7 +73,6 @@
#define _PAGE_PRESENT 0x00000001 /* S: PTE valid */
#define _PAGE_RW 0x00000002 /* S: Write permission */
-#define _PAGE_FILE 0x00000004 /* S: nonlinear file mapping */
#define _PAGE_EXEC 0x00000004 /* H: Execute permission */
#define _PAGE_ACCESSED 0x00000008 /* S: Page referenced */
#define _PAGE_DIRTY 0x00000010 /* S: Page dirty */
diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h
index daa4616e61c4..b2d6b9a22e7c 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -29,7 +29,6 @@
/* Definitions for 8xx embedded chips. */
#define _PAGE_PRESENT 0x0001 /* Page is valid */
-#define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */
#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */
#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */
#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */
diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h
index 576ad88104cb..91a704952ca1 100644
--- a/arch/powerpc/include/asm/pte-book3e.h
+++ b/arch/powerpc/include/asm/pte-book3e.h
@@ -10,7 +10,6 @@
/* Architected bits */
#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */
-#define _PAGE_FILE 0x000002 /* (!present only) software: pte holds file offset */
#define _PAGE_SW1 0x000002
#define _PAGE_BAP_SR 0x000004
#define _PAGE_BAP_UR 0x000008
diff --git a/arch/powerpc/include/asm/pte-fsl-booke.h b/arch/powerpc/include/asm/pte-fsl-booke.h
index e84dd7ed505e..9f5c3d04a1a3 100644
--- a/arch/powerpc/include/asm/pte-fsl-booke.h
+++ b/arch/powerpc/include/asm/pte-fsl-booke.h
@@ -13,14 +13,11 @@
- PRESENT *must* be in the bottom three bits because swap cache
entries use the top 29 bits.
- - FILE *must* be in the bottom three bits because swap cache
- entries use the top 29 bits.
*/
/* Definitions for FSL Book-E Cores */
#define _PAGE_PRESENT 0x00001 /* S: PTE contains a translation */
#define _PAGE_USER 0x00002 /* S: User page (maps to UR) */
-#define _PAGE_FILE 0x00002 /* S: when !present: nonlinear file mapping */
#define _PAGE_RW 0x00004 /* S: Write permission (SW) */
#define _PAGE_DIRTY 0x00008 /* S: Page dirty */
#define _PAGE_EXEC 0x00010 /* H: SX permission */
diff --git a/arch/powerpc/include/asm/pte-hash32.h b/arch/powerpc/include/asm/pte-hash32.h
index 4aad4132d0a8..62cfb0c663bb 100644
--- a/arch/powerpc/include/asm/pte-hash32.h
+++ b/arch/powerpc/include/asm/pte-hash32.h
@@ -18,7 +18,6 @@
#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */
#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */
-#define _PAGE_FILE 0x004 /* when !present: nonlinear file mapping */
#define _PAGE_USER 0x004 /* usermode access allowed */
#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */
#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */
diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h
index 2505d8eab15c..1a66c25eeac2 100644
--- a/arch/powerpc/include/asm/pte-hash64.h
+++ b/arch/powerpc/include/asm/pte-hash64.h
@@ -16,7 +16,6 @@
*/
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
-#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x0008
/* We can derive Memory coherence from _PAGE_NO_CACHE */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index ebc4f165690a..06596aa4359b 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -43,7 +43,6 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
- struct restart_block restart_block;
unsigned long local_flags; /* private flags for thread */
/* low level flags - has atomic operations done on it */
@@ -59,9 +58,6 @@ struct thread_info {
.exec_domain = &default_exec_domain, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
.flags = 0, \
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index b171001698ff..d3a831ac0f92 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1231,7 +1231,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
int tm_restore = 0;
#endif
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
rt_sf = (struct rt_sigframe __user *)
(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
@@ -1504,7 +1504,7 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
#endif
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
sc = &sf->sctx;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 2cb0c94cafa5..c7c24d2e2bdb 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -666,7 +666,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
#endif
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (!access_ok(VERIFY_READ, uc, sizeof(*uc)))
goto badframe;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 4fe5f64cc179..8526c5896c94 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -781,7 +781,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
pmd_t pmd;
/*
- * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
+ * For a valid pte, we would have _PAGE_PRESENT always
* set. We use this to check THP page at pmd level.
* leaf pte for huge page, bottom two bits != 00
*/
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5e102422c9ab..ffb1d8ce97ae 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -249,10 +249,10 @@ static inline int is_module_addr(void *addr)
_PAGE_YOUNG)
/*
- * handle_pte_fault uses pte_present, pte_none and pte_file to find out the
- * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit
- * is used to distinguish present from not-present ptes. It is changed only
- * with the page table lock held.
+ * handle_pte_fault uses pte_present and pte_none to find out the pte type
+ * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
+ * distinguish present from not-present ptes. It is changed only with the page
+ * table lock held.
*
* The following table gives the different possible bit combinations for
* the pte hardware and software bits in the last 12 bits of a pte:
@@ -279,7 +279,6 @@ static inline int is_module_addr(void *addr)
*
* pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
* pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
- * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600
* pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
*/
@@ -671,13 +670,6 @@ static inline int pte_swap(pte_t pte)
== (_PAGE_INVALID | _PAGE_TYPE);
}
-static inline int pte_file(pte_t pte)
-{
- /* Bit pattern: (pte & 0x601) == 0x600 */
- return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT))
- == (_PAGE_INVALID | _PAGE_PROTECT);
-}
-
static inline int pte_special(pte_t pte)
{
return (pte_val(pte) & _PAGE_SPECIAL);
@@ -1756,19 +1748,6 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#ifndef CONFIG_64BIT
-# define PTE_FILE_MAX_BITS 26
-#else /* CONFIG_64BIT */
-# define PTE_FILE_MAX_BITS 59
-#endif /* CONFIG_64BIT */
-
-#define pte_to_pgoff(__pte) \
- ((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f))
-
-#define pgoff_to_pte(__off) \
- ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
- | _PAGE_INVALID | _PAGE_PROTECT })
-
#endif /* !__ASSEMBLY__ */
#define kern_addr_valid(addr) (1)
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 7e2dcd7c57ef..8662f5c8e17f 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -44,7 +44,6 @@ extern char *strstr(const char *, const char *);
#undef __HAVE_ARCH_STRCHR
#undef __HAVE_ARCH_STRNCHR
#undef __HAVE_ARCH_STRNCMP
-#undef __HAVE_ARCH_STRNICMP
#undef __HAVE_ARCH_STRPBRK
#undef __HAVE_ARCH_STRSEP
#undef __HAVE_ARCH_STRSPN
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 4d62fd5b56e5..ef1df718642d 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -39,7 +39,6 @@ struct thread_info {
unsigned long sys_call_table; /* System call table address */
unsigned int cpu; /* current CPU */
int preempt_count; /* 0 => preemptable, <0 => BUG */
- struct restart_block restart_block;
unsigned int system_call;
__u64 user_timer;
__u64 system_timer;
@@ -56,9 +55,6 @@ struct thread_info {
.flags = 0, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 34d5fa7b01b5..bc1df12dd4f8 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -209,7 +209,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
int i;
/* Alwys make any pending restarted system call return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
return -EFAULT;
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6a2ac257d98f..b3ae6f70c6d6 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -162,7 +162,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
_sigregs user_sregs;
/* Alwys make any pending restarted system call return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
return -EFAULT;
diff --git a/arch/score/include/asm/pgtable-bits.h b/arch/score/include/asm/pgtable-bits.h
index 7d65a96a82e5..0e5c6f466520 100644
--- a/arch/score/include/asm/pgtable-bits.h
+++ b/arch/score/include/asm/pgtable-bits.h
@@ -6,7 +6,6 @@
#define _PAGE_WRITE (1<<7) /* implemented in software */
#define _PAGE_PRESENT (1<<9) /* implemented in software */
#define _PAGE_MODIFIED (1<<10) /* implemented in software */
-#define _PAGE_FILE (1<<10)
#define _PAGE_GLOBAL (1<<0)
#define _PAGE_VALID (1<<1)
diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h
index db96ad9afc03..5170ffdea643 100644
--- a/arch/score/include/asm/pgtable.h
+++ b/arch/score/include/asm/pgtable.h
@@ -90,15 +90,6 @@ static inline void pmd_clear(pmd_t *pmdp)
((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
#define pte_unmap(pte) ((void)(pte))
-/*
- * Bits 9(_PAGE_PRESENT) and 10(_PAGE_FILE)are taken,
- * split up 30 bits of offset into this range:
- */
-#define PTE_FILE_MAX_BITS 30
-#define pte_to_pgoff(_pte) \
- (((_pte).pte & 0x1ff) | (((_pte).pte >> 11) << 9))
-#define pgoff_to_pte(off) \
- ((pte_t) {((off) & 0x1ff) | (((off) >> 9) << 11) | _PAGE_FILE})
#define __pte_to_swp_entry(pte) \
((swp_entry_t) { pte_val(pte)})
#define __swp_entry_to_pte(x) ((pte_t) {(x).val})
@@ -169,8 +160,8 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot)
}
#define __swp_type(x) ((x).val & 0x1f)
-#define __swp_offset(x) ((x).val >> 11)
-#define __swp_entry(type, offset) ((swp_entry_t){(type) | ((offset) << 11)})
+#define __swp_offset(x) ((x).val >> 10)
+#define __swp_entry(type, offset) ((swp_entry_t){(type) | ((offset) << 10)})
extern unsigned long empty_zero_page;
extern unsigned long zero_page_mask;
@@ -198,11 +189,6 @@ static inline int pte_young(pte_t pte)
return pte_val(pte) & _PAGE_ACCESSED;
}
-static inline int pte_file(pte_t pte)
-{
- return pte_val(pte) & _PAGE_FILE;
-}
-
#define pte_special(pte) (0)
static inline pte_t pte_wrprotect(pte_t pte)
diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h
index 656b7ada9326..33864fa2a8d4 100644
--- a/arch/score/include/asm/thread_info.h
+++ b/arch/score/include/asm/thread_info.h
@@ -42,7 +42,6 @@ struct thread_info {
* 0-0xFFFFFFFF for kernel-thread
*/
mm_segment_t addr_limit;
- struct restart_block restart_block;
struct pt_regs *regs;
};
@@ -58,9 +57,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = 1, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/score/kernel/asm-offsets.c b/arch/score/kernel/asm-offsets.c
index 57788f44c6fb..b4d5214a7a7e 100644
--- a/arch/score/kernel/asm-offsets.c
+++ b/arch/score/kernel/asm-offsets.c
@@ -106,7 +106,6 @@ void output_thread_info_defines(void)
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit);
- OFFSET(TI_RESTART_BLOCK, thread_info, restart_block);
OFFSET(TI_REGS, thread_info, regs);
DEFINE(KERNEL_STACK_SIZE, THREAD_SIZE);
DEFINE(KERNEL_STACK_MASK, THREAD_MASK);
diff --git a/arch/score/kernel/signal.c b/arch/score/kernel/signal.c
index 1651807774ad..e381c8c4ff65 100644
--- a/arch/score/kernel/signal.c
+++ b/arch/score/kernel/signal.c
@@ -141,7 +141,7 @@ score_rt_sigreturn(struct pt_regs *regs)
int sig;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
frame = (struct rt_sigframe __user *) regs->regs[0];
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index 0bce3d81569e..c646e563abce 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -26,8 +26,6 @@
* and timing control which (together with bit 0) are moved into the
* old-style PTEA on the parts that support it.
*
- * XXX: Leave the _PAGE_FILE and _PAGE_WT overhaul for a rainy day.
- *
* SH-X2 MMUs and extended PTEs
*
* SH-X2 supports an extended mode TLB with split data arrays due to the
@@ -51,7 +49,6 @@
#define _PAGE_PRESENT 0x100 /* V-bit : page is valid */
#define _PAGE_PROTNONE 0x200 /* software: if not present */
#define _PAGE_ACCESSED 0x400 /* software: page referenced */
-#define _PAGE_FILE _PAGE_WT /* software: pagecache or swap? */
#define _PAGE_SPECIAL 0x800 /* software: special page */
#define _PAGE_SZ_MASK (_PAGE_SZ0 | _PAGE_SZ1)
@@ -105,14 +102,13 @@ static inline unsigned long copy_ptea_attributes(unsigned long x)
/* Mask which drops unused bits from the PTEL value */
#if defined(CONFIG_CPU_SH3)
#define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED| \
- _PAGE_FILE | _PAGE_SZ1 | \
- _PAGE_HW_SHARED)
+ _PAGE_SZ1 | _PAGE_HW_SHARED)
#elif defined(CONFIG_X2TLB)
/* Get rid of the legacy PR/SZ bits when using extended mode */
#define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED | \
- _PAGE_FILE | _PAGE_PR_MASK | _PAGE_SZ_MASK)
+ _PAGE_PR_MASK | _PAGE_SZ_MASK)
#else
-#define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED | _PAGE_FILE)
+#define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED)
#endif
#define _PAGE_FLAGS_HARDWARE_MASK (phys_addr_mask() & ~(_PAGE_CLEAR_FLAGS))
@@ -343,7 +339,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
#define pte_not_present(pte) (!((pte).pte_low & _PAGE_PRESENT))
#define pte_dirty(pte) ((pte).pte_low & _PAGE_DIRTY)
#define pte_young(pte) ((pte).pte_low & _PAGE_ACCESSED)
-#define pte_file(pte) ((pte).pte_low & _PAGE_FILE)
#define pte_special(pte) ((pte).pte_low & _PAGE_SPECIAL)
#ifdef CONFIG_X2TLB
@@ -445,7 +440,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
* Encode and de-code a swap entry
*
* Constraints:
- * _PAGE_FILE at bit 0
* _PAGE_PRESENT at bit 8
* _PAGE_PROTNONE at bit 9
*
@@ -453,9 +447,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
* swap offset into bits 10:30. For the 64-bit PTE case, we keep the
* preserved bits in the low 32-bits and use the upper 32 as the swap
* offset (along with a 5-bit type), following the same approach as x86
- * PAE. This keeps the logic quite simple, and allows for a full 32
- * PTE_FILE_MAX_BITS, as opposed to the 29-bits we're constrained with
- * in the pte_low case.
+ * PAE. This keeps the logic quite simple.
*
* As is evident by the Alpha code, if we ever get a 64-bit unsigned
* long (swp_entry_t) to match up with the 64-bit PTEs, this all becomes
@@ -471,13 +463,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
#define __swp_entry_to_pte(x) ((pte_t){ 0, (x).val })
-/*
- * Encode and decode a nonlinear file mapping entry
- */
-#define pte_to_pgoff(pte) ((pte).pte_high)
-#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
-
-#define PTE_FILE_MAX_BITS 32
#else
#define __swp_type(x) ((x).val & 0xff)
#define __swp_offset(x) ((x).val >> 10)
@@ -485,13 +470,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 1 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 1 })
-
-/*
- * Encode and decode a nonlinear file mapping entry
- */
-#define PTE_FILE_MAX_BITS 29
-#define pte_to_pgoff(pte) (pte_val(pte) >> 1)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 1) | _PAGE_FILE })
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/sh/include/asm/pgtable_64.h b/arch/sh/include/asm/pgtable_64.h
index dda8c82601b9..07424968df62 100644
--- a/arch/sh/include/asm/pgtable_64.h
+++ b/arch/sh/include/asm/pgtable_64.h
@@ -107,7 +107,6 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval)
#define _PAGE_DEVICE 0x001 /* CB0: if uncacheable, 1->device (i.e. no write-combining or reordering at bus level) */
#define _PAGE_CACHABLE 0x002 /* CB1: uncachable/cachable */
#define _PAGE_PRESENT 0x004 /* software: page referenced */
-#define _PAGE_FILE 0x004 /* software: only when !present */
#define _PAGE_SIZE0 0x008 /* SZ0-bit : size of page */
#define _PAGE_SIZE1 0x010 /* SZ1-bit : size of page */
#define _PAGE_SHARED 0x020 /* software: reflects PTEH's SH */
@@ -129,7 +128,7 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval)
#define _PAGE_WIRED _PAGE_EXT(0x001) /* software: wire the tlb entry */
#define _PAGE_SPECIAL _PAGE_EXT(0x002)
-#define _PAGE_CLEAR_FLAGS (_PAGE_PRESENT | _PAGE_FILE | _PAGE_SHARED | \
+#define _PAGE_CLEAR_FLAGS (_PAGE_PRESENT | _PAGE_SHARED | \
_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_WIRED)
/* Mask which drops software flags */
@@ -260,7 +259,6 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval)
*/
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
static inline int pte_special(pte_t pte){ return pte_val(pte) & _PAGE_SPECIAL; }
@@ -304,11 +302,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-/* Encode and decode a nonlinear file mapping entry */
-#define PTE_FILE_MAX_BITS 29
-#define pte_to_pgoff(pte) (pte_val(pte))
-#define pgoff_to_pte(off) ((pte_t) { (off) | _PAGE_FILE })
-
#endif /* !__ASSEMBLY__ */
#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index ad27ffa65e2e..657c03919627 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -33,7 +33,6 @@ struct thread_info {
__u32 cpu;
int preempt_count; /* 0 => preemptable, <0 => BUG */
mm_segment_t addr_limit; /* thread address space */
- struct restart_block restart_block;
unsigned long previous_sp; /* sp of previous stack in case
of nested IRQ stacks */
__u8 supervisor_stack[0];
@@ -63,9 +62,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
index 08a2be775b6c..542225fedb11 100644
--- a/arch/sh/kernel/asm-offsets.c
+++ b/arch/sh/kernel/asm-offsets.c
@@ -25,7 +25,6 @@ int main(void)
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
- DEFINE(TI_RESTART_BLOCK,offsetof(struct thread_info, restart_block));
DEFINE(TI_SIZE, sizeof(struct thread_info));
#ifdef CONFIG_HIBERNATION
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 2f002b24fb92..0b34f2a704fe 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -156,7 +156,7 @@ asmlinkage int sys_sigreturn(void)
int r0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
@@ -186,7 +186,7 @@ asmlinkage int sys_rt_sigreturn(void)
int r0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 897abe7b871e..71993c6a7d94 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -260,7 +260,7 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3,
long long ret;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
@@ -294,7 +294,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3,
long long ret;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index b9b91ae19fe1..b2f7dc46a7d1 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -221,14 +221,6 @@ static inline int pte_young(pte_t pte)
return pte_val(pte) & SRMMU_REF;
}
-/*
- * The following only work if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)
-{
- return pte_val(pte) & SRMMU_FILE;
-}
-
static inline int pte_special(pte_t pte)
{
return 0;
@@ -375,22 +367,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-/* file-offset-in-pte helpers */
-static inline unsigned long pte_to_pgoff(pte_t pte)
-{
- return pte_val(pte) >> SRMMU_PTE_FILE_SHIFT;
-}
-
-static inline pte_t pgoff_to_pte(unsigned long pgoff)
-{
- return __pte((pgoff << SRMMU_PTE_FILE_SHIFT) | SRMMU_FILE);
-}
-
-/*
- * This is made a constant because mm/fremap.c required a constant.
- */
-#define PTE_FILE_MAX_BITS 24
-
static inline unsigned long
__get_phys (unsigned long addr)
{
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 1ff9e7864168..ecd207f7eef3 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -137,7 +137,6 @@ bool kern_addr_valid(unsigned long addr);
#define _PAGE_SOFT_4U _AC(0x0000000000001F80,UL) /* Software bits: */
#define _PAGE_EXEC_4U _AC(0x0000000000001000,UL) /* Executable SW bit */
#define _PAGE_MODIFIED_4U _AC(0x0000000000000800,UL) /* Modified (dirty) */
-#define _PAGE_FILE_4U _AC(0x0000000000000800,UL) /* Pagecache page */
#define _PAGE_ACCESSED_4U _AC(0x0000000000000400,UL) /* Accessed (ref'd) */
#define _PAGE_READ_4U _AC(0x0000000000000200,UL) /* Readable SW Bit */
#define _PAGE_WRITE_4U _AC(0x0000000000000100,UL) /* Writable SW Bit */
@@ -167,7 +166,6 @@ bool kern_addr_valid(unsigned long addr);
#define _PAGE_EXEC_4V _AC(0x0000000000000080,UL) /* Executable Page */
#define _PAGE_W_4V _AC(0x0000000000000040,UL) /* Writable */
#define _PAGE_SOFT_4V _AC(0x0000000000000030,UL) /* Software bits */
-#define _PAGE_FILE_4V _AC(0x0000000000000020,UL) /* Pagecache page */
#define _PAGE_PRESENT_4V _AC(0x0000000000000010,UL) /* Present */
#define _PAGE_RESV_4V _AC(0x0000000000000008,UL) /* Reserved */
#define _PAGE_SZ16GB_4V _AC(0x0000000000000007,UL) /* 16GB Page */
@@ -332,22 +330,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
}
#endif
-static inline pte_t pgoff_to_pte(unsigned long off)
-{
- off <<= PAGE_SHIFT;
-
- __asm__ __volatile__(
- "\n661: or %0, %2, %0\n"
- " .section .sun4v_1insn_patch, \"ax\"\n"
- " .word 661b\n"
- " or %0, %3, %0\n"
- " .previous\n"
- : "=r" (off)
- : "0" (off), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V));
-
- return __pte(off);
-}
-
static inline pgprot_t pgprot_noncached(pgprot_t prot)
{
unsigned long val = pgprot_val(prot);
@@ -609,22 +591,6 @@ static inline unsigned long pte_exec(pte_t pte)
return (pte_val(pte) & mask);
}
-static inline unsigned long pte_file(pte_t pte)
-{
- unsigned long val = pte_val(pte);
-
- __asm__ __volatile__(
- "\n661: and %0, %2, %0\n"
- " .section .sun4v_1insn_patch, \"ax\"\n"
- " .word 661b\n"
- " and %0, %3, %0\n"
- " .previous\n"
- : "=r" (val)
- : "0" (val), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V));
-
- return val;
-}
-
static inline unsigned long pte_present(pte_t pte)
{
unsigned long val = pte_val(pte);
@@ -731,6 +697,15 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return __pmd(pte_val(pte));
}
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ pte_t pte = __pte(pmd_val(pmd));
+
+ pte = pte_mkclean(pte);
+
+ return __pmd(pte_val(pte));
+}
+
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
@@ -971,12 +946,6 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-/* File offset in PTE support. */
-unsigned long pte_file(pte_t);
-#define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT)
-pte_t pgoff_to_pte(unsigned long);
-#define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL)
-
int page_in_phys_avail(unsigned long paddr);
/*
diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
index 79da17866fa8..ae51a111a8c7 100644
--- a/arch/sparc/include/asm/pgtsrmmu.h
+++ b/arch/sparc/include/asm/pgtsrmmu.h
@@ -80,10 +80,6 @@
#define SRMMU_PRIV 0x1c
#define SRMMU_PRIV_RDONLY 0x18
-#define SRMMU_FILE 0x40 /* Implemented in software */
-
-#define SRMMU_PTE_FILE_SHIFT 8 /* == 32-PTE_FILE_MAX_BITS */
-
#define SRMMU_CHG_MASK (0xffffff00 | SRMMU_REF | SRMMU_DIRTY)
/* SRMMU swap entry encoding
@@ -94,13 +90,13 @@
* oooooooooooooooooootttttRRRRRRRR
* fedcba9876543210fedcba9876543210
*
- * The bottom 8 bits are reserved for protection and status bits, especially
- * FILE and PRESENT.
+ * The bottom 7 bits are reserved for protection and status bits, especially
+ * PRESENT.
*/
#define SRMMU_SWP_TYPE_MASK 0x1f
-#define SRMMU_SWP_TYPE_SHIFT SRMMU_PTE_FILE_SHIFT
-#define SRMMU_SWP_OFF_MASK 0x7ffff
-#define SRMMU_SWP_OFF_SHIFT (SRMMU_PTE_FILE_SHIFT + 5)
+#define SRMMU_SWP_TYPE_SHIFT 7
+#define SRMMU_SWP_OFF_MASK 0xfffff
+#define SRMMU_SWP_OFF_SHIFT (SRMMU_SWP_TYPE_SHIFT + 5)
/* Some day I will implement true fine grained access bits for
* user pages because the SRMMU gives us the capabilities to
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 025c98446b1e..fd7bd0a440ca 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -47,8 +47,6 @@ struct thread_info {
struct reg_window32 reg_window[NSWINS]; /* align for ldd! */
unsigned long rwbuf_stkptrs[NSWINS];
unsigned long w_saved;
-
- struct restart_block restart_block;
};
/*
@@ -62,9 +60,6 @@ struct thread_info {
.flags = 0, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
@@ -103,7 +98,6 @@ register struct thread_info *current_thread_info_reg asm("g6");
#define TI_REG_WINDOW 0x30
#define TI_RWIN_SPTRS 0x230
#define TI_W_SAVED 0x250
-/* #define TI_RESTART_BLOCK 0x25n */ /* Nobody cares */
/*
* thread information flag bit numbers
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 798f0279a4b5..ff455164732a 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -58,8 +58,6 @@ struct thread_info {
unsigned long gsr[7];
unsigned long xfsr[7];
- struct restart_block restart_block;
-
struct pt_regs *kern_una_regs;
unsigned int kern_una_insn;
@@ -92,10 +90,9 @@ struct thread_info {
#define TI_RWIN_SPTRS 0x000003c8
#define TI_GSR 0x00000400
#define TI_XFSR 0x00000438
-#define TI_RESTART_BLOCK 0x00000470
-#define TI_KUNA_REGS 0x000004a0
-#define TI_KUNA_INSN 0x000004a8
-#define TI_FPREGS 0x000004c0
+#define TI_KUNA_REGS 0x00000470
+#define TI_KUNA_INSN 0x00000478
+#define TI_FPREGS 0x00000480
/* We embed this in the uppermost byte of thread_info->flags */
#define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */
@@ -124,9 +121,6 @@ struct thread_info {
.current_ds = ASI_P, \
.exec_domain = &default_exec_domain, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 62deba7be1a9..4eed773a7735 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -150,7 +150,7 @@ void do_sigreturn32(struct pt_regs *regs)
int err, i;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
synchronize_user_stack();
@@ -235,7 +235,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
int err, i;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
synchronize_user_stack();
regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 9ee72fc8e0e4..52aa5e4ce5e7 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -70,7 +70,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
int err;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
synchronize_user_stack();
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 1a6999868031..d88beff47bab 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -254,7 +254,7 @@ void do_rt_sigreturn(struct pt_regs *regs)
int err;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
synchronize_user_stack ();
sf = (struct rt_signal_frame __user *)
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 981a769b9558..a27651e866e7 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2730,8 +2730,6 @@ void __init trap_init(void)
TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
TI_CURRENT_DS != offsetof(struct thread_info,
current_ds) ||
- TI_RESTART_BLOCK != offsetof(struct thread_info,
- restart_block) ||
TI_KUNA_REGS != offsetof(struct thread_info,
kern_una_regs) ||
TI_KUNA_INSN != offsetof(struct thread_info,
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 5d1950788c69..bc75b6ef2e79 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -285,17 +285,6 @@ extern void start_mm_caching(struct mm_struct *mm);
extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
/*
- * Support non-linear file mappings (see sys_remap_file_pages).
- * This is defined by CLIENT1 set but CLIENT0 and _PAGE_PRESENT clear, and the
- * file offset in the 32 high bits.
- */
-#define _PAGE_FILE HV_PTE_CLIENT1
-#define PTE_FILE_MAX_BITS 32
-#define pte_file(pte) (hv_pte_get_client1(pte) && !hv_pte_get_client0(pte))
-#define pte_to_pgoff(pte) ((pte).val >> 32)
-#define pgoff_to_pte(off) ((pte_t) { (((long long)(off)) << 32) | _PAGE_FILE })
-
-/*
* Encode and de-code a swap entry (see <linux/swapops.h>).
* We put the swap file type+offset in the 32 high bits;
* I believe we can just leave the low bits clear.
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 48e4fd0f38e4..96c14c1430d8 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -36,7 +36,6 @@ struct thread_info {
mm_segment_t addr_limit; /* thread address space
(KERNEL_DS or USER_DS) */
- struct restart_block restart_block;
struct single_step_state *step_state; /* single step state
(if non-zero) */
int align_ctl; /* controls unaligned access */
@@ -57,9 +56,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
.step_state = NULL, \
.align_ctl = 0, \
}
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index bb0a9ce7ae23..8a524e332c1a 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -48,7 +48,7 @@ int restore_sigcontext(struct pt_regs *regs,
int err;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Enforce that sigcontext is like pt_regs, and doesn't mess
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index cd3387370ebb..0029b3fb651b 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -263,10 +263,6 @@ static int pte_to_home(pte_t pte)
/* Update the home of a PTE if necessary (can also be used for a pgprot_t). */
pte_t pte_set_home(pte_t pte, int home)
{
- /* Check for non-linear file mapping "PTEs" and pass them through. */
- if (pte_file(pte))
- return pte;
-
#if CHIP_HAS_MMIO()
/* Check for MMIO mappings and pass them through. */
if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO)
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index f534b73e753e..7afe86035fa7 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -41,13 +41,4 @@ static inline void pgd_mkuptodate(pgd_t pgd) { }
#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
-/*
- * Bits 0 through 4 are taken
- */
-#define PTE_FILE_MAX_BITS 27
-
-#define pte_to_pgoff(pte) (pte_val(pte) >> 5)
-
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 5) + _PAGE_FILE })
-
#endif
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 0032f9212e74..344c559c0a17 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -112,25 +112,5 @@ static inline pmd_t pfn_pmd(pfn_t page_nr, pgprot_t pgprot)
return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
}
-/*
- * Bits 0 through 3 are taken in the low part of the pte,
- * put the 32 bits of offset into the high part.
- */
-#define PTE_FILE_MAX_BITS 32
-
-#ifdef CONFIG_64BIT
-
-#define pte_to_pgoff(p) ((p).pte >> 32)
-
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE })
-
-#else
-
-#define pte_to_pgoff(pte) ((pte).pte_high)
-
-#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
-
-#endif
-
#endif
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index bf974f712af7..2324b624f195 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -18,7 +18,6 @@
#define _PAGE_ACCESSED 0x080
#define _PAGE_DIRTY 0x100
/* If _PAGE_PRESENT is clear, we use these: */
-#define _PAGE_FILE 0x008 /* nonlinear file mapping, saved PTE; unset:swap */
#define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE;
pte_present gives true */
@@ -151,14 +150,6 @@ static inline int pte_write(pte_t pte)
!(pte_get_bits(pte, _PAGE_PROTNONE)));
}
-/*
- * The following only works if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)
-{
- return pte_get_bits(pte, _PAGE_FILE);
-}
-
static inline int pte_dirty(pte_t pte)
{
return pte_get_bits(pte, _PAGE_DIRTY);
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index 1c5b2a83046a..e04114c4fcd9 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -22,7 +22,6 @@ struct thread_info {
mm_segment_t addr_limit; /* thread address space:
0-0xBFFFFFFF for user
0-0xFFFFFFFF for kernel */
- struct restart_block restart_block;
struct thread_info *real_thread; /* Points to non-IRQ stack */
};
@@ -34,9 +33,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
.real_thread = NULL, \
}
diff --git a/arch/unicore32/include/asm/pgtable-hwdef.h b/arch/unicore32/include/asm/pgtable-hwdef.h
index 7314e859cca0..e37fa471c2be 100644
--- a/arch/unicore32/include/asm/pgtable-hwdef.h
+++ b/arch/unicore32/include/asm/pgtable-hwdef.h
@@ -44,7 +44,6 @@
#define PTE_TYPE_INVALID (3 << 0)
#define PTE_PRESENT (1 << 2)
-#define PTE_FILE (1 << 3) /* only when !PRESENT */
#define PTE_YOUNG (1 << 3)
#define PTE_DIRTY (1 << 4)
#define PTE_CACHEABLE (1 << 5)
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index ed6f7d000fba..818d0f5598e3 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -283,20 +283,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
#define MAX_SWAPFILES_CHECK() \
BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
-/*
- * Encode and decode a file entry. File entries are stored in the Linux
- * page tables as follows:
- *
- * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
- * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- * <----------------------- offset ----------------------> 1 0 0 0
- */
-#define pte_file(pte) (pte_val(pte) & PTE_FILE)
-#define pte_to_pgoff(x) (pte_val(x) >> 4)
-#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE)
-
-#define PTE_FILE_MAX_BITS 28
-
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
/* FIXME: this is not correct */
#define kern_addr_valid(addr) (1)
diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h
index af36d8eabdf1..63e2839dfeb8 100644
--- a/arch/unicore32/include/asm/thread_info.h
+++ b/arch/unicore32/include/asm/thread_info.h
@@ -79,7 +79,6 @@ struct thread_info {
#ifdef CONFIG_UNICORE_FPU_F64
struct fp_state fpstate __attribute__((aligned(8)));
#endif
- struct restart_block restart_block;
};
#define INIT_THREAD_INFO(tsk) \
@@ -89,9 +88,6 @@ struct thread_info {
.flags = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index 7c8fb7018dc6..d329f85766cc 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -105,7 +105,7 @@ asmlinkage int __sys_rt_sigreturn(struct pt_regs *regs)
struct rt_sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 64-bit boundary,
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index f9e181aaba97..d0165c9a2932 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -169,7 +169,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
u32 tmp;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
get_user_try {
/*
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 206a87fdd22d..fd74a11959de 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -62,44 +62,8 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
return ((value >> rightshift) & mask) << leftshift;
}
-/*
- * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
- * split up the 29 bits of offset into this range.
- */
-#define PTE_FILE_MAX_BITS 29
-#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
-#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
-#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
-#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
-#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
-
-#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1)
-#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1)
-
-#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1)
-#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)
-
-static __always_inline pgoff_t pte_to_pgoff(pte_t pte)
-{
- return (pgoff_t)
- (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, -1UL, PTE_FILE_LSHIFT3));
-}
-
-static __always_inline pte_t pgoff_to_pte(pgoff_t off)
-{
- return (pte_t){
- .pte_low =
- pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) +
- pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) +
- pte_bitop(off, PTE_FILE_LSHIFT3, -1UL, PTE_FILE_SHIFT3) +
- _PAGE_FILE,
- };
-}
-
/* Encode and de-code a swap entry */
-#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
+#define SWP_TYPE_BITS 5
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 81bb91b49a88..cdaa58c9b39e 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -176,18 +176,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif
-/*
- * Bits 0, 6 and 7 are taken in the low part of the pte,
- * put the 32 bits of offset into the high part.
- *
- * For soft-dirty tracking 11 bit is taken from
- * the low part of pte as well.
- */
-#define pte_to_pgoff(pte) ((pte).pte_high)
-#define pgoff_to_pte(off) \
- ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
-#define PTE_FILE_MAX_BITS 32
-
/* Encode and de-code a swap entry */
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
#define __swp_type(x) (((x).val) & 0x1f)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e8a5454acc99..0c4bde7b9b5e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -115,11 +115,6 @@ static inline int pte_write(pte_t pte)
return pte_flags(pte) & _PAGE_RW;
}
-static inline int pte_file(pte_t pte)
-{
- return pte_flags(pte) & _PAGE_FILE;
-}
-
static inline int pte_huge(pte_t pte)
{
return pte_flags(pte) & _PAGE_PSE;
@@ -278,6 +273,11 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_ACCESSED);
}
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ return pmd_clear_flags(pmd, _PAGE_DIRTY);
+}
+
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
return pmd_clear_flags(pmd, _PAGE_RW);
@@ -329,21 +329,6 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}
-static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
-{
- return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
-}
-
-static inline pte_t pte_file_mksoft_dirty(pte_t pte)
-{
- return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
-}
-
-static inline int pte_file_soft_dirty(pte_t pte)
-{
- return pte_flags(pte) & _PAGE_SOFT_DIRTY;
-}
-
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
/*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 4572b2f30237..e227970f983e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -133,10 +133,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
/* PUD - Level3 access */
/* PMD - Level 2 access */
-#define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
- _PAGE_FILE })
-#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
/* PTE - Level 1 access. */
@@ -145,7 +141,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_unmap(pte) ((void)(pte))/* NOP */
/* Encode and de-code a swap entry */
-#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
+#define SWP_TYPE_BITS 5
#ifdef CONFIG_NUMA_BALANCING
/* Automatic NUMA balancing needs to be distinguishable from swap entries */
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 25bcd4a89517..5185a4f599ec 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -38,8 +38,6 @@
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
-/* - set: nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
@@ -114,7 +112,6 @@
#define _PAGE_NX (_AT(pteval_t, 0))
#endif
-#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8b13b0fbda8e..1a8f1fb1ed44 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -31,7 +31,6 @@ struct thread_info {
__u32 cpu; /* current CPU */
int saved_preempt_count;
mm_segment_t addr_limit;
- struct restart_block restart_block;
void __user *sysenter_return;
unsigned int sig_on_uaccess_error:1;
unsigned int uaccess_err:1; /* uaccess failed */
@@ -45,9 +44,6 @@ struct thread_info {
.cpu = 0, \
.saved_preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 415480d3ea84..ab48c252e661 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -335,6 +335,7 @@ void arch_crash_save_vmcoreinfo(void)
#endif
vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
(unsigned long)&_text - __START_KERNEL);
+ VMCOREINFO_PHYS_BASE(phys_base);
}
/* arch-dependent functionality related to kexec file-based syscall */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index ed37a768d0fc..0a62df4abcf7 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -69,7 +69,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
get_user_try {
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 79d824551c1a..0c8c32bfd792 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -157,7 +157,7 @@ static int copy_sc_from_user(struct pt_regs *regs,
int err, pid;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
err = copy_from_user(&sc, from, sizeof(sc));
if (err)
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 872bf0194e6d..01b80dce9d65 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -89,8 +89,6 @@
* (PAGE_NONE)| PPN | 0 | 00 | ADW | 01 | 11 | 11 |
* +-----------------------------------------+
* swap | index | type | 01 | 11 | 00 |
- * +- - - - - - - - - - - - - - - - - - - - -+
- * file | file offset | 01 | 11 | 10 |
* +-----------------------------------------+
*
* For T1050 hardware and earlier the layout differs for present and (PAGE_NONE)
@@ -111,7 +109,6 @@
* index swap offset / PAGE_SIZE (bit 11-31: 21 bits -> 8 GB)
* (note that the index is always non-zero)
* type swap type (5 bits -> 32 types)
- * file offset 26-bit offset into the file, in increments of PAGE_SIZE
*
* Notes:
* - (PROT_NONE) is a special case of 'present' but causes an exception for
@@ -144,7 +141,6 @@
#define _PAGE_HW_VALID 0x00
#define _PAGE_NONE 0x0f
#endif
-#define _PAGE_FILE (1<<1) /* file mapped page, only if !present */
#define _PAGE_USER (1<<4) /* user access (ring=1) */
@@ -260,7 +256,6 @@ static inline void pgtable_cache_init(void) { }
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return 0; }
static inline pte_t pte_wrprotect(pte_t pte)
@@ -390,11 +385,6 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define PTE_FILE_MAX_BITS 26
-#define pte_to_pgoff(pte) (pte_val(pte) >> 6)
-#define pgoff_to_pte(off) \
- ((pte_t) { ((off) << 6) | _PAGE_CA_INVALID | _PAGE_FILE | _PAGE_USER })
-
#endif /* !defined (__ASSEMBLY__) */
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 470153e8547c..a9b5d3ba196c 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -51,7 +51,6 @@ struct thread_info {
__s32 preempt_count; /* 0 => preemptable,< 0 => BUG*/
mm_segment_t addr_limit; /* thread address space */
- struct restart_block restart_block;
unsigned long cpenable;
@@ -72,7 +71,6 @@ struct thread_info {
#define TI_CPU 0x00000010
#define TI_PRE_COUNT 0x00000014
#define TI_ADDR_LIMIT 0x00000018
-#define TI_RESTART_BLOCK 0x000001C
#endif
@@ -90,9 +88,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 201aec0e0446..1b19f25bc567 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -80,6 +80,7 @@
#define MADV_SEQUENTIAL 2 /* expect sequential page references */
#define MADV_WILLNEED 3 /* will need these pages */
#define MADV_DONTNEED 4 /* don't need these pages */
+#define MADV_FREE 5 /* free pages only if memory pressure */
/* common parameters: try to keep these consistent across architectures */
#define MADV_REMOVE 9 /* remove these pages & resources */
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 4612321c73cc..3d733ba16f28 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -245,7 +245,7 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3,
int ret;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (regs->depc > 64)
panic("rt_sigreturn in double exception!\n");
diff --git a/block/genhd.c b/block/genhd.c
index 0a536dc05f3b..64600e911aaa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -850,7 +850,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+ if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index bd8bda386e02..9250b3f54a8f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -314,35 +314,35 @@ static void zram_meta_free(struct zram_meta *meta)
kfree(meta);
}
-static struct zram_meta *zram_meta_alloc(u64 disksize)
+static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
{
size_t num_pages;
+ char pool_name[8];
struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+
if (!meta)
- goto out;
+ return NULL;
num_pages = disksize >> PAGE_SHIFT;
meta->table = vzalloc(num_pages * sizeof(*meta->table));
if (!meta->table) {
pr_err("Error allocating zram address table\n");
- goto free_meta;
+ goto out_error;
}
- meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
+ snprintf(pool_name, sizeof(pool_name), "zram%d", device_id);
+ meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
if (!meta->mem_pool) {
pr_err("Error creating memory pool\n");
- goto free_table;
+ goto out_error;
}
return meta;
-free_table:
+out_error:
vfree(meta->table);
-free_meta:
kfree(meta);
- meta = NULL;
-out:
- return meta;
+ return NULL;
}
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
@@ -765,7 +765,7 @@ static ssize_t disksize_store(struct device *dev,
return -EINVAL;
disksize = PAGE_ALIGN(disksize);
- meta = zram_meta_alloc(disksize);
+ meta = zram_meta_alloc(zram->disk->first_minor, disksize);
if (!meta)
return -ENOMEM;
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index efefd12a0f7b..a4af8221751e 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -6,6 +6,15 @@ menu "Character devices"
source "drivers/tty/Kconfig"
+config DEVMEM
+ bool "/dev/mem virtual device support"
+ default y
+ help
+ Say Y here if you want to support the /dev/mem device.
+ The /dev/mem device is used to access areas of physical
+ memory.
+ When in doubt, say "Y".
+
config DEVKMEM
bool "/dev/kmem virtual device support"
default y
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 4c58333b4257..40c1580ecd47 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -341,7 +341,6 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
return 0;
}
-#ifdef CONFIG_DEVKMEM
static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
{
unsigned long pfn;
@@ -362,9 +361,7 @@ static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
vma->vm_pgoff = pfn;
return mmap_mem(file, vma);
}
-#endif
-#ifdef CONFIG_DEVKMEM
/*
* This function reads the *virtual* memory as seen by the kernel.
*/
@@ -544,9 +541,7 @@ static ssize_t write_kmem(struct file *file, const char __user *buf,
*ppos = p;
return virtr + wrote ? : err;
}
-#endif
-#ifdef CONFIG_DEVPORT
static ssize_t read_port(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -587,7 +582,6 @@ static ssize_t write_port(struct file *file, const char __user *buf,
*ppos = i;
return tmp-buf;
}
-#endif
static ssize_t read_null(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -715,7 +709,7 @@ static int open_port(struct inode *inode, struct file *filp)
#define open_mem open_port
#define open_kmem open_mem
-static const struct file_operations mem_fops = {
+static const struct file_operations __maybe_unused mem_fops = {
.llseek = memory_lseek,
.read = read_mem,
.write = write_mem,
@@ -724,8 +718,7 @@ static const struct file_operations mem_fops = {
.get_unmapped_area = get_unmapped_area_mem,
};
-#ifdef CONFIG_DEVKMEM
-static const struct file_operations kmem_fops = {
+static const struct file_operations __maybe_unused kmem_fops = {
.llseek = memory_lseek,
.read = read_kmem,
.write = write_kmem,
@@ -733,7 +726,6 @@ static const struct file_operations kmem_fops = {
.open = open_kmem,
.get_unmapped_area = get_unmapped_area_mem,
};
-#endif
static const struct file_operations null_fops = {
.llseek = null_lseek,
@@ -744,14 +736,12 @@ static const struct file_operations null_fops = {
.splice_write = splice_write_null,
};
-#ifdef CONFIG_DEVPORT
-static const struct file_operations port_fops = {
+static const struct file_operations __maybe_unused port_fops = {
.llseek = memory_lseek,
.read = read_port,
.write = write_port,
.open = open_port,
};
-#endif
static const struct file_operations zero_fops = {
.llseek = zero_lseek,
@@ -785,7 +775,9 @@ static const struct memdev {
const struct file_operations *fops;
struct backing_dev_info *dev_info;
} devlist[] = {
+#ifdef CONFIG_DEVMEM
[1] = { "mem", 0, &mem_fops, &directly_mappable_cdev_bdi },
+#endif
#ifdef CONFIG_DEVKMEM
[2] = { "kmem", 0, &kmem_fops, &directly_mappable_cdev_bdi },
#endif
diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c
index 63b471205072..68c1f32fb086 100644
--- a/drivers/gpu/drm/drm_vma_manager.c
+++ b/drivers/gpu/drm/drm_vma_manager.c
@@ -50,8 +50,7 @@
*
* You must not use multiple offset managers on a single address_space.
* Otherwise, mm-core will be unable to tear down memory mappings as the VM will
- * no longer be linear. Please use VM_NONLINEAR in that case and implement your
- * own offset managers.
+ * no longer be linear.
*
* This offset manager works on page-based addresses. That is, every argument
* and return code (with the exception of drm_vma_node_offset_addr()) is given
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index a11ff74a5127..9eac8de9e8b7 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -178,6 +178,15 @@ comment "Input Device Drivers"
source "drivers/input/keyboard/Kconfig"
+config INPUT_LEDS
+ bool "LED Support"
+ depends on LEDS_CLASS = INPUT || LEDS_CLASS = y
+ select LEDS_TRIGGERS
+ default y
+ help
+ This option enables support for LEDs on keyboards managed
+ by the input layer.
+
source "drivers/input/mouse/Kconfig"
source "drivers/input/joystick/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 5ca3f631497f..2ab5f3336da5 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -6,6 +6,9 @@
obj-$(CONFIG_INPUT) += input-core.o
input-core-y := input.o input-compat.o input-mt.o ff-core.o
+ifeq ($(CONFIG_INPUT_LEDS),y)
+input-core-y += leds.o
+endif
obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o
obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 213e3a1903ee..a1e609a65222 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -711,6 +711,9 @@ static void input_disconnect_device(struct input_dev *dev)
handle->open = 0;
spin_unlock_irq(&dev->event_lock);
+
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_disconnect(dev);
}
/**
@@ -2141,6 +2144,9 @@ int input_register_device(struct input_dev *dev)
list_add_tail(&dev->node, &input_dev_list);
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_connect(dev);
+
list_for_each_entry(handler, &input_handler_list, node)
input_attach_handler(dev, handler);
@@ -2426,6 +2432,8 @@ static int __init input_init(void)
goto fail2;
}
+ input_led_init();
+
return 0;
fail2: input_proc_exit();
@@ -2435,6 +2443,7 @@ static int __init input_init(void)
static void __exit input_exit(void)
{
+ input_led_exit();
input_proc_exit();
unregister_chrdev_region(MKDEV(INPUT_MAJOR, 0),
INPUT_MAX_CHAR_DEVICES);
diff --git a/drivers/input/leds.c b/drivers/input/leds.c
new file mode 100644
index 000000000000..193ee2425db1
--- /dev/null
+++ b/drivers/input/leds.c
@@ -0,0 +1,272 @@
+/*
+ * LED support for the input layer
+ *
+ * Copyright 2010-2014 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+
+/*
+ * Keyboard LEDs are propagated by default like the following example:
+ *
+ * VT keyboard numlock trigger
+ * -> vt::numl VT LED
+ * -> vt-numl VT trigger
+ * -> per-device inputX::numl LED
+ *
+ * Userland can however choose the trigger for the vt::numl LED, or
+ * independently choose the trigger for any inputx::numl LED.
+ *
+ *
+ * VT LED classes and triggers are registered on-demand according to
+ * existing LED devices
+ */
+
+/* Handler for VT LEDs, just triggers the corresponding VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness);
+static struct led_classdev vt_leds[LED_CNT] = {
+#define DEFINE_INPUT_LED(vt_led, nam, deftrig) \
+ [vt_led] = { \
+ .name = "vt::"nam, \
+ .max_brightness = 1, \
+ .brightness_set = vt_led_set, \
+ .default_trigger = deftrig, \
+ }
+/* Default triggers for the VT LEDs just correspond to the legacy
+ * usage. */
+ DEFINE_INPUT_LED(LED_NUML, "numl", "kbd-numlock"),
+ DEFINE_INPUT_LED(LED_CAPSL, "capsl", "kbd-capslock"),
+ DEFINE_INPUT_LED(LED_SCROLLL, "scrolll", "kbd-scrollock"),
+ DEFINE_INPUT_LED(LED_COMPOSE, "compose", NULL),
+ DEFINE_INPUT_LED(LED_KANA, "kana", "kbd-kanalock"),
+ DEFINE_INPUT_LED(LED_SLEEP, "sleep", NULL),
+ DEFINE_INPUT_LED(LED_SUSPEND, "suspend", NULL),
+ DEFINE_INPUT_LED(LED_MUTE, "mute", NULL),
+ DEFINE_INPUT_LED(LED_MISC, "misc", NULL),
+ DEFINE_INPUT_LED(LED_MAIL, "mail", NULL),
+ DEFINE_INPUT_LED(LED_CHARGING, "charging", NULL),
+};
+static const char *const vt_led_names[LED_CNT] = {
+ [LED_NUML] = "numl",
+ [LED_CAPSL] = "capsl",
+ [LED_SCROLLL] = "scrolll",
+ [LED_COMPOSE] = "compose",
+ [LED_KANA] = "kana",
+ [LED_SLEEP] = "sleep",
+ [LED_SUSPEND] = "suspend",
+ [LED_MUTE] = "mute",
+ [LED_MISC] = "misc",
+ [LED_MAIL] = "mail",
+ [LED_CHARGING] = "charging",
+};
+/* Handler for hotplug initialization */
+static void vt_led_trigger_activate(struct led_classdev *cdev);
+/* VT triggers */
+static struct led_trigger vt_led_triggers[LED_CNT] = {
+#define DEFINE_INPUT_LED_TRIGGER(vt_led, nam) \
+ [vt_led] = { \
+ .name = "vt-"nam, \
+ .activate = vt_led_trigger_activate, \
+ }
+ DEFINE_INPUT_LED_TRIGGER(LED_NUML, "numl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CAPSL, "capsl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SCROLLL, "scrolll"),
+ DEFINE_INPUT_LED_TRIGGER(LED_COMPOSE, "compose"),
+ DEFINE_INPUT_LED_TRIGGER(LED_KANA, "kana"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SLEEP, "sleep"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SUSPEND, "suspend"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MUTE, "mute"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MISC, "misc"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MAIL, "mail"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CHARGING, "charging"),
+};
+
+/* Lock for registration coherency */
+static DEFINE_MUTEX(vt_led_registered_lock);
+
+/* Which VT LED classes and triggers are registered */
+static unsigned long vt_led_registered[BITS_TO_LONGS(LED_CNT)];
+
+/* Number of input devices having each LED */
+static int vt_led_references[LED_CNT];
+
+static int vt_led_state[LED_CNT];
+static struct work_struct vt_led_work[LED_CNT];
+
+static void vt_led_cb(struct work_struct *work)
+{
+ int led = work - vt_led_work;
+
+ led_trigger_event(&vt_led_triggers[led], vt_led_state[led]);
+}
+
+/* VT LED state change, tell the VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ int led = cdev - vt_leds;
+
+ vt_led_state[led] = !!brightness;
+ schedule_work(&vt_led_work[led]);
+}
+
+/* LED state change for some keyboard, notify that keyboard. */
+static void perdevice_input_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ struct input_dev *dev;
+ struct led_classdev *leds;
+ int led;
+
+ dev = cdev->dev->platform_data;
+ if (!dev)
+ /* Still initializing */
+ return;
+ leds = dev->leds;
+ led = cdev - leds;
+
+ input_event(dev, EV_LED, led, !!brightness);
+ input_event(dev, EV_SYN, SYN_REPORT, 0);
+}
+
+/* Keyboard hotplug, initialize its LED status */
+static void vt_led_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - vt_led_triggers;
+
+ if (cdev->brightness_set)
+ cdev->brightness_set(cdev, vt_leds[led].brightness);
+}
+
+/* Free led stuff from input device, used at abortion and disconnection. */
+static void input_led_delete(struct input_dev *dev)
+{
+ if (dev) {
+ struct led_classdev *leds = dev->leds;
+ if (leds) {
+ int i;
+ for (i = 0; i < LED_CNT; i++)
+ kfree(leds[i].name);
+ kfree(leds);
+ dev->leds = NULL;
+ }
+ }
+}
+
+/* A new input device with potential LEDs to connect. */
+int input_led_connect(struct input_dev *dev)
+{
+ int i, error = 0;
+ struct led_classdev *leds;
+
+ dev->leds = leds = kcalloc(LED_CNT, sizeof(*leds), GFP_KERNEL);
+ if (!dev->leds)
+ return -ENOMEM;
+
+ /* lazily register missing VT LEDs */
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ if (!vt_led_references[i]) {
+ led_trigger_register(&vt_led_triggers[i]);
+ /* This keyboard is first to have led i,
+ * try to register it */
+ if (!led_classdev_register(NULL, &vt_leds[i]))
+ vt_led_references[i] = 1;
+ else
+ led_trigger_unregister(&vt_led_triggers[i]);
+ } else
+ vt_led_references[i]++;
+ }
+ mutex_unlock(&vt_led_registered_lock);
+
+ /* and register this device's LEDs */
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ leds[i].name = kasprintf(GFP_KERNEL, "%s::%s",
+ dev_name(&dev->dev),
+ vt_led_names[i]);
+ if (!leds[i].name) {
+ error = -ENOMEM;
+ goto err;
+ }
+ leds[i].max_brightness = 1;
+ leds[i].brightness_set = perdevice_input_led_set;
+ leds[i].default_trigger = vt_led_triggers[i].name;
+ }
+
+ /* No issue so far, we can register for real. */
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name) {
+ led_classdev_register(&dev->dev, &leds[i]);
+ leds[i].dev->platform_data = dev;
+ perdevice_input_led_set(&leds[i],
+ vt_leds[i].brightness);
+ }
+
+ return 0;
+
+err:
+ input_led_delete(dev);
+ return error;
+}
+
+/*
+ * Disconnected input device. Clean it, and deregister now-useless VT LEDs
+ * and triggers.
+ */
+void input_led_disconnect(struct input_dev *dev)
+{
+ int i;
+ struct led_classdev *leds = dev->leds;
+
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name)
+ led_classdev_unregister(&leds[i]);
+
+ input_led_delete(dev);
+
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++) {
+ if (!vt_leds[i].name || !test_bit(i, dev->ledbit))
+ continue;
+
+ vt_led_references[i]--;
+ if (vt_led_references[i]) {
+ /* Still some devices needing it */
+ continue;
+ }
+
+ led_classdev_unregister(&vt_leds[i]);
+ led_trigger_unregister(&vt_led_triggers[i]);
+ clear_bit(i, vt_led_registered);
+ }
+ mutex_unlock(&vt_led_registered_lock);
+}
+
+void __init input_led_init(void)
+{
+ unsigned i;
+
+ for (i = 0; i < LED_CNT; i++)
+ INIT_WORK(&vt_led_work[i], vt_led_cb);
+}
+
+void __exit input_led_exit(void)
+{
+ unsigned i;
+
+ for (i = 0; i < LED_CNT; i++)
+ cancel_work_sync(&vt_led_work[i]);
+}
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index a6c3d2f153f3..59f4228cc9cd 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -11,9 +11,6 @@ menuconfig NEW_LEDS
Say Y to enable Linux LED support. This allows control of supported
LEDs from both userspace and optionally, by kernel events (triggers).
- This is not related to standard keyboard LEDs which are controlled
- via the input system.
-
if NEW_LEDS
config LEDS_CLASS
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 54be83d3efdd..374480cb69bf 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -343,7 +343,7 @@ void st_int_recv(void *disc_data,
/* Unknow packet? */
default:
type = *ptr;
- if (st_gdata->list[type] == NULL) {
+ if (type >= ST_MAX_CHANNELS || st_gdata->list[type] == NULL) {
pr_err("chip/interface misbehavior dropping"
" frame starting with 0x%02x", type);
goto done;
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 472a5adc4642..c7e09e27ae5d 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -55,6 +55,8 @@ static int rtc_suspend(struct device *dev)
struct timespec64 delta, delta_delta;
int err;
+ rtc->valid_alarm = !rtc_read_alarm(rtc, &rtc->alarm);
+
if (has_persistent_clock())
return 0;
@@ -102,6 +104,27 @@ static int rtc_resume(struct device *dev)
struct timespec64 sleep_time;
int err;
+ /*
+ * Ensure that the platform hasn't overwritten a pending alarm while
+ * suspended
+ */
+ if (rtc->valid_alarm) {
+ long now, scheduled;
+
+ rtc_read_time(rtc, &tm);
+ rtc_tm_to_time(&rtc->alarm.time, &scheduled);
+ rtc_tm_to_time(&tm, &now);
+
+ /* Clear the alarm registers if it went off during suspend */
+ if (scheduled <= now) {
+ rtc_time_to_tm(0, &rtc->alarm.time);
+ rtc->alarm.enabled = 0;
+ }
+
+ if (rtc->ops && rtc->ops->set_alarm)
+ rtc->ops->set_alarm(rtc->dev.parent, &rtc->alarm);
+ }
+
if (has_persistent_clock())
return 0;
@@ -145,6 +168,7 @@ static int rtc_resume(struct device *dev)
if (sleep_time.tv_sec >= 0)
timekeeping_inject_sleeptime64(&sleep_time);
rtc_hctosys_ret = 0;
+
return 0;
}
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 45bfc28ee3aa..fa1f11906475 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -489,7 +489,10 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
struct rtc_time tm;
ktime_t now, onesec;
- __rtc_read_time(rtc, &tm);
+ err = __rtc_read_time(rtc, &tm);
+ if (err < 0)
+ goto out;
+
onesec = ktime_set(1, 0);
now = rtc_tm_to_ktime(tm);
rtc->uie_rtctimer.node.expires = ktime_add(now, onesec);
@@ -867,13 +870,17 @@ void rtc_timer_do_work(struct work_struct *work)
struct timerqueue_node *next;
ktime_t now;
struct rtc_time tm;
+ int err = 0;
struct rtc_device *rtc =
container_of(work, struct rtc_device, irqwork);
mutex_lock(&rtc->ops_lock);
again:
- __rtc_read_time(rtc, &tm);
+ err = __rtc_read_time(rtc, &tm);
+ if (err < 0)
+ goto out;
+
now = rtc_tm_to_ktime(tm);
while ((next = timerqueue_getnext(&rtc->timerqueue))) {
if (next->expires.tv64 > now.tv64)
@@ -898,7 +905,6 @@ again:
/* Set next alarm */
if (next) {
struct rtc_wkalrm alarm;
- int err;
int retry = 3;
alarm.time = rtc_ktime_to_tm(next->expires);
@@ -920,6 +926,7 @@ reprogram:
} else
rtc_alarm_disable(rtc);
+out:
pm_relax(rtc->dev.parent);
mutex_unlock(&rtc->ops_lock);
}
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 6b9aaf1afc72..2183fd2750ab 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -313,7 +313,7 @@ static const struct rtc_class_ops at91_rtc_ops = {
.alarm_irq_enable = at91_rtc_alarm_irq_enable,
};
-static struct regmap_config gpbr_regmap_config = {
+static const struct regmap_config gpbr_regmap_config = {
.reg_bits = 32,
.val_bits = 32,
.reg_stride = 4,
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c
index 42f5570f42f8..c666eab98273 100644
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -50,22 +50,58 @@
#define DCAMR_UNSET 0xFFFFFFFF /* doomsday - 1 sec */
#define DCR 0x10 /* Control Reg */
+#define DCR_TDCHL (1 << 30) /* Tamper-detect configuration hard lock */
+#define DCR_TDCSL (1 << 29) /* Tamper-detect configuration soft lock */
+#define DCR_KSSL (1 << 27) /* Key-select soft lock */
+#define DCR_MCHL (1 << 20) /* Monotonic-counter hard lock */
+#define DCR_MCSL (1 << 19) /* Monotonic-counter soft lock */
+#define DCR_TCHL (1 << 18) /* Timer-counter hard lock */
+#define DCR_TCSL (1 << 17) /* Timer-counter soft lock */
+#define DCR_FSHL (1 << 16) /* Failure state hard lock */
#define DCR_TCE (1 << 3) /* Time Counter Enable */
+#define DCR_MCE (1 << 2) /* Monotonic Counter Enable */
#define DSR 0x14 /* Status Reg */
-#define DSR_WBF (1 << 10) /* Write Busy Flag */
-#define DSR_WNF (1 << 9) /* Write Next Flag */
-#define DSR_WCF (1 << 8) /* Write Complete Flag */
+#define DSR_WTD (1 << 23) /* Wire-mesh tamper detected */
+#define DSR_ETBD (1 << 22) /* External tamper B detected */
+#define DSR_ETAD (1 << 21) /* External tamper A detected */
+#define DSR_EBD (1 << 20) /* External boot detected */
+#define DSR_SAD (1 << 19) /* SCC alarm detected */
+#define DSR_TTD (1 << 18) /* Temperatur tamper detected */
+#define DSR_CTD (1 << 17) /* Clock tamper detected */
+#define DSR_VTD (1 << 16) /* Voltage tamper detected */
+#define DSR_WBF (1 << 10) /* Write Busy Flag (synchronous) */
+#define DSR_WNF (1 << 9) /* Write Next Flag (synchronous) */
+#define DSR_WCF (1 << 8) /* Write Complete Flag (synchronous)*/
#define DSR_WEF (1 << 7) /* Write Error Flag */
#define DSR_CAF (1 << 4) /* Clock Alarm Flag */
+#define DSR_MCO (1 << 3) /* monotonic counter overflow */
+#define DSR_TCO (1 << 2) /* time counter overflow */
#define DSR_NVF (1 << 1) /* Non-Valid Flag */
#define DSR_SVF (1 << 0) /* Security Violation Flag */
-#define DIER 0x18 /* Interrupt Enable Reg */
+#define DIER 0x18 /* Interrupt Enable Reg (synchronous) */
#define DIER_WNIE (1 << 9) /* Write Next Interrupt Enable */
#define DIER_WCIE (1 << 8) /* Write Complete Interrupt Enable */
#define DIER_WEIE (1 << 7) /* Write Error Interrupt Enable */
#define DIER_CAIE (1 << 4) /* Clock Alarm Interrupt Enable */
+#define DIER_SVIE (1 << 0) /* Security-violation Interrupt Enable */
+
+#define DMCR 0x1c /* DryIce Monotonic Counter Reg */
+
+#define DTCR 0x28 /* DryIce Tamper Configuration Reg */
+#define DTCR_MOE (1 << 9) /* monotonic overflow enabled */
+#define DTCR_TOE (1 << 8) /* time overflow enabled */
+#define DTCR_WTE (1 << 7) /* wire-mesh tamper enabled */
+#define DTCR_ETBE (1 << 6) /* external B tamper enabled */
+#define DTCR_ETAE (1 << 5) /* external A tamper enabled */
+#define DTCR_EBE (1 << 4) /* external boot tamper enabled */
+#define DTCR_SAIE (1 << 3) /* SCC enabled */
+#define DTCR_TTE (1 << 2) /* temperature tamper enabled */
+#define DTCR_CTE (1 << 1) /* clock tamper enabled */
+#define DTCR_VTE (1 << 0) /* voltage tamper enabled */
+
+#define DGPR 0x3c /* DryIce General Purpose Reg */
/**
* struct imxdi_dev - private imxdi rtc data
@@ -313,7 +349,7 @@ static irqreturn_t dryice_norm_irq(int irq, void *dev_id)
dier = __raw_readl(imxdi->ioaddr + DIER);
/* handle write complete and write error cases */
- if ((dier & DIER_WCIE)) {
+ if (dier & DIER_WCIE) {
/*If the write wait queue is empty then there is no pending
operations. It means the interrupt is for DryIce -Security.
IRQ must be returned as none.*/
@@ -322,7 +358,7 @@ static irqreturn_t dryice_norm_irq(int irq, void *dev_id)
/* DSR_WCF clears itself on DSR read */
dsr = __raw_readl(imxdi->ioaddr + DSR);
- if ((dsr & (DSR_WCF | DSR_WEF))) {
+ if (dsr & (DSR_WCF | DSR_WEF)) {
/* mask the interrupt */
di_int_disable(imxdi, DIER_WCIE);
@@ -335,7 +371,7 @@ static irqreturn_t dryice_norm_irq(int irq, void *dev_id)
}
/* handle the alarm case */
- if ((dier & DIER_CAIE)) {
+ if (dier & DIER_CAIE) {
/* DSR_WCF clears itself on DSR read */
dsr = __raw_readl(imxdi->ioaddr + DSR);
if (dsr & DSR_CAF) {
diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c
index 6e1fcfb5d7e6..b8f862953f7f 100644
--- a/drivers/rtc/rtc-isl12057.c
+++ b/drivers/rtc/rtc-isl12057.c
@@ -79,8 +79,10 @@
#define ISL12057_MEM_MAP_LEN 0x10
struct isl12057_rtc_data {
+ struct rtc_device *rtc;
struct regmap *regmap;
struct mutex lock;
+ int irq;
};
static void isl12057_rtc_regs_to_tm(struct rtc_time *tm, u8 *regs)
@@ -160,14 +162,47 @@ static int isl12057_i2c_validate_chip(struct regmap *regmap)
return 0;
}
-static int isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm)
+static int _isl12057_rtc_clear_alarm(struct device *dev)
+{
+ struct isl12057_rtc_data *data = dev_get_drvdata(dev);
+ int ret;
+
+ ret = regmap_update_bits(data->regmap, ISL12057_REG_SR,
+ ISL12057_REG_SR_A1F, 0);
+ if (ret)
+ dev_err(dev, "%s: clearing alarm failed (%d)\n", __func__, ret);
+
+ return ret;
+}
+
+static int _isl12057_rtc_update_alarm(struct device *dev, int enable)
+{
+ struct isl12057_rtc_data *data = dev_get_drvdata(dev);
+ int ret;
+
+ ret = regmap_update_bits(data->regmap, ISL12057_REG_INT,
+ ISL12057_REG_INT_A1IE,
+ enable ? ISL12057_REG_INT_A1IE : 0);
+ if (ret)
+ dev_err(dev, "%s: changing alarm interrupt flag failed (%d)\n",
+ __func__, ret);
+
+ return ret;
+}
+
+/*
+ * Note: as we only read from device and do not perform any update, there is
+ * no need for an equivalent function which would try and get driver's main
+ * lock. Here, it is safe for everyone if we just use regmap internal lock
+ * on the device when reading.
+ */
+static int _isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
struct isl12057_rtc_data *data = dev_get_drvdata(dev);
u8 regs[ISL12057_RTC_SEC_LEN];
unsigned int sr;
int ret;
- mutex_lock(&data->lock);
ret = regmap_read(data->regmap, ISL12057_REG_SR, &sr);
if (ret) {
dev_err(dev, "%s: unable to read oscillator status flag (%d)\n",
@@ -187,8 +222,6 @@ static int isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm)
__func__, ret);
out:
- mutex_unlock(&data->lock);
-
if (ret)
return ret;
@@ -197,6 +230,168 @@ out:
return rtc_valid_tm(tm);
}
+static int isl12057_rtc_update_alarm(struct device *dev, int enable)
+{
+ struct isl12057_rtc_data *data = dev_get_drvdata(dev);
+ int ret;
+
+ mutex_lock(&data->lock);
+ ret = _isl12057_rtc_update_alarm(dev, enable);
+ mutex_unlock(&data->lock);
+
+ return ret;
+}
+
+static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+ struct isl12057_rtc_data *data = dev_get_drvdata(dev);
+ struct rtc_time rtc_tm, *alarm_tm = &alarm->time;
+ unsigned long rtc_secs, alarm_secs;
+ u8 regs[ISL12057_A1_SEC_LEN];
+ unsigned int ir;
+ int ret;
+
+ mutex_lock(&data->lock);
+ ret = regmap_bulk_read(data->regmap, ISL12057_REG_A1_SC, regs,
+ ISL12057_A1_SEC_LEN);
+ if (ret) {
+ dev_err(dev, "%s: reading alarm section failed (%d)\n",
+ __func__, ret);
+ goto err_unlock;
+ }
+
+ alarm_tm->tm_sec = bcd2bin(regs[0] & 0x7f);
+ alarm_tm->tm_min = bcd2bin(regs[1] & 0x7f);
+ alarm_tm->tm_hour = bcd2bin(regs[2] & 0x3f);
+ alarm_tm->tm_mday = bcd2bin(regs[3] & 0x3f);
+ alarm_tm->tm_wday = -1;
+
+ /*
+ * The alarm section does not store year/month. We use the ones in rtc
+ * section as a basis and increment month and then year if needed to get
+ * alarm after current time.
+ */
+ ret = _isl12057_rtc_read_time(dev, &rtc_tm);
+ if (ret)
+ goto err_unlock;
+
+ alarm_tm->tm_year = rtc_tm.tm_year;
+ alarm_tm->tm_mon = rtc_tm.tm_mon;
+
+ ret = rtc_tm_to_time(&rtc_tm, &rtc_secs);
+ if (ret)
+ goto err_unlock;
+
+ ret = rtc_tm_to_time(alarm_tm, &alarm_secs);
+ if (ret)
+ goto err_unlock;
+
+ if (alarm_secs < rtc_secs) {
+ if (alarm_tm->tm_mon == 11) {
+ alarm_tm->tm_mon = 0;
+ alarm_tm->tm_year += 1;
+ } else {
+ alarm_tm->tm_mon += 1;
+ }
+ }
+
+ ret = regmap_read(data->regmap, ISL12057_REG_INT, &ir);
+ if (ret) {
+ dev_err(dev, "%s: reading alarm interrupt flag failed (%d)\n",
+ __func__, ret);
+ goto err_unlock;
+ }
+
+ alarm->enabled = !!(ir & ISL12057_REG_INT_A1IE);
+
+err_unlock:
+ mutex_unlock(&data->lock);
+
+ return ret;
+}
+
+static int isl12057_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+ struct isl12057_rtc_data *data = dev_get_drvdata(dev);
+ struct rtc_time *alarm_tm = &alarm->time;
+ unsigned long rtc_secs, alarm_secs;
+ u8 regs[ISL12057_A1_SEC_LEN];
+ struct rtc_time rtc_tm;
+ int ret, enable = 1;
+
+ mutex_lock(&data->lock);
+ ret = _isl12057_rtc_read_time(dev, &rtc_tm);
+ if (ret)
+ goto err_unlock;
+
+ ret = rtc_tm_to_time(&rtc_tm, &rtc_secs);
+ if (ret)
+ goto err_unlock;
+
+ ret = rtc_tm_to_time(alarm_tm, &alarm_secs);
+ if (ret)
+ goto err_unlock;
+
+ /* If alarm time is before current time, disable the alarm */
+ if (!alarm->enabled || alarm_secs <= rtc_secs) {
+ enable = 0;
+ } else {
+ /*
+ * Chip only support alarms up to one month in the future. Let's
+ * return an error if we get something after that limit.
+ * Comparison is done by incrementing rtc_tm month field by one
+ * and checking alarm value is still below.
+ */
+ if (rtc_tm.tm_mon == 11) { /* handle year wrapping */
+ rtc_tm.tm_mon = 0;
+ rtc_tm.tm_year += 1;
+ } else {
+ rtc_tm.tm_mon += 1;
+ }
+
+ ret = rtc_tm_to_time(&rtc_tm, &rtc_secs);
+ if (ret)
+ goto err_unlock;
+
+ if (alarm_secs > rtc_secs) {
+ dev_err(dev, "%s: max for alarm is one month (%d)\n",
+ __func__, ret);
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+ }
+
+ /* Disable the alarm before modifying it */
+ ret = _isl12057_rtc_update_alarm(dev, 0);
+ if (ret < 0) {
+ dev_err(dev, "%s: unable to disable the alarm (%d)\n",
+ __func__, ret);
+ goto err_unlock;
+ }
+
+ /* Program alarm registers */
+ regs[0] = bin2bcd(alarm_tm->tm_sec) & 0x7f;
+ regs[1] = bin2bcd(alarm_tm->tm_min) & 0x7f;
+ regs[2] = bin2bcd(alarm_tm->tm_hour) & 0x3f;
+ regs[3] = bin2bcd(alarm_tm->tm_mday) & 0x3f;
+
+ ret = regmap_bulk_write(data->regmap, ISL12057_REG_A1_SC, regs,
+ ISL12057_A1_SEC_LEN);
+ if (ret < 0) {
+ dev_err(dev, "%s: writing alarm section failed (%d)\n",
+ __func__, ret);
+ goto err_unlock;
+ }
+
+ /* Enable or disable alarm */
+ ret = _isl12057_rtc_update_alarm(dev, enable);
+
+err_unlock:
+ mutex_unlock(&data->lock);
+
+ return ret;
+}
+
static int isl12057_rtc_set_time(struct device *dev, struct rtc_time *tm)
{
struct isl12057_rtc_data *data = dev_get_drvdata(dev);
@@ -262,12 +457,85 @@ static int isl12057_check_rtc_status(struct device *dev, struct regmap *regmap)
return 0;
}
+#ifdef CONFIG_OF
+/*
+ * One would expect the device to be marked as a wakeup source only
+ * when an IRQ pin of the RTC is routed to an interrupt line of the
+ * CPU. In practice, such an IRQ pin can be connected to a PMIC and
+ * this allows the device to be powered up when RTC alarm rings. This
+ * is for instance the case on ReadyNAS 102, 104 and 2120. On those
+ * devices with no IRQ driectly connected to the SoC, the RTC chip
+ * can be forced as a wakeup source by stating that explicitly in
+ * the device's .dts file using the "isil,irq2-can-wakeup-machine"
+ * boolean property. This will guarantee 'wakealarm' sysfs entry is
+ * available on the device.
+ *
+ * The function below returns 1, i.e. the capability of the chip to
+ * wakeup the device, based on IRQ availability or if the boolean
+ * property has been set in the .dts file. Otherwise, it returns 0.
+ */
+
+static bool isl12057_can_wakeup_machine(struct device *dev)
+{
+ struct isl12057_rtc_data *data = dev_get_drvdata(dev);
+
+ return (data->irq || of_property_read_bool(dev->of_node,
+ "isil,irq2-can-wakeup-machine"));
+}
+#else
+static bool isl12057_can_wakeup_machine(struct device *dev)
+{
+ struct isl12057_rtc_data *data = dev_get_drvdata(dev);
+
+ return !!data->irq;
+}
+#endif
+
+static int isl12057_rtc_alarm_irq_enable(struct device *dev,
+ unsigned int enable)
+{
+ struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev);
+ int ret = -ENOTTY;
+
+ if (rtc_data->irq)
+ ret = isl12057_rtc_update_alarm(dev, enable);
+
+ return ret;
+}
+
+static irqreturn_t isl12057_rtc_interrupt(int irq, void *data)
+{
+ struct i2c_client *client = data;
+ struct isl12057_rtc_data *rtc_data = dev_get_drvdata(&client->dev);
+ struct rtc_device *rtc = rtc_data->rtc;
+ int ret, handled = IRQ_NONE;
+ unsigned int sr;
+
+ ret = regmap_read(rtc_data->regmap, ISL12057_REG_SR, &sr);
+ if (!ret && (sr & ISL12057_REG_SR_A1F)) {
+ dev_dbg(&client->dev, "RTC alarm!\n");
+
+ rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF);
+
+ /* Acknowledge and disable the alarm */
+ _isl12057_rtc_clear_alarm(&client->dev);
+ _isl12057_rtc_update_alarm(&client->dev, 0);
+
+ handled = IRQ_HANDLED;
+ }
+
+ return handled;
+}
+
static const struct rtc_class_ops rtc_ops = {
- .read_time = isl12057_rtc_read_time,
+ .read_time = _isl12057_rtc_read_time,
.set_time = isl12057_rtc_set_time,
+ .read_alarm = isl12057_rtc_read_alarm,
+ .set_alarm = isl12057_rtc_set_alarm,
+ .alarm_irq_enable = isl12057_rtc_alarm_irq_enable,
};
-static struct regmap_config isl12057_rtc_regmap_config = {
+static const struct regmap_config isl12057_rtc_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
};
@@ -277,7 +545,6 @@ static int isl12057_probe(struct i2c_client *client,
{
struct device *dev = &client->dev;
struct isl12057_rtc_data *data;
- struct rtc_device *rtc;
struct regmap *regmap;
int ret;
@@ -310,9 +577,70 @@ static int isl12057_probe(struct i2c_client *client,
data->regmap = regmap;
dev_set_drvdata(dev, data);
- rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops, THIS_MODULE);
- return PTR_ERR_OR_ZERO(rtc);
+ if (client->irq > 0) {
+ ret = devm_request_threaded_irq(dev, client->irq, NULL,
+ isl12057_rtc_interrupt,
+ IRQF_SHARED|IRQF_ONESHOT,
+ DRV_NAME, client);
+ if (!ret)
+ data->irq = client->irq;
+ else
+ dev_err(dev, "%s: irq %d unavailable (%d)\n", __func__,
+ client->irq, ret);
+ }
+
+ if (isl12057_can_wakeup_machine(dev))
+ device_init_wakeup(dev, true);
+
+ data->rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops,
+ THIS_MODULE);
+ ret = PTR_ERR_OR_ZERO(data->rtc);
+ if (ret) {
+ dev_err(dev, "%s: unable to register RTC device (%d)\n",
+ __func__, ret);
+ goto err;
+ }
+
+ /* We cannot support UIE mode if we do not have an IRQ line */
+ if (!data->irq)
+ data->rtc->uie_unsupported = 1;
+
+err:
+ return ret;
+}
+
+static int isl12057_remove(struct i2c_client *client)
+{
+ if (isl12057_can_wakeup_machine(&client->dev))
+ device_init_wakeup(&client->dev, false);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int isl12057_rtc_suspend(struct device *dev)
+{
+ struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+ if (rtc_data->irq && device_may_wakeup(dev))
+ return enable_irq_wake(rtc_data->irq);
+
+ return 0;
+}
+
+static int isl12057_rtc_resume(struct device *dev)
+{
+ struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+ if (rtc_data->irq && device_may_wakeup(dev))
+ return disable_irq_wake(rtc_data->irq);
+
+ return 0;
}
+#endif
+
+static SIMPLE_DEV_PM_OPS(isl12057_rtc_pm_ops, isl12057_rtc_suspend,
+ isl12057_rtc_resume);
#ifdef CONFIG_OF
static const struct of_device_id isl12057_dt_match[] = {
@@ -331,9 +659,11 @@ static struct i2c_driver isl12057_driver = {
.driver = {
.name = DRV_NAME,
.owner = THIS_MODULE,
+ .pm = &isl12057_rtc_pm_ops,
.of_match_table = of_match_ptr(isl12057_dt_match),
},
.probe = isl12057_probe,
+ .remove = isl12057_remove,
.id_table = isl12057_id,
};
module_i2c_driver(isl12057_driver);
diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
index d1953bb244c5..8a7556cbcb7f 100644
--- a/drivers/rtc/rtc-pcf2123.c
+++ b/drivers/rtc/rtc-pcf2123.c
@@ -38,6 +38,7 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/of.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/rtc.h>
@@ -340,10 +341,19 @@ static int pcf2123_remove(struct spi_device *spi)
return 0;
}
+#ifdef CONFIG_OF
+static const struct of_device_id pcf2123_dt_ids[] = {
+ { .compatible = "nxp,rtc-pcf2123", },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, pcf2123_dt_ids);
+#endif
+
static struct spi_driver pcf2123_driver = {
.driver = {
.name = "rtc-pcf2123",
.owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(pcf2123_dt_ids),
},
.probe = pcf2123_probe,
.remove = pcf2123_remove,
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index b24aa010f68c..65cd80bf9aec 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -13,6 +13,10 @@ config VT
bool "Virtual terminal" if EXPERT
depends on !S390 && !UML
select INPUT
+ select NEW_LEDS
+ select LEDS_CLASS
+ select LEDS_TRIGGERS
+ select INPUT_LEDS
default y
---help---
If you say Y here, you will get support for terminal devices with
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index 8a89f6e7715d..acfb4c00e489 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/leds.h>
#include <linux/kbd_kern.h>
#include <linux/kbd_diacr.h>
@@ -130,6 +131,7 @@ static char rep; /* flag telling character repeat */
static int shift_state = 0;
static unsigned char ledstate = 0xff; /* undefined */
+static unsigned char lockstate = 0xff; /* undefined */
static unsigned char ledioctl;
/*
@@ -961,6 +963,41 @@ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag)
}
}
+/* We route VT keyboard "leds" through triggers */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_ledstate[] = {
+#define DEFINE_LEDSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_ledstate_trigger_activate, \
+ }
+ DEFINE_LEDSTATE_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_NUMLOCK, "kbd-numlock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_CAPSLOCK, "kbd-capslock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_KANALOCK, "kbd-kanalock"),
+#undef DEFINE_LEDSTATE_TRIGGER
+};
+
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_lockstate[] = {
+#define DEFINE_LOCKSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_lockstate_trigger_activate, \
+ }
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"),
+#undef DEFINE_LOCKSTATE_TRIGGER
+};
+
/*
* The leds display either (i) the status of NumLock, CapsLock, ScrollLock,
* or (ii) whatever pattern of lights people want to show using KDSETLED,
@@ -995,18 +1032,25 @@ static inline unsigned char getleds(void)
return kb->ledflagstate;
}
-static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+/* Called on trigger connection, to set initial state */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev)
{
- unsigned char leds = *(unsigned char *)data;
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_ledstate;
- if (test_bit(EV_LED, handle->dev->evbit)) {
- input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
- input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02));
- input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04));
- input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
- }
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, ledstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
+}
- return 0;
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_lockstate;
+
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, lockstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
}
/**
@@ -1095,16 +1139,29 @@ static void kbd_bh(unsigned long dummy)
{
unsigned char leds;
unsigned long flags;
-
+ int i;
+
spin_lock_irqsave(&led_lock, flags);
leds = getleds();
spin_unlock_irqrestore(&led_lock, flags);
if (leds != ledstate) {
- input_handler_for_each_handle(&kbd_handler, &leds,
- kbd_update_leds_helper);
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++)
+ if ((leds ^ ledstate) & (1 << i))
+ led_trigger_event(&ledtrig_ledstate[i],
+ leds & (1 << i)
+ ? LED_FULL : LED_OFF);
ledstate = leds;
}
+
+ if (kbd->lockstate != lockstate) {
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++)
+ if ((kbd->lockstate ^ lockstate) & (1 << i))
+ led_trigger_event(&ledtrig_lockstate[i],
+ kbd->lockstate & (1 << i)
+ ? LED_FULL : LED_OFF);
+ lockstate = kbd->lockstate;
+ }
}
DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
@@ -1442,20 +1499,6 @@ static void kbd_disconnect(struct input_handle *handle)
kfree(handle);
}
-/*
- * Start keyboard handler on the new keyboard by refreshing LED state to
- * match the rest of the system.
- */
-static void kbd_start(struct input_handle *handle)
-{
- tasklet_disable(&keyboard_tasklet);
-
- if (ledstate != 0xff)
- kbd_update_leds_helper(handle, &ledstate);
-
- tasklet_enable(&keyboard_tasklet);
-}
-
static const struct input_device_id kbd_ids[] = {
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
@@ -1477,7 +1520,6 @@ static struct input_handler kbd_handler = {
.match = kbd_match,
.connect = kbd_connect,
.disconnect = kbd_disconnect,
- .start = kbd_start,
.name = "kbd",
.id_table = kbd_ids,
};
@@ -1501,6 +1543,20 @@ int __init kbd_init(void)
if (error)
return error;
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++) {
+ error = led_trigger_register(&ledtrig_ledstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_ledstate[i].name);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++) {
+ error = led_trigger_register(&ledtrig_lockstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_lockstate[i].name);
+ }
+
tasklet_enable(&keyboard_tasklet);
tasklet_schedule(&keyboard_tasklet);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 5594505e6e73..b40133796b87 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -831,7 +831,6 @@ static const struct vm_operations_struct v9fs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = v9fs_vm_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
@@ -839,7 +838,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = v9fs_vm_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index c852f2fa1710..511ab6b12618 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -30,7 +30,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
ino = bh->b_blocknr;
offset = affs_hash_name(sb, AFFS_TAIL(sb, bh)->name + 1, AFFS_TAIL(sb, bh)->name[0]);
- pr_debug("%s(dir=%u, ino=%d)\n", __func__, (u32)dir->i_ino, ino);
+ pr_debug("%s(dir=%lu, ino=%d)\n", __func__, dir->i_ino, ino);
dir_bh = affs_bread(sb, dir->i_ino);
if (!dir_bh)
@@ -80,8 +80,8 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
sb = dir->i_sb;
rem_ino = rem_bh->b_blocknr;
offset = affs_hash_name(sb, AFFS_TAIL(sb, rem_bh)->name+1, AFFS_TAIL(sb, rem_bh)->name[0]);
- pr_debug("%s(dir=%d, ino=%d, hashval=%d)\n",
- __func__, (u32)dir->i_ino, rem_ino, offset);
+ pr_debug("%s(dir=%lu, ino=%d, hashval=%d)\n", __func__, dir->i_ino,
+ rem_ino, offset);
bh = affs_bread(sb, dir->i_ino);
if (!bh)
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 59f07bec92a6..a682892878a8 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -54,8 +54,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
u32 ino;
int error = 0;
- pr_debug("%s(ino=%lu,f_pos=%lx)\n",
- __func__, inode->i_ino, (unsigned long)ctx->pos);
+ pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos);
if (ctx->pos < 2) {
file->private_data = (void *)0;
@@ -117,9 +116,8 @@ inside:
namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30);
name = AFFS_TAIL(sb, fh_bh)->name + 1;
- pr_debug("readdir(): dir_emit(\"%.*s\", "
- "ino=%u), hash=%d, f_pos=%x\n",
- namelen, name, ino, hash_pos, (u32)ctx->pos);
+ pr_debug("readdir(): dir_emit(\"%.*s\", ino=%u), hash=%d, f_pos=%llx\n",
+ namelen, name, ino, hash_pos, ctx->pos);
if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN))
goto done;
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 8faa6593ca6d..7e83ba22bed4 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -180,8 +180,7 @@ affs_get_extblock_slow(struct inode *inode, u32 ext)
ext_key = be32_to_cpu(AFFS_TAIL(sb, bh)->extension);
if (ext < AFFS_I(inode)->i_extcnt)
goto read_ext;
- if (ext > AFFS_I(inode)->i_extcnt)
- BUG();
+ BUG_ON(ext > AFFS_I(inode)->i_extcnt);
bh = affs_alloc_extblock(inode, bh, ext);
if (IS_ERR(bh))
return bh;
@@ -198,8 +197,7 @@ affs_get_extblock_slow(struct inode *inode, u32 ext)
struct buffer_head *prev_bh;
/* allocate a new extended block */
- if (ext > AFFS_I(inode)->i_extcnt)
- BUG();
+ BUG_ON(ext > AFFS_I(inode)->i_extcnt);
/* get previous extended block */
prev_bh = affs_get_extblock(inode, ext - 1);
@@ -299,8 +297,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
struct buffer_head *ext_bh;
u32 ext;
- pr_debug("%s(%u, %lu)\n",
- __func__, (u32)inode->i_ino, (unsigned long)block);
+ pr_debug("%s(%lu, %llu)\n", __func__, inode->i_ino,
+ (unsigned long long)block);
BUG_ON(block > (sector_t)0x7fffffffUL);
@@ -330,8 +328,9 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
/* store new block */
if (bh_result->b_blocknr)
- affs_warning(sb, "get_block", "block already set (%lx)",
- (unsigned long)bh_result->b_blocknr);
+ affs_warning(sb, "get_block",
+ "block already set (%llx)",
+ (unsigned long long)bh_result->b_blocknr);
AFFS_BLOCK(sb, ext_bh, block) = cpu_to_be32(blocknr);
AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(block + 1);
affs_adjust_checksum(ext_bh, blocknr - bh_result->b_blocknr + 1);
@@ -353,8 +352,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
return 0;
err_big:
- affs_error(inode->i_sb, "get_block", "strange block request %d",
- (int)block);
+ affs_error(inode->i_sb, "get_block", "strange block request %llu",
+ (unsigned long long)block);
return -EIO;
err_ext:
// unlock cache
@@ -503,7 +502,7 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
u32 bidx, boff, bsize;
u32 tmp;
- pr_debug("%s(%u, %ld, 0, %d)\n", __func__, (u32)inode->i_ino,
+ pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
page->index, to);
BUG_ON(to > PAGE_CACHE_SIZE);
kmap(page);
@@ -539,7 +538,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
u32 size, bsize;
u32 tmp;
- pr_debug("%s(%u, %d)\n", __func__, (u32)inode->i_ino, newsize);
+ pr_debug("%s(%lu, %d)\n", __func__, inode->i_ino, newsize);
bsize = AFFS_SB(sb)->s_data_blksize;
bh = NULL;
size = AFFS_I(inode)->mmu_private;
@@ -608,7 +607,7 @@ affs_readpage_ofs(struct file *file, struct page *page)
u32 to;
int err;
- pr_debug("%s(%u, %ld)\n", __func__, (u32)inode->i_ino, page->index);
+ pr_debug("%s(%lu, %ld)\n", __func__, inode->i_ino, page->index);
to = PAGE_CACHE_SIZE;
if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) {
to = inode->i_size & ~PAGE_CACHE_MASK;
@@ -631,8 +630,8 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
pgoff_t index;
int err = 0;
- pr_debug("%s(%u, %llu, %llu)\n", __func__, (u32)inode->i_ino,
- (unsigned long long)pos, (unsigned long long)pos + len);
+ pr_debug("%s(%lu, %llu, %llu)\n", __func__, inode->i_ino, pos,
+ pos + len);
if (pos > AFFS_I(inode)->mmu_private) {
/* XXX: this probably leaves a too-big i_size in case of
* failure. Should really be updating i_size at write_end time
@@ -681,9 +680,8 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
* due to write_begin.
*/
- pr_debug("%s(%u, %llu, %llu)\n",
- __func__, (u32)inode->i_ino, (unsigned long long)pos,
- (unsigned long long)pos + len);
+ pr_debug("%s(%lu, %llu, %llu)\n", __func__, inode->i_ino, pos,
+ pos + len);
bsize = AFFS_SB(sb)->s_data_blksize;
data = page_address(page);
@@ -831,8 +829,8 @@ affs_truncate(struct inode *inode)
struct buffer_head *ext_bh;
int i;
- pr_debug("truncate(inode=%d, oldsize=%u, newsize=%u)\n",
- (u32)inode->i_ino, (u32)AFFS_I(inode)->mmu_private, (u32)inode->i_size);
+ pr_debug("truncate(inode=%lu, oldsize=%llu, newsize=%llu)\n",
+ inode->i_ino, AFFS_I(inode)->mmu_private, inode->i_size);
last_blk = 0;
ext = 0;
@@ -863,7 +861,7 @@ affs_truncate(struct inode *inode)
if (IS_ERR(ext_bh)) {
affs_warning(sb, "truncate",
"unexpected read error for ext block %u (%ld)",
- (unsigned int)ext, PTR_ERR(ext_bh));
+ ext, PTR_ERR(ext_bh));
return;
}
if (AFFS_I(inode)->i_lc) {
@@ -911,7 +909,7 @@ affs_truncate(struct inode *inode)
if (IS_ERR(bh)) {
affs_warning(sb, "truncate",
"unexpected read error for last block %u (%ld)",
- (unsigned int)ext, PTR_ERR(bh));
+ ext, PTR_ERR(bh));
return;
}
tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index d0609a282e1d..25cb4b43f2f1 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -348,9 +348,8 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
u32 block = 0;
int retval;
- pr_debug("%s(dir=%u, inode=%u, \"%pd\", type=%d)\n",
- __func__, (u32)dir->i_ino,
- (u32)inode->i_ino, dentry, type);
+ pr_debug("%s(dir=%lu, inode=%lu, \"%pd\", type=%d)\n", __func__,
+ dir->i_ino, inode->i_ino, dentry, type);
retval = -EIO;
bh = affs_bread(sb, inode->i_ino);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index bbc38530e924..de84f4d3e9ec 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -248,9 +248,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
int
affs_unlink(struct inode *dir, struct dentry *dentry)
{
- pr_debug("%s(dir=%d, %lu \"%pd\")\n",
- __func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
- dentry);
+ pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
+ dentry->d_inode->i_ino, dentry);
return affs_remove_header(dentry);
}
@@ -317,9 +316,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
int
affs_rmdir(struct inode *dir, struct dentry *dentry)
{
- pr_debug("%s(dir=%u, %lu \"%pd\")\n",
- __func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
- dentry);
+ pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
+ dentry->d_inode->i_ino, dentry);
return affs_remove_header(dentry);
}
@@ -404,8 +402,7 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
struct inode *inode = old_dentry->d_inode;
- pr_debug("%s(%u, %u, \"%pd\")\n",
- __func__, (u32)inode->i_ino, (u32)dir->i_ino,
+ pr_debug("%s(%lu, %lu, \"%pd\")\n", __func__, inode->i_ino, dir->i_ino,
dentry);
return affs_add_entry(dir, inode, dentry, ST_LINKFILE);
@@ -419,9 +416,8 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct buffer_head *bh = NULL;
int retval;
- pr_debug("%s(old=%u,\"%pd\" to new=%u,\"%pd\")\n",
- __func__, (u32)old_dir->i_ino, old_dentry,
- (u32)new_dir->i_ino, new_dentry);
+ pr_debug("%s(old=%lu,\"%pd\" to new=%lu,\"%pd\")\n", __func__,
+ old_dir->i_ino, old_dentry, new_dir->i_ino, new_dentry);
retval = affs_check_name(new_dentry->d_name.name,
new_dentry->d_name.len,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index edf47774b03d..e089f1985fca 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -274,9 +274,9 @@ more:
static struct inode *
befs_alloc_inode(struct super_block *sb)
{
- struct befs_inode_info *bi;
- bi = (struct befs_inode_info *)kmem_cache_alloc(befs_inode_cachep,
- GFP_KERNEL);
+ struct befs_inode_info *bi;
+
+ bi = kmem_cache_alloc(befs_inode_cachep, GFP_KERNEL);
if (!bi)
return NULL;
return &bi->vfs_inode;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e4090259569b..a606ab551296 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2081,7 +2081,6 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = btrfs_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index f5013d92a7e6..4f68d0189d01 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1569,7 +1569,6 @@ out:
static struct vm_operations_struct ceph_vmops = {
.fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
int ceph_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 96b7e9b7706d..9f4d03954ecd 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3244,7 +3244,6 @@ static struct vm_operations_struct cifs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = cifs_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 86c893884eb9..281ee011bb6a 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -28,29 +28,6 @@
#include "coda_int.h"
-/* dir inode-ops */
-static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, bool excl);
-static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags);
-static int coda_link(struct dentry *old_dentry, struct inode *dir_inode,
- struct dentry *entry);
-static int coda_unlink(struct inode *dir_inode, struct dentry *entry);
-static int coda_symlink(struct inode *dir_inode, struct dentry *entry,
- const char *symname);
-static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, umode_t mode);
-static int coda_rmdir(struct inode *dir_inode, struct dentry *entry);
-static int coda_rename(struct inode *old_inode, struct dentry *old_dentry,
- struct inode *new_inode, struct dentry *new_dentry);
-
-/* dir file-ops */
-static int coda_readdir(struct file *file, struct dir_context *ctx);
-
-/* dentry ops */
-static int coda_dentry_revalidate(struct dentry *de, unsigned int flags);
-static int coda_dentry_delete(const struct dentry *);
-
-/* support routines */
-static int coda_venus_readdir(struct file *, struct dir_context *);
-
/* same as fs/bad_inode.c */
static int coda_return_EIO(void)
{
@@ -58,38 +35,6 @@ static int coda_return_EIO(void)
}
#define CODA_EIO_ERROR ((void *) (coda_return_EIO))
-const struct dentry_operations coda_dentry_operations =
-{
- .d_revalidate = coda_dentry_revalidate,
- .d_delete = coda_dentry_delete,
-};
-
-const struct inode_operations coda_dir_inode_operations =
-{
- .create = coda_create,
- .lookup = coda_lookup,
- .link = coda_link,
- .unlink = coda_unlink,
- .symlink = coda_symlink,
- .mkdir = coda_mkdir,
- .rmdir = coda_rmdir,
- .mknod = CODA_EIO_ERROR,
- .rename = coda_rename,
- .permission = coda_permission,
- .getattr = coda_getattr,
- .setattr = coda_setattr,
-};
-
-const struct file_operations coda_dir_operations = {
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .iterate = coda_readdir,
- .open = coda_open,
- .release = coda_release,
- .fsync = coda_fsync,
-};
-
-
/* inode operations for directories */
/* access routines: lookup, readlink, permission */
static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsigned int flags)
@@ -374,33 +319,6 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
return error;
}
-
-/* file operations for directories */
-static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
-{
- struct coda_file_info *cfi;
- struct file *host_file;
- int ret;
-
- cfi = CODA_FTOC(coda_file);
- BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
- host_file = cfi->cfi_container;
-
- if (host_file->f_op->iterate) {
- struct inode *host_inode = file_inode(host_file);
- mutex_lock(&host_inode->i_mutex);
- ret = -ENOENT;
- if (!IS_DEADDIR(host_inode)) {
- ret = host_file->f_op->iterate(host_file, ctx);
- file_accessed(host_file);
- }
- mutex_unlock(&host_inode->i_mutex);
- return ret;
- }
- /* Venus: we must read Venus dirents from a file */
- return coda_venus_readdir(coda_file, ctx);
-}
-
static inline unsigned int CDT2DT(unsigned char cdt)
{
unsigned int dt;
@@ -495,6 +413,33 @@ out:
return 0;
}
+/* file operations for directories */
+static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
+{
+ struct coda_file_info *cfi;
+ struct file *host_file;
+ int ret;
+
+ cfi = CODA_FTOC(coda_file);
+ BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
+ host_file = cfi->cfi_container;
+
+ if (host_file->f_op->iterate) {
+ struct inode *host_inode = file_inode(host_file);
+
+ mutex_lock(&host_inode->i_mutex);
+ ret = -ENOENT;
+ if (!IS_DEADDIR(host_inode)) {
+ ret = host_file->f_op->iterate(host_file, ctx);
+ file_accessed(host_file);
+ }
+ mutex_unlock(&host_inode->i_mutex);
+ return ret;
+ }
+ /* Venus: we must read Venus dirents from a file */
+ return coda_venus_readdir(coda_file, ctx);
+}
+
/* called when a cache lookup succeeds */
static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
{
@@ -603,3 +548,32 @@ int coda_revalidate_inode(struct inode *inode)
}
return 0;
}
+
+const struct dentry_operations coda_dentry_operations = {
+ .d_revalidate = coda_dentry_revalidate,
+ .d_delete = coda_dentry_delete,
+};
+
+const struct inode_operations coda_dir_inode_operations = {
+ .create = coda_create,
+ .lookup = coda_lookup,
+ .link = coda_link,
+ .unlink = coda_unlink,
+ .symlink = coda_symlink,
+ .mkdir = coda_mkdir,
+ .rmdir = coda_rmdir,
+ .mknod = CODA_EIO_ERROR,
+ .rename = coda_rename,
+ .permission = coda_permission,
+ .getattr = coda_getattr,
+ .setattr = coda_setattr,
+};
+
+const struct file_operations coda_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate = coda_readdir,
+ .open = coda_open,
+ .release = coda_release,
+ .fsync = coda_fsync,
+};
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8131be8c0af3..7cb592386121 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -195,7 +195,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a8bc47f75fa0..5b6e9f246233 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -107,7 +107,10 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
if (!journal) {
- ret = generic_file_fsync(file, start, end, datasync);
+ if (test_opt(inode->i_sb, BARRIER))
+ ret = generic_file_fsync(file, start, end, datasync);
+ else
+ ret = __generic_file_fsync(file, start, end, datasync);
if (!ret && !hlist_empty(&inode->i_dentry))
ret = ext4_sync_parent(inode);
goto out;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f172ddc4ecdd..eee9450272d6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -93,7 +93,6 @@ static const struct vm_operations_struct f2fs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = f2fs_vm_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static int get_parent_ino(struct inode *inode, nid_t *pino)
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 91ad9e1c9441..99312575636c 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -303,15 +303,59 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
return dclus;
}
-int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
- unsigned long *mapped_blocks, int create)
+int fat_get_mapped_cluster(struct inode *inode, sector_t sector,
+ sector_t last_block,
+ unsigned long *mapped_blocks, sector_t *bmap)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int cluster, offset;
+
+ cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
+ offset = sector & (sbi->sec_per_clus - 1);
+ cluster = fat_bmap_cluster(inode, cluster);
+ if (cluster < 0)
+ return cluster;
+ else if (cluster) {
+ *bmap = fat_clus_to_blknr(sbi, cluster) + offset;
+ *mapped_blocks = sbi->sec_per_clus - offset;
+ if (*mapped_blocks > last_block - sector)
+ *mapped_blocks = last_block - sector;
+ }
+
+ return 0;
+}
+
+static int is_exceed_eof(struct inode *inode, sector_t sector,
+ sector_t *last_block, int create)
+{
+ struct super_block *sb = inode->i_sb;
const unsigned long blocksize = sb->s_blocksize;
const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ *last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
+ if (sector >= *last_block) {
+ if (!create)
+ return 1;
+
+ /*
+ * ->mmu_private can access on only allocation path.
+ * (caller must hold ->i_mutex)
+ */
+ *last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ >> blocksize_bits;
+ if (sector >= *last_block)
+ return 1;
+ }
+
+ return 0;
+}
+
+int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+ unsigned long *mapped_blocks, int create, bool from_bmap)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
sector_t last_block;
- int cluster, offset;
*phys = 0;
*mapped_blocks = 0;
@@ -323,31 +367,16 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
return 0;
}
- last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
- if (sector >= last_block) {
- if (!create)
+ if (!from_bmap) {
+ if (is_exceed_eof(inode, sector, &last_block, create))
return 0;
-
- /*
- * ->mmu_private can access on only allocation path.
- * (caller must hold ->i_mutex)
- */
- last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
- >> blocksize_bits;
+ } else {
+ last_block = inode->i_blocks >>
+ (inode->i_sb->s_blocksize_bits - 9);
if (sector >= last_block)
return 0;
}
- cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
- offset = sector & (sbi->sec_per_clus - 1);
- cluster = fat_bmap_cluster(inode, cluster);
- if (cluster < 0)
- return cluster;
- else if (cluster) {
- *phys = fat_clus_to_blknr(sbi, cluster) + offset;
- *mapped_blocks = sbi->sec_per_clus - offset;
- if (*mapped_blocks > last_block - sector)
- *mapped_blocks = last_block - sector;
- }
- return 0;
+ return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks,
+ phys);
}
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index c5d6bb939d19..da52bd1ad3e7 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -95,7 +95,7 @@ next:
*bh = NULL;
iblock = *pos >> sb->s_blocksize_bits;
- err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0);
+ err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0, false);
if (err || !phys)
return -1; /* beyond EOF or error */
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 64e295e8ff38..9217566b1042 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -288,8 +288,11 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
extern void fat_cache_inval_inode(struct inode *inode);
extern int fat_get_cluster(struct inode *inode, int cluster,
int *fclus, int *dclus);
+extern int fat_get_mapped_cluster(struct inode *inode, sector_t sector,
+ sector_t last_block,
+ unsigned long *mapped_blocks, sector_t *bmap);
extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
- unsigned long *mapped_blocks, int create);
+ unsigned long *mapped_blocks, int create, bool from_bmap);
/* fat/dir.c */
extern const struct file_operations fat_dir_operations;
@@ -387,6 +390,7 @@ static inline unsigned long fat_dir_hash(int logstart)
{
return hash_32(logstart, FAT_HASH_BITS);
}
+extern int fat_add_cluster(struct inode *inode);
/* fat/misc.c */
extern __printf(3, 4) __cold
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8429c68e3057..cebe7c90adb9 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -17,8 +17,12 @@
#include <linux/blkdev.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
+#include <linux/falloc.h>
#include "fat.h"
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len);
+
static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
{
u32 attr;
@@ -182,6 +186,7 @@ const struct file_operations fat_file_operations = {
#endif
.fsync = fat_file_fsync,
.splice_read = generic_file_splice_read,
+ .fallocate = fat_fallocate,
};
static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -220,6 +225,62 @@ out:
return err;
}
+/*
+ * Preallocate space for a file. This implements fat's fallocate file
+ * operation, which gets called from sys_fallocate system call. User
+ * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ */
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ int nr_cluster; /* Number of clusters to be allocated */
+ loff_t mm_bytes; /* Number of bytes to be allocated for file */
+ loff_t ondisksize; /* block aligned on-disk size in bytes*/
+ struct inode *inode = file->f_mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int err = 0;
+
+ /* No support for hole punch or other fallocate flags. */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /* No support for dir */
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&inode->i_mutex);
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ ondisksize = inode->i_blocks << 9;
+ if ((offset + len) <= ondisksize)
+ goto error;
+
+ /* First compute the number of clusters to be allocated */
+ mm_bytes = offset + len - ondisksize;
+ nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >>
+ sbi->cluster_bits;
+
+ /* Start the allocation.We are not zeroing out the clusters */
+ while (nr_cluster-- > 0) {
+ err = fat_add_cluster(inode);
+ if (err)
+ goto error;
+ }
+ } else {
+ if ((offset + len) <= i_size_read(inode))
+ goto error;
+
+ /* This is just an expanding truncate */
+ err = fat_cont_expand(inode, (offset + len));
+ }
+
+error:
+ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
/* Free all clusters after the skip'th cluster. */
static int fat_free(struct inode *inode, int skip)
{
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 7b41a2dcdd76..df5803595eb9 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -101,7 +101,7 @@ static struct fat_floppy_defaults {
},
};
-static int fat_add_cluster(struct inode *inode)
+int fat_add_cluster(struct inode *inode)
{
int err, cluster;
@@ -123,10 +123,10 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
unsigned long mapped_blocks;
- sector_t phys;
+ sector_t phys, last_block;
int err, offset;
- err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
+ err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create, false);
if (err)
return err;
if (phys) {
@@ -143,8 +143,14 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
return -EIO;
}
+ last_block = inode->i_blocks >> (sb->s_blocksize_bits - 9);
offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
- if (!offset) {
+ /*
+ * allocate a cluster according to the following.
+ * 1) no more available blocks
+ * 2) not part of fallocate region
+ */
+ if (!offset && !(iblock < last_block)) {
/* TODO: multiple cluster allocation would be desirable. */
err = fat_add_cluster(inode);
if (err)
@@ -156,7 +162,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
- err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
+ err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create, false);
if (err)
return err;
@@ -282,13 +288,38 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
return ret;
}
+static int fat_get_block_bmap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ struct super_block *sb = inode->i_sb;
+ unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ int err;
+ sector_t bmap;
+ unsigned long mapped_blocks;
+
+ BUG_ON(create != 0);
+
+ err = fat_bmap(inode, iblock, &bmap, &mapped_blocks, create, true);
+ if (err)
+ return err;
+
+ if (bmap) {
+ map_bh(bh_result, sb, bmap);
+ max_blocks = min(mapped_blocks, max_blocks);
+ }
+
+ bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+
+ return 0;
+}
+
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&MSDOS_I(mapping->host)->truncate_lock);
- blocknr = generic_block_bmap(mapping, block, fat_get_block);
+ blocknr = generic_block_bmap(mapping, block, fat_get_block_bmap);
up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
@@ -562,13 +593,43 @@ out:
EXPORT_SYMBOL_GPL(fat_build_inode);
+static int __fat_write_inode(struct inode *inode, int wait);
+
+static void fat_free_eofblocks(struct inode *inode)
+{
+ /* Release unwritten fallocated blocks on inode eviction. */
+ if ((inode->i_blocks << 9) >
+ round_up(MSDOS_I(inode)->mmu_private,
+ MSDOS_SB(inode->i_sb)->cluster_size)) {
+ int err;
+
+ fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private);
+ /* Fallocate results in updating the i_start/iogstart
+ * for the zero byte file. So, make it return to
+ * original state during evict and commit it to avoid
+ * any corruption on the next access to the cluster
+ * chain for the file.
+ */
+ err = __fat_write_inode(inode, inode_needs_sync(inode));
+ if (err) {
+ fat_msg(inode->i_sb, KERN_WARNING, "Failed to "
+ "update on disk inode for unused "
+ "fallocated blocks, inode could be "
+ "corrupted. Please run fsck");
+ }
+
+ }
+}
+
static void fat_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
- }
+ } else
+ fat_free_eofblocks(inode);
+
invalidate_inode_buffers(inode);
clear_inode(inode);
fat_cache_inval_inode(inode);
@@ -580,7 +641,7 @@ static void fat_set_state(struct super_block *sb,
{
struct buffer_head *bh;
struct fat_boot_sector *b;
- struct msdos_sb_info *sbi = sb->s_fs_info;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
/* do not change any thing if mounted read only */
if ((sb->s_flags & MS_RDONLY) && !force)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 99d440a4a6ba..ee85cd4e136a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -740,14 +740,15 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- __FMODE_EXEC | O_PATH | __O_TMPFILE
+ __FMODE_EXEC | O_PATH | __O_TMPFILE |
+ __FMODE_NONOTIFY
));
fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 760b2c552197..d769e594855b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2062,7 +2062,6 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = fuse_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6e600abf694a..ec9c2d33477a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -498,7 +498,6 @@ static const struct vm_operations_struct gfs2_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = gfs2_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
/**
diff --git a/fs/inode.c b/fs/inode.c
index aa149e7262ac..c760fac33c92 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -355,7 +355,6 @@ void address_space_init_once(struct address_space *mapping)
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
mapping->i_mmap = RB_ROOT;
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
EXPORT_SYMBOL(address_space_init_once);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 214c3c11fbc2..5d01d2638ca5 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -379,6 +379,11 @@ int __generic_block_fiemap(struct inode *inode,
past_eof = true;
}
cond_resched();
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+
} while (1);
/* If ret is 1 then we just hit the end of the extent array */
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 7654e87b0428..9ad5ba4b299b 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -510,6 +510,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
sumlen = c->sector_size - je32_to_cpu(sm->offset);
sumptr = buf + buf_size - sumlen;
+ /* sm->offset maybe wrong but MAGIC maybe right */
+ if (sumlen > c->sector_size)
+ goto full_scan;
+
/* Now, make sure the summary itself is available */
if (sumlen > buf_size) {
/* Need to kmalloc for this. */
@@ -544,6 +548,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
}
}
+full_scan:
buf_ofs = jeb->offset;
if (!buf_size) {
diff --git a/fs/mpage.c b/fs/mpage.c
index 3e79220babac..587c7ed4185d 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -482,6 +482,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -590,7 +591,7 @@ page_is_mapped:
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
alloc_new:
if (bio == NULL) {
@@ -614,7 +615,7 @@ alloc_new:
*/
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
goto alloc_new;
}
@@ -624,7 +625,7 @@ alloc_new:
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -636,7 +637,7 @@ alloc_new:
confused:
if (bio)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
@@ -692,8 +693,11 @@ mpage_writepages(struct address_space *mapping,
};
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
}
blk_finish_plug(&plug);
return ret;
@@ -710,8 +714,11 @@ int mpage_writepage(struct page *page, get_block_t get_block,
.use_writepage = 0,
};
int ret = __mpage_writepage(page, wbc, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
return ret;
}
EXPORT_SYMBOL(mpage_writepage);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2ab6f00dba5b..94712fc781fa 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -646,7 +646,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = nfs_vm_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static int nfs_need_sync_write(struct file *filp, struct inode *inode)
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 3a03e0aea1fb..a8c728acb7a8 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -128,7 +128,6 @@ static const struct vm_operations_struct nilfs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = nilfs_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 30d3addfad75..51ceb8107284 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -140,7 +140,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
}
if (S_ISDIR(path->dentry->d_inode->i_mode) &&
- (marks_ignored_mask & FS_ISDIR))
+ !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
return false;
if (event_mask & marks_mask & ~marks_ignored_mask)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index c991616acca9..81431ee1f757 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -487,20 +487,27 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
unsigned int flags,
int *destroy)
{
- __u32 oldmask;
+ __u32 oldmask = 0;
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
+ __u32 tmask = fsn_mark->mask & ~mask;
+
+ if (flags & FAN_MARK_ONDIR)
+ tmask &= ~FAN_ONDIR;
+
oldmask = fsn_mark->mask;
- fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
+ fsnotify_set_mark_mask_locked(fsn_mark, tmask);
} else {
- oldmask = fsn_mark->ignored_mask;
- fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask));
+ __u32 tmask = fsn_mark->ignored_mask & ~mask;
+ if (flags & FAN_MARK_ONDIR)
+ tmask &= ~FAN_ONDIR;
+
+ fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
}
+ *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
spin_unlock(&fsn_mark->lock);
- *destroy = !(oldmask & ~mask);
-
return mask & oldmask;
}
@@ -569,20 +576,22 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
+ __u32 tmask = fsn_mark->mask | mask;
+
+ if (flags & FAN_MARK_ONDIR)
+ tmask |= FAN_ONDIR;
+
oldmask = fsn_mark->mask;
- fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
+ fsnotify_set_mark_mask_locked(fsn_mark, tmask);
} else {
__u32 tmask = fsn_mark->ignored_mask | mask;
+ if (flags & FAN_MARK_ONDIR)
+ tmask |= FAN_ONDIR;
+
fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
}
-
- if (!(flags & FAN_MARK_ONDIR)) {
- __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
- fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
- }
-
spin_unlock(&fsn_mark->lock);
return mask & ~oldmask;
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 7e8282dcea2a..c58a1bcfda0f 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -245,16 +245,14 @@ int ocfs2_set_acl(handle_t *handle,
ret = posix_acl_equiv_mode(acl, &mode);
if (ret < 0)
return ret;
- else {
- if (ret == 0)
- acl = NULL;
- ret = ocfs2_acl_set_mode(inode, di_bh,
- handle, mode);
- if (ret)
- return ret;
+ if (ret == 0)
+ acl = NULL;
- }
+ ret = ocfs2_acl_set_mode(inode, di_bh,
+ handle, mode);
+ if (ret)
+ return ret;
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 46d93e941f3d..89f43dc7e440 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -28,6 +28,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
#include <linux/quotaops.h>
+#include <linux/blkdev.h>
#include <cluster/masklog.h>
@@ -47,6 +48,9 @@
#include "ocfs2_trace.h"
#include "buffer_head_io.h"
+#include "dir.h"
+#include "namei.h"
+#include "sysfile.h"
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
@@ -506,18 +510,21 @@ bail:
*
* called like this: dio->get_blocks(dio->inode, fs_startblk,
* fs_count, map_bh, dio->rw == WRITE);
- *
- * Note that we never bother to allocate blocks here, and thus ignore the
- * create argument.
*/
static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
int ret;
+ u32 cpos = 0;
+ int alloc_locked = 0;
u64 p_blkno, inode_blocks, contig_blocks;
unsigned int ext_flags;
unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ unsigned long len = bh_result->b_size;
+ unsigned int clusters_to_alloc = 0;
+
+ cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock);
/* This function won't even be called if the request isn't all
* nicely aligned and of the right size, so there's no need
@@ -539,6 +546,40 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
/* We should already CoW the refcounted extent in case of create. */
BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED));
+ /* allocate blocks if no p_blkno is found, and create == 1 */
+ if (!p_blkno && create) {
+ ret = ocfs2_inode_lock(inode, NULL, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ alloc_locked = 1;
+
+ /* fill hole, allocate blocks can't be larger than the size
+ * of the hole */
+ clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len);
+ if (clusters_to_alloc > contig_blocks)
+ clusters_to_alloc = contig_blocks;
+
+ /* allocate extent and insert them into the extent tree */
+ ret = ocfs2_extend_allocation(inode, cpos,
+ clusters_to_alloc, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
+ &contig_blocks, &ext_flags);
+ if (ret < 0) {
+ mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
+ (unsigned long long)iblock);
+ ret = -EIO;
+ goto bail;
+ }
+ }
+
/*
* get_more_blocks() expects us to describe a hole by clearing
* the mapped bit on bh_result().
@@ -556,6 +597,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
contig_blocks = max_blocks;
bh_result->b_size = contig_blocks << blocksize_bits;
bail:
+ if (alloc_locked)
+ ocfs2_inode_unlock(inode, 1);
return ret;
}
@@ -597,6 +640,180 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
+static int ocfs2_is_overwrite(struct ocfs2_super *osb,
+ struct inode *inode , loff_t offset)
+{
+ int ret = 0;
+ u32 v_cpos = 0;
+ u32 p_cpos = 0;
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+
+ v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN))
+ return 1;
+
+ return 0;
+}
+
+static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset)
+{
+ ssize_t ret = 0;
+ ssize_t written = 0;
+ bool orphaned = false;
+ int is_overwrite = 0;
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file)->i_mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct buffer_head *di_bh = NULL;
+ size_t count = iter->count;
+ journal_t *journal = osb->journal->j_journal;
+ u32 zero_len;
+ int cluster_align;
+ loff_t final_size = offset + count;
+ int append_write = offset >= i_size_read(inode) ? 1 : 0;
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+
+ {
+ u64 o = offset;
+
+ zero_len = do_div(o, 1 << osb->s_clustersize_bits);
+ cluster_align = !!zero_len;
+ }
+
+ /*
+ * when final_size > inode->i_size, inode->i_size will be
+ * updated after direct write, so add the inode to orphan
+ * dir first.
+ */
+ if (final_size > i_size_read(inode)) {
+ ret = ocfs2_add_inode_to_orphan(osb, inode);
+ if (ret < 0)
+ goto out;
+ orphaned = true;
+ }
+
+ if (append_write) {
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ ret = ocfs2_zero_extend(inode, di_bh, offset);
+ else
+ ret = ocfs2_extend_no_holes(inode, di_bh, offset, offset);
+ if (ret < 0) {
+ mlog_errno(ret);
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ goto clean_orphan;
+ }
+
+ is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
+ if (is_overwrite < 0) {
+ mlog_errno(is_overwrite);
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ goto clean_orphan;
+ }
+
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ di_bh = NULL;
+ }
+
+ written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
+ iter, offset,
+ ocfs2_direct_IO_get_blocks,
+ ocfs2_dio_end_io, NULL, 0);
+ if (unlikely(written < 0)) {
+ loff_t i_size = i_size_read(inode);
+
+ if (offset + count > i_size) {
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
+ if (i_size == i_size_read(inode)) {
+ ret = ocfs2_truncate_file(inode, di_bh,
+ i_size);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ goto clean_orphan;
+ }
+ }
+
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+ ret = jbd2_journal_force_commit(journal);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+ } else if (append_write && !is_overwrite && !cluster_align) {
+ u32 p_cpos = 0;
+ u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
+
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
+ BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
+
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev,
+ p_cpos << (osb->s_clustersize_bits - 9),
+ zero_len >> 9,
+ GFP_KERNEL);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+
+clean_orphan:
+ if (orphaned) {
+ int tmp_ret;
+ int update_isize = written > 0 ? 1 : 0;
+ loff_t end = update_isize ? offset + written : 0;
+ tmp_ret = ocfs2_del_inode_from_orphan(osb, inode,
+ update_isize, end);
+ if (tmp_ret < 0) {
+ ret = tmp_ret;
+ goto out;
+ }
+
+ tmp_ret = jbd2_journal_force_commit(journal);
+ if (tmp_ret < 0) {
+ ret = tmp_ret;
+ mlog_errno(tmp_ret);
+ }
+ }
+
+out:
+ if (ret >= 0)
+ ret = written;
+ return ret;
+}
+
static ssize_t ocfs2_direct_IO(int rw,
struct kiocb *iocb,
struct iov_iter *iter,
@@ -604,6 +821,9 @@ static ssize_t ocfs2_direct_IO(int rw,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file)->i_mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int full_coherency = !(osb->s_mount_opt &
+ OCFS2_MOUNT_COHERENCY_BUFFERED);
/*
* Fallback to buffered I/O if we see an inode without
@@ -612,14 +832,19 @@ static ssize_t ocfs2_direct_IO(int rw,
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
- /* Fallback to buffered I/O if we are appending. */
- if (i_size_read(inode) <= offset)
+ /* Fallback to buffered I/O if we are appending and
+ * concurrent O_DIRECT writes are allowed.
+ */
+ if (i_size_read(inode) <= offset && !full_coherency)
return 0;
- return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+ if (rw == READ)
+ return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
iter, offset,
ocfs2_direct_IO_get_blocks,
ocfs2_dio_end_io, NULL, 0);
+ else
+ return ocfs2_direct_IO_write(iocb, iter, offset);
}
static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
@@ -1822,16 +2047,6 @@ try_again:
if (ret)
goto out_commit;
}
- /*
- * We don't want this to fail in ocfs2_write_end(), so do it
- * here.
- */
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out_quota;
- }
/*
* Fill our page array first. That way we've grabbed enough so
@@ -1982,7 +2197,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- int i;
+ int i, ret;
unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2032,6 +2247,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
}
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ copied = ret;
+ mlog_errno(ret);
+ goto out;
+ }
+
out_write_size:
pos += copied;
if (pos > i_size_read(inode)) {
@@ -2053,6 +2276,7 @@ out_write_size:
*/
ocfs2_unlock_pages(wc);
+out:
ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index dc024367110a..b95e7df5b76a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -107,12 +107,12 @@ struct o2net_node {
struct list_head nn_status_list;
/* connects are attempted from when heartbeat comes up until either hb
- * goes down, the node is unconfigured, no connect attempts succeed
- * before O2NET_CONN_IDLE_DELAY, or a connect succeeds. connect_work
- * is queued from set_nn_state both from hb up and from itself if a
- * connect attempt fails and so can be self-arming. shutdown is
- * careful to first mark the nn such that no connects will be attempted
- * before canceling delayed connect work and flushing the queue. */
+ * goes down, the node is unconfigured, or a connect succeeds.
+ * connect_work is queued from set_nn_state both from hb up and from
+ * itself if a connect attempt fails and so can be self-arming.
+ * shutdown is careful to first mark the nn such that no connects will
+ * be attempted before canceling delayed connect work and flushing the
+ * queue. */
struct delayed_work nn_connect_work;
unsigned long nn_last_connect_attempt;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index b46278f9ae44..fd6bbbbd7d78 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -385,8 +385,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
head = &res->granted;
list_for_each_entry(lock, head, list) {
- if (lock->ml.cookie == cookie)
+ /* if lock is found but unlock is pending ignore the bast */
+ if (lock->ml.cookie == cookie) {
+ if (lock->unlock_pending)
+ break;
goto do_ast;
+ }
}
mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a6944b25fd5b..b26b476e1f06 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -498,16 +498,6 @@ static void dlm_lockres_release(struct kref *kref)
mlog(0, "destroying lockres %.*s\n", res->lockname.len,
res->lockname.name);
- spin_lock(&dlm->track_lock);
- if (!list_empty(&res->tracking))
- list_del_init(&res->tracking);
- else {
- mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
- res->lockname.len, res->lockname.name);
- dlm_print_one_lock_resource(res);
- }
- spin_unlock(&dlm->track_lock);
-
atomic_dec(&dlm->res_cur_count);
if (!hlist_unhashed(&res->hash_node) ||
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 79b5af5e6a7b..ce12e0b1a31f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1070,6 +1070,9 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
dead_node, dlm->name);
list_del_init(&lock->list);
dlm_lock_put(lock);
+ /* Can't schedule DLM_UNLOCK_FREE_LOCK
+ * - do manually */
+ dlm_lock_put(lock);
break;
}
}
@@ -2023,11 +2026,8 @@ leave:
dlm_lockres_drop_inflight_ref(dlm, res);
spin_unlock(&res->spinlock);
- if (ret < 0) {
+ if (ret < 0)
mlog_errno(ret);
- if (newlock)
- dlm_lock_put(newlock);
- }
return ret;
}
@@ -2349,6 +2349,10 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
dead_node, dlm->name);
list_del_init(&lock->list);
dlm_lock_put(lock);
+ /* Can't schedule
+ * DLM_UNLOCK_FREE_LOCK
+ * - do manually */
+ dlm_lock_put(lock);
break;
}
}
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 69aac6f088ad..2e5e6d5fffe8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -211,6 +211,16 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
__dlm_unhash_lockres(dlm, res);
+ spin_lock(&dlm->track_lock);
+ if (!list_empty(&res->tracking))
+ list_del_init(&res->tracking);
+ else {
+ mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
+ res->lockname.len, res->lockname.name);
+ __dlm_print_one_lock_resource(res);
+ }
+ spin_unlock(&dlm->track_lock);
+
/* lockres is not in the hash now. drop the flag and wake up
* any processes waiting in dlm_get_lock_resource. */
if (!master) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3950693dd0f6..5b8954d9d3c4 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -295,7 +295,7 @@ out:
return ret;
}
-static int ocfs2_set_inode_size(handle_t *handle,
+int ocfs2_set_inode_size(handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
u64 new_i_size)
@@ -441,7 +441,7 @@ out:
return status;
}
-static int ocfs2_truncate_file(struct inode *inode,
+int ocfs2_truncate_file(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size)
{
@@ -569,7 +569,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
handle_t *handle = NULL;
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
- enum ocfs2_alloc_restarted why;
+ enum ocfs2_alloc_restarted why = RESTART_NONE;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_extent_tree et;
int did_quota = 0;
@@ -709,6 +709,13 @@ leave:
return status;
}
+int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
+ u32 clusters_to_add, int mark_unwritten)
+{
+ return __ocfs2_extend_allocation(inode, logical_start,
+ clusters_to_add, mark_unwritten);
+}
+
/*
* While a write will already be ordering the data, a truncate will not.
* Thus, we need to explicitly order the zeroed pages.
@@ -1352,44 +1359,6 @@ out:
return ret;
}
-/*
- * Will look for holes and unwritten extents in the range starting at
- * pos for count bytes (inclusive).
- */
-static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
- size_t count)
-{
- int ret = 0;
- unsigned int extent_flags;
- u32 cpos, clusters, extent_len, phys_cpos;
- struct super_block *sb = inode->i_sb;
-
- cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
- clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
-
- while (clusters) {
- ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
- &extent_flags);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
-
- if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) {
- ret = 1;
- break;
- }
-
- if (extent_len > clusters)
- extent_len = clusters;
-
- clusters -= extent_len;
- cpos += extent_len;
- }
-out:
- return ret;
-}
-
static int ocfs2_write_remove_suid(struct inode *inode)
{
int ret;
@@ -2109,6 +2078,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
loff_t saved_pos = 0, end;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int full_coherency = !(osb->s_mount_opt &
+ OCFS2_MOUNT_COHERENCY_BUFFERED);
/*
* We start with a read level meta lock and only jump to an ex
@@ -2197,23 +2169,11 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
* one node could wind up truncating another
* nodes writes.
*/
- if (end > i_size_read(inode)) {
+ if (end > i_size_read(inode) && !full_coherency) {
*direct_io = 0;
break;
}
- /*
- * We don't fill holes during direct io, so
- * check for them here. If any are found, the
- * caller will have to retake some cluster
- * locks and initiate the io as buffered.
- */
- ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
- if (ret == 1) {
- *direct_io = 0;
- ret = 0;
- } else if (ret < 0)
- mlog_errno(ret);
break;
}
@@ -2243,6 +2203,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
u32 old_clusters;
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ struct address_space *mapping = file->f_mapping;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int full_coherency = !(osb->s_mount_opt &
OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2357,11 +2318,50 @@ relock:
iov_iter_truncate(from, count);
if (direct_io) {
+ loff_t endbyte;
+ ssize_t written_buffered;
written = generic_file_direct_write(iocb, from, *ppos);
- if (written < 0) {
+ if (written < 0 || written == count) {
ret = written;
goto out_dio;
}
+
+ /*
+ * direct-io write to a hole: fall through to buffered I/O
+ * for completing the rest of the request.
+ */
+ count -= written;
+ written_buffered = generic_perform_write(file, from, *ppos);
+ /*
+ * If generic_file_buffered_write() returned a synchronous error
+ * then we want to return the number of bytes which were
+ * direct-written, or the error code if that was zero. Note
+ * that this differs from normal direct-io semantics, which
+ * will return -EFOO even if some bytes were written.
+ */
+ if (written_buffered < 0) {
+ ret = written_buffered;
+ goto out;
+ }
+
+ /* We need to ensure that the page cache pages are written to
+ * disk and invalidated to preserve the expected O_DIRECT
+ * semantics.
+ */
+ endbyte = *ppos + written_buffered - written - 1;
+ ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
+ endbyte);
+ if (ret == 0) {
+ written = written_buffered;
+ invalidate_mapping_pages(mapping,
+ *ppos >> PAGE_CACHE_SHIFT,
+ endbyte >> PAGE_CACHE_SHIFT);
+ } else {
+ /*
+ * We don't know how much we wrote, so just return
+ * the number of bytes which were direct-written
+ */
+ }
} else {
current->backing_dev_info = file->f_mapping->backing_dev_info;
written = generic_perform_write(file, from, *ppos);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 97bf761c9e7c..e8c62f22215c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,13 +51,22 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
+int ocfs2_set_inode_size(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ u64 new_i_size);
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size);
+int ocfs2_truncate_file(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 new_i_size);
int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
u64 new_i_size, u64 zero_to);
int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
loff_t zero_to);
+int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
+ u32 clusters_to_add, int mark_unwritten);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ca3431ee7f24..5e86b247c821 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -81,6 +81,8 @@ struct ocfs2_inode_info
tid_t i_sync_tid;
tid_t i_datasync_tid;
+ wait_queue_head_t append_dio_wq;
+
struct dquot *i_dquot[MAXQUOTAS];
};
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4f502382180f..e238bbc27a3c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -50,6 +50,8 @@
#include "sysfile.h"
#include "uptodate.h"
#include "quota.h"
+#include "file.h"
+#include "namei.h"
#include "buffer_head_io.h"
#include "ocfs2_trace.h"
@@ -69,13 +71,15 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
- int slot);
+ int slot,
+ enum ocfs2_orphan_reco_type orphan_reco_type);
static int ocfs2_commit_thread(void *arg);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
struct ocfs2_dinode *tl_dinode,
- struct ocfs2_quota_recovery *qrec);
+ struct ocfs2_quota_recovery *qrec,
+ enum ocfs2_orphan_reco_type orphan_reco_type);
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
@@ -149,7 +153,8 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
return 0;
}
-void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
+void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
+ enum ocfs2_orphan_reco_type orphan_reco_type)
{
struct ocfs2_replay_map *replay_map = osb->replay_map;
int i;
@@ -163,7 +168,8 @@ void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
for (i = 0; i < replay_map->rm_slots; i++)
if (replay_map->rm_replay_slots[i])
ocfs2_queue_recovery_completion(osb->journal, i, NULL,
- NULL, NULL);
+ NULL, NULL,
+ orphan_reco_type);
replay_map->rm_state = REPLAY_DONE;
}
@@ -1174,6 +1180,7 @@ struct ocfs2_la_recovery_item {
struct ocfs2_dinode *lri_la_dinode;
struct ocfs2_dinode *lri_tl_dinode;
struct ocfs2_quota_recovery *lri_qrec;
+ enum ocfs2_orphan_reco_type lri_orphan_reco_type;
};
/* Does the second half of the recovery process. By this point, the
@@ -1195,6 +1202,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
struct ocfs2_dinode *la_dinode, *tl_dinode;
struct ocfs2_la_recovery_item *item, *n;
struct ocfs2_quota_recovery *qrec;
+ enum ocfs2_orphan_reco_type orphan_reco_type;
LIST_HEAD(tmp_la_list);
trace_ocfs2_complete_recovery(
@@ -1212,6 +1220,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
la_dinode = item->lri_la_dinode;
tl_dinode = item->lri_tl_dinode;
qrec = item->lri_qrec;
+ orphan_reco_type = item->lri_orphan_reco_type;
trace_ocfs2_complete_recovery_slot(item->lri_slot,
la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
@@ -1236,7 +1245,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
kfree(tl_dinode);
}
- ret = ocfs2_recover_orphans(osb, item->lri_slot);
+ ret = ocfs2_recover_orphans(osb, item->lri_slot,
+ orphan_reco_type);
if (ret < 0)
mlog_errno(ret);
@@ -1261,7 +1271,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
struct ocfs2_dinode *tl_dinode,
- struct ocfs2_quota_recovery *qrec)
+ struct ocfs2_quota_recovery *qrec,
+ enum ocfs2_orphan_reco_type orphan_reco_type)
{
struct ocfs2_la_recovery_item *item;
@@ -1285,6 +1296,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
item->lri_slot = slot_num;
item->lri_tl_dinode = tl_dinode;
item->lri_qrec = qrec;
+ item->lri_orphan_reco_type = orphan_reco_type;
spin_lock(&journal->j_lock);
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -1304,7 +1316,8 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
/* No need to queue up our truncate_log as regular cleanup will catch
* that */
ocfs2_queue_recovery_completion(journal, osb->slot_num,
- osb->local_alloc_copy, NULL, NULL);
+ osb->local_alloc_copy, NULL, NULL,
+ ORPHAN_NEED_TRUNCATE);
ocfs2_schedule_truncate_log_flush(osb, 0);
osb->local_alloc_copy = NULL;
@@ -1312,7 +1325,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
/* queue to recover orphan slots for all offline slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
- ocfs2_queue_replay_slots(osb);
+ ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
ocfs2_free_replay_slots(osb);
}
@@ -1323,7 +1336,8 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
osb->slot_num,
NULL,
NULL,
- osb->quota_rec);
+ osb->quota_rec,
+ ORPHAN_NEED_TRUNCATE);
osb->quota_rec = NULL;
}
}
@@ -1360,7 +1374,7 @@ restart:
/* queue recovery for our own slot */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
- NULL, NULL);
+ NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
spin_lock(&osb->osb_lock);
while (rm->rm_used) {
@@ -1419,13 +1433,14 @@ skip_recovery:
continue;
}
ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
- NULL, NULL, qrec);
+ NULL, NULL, qrec,
+ ORPHAN_NEED_TRUNCATE);
}
ocfs2_super_unlock(osb, 1);
/* queue recovery for offline slots */
- ocfs2_queue_replay_slots(osb);
+ ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
bail:
mutex_lock(&osb->recovery_lock);
@@ -1712,7 +1727,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
/* This will kfree the memory pointed to by la_copy and tl_copy */
ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
- tl_copy, NULL);
+ tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
status = 0;
done:
@@ -1902,7 +1917,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
for (i = 0; i < osb->max_slots; i++)
ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
- NULL);
+ NULL, ORPHAN_NO_NEED_TRUNCATE);
/*
* We queued a recovery on orphan slots, increment the sequence
* number and update LVB so other node will skip the scan for a while
@@ -2090,6 +2105,39 @@ static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
}
+static int ocfs2_truncate_file_locked(struct inode *inode)
+{
+ struct buffer_head *di_bh = NULL;
+ int ret;
+
+ ret = ocfs2_rw_lock(inode, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ ocfs2_rw_unlock(inode, 1);
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_truncate_file(inode, di_bh, i_size_read(inode));
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ ret = -ENOSPC;
+ }
+
+ ocfs2_inode_unlock(inode, 1);
+ ocfs2_rw_unlock(inode, 1);
+ brelse(di_bh);
+
+out:
+ return ret;
+}
+
/*
* Orphan recovery. Each mounted node has it's own orphan dir which we
* must run during recovery. Our strategy here is to build a list of
@@ -2109,7 +2157,8 @@ static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
* advertising our state to ocfs2_delete_inode().
*/
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
- int slot)
+ int slot,
+ enum ocfs2_orphan_reco_type orphan_reco_type)
{
int ret = 0;
struct inode *inode = NULL;
@@ -2134,12 +2183,38 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
iter = oi->ip_next_orphan;
- spin_lock(&oi->ip_lock);
- /* Set the proper information to get us going into
- * ocfs2_delete_inode. */
- oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
- spin_unlock(&oi->ip_lock);
+ /*
+ * We need to take and drop the inode lock to
+ * force read inode from disk.
+ */
+ ret = ocfs2_inode_lock(inode, NULL, 0);
+ if (ret) {
+ mlog_errno(ret);
+ goto next;
+ }
+ ocfs2_inode_unlock(inode, 0);
+
+ if (inode->i_nlink == 0) {
+ spin_lock(&oi->ip_lock);
+ /* Set the proper information to get us going into
+ * ocfs2_delete_inode. */
+ oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
+ spin_unlock(&oi->ip_lock);
+ } else if (orphan_reco_type == ORPHAN_NEED_TRUNCATE) {
+ ret = ocfs2_truncate_file_locked(inode);
+ if (ret) {
+ mlog_errno(ret);
+ goto next;
+ }
+
+ ret = ocfs2_del_inode_from_orphan(osb, inode, 0, 0);
+ if (ret)
+ mlog_errno(ret);
+
+ wake_up(&OCFS2_I(inode)->append_dio_wq);
+ } /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */
+next:
iput(inode);
inode = iter;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 7f8cde94abfe..f4cd3c3e9fb7 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -472,6 +472,11 @@ static inline int ocfs2_unlink_credits(struct super_block *sb)
* orphan dir index leaf */
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4)
+/* dinode + orphan dir dinode + extent tree leaf block + orphan dir entry +
+ * orphan dir index root + orphan dir index leaf */
+#define OCFS2_INODE_ADD_TO_ORPHAN_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 4)
+#define OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS OCFS2_INODE_ADD_TO_ORPHAN_CREDITS
+
/* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming
* directory + target unlink + 3 x dir index leaves */
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 10d66c75cecb..9581d190f6e1 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -173,7 +173,6 @@ out:
static const struct vm_operations_struct ocfs2_file_vm_ops = {
.fault = ocfs2_fault,
.page_mkwrite = ocfs2_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b931e04e3388..e3b951f45f3f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -81,6 +81,12 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
char *name,
struct ocfs2_dir_lookup_result *lookup);
+static int ocfs2_dio_prepare_orphan_dir(struct ocfs2_super *osb,
+ struct inode **ret_orphan_dir,
+ u64 blkno,
+ char *name,
+ struct ocfs2_dir_lookup_result *lookup);
+
static int ocfs2_orphan_add(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
@@ -89,11 +95,28 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
struct ocfs2_dir_lookup_result *lookup,
struct inode *orphan_dir_inode);
+static int ocfs2_dio_orphan_add(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ char *name,
+ struct ocfs2_dir_lookup_result *lookup,
+ struct inode *orphan_dir_inode,
+ bool orphaned);
+
static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
const char *symname);
+static int ocfs2_double_lock(struct ocfs2_super *osb,
+ struct buffer_head **bh1,
+ struct inode *inode1,
+ struct buffer_head **bh2,
+ struct inode *inode2,
+ int rename);
+
+static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2);
/* An orphan dir name is an 8 byte value, printed as a hex string */
#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
@@ -678,8 +701,10 @@ static int ocfs2_link(struct dentry *old_dentry,
{
handle_t *handle;
struct inode *inode = old_dentry->d_inode;
+ struct inode *old_dir = old_dentry->d_parent->d_inode;
int err;
struct buffer_head *fe_bh = NULL;
+ struct buffer_head *old_dir_bh = NULL;
struct buffer_head *parent_fe_bh = NULL;
struct ocfs2_dinode *fe = NULL;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
@@ -696,19 +721,33 @@ static int ocfs2_link(struct dentry *old_dentry,
dquot_initialize(dir);
- err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
+ err = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
+ &parent_fe_bh, dir, 0);
if (err < 0) {
if (err != -ENOENT)
mlog_errno(err);
return err;
}
+ /* make sure both dirs have bhs
+ * get an extra ref on old_dir_bh if old==new */
+ if (!parent_fe_bh) {
+ if (old_dir_bh) {
+ parent_fe_bh = old_dir_bh;
+ get_bh(parent_fe_bh);
+ } else {
+ mlog(ML_ERROR, "%s: no old_dir_bh!\n", osb->uuid_str);
+ err = -EIO;
+ goto out;
+ }
+ }
+
if (!dir->i_nlink) {
err = -ENOENT;
goto out;
}
- err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name,
+ err = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
old_dentry->d_name.len, &old_de_ino);
if (err) {
err = -ENOENT;
@@ -801,10 +840,11 @@ out_unlock_inode:
ocfs2_inode_unlock(inode, 1);
out:
- ocfs2_inode_unlock(dir, 1);
+ ocfs2_double_unlock(old_dir, dir);
brelse(fe_bh);
brelse(parent_fe_bh);
+ brelse(old_dir_bh);
ocfs2_free_dir_lookup_result(&lookup);
@@ -1072,14 +1112,15 @@ static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
}
/*
- * The only place this should be used is rename!
+ * The only place this should be used is rename and link!
* if they have the same id, then the 1st one is the only one locked.
*/
static int ocfs2_double_lock(struct ocfs2_super *osb,
struct buffer_head **bh1,
struct inode *inode1,
struct buffer_head **bh2,
- struct inode *inode2)
+ struct inode *inode2,
+ int rename)
{
int status;
int inode1_is_ancestor, inode2_is_ancestor;
@@ -1127,7 +1168,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
}
/* lock id2 */
status = ocfs2_inode_lock_nested(inode2, bh2, 1,
- OI_LS_RENAME1);
+ rename == 1 ? OI_LS_RENAME1 : OI_LS_PARENT);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
@@ -1136,7 +1177,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
}
/* lock id1 */
- status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
+ status = ocfs2_inode_lock_nested(inode1, bh1, 1,
+ rename == 1 ? OI_LS_RENAME2 : OI_LS_PARENT);
if (status < 0) {
/*
* An error return must mean that no cluster locks
@@ -1252,7 +1294,7 @@ static int ocfs2_rename(struct inode *old_dir,
/* if old and new are the same, this'll just do one lock. */
status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
- &new_dir_bh, new_dir);
+ &new_dir_bh, new_dir, 1);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -2137,6 +2179,51 @@ out:
return ret;
}
+/**
+ * Copy from ocfs2_prepare_orphan_dir(). The difference:
+ * It will still lock orphan dir if entry exists.
+ * Caller must take care of -EEXIST and responsible for unlock.
+*/
+static int ocfs2_dio_prepare_orphan_dir(struct ocfs2_super *osb,
+ struct inode **ret_orphan_dir,
+ u64 blkno,
+ char *name,
+ struct ocfs2_dir_lookup_result *lookup)
+{
+ struct inode *orphan_dir_inode = NULL;
+ struct buffer_head *orphan_dir_bh = NULL;
+ int ret = 0;
+
+ ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode,
+ &orphan_dir_bh);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
+ blkno, name, lookup);
+ if (ret < 0 && ret != -EEXIST) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ *ret_orphan_dir = orphan_dir_inode;
+
+out:
+ brelse(orphan_dir_bh);
+
+ if (ret && ret != -EEXIST) {
+ ocfs2_inode_unlock(orphan_dir_inode, 1);
+ mutex_unlock(&orphan_dir_inode->i_mutex);
+ iput(orphan_dir_inode);
+ }
+
+ if (ret && ret != -EEXIST)
+ mlog_errno(ret);
+ return ret;
+}
+
static int ocfs2_orphan_add(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
@@ -2226,6 +2313,100 @@ leave:
return status;
}
+/**
+ * Copy from ocfs2_orphan_add, the difference:
+ * 1. Do not add entry if already added.
+ * 2. Update di flags OCFS2_DIO_ORPHANED_FL and record the
+ * orphan slot.
+*/
+static int ocfs2_dio_orphan_add(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ char *name,
+ struct ocfs2_dir_lookup_result *lookup,
+ struct inode *orphan_dir_inode,
+ bool orphaned)
+{
+ struct buffer_head *orphan_dir_bh = NULL;
+ int status = 0;
+ struct ocfs2_dinode *orphan_fe;
+ struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
+
+ trace_ocfs2_dio_orphan_add_begin(
+ (unsigned long long)OCFS2_I(inode)->ip_blkno);
+
+ status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(orphan_dir_inode),
+ orphan_dir_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
+ /*
+ * We're going to journal the change of i_flags and i_dio_orphaned_slot.
+ * It's safe anyway, though some callers may duplicate the journaling.
+ * Journaling within the func just make the logic look more
+ * straightforward.
+ */
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(inode),
+ fe_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
+ /* we're a cluster, and nlink can change on disk from
+ * underneath us... */
+ orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
+ if (S_ISDIR(inode->i_mode))
+ ocfs2_add_links_count(orphan_fe, 1);
+ set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
+ ocfs2_journal_dirty(handle, orphan_dir_bh);
+
+ /* It may already be orphaned by ocfs2_unlink/ocfs2_rename */
+ if (!orphaned) {
+ status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
+ OCFS2_ORPHAN_NAMELEN, inode,
+ OCFS2_I(inode)->ip_blkno,
+ orphan_dir_bh, lookup);
+ if (status < 0) {
+ mlog_errno(status);
+ goto rollback;
+ }
+ }
+
+ /* Update flag OCFS2_DIO_ORPHANED_FL and record the orphan slot */
+ fe->i_flags |= cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
+ fe->i_dio_orphaned_slot = cpu_to_le16(osb->slot_num);
+
+ ocfs2_journal_dirty(handle, fe_bh);
+
+ trace_ocfs2_dio_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno,
+ osb->slot_num);
+
+rollback:
+ if (status < 0) {
+ if (S_ISDIR(inode->i_mode))
+ ocfs2_add_links_count(orphan_fe, -1);
+ set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
+ }
+
+leave:
+ brelse(orphan_dir_bh);
+
+ return status;
+}
/* unlike orphan_add, we expect the orphan dir to already be locked here. */
int ocfs2_orphan_del(struct ocfs2_super *osb,
handle_t *handle,
@@ -2500,6 +2681,200 @@ leave:
return status;
}
+static int ocfs2_dio_orphan_recovered(struct inode *inode)
+{
+ int ret;
+ struct buffer_head *di_bh = NULL;
+ struct ocfs2_dinode *di = NULL;
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return 0;
+ }
+
+ di = (struct ocfs2_dinode *) di_bh->b_data;
+ ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+ return ret;
+}
+
+int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
+ struct inode *inode)
+{
+ char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
+ struct inode *orphan_dir_inode = NULL;
+ struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
+ struct buffer_head *di_bh = NULL;
+ int status = 0;
+ handle_t *handle = NULL;
+ struct ocfs2_dinode *di = NULL;
+ bool orphaned = false;
+
+restart:
+ status = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ di = (struct ocfs2_dinode *) di_bh->b_data;
+ /*
+ * Another append dio crashed?
+ * If so, wait for recovery first.
+ */
+ if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ wait_event_interruptible(OCFS2_I(inode)->append_dio_wq,
+ ocfs2_dio_orphan_recovered(inode));
+ goto restart;
+ }
+
+ status = ocfs2_dio_prepare_orphan_dir(osb, &orphan_dir_inode,
+ OCFS2_I(inode)->ip_blkno,
+ orphan_name,
+ &orphan_insert);
+ if (status < 0 && status != -EEXIST) {
+ mlog_errno(status);
+ goto bail_unlock_inode;
+ } else if (status == -EEXIST) {
+ mlog(ML_NOTICE, "inode %llu already added to "
+ "orphan dir %llu.\n",
+ OCFS2_I(inode)->ip_blkno,
+ OCFS2_I(orphan_dir_inode)->ip_blkno);
+ if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+ mlog_errno(status);
+ goto bail_unlock_orphan;
+ }
+ orphaned = true;
+ }
+
+ handle = ocfs2_start_trans(osb,
+ OCFS2_INODE_ADD_TO_ORPHAN_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ goto bail_unlock_orphan;
+ }
+
+ status = ocfs2_dio_orphan_add(osb, handle, inode, di_bh, orphan_name,
+ &orphan_insert, orphan_dir_inode, orphaned);
+ if (status)
+ mlog_errno(status);
+
+ ocfs2_commit_trans(osb, handle);
+
+bail_unlock_orphan:
+ ocfs2_inode_unlock(orphan_dir_inode, 1);
+ mutex_unlock(&orphan_dir_inode->i_mutex);
+ iput(orphan_dir_inode);
+
+ ocfs2_free_dir_lookup_result(&orphan_insert);
+
+bail_unlock_inode:
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+bail:
+ return status;
+}
+
+int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
+ struct inode *inode, int update_isize,
+ loff_t end)
+{
+ struct inode *orphan_dir_inode = NULL;
+ struct buffer_head *orphan_dir_bh = NULL;
+ struct buffer_head *di_bh = NULL;
+ struct ocfs2_dinode *di = NULL;
+ handle_t *handle = NULL;
+ int status = 0;
+
+ status = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ di = (struct ocfs2_dinode *) di_bh->b_data;
+
+ orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+ ORPHAN_DIR_SYSTEM_INODE,
+ le16_to_cpu(di->i_dio_orphaned_slot));
+ if (!orphan_dir_inode) {
+ status = -EEXIST;
+ mlog_errno(status);
+ goto bail_unlock_inode;
+ }
+
+ mutex_lock(&orphan_dir_inode->i_mutex);
+ status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+ if (status < 0) {
+ mutex_unlock(&orphan_dir_inode->i_mutex);
+ iput(orphan_dir_inode);
+ mlog_errno(status);
+ goto bail_unlock_inode;
+ }
+
+ handle = ocfs2_start_trans(osb,
+ OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ goto bail_unlock_orphan;
+ }
+
+ BUG_ON(!(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)));
+
+ /* Only delete entry if OCFS2_ORPHANED_FL not set, or
+ * there are two entries added */
+ if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) ||
+ (di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL) &&
+ (di->i_orphaned_slot != di->i_dio_orphaned_slot))) {
+ status = ocfs2_orphan_del(osb, handle, orphan_dir_inode,
+ inode, orphan_dir_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_commit;
+ }
+ }
+
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(inode),
+ di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_commit;
+ }
+
+ di->i_flags &= ~cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
+ di->i_dio_orphaned_slot = 0;
+
+ if (update_isize) {
+ status = ocfs2_set_inode_size(handle, inode, di_bh, end);
+ if (status)
+ mlog_errno(status);
+ } else
+ ocfs2_journal_dirty(handle, di_bh);
+
+bail_commit:
+ ocfs2_commit_trans(osb, handle);
+
+bail_unlock_orphan:
+ ocfs2_inode_unlock(orphan_dir_inode, 1);
+ mutex_unlock(&orphan_dir_inode->i_mutex);
+ brelse(orphan_dir_bh);
+ iput(orphan_dir_inode);
+
+bail_unlock_inode:
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+bail:
+ return status;
+}
+
int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
struct inode *inode,
struct dentry *dentry)
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index e5d059d4f115..562554026a60 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -38,6 +38,11 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
int ocfs2_create_inode_in_orphan(struct inode *dir,
int mode,
struct inode **new_inode);
+int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
+ struct inode *inode);
+int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
+ struct inode *inode, int update_isize,
+ loff_t end);
int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
struct inode *new_inode,
struct dentry *new_dentry);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7d6b7d090452..7e39cd654834 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -209,6 +209,11 @@ struct ocfs2_lock_res {
#endif
};
+enum ocfs2_orphan_reco_type {
+ ORPHAN_NO_NEED_TRUNCATE = 0,
+ ORPHAN_NEED_TRUNCATE,
+};
+
enum ocfs2_orphan_scan_state {
ORPHAN_SCAN_ACTIVE,
ORPHAN_SCAN_INACTIVE
@@ -279,6 +284,8 @@ enum ocfs2_mount_options
writes */
OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
+
+ OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -724,6 +731,16 @@ static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb,
return clusters;
}
+static inline unsigned int ocfs2_bytes_to_clusters(struct super_block *sb,
+ u64 bytes)
+{
+ int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
+ unsigned int clusters;
+
+ clusters = (unsigned int)(bytes >> cl_bits);
+ return clusters;
+}
+
static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb,
u64 bytes)
{
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 938387a10d5d..e933ea2e976e 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -229,6 +229,7 @@
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */
+#define OCFS2_DIO_ORPHANED_FL (0X00002000) /* On the orphan list especially for dio */
/*
* Flags on ocfs2_dinode.i_dyn_features
@@ -729,7 +730,9 @@ struct ocfs2_dinode {
inode belongs to. Only valid
if allocated from a
discontiguous block group */
-/*A0*/ __le64 i_reserved2[3];
+/*A0*/ __le16 i_dio_orphaned_slot; /* only used for append dio write */
+ __le16 i_reserved1[3];
+ __le64 i_reserved2[2];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 6cb019b7c6a8..2d471cc879cb 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2373,6 +2373,9 @@ DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_add_begin);
DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_orphan_add_end);
+DEFINE_OCFS2_ULL_EVENT(ocfs2_dio_orphan_add_begin);
+DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_dio_orphan_add_end);
+
TRACE_EVENT(ocfs2_orphan_del,
TP_PROTO(unsigned long long dir, const char *name, int namelen),
TP_ARGS(dir, name, namelen),
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 83723179e1ec..cdf5e28198b5 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -191,6 +191,7 @@ enum {
Opt_coherency_full,
Opt_resv_level,
Opt_dir_resv_level,
+ Opt_journal_async_commit,
Opt_err,
};
@@ -222,6 +223,7 @@ static const match_table_t tokens = {
{Opt_coherency_full, "coherency=full"},
{Opt_resv_level, "resv_level=%u"},
{Opt_dir_resv_level, "dir_resv_level=%u"},
+ {Opt_journal_async_commit, "journal_async_commit"},
{Opt_err, NULL}
};
@@ -1500,6 +1502,9 @@ static int ocfs2_parse_options(struct super_block *sb,
option < OCFS2_MAX_RESV_LEVEL)
mopt->dir_resv_level = option;
break;
+ case Opt_journal_async_commit:
+ mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+ break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1606,6 +1611,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
if (osb->osb_dir_resv_level != osb->osb_resv_level)
seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
+ if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
+ seq_printf(s, ",journal_async_commit");
+
return 0;
}
@@ -1768,6 +1776,8 @@ static void ocfs2_inode_init_once(void *data)
ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
ocfs2_lock_res_init_once(&oi->ip_open_lockres);
+ init_waitqueue_head(&oi->append_dio_wq);
+
ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
&ocfs2_inode_caching_ops);
@@ -2475,6 +2485,15 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
goto finally;
}
+ if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
+ jbd2_journal_set_features(osb->journal->j_journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
+ else
+ jbd2_journal_clear_features(osb->journal->j_journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
+
if (dirty) {
/* recover my local alloc if we didn't unmount cleanly. */
status = ocfs2_begin_local_alloc_recovery(osb,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 7fea13229f33..de14e46fd807 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -122,7 +122,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
- struct proc_dir_entry *de = PROC_I(inode)->pde;
+ struct proc_dir_entry *de = PDE(inode);
if (de && de->nlink)
set_nlink(inode, de->nlink);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8420a2f80811..13a50a32652d 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -40,7 +40,7 @@ static void proc_evict_inode(struct inode *inode)
put_pid(PROC_I(inode)->pid);
/* Let go of any associated proc directory entry */
- de = PROC_I(inode)->pde;
+ de = PDE(inode);
if (de)
pde_put(de);
head = PROC_I(inode)->sysctl;
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index d3ebf2e61853..2b598a6e1ccc 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -45,7 +45,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
committed = percpu_counter_read_positive(&vm_committed_as);
cached = global_page_state(NR_FILE_PAGES) -
- total_swapcache_pages() - i.bufferram;
+ total_swapcache_pages() - i.bufferram - i.sharedram;
if (cached < 0)
cached = 0;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 1e3187da1fed..7eee2d8b97d9 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -5,6 +5,7 @@
#include <linux/ksm.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
+#include <linux/huge_mm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
@@ -121,9 +122,18 @@ u64 stable_page_flags(struct page *page)
* just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
* to make sure a given page is a thp, not a non-huge compound page.
*/
- else if (PageTransCompound(page) && (PageLRU(compound_head(page)) ||
- PageAnon(compound_head(page))))
- u |= 1 << KPF_THP;
+ else if (PageTransCompound(page)) {
+ struct page *head = compound_head(page);
+
+ if (PageLRU(head) || PageAnon(head))
+ u |= 1 << KPF_THP;
+ else if (is_huge_zero_page(head)) {
+ u |= 1 << KPF_ZERO_PAGE;
+ u |= 1 << KPF_THP;
+ }
+ } else if (is_zero_pfn(page_to_pfn(page)))
+ u |= 1 << KPF_ZERO_PAGE;
+
/*
* Caveats on high order pages: page->_count will only be set
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 246eae84b13b..8faae6fed085 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -443,7 +443,6 @@ struct mem_size_stats {
unsigned long anonymous;
unsigned long anonymous_thp;
unsigned long swap;
- unsigned long nonlinear;
u64 pss;
};
@@ -484,7 +483,6 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = mss->vma;
- pgoff_t pgoff = linear_page_index(vma, addr);
struct page *page = NULL;
if (pte_present(*pte)) {
@@ -496,17 +494,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
mss->swap += PAGE_SIZE;
else if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
- } else if (pte_file(*pte)) {
- if (pte_to_pgoff(*pte) != pgoff)
- mss->nonlinear += PAGE_SIZE;
}
if (!page)
return;
-
- if (page->index != pgoff)
- mss->nonlinear += PAGE_SIZE;
-
smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
}
@@ -596,7 +587,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_ACCOUNT)] = "ac",
[ilog2(VM_NORESERVE)] = "nr",
[ilog2(VM_HUGETLB)] = "ht",
- [ilog2(VM_NONLINEAR)] = "nl",
[ilog2(VM_ARCH_1)] = "ar",
[ilog2(VM_DONTDUMP)] = "dd",
#ifdef CONFIG_MEM_SOFT_DIRTY
@@ -668,10 +658,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
(vma->vm_flags & VM_LOCKED) ?
(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
- if (vma->vm_flags & VM_NONLINEAR)
- seq_printf(m, "Nonlinear: %8lu kB\n",
- mss.nonlinear >> 10);
-
show_smap_vma_flags(m, vma);
m_cache_vma(m, vma);
return 0;
@@ -747,6 +733,7 @@ enum clear_refs_types {
CLEAR_REFS_ANON,
CLEAR_REFS_MAPPED,
CLEAR_REFS_SOFT_DIRTY,
+ CLEAR_REFS_MM_HIWATER_RSS,
CLEAR_REFS_LAST,
};
@@ -772,8 +759,6 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_clear_soft_dirty(ptent);
- } else if (pte_file(ptent)) {
- ptent = pte_file_clear_soft_dirty(ptent);
}
set_pte_at(vma->vm_mm, addr, pte, ptent);
@@ -861,6 +846,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
.mm = mm,
.private = &cp,
};
+
+ if (type == CLEAR_REFS_MM_HIWATER_RSS) {
+ /*
+ * Writing 5 to /proc/pid/clear_refs resets the peak
+ * resident set size to this mm's current rss value.
+ */
+ down_write(&mm->mmap_sem);
+ reset_mm_hiwater_rss(mm);
+ up_write(&mm->mmap_sem);
+ goto out_mm;
+ }
+
down_read(&mm->mmap_sem);
if (type == CLEAR_REFS_SOFT_DIRTY) {
for (vma = mm->mmap; vma; vma = vma->vm_next) {
@@ -903,6 +900,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
mmu_notifier_invalidate_range_end(mm, 0, -1);
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
+out_mm:
mmput(mm);
}
put_task_struct(task);
@@ -1533,6 +1531,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
if (!md->pages)
goto out;
+ seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
+
if (md->anon)
seq_printf(m, " anon=%lu", md->anon);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index a90d6d354199..4e61388ec03d 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -546,8 +546,8 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
nhdr_ptr = notes_section;
while (nhdr_ptr->n_namesz != 0) {
sz = sizeof(Elf64_Nhdr) +
- ((nhdr_ptr->n_namesz + 3) & ~3) +
- ((nhdr_ptr->n_descsz + 3) & ~3);
+ (((u64)nhdr_ptr->n_namesz + 3) & ~3) +
+ (((u64)nhdr_ptr->n_descsz + 3) & ~3);
if ((real_sz + sz) > max_sz) {
pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
@@ -732,8 +732,8 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
nhdr_ptr = notes_section;
while (nhdr_ptr->n_namesz != 0) {
sz = sizeof(Elf32_Nhdr) +
- ((nhdr_ptr->n_namesz + 3) & ~3) +
- ((nhdr_ptr->n_descsz + 3) & ~3);
+ (((u64)nhdr_ptr->n_namesz + 3) & ~3) +
+ (((u64)nhdr_ptr->n_descsz + 3) & ~3);
if ((real_sz + sz) > max_sz) {
pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a7eec9888f10..e72401e1f995 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2766,7 +2766,7 @@ static int reiserfs_write_begin(struct file *file,
int old_ref = 0;
inode = mapping->host;
- *fsdata = 0;
+ *fsdata = NULL;
if (flags & AOP_FLAG_CONT_EXPAND &&
(pos & (inode->i_sb->s_blocksize - 1)) == 0) {
pos ++;
diff --git a/fs/select.c b/fs/select.c
index 467bb1cb3ea5..f684c750e08a 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -971,7 +971,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
if (ret == -EINTR) {
struct restart_block *restart_block;
- restart_block = &current_thread_info()->restart_block;
+ restart_block = &current->restart_block;
restart_block->fn = do_restart_poll;
restart_block->poll.ufds = ufds;
restart_block->poll.nfds = nfds;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 538519ee37d9..035e51011444 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1536,7 +1536,6 @@ static const struct vm_operations_struct ubifs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ubifs_vm_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index da73801301d5..e515e99a02f9 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1415,9 +1415,11 @@ static struct kmem_cache * ufs_inode_cachep;
static struct inode *ufs_alloc_inode(struct super_block *sb)
{
struct ufs_inode_info *ei;
- ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS);
+
+ ei = kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
+
ei->vfs_inode.i_version = 1;
return &ei->vfs_inode;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 13e974e6a889..ac7f1e8f92b3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1384,5 +1384,4 @@ static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_vm_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 177d5973b132..129de9204d18 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -474,21 +474,6 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
return pte;
}
-
-static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
-{
- return pte;
-}
-
-static inline pte_t pte_file_mksoft_dirty(pte_t pte)
-{
- return pte;
-}
-
-static inline int pte_file_soft_dirty(pte_t pte)
-{
- return 0;
-}
#endif
#ifndef __HAVE_PFNMAP_TRACKING
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 202e4034fe26..5e7f75a6d7d0 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -96,10 +96,10 @@ extern int __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
unsigned int nbits);
-extern void __bitmap_shift_right(unsigned long *dst,
- const unsigned long *src, int shift, int bits);
-extern void __bitmap_shift_left(unsigned long *dst,
- const unsigned long *src, int shift, int bits);
+extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits);
+extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits);
extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
@@ -160,18 +160,22 @@ extern int bitmap_parselist(const char *buf, unsigned long *maskp,
extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
unsigned long *dst, int nbits);
extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
- const unsigned long *old, const unsigned long *new, int bits);
+ const unsigned long *old, const unsigned long *new, unsigned int nbits);
extern int bitmap_bitremap(int oldbit,
const unsigned long *old, const unsigned long *new, int bits);
extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
- const unsigned long *relmap, int bits);
+ const unsigned long *relmap, unsigned int bits);
extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
- int sz, int bits);
+ unsigned int sz, unsigned int nbits);
extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
-extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
-extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
+#ifdef __BIG_ENDIAN
+extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits);
+#else
+#define bitmap_copy_le bitmap_copy
+#endif
+extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits);
extern int bitmap_print_to_pagebuf(bool list, char *buf,
const unsigned long *maskp, int nmaskbits);
@@ -185,33 +189,33 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf,
#define small_const_nbits(nbits) \
(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
-static inline void bitmap_zero(unsigned long *dst, int nbits)
+static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = 0UL;
else {
- int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
memset(dst, 0, len);
}
}
-static inline void bitmap_fill(unsigned long *dst, int nbits)
+static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
{
- size_t nlongs = BITS_TO_LONGS(nbits);
+ unsigned int nlongs = BITS_TO_LONGS(nbits);
if (!small_const_nbits(nbits)) {
- int len = (nlongs - 1) * sizeof(unsigned long);
+ unsigned int len = (nlongs - 1) * sizeof(unsigned long);
memset(dst, 0xff, len);
}
dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
}
static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
- int nbits)
+ unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src;
else {
- int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
memcpy(dst, src, len);
}
}
@@ -309,22 +313,22 @@ static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
return __bitmap_weight(src, nbits);
}
-static inline void bitmap_shift_right(unsigned long *dst,
- const unsigned long *src, int n, int nbits)
+static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, int nbits)
{
if (small_const_nbits(nbits))
- *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n;
+ *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift;
else
- __bitmap_shift_right(dst, src, n, nbits);
+ __bitmap_shift_right(dst, src, shift, nbits);
}
-static inline void bitmap_shift_left(unsigned long *dst,
- const unsigned long *src, int n, int nbits)
+static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits)
{
if (small_const_nbits(nbits))
- *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits);
+ *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits);
else
- __bitmap_shift_left(dst, src, n, nbits);
+ __bitmap_shift_left(dst, src, shift, nbits);
}
static inline int bitmap_parse(const char *buf, unsigned int buflen,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index b950e9d6008b..ff9044286d88 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -905,13 +905,13 @@ static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
}
#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
-static inline void __cpus_setall(cpumask_t *dstp, int nbits)
+static inline void __cpus_setall(cpumask_t *dstp, unsigned int nbits)
{
bitmap_fill(dstp->bits, nbits);
}
#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
-static inline void __cpus_clear(cpumask_t *dstp, int nbits)
+static inline void __cpus_clear(cpumask_t *dstp, unsigned int nbits)
{
bitmap_zero(dstp->bits, nbits);
}
@@ -927,21 +927,21 @@ static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
+ const cpumask_t *src2p, unsigned int nbits)
{
return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
}
#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
+ const cpumask_t *src2p, unsigned int nbits)
{
bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
}
#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
+ const cpumask_t *src2p, unsigned int nbits)
{
bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
}
@@ -949,40 +949,40 @@ static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
#define cpus_andnot(dst, src1, src2) \
__cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
+ const cpumask_t *src2p, unsigned int nbits)
{
return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
}
#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
static inline int __cpus_equal(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
+ const cpumask_t *src2p, unsigned int nbits)
{
return bitmap_equal(src1p->bits, src2p->bits, nbits);
}
#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
static inline int __cpus_intersects(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
+ const cpumask_t *src2p, unsigned int nbits)
{
return bitmap_intersects(src1p->bits, src2p->bits, nbits);
}
#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
static inline int __cpus_subset(const cpumask_t *src1p,
- const cpumask_t *src2p, int nbits)
+ const cpumask_t *src2p, unsigned int nbits)
{
return bitmap_subset(src1p->bits, src2p->bits, nbits);
}
#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
-static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+static inline int __cpus_empty(const cpumask_t *srcp, unsigned int nbits)
{
return bitmap_empty(srcp->bits, nbits);
}
#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
-static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+static inline int __cpus_weight(const cpumask_t *srcp, unsigned int nbits)
{
return bitmap_weight(srcp->bits, nbits);
}
diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h
new file mode 100644
index 000000000000..bba7a4d692b3
--- /dev/null
+++ b/include/linux/crc64_ecma.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CRC64_ECMA_H_
+#define __CRC64_ECMA_H_
+
+#include <linux/types.h>
+
+
+#define CRC64_DEFAULT_INITVAL 0xFFFFFFFFFFFFFFFFULL
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * @pdata: pointer to the data to compute checksum for.
+ * @nbytes: number of bytes in data buffer.
+ * @seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed);
+
+#endif /* __CRC64_ECMA_H_ */
diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
index 2cd9f1cf9fa3..f4754282c9c2 100644
--- a/include/linux/cryptohash.h
+++ b/include/linux/cryptohash.h
@@ -1,6 +1,8 @@
#ifndef __CRYPTOHASH_H
#define __CRYPTOHASH_H
+#include <uapi/linux/types.h>
+
#define SHA_DIGEST_WORDS 5
#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
#define SHA_WORKSPACE_WORDS 16
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f90c0282c114..60acab209701 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -135,7 +135,7 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_CAN_WRITE ((__force fmode_t)0x40000)
/* File was opened by fanotify and shouldn't generate fanotify events */
-#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
+#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
@@ -401,7 +401,6 @@ struct address_space {
spinlock_t tree_lock; /* and lock protecting it */
atomic_t i_mmap_writable;/* count VM_SHARED mappings */
struct rb_root i_mmap; /* tree of private and shared mappings */
- struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */
@@ -493,8 +492,7 @@ static inline void i_mmap_unlock_read(struct address_space *mapping)
*/
static inline int mapping_mapped(struct address_space *mapping)
{
- return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
- !list_empty(&mapping->i_mmap_nonlinear);
+ return !RB_EMPTY_ROOT(&mapping->i_mmap);
}
/*
@@ -2481,8 +2479,6 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
-extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
- unsigned long size, pgoff_t pgoff);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ad9051bab267..44a840a53974 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -19,6 +19,9 @@ extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmd,
unsigned int flags);
+extern int madvise_free_huge_pmd(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long addr);
extern int zap_huge_pmd(struct mmu_gather *tlb,
struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr);
@@ -56,6 +59,7 @@ extern pmd_t *page_check_address_pmd(struct page *page,
unsigned long address,
enum page_check_address_pmd_flag flag,
spinlock_t **ptl);
+extern int pmd_freeable(pmd_t pmd);
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
@@ -157,6 +161,13 @@ static inline int hpage_nr_pages(struct page *page)
extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+extern struct page *huge_zero_page;
+
+static inline bool is_huge_zero_page(struct page *page)
+{
+ return ACCESS_ONCE(huge_zero_page) == page;
+}
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -206,6 +217,11 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str
return 0;
}
+static inline bool is_huge_zero_page(struct page *page)
+{
+ return false;
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 431b7fc605c9..7d7856359920 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -86,7 +86,7 @@ void free_huge_page(struct page *page);
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
#endif
-extern unsigned long hugepages_treat_as_movable;
+extern int hugepages_treat_as_movable;
extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3037fc085e8e..d3d43ecf148c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -193,6 +193,9 @@ extern struct task_group root_task_group;
.nr_cpus_allowed= NR_CPUS, \
.mm = NULL, \
.active_mm = &init_mm, \
+ .restart_block = { \
+ .fn = do_no_restart_syscall, \
+ }, \
.se = { \
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
}, \
diff --git a/include/linux/input.h b/include/linux/input.h
index 82ce323b9986..3b4c32f7312a 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -79,6 +79,7 @@ struct input_value {
* @led: reflects current state of device's LEDs
* @snd: reflects current state of sound effects
* @sw: reflects current state of device's switches
+ * @leds: leds objects for the device's LEDs
* @open: this method is called when the very first user calls
* input_open_device(). The driver must prepare the device
* to start generating events (start polling thread,
@@ -164,6 +165,8 @@ struct input_dev {
unsigned long snd[BITS_TO_LONGS(SND_CNT)];
unsigned long sw[BITS_TO_LONGS(SW_CNT)];
+ struct led_classdev *leds;
+
int (*open)(struct input_dev *dev);
void (*close)(struct input_dev *dev);
int (*flush)(struct input_dev *dev, struct file *file);
@@ -531,4 +534,29 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
int input_ff_create_memless(struct input_dev *dev, void *data,
int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
+#ifdef CONFIG_INPUT_LEDS
+
+void input_led_init(void);
+void input_led_exit(void);
+
+int input_led_connect(struct input_dev *dev);
+void input_led_disconnect(struct input_dev *dev);
+
+#else
+
+static inline void input_led_init(void) { }
+
+static inline void input_led_exit(void) { }
+
+static inline int input_led_connect(struct input_dev *dev)
+{
+ return 0;
+}
+
+static inline void input_led_disconnect(struct input_dev *dev)
+{
+}
+
+#endif
+
#endif
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 9d957b7ae095..e2244f704790 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -122,8 +122,6 @@ struct kimage {
kimage_entry_t *entry;
kimage_entry_t *last_entry;
- unsigned long destination;
-
unsigned long start;
struct page *control_code_page;
struct page *swap_page;
@@ -258,6 +256,8 @@ unsigned long paddr_vmcoreinfo_note(void);
vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
#define VMCOREINFO_CONFIG(name) \
vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+#define VMCOREINFO_PHYS_BASE(value) \
+ vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7c95af8d552c..76b4084b8d08 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -138,12 +138,10 @@ static inline bool mem_cgroup_disabled(void)
return false;
}
-struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked,
- unsigned long *flags);
-void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked,
- unsigned long *flags);
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page);
void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx, int val);
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg);
static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
@@ -285,14 +283,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}
-static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
- bool *locked, unsigned long *flags)
+static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page)
{
return NULL;
}
-static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg,
- bool *locked, unsigned long *flags)
+static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
{
}
@@ -403,10 +399,9 @@ void memcg_update_array_size(int num_groups);
struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
void __memcg_kmem_put_cache(struct kmem_cache *cachep);
-int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order);
-void __memcg_uncharge_slab(struct kmem_cache *cachep, int order);
-
-int __memcg_cleanup_cache_params(struct kmem_cache *s);
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
+ unsigned long nr_pages);
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages);
/**
* memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80fc92a49649..3b829b82e226 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp);
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
-#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
#define VM_ARCH_2 0x02000000
#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
@@ -206,21 +205,19 @@ extern unsigned int kobjsize(const void *objp);
extern pgprot_t protection_map[16];
#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
-#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
-#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */
-#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */
-#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
-#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
-#define FAULT_FLAG_TRIED 0x40 /* second try */
-#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
+#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED 0x20 /* Second try */
+#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
* ->fault function. The vma's ->fault is responsible for returning a bitmask
* of VM_FAULT_xxx flags that give details about how the fault was handled.
*
- * pgoff should be used in favour of virtual_address, if possible. If pgoff
- * is used, one may implement ->remap_pages to get nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible.
*/
struct vm_fault {
unsigned int flags; /* FAULT_FLAG_xxx flags */
@@ -287,9 +284,6 @@ struct vm_operations_struct {
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
unsigned long addr);
#endif
- /* called by sys_remap_file_pages() to populate non-linear mapping */
- int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff);
};
struct mmu_gather;
@@ -465,7 +459,8 @@ static inline void page_mapcount_reset(struct page *page)
static inline int page_mapcount(struct page *page)
{
- return atomic_read(&(page)->_mapcount) + 1;
+ VM_BUG_ON_PAGE(PageSlab(page), page);
+ return atomic_read(&page->_mapcount) + 1;
}
static inline int page_count(struct page *page)
@@ -601,29 +596,28 @@ int split_free_page(struct page *page);
* prototype for that function and accessor functions.
* These are _only_ valid on the head of a PG_compound page.
*/
-typedef void compound_page_dtor(struct page *);
static inline void set_compound_page_dtor(struct page *page,
compound_page_dtor *dtor)
{
- page[1].lru.next = (void *)dtor;
+ page[1].compound_dtor = dtor;
}
static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
{
- return (compound_page_dtor *)page[1].lru.next;
+ return page[1].compound_dtor;
}
static inline int compound_order(struct page *page)
{
if (!PageHead(page))
return 0;
- return (unsigned long)page[1].lru.prev;
+ return page[1].compound_order;
}
static inline void set_compound_order(struct page *page, unsigned long order)
{
- page[1].lru.prev = (void *)order;
+ page[1].compound_order = order;
}
#ifdef CONFIG_MMU
@@ -1119,7 +1113,6 @@ extern void user_shm_unlock(size_t, struct user_struct *);
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
- struct vm_area_struct *nonlinear_vma; /* Check page->index if set */
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
@@ -1366,6 +1359,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
mm->hiwater_vm = mm->total_vm;
}
+static inline void reset_mm_hiwater_rss(struct mm_struct *mm)
+{
+ mm->hiwater_rss = get_mm_rss(mm);
+}
+
static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
struct mm_struct *mm)
{
@@ -1775,12 +1773,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
for (vma = vma_interval_tree_iter_first(root, start, last); \
vma; vma = vma_interval_tree_iter_next(vma, start, last))
-static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
- struct list_head *list)
-{
- list_add_tail(&vma->shared.nonlinear, list);
-}
-
void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
struct rb_root *root);
void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6d34aa266a8c..20ff2105b564 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -28,6 +28,8 @@ struct mem_cgroup;
IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8)
+typedef void compound_page_dtor(struct page *);
+
/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
@@ -142,6 +144,12 @@ struct page {
struct rcu_head rcu_head; /* Used by SLAB
* when destroying via RCU
*/
+ /* First tail page of compound page */
+ struct {
+ compound_page_dtor *compound_dtor;
+ unsigned long compound_order;
+ };
+
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
pgtable_t pmd_huge_pte; /* protected by page->ptl */
#endif
@@ -273,15 +281,11 @@ struct vm_area_struct {
/*
* For areas with an address space and backing store,
- * linkage into the address_space->i_mmap interval tree, or
- * linkage of vma in the address_space->i_mmap_nonlinear list.
+ * linkage into the address_space->i_mmap interval tree.
*/
- union {
- struct {
- struct rb_node rb;
- unsigned long rb_subtree_last;
- } linear;
- struct list_head nonlinear;
+ struct {
+ struct rb_node rb;
+ unsigned long rb_subtree_last;
} shared;
/*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2f0856d14b21..b41829701334 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -426,7 +426,7 @@ struct zone {
const char *name;
/*
- * Number of MIGRATE_RESEVE page block. To maintain for just
+ * Number of MIGRATE_RESERVE page block. To maintain for just
* optimization. Protected by zone->lock.
*/
int nr_migrate_reserve_block;
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 83a6aeda899d..21cef483dc1b 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -120,13 +120,13 @@ static inline void __node_clear(int node, volatile nodemask_t *dstp)
}
#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
-static inline void __nodes_setall(nodemask_t *dstp, int nbits)
+static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits)
{
bitmap_fill(dstp->bits, nbits);
}
#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
-static inline void __nodes_clear(nodemask_t *dstp, int nbits)
+static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
{
bitmap_zero(dstp->bits, nbits);
}
@@ -144,7 +144,7 @@ static inline int __node_test_and_set(int node, nodemask_t *addr)
#define nodes_and(dst, src1, src2) \
__nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES)
static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
- const nodemask_t *src2p, int nbits)
+ const nodemask_t *src2p, unsigned int nbits)
{
bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
}
@@ -152,7 +152,7 @@ static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_or(dst, src1, src2) \
__nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES)
static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
- const nodemask_t *src2p, int nbits)
+ const nodemask_t *src2p, unsigned int nbits)
{
bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
}
@@ -160,7 +160,7 @@ static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_xor(dst, src1, src2) \
__nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES)
static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
- const nodemask_t *src2p, int nbits)
+ const nodemask_t *src2p, unsigned int nbits)
{
bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
}
@@ -168,7 +168,7 @@ static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_andnot(dst, src1, src2) \
__nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES)
static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
- const nodemask_t *src2p, int nbits)
+ const nodemask_t *src2p, unsigned int nbits)
{
bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
}
@@ -176,7 +176,7 @@ static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_complement(dst, src) \
__nodes_complement(&(dst), &(src), MAX_NUMNODES)
static inline void __nodes_complement(nodemask_t *dstp,
- const nodemask_t *srcp, int nbits)
+ const nodemask_t *srcp, unsigned int nbits)
{
bitmap_complement(dstp->bits, srcp->bits, nbits);
}
@@ -184,7 +184,7 @@ static inline void __nodes_complement(nodemask_t *dstp,
#define nodes_equal(src1, src2) \
__nodes_equal(&(src1), &(src2), MAX_NUMNODES)
static inline int __nodes_equal(const nodemask_t *src1p,
- const nodemask_t *src2p, int nbits)
+ const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_equal(src1p->bits, src2p->bits, nbits);
}
@@ -192,7 +192,7 @@ static inline int __nodes_equal(const nodemask_t *src1p,
#define nodes_intersects(src1, src2) \
__nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
static inline int __nodes_intersects(const nodemask_t *src1p,
- const nodemask_t *src2p, int nbits)
+ const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_intersects(src1p->bits, src2p->bits, nbits);
}
@@ -200,25 +200,25 @@ static inline int __nodes_intersects(const nodemask_t *src1p,
#define nodes_subset(src1, src2) \
__nodes_subset(&(src1), &(src2), MAX_NUMNODES)
static inline int __nodes_subset(const nodemask_t *src1p,
- const nodemask_t *src2p, int nbits)
+ const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_subset(src1p->bits, src2p->bits, nbits);
}
#define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
-static inline int __nodes_empty(const nodemask_t *srcp, int nbits)
+static inline int __nodes_empty(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_empty(srcp->bits, nbits);
}
#define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
-static inline int __nodes_full(const nodemask_t *srcp, int nbits)
+static inline int __nodes_full(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_full(srcp->bits, nbits);
}
#define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES)
-static inline int __nodes_weight(const nodemask_t *srcp, int nbits)
+static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_weight(srcp->bits, nbits);
}
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 853698c721f7..76200984d1e2 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -85,11 +85,6 @@ static inline void oom_killer_enable(void)
oom_killer_disabled = false;
}
-static inline bool oom_gfp_allowed(gfp_t gfp_mask)
-{
- return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY);
-}
-
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
static inline bool task_will_free_mem(struct task_struct *task)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index c8f170324e64..aeb9d7f43e27 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -408,9 +408,9 @@ enum {
DUMP_PREFIX_ADDRESS,
DUMP_PREFIX_OFFSET
};
-extern void hex_dump_to_buffer(const void *buf, size_t len,
- int rowsize, int groupsize,
- char *linebuf, size_t linebuflen, bool ascii);
+extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
+ int groupsize, char *linebuf, size_t linebuflen,
+ bool ascii);
#ifdef CONFIG_PRINTK
extern void print_hex_dump(const char *level, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 57e75ae9910f..fb31765e935a 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -51,7 +51,7 @@ struct rb_root {
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
-/* 'empty' nodes are nodes that are known not to be inserted in an rbree */
+/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
#define RB_EMPTY_NODE(node) \
((node)->__rb_parent_color == (unsigned long)(node))
#define RB_CLEAR_NODE(node) \
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c0c2bce6b0b7..dbcd5ec3f291 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,6 +37,16 @@ struct anon_vma {
atomic_t refcount;
/*
+ * Count of child anon_vmas and VMAs which points to this anon_vma.
+ *
+ * This counter is used for making decision about reusing anon_vma
+ * instead of forking new one. See comments in function anon_vma_clone.
+ */
+ unsigned degree;
+
+ struct anon_vma *parent; /* Parent of this anon_vma */
+
+ /*
* NOTE: the LSB of the rb_root.rb_node is set by
* mm_take_all_locks() _after_ taking the above lock. So the
* rb_root must only be read/written after taking the above lock
@@ -75,6 +85,7 @@ enum ttu_flags {
TTU_UNMAP = 1, /* unmap mode */
TTU_MIGRATION = 2, /* migration mode */
TTU_MUNLOCK = 4, /* munlock mode */
+ TTU_FREE = 8, /* free mode */
TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
@@ -181,7 +192,8 @@ static inline void page_dup_rmap(struct page *page)
* Called from mm/vmscan.c to handle paging out
*/
int page_referenced(struct page *, int is_locked,
- struct mem_cgroup *memcg, unsigned long *vm_flags);
+ struct mem_cgroup *memcg, unsigned long *vm_flags,
+ int *is_pte_dirty);
#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
@@ -236,7 +248,6 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
* arg: passed to rmap_one() and invalid_vma()
* rmap_one: executed on each vma where page is mapped
* done: for checking traversing termination condition
- * file_nonlinear: for handling file nonlinear mapping
* anon_lock: for getting anon_lock by optimized way rather than default
* invalid_vma: for skipping uninterested vma
*/
@@ -245,7 +256,6 @@ struct rmap_walk_control {
int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
unsigned long addr, void *arg);
int (*done)(struct page *page);
- int (*file_nonlinear)(struct page *, struct address_space *, void *arg);
struct anon_vma *(*anon_lock)(struct page *page);
bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};
@@ -260,9 +270,12 @@ int rmap_walk(struct page *page, struct rmap_walk_control *rwc);
static inline int page_referenced(struct page *page, int is_locked,
struct mem_cgroup *memcg,
- unsigned long *vm_flags)
+ unsigned long *vm_flags,
+ int *is_pte_dirty)
{
*vm_flags = 0;
+ if (is_pte_dirty)
+ *is_pte_dirty = 0;
return 0;
}
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 6d6be09a2fe5..bc805fff00dc 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -133,6 +133,10 @@ struct rtc_device
/* Some hardware can't support UIE mode */
int uie_unsupported;
+#ifdef CONFIG_PM_SLEEP
+ struct rtc_wkalrm alarm;
+ bool valid_alarm;
+#endif
#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
struct work_struct uie_task;
struct timer_list uie_timer;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8db31ef98d2f..22ee0d5d7f8c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1370,6 +1370,8 @@ struct task_struct {
unsigned long atomic_flags; /* Flags needing atomic access. */
+ struct restart_block restart_block;
+
pid_t pid;
pid_t tgid;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9a139b637069..2e3b448cfa2d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -116,9 +116,8 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
unsigned long,
void (*)(void *));
#ifdef CONFIG_MEMCG_KMEM
-struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *,
- struct kmem_cache *,
- const char *);
+void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *);
+void memcg_destroy_kmem_caches(struct mem_cgroup *);
#endif
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
@@ -491,7 +490,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
* Child caches will hold extra metadata needed for its operation. Fields are:
*
* @memcg: pointer to the memcg this cache belongs to
- * @list: list_head for the list of all caches in this memcg
* @root_cache: pointer to the global, root cache, this cache was derived from
*/
struct memcg_cache_params {
@@ -503,7 +501,6 @@ struct memcg_cache_params {
};
struct {
struct mem_cgroup *memcg;
- struct list_head list;
struct kmem_cache *root_cache;
};
};
diff --git a/include/linux/string.h b/include/linux/string.h
index 2e22a2e58f3a..2bfba832a0ab 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -40,9 +40,6 @@ extern int strcmp(const char *,const char *);
#ifndef __HAVE_ARCH_STRNCMP
extern int strncmp(const char *,const char *,__kernel_size_t);
#endif
-#ifndef __HAVE_ARCH_STRNICMP
-#define strnicmp strncasecmp
-#endif
#ifndef __HAVE_ARCH_STRCASECMP
extern int strcasecmp(const char *s1, const char *s2);
#endif
@@ -117,6 +114,7 @@ void *memchr_inv(const void *s, int c, size_t n);
extern char *kstrdup(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
+extern char *kstrimdup(const char *s, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 6eb567ac56bc..657571817260 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -10,8 +10,8 @@ enum string_size_units {
STRING_UNITS_2, /* use binary powers of 2^10 */
};
-int string_get_size(u64 size, enum string_size_units units,
- char *buf, int len);
+void string_get_size(u64 size, enum string_size_units units,
+ char *buf, int len);
#define UNESCAPE_SPACE 0x01
#define UNESCAPE_OCTAL 0x02
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 34e8b60ab973..7067eca501e2 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -437,16 +437,6 @@ extern int reuse_swap_page(struct page *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
-#ifdef CONFIG_MEMCG
-extern void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
-#else
-static inline void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
-}
-#endif
-
#else /* CONFIG_SWAP */
#define swap_address_space(entry) (NULL)
@@ -547,11 +537,6 @@ static inline swp_entry_t get_swap_page(void)
return entry;
}
-static inline void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
-{
-}
-
#endif /* CONFIG_SWAP */
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 6adfb7bfbf44..50cbc876be56 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry)
/* check whether a pte points to a swap entry */
static inline int is_swap_pte(pte_t pte)
{
- return !pte_none(pte) && !pte_present_nonuma(pte) && !pte_file(pte);
+ return !pte_none(pte) && !pte_present_nonuma(pte);
}
#endif
@@ -66,7 +66,6 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
{
swp_entry_t arch_entry;
- BUG_ON(pte_file(pte));
if (pte_swp_soft_dirty(pte))
pte = pte_swp_clear_soft_dirty(pte);
arch_entry = __pte_to_swp_entry(pte);
@@ -82,7 +81,6 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry)
swp_entry_t arch_entry;
arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
- BUG_ON(pte_file(__swp_entry_to_pte(arch_entry)));
return __swp_entry_to_pte(arch_entry);
}
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 9246d32dc973..2b1cef88b827 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -25,6 +25,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
PGFREE, PGACTIVATE, PGDEACTIVATE,
PGFAULT, PGMAJFAULT,
+ PGLAZYFREED,
FOR_ALL_ZONES(PGREFILL),
FOR_ALL_ZONES(PGSTEAL_KSWAPD),
FOR_ALL_ZONES(PGSTEAL_DIRECT),
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a219be961c0a..00048339c23e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -177,7 +177,6 @@ int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
-void set_page_dirty_balance(struct page *page);
void writeback_set_ratelimit(void);
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index f14bd75f08b3..56529b34dc63 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -36,7 +36,8 @@ enum zpool_mapmode {
ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
};
-struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops);
+struct zpool *zpool_create_pool(char *type, char *name,
+ gfp_t gfp, struct zpool_ops *ops);
char *zpool_get_type(struct zpool *pool);
@@ -80,7 +81,7 @@ struct zpool_driver {
atomic_t refcount;
struct list_head list;
- void *(*create)(gfp_t gfp, struct zpool_ops *ops);
+ void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops);
void (*destroy)(void *pool);
int (*malloc)(void *pool, size_t size, gfp_t gfp,
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 05c214760977..3283c6a55425 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -36,7 +36,7 @@ enum zs_mapmode {
struct zs_pool;
-struct zs_pool *zs_create_pool(gfp_t flags);
+struct zs_pool *zs_create_pool(char *name, gfp_t flags);
void zs_destroy_pool(struct zs_pool *pool);
unsigned long zs_malloc(struct zs_pool *pool, size_t size);
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 7543b3e51331..e063effe0cc1 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -5,7 +5,7 @@
/*
* FMODE_EXEC is 0x20
- * FMODE_NONOTIFY is 0x1000000
+ * FMODE_NONOTIFY is 0x4000000
* These cannot be used by userspace O_* until internal and external open
* flags are split.
* -Eric Paris
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index ddc3b36f1046..7a94102b7a02 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -34,6 +34,7 @@
#define MADV_SEQUENTIAL 2 /* expect sequential page references */
#define MADV_WILLNEED 3 /* will need these pages */
#define MADV_DONTNEED 4 /* don't need these pages */
+#define MADV_FREE 5 /* free pages only if memory pressure */
/* common parameters: try to keep these consistent across architectures */
#define MADV_REMOVE 9 /* remove these pages & resources */
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index 2f96d233c980..a6c4962e5d46 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -32,6 +32,7 @@
#define KPF_KSM 21
#define KPF_THP 22
#define KPF_BALLOON 23
+#define KPF_ZERO_PAGE 24
#endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/init/Kconfig b/init/Kconfig
index d49cfc81db6f..dd7d19120db4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1285,22 +1285,6 @@ source "usr/Kconfig"
endif
-config INIT_FALLBACK
- bool "Fall back to defaults if init= parameter is bad"
- default y
- help
- If enabled, the kernel will try the default init binaries if an
- explicit request from the init= parameter fails.
-
- This can have unexpected effects. For example, booting
- with init=/sbin/kiosk_app will run /sbin/init or even /bin/sh
- if /sbin/kiosk_app cannot be executed.
-
- The default value of Y is consistent with historical behavior.
- Selecting N is likely to be more appropriate for most uses,
- especially on kiosks and on kernels that are intended to be
- run under the control of a script.
-
config CC_OPTIMIZE_FOR_SIZE
bool "Optimize for size"
help
diff --git a/init/main.c b/init/main.c
index 61b993767db5..cf954286596b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -966,13 +966,8 @@ static int __ref kernel_init(void *unused)
ret = run_init_process(execute_command);
if (!ret)
return 0;
-#ifndef CONFIG_INIT_FALLBACK
panic("Requested init %s failed (error %d).",
execute_command, ret);
-#else
- pr_err("Failed to execute %s (error %d). Attempting defaults...\n",
- execute_command, ret);
-#endif
}
if (!try_to_run_init_process("/sbin/init") ||
!try_to_run_init_process("/etc/init") ||
diff --git a/kernel/compat.c b/kernel/compat.c
index ebb3c369d03d..24f00610c575 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -276,8 +276,7 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
* core implementation decides to return random nonsense.
*/
if (ret == -ERESTART_RESTARTBLOCK) {
- struct restart_block *restart
- = &current_thread_info()->restart_block;
+ struct restart_block *restart = &current->restart_block;
restart->fn = compat_nanosleep_restart;
restart->nanosleep.compat_rmtp = rmtp;
@@ -860,7 +859,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
return -EFAULT;
if (err == -ERESTART_RESTARTBLOCK) {
- restart = &current_thread_info()->restart_block;
+ restart = &current->restart_block;
restart->fn = compat_clock_nanosleep_restart;
restart->nanosleep.compat_rmtp = rmtp;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 1ea4369890a3..6806c55475ee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1287,9 +1287,15 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
static int wait_consider_task(struct wait_opts *wo, int ptrace,
struct task_struct *p)
{
+ /*
+ * We can race with wait_task_zombie() from another thread.
+ * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition
+ * can't confuse the checks below.
+ */
+ int exit_state = ACCESS_ONCE(p->exit_state);
int ret;
- if (unlikely(p->exit_state == EXIT_DEAD))
+ if (unlikely(exit_state == EXIT_DEAD))
return 0;
ret = eligible_child(wo, p);
@@ -1310,7 +1316,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
return 0;
}
- if (unlikely(p->exit_state == EXIT_TRACE)) {
+ if (unlikely(exit_state == EXIT_TRACE)) {
/*
* ptrace == 0 means we are the natural parent. In this case
* we should clear notask_error, debugger will notify us.
@@ -1337,7 +1343,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
}
/* slay zombie? */
- if (p->exit_state == EXIT_ZOMBIE) {
+ if (exit_state == EXIT_ZOMBIE) {
/* we don't reap group leaders with subthreads */
if (!delay_group_leader(p)) {
/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 4dc2ddade9f1..b379d9abddc7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -438,12 +438,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
atomic_inc(&mapping->i_mmap_writable);
flush_dcache_mmap_lock(mapping);
/* insert tmp into the share list, just after mpnt */
- if (unlikely(tmp->vm_flags & VM_NONLINEAR))
- vma_nonlinear_insert(tmp,
- &mapping->i_mmap_nonlinear);
- else
- vma_interval_tree_insert_after(tmp, mpnt,
- &mapping->i_mmap);
+ vma_interval_tree_insert_after(tmp, mpnt,
+ &mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
i_mmap_unlock_write(mapping);
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 63678b573d61..f4d8a85641ed 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2217,7 +2217,7 @@ retry:
if (!abs_time)
goto out;
- restart = &current_thread_info()->restart_block;
+ restart = &current->restart_block;
restart->fn = futex_wait_restart;
restart->futex.uaddr = uaddr;
restart->futex.val = val;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 9a8a01abbaed..34677bc81c01 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -444,7 +444,7 @@ arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
}
/*
- * Free up memory used by kernel, initrd, and comand line. This is temporary
+ * Free up memory used by kernel, initrd, and command line. This is temporary
* memory allocation which is not needed any more after these buffers have
* been loaded into separate segments and have been copied elsewhere.
*/
@@ -856,8 +856,6 @@ static int kimage_set_destination(struct kimage *image,
destination &= PAGE_MASK;
result = kimage_add_entry(image, destination | IND_DESTINATION);
- if (result == 0)
- image->destination = destination;
return result;
}
@@ -869,8 +867,6 @@ static int kimage_add_page(struct kimage *image, unsigned long page)
page &= PAGE_MASK;
result = kimage_add_entry(image, page | IND_SOURCE);
- if (result == 0)
- image->destination += PAGE_SIZE;
return result;
}
diff --git a/kernel/params.c b/kernel/params.c
index 0af9b2c4e56c..bd65d136a470 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -648,6 +648,8 @@ static __modinit int add_sysfs_param(struct module_kobject *mk,
/* Do not allow runtime DAC changes to make param writable. */
if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store;
+ else
+ mk->mp->attrs[mk->mp->num].mattr.store = NULL;
mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name;
mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm;
mk->mp->num++;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 02d6b6d28796..c06df7de0963 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -935,8 +935,8 @@ static int __init ignore_loglevel_setup(char *str)
early_param("ignore_loglevel", ignore_loglevel_setup);
module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
- "print all kernel messages to the console.");
+MODULE_PARM_DESC(ignore_loglevel,
+ "ignore loglevel setting (prints all kernel messages to the console)");
#ifdef CONFIG_BOOT_PRINTK_DELAY
@@ -1419,16 +1419,16 @@ static void call_console_drivers(int level, const char *text, size_t len)
}
/*
- * Zap console related locks when oopsing. Only zap at most once
- * every 10 seconds, to leave time for slow consoles to print a
- * full oops.
+ * Zap console related locks when oopsing.
+ * To leave time for slow consoles to print a full oops,
+ * only zap at most once every 30 seconds.
*/
static void zap_locks(void)
{
static unsigned long oops_timestamp;
if (time_after_eq(jiffies, oops_timestamp) &&
- !time_after(jiffies, oops_timestamp + 30 * HZ))
+ !time_after(jiffies, oops_timestamp + 30 * HZ))
return;
oops_timestamp = jiffies;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1eb9d90c3af9..227fec36b12a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1077,7 +1077,6 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
}
#if defined CONFIG_COMPAT
-#include <linux/compat.h>
int compat_ptrace_request(struct task_struct *child, compat_long_t request,
compat_ulong_t addr, compat_ulong_t data)
diff --git a/kernel/signal.c b/kernel/signal.c
index 16a305295256..33a52759cc0e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2501,7 +2501,7 @@ EXPORT_SYMBOL(unblock_all_signals);
*/
SYSCALL_DEFINE0(restart_syscall)
{
- struct restart_block *restart = &current_thread_info()->restart_block;
+ struct restart_block *restart = &current->restart_block;
return restart->fn(restart);
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 137c7f69b264..88ea2d6e0031 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1248,7 +1248,6 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_sysctl_handler,
- .extra1 = &zero,
},
#ifdef CONFIG_NUMA
{
@@ -1257,7 +1256,6 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &hugetlb_mempolicy_sysctl_handler,
- .extra1 = &zero,
},
#endif
{
@@ -1280,7 +1278,6 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_overcommit_handler,
- .extra1 = &zero,
},
#endif
{
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index a7077d3ae52f..1b001ed1edb9 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -788,7 +788,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
goto out;
}
- restart = &current_thread_info()->restart_block;
+ restart = &current->restart_block;
restart->fn = alarm_timer_nsleep_restart;
restart->nanosleep.clockid = type;
restart->nanosleep.expires = exp.tv64;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 37e50aadd471..dbcec65d08c4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1591,7 +1591,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
goto out;
}
- restart = &current_thread_info()->restart_block;
+ restart = &current->restart_block;
restart->fn = hrtimer_nanosleep_restart;
restart->nanosleep.clockid = t.timer.base->clockid;
restart->nanosleep.rmtp = rmtp;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a16b67859e2a..0075da74abf0 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1334,8 +1334,7 @@ static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
struct timespec *rqtp, struct timespec __user *rmtp)
{
- struct restart_block *restart_block =
- &current_thread_info()->restart_block;
+ struct restart_block *restart_block = &current->restart_block;
struct itimerspec it;
int error;
diff --git a/lib/Kconfig b/lib/Kconfig
index 75da4d3e2afd..973ca9a3a1a0 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -180,6 +180,13 @@ config CRC8
when they need to do cyclic redundancy check according CRC8
algorithm. Module will be called crc8.
+config CRC64_ECMA
+ tristate "CRC64 ECMA function"
+ help
+ This option provides CRC64 ECMA function. Drivers may select this
+ when they need to do cyclic redundancy check according to the CRC64
+ ECMA algorithm.
+
config AUDIT_GENERIC
bool
depends on AUDIT && !AUDIT_ARCH
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a4a6d9af8dca..5fe0a71367a7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1580,6 +1580,9 @@ config ASYNC_RAID6_TEST
If unsure, say N.
+config TEST_HEXDUMP
+ tristate "Test functions located in the hexdump module at runtime"
+
config TEST_STRING_HELPERS
tristate "Test functions located in the string_helpers module at runtime"
diff --git a/lib/Makefile b/lib/Makefile
index 3c3b30b9e020..3dca86fa7dc9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,12 +23,14 @@ lib-y += kobject.o klist.o
obj-y += lockref.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
- bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
+ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
+obj-y += hexdump.o
+obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o
obj-y += kstrtox.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LKM) += test_module.o
@@ -72,6 +74,7 @@ obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
+obj-$(CONFIG_CRC64_ECMA) += crc64_ecma.o
obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 324ea9eab8c1..a13c7f4e325a 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -104,18 +104,18 @@ EXPORT_SYMBOL(__bitmap_complement);
* @dst : destination bitmap
* @src : source bitmap
* @shift : shift by this many bits
- * @bits : bitmap size, in bits
+ * @nbits : bitmap size, in bits
*
* Shifting right (dividing) means moving bits in the MS -> LS bit
* direction. Zeros are fed into the vacated MS positions and the
* LS bits shifted off the bottom are lost.
*/
-void __bitmap_shift_right(unsigned long *dst,
- const unsigned long *src, int shift, int bits)
+void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
+ unsigned shift, unsigned nbits)
{
- int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG;
- int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
- unsigned long mask = (1UL << left) - 1;
+ unsigned k, lim = BITS_TO_LONGS(nbits);
+ unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
+ unsigned long mask = BITMAP_LAST_WORD_MASK(nbits);
for (k = 0; off + k < lim; ++k) {
unsigned long upper, lower;
@@ -127,17 +127,15 @@ void __bitmap_shift_right(unsigned long *dst,
upper = 0;
else {
upper = src[off + k + 1];
- if (off + k + 1 == lim - 1 && left)
+ if (off + k + 1 == lim - 1)
upper &= mask;
+ upper <<= (BITS_PER_LONG - rem);
}
lower = src[off + k];
- if (left && off + k == lim - 1)
+ if (off + k == lim - 1)
lower &= mask;
- dst[k] = lower >> rem;
- if (rem)
- dst[k] |= upper << (BITS_PER_LONG - rem);
- if (left && k == lim - 1)
- dst[k] &= mask;
+ lower >>= rem;
+ dst[k] = lower | upper;
}
if (off)
memset(&dst[lim - off], 0, off*sizeof(unsigned long));
@@ -150,18 +148,19 @@ EXPORT_SYMBOL(__bitmap_shift_right);
* @dst : destination bitmap
* @src : source bitmap
* @shift : shift by this many bits
- * @bits : bitmap size, in bits
+ * @nbits : bitmap size, in bits
*
* Shifting left (multiplying) means moving bits in the LS -> MS
* direction. Zeros are fed into the vacated LS bit positions
* and those MS bits shifted off the top are lost.
*/
-void __bitmap_shift_left(unsigned long *dst,
- const unsigned long *src, int shift, int bits)
+void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits)
{
- int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG;
- int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
+ int k;
+ unsigned int lim = BITS_TO_LONGS(nbits);
+ unsigned int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
for (k = lim - off - 1; k >= 0; --k) {
unsigned long upper, lower;
@@ -170,17 +169,11 @@ void __bitmap_shift_left(unsigned long *dst,
* word below and make them the bottom rem bits of result.
*/
if (rem && k > 0)
- lower = src[k - 1];
+ lower = src[k - 1] >> (BITS_PER_LONG - rem);
else
lower = 0;
- upper = src[k];
- if (left && k == lim - 1)
- upper &= (1UL << left) - 1;
- dst[k + off] = upper << rem;
- if (rem)
- dst[k + off] |= lower >> (BITS_PER_LONG - rem);
- if (left && k + off == lim - 1)
- dst[k + off] &= (1UL << left) - 1;
+ upper = src[k] << rem;
+ dst[k + off] = lower | upper;
}
if (off)
memset(dst, 0, off*sizeof(unsigned long));
@@ -744,10 +737,10 @@ EXPORT_SYMBOL(bitmap_parselist_user);
/**
* bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
* @buf: pointer to a bitmap
- * @pos: a bit position in @buf (0 <= @pos < @bits)
- * @bits: number of valid bit positions in @buf
+ * @pos: a bit position in @buf (0 <= @pos < @nbits)
+ * @nbits: number of valid bit positions in @buf
*
- * Map the bit at position @pos in @buf (of length @bits) to the
+ * Map the bit at position @pos in @buf (of length @nbits) to the
* ordinal of which set bit it is. If it is not set or if @pos
* is not a valid bit position, map to -1.
*
@@ -759,56 +752,40 @@ EXPORT_SYMBOL(bitmap_parselist_user);
*
* The bit positions 0 through @bits are valid positions in @buf.
*/
-static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
+static int bitmap_pos_to_ord(const unsigned long *buf, unsigned int pos, unsigned int nbits)
{
- int i, ord;
-
- if (pos < 0 || pos >= bits || !test_bit(pos, buf))
+ if (pos >= nbits || !test_bit(pos, buf))
return -1;
- i = find_first_bit(buf, bits);
- ord = 0;
- while (i < pos) {
- i = find_next_bit(buf, bits, i + 1);
- ord++;
- }
- BUG_ON(i != pos);
-
- return ord;
+ return __bitmap_weight(buf, pos);
}
/**
* bitmap_ord_to_pos - find position of n-th set bit in bitmap
* @buf: pointer to bitmap
* @ord: ordinal bit position (n-th set bit, n >= 0)
- * @bits: number of valid bit positions in @buf
+ * @nbits: number of valid bit positions in @buf
*
* Map the ordinal offset of bit @ord in @buf to its position in @buf.
- * Value of @ord should be in range 0 <= @ord < weight(buf), else
- * results are undefined.
+ * Value of @ord should be in range 0 <= @ord < weight(buf). If @ord
+ * >= weight(buf), returns @nbits.
*
* If for example, just bits 4 through 7 are set in @buf, then @ord
* values 0 through 3 will get mapped to 4 through 7, respectively,
- * and all other @ord values return undefined values. When @ord value 3
+ * and all other @ord values returns @nbits. When @ord value 3
* gets mapped to (returns) @pos value 7 in this example, that means
* that the 3rd set bit (starting with 0th) is at position 7 in @buf.
*
- * The bit positions 0 through @bits are valid positions in @buf.
+ * The bit positions 0 through @nbits-1 are valid positions in @buf.
*/
-int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
+unsigned int bitmap_ord_to_pos(const unsigned long *buf, unsigned int ord, unsigned int nbits)
{
- int pos = 0;
-
- if (ord >= 0 && ord < bits) {
- int i;
+ unsigned int pos;
- for (i = find_first_bit(buf, bits);
- i < bits && ord > 0;
- i = find_next_bit(buf, bits, i + 1))
- ord--;
- if (i < bits && ord == 0)
- pos = i;
- }
+ for (pos = find_first_bit(buf, nbits);
+ pos < nbits && ord;
+ pos = find_next_bit(buf, nbits, pos + 1))
+ ord--;
return pos;
}
@@ -819,7 +796,7 @@ int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
* @src: subset to be remapped
* @old: defines domain of map
* @new: defines range of map
- * @bits: number of bits in each of these bitmaps
+ * @nbits: number of bits in each of these bitmaps
*
* Let @old and @new define a mapping of bit positions, such that
* whatever position is held by the n-th set bit in @old is mapped
@@ -847,22 +824,22 @@ int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
*/
void bitmap_remap(unsigned long *dst, const unsigned long *src,
const unsigned long *old, const unsigned long *new,
- int bits)
+ unsigned int nbits)
{
- int oldbit, w;
+ unsigned int oldbit, w;
if (dst == src) /* following doesn't handle inplace remaps */
return;
- bitmap_zero(dst, bits);
+ bitmap_zero(dst, nbits);
- w = bitmap_weight(new, bits);
- for_each_set_bit(oldbit, src, bits) {
- int n = bitmap_pos_to_ord(old, oldbit, bits);
+ w = bitmap_weight(new, nbits);
+ for_each_set_bit(oldbit, src, nbits) {
+ int n = bitmap_pos_to_ord(old, oldbit, nbits);
if (n < 0 || w == 0)
set_bit(oldbit, dst); /* identity map */
else
- set_bit(bitmap_ord_to_pos(new, n % w, bits), dst);
+ set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst);
}
}
EXPORT_SYMBOL(bitmap_remap);
@@ -1006,9 +983,9 @@ EXPORT_SYMBOL(bitmap_bitremap);
* All bits in @dst not set by the above rule are cleared.
*/
void bitmap_onto(unsigned long *dst, const unsigned long *orig,
- const unsigned long *relmap, int bits)
+ const unsigned long *relmap, unsigned int bits)
{
- int n, m; /* same meaning as in above comment */
+ unsigned int n, m; /* same meaning as in above comment */
if (dst == orig) /* following doesn't handle inplace mappings */
return;
@@ -1039,22 +1016,22 @@ EXPORT_SYMBOL(bitmap_onto);
* @dst: resulting smaller bitmap
* @orig: original larger bitmap
* @sz: specified size
- * @bits: number of bits in each of these bitmaps
+ * @nbits: number of bits in each of these bitmaps
*
* For each bit oldbit in @orig, set bit oldbit mod @sz in @dst.
* Clear all other bits in @dst. See further the comment and
* Example [2] for bitmap_onto() for why and how to use this.
*/
void bitmap_fold(unsigned long *dst, const unsigned long *orig,
- int sz, int bits)
+ unsigned int sz, unsigned int nbits)
{
- int oldbit;
+ unsigned int oldbit;
if (dst == orig) /* following doesn't handle inplace mappings */
return;
- bitmap_zero(dst, bits);
+ bitmap_zero(dst, nbits);
- for_each_set_bit(oldbit, orig, bits)
+ for_each_set_bit(oldbit, orig, nbits)
set_bit(oldbit % sz, dst);
}
EXPORT_SYMBOL(bitmap_fold);
@@ -1207,16 +1184,17 @@ EXPORT_SYMBOL(bitmap_allocate_region);
*
* Require nbits % BITS_PER_LONG == 0.
*/
-void bitmap_copy_le(void *dst, const unsigned long *src, int nbits)
+#ifdef __BIG_ENDIAN
+void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits)
{
- unsigned long *d = dst;
- int i;
+ unsigned int i;
for (i = 0; i < nbits/BITS_PER_LONG; i++) {
if (BITS_PER_LONG == 64)
- d[i] = cpu_to_le64(src[i]);
+ dst[i] = cpu_to_le64(src[i]);
else
- d[i] = cpu_to_le32(src[i]);
+ dst[i] = cpu_to_le32(src[i]);
}
}
EXPORT_SYMBOL(bitmap_copy_le);
+#endif
diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c
new file mode 100644
index 000000000000..41629ea5a60c
--- /dev/null
+++ b/lib/crc64_ecma.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/crc64_ecma.h>
+
+
+#define CRC64_BYTE_MASK 0xFF
+#define CRC64_TABLE_SIZE 256
+
+
+struct crc64_table {
+ u64 seed;
+ u64 table[CRC64_TABLE_SIZE];
+};
+
+
+static struct crc64_table CRC64_ECMA_182 = {
+ CRC64_DEFAULT_INITVAL,
+ {
+ 0x0000000000000000ULL,
+ 0xb32e4cbe03a75f6fULL,
+ 0xf4843657a840a05bULL,
+ 0x47aa7ae9abe7ff34ULL,
+ 0x7bd0c384ff8f5e33ULL,
+ 0xc8fe8f3afc28015cULL,
+ 0x8f54f5d357cffe68ULL,
+ 0x3c7ab96d5468a107ULL,
+ 0xf7a18709ff1ebc66ULL,
+ 0x448fcbb7fcb9e309ULL,
+ 0x0325b15e575e1c3dULL,
+ 0xb00bfde054f94352ULL,
+ 0x8c71448d0091e255ULL,
+ 0x3f5f08330336bd3aULL,
+ 0x78f572daa8d1420eULL,
+ 0xcbdb3e64ab761d61ULL,
+ 0x7d9ba13851336649ULL,
+ 0xceb5ed8652943926ULL,
+ 0x891f976ff973c612ULL,
+ 0x3a31dbd1fad4997dULL,
+ 0x064b62bcaebc387aULL,
+ 0xb5652e02ad1b6715ULL,
+ 0xf2cf54eb06fc9821ULL,
+ 0x41e11855055bc74eULL,
+ 0x8a3a2631ae2dda2fULL,
+ 0x39146a8fad8a8540ULL,
+ 0x7ebe1066066d7a74ULL,
+ 0xcd905cd805ca251bULL,
+ 0xf1eae5b551a2841cULL,
+ 0x42c4a90b5205db73ULL,
+ 0x056ed3e2f9e22447ULL,
+ 0xb6409f5cfa457b28ULL,
+ 0xfb374270a266cc92ULL,
+ 0x48190ecea1c193fdULL,
+ 0x0fb374270a266cc9ULL,
+ 0xbc9d3899098133a6ULL,
+ 0x80e781f45de992a1ULL,
+ 0x33c9cd4a5e4ecdceULL,
+ 0x7463b7a3f5a932faULL,
+ 0xc74dfb1df60e6d95ULL,
+ 0x0c96c5795d7870f4ULL,
+ 0xbfb889c75edf2f9bULL,
+ 0xf812f32ef538d0afULL,
+ 0x4b3cbf90f69f8fc0ULL,
+ 0x774606fda2f72ec7ULL,
+ 0xc4684a43a15071a8ULL,
+ 0x83c230aa0ab78e9cULL,
+ 0x30ec7c140910d1f3ULL,
+ 0x86ace348f355aadbULL,
+ 0x3582aff6f0f2f5b4ULL,
+ 0x7228d51f5b150a80ULL,
+ 0xc10699a158b255efULL,
+ 0xfd7c20cc0cdaf4e8ULL,
+ 0x4e526c720f7dab87ULL,
+ 0x09f8169ba49a54b3ULL,
+ 0xbad65a25a73d0bdcULL,
+ 0x710d64410c4b16bdULL,
+ 0xc22328ff0fec49d2ULL,
+ 0x85895216a40bb6e6ULL,
+ 0x36a71ea8a7ace989ULL,
+ 0x0adda7c5f3c4488eULL,
+ 0xb9f3eb7bf06317e1ULL,
+ 0xfe5991925b84e8d5ULL,
+ 0x4d77dd2c5823b7baULL,
+ 0x64b62bcaebc387a1ULL,
+ 0xd7986774e864d8ceULL,
+ 0x90321d9d438327faULL,
+ 0x231c512340247895ULL,
+ 0x1f66e84e144cd992ULL,
+ 0xac48a4f017eb86fdULL,
+ 0xebe2de19bc0c79c9ULL,
+ 0x58cc92a7bfab26a6ULL,
+ 0x9317acc314dd3bc7ULL,
+ 0x2039e07d177a64a8ULL,
+ 0x67939a94bc9d9b9cULL,
+ 0xd4bdd62abf3ac4f3ULL,
+ 0xe8c76f47eb5265f4ULL,
+ 0x5be923f9e8f53a9bULL,
+ 0x1c4359104312c5afULL,
+ 0xaf6d15ae40b59ac0ULL,
+ 0x192d8af2baf0e1e8ULL,
+ 0xaa03c64cb957be87ULL,
+ 0xeda9bca512b041b3ULL,
+ 0x5e87f01b11171edcULL,
+ 0x62fd4976457fbfdbULL,
+ 0xd1d305c846d8e0b4ULL,
+ 0x96797f21ed3f1f80ULL,
+ 0x2557339fee9840efULL,
+ 0xee8c0dfb45ee5d8eULL,
+ 0x5da24145464902e1ULL,
+ 0x1a083bacedaefdd5ULL,
+ 0xa9267712ee09a2baULL,
+ 0x955cce7fba6103bdULL,
+ 0x267282c1b9c65cd2ULL,
+ 0x61d8f8281221a3e6ULL,
+ 0xd2f6b4961186fc89ULL,
+ 0x9f8169ba49a54b33ULL,
+ 0x2caf25044a02145cULL,
+ 0x6b055fede1e5eb68ULL,
+ 0xd82b1353e242b407ULL,
+ 0xe451aa3eb62a1500ULL,
+ 0x577fe680b58d4a6fULL,
+ 0x10d59c691e6ab55bULL,
+ 0xa3fbd0d71dcdea34ULL,
+ 0x6820eeb3b6bbf755ULL,
+ 0xdb0ea20db51ca83aULL,
+ 0x9ca4d8e41efb570eULL,
+ 0x2f8a945a1d5c0861ULL,
+ 0x13f02d374934a966ULL,
+ 0xa0de61894a93f609ULL,
+ 0xe7741b60e174093dULL,
+ 0x545a57dee2d35652ULL,
+ 0xe21ac88218962d7aULL,
+ 0x5134843c1b317215ULL,
+ 0x169efed5b0d68d21ULL,
+ 0xa5b0b26bb371d24eULL,
+ 0x99ca0b06e7197349ULL,
+ 0x2ae447b8e4be2c26ULL,
+ 0x6d4e3d514f59d312ULL,
+ 0xde6071ef4cfe8c7dULL,
+ 0x15bb4f8be788911cULL,
+ 0xa6950335e42fce73ULL,
+ 0xe13f79dc4fc83147ULL,
+ 0x521135624c6f6e28ULL,
+ 0x6e6b8c0f1807cf2fULL,
+ 0xdd45c0b11ba09040ULL,
+ 0x9aefba58b0476f74ULL,
+ 0x29c1f6e6b3e0301bULL,
+ 0xc96c5795d7870f42ULL,
+ 0x7a421b2bd420502dULL,
+ 0x3de861c27fc7af19ULL,
+ 0x8ec62d7c7c60f076ULL,
+ 0xb2bc941128085171ULL,
+ 0x0192d8af2baf0e1eULL,
+ 0x4638a2468048f12aULL,
+ 0xf516eef883efae45ULL,
+ 0x3ecdd09c2899b324ULL,
+ 0x8de39c222b3eec4bULL,
+ 0xca49e6cb80d9137fULL,
+ 0x7967aa75837e4c10ULL,
+ 0x451d1318d716ed17ULL,
+ 0xf6335fa6d4b1b278ULL,
+ 0xb199254f7f564d4cULL,
+ 0x02b769f17cf11223ULL,
+ 0xb4f7f6ad86b4690bULL,
+ 0x07d9ba1385133664ULL,
+ 0x4073c0fa2ef4c950ULL,
+ 0xf35d8c442d53963fULL,
+ 0xcf273529793b3738ULL,
+ 0x7c0979977a9c6857ULL,
+ 0x3ba3037ed17b9763ULL,
+ 0x888d4fc0d2dcc80cULL,
+ 0x435671a479aad56dULL,
+ 0xf0783d1a7a0d8a02ULL,
+ 0xb7d247f3d1ea7536ULL,
+ 0x04fc0b4dd24d2a59ULL,
+ 0x3886b22086258b5eULL,
+ 0x8ba8fe9e8582d431ULL,
+ 0xcc0284772e652b05ULL,
+ 0x7f2cc8c92dc2746aULL,
+ 0x325b15e575e1c3d0ULL,
+ 0x8175595b76469cbfULL,
+ 0xc6df23b2dda1638bULL,
+ 0x75f16f0cde063ce4ULL,
+ 0x498bd6618a6e9de3ULL,
+ 0xfaa59adf89c9c28cULL,
+ 0xbd0fe036222e3db8ULL,
+ 0x0e21ac88218962d7ULL,
+ 0xc5fa92ec8aff7fb6ULL,
+ 0x76d4de52895820d9ULL,
+ 0x317ea4bb22bfdfedULL,
+ 0x8250e80521188082ULL,
+ 0xbe2a516875702185ULL,
+ 0x0d041dd676d77eeaULL,
+ 0x4aae673fdd3081deULL,
+ 0xf9802b81de97deb1ULL,
+ 0x4fc0b4dd24d2a599ULL,
+ 0xfceef8632775faf6ULL,
+ 0xbb44828a8c9205c2ULL,
+ 0x086ace348f355aadULL,
+ 0x34107759db5dfbaaULL,
+ 0x873e3be7d8faa4c5ULL,
+ 0xc094410e731d5bf1ULL,
+ 0x73ba0db070ba049eULL,
+ 0xb86133d4dbcc19ffULL,
+ 0x0b4f7f6ad86b4690ULL,
+ 0x4ce50583738cb9a4ULL,
+ 0xffcb493d702be6cbULL,
+ 0xc3b1f050244347ccULL,
+ 0x709fbcee27e418a3ULL,
+ 0x3735c6078c03e797ULL,
+ 0x841b8ab98fa4b8f8ULL,
+ 0xadda7c5f3c4488e3ULL,
+ 0x1ef430e13fe3d78cULL,
+ 0x595e4a08940428b8ULL,
+ 0xea7006b697a377d7ULL,
+ 0xd60abfdbc3cbd6d0ULL,
+ 0x6524f365c06c89bfULL,
+ 0x228e898c6b8b768bULL,
+ 0x91a0c532682c29e4ULL,
+ 0x5a7bfb56c35a3485ULL,
+ 0xe955b7e8c0fd6beaULL,
+ 0xaeffcd016b1a94deULL,
+ 0x1dd181bf68bdcbb1ULL,
+ 0x21ab38d23cd56ab6ULL,
+ 0x9285746c3f7235d9ULL,
+ 0xd52f0e859495caedULL,
+ 0x6601423b97329582ULL,
+ 0xd041dd676d77eeaaULL,
+ 0x636f91d96ed0b1c5ULL,
+ 0x24c5eb30c5374ef1ULL,
+ 0x97eba78ec690119eULL,
+ 0xab911ee392f8b099ULL,
+ 0x18bf525d915feff6ULL,
+ 0x5f1528b43ab810c2ULL,
+ 0xec3b640a391f4fadULL,
+ 0x27e05a6e926952ccULL,
+ 0x94ce16d091ce0da3ULL,
+ 0xd3646c393a29f297ULL,
+ 0x604a2087398eadf8ULL,
+ 0x5c3099ea6de60cffULL,
+ 0xef1ed5546e415390ULL,
+ 0xa8b4afbdc5a6aca4ULL,
+ 0x1b9ae303c601f3cbULL,
+ 0x56ed3e2f9e224471ULL,
+ 0xe5c372919d851b1eULL,
+ 0xa26908783662e42aULL,
+ 0x114744c635c5bb45ULL,
+ 0x2d3dfdab61ad1a42ULL,
+ 0x9e13b115620a452dULL,
+ 0xd9b9cbfcc9edba19ULL,
+ 0x6a978742ca4ae576ULL,
+ 0xa14cb926613cf817ULL,
+ 0x1262f598629ba778ULL,
+ 0x55c88f71c97c584cULL,
+ 0xe6e6c3cfcadb0723ULL,
+ 0xda9c7aa29eb3a624ULL,
+ 0x69b2361c9d14f94bULL,
+ 0x2e184cf536f3067fULL,
+ 0x9d36004b35545910ULL,
+ 0x2b769f17cf112238ULL,
+ 0x9858d3a9ccb67d57ULL,
+ 0xdff2a94067518263ULL,
+ 0x6cdce5fe64f6dd0cULL,
+ 0x50a65c93309e7c0bULL,
+ 0xe388102d33392364ULL,
+ 0xa4226ac498dedc50ULL,
+ 0x170c267a9b79833fULL,
+ 0xdcd7181e300f9e5eULL,
+ 0x6ff954a033a8c131ULL,
+ 0x28532e49984f3e05ULL,
+ 0x9b7d62f79be8616aULL,
+ 0xa707db9acf80c06dULL,
+ 0x14299724cc279f02ULL,
+ 0x5383edcd67c06036ULL,
+ 0xe0ada17364673f59ULL
+ }
+};
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void)
+{
+ return CRC64_ECMA_182.seed;
+}
+EXPORT_SYMBOL(crc64_ecma_seed);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * pdata: pointer to the data to compute checksum for.
+ * nbytes: number of bytes in data buffer.
+ * seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed)
+{
+ unsigned int i;
+ u64 crc = seed;
+
+ for (i = 0; i < nbytes; i++)
+ crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^
+ (crc >> 8);
+
+ return crc;
+}
+EXPORT_SYMBOL(crc64_ecma);
+
+MODULE_DESCRIPTION("CRC64 ECMA function");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_LICENSE("GPL");
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index 0777c5a45fa0..f346715e2255 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -3,12 +3,12 @@
*
* Copyright (c) 2011, Tom Herbert <therbert@google.com>
*/
-#include <linux/module.h>
#include <linux/types.h>
-#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/dynamic_queue_limits.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index 71fcfcd96410..d83a372fa76f 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -109,7 +109,7 @@ int main(int argc, char** argv)
if (CRC_LE_BITS > 1) {
crc32init_le();
- printf("static u32 __cacheline_aligned "
+ printf("static const u32 ____cacheline_aligned "
"crc32table_le[%d][%d] = {",
LE_TABLE_ROWS, LE_TABLE_SIZE);
output_table(crc32table_le, LE_TABLE_ROWS,
@@ -119,7 +119,7 @@ int main(int argc, char** argv)
if (CRC_BE_BITS > 1) {
crc32init_be();
- printf("static u32 __cacheline_aligned "
+ printf("static const u32 ____cacheline_aligned "
"crc32table_be[%d][%d] = {",
BE_TABLE_ROWS, BE_TABLE_SIZE);
output_table(crc32table_be, LE_TABLE_ROWS,
@@ -128,7 +128,7 @@ int main(int argc, char** argv)
}
if (CRC_LE_BITS > 1) {
crc32cinit_le();
- printf("static u32 __cacheline_aligned "
+ printf("static const u32 ____cacheline_aligned "
"crc32ctable_le[%d][%d] = {",
LE_TABLE_ROWS, LE_TABLE_SIZE);
output_table(crc32ctable_le, LE_TABLE_ROWS,
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 2e65d206b01c..0fe1cbe87700 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -34,7 +34,6 @@
#include <linux/rculist.h>
#include <linux/interrupt.h>
#include <linux/genalloc.h>
-#include <linux/of_address.h>
#include <linux/of_device.h>
static inline size_t chunk_size(const struct gen_pool_chunk *chunk)
@@ -415,7 +414,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
size_t size)
{
bool found = false;
- unsigned long end = start + size;
+ unsigned long end = start + size - 1;
struct gen_pool_chunk *chunk;
rcu_read_lock();
diff --git a/lib/halfmd4.c b/lib/halfmd4.c
index 66d0ee8b7776..a8fe6274a13c 100644
--- a/lib/halfmd4.c
+++ b/lib/halfmd4.c
@@ -1,4 +1,4 @@
-#include <linux/kernel.h>
+#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/cryptohash.h>
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 270773b91923..7ea09699855d 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -97,63 +97,79 @@ EXPORT_SYMBOL(bin2hex);
*
* example output buffer:
* 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
+ *
+ * Return:
+ * The amount of bytes placed in the buffer without terminating NUL. If the
+ * output was truncated, then the return value is the number of bytes
+ * (excluding the terminating NUL) which would have been written to the final
+ * string if enough space had been available.
*/
-void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
- int groupsize, char *linebuf, size_t linebuflen,
- bool ascii)
+int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize,
+ char *linebuf, size_t linebuflen, bool ascii)
{
const u8 *ptr = buf;
+ int ngroups;
u8 ch;
int j, lx = 0;
int ascii_column;
+ int ret;
if (rowsize != 16 && rowsize != 32)
rowsize = 16;
- if (!len)
- goto nil;
if (len > rowsize) /* limit to one line at a time */
len = rowsize;
+ if (!is_power_of_2(groupsize) || groupsize > 8)
+ groupsize = 1;
if ((len % groupsize) != 0) /* no mixed size output */
groupsize = 1;
- switch (groupsize) {
- case 8: {
- const u64 *ptr8 = buf;
- int ngroups = len / groupsize;
+ ngroups = len / groupsize;
+ ascii_column = rowsize * 2 + rowsize / groupsize + 1;
- for (j = 0; j < ngroups; j++)
- lx += scnprintf(linebuf + lx, linebuflen - lx,
- "%s%16.16llx", j ? " " : "",
- (unsigned long long)*(ptr8 + j));
- ascii_column = 17 * ngroups + 2;
- break;
- }
+ if (!linebuflen)
+ goto overflow1;
- case 4: {
- const u32 *ptr4 = buf;
- int ngroups = len / groupsize;
+ if (!len)
+ goto nil;
- for (j = 0; j < ngroups; j++)
- lx += scnprintf(linebuf + lx, linebuflen - lx,
- "%s%8.8x", j ? " " : "", *(ptr4 + j));
- ascii_column = 9 * ngroups + 2;
- break;
- }
+ if (groupsize == 8) {
+ const u64 *ptr8 = buf;
- case 2: {
- const u16 *ptr2 = buf;
- int ngroups = len / groupsize;
+ for (j = 0; j < ngroups; j++) {
+ ret = snprintf(linebuf + lx, linebuflen - lx,
+ "%s%16.16llx", j ? " " : "",
+ (unsigned long long)*(ptr8 + j));
+ if (ret >= linebuflen - lx)
+ goto overflow1;
+ lx += ret;
+ }
+ } else if (groupsize == 4) {
+ const u32 *ptr4 = buf;
- for (j = 0; j < ngroups; j++)
- lx += scnprintf(linebuf + lx, linebuflen - lx,
- "%s%4.4x", j ? " " : "", *(ptr2 + j));
- ascii_column = 5 * ngroups + 2;
- break;
- }
+ for (j = 0; j < ngroups; j++) {
+ ret = snprintf(linebuf + lx, linebuflen - lx,
+ "%s%8.8x", j ? " " : "",
+ *(ptr4 + j));
+ if (ret >= linebuflen - lx)
+ goto overflow1;
+ lx += ret;
+ }
+ } else if (groupsize == 2) {
+ const u16 *ptr2 = buf;
- default:
- for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
+ for (j = 0; j < ngroups; j++) {
+ ret = snprintf(linebuf + lx, linebuflen - lx,
+ "%s%4.4x", j ? " " : "",
+ *(ptr2 + j));
+ if (ret >= linebuflen - lx)
+ goto overflow1;
+ lx += ret;
+ }
+ } else {
+ for (j = 0; j < len; j++) {
+ if (linebuflen < lx + 3)
+ goto overflow2;
ch = ptr[j];
linebuf[lx++] = hex_asc_hi(ch);
linebuf[lx++] = hex_asc_lo(ch);
@@ -161,21 +177,28 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
}
if (j)
lx--;
-
- ascii_column = 3 * rowsize + 2;
- break;
}
if (!ascii)
goto nil;
- while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
+ while (lx < ascii_column) {
+ if (linebuflen < lx + 2)
+ goto overflow2;
linebuf[lx++] = ' ';
- for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) {
+ }
+ for (j = 0; j < len; j++) {
+ if (linebuflen < lx + 2)
+ goto overflow2;
ch = ptr[j];
linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.';
}
nil:
+ linebuf[lx] = '\0';
+ return lx;
+overflow2:
linebuf[lx++] = '\0';
+overflow1:
+ return ascii ? ascii_column + len : (groupsize * 2 + 1) * ngroups - 1;
}
EXPORT_SYMBOL(hex_dump_to_buffer);
diff --git a/lib/idr.c b/lib/idr.c
index e654aebd5f80..5335c43adf46 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -30,7 +30,6 @@
#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/percpu.h>
-#include <linux/hardirq.h>
#define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index f367f9ad544c..c85f6600a5f8 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -1,7 +1,7 @@
-#include <linux/init.h>
#include <linux/interval_tree.h>
#include <linux/interval_tree_generic.h>
-#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
#define START(node) ((node)->start)
#define LAST(node) ((node)->last)
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9ebf9e20de53..f6c2c1e7779c 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -20,7 +20,6 @@
#include <linux/export.h>
#include <linux/kmod.h>
#include <linux/slab.h>
-#include <linux/user_namespace.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
diff --git a/lib/lcm.c b/lib/lcm.c
index 51cc6b13cd52..e97dbd51e756 100644
--- a/lib/lcm.c
+++ b/lib/lcm.c
@@ -1,4 +1,4 @@
-#include <linux/kernel.h>
+#include <linux/compiler.h>
#include <linux/gcd.h>
#include <linux/export.h>
#include <linux/lcm.h>
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 12bcba1c8612..b29015102698 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -2,9 +2,11 @@
#define pr_fmt(fmt) "list_sort_test: " fmt
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/string.h>
#include <linux/list_sort.h>
-#include <linux/slab.h>
#include <linux/list.h>
#define MAX_LIST_LENGTH_BITS 20
@@ -146,6 +148,7 @@ EXPORT_SYMBOL(list_sort);
#ifdef CONFIG_TEST_LIST_SORT
+#include <linux/slab.h>
#include <linux/random.h>
/*
diff --git a/lib/llist.c b/lib/llist.c
index f76196d07409..0b0e9779d675 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -24,7 +24,6 @@
*/
#include <linux/kernel.h>
#include <linux/export.h>
-#include <linux/interrupt.h>
#include <linux/llist.h>
diff --git a/lib/md5.c b/lib/md5.c
index 958a3c15923c..bb0cd01d356d 100644
--- a/lib/md5.c
+++ b/lib/md5.c
@@ -1,4 +1,4 @@
-#include <linux/kernel.h>
+#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/cryptohash.h>
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 9c3e85ff0a6c..76a1b59523ab 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
-#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 93d145e5539c..f75715131f20 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -19,13 +19,10 @@
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/export.h>
-#include <linux/hardirq.h>
-#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/percpu_ida.h>
diff --git a/lib/plist.c b/lib/plist.c
index d408e774b746..3a30c53db061 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -25,7 +25,6 @@
#include <linux/bug.h>
#include <linux/plist.h>
-#include <linux/spinlock.h>
#ifdef CONFIG_DEBUG_PI_LIST
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 3291a8e37490..3d2aa27b845b 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -33,7 +33,7 @@
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/rcupdate.h>
-#include <linux/hardirq.h> /* in_interrupt() */
+#include <linux/preempt_mask.h> /* in_interrupt() */
/*
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 7de89f4a36cf..adc98e1825ba 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -6,7 +6,6 @@
*/
#include <linux/mm.h>
-#include <linux/nmi.h>
#include <linux/quicklist.h>
#include <linux/cma.h>
diff --git a/lib/sort.c b/lib/sort.c
index 926d00429ed2..43c9fe73ae2e 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -4,10 +4,9 @@
* Jan 23 2005 Matt Mackall <mpm@selenic.com>
*/
-#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/export.h>
#include <linux/sort.h>
-#include <linux/slab.h>
static void u32_swap(void *a, void *b, int size)
{
@@ -85,6 +84,7 @@ void sort(void *base, size_t num, size_t size,
EXPORT_SYMBOL(sort);
#if 0
+#include <linux/slab.h>
/* a simple boot-time regression test */
int cmpint(const void *a, const void *b)
diff --git a/lib/stmp_device.c b/lib/stmp_device.c
index 8ac9bcc4289a..a904656f4fd7 100644
--- a/lib/stmp_device.c
+++ b/lib/stmp_device.c
@@ -15,7 +15,8 @@
#include <linux/io.h>
#include <linux/errno.h>
#include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
#include <linux/stmp_device.h>
#define STMP_MODULE_CLKGATE (1 << 30)
diff --git a/lib/string.c b/lib/string.c
index 10063300b830..3206d0178296 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -58,14 +58,6 @@ int strncasecmp(const char *s1, const char *s2, size_t len)
}
EXPORT_SYMBOL(strncasecmp);
#endif
-#ifndef __HAVE_ARCH_STRNICMP
-#undef strnicmp
-int strnicmp(const char *s1, const char *s2, size_t len)
-{
- return strncasecmp(s1, s2, len);
-}
-EXPORT_SYMBOL(strnicmp);
-#endif
#ifndef __HAVE_ARCH_STRCASECMP
int strcasecmp(const char *s1, const char *s2)
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 58b78ba57439..8f8c4417f228 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -20,19 +20,18 @@
* @len: length of buffer
*
* This function returns a string formatted to 3 significant figures
- * giving the size in the required units. Returns 0 on success or
- * error on failure. @buf is always zero terminated.
+ * giving the size in the required units. @buf should have room for
+ * at least 9 bytes and will always be zero terminated.
*
*/
-int string_get_size(u64 size, const enum string_size_units units,
- char *buf, int len)
+void string_get_size(u64 size, const enum string_size_units units,
+ char *buf, int len)
{
static const char *const units_10[] = {
- "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
+ "B", "kB", "MB", "GB", "TB", "PB", "EB"
};
static const char *const units_2[] = {
- "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
- NULL
+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"
};
static const char *const *const units_str[] = {
[STRING_UNITS_10] = units_10,
@@ -43,13 +42,13 @@ int string_get_size(u64 size, const enum string_size_units units,
[STRING_UNITS_2] = 1024,
};
int i, j;
- u64 remainder = 0, sf_cap;
+ u32 remainder = 0, sf_cap;
char tmp[8];
tmp[0] = '\0';
i = 0;
if (size >= divisor[units]) {
- while (size >= divisor[units] && units_str[units][i]) {
+ while (size >= divisor[units]) {
remainder = do_div(size, divisor[units]);
i++;
}
@@ -60,17 +59,14 @@ int string_get_size(u64 size, const enum string_size_units units,
if (j) {
remainder *= 1000;
- do_div(remainder, divisor[units]);
- snprintf(tmp, sizeof(tmp), ".%03lld",
- (unsigned long long)remainder);
+ remainder /= divisor[units];
+ snprintf(tmp, sizeof(tmp), ".%03u", remainder);
tmp[j+1] = '\0';
}
}
- snprintf(buf, len, "%lld%s %s", (unsigned long long)size,
+ snprintf(buf, len, "%u%s %s", (u32)size,
tmp, units_str[units][i]);
-
- return 0;
}
EXPORT_SYMBOL(string_get_size);
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index bb2b201d6ad0..e0af6ff73d14 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -1,4 +1,5 @@
-#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/errno.h>
diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c
new file mode 100644
index 000000000000..daf29a390a89
--- /dev/null
+++ b/lib/test-hexdump.c
@@ -0,0 +1,180 @@
+/*
+ * Test cases for lib/hexdump.c module.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/string.h>
+
+static const unsigned char data_b[] = {
+ '\xbe', '\x32', '\xdb', '\x7b', '\x0a', '\x18', '\x93', '\xb2', /* 00 - 07 */
+ '\x70', '\xba', '\xc4', '\x24', '\x7d', '\x83', '\x34', '\x9b', /* 08 - 0f */
+ '\xa6', '\x9c', '\x31', '\xad', '\x9c', '\x0f', '\xac', '\xe9', /* 10 - 17 */
+ '\x4c', '\xd1', '\x19', '\x99', '\x43', '\xb1', '\xaf', '\x0c', /* 18 - 1f */
+};
+
+static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C...";
+
+static const char *test_data_1_le[] __initconst = {
+ "be", "32", "db", "7b", "0a", "18", "93", "b2",
+ "70", "ba", "c4", "24", "7d", "83", "34", "9b",
+ "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9",
+ "4c", "d1", "19", "99", "43", "b1", "af", "0c",
+};
+
+static const char *test_data_2_le[] __initconst = {
+ "32be", "7bdb", "180a", "b293",
+ "ba70", "24c4", "837d", "9b34",
+ "9ca6", "ad31", "0f9c", "e9ac",
+ "d14c", "9919", "b143", "0caf",
+};
+
+static const char *test_data_4_le[] __initconst = {
+ "7bdb32be", "b293180a", "24c4ba70", "9b34837d",
+ "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143",
+};
+
+static const char *test_data_8_le[] __initconst = {
+ "b293180a7bdb32be", "9b34837d24c4ba70",
+ "e9ac0f9cad319ca6", "0cafb1439919d14c",
+};
+
+static void __init test_hexdump(size_t len, int rowsize, int groupsize,
+ bool ascii)
+{
+ char test[32 * 3 + 2 + 32 + 1];
+ char real[32 * 3 + 2 + 32 + 1];
+ char *p;
+ const char **result;
+ size_t l = len;
+ int gs = groupsize, rs = rowsize;
+ unsigned int i;
+
+ hex_dump_to_buffer(data_b, l, rs, gs, real, sizeof(real), ascii);
+
+ if (rs != 16 && rs != 32)
+ rs = 16;
+
+ if (l > rs)
+ l = rs;
+
+ if (!is_power_of_2(gs) || gs > 8 || (len % gs != 0))
+ gs = 1;
+
+ if (gs == 8)
+ result = test_data_8_le;
+ else if (gs == 4)
+ result = test_data_4_le;
+ else if (gs == 2)
+ result = test_data_2_le;
+ else
+ result = test_data_1_le;
+
+ memset(test, ' ', sizeof(test));
+
+ /* hex dump */
+ p = test;
+ for (i = 0; i < l / gs; i++) {
+ const char *q = *result++;
+ size_t amount = strlen(q);
+
+ strncpy(p, q, amount);
+ p += amount + 1;
+ }
+ if (i)
+ p--;
+
+ /* ASCII part */
+ if (ascii) {
+ p = test + rs * 2 + rs / gs + 1;
+ strncpy(p, data_a, l);
+ p += l;
+ }
+
+ *p = '\0';
+
+ if (strcmp(test, real)) {
+ pr_err("Len: %zu row: %d group: %d\n", len, rowsize, groupsize);
+ pr_err("Result: '%s'\n", real);
+ pr_err("Expect: '%s'\n", test);
+ }
+}
+
+static void __init test_hexdump_set(int rowsize, bool ascii)
+{
+ size_t d = min_t(size_t, sizeof(data_b), rowsize);
+ size_t len = get_random_int() % d + 1;
+
+ test_hexdump(len, rowsize, 4, ascii);
+ test_hexdump(len, rowsize, 2, ascii);
+ test_hexdump(len, rowsize, 8, ascii);
+ test_hexdump(len, rowsize, 1, ascii);
+}
+
+static void __init test_hexdump_overflow(bool ascii)
+{
+ char buf[56];
+ const char *t = test_data_1_le[0];
+ size_t l = get_random_int() % sizeof(buf);
+ bool a;
+ int e, r;
+
+ memset(buf, ' ', sizeof(buf));
+
+ r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, l, ascii);
+
+ if (ascii)
+ e = 50;
+ else
+ e = 2;
+ buf[e + 2] = '\0';
+
+ if (!l) {
+ a = r == e && buf[0] == ' ';
+ } else if (l < 3) {
+ a = r == e && buf[0] == '\0';
+ } else if (l < 4) {
+ a = r == e && !strcmp(buf, t);
+ } else if (ascii) {
+ if (l < 51)
+ a = r == e && buf[l - 1] == '\0' && buf[l - 2] == ' ';
+ else
+ a = r == e && buf[50] == '\0' && buf[49] == '.';
+ } else {
+ a = r == e && buf[e] == '\0';
+ }
+
+ if (!a) {
+ pr_err("Len: %zu rc: %u strlen: %zu\n", l, r, strlen(buf));
+ pr_err("Result: '%s'\n", buf);
+ }
+}
+
+static int __init test_hexdump_init(void)
+{
+ unsigned int i;
+ int rowsize;
+
+ pr_info("Running tests...\n");
+
+ rowsize = (get_random_int() % 2 + 1) * 16;
+ for (i = 0; i < 16; i++)
+ test_hexdump_set(rowsize, false);
+
+ rowsize = (get_random_int() % 2 + 1) * 16;
+ for (i = 0; i < 16; i++)
+ test_hexdump_set(rowsize, true);
+
+ for (i = 0; i < 16; i++)
+ test_hexdump_overflow(false);
+
+ for (i = 0; i < 16; i++)
+ test_hexdump_overflow(true);
+
+ return -EINVAL;
+}
+module_init(test_hexdump_init);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index ec337f64f52d..d57551c031f4 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1240,6 +1240,21 @@ char *address_val(char *buf, char *end, const void *addr,
return number(buf, end, num, spec);
}
+static noinline_for_stack
+char *comm_name(char *buf, char *end, struct task_struct *tsk,
+ struct printf_spec spec, const char *fmt)
+{
+ char name[TASK_COMM_LEN];
+
+ /* Caller can pass NULL instead of current. */
+ if (!tsk)
+ tsk = current;
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ memcpy(name, tsk->comm, TASK_COMM_LEN);
+ name[sizeof(name) - 1] = '\0';
+ return string(buf, end, name, spec);
+}
+
int kptr_restrict __read_mostly;
/*
@@ -1318,6 +1333,7 @@ int kptr_restrict __read_mostly;
* (default assumed to be phys_addr_t, passed by reference)
* - 'd[234]' For a dentry name (optionally 2-4 last components)
* - 'D[234]' Same as 'd' but for a struct file
+ * - 'T' task_struct->comm
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1329,7 +1345,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
{
int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'T') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
@@ -1459,6 +1475,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return dentry_name(buf, end,
((const struct file *)ptr)->f_path.dentry,
spec, fmt);
+ case 'T':
+ return comm_name(buf, end, ptr, spec, fmt);
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
diff --git a/mm/Kconfig b/mm/Kconfig
index 0252ddfdb7f4..a03131b6ba8e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -602,6 +602,16 @@ config PGTABLE_MAPPING
You can check speed with zsmalloc benchmark:
https://github.com/spartacus06/zsmapbench
+config ZSMALLOC_STAT
+ bool "Export zsmalloc statistics"
+ depends on ZSMALLOC
+ select DEBUG_FS
+ help
+ This option enables code in the zsmalloc to collect various
+ statistics about whats happening in zsmalloc and exports that
+ information to userspace via debugfs.
+ If unsure, say N.
+
config GENERIC_EARLY_IOREMAP
bool
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 56badfc4810a..957d3da53ddd 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -14,7 +14,6 @@ config DEBUG_PAGEALLOC
depends on !KMEMCHECK
select PAGE_EXTENSION
select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
- select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC
---help---
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
@@ -27,13 +26,5 @@ config DEBUG_PAGEALLOC
that would result in incorrect warnings of memory corruption after
a resume because free pages are not saved to the suspend image.
-config WANT_PAGE_DEBUG_FLAGS
- bool
-
config PAGE_POISONING
bool
- select WANT_PAGE_DEBUG_FLAGS
-
-config PAGE_GUARD
- bool
- select WANT_PAGE_DEBUG_FLAGS
diff --git a/mm/Makefile b/mm/Makefile
index 4bf586e66378..3548460ab7b6 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,7 +3,7 @@
#
mmu-y := nommu.o
-mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o memory.o mincore.o \
+mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
vmalloc.o pagewalk.o pgtable-generic.o
diff --git a/mm/debug.c b/mm/debug.c
index 0e58f3211f89..d69cb5a7ba9a 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -130,7 +130,6 @@ static const struct trace_print_flags vmaflags_names[] = {
{VM_ACCOUNT, "account" },
{VM_NORESERVE, "noreserve" },
{VM_HUGETLB, "hugetlb" },
- {VM_NONLINEAR, "nonlinear" },
#if defined(CONFIG_X86)
{VM_PAT, "pat" },
#elif defined(CONFIG_PPC)
diff --git a/mm/filemap.c b/mm/filemap.c
index 673e4581a2e5..bf7a27142704 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2087,7 +2087,6 @@ const struct vm_operations_struct generic_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = filemap_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
/* This is used for a general mmap of a disk file */
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 0d105aeff82f..70c09da1a419 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -301,7 +301,6 @@ out:
static const struct vm_operations_struct xip_file_vm_ops = {
.fault = xip_file_fault,
.page_mkwrite = filemap_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
diff --git a/mm/fremap.c b/mm/fremap.c
deleted file mode 100644
index 2805d71cf476..000000000000
--- a/mm/fremap.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * linux/mm/fremap.c
- *
- * Explicit pagetable population and nonlinear (random) mappings support.
- *
- * started by Ingo Molnar, Copyright (C) 2002, 2003
- */
-#include <linux/export.h>
-#include <linux/backing-dev.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/file.h>
-#include <linux/mman.h>
-#include <linux/pagemap.h>
-#include <linux/swapops.h>
-#include <linux/rmap.h>
-#include <linux/syscalls.h>
-#include <linux/mmu_notifier.h>
-
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-#include "internal.h"
-
-static int mm_counter(struct page *page)
-{
- return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES;
-}
-
-static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
-{
- pte_t pte = *ptep;
- struct page *page;
- swp_entry_t entry;
-
- if (pte_present(pte)) {
- flush_cache_page(vma, addr, pte_pfn(pte));
- pte = ptep_clear_flush_notify(vma, addr, ptep);
- page = vm_normal_page(vma, addr, pte);
- if (page) {
- if (pte_dirty(pte))
- set_page_dirty(page);
- update_hiwater_rss(mm);
- dec_mm_counter(mm, mm_counter(page));
- page_remove_rmap(page);
- page_cache_release(page);
- }
- } else { /* zap_pte() is not called when pte_none() */
- if (!pte_file(pte)) {
- update_hiwater_rss(mm);
- entry = pte_to_swp_entry(pte);
- if (non_swap_entry(entry)) {
- if (is_migration_entry(entry)) {
- page = migration_entry_to_page(entry);
- dec_mm_counter(mm, mm_counter(page));
- }
- } else {
- free_swap_and_cache(entry);
- dec_mm_counter(mm, MM_SWAPENTS);
- }
- }
- pte_clear_not_present_full(mm, addr, ptep, 0);
- }
-}
-
-/*
- * Install a file pte to a given virtual memory address, release any
- * previously existing mapping.
- */
-static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, unsigned long pgoff, pgprot_t prot)
-{
- int err = -ENOMEM;
- pte_t *pte, ptfile;
- spinlock_t *ptl;
-
- pte = get_locked_pte(mm, addr, &ptl);
- if (!pte)
- goto out;
-
- ptfile = pgoff_to_pte(pgoff);
-
- if (!pte_none(*pte))
- zap_pte(mm, vma, addr, pte);
-
- set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile));
- /*
- * We don't need to run update_mmu_cache() here because the "file pte"
- * being installed by install_file_pte() is not a real pte - it's a
- * non-present entry (like a swap entry), noting what file offset should
- * be mapped there when there's a fault (in a non-linear vma where
- * that's not obvious).
- */
- pte_unmap_unlock(pte, ptl);
- err = 0;
-out:
- return err;
-}
-
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff)
-{
- struct mm_struct *mm = vma->vm_mm;
- int err;
-
- do {
- err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
- if (err)
- return err;
-
- size -= PAGE_SIZE;
- addr += PAGE_SIZE;
- pgoff++;
- } while (size);
-
- return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
-/**
- * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
- * @start: start of the remapped virtual memory range
- * @size: size of the remapped virtual memory range
- * @prot: new protection bits of the range (see NOTE)
- * @pgoff: to-be-mapped page of the backing store file
- * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO.
- *
- * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
- * (shared backing store file).
- *
- * This syscall works purely via pagetables, so it's the most efficient
- * way to map the same (large) file into a given virtual window. Unlike
- * mmap()/mremap() it does not create any new vmas. The new mappings are
- * also safe across swapout.
- *
- * NOTE: the @prot parameter right now is ignored (but must be zero),
- * and the vma's default protection is used. Arbitrary protections
- * might be implemented in the future.
- */
-SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
- unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
-{
- struct mm_struct *mm = current->mm;
- struct address_space *mapping;
- struct vm_area_struct *vma;
- int err = -EINVAL;
- int has_write_lock = 0;
- vm_flags_t vm_flags = 0;
-
- pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
- "See Documentation/vm/remap_file_pages.txt.\n",
- current->comm, current->pid);
-
- if (prot)
- return err;
- /*
- * Sanitize the syscall parameters:
- */
- start = start & PAGE_MASK;
- size = size & PAGE_MASK;
-
- /* Does the address range wrap, or is the span zero-sized? */
- if (start + size <= start)
- return err;
-
- /* Does pgoff wrap? */
- if (pgoff + (size >> PAGE_SHIFT) < pgoff)
- return err;
-
- /* Can we represent this offset inside this architecture's pte's? */
-#if PTE_FILE_MAX_BITS < BITS_PER_LONG
- if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
- return err;
-#endif
-
- /* We need down_write() to change vma->vm_flags. */
- down_read(&mm->mmap_sem);
- retry:
- vma = find_vma(mm, start);
-
- /*
- * Make sure the vma is shared, that it supports prefaulting,
- * and that the remapped range is valid and fully within
- * the single existing vma.
- */
- if (!vma || !(vma->vm_flags & VM_SHARED))
- goto out;
-
- if (!vma->vm_ops || !vma->vm_ops->remap_pages)
- goto out;
-
- if (start < vma->vm_start || start + size > vma->vm_end)
- goto out;
-
- /* Must set VM_NONLINEAR before any pages are populated. */
- if (!(vma->vm_flags & VM_NONLINEAR)) {
- /*
- * vm_private_data is used as a swapout cursor
- * in a VM_NONLINEAR vma.
- */
- if (vma->vm_private_data)
- goto out;
-
- /* Don't need a nonlinear mapping, exit success */
- if (pgoff == linear_page_index(vma, start)) {
- err = 0;
- goto out;
- }
-
- if (!has_write_lock) {
-get_write_lock:
- up_read(&mm->mmap_sem);
- down_write(&mm->mmap_sem);
- has_write_lock = 1;
- goto retry;
- }
- mapping = vma->vm_file->f_mapping;
- /*
- * page_mkclean doesn't work on nonlinear vmas, so if
- * dirty pages need to be accounted, emulate with linear
- * vmas.
- */
- if (mapping_cap_account_dirty(mapping)) {
- unsigned long addr;
- struct file *file = get_file(vma->vm_file);
- /* mmap_region may free vma; grab the info now */
- vm_flags = vma->vm_flags;
-
- addr = mmap_region(file, start, size, vm_flags, pgoff);
- fput(file);
- if (IS_ERR_VALUE(addr)) {
- err = addr;
- } else {
- BUG_ON(addr != start);
- err = 0;
- }
- goto out_freed;
- }
- i_mmap_lock_write(mapping);
- flush_dcache_mmap_lock(mapping);
- vma->vm_flags |= VM_NONLINEAR;
- vma_interval_tree_remove(vma, &mapping->i_mmap);
- vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
- flush_dcache_mmap_unlock(mapping);
- i_mmap_unlock_write(mapping);
- }
-
- if (vma->vm_flags & VM_LOCKED) {
- /*
- * drop PG_Mlocked flag for over-mapped range
- */
- if (!has_write_lock)
- goto get_write_lock;
- vm_flags = vma->vm_flags;
- munlock_vma_pages_range(vma, start, start + size);
- vma->vm_flags = vm_flags;
- }
-
- mmu_notifier_invalidate_range_start(mm, start, start + size);
- err = vma->vm_ops->remap_pages(vma, start, size, pgoff);
- mmu_notifier_invalidate_range_end(mm, start, start + size);
-
- /*
- * We can't clear VM_NONLINEAR because we'd have to do
- * it after ->populate completes, and that would prevent
- * downgrading the lock. (Locks can't be upgraded).
- */
-
-out:
- if (vma)
- vm_flags = vma->vm_flags;
-out_freed:
- if (likely(!has_write_lock))
- up_read(&mm->mmap_sem);
- else
- up_write(&mm->mmap_sem);
- if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK)))
- mm_populate(start, size);
-
- return err;
-}
diff --git a/mm/gup.c b/mm/gup.c
index bed30efad77c..27096dc2c28a 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -55,7 +55,7 @@ retry:
*/
if (likely(!(flags & FOLL_MIGRATION)))
goto no_page;
- if (pte_none(pte) || pte_file(pte))
+ if (pte_none(pte))
goto no_page;
entry = pte_to_swp_entry(pte);
if (!is_migration_entry(entry))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 817a875f2b8c..b29c48708b89 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -171,12 +171,7 @@ static int start_khugepaged(void)
}
static atomic_t huge_zero_refcount;
-static struct page *huge_zero_page __read_mostly;
-
-static inline bool is_huge_zero_page(struct page *page)
-{
- return ACCESS_ONCE(huge_zero_page) == page;
-}
+struct page *huge_zero_page __read_mostly;
static inline bool is_huge_zero_pmd(pmd_t pmd)
{
@@ -1383,6 +1378,36 @@ out:
return 0;
}
+int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long addr)
+
+{
+ spinlock_t *ptl;
+ struct mm_struct *mm = tlb->mm;
+ int ret = 1;
+
+ if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ struct page *page;
+ pmd_t orig_pmd;
+
+ orig_pmd = pmdp_get_and_clear(mm, addr, pmd);
+
+ /* No hugepage in swapcache */
+ page = pmd_page(orig_pmd);
+ VM_BUG_ON_PAGE(PageSwapCache(page), page);
+
+ orig_pmd = pmd_mkold(orig_pmd);
+ orig_pmd = pmd_mkclean(orig_pmd);
+
+ set_pmd_at(mm, addr, pmd, orig_pmd);
+ tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+ spin_unlock(ptl);
+ ret = 0;
+ }
+
+ return ret;
+}
+
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr)
{
@@ -1620,6 +1645,11 @@ unlock:
return NULL;
}
+int pmd_freeable(pmd_t pmd)
+{
+ return !pmd_dirty(pmd);
+}
+
static int __split_huge_page_splitting(struct page *page,
struct vm_area_struct *vma,
unsigned long address)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 85032de5e20f..be0e5d0db5ec 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,7 @@
#include <linux/node.h>
#include "internal.h"
-unsigned long hugepages_treat_as_movable;
+int hugepages_treat_as_movable;
int hugetlb_max_hstate __read_mostly;
unsigned int default_hstate_idx;
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
index 8da581fa9060..f2c2492681bf 100644
--- a/mm/interval_tree.c
+++ b/mm/interval_tree.c
@@ -21,8 +21,8 @@ static inline unsigned long vma_last_pgoff(struct vm_area_struct *v)
return v->vm_pgoff + ((v->vm_end - v->vm_start) >> PAGE_SHIFT) - 1;
}
-INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.linear.rb,
- unsigned long, shared.linear.rb_subtree_last,
+INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.rb,
+ unsigned long, shared.rb_subtree_last,
vma_start_pgoff, vma_last_pgoff,, vma_interval_tree)
/* Insert node immediately after prev in the interval tree */
@@ -36,26 +36,26 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node,
VM_BUG_ON_VMA(vma_start_pgoff(node) != vma_start_pgoff(prev), node);
- if (!prev->shared.linear.rb.rb_right) {
+ if (!prev->shared.rb.rb_right) {
parent = prev;
- link = &prev->shared.linear.rb.rb_right;
+ link = &prev->shared.rb.rb_right;
} else {
- parent = rb_entry(prev->shared.linear.rb.rb_right,
- struct vm_area_struct, shared.linear.rb);
- if (parent->shared.linear.rb_subtree_last < last)
- parent->shared.linear.rb_subtree_last = last;
- while (parent->shared.linear.rb.rb_left) {
- parent = rb_entry(parent->shared.linear.rb.rb_left,
- struct vm_area_struct, shared.linear.rb);
- if (parent->shared.linear.rb_subtree_last < last)
- parent->shared.linear.rb_subtree_last = last;
+ parent = rb_entry(prev->shared.rb.rb_right,
+ struct vm_area_struct, shared.rb);
+ if (parent->shared.rb_subtree_last < last)
+ parent->shared.rb_subtree_last = last;
+ while (parent->shared.rb.rb_left) {
+ parent = rb_entry(parent->shared.rb.rb_left,
+ struct vm_area_struct, shared.rb);
+ if (parent->shared.rb_subtree_last < last)
+ parent->shared.rb_subtree_last = last;
}
- link = &parent->shared.linear.rb.rb_left;
+ link = &parent->shared.rb.rb_left;
}
- node->shared.linear.rb_subtree_last = last;
- rb_link_node(&node->shared.linear.rb, &parent->shared.linear.rb, link);
- rb_insert_augmented(&node->shared.linear.rb, root,
+ node->shared.rb_subtree_last = last;
+ rb_link_node(&node->shared.rb, &parent->shared.rb, link);
+ rb_insert_augmented(&node->shared.rb, root,
&vma_interval_tree_augment);
}
diff --git a/mm/ksm.c b/mm/ksm.c
index d247efab5073..2d12da1fda05 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1748,7 +1748,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
*/
if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP))
+ VM_HUGETLB | VM_MIXEDMAP))
return 0; /* just ignore the advice */
#ifdef VM_SAO
diff --git a/mm/madvise.c b/mm/madvise.c
index a271adc93289..d03e1bbd3af3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -19,6 +19,14 @@
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
+
+#include <asm/tlb.h>
+
+struct madvise_free_private {
+ struct vm_area_struct *vma;
+ struct mmu_gather *tlb;
+};
/*
* Any behaviour which results in changes to the vma->vm_flags needs to
@@ -31,6 +39,7 @@ static int madvise_need_mmap_write(int behavior)
case MADV_REMOVE:
case MADV_WILLNEED:
case MADV_DONTNEED:
+ case MADV_FREE:
return 0;
default:
/* be safe, default to 1. list exceptions explicitly */
@@ -155,7 +164,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
pte = *(orig_pte + ((index - start) / PAGE_SIZE));
pte_unmap_unlock(orig_pte, ptl);
- if (pte_present(pte) || pte_none(pte) || pte_file(pte))
+ if (pte_present(pte) || pte_none(pte))
continue;
entry = pte_to_swp_entry(pte);
if (unlikely(non_swap_entry(entry)))
@@ -251,6 +260,138 @@ static long madvise_willneed(struct vm_area_struct *vma,
return 0;
}
+static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+
+{
+ struct madvise_free_private *fp = walk->private;
+ struct mmu_gather *tlb = fp->tlb;
+ struct mm_struct *mm = tlb->mm;
+ struct vm_area_struct *vma = fp->vma;
+ spinlock_t *ptl;
+ pte_t *pte, ptent;
+ struct page *page;
+ unsigned long next;
+
+ next = pmd_addr_end(addr, end);
+ if (pmd_trans_huge(*pmd)) {
+ if (next - addr != HPAGE_PMD_SIZE)
+ split_huge_page_pmd(vma, addr, pmd);
+ else if (!madvise_free_huge_pmd(tlb, vma, pmd, addr))
+ goto next;
+ /* fall through */
+ }
+
+ if (pmd_trans_unstable(pmd))
+ return 0;
+
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ arch_enter_lazy_mmu_mode();
+ for (; addr != end; pte++, addr += PAGE_SIZE) {
+ ptent = *pte;
+
+ if (!pte_present(ptent))
+ continue;
+
+ page = vm_normal_page(vma, addr, ptent);
+ if (!page)
+ continue;
+
+ if (PageSwapCache(page)) {
+ if (!trylock_page(page))
+ continue;
+
+ if (!try_to_free_swap(page)) {
+ unlock_page(page);
+ continue;
+ }
+
+ ClearPageDirty(page);
+ unlock_page(page);
+ }
+
+ /*
+ * Some of architecture(ex, PPC) don't update TLB
+ * with set_pte_at and tlb_remove_tlb_entry so for
+ * the portability, remap the pte with old|clean
+ * after pte clearing.
+ */
+ ptent = ptep_get_and_clear_full(mm, addr, pte,
+ tlb->fullmm);
+ ptent = pte_mkold(ptent);
+ ptent = pte_mkclean(ptent);
+ set_pte_at(mm, addr, pte, ptent);
+ tlb_remove_tlb_entry(tlb, pte, addr);
+ }
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(pte - 1, ptl);
+next:
+ cond_resched();
+ return 0;
+}
+
+static void madvise_free_page_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+{
+ struct madvise_free_private fp = {
+ .vma = vma,
+ .tlb = tlb,
+ };
+
+ struct mm_walk free_walk = {
+ .pmd_entry = madvise_free_pte_range,
+ .mm = vma->vm_mm,
+ .private = &fp,
+ };
+
+ BUG_ON(addr >= end);
+ tlb_start_vma(tlb, vma);
+ walk_page_range(addr, end, &free_walk);
+ tlb_end_vma(tlb, vma);
+}
+
+static int madvise_free_single_vma(struct vm_area_struct *vma,
+ unsigned long start_addr, unsigned long end_addr)
+{
+ unsigned long start, end;
+ struct mm_struct *mm = vma->vm_mm;
+ struct mmu_gather tlb;
+
+ if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+ return -EINVAL;
+
+ /* MADV_FREE works for only anon vma at the moment */
+ if (vma->vm_file)
+ return -EINVAL;
+
+ start = max(vma->vm_start, start_addr);
+ if (start >= vma->vm_end)
+ return -EINVAL;
+ end = min(vma->vm_end, end_addr);
+ if (end <= vma->vm_start)
+ return -EINVAL;
+
+ lru_add_drain();
+ tlb_gather_mmu(&tlb, mm, start, end);
+ update_hiwater_rss(mm);
+
+ mmu_notifier_invalidate_range_start(mm, start, end);
+ madvise_free_page_range(&tlb, vma, start, end);
+ mmu_notifier_invalidate_range_end(mm, start, end);
+ tlb_finish_mmu(&tlb, start, end);
+
+ return 0;
+}
+
+static long madvise_free(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end)
+{
+ *prev = vma;
+ return madvise_free_single_vma(vma, start, end);
+}
+
/*
* Application no longer needs these pages. If the pages are dirty,
* it's OK to just throw them away. The app will be more careful about
@@ -278,14 +419,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
return -EINVAL;
- if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
- struct zap_details details = {
- .nonlinear_vma = vma,
- .last_index = ULONG_MAX,
- };
- zap_page_range(vma, start, end - start, &details);
- } else
- zap_page_range(vma, start, end - start, NULL);
+ zap_page_range(vma, start, end - start, NULL);
return 0;
}
@@ -303,7 +437,7 @@ static long madvise_remove(struct vm_area_struct *vma,
*prev = NULL; /* tell sys_madvise we drop mmap_sem */
- if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
+ if (vma->vm_flags & (VM_LOCKED | VM_HUGETLB))
return -EINVAL;
f = vma->vm_file;
@@ -381,6 +515,14 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
return madvise_remove(vma, prev, start, end);
case MADV_WILLNEED:
return madvise_willneed(vma, prev, start, end);
+ case MADV_FREE:
+ /*
+ * XXX: In this implementation, MADV_FREE works like
+ * MADV_DONTNEED on swapless system or full swap.
+ */
+ if (get_nr_swap_pages() > 0)
+ return madvise_free(vma, prev, start, end);
+ /* passthrough */
case MADV_DONTNEED:
return madvise_dontneed(vma, prev, start, end);
default:
@@ -400,6 +542,7 @@ madvise_behavior_valid(int behavior)
case MADV_REMOVE:
case MADV_WILLNEED:
case MADV_DONTNEED:
+ case MADV_FREE:
#ifdef CONFIG_KSM
case MADV_MERGEABLE:
case MADV_UNMERGEABLE:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ef91e856c7e4..bfa1a849d113 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -325,9 +325,11 @@ struct mem_cgroup {
/*
* set > 0 if pages under this cgroup are moving to other cgroup.
*/
- atomic_t moving_account;
+ atomic_t moving_account;
/* taken only while moving_account > 0 */
- spinlock_t move_lock;
+ spinlock_t move_lock;
+ struct task_struct *move_lock_task;
+ unsigned long move_lock_flags;
/*
* percpu counter.
*/
@@ -343,9 +345,6 @@ struct mem_cgroup {
struct cg_proto tcp_mem;
#endif
#if defined(CONFIG_MEMCG_KMEM)
- /* analogous to slab_common's slab_caches list, but per-memcg;
- * protected by memcg_slab_mutex */
- struct list_head memcg_slab_caches;
/* Index in the kmem_cache->memcg_params->memcg_caches array */
int kmemcg_id;
#endif
@@ -1980,34 +1979,33 @@ cleanup:
/**
* mem_cgroup_begin_page_stat - begin a page state statistics transaction
* @page: page that is going to change accounted state
- * @locked: &memcg->move_lock slowpath was taken
- * @flags: IRQ-state flags for &memcg->move_lock
*
* This function must mark the beginning of an accounted page state
* change to prevent double accounting when the page is concurrently
* being moved to another memcg:
*
- * memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ * memcg = mem_cgroup_begin_page_stat(page);
* if (TestClearPageState(page))
* mem_cgroup_update_page_stat(memcg, state, -1);
- * mem_cgroup_end_page_stat(memcg, locked, flags);
- *
- * The RCU lock is held throughout the transaction. The fast path can
- * get away without acquiring the memcg->move_lock (@locked is false)
- * because page moving starts with an RCU grace period.
- *
- * The RCU lock also protects the memcg from being freed when the page
- * state that is going to change is the only thing preventing the page
- * from being uncharged. E.g. end-writeback clearing PageWriteback(),
- * which allows migration to go ahead and uncharge the page before the
- * account transaction might be complete.
+ * mem_cgroup_end_page_stat(memcg);
*/
-struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
- bool *locked,
- unsigned long *flags)
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page)
{
struct mem_cgroup *memcg;
+ unsigned long flags;
+ /*
+ * The RCU lock is held throughout the transaction. The fast
+ * path can get away without acquiring the memcg->move_lock
+ * because page moving starts with an RCU grace period.
+ *
+ * The RCU lock also protects the memcg from being freed when
+ * the page state that is going to change is the only thing
+ * preventing the page from being uncharged.
+ * E.g. end-writeback clearing PageWriteback(), which allows
+ * migration to go ahead and uncharge the page before the
+ * account transaction might be complete.
+ */
rcu_read_lock();
if (mem_cgroup_disabled())
@@ -2017,16 +2015,22 @@ again:
if (unlikely(!memcg))
return NULL;
- *locked = false;
if (atomic_read(&memcg->moving_account) <= 0)
return memcg;
- spin_lock_irqsave(&memcg->move_lock, *flags);
+ spin_lock_irqsave(&memcg->move_lock, flags);
if (memcg != page->mem_cgroup) {
- spin_unlock_irqrestore(&memcg->move_lock, *flags);
+ spin_unlock_irqrestore(&memcg->move_lock, flags);
goto again;
}
- *locked = true;
+
+ /*
+ * When charge migration first begins, we can have locked and
+ * unlocked page stat updates happening concurrently. Track
+ * the task who has the lock for mem_cgroup_end_page_stat().
+ */
+ memcg->move_lock_task = current;
+ memcg->move_lock_flags = flags;
return memcg;
}
@@ -2034,14 +2038,17 @@ again:
/**
* mem_cgroup_end_page_stat - finish a page state statistics transaction
* @memcg: the memcg that was accounted against
- * @locked: value received from mem_cgroup_begin_page_stat()
- * @flags: value received from mem_cgroup_begin_page_stat()
*/
-void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked,
- unsigned long *flags)
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
{
- if (memcg && *locked)
- spin_unlock_irqrestore(&memcg->move_lock, *flags);
+ if (memcg && memcg->move_lock_task == current) {
+ unsigned long flags = memcg->move_lock_flags;
+
+ memcg->move_lock_task = NULL;
+ memcg->move_lock_flags = 0;
+
+ spin_unlock_irqrestore(&memcg->move_lock, flags);
+ }
rcu_read_unlock();
}
@@ -2476,27 +2483,8 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
}
#ifdef CONFIG_MEMCG_KMEM
-/*
- * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
- * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
- */
-static DEFINE_MUTEX(memcg_slab_mutex);
-
-/*
- * This is a bit cumbersome, but it is rarely used and avoids a backpointer
- * in the memcg_cache_params struct.
- */
-static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
-{
- struct kmem_cache *cachep;
-
- VM_BUG_ON(p->is_root_cache);
- cachep = p->root_cache;
- return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
-}
-
-static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
- unsigned long nr_pages)
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
+ unsigned long nr_pages)
{
struct page_counter *counter;
int ret = 0;
@@ -2533,8 +2521,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
return ret;
}
-static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
- unsigned long nr_pages)
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages)
{
page_counter_uncharge(&memcg->memory, nr_pages);
if (do_swap_account)
@@ -2579,10 +2566,7 @@ static int memcg_alloc_cache_id(void)
else if (size > MEMCG_CACHES_MAX_SIZE)
size = MEMCG_CACHES_MAX_SIZE;
- mutex_lock(&memcg_slab_mutex);
err = memcg_update_all_caches(size);
- mutex_unlock(&memcg_slab_mutex);
-
if (err) {
ida_simple_remove(&kmem_limited_groups, id);
return err;
@@ -2605,123 +2589,20 @@ void memcg_update_array_size(int num)
memcg_limited_groups_array_size = num;
}
-static void memcg_register_cache(struct mem_cgroup *memcg,
- struct kmem_cache *root_cache)
-{
- static char memcg_name_buf[NAME_MAX + 1]; /* protected by
- memcg_slab_mutex */
- struct kmem_cache *cachep;
- int id;
-
- lockdep_assert_held(&memcg_slab_mutex);
-
- id = memcg_cache_id(memcg);
-
- /*
- * Since per-memcg caches are created asynchronously on first
- * allocation (see memcg_kmem_get_cache()), several threads can try to
- * create the same cache, but only one of them may succeed.
- */
- if (cache_from_memcg_idx(root_cache, id))
- return;
-
- cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1);
- cachep = memcg_create_kmem_cache(memcg, root_cache, memcg_name_buf);
- /*
- * If we could not create a memcg cache, do not complain, because
- * that's not critical at all as we can always proceed with the root
- * cache.
- */
- if (!cachep)
- return;
-
- list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
-
- /*
- * Since readers won't lock (see cache_from_memcg_idx()), we need a
- * barrier here to ensure nobody will see the kmem_cache partially
- * initialized.
- */
- smp_wmb();
-
- BUG_ON(root_cache->memcg_params->memcg_caches[id]);
- root_cache->memcg_params->memcg_caches[id] = cachep;
-}
-
-static void memcg_unregister_cache(struct kmem_cache *cachep)
-{
- struct kmem_cache *root_cache;
- struct mem_cgroup *memcg;
- int id;
-
- lockdep_assert_held(&memcg_slab_mutex);
-
- BUG_ON(is_root_cache(cachep));
-
- root_cache = cachep->memcg_params->root_cache;
- memcg = cachep->memcg_params->memcg;
- id = memcg_cache_id(memcg);
-
- BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
- root_cache->memcg_params->memcg_caches[id] = NULL;
-
- list_del(&cachep->memcg_params->list);
-
- kmem_cache_destroy(cachep);
-}
-
-int __memcg_cleanup_cache_params(struct kmem_cache *s)
-{
- struct kmem_cache *c;
- int i, failed = 0;
-
- mutex_lock(&memcg_slab_mutex);
- for_each_memcg_cache_index(i) {
- c = cache_from_memcg_idx(s, i);
- if (!c)
- continue;
-
- memcg_unregister_cache(c);
-
- if (cache_from_memcg_idx(s, i))
- failed++;
- }
- mutex_unlock(&memcg_slab_mutex);
- return failed;
-}
-
-static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
- struct kmem_cache *cachep;
- struct memcg_cache_params *params, *tmp;
-
- if (!memcg_kmem_is_active(memcg))
- return;
-
- mutex_lock(&memcg_slab_mutex);
- list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
- cachep = memcg_params_to_cache(params);
- memcg_unregister_cache(cachep);
- }
- mutex_unlock(&memcg_slab_mutex);
-}
-
-struct memcg_register_cache_work {
+struct memcg_kmem_cache_create_work {
struct mem_cgroup *memcg;
struct kmem_cache *cachep;
struct work_struct work;
};
-static void memcg_register_cache_func(struct work_struct *w)
+static void memcg_kmem_cache_create_func(struct work_struct *w)
{
- struct memcg_register_cache_work *cw =
- container_of(w, struct memcg_register_cache_work, work);
+ struct memcg_kmem_cache_create_work *cw =
+ container_of(w, struct memcg_kmem_cache_create_work, work);
struct mem_cgroup *memcg = cw->memcg;
struct kmem_cache *cachep = cw->cachep;
- mutex_lock(&memcg_slab_mutex);
- memcg_register_cache(memcg, cachep);
- mutex_unlock(&memcg_slab_mutex);
+ memcg_create_kmem_cache(memcg, cachep);
css_put(&memcg->css);
kfree(cw);
@@ -2730,10 +2611,10 @@ static void memcg_register_cache_func(struct work_struct *w)
/*
* Enqueue the creation of a per-memcg kmem_cache.
*/
-static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
- struct kmem_cache *cachep)
+static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
+ struct kmem_cache *cachep)
{
- struct memcg_register_cache_work *cw;
+ struct memcg_kmem_cache_create_work *cw;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
if (!cw)
@@ -2743,18 +2624,18 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
cw->memcg = memcg;
cw->cachep = cachep;
+ INIT_WORK(&cw->work, memcg_kmem_cache_create_func);
- INIT_WORK(&cw->work, memcg_register_cache_func);
schedule_work(&cw->work);
}
-static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
- struct kmem_cache *cachep)
+static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
+ struct kmem_cache *cachep)
{
/*
* We need to stop accounting when we kmalloc, because if the
* corresponding kmalloc cache is not yet created, the first allocation
- * in __memcg_schedule_register_cache will recurse.
+ * in __memcg_schedule_kmem_cache_create will recurse.
*
* However, it is better to enclose the whole function. Depending on
* the debugging options enabled, INIT_WORK(), for instance, can
@@ -2763,24 +2644,10 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
* the safest choice is to do it like this, wrapping the whole function.
*/
current->memcg_kmem_skip_account = 1;
- __memcg_schedule_register_cache(memcg, cachep);
+ __memcg_schedule_kmem_cache_create(memcg, cachep);
current->memcg_kmem_skip_account = 0;
}
-int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
-{
- unsigned int nr_pages = 1 << order;
-
- return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
-}
-
-void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
-{
- unsigned int nr_pages = 1 << order;
-
- memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
-}
-
/*
* Return the kmem_cache we're supposed to use for a slab allocation.
* We try to use the current memcg's version of the cache.
@@ -2825,7 +2692,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
* could happen with the slab_mutex held. So it's better to
* defer everything.
*/
- memcg_schedule_register_cache(memcg, cachep);
+ memcg_schedule_kmem_cache_create(memcg, cachep);
out:
css_put(&memcg->css);
return cachep;
@@ -3043,18 +2910,6 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
mem_cgroup_swap_statistics(from, false);
mem_cgroup_swap_statistics(to, true);
- /*
- * This function is only called from task migration context now.
- * It postpones page_counter and refcount handling till the end
- * of task migration(mem_cgroup_clear_mc()) for performance
- * improvement. But we cannot postpone css_get(to) because if
- * the process that has been moved to @to does swap-in, the
- * refcount of @to might be decreased to 0.
- *
- * We are in attach() phase, so the cgroup is guaranteed to be
- * alive, so we can just call css_get().
- */
- css_get(&to->css);
return 0;
}
return -EINVAL;
@@ -4166,7 +4021,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
static void memcg_destroy_kmem(struct mem_cgroup *memcg)
{
- memcg_unregister_all_caches(memcg);
+ memcg_destroy_kmem_caches(memcg);
mem_cgroup_sockets_destroy(memcg);
}
#else
@@ -4679,6 +4534,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (parent_css == NULL) {
root_mem_cgroup = memcg;
page_counter_init(&memcg->memory, NULL);
+ memcg->soft_limit = PAGE_COUNTER_MAX;
page_counter_init(&memcg->memsw, NULL);
page_counter_init(&memcg->kmem, NULL);
}
@@ -4693,7 +4549,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
spin_lock_init(&memcg->event_list_lock);
#ifdef CONFIG_MEMCG_KMEM
memcg->kmemcg_id = -1;
- INIT_LIST_HEAD(&memcg->memcg_slab_caches);
#endif
return &memcg->css;
@@ -4724,6 +4579,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
if (parent->use_hierarchy) {
page_counter_init(&memcg->memory, &parent->memory);
+ memcg->soft_limit = PAGE_COUNTER_MAX;
page_counter_init(&memcg->memsw, &parent->memsw);
page_counter_init(&memcg->kmem, &parent->kmem);
@@ -4733,6 +4589,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
*/
} else {
page_counter_init(&memcg->memory, NULL);
+ memcg->soft_limit = PAGE_COUNTER_MAX;
page_counter_init(&memcg->memsw, NULL);
page_counter_init(&memcg->kmem, NULL);
/*
@@ -4807,7 +4664,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX);
mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX);
memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX);
- memcg->soft_limit = 0;
+ memcg->soft_limit = PAGE_COUNTER_MAX;
}
#ifdef CONFIG_MMU
@@ -4937,8 +4794,6 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
mapping = vma->vm_file->f_mapping;
if (pte_none(ptent))
pgoff = linear_page_index(vma, addr);
- else /* pte_file(ptent) is true */
- pgoff = pte_to_pgoff(ptent);
/* page is moved even if it's not RSS of this task(page-faulted). */
#ifdef CONFIG_SWAP
@@ -4970,7 +4825,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
page = mc_handle_present_pte(vma, addr, ptent);
else if (is_swap_pte(ptent))
page = mc_handle_swap_pte(vma, addr, ptent, &ent);
- else if (pte_none(ptent) || pte_file(ptent))
+ else if (pte_none(ptent))
page = mc_handle_file_pte(vma, addr, ptent, &ent);
if (!page && !ent.val)
diff --git a/mm/memory.c b/mm/memory.c
index d7e497e98f46..5afb6d89ac96 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -811,42 +811,40 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
/* pte contains position in swap or file, so copy. */
if (unlikely(!pte_present(pte))) {
- if (!pte_file(pte)) {
- swp_entry_t entry = pte_to_swp_entry(pte);
-
- if (likely(!non_swap_entry(entry))) {
- if (swap_duplicate(entry) < 0)
- return entry.val;
-
- /* make sure dst_mm is on swapoff's mmlist. */
- if (unlikely(list_empty(&dst_mm->mmlist))) {
- spin_lock(&mmlist_lock);
- if (list_empty(&dst_mm->mmlist))
- list_add(&dst_mm->mmlist,
- &src_mm->mmlist);
- spin_unlock(&mmlist_lock);
- }
- rss[MM_SWAPENTS]++;
- } else if (is_migration_entry(entry)) {
- page = migration_entry_to_page(entry);
-
- if (PageAnon(page))
- rss[MM_ANONPAGES]++;
- else
- rss[MM_FILEPAGES]++;
-
- if (is_write_migration_entry(entry) &&
- is_cow_mapping(vm_flags)) {
- /*
- * COW mappings require pages in both
- * parent and child to be set to read.
- */
- make_migration_entry_read(&entry);
- pte = swp_entry_to_pte(entry);
- if (pte_swp_soft_dirty(*src_pte))
- pte = pte_swp_mksoft_dirty(pte);
- set_pte_at(src_mm, addr, src_pte, pte);
- }
+ swp_entry_t entry = pte_to_swp_entry(pte);
+
+ if (likely(!non_swap_entry(entry))) {
+ if (swap_duplicate(entry) < 0)
+ return entry.val;
+
+ /* make sure dst_mm is on swapoff's mmlist. */
+ if (unlikely(list_empty(&dst_mm->mmlist))) {
+ spin_lock(&mmlist_lock);
+ if (list_empty(&dst_mm->mmlist))
+ list_add(&dst_mm->mmlist,
+ &src_mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ rss[MM_SWAPENTS]++;
+ } else if (is_migration_entry(entry)) {
+ page = migration_entry_to_page(entry);
+
+ if (PageAnon(page))
+ rss[MM_ANONPAGES]++;
+ else
+ rss[MM_FILEPAGES]++;
+
+ if (is_write_migration_entry(entry) &&
+ is_cow_mapping(vm_flags)) {
+ /*
+ * COW mappings require pages in both
+ * parent and child to be set to read.
+ */
+ make_migration_entry_read(&entry);
+ pte = swp_entry_to_pte(entry);
+ if (pte_swp_soft_dirty(*src_pte))
+ pte = pte_swp_mksoft_dirty(pte);
+ set_pte_at(src_mm, addr, src_pte, pte);
}
}
goto out_set_pte;
@@ -1020,11 +1018,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* readonly mappings. The tradeoff is that copy_page_range is more
* efficient than faulting.
*/
- if (!(vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR |
- VM_PFNMAP | VM_MIXEDMAP))) {
- if (!vma->anon_vma)
- return 0;
- }
+ if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) &&
+ !vma->anon_vma)
+ return 0;
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst_mm, src_mm, vma);
@@ -1082,6 +1078,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
spinlock_t *ptl;
pte_t *start_pte;
pte_t *pte;
+ swp_entry_t entry;
again:
init_rss_vec(rss);
@@ -1107,28 +1104,12 @@ again:
if (details->check_mapping &&
details->check_mapping != page->mapping)
continue;
- /*
- * Each page->index must be checked when
- * invalidating or truncating nonlinear.
- */
- if (details->nonlinear_vma &&
- (page->index < details->first_index ||
- page->index > details->last_index))
- continue;
}
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
- if (unlikely(details) && details->nonlinear_vma
- && linear_page_index(details->nonlinear_vma,
- addr) != page->index) {
- pte_t ptfile = pgoff_to_pte(page->index);
- if (pte_soft_dirty(ptent))
- ptfile = pte_file_mksoft_dirty(ptfile);
- set_pte_at(mm, addr, pte, ptfile);
- }
if (PageAnon(page))
rss[MM_ANONPAGES]--;
else {
@@ -1151,33 +1132,25 @@ again:
}
continue;
}
- /*
- * If details->check_mapping, we leave swap entries;
- * if details->nonlinear_vma, we leave file entries.
- */
+ /* If details->check_mapping, we leave swap entries. */
if (unlikely(details))
continue;
- if (pte_file(ptent)) {
- if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
- print_bad_pte(vma, addr, ptent, NULL);
- } else {
- swp_entry_t entry = pte_to_swp_entry(ptent);
- if (!non_swap_entry(entry))
- rss[MM_SWAPENTS]--;
- else if (is_migration_entry(entry)) {
- struct page *page;
+ entry = pte_to_swp_entry(ptent);
+ if (!non_swap_entry(entry))
+ rss[MM_SWAPENTS]--;
+ else if (is_migration_entry(entry)) {
+ struct page *page;
- page = migration_entry_to_page(entry);
+ page = migration_entry_to_page(entry);
- if (PageAnon(page))
- rss[MM_ANONPAGES]--;
- else
- rss[MM_FILEPAGES]--;
- }
- if (unlikely(!free_swap_and_cache(entry)))
- print_bad_pte(vma, addr, ptent, NULL);
+ if (PageAnon(page))
+ rss[MM_ANONPAGES]--;
+ else
+ rss[MM_FILEPAGES]--;
}
+ if (unlikely(!free_swap_and_cache(entry)))
+ print_bad_pte(vma, addr, ptent, NULL);
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -1277,7 +1250,7 @@ static void unmap_page_range(struct mmu_gather *tlb,
pgd_t *pgd;
unsigned long next;
- if (details && !details->check_mapping && !details->nonlinear_vma)
+ if (details && !details->check_mapping)
details = NULL;
BUG_ON(addr >= end);
@@ -1371,7 +1344,7 @@ void unmap_vmas(struct mmu_gather *tlb,
* @vma: vm_area_struct holding the applicable pages
* @start: starting address of pages to zap
* @size: number of bytes to zap
- * @details: details of nonlinear truncation or shared cache invalidation
+ * @details: details of shared cache invalidation
*
* Caller must protect the VMA list
*/
@@ -1397,7 +1370,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
* @vma: vm_area_struct holding the applicable pages
* @address: starting address of pages to zap
* @size: number of bytes to zap
- * @details: details of nonlinear truncation or shared cache invalidation
+ * @details: details of shared cache invalidation
*
* The range must fit into one VMA.
*/
@@ -1922,12 +1895,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
- * handle_pte_fault chooses page fault handler according to an entry
- * which was read non-atomically. Before making any commitment, on
- * those architectures or configurations (e.g. i386 with PAE) which
- * might give a mix of unmatched parts, do_swap_page and do_nonlinear_fault
- * must check under lock before unmapping the pte and proceeding
- * (but do_wp_page is only called after already making such a check;
+ * handle_pte_fault chooses page fault handler according to an entry which was
+ * read non-atomically. Before making any commitment, on those architectures
+ * or configurations (e.g. i386 with PAE) which might give a mix of unmatched
+ * parts, do_swap_page must check under lock before unmapping the pte and
+ * proceeding (but do_wp_page is only called after already making such a check;
* and do_anonymous_page can safely check later on).
*/
static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
@@ -2033,7 +2005,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t entry;
int ret = 0;
int page_mkwrite = 0;
- struct page *dirty_page = NULL;
+ bool dirty_shared = false;
unsigned long mmun_start = 0; /* For mmu_notifiers */
unsigned long mmun_end = 0; /* For mmu_notifiers */
struct mem_cgroup *memcg;
@@ -2084,6 +2056,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unlock_page(old_page);
} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED))) {
+ page_cache_get(old_page);
/*
* Only catch write-faults on shared writable pages,
* read-only shared pages can get COWed by
@@ -2091,7 +2064,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
int tmp;
- page_cache_get(old_page);
+
pte_unmap_unlock(page_table, ptl);
tmp = do_page_mkwrite(vma, old_page, address);
if (unlikely(!tmp || (tmp &
@@ -2111,11 +2084,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unlock_page(old_page);
goto unlock;
}
-
page_mkwrite = 1;
}
- dirty_page = old_page;
- get_page(dirty_page);
+
+ dirty_shared = true;
reuse:
/*
@@ -2134,38 +2106,29 @@ reuse:
pte_unmap_unlock(page_table, ptl);
ret |= VM_FAULT_WRITE;
- if (!dirty_page)
- return ret;
+ if (dirty_shared) {
+ struct address_space *mapping;
+ int dirtied;
- /*
- * Yes, Virginia, this is actually required to prevent a race
- * with clear_page_dirty_for_io() from clearing the page dirty
- * bit after it clear all dirty ptes, but before a racing
- * do_wp_page installs a dirty pte.
- *
- * do_shared_fault is protected similarly.
- */
- if (!page_mkwrite) {
- wait_on_page_locked(dirty_page);
- set_page_dirty_balance(dirty_page);
- /* file_update_time outside page_lock */
- if (vma->vm_file)
- file_update_time(vma->vm_file);
- }
- put_page(dirty_page);
- if (page_mkwrite) {
- struct address_space *mapping = dirty_page->mapping;
-
- set_page_dirty(dirty_page);
- unlock_page(dirty_page);
- page_cache_release(dirty_page);
- if (mapping) {
+ if (!page_mkwrite)
+ lock_page(old_page);
+
+ dirtied = set_page_dirty(old_page);
+ VM_BUG_ON_PAGE(PageAnon(old_page), old_page);
+ mapping = old_page->mapping;
+ unlock_page(old_page);
+ page_cache_release(old_page);
+
+ if ((dirtied || page_mkwrite) && mapping) {
/*
* Some device drivers do not set page.mapping
* but still dirty their pages
*/
balance_dirty_pages_ratelimited(mapping);
}
+
+ if (!page_mkwrite)
+ file_update_time(vma->vm_file);
}
return ret;
@@ -2324,25 +2287,11 @@ static inline void unmap_mapping_range_tree(struct rb_root *root,
}
}
-static inline void unmap_mapping_range_list(struct list_head *head,
- struct zap_details *details)
-{
- struct vm_area_struct *vma;
-
- /*
- * In nonlinear VMAs there is no correspondence between virtual address
- * offset and file offset. So we must perform an exhaustive search
- * across *all* the pages in each nonlinear VMA, not just the pages
- * whose virtual address lies outside the file truncation point.
- */
- list_for_each_entry(vma, head, shared.nonlinear) {
- details->nonlinear_vma = vma;
- unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
- }
-}
-
/**
- * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
+ * unmap_mapping_range - unmap the portion of all mmaps in the specified
+ * address_space corresponding to the specified page range in the underlying
+ * file.
+ *
* @mapping: the address space containing mmaps to be unmapped.
* @holebegin: byte in first page to unmap, relative to the start of
* the underlying file. This will be rounded down to a PAGE_SIZE
@@ -2371,7 +2320,6 @@ void unmap_mapping_range(struct address_space *mapping,
}
details.check_mapping = even_cows? NULL: mapping;
- details.nonlinear_vma = NULL;
details.first_index = hba;
details.last_index = hba + hlen - 1;
if (details.last_index < details.first_index)
@@ -2381,8 +2329,6 @@ void unmap_mapping_range(struct address_space *mapping,
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
- if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
- unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
i_mmap_unlock_write(mapping);
}
EXPORT_SYMBOL(unmap_mapping_range);
@@ -2743,8 +2689,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
entry = mk_pte(page, vma->vm_page_prot);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
- entry = pte_mksoft_dirty(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
@@ -2879,8 +2823,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* if page by the offset is not ready to be mapped (cold cache or
* something).
*/
- if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
- fault_around_bytes >> PAGE_SHIFT > 1) {
+ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
do_fault_around(vma, address, pte, pgoff, flags);
if (!pte_same(*pte, orig_pte))
@@ -2966,6 +2909,8 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;
+ WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -3012,8 +2957,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
balance_dirty_pages_ratelimited(mapping);
}
- /* file_update_time outside page_lock */
- if (vma->vm_file && !vma->vm_ops->page_mkwrite)
+ if (!vma->vm_ops->page_mkwrite)
file_update_time(vma->vm_file);
return ret;
@@ -3025,7 +2969,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
-static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
{
@@ -3042,46 +2986,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
-/*
- * Fault of a previously existing named mapping. Repopulate the pte
- * from the encoded file_pte if possible. This enables swappable
- * nonlinear vmas.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with pte unmapped and unlocked.
- * The mmap_sem may have been released depending on flags and our
- * return value. See filemap_fault() and __lock_page_or_retry().
- */
-static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
- unsigned int flags, pte_t orig_pte)
-{
- pgoff_t pgoff;
-
- flags |= FAULT_FLAG_NONLINEAR;
-
- if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
- return 0;
-
- if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
- /*
- * Page table corrupted: show pte and kill process.
- */
- print_bad_pte(vma, address, orig_pte, NULL);
- return VM_FAULT_SIGBUS;
- }
-
- pgoff = pte_to_pgoff(orig_pte);
- if (!(flags & FAULT_FLAG_WRITE))
- return do_read_fault(mm, vma, address, pmd, pgoff, flags,
- orig_pte);
- if (!(vma->vm_flags & VM_SHARED))
- return do_cow_fault(mm, vma, address, pmd, pgoff, flags,
- orig_pte);
- return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
-}
-
static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
unsigned long addr, int page_nid,
int *flags)
@@ -3209,15 +3113,12 @@ static int handle_pte_fault(struct mm_struct *mm,
if (pte_none(entry)) {
if (vma->vm_ops) {
if (likely(vma->vm_ops->fault))
- return do_linear_fault(mm, vma, address,
- pte, pmd, flags, entry);
+ return do_fault(mm, vma, address, pte,
+ pmd, flags, entry);
}
return do_anonymous_page(mm, vma, address,
pte, pmd, flags);
}
- if (pte_file(entry))
- return do_nonlinear_fault(mm, vma, address,
- pte, pmd, flags, entry);
return do_swap_page(mm, vma, address,
pte, pmd, flags, entry);
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9fab10795bea..b82b61e94bb8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1331,7 +1331,7 @@ int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
}
/*
- * Confirm all pages in a range [start, end) is belongs to the same zone.
+ * Confirm all pages in a range [start, end) belong to the same zone.
*/
int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -1342,10 +1342,11 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
for (pfn = start_pfn;
pfn < end_pfn;
pfn += MAX_ORDER_NR_PAGES) {
- i = 0;
- /* This is just a CONFIG_HOLES_IN_ZONE check.*/
- while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
- i++;
+ /* Find the first valid pfn in this pageblock */
+ for (i = 0; i < MAX_ORDER_NR_PAGES; i++) {
+ if (pfn_valid(pfn + i))
+ break;
+ }
if (i == MAX_ORDER_NR_PAGES)
continue;
page = pfn_to_page(pfn + i);
diff --git a/mm/migrate.c b/mm/migrate.c
index 344cdf692fc8..6e284bcca8bb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -179,37 +179,6 @@ out:
}
/*
- * Congratulations to trinity for discovering this bug.
- * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
- * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
- * replace the specified range by file ptes throughout (maybe populated after).
- * If page migration finds a page within that range, while it's still located
- * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
- * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
- * But if the migrating page is in a part of the vma outside the range to be
- * remapped, then it will not be cleared, and remove_migration_ptes() needs to
- * deal with it. Fortunately, this part of the vma is of course still linear,
- * so we just need to use linear location on the nonlinear list.
- */
-static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
- struct address_space *mapping, void *arg)
-{
- struct vm_area_struct *vma;
- /* hugetlbfs does not support remap_pages, so no huge pgoff worries */
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
- unsigned long addr;
-
- list_for_each_entry(vma,
- &mapping->i_mmap_nonlinear, shared.nonlinear) {
-
- addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
- if (addr >= vma->vm_start && addr < vma->vm_end)
- remove_migration_pte(page, vma, addr, arg);
- }
- return SWAP_AGAIN;
-}
-
-/*
* Get rid of all migration entries and replace them by
* references to the indicated page.
*/
@@ -218,7 +187,6 @@ static void remove_migration_ptes(struct page *old, struct page *new)
struct rmap_walk_control rwc = {
.rmap_one = remove_migration_pte,
.arg = old,
- .file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
};
rmap_walk(new, &rwc);
diff --git a/mm/mincore.c b/mm/mincore.c
index c8c528b36641..46527c023e0c 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -124,17 +124,13 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
do {
pte_t pte = *ptep;
- pgoff_t pgoff;
next = addr + PAGE_SIZE;
if (pte_none(pte))
mincore_unmapped_range(vma, addr, next, vec);
else if (pte_present(pte))
*vec = 1;
- else if (pte_file(pte)) {
- pgoff = pte_to_pgoff(pte);
- *vec = mincore_page(vma->vm_file->f_mapping, pgoff);
- } else { /* pte is a swap entry */
+ else { /* pte is a swap entry */
swp_entry_t entry = pte_to_swp_entry(pte);
if (non_swap_entry(entry)) {
@@ -145,9 +141,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
*vec = 1;
} else {
#ifdef CONFIG_SWAP
- pgoff = entry.val;
*vec = mincore_page(swap_address_space(entry),
- pgoff);
+ entry.val);
#else
WARN_ON(1);
*vec = 1;
diff --git a/mm/mmap.c b/mm/mmap.c
index 7b36aa7cc89a..e58a3b453a2a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -243,10 +243,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
mapping_unmap_writable(mapping);
flush_dcache_mmap_lock(mapping);
- if (unlikely(vma->vm_flags & VM_NONLINEAR))
- list_del_init(&vma->shared.nonlinear);
- else
- vma_interval_tree_remove(vma, &mapping->i_mmap);
+ vma_interval_tree_remove(vma, &mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
}
@@ -649,10 +646,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
atomic_inc(&mapping->i_mmap_writable);
flush_dcache_mmap_lock(mapping);
- if (unlikely(vma->vm_flags & VM_NONLINEAR))
- vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
- else
- vma_interval_tree_insert(vma, &mapping->i_mmap);
+ vma_interval_tree_insert(vma, &mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
}
}
@@ -787,14 +781,11 @@ again: remove_next = 1 + (end > next->vm_end);
if (file) {
mapping = file->f_mapping;
- if (!(vma->vm_flags & VM_NONLINEAR)) {
- root = &mapping->i_mmap;
- uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+ root = &mapping->i_mmap;
+ uprobe_munmap(vma, vma->vm_start, vma->vm_end);
- if (adjust_next)
- uprobe_munmap(next, next->vm_start,
- next->vm_end);
- }
+ if (adjust_next)
+ uprobe_munmap(next, next->vm_start, next->vm_end);
i_mmap_lock_write(mapping);
if (insert) {
@@ -2629,6 +2620,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
return vm_munmap(addr, len);
}
+
+/*
+ * Emulation of deprecated remap_file_pages() syscall.
+ */
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+ unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
+{
+
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long populate = 0;
+ unsigned long ret = -EINVAL;
+ struct file *file;
+
+ pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
+ "See Documentation/vm/remap_file_pages.txt.\n",
+ current->comm, current->pid);
+
+ if (prot)
+ return ret;
+ start = start & PAGE_MASK;
+ size = size & PAGE_MASK;
+
+ if (start + size <= start)
+ return ret;
+
+ /* Does pgoff wrap? */
+ if (pgoff + (size >> PAGE_SHIFT) < pgoff)
+ return ret;
+
+ down_write(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+
+ if (!vma || !(vma->vm_flags & VM_SHARED))
+ goto out;
+
+ if (start < vma->vm_start || start + size > vma->vm_end)
+ goto out;
+
+ if (pgoff == linear_page_index(vma, start)) {
+ ret = 0;
+ goto out;
+ }
+
+ prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+ prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+ prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+ flags &= MAP_NONBLOCK;
+ flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+ if (vma->vm_flags & VM_LOCKED) {
+ flags |= MAP_LOCKED;
+ /* drop PG_Mlocked flag for over-mapped range */
+ munlock_vma_pages_range(vma, start, start + size);
+ }
+
+ file = get_file(vma->vm_file);
+ ret = do_mmap_pgoff(vma->vm_file, start, size,
+ prot, flags, pgoff, &populate);
+ fput(file);
+out:
+ up_write(&mm->mmap_sem);
+ if (populate)
+ mm_populate(ret, populate);
+ if (!IS_ERR_VALUE(ret))
+ ret = 0;
+ return ret;
+}
+
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
@@ -3103,8 +3163,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
*
* mmap_sem in write mode is required in order to block all operations
* that could modify pagetables and free pages without need of
- * altering the vma layout (for example populate_range() with
- * nonlinear vmas). It's also needed in write mode to avoid new
+ * altering the vma layout. It's also needed in write mode to avoid new
* anon_vmas to be associated with existing vmas.
*
* A single task can't take more than one mm_take_all_locks() in a row
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ace93454ce8e..33121662f08b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -105,7 +105,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
}
if (updated)
pages++;
- } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
+ } else if (IS_ENABLED(CONFIG_MIGRATION)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
if (is_write_migration_entry(entry)) {
diff --git a/mm/mremap.c b/mm/mremap.c
index 17fa018f5f39..57dadc025c64 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -81,8 +81,6 @@ static pte_t move_soft_dirty_pte(pte_t pte)
pte = pte_mksoft_dirty(pte);
else if (is_swap_pte(pte))
pte = pte_swp_mksoft_dirty(pte);
- else if (pte_file(pte))
- pte = pte_file_mksoft_dirty(pte);
#endif
return pte;
}
diff --git a/mm/msync.c b/mm/msync.c
index 992a1673d488..bb04d53ae852 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -86,10 +86,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
(vma->vm_flags & VM_SHARED)) {
get_file(file);
up_read(&mm->mmap_sem);
- if (vma->vm_flags & VM_NONLINEAR)
- error = vfs_fsync(file, 1);
- else
- error = vfs_fsync_range(file, fstart, fend, 1);
+ error = vfs_fsync_range(file, fstart, fend, 1);
fput(file);
if (error || start >= end)
goto out;
diff --git a/mm/nommu.c b/mm/nommu.c
index b51eadf6d952..099cc72aa39e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1983,14 +1983,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_map_pages);
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff)
-{
- BUG();
- return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, int write)
{
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d503e9ce1c7b..294493a7ae4b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -438,11 +438,14 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
* If the task is already exiting, don't alarm the sysadmin or kill
* its children or threads, just set TIF_MEMDIE so it can die quickly
*/
- if (task_will_free_mem(p)) {
+ task_lock(p);
+ if (p->mm && task_will_free_mem(p)) {
set_tsk_thread_flag(p, TIF_MEMDIE);
+ task_unlock(p);
put_task_struct(p);
return;
}
+ task_unlock(p);
if (__ratelimit(&oom_rs))
dump_header(p, gfp_mask, order, memcg, nodemask);
@@ -492,6 +495,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
/* mm cannot safely be dereferenced after task_unlock(victim) */
mm = victim->mm;
+ set_tsk_thread_flag(victim, TIF_MEMDIE);
pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
K(get_mm_counter(victim->mm, MM_ANONPAGES)),
@@ -522,7 +526,6 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
}
rcu_read_unlock();
- set_tsk_thread_flag(victim, TIF_MEMDIE);
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
put_task_struct(victim);
}
@@ -643,8 +646,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
* If current has a pending SIGKILL or is exiting, then automatically
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
+ *
+ * But don't select if current has already released its mm and cleared
+ * TIF_MEMDIE flag at exit_mm(), otherwise an OOM livelock may occur.
*/
- if (fatal_signal_pending(current) || task_will_free_mem(current)) {
+ if (current->mm &&
+ (fatal_signal_pending(current) || task_will_free_mem(current))) {
set_thread_flag(TIF_MEMDIE);
return;
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d5d81f5384d1..fb71e9deca85 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1541,16 +1541,6 @@ pause:
bdi_start_background_writeback(bdi);
}
-void set_page_dirty_balance(struct page *page)
-{
- if (set_page_dirty(page)) {
- struct address_space *mapping = page_mapping(page);
-
- if (mapping)
- balance_dirty_pages_ratelimited(mapping);
- }
-}
-
static DEFINE_PER_CPU(int, bdp_ratelimits);
/*
@@ -2123,32 +2113,25 @@ EXPORT_SYMBOL(account_page_dirtied);
* page dirty in that case, but not all the buffers. This is a "bottom-up"
* dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
*
- * Most callers have locked the page, which pins the address_space in memory.
- * But zap_pte_range() does not lock the page, however in that case the
- * mapping is pinned by the vma's ->vm_file reference.
- *
- * We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() inside tree_lock.
+ * The caller must ensure this doesn't race with truncation. Most will simply
+ * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
+ * the pte lock held, which also locks out truncation.
*/
int __set_page_dirty_nobuffers(struct page *page)
{
if (!TestSetPageDirty(page)) {
struct address_space *mapping = page_mapping(page);
- struct address_space *mapping2;
unsigned long flags;
if (!mapping)
return 1;
spin_lock_irqsave(&mapping->tree_lock, flags);
- mapping2 = page_mapping(page);
- if (mapping2) { /* Race with truncate? */
- BUG_ON(mapping2 != mapping);
- WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
- account_page_dirtied(page, mapping);
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
- }
+ BUG_ON(page_mapping(page) != mapping);
+ WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+ account_page_dirtied(page, mapping);
+ radix_tree_tag_set(&mapping->page_tree, page_index(page),
+ PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
if (mapping->host) {
/* !PageAnon && !swapper_space */
@@ -2305,12 +2288,10 @@ int clear_page_dirty_for_io(struct page *page)
/*
* We carefully synchronise fault handlers against
* installing a dirty pte and marking the page dirty
- * at this point. We do this by having them hold the
- * page lock at some point after installing their
- * pte, but before marking the page dirty.
- * Pages are always locked coming in here, so we get
- * the desired exclusion. See mm/memory.c:do_wp_page()
- * for more comments.
+ * at this point. We do this by having them hold the
+ * page lock while dirtying the page, and pages are
+ * always locked coming in here, so we get the desired
+ * exclusion.
*/
if (TestClearPageDirty(page)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
@@ -2327,12 +2308,10 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
int test_clear_page_writeback(struct page *page)
{
struct address_space *mapping = page_mapping(page);
- unsigned long memcg_flags;
struct mem_cgroup *memcg;
- bool locked;
int ret;
- memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
+ memcg = mem_cgroup_begin_page_stat(page);
if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
@@ -2357,19 +2336,17 @@ int test_clear_page_writeback(struct page *page)
dec_zone_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_WRITTEN);
}
- mem_cgroup_end_page_stat(memcg, &locked, &memcg_flags);
+ mem_cgroup_end_page_stat(memcg);
return ret;
}
int __test_set_page_writeback(struct page *page, bool keep_write)
{
struct address_space *mapping = page_mapping(page);
- unsigned long memcg_flags;
struct mem_cgroup *memcg;
- bool locked;
int ret;
- memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
+ memcg = mem_cgroup_begin_page_stat(page);
if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
@@ -2399,7 +2376,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
inc_zone_page_state(page, NR_WRITEBACK);
}
- mem_cgroup_end_page_stat(memcg, &locked, &memcg_flags);
+ mem_cgroup_end_page_stat(memcg);
return ret;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7633c503a116..5ed7f93d0152 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -381,36 +381,6 @@ void prep_compound_page(struct page *page, unsigned long order)
}
}
-/* update __split_huge_page_refcount if you change this function */
-static int destroy_compound_page(struct page *page, unsigned long order)
-{
- int i;
- int nr_pages = 1 << order;
- int bad = 0;
-
- if (unlikely(compound_order(page) != order)) {
- bad_page(page, "wrong compound order", 0);
- bad++;
- }
-
- __ClearPageHead(page);
-
- for (i = 1; i < nr_pages; i++) {
- struct page *p = page + i;
-
- if (unlikely(!PageTail(p))) {
- bad_page(page, "PageTail not set", 0);
- bad++;
- } else if (unlikely(p->first_page != page)) {
- bad_page(page, "first_page not consistent", 0);
- bad++;
- }
- __ClearPageTail(p);
- }
-
- return bad;
-}
-
static inline void prep_zero_page(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
@@ -552,17 +522,15 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
return 0;
if (page_is_guard(buddy) && page_order(buddy) == order) {
- VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
-
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
+ VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
return 1;
}
if (PageBuddy(buddy) && page_order(buddy) == order) {
- VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
-
/*
* zone check is done late to avoid uselessly
* calculating zone/node ids for pages that could
@@ -571,6 +539,8 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
+ VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
return 1;
}
return 0;
@@ -613,10 +583,7 @@ static inline void __free_one_page(struct page *page,
int max_order = MAX_ORDER;
VM_BUG_ON(!zone_is_initialized(zone));
-
- if (unlikely(PageCompound(page)))
- if (unlikely(destroy_compound_page(page, order)))
- return;
+ VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
VM_BUG_ON(migratetype == -1);
if (is_migrate_isolate(migratetype)) {
@@ -2332,12 +2299,21 @@ static inline struct page *
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, struct zone *preferred_zone,
- int classzone_idx, int migratetype)
+ int classzone_idx, int migratetype, unsigned long *did_some_progress)
{
struct page *page;
- /* Acquire the per-zone oom lock for each zone */
+ *did_some_progress = 0;
+
+ if (oom_killer_disabled)
+ return NULL;
+
+ /*
+ * Acquire the per-zone oom lock for each zone. If that
+ * fails, somebody else is making progress for us.
+ */
if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
+ *did_some_progress = 1;
schedule_timeout_uninterruptible(1);
return NULL;
}
@@ -2363,12 +2339,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
goto out;
if (!(gfp_mask & __GFP_NOFAIL)) {
+ /* Coredumps can quickly deplete all memory reserves */
+ if (current->flags & PF_DUMPCORE)
+ goto out;
/* The OOM killer will not help higher order allocs */
if (order > PAGE_ALLOC_COSTLY_ORDER)
goto out;
/* The OOM killer does not needlessly kill tasks for lowmem */
if (high_zoneidx < ZONE_NORMAL)
goto out;
+ /* The OOM killer does not compensate for light reclaim */
+ if (!(gfp_mask & __GFP_FS))
+ goto out;
/*
* GFP_THISNODE contains __GFP_NORETRY and we never hit this.
* Sanity check for bare calls of __GFP_THISNODE, not real OOM.
@@ -2381,7 +2363,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
}
/* Exhausted what can be done so it's blamo time */
out_of_memory(zonelist, gfp_mask, order, nodemask, false);
-
+ *did_some_progress = 1;
out:
oom_zonelist_unlock(zonelist, gfp_mask);
return page;
@@ -2658,7 +2640,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
(gfp_mask & GFP_THISNODE) == GFP_THISNODE)
goto nopage;
-restart:
+retry:
if (!(gfp_mask & __GFP_NO_KSWAPD))
wake_all_kswapds(order, zonelist, high_zoneidx,
preferred_zone, nodemask);
@@ -2681,7 +2663,6 @@ restart:
classzone_idx = zonelist_zone_idx(preferred_zoneref);
}
-rebalance:
/* This is the last chance, in general, before the goto nopage. */
page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2788,54 +2769,28 @@ rebalance:
if (page)
goto got_pg;
- /*
- * If we failed to make any progress reclaiming, then we are
- * running out of options and have to consider going OOM
- */
- if (!did_some_progress) {
- if (oom_gfp_allowed(gfp_mask)) {
- if (oom_killer_disabled)
- goto nopage;
- /* Coredumps can quickly deplete all memory reserves */
- if ((current->flags & PF_DUMPCORE) &&
- !(gfp_mask & __GFP_NOFAIL))
- goto nopage;
- page = __alloc_pages_may_oom(gfp_mask, order,
- zonelist, high_zoneidx,
- nodemask, preferred_zone,
- classzone_idx, migratetype);
- if (page)
- goto got_pg;
-
- if (!(gfp_mask & __GFP_NOFAIL)) {
- /*
- * The oom killer is not called for high-order
- * allocations that may fail, so if no progress
- * is being made, there are no other options and
- * retrying is unlikely to help.
- */
- if (order > PAGE_ALLOC_COSTLY_ORDER)
- goto nopage;
- /*
- * The oom killer is not called for lowmem
- * allocations to prevent needlessly killing
- * innocent tasks.
- */
- if (high_zoneidx < ZONE_NORMAL)
- goto nopage;
- }
-
- goto restart;
- }
- }
-
/* Check if we should retry the allocation */
pages_reclaimed += did_some_progress;
if (should_alloc_retry(gfp_mask, order, did_some_progress,
pages_reclaimed)) {
+ /*
+ * If we fail to make progress by freeing individual
+ * pages, but the allocation wants us to keep going,
+ * start OOM killing tasks.
+ */
+ if (!did_some_progress) {
+ page = __alloc_pages_may_oom(gfp_mask, order, zonelist,
+ high_zoneidx, nodemask,
+ preferred_zone, classzone_idx,
+ migratetype,&did_some_progress);
+ if (page)
+ goto got_pg;
+ if (!did_some_progress)
+ goto nopage;
+ }
/* Wait for some write requests to complete then retry */
wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
- goto rebalance;
+ goto retry;
} else {
/*
* High-order allocations do not necessarily loop after
@@ -2854,11 +2809,7 @@ rebalance:
nopage:
warn_alloc_failed(gfp_mask, order, NULL);
- return page;
got_pg:
- if (kmemcheck_enabled)
- kmemcheck_pagealloc_alloc(page, order, gfp_mask);
-
return page;
}
@@ -2877,6 +2828,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
int classzone_idx;
+ gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
gfp_mask &= gfp_allowed_mask;
@@ -2910,22 +2862,27 @@ retry_cpuset:
classzone_idx = zonelist_zone_idx(preferred_zoneref);
/* First allocation attempt */
- page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
- zonelist, high_zoneidx, alloc_flags,
- preferred_zone, classzone_idx, migratetype);
+ alloc_mask = gfp_mask|__GFP_HARDWALL;
+ page = get_page_from_freelist(alloc_mask, nodemask, order, zonelist,
+ high_zoneidx, alloc_flags, preferred_zone,
+ classzone_idx, migratetype);
if (unlikely(!page)) {
/*
* Runtime PM, block IO and its error handling path
* can deadlock because I/O on the device might not
* complete.
*/
- gfp_mask = memalloc_noio_flags(gfp_mask);
- page = __alloc_pages_slowpath(gfp_mask, order,
+ alloc_mask = memalloc_noio_flags(gfp_mask);
+
+ page = __alloc_pages_slowpath(alloc_mask, order,
zonelist, high_zoneidx, nodemask,
preferred_zone, classzone_idx, migratetype);
}
- trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+ if (kmemcheck_enabled && page)
+ kmemcheck_pagealloc_alloc(page, order, gfp_mask);
+
+ trace_mm_page_alloc(page, order, alloc_mask, migratetype);
out:
/*
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 72f5ac381ab3..e5d9c5277966 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -176,8 +176,11 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
undo:
for (pfn = start_pfn;
pfn < undo_pfn;
- pfn += pageblock_nr_pages)
- unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
+ pfn += pageblock_nr_pages) {
+ page = __first_valid_page(pfn, pageblock_nr_pages);
+ if (page)
+ unset_migratetype_isolate(page, migratetype);
+ }
return -EBUSY;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index c5bc241127b2..47b3ba87c2dd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -72,6 +72,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
if (anon_vma) {
atomic_set(&anon_vma->refcount, 1);
+ anon_vma->degree = 1; /* Reference for first vma */
+ anon_vma->parent = anon_vma;
/*
* Initialise the anon_vma root to point to itself. If called
* from fork, the root will be reset to the parents anon_vma.
@@ -188,6 +190,8 @@ int anon_vma_prepare(struct vm_area_struct *vma)
if (likely(!vma->anon_vma)) {
vma->anon_vma = anon_vma;
anon_vma_chain_link(vma, avc, anon_vma);
+ /* vma reference or self-parent link for new root */
+ anon_vma->degree++;
allocated = NULL;
avc = NULL;
}
@@ -236,6 +240,14 @@ static inline void unlock_anon_vma_root(struct anon_vma *root)
/*
* Attach the anon_vmas from src to dst.
* Returns 0 on success, -ENOMEM on failure.
+ *
+ * If dst->anon_vma is NULL this function tries to find and reuse existing
+ * anon_vma which has no vmas and only one child anon_vma. This prevents
+ * degradation of anon_vma hierarchy to endless linear chain in case of
+ * constantly forking task. On the other hand, an anon_vma with more than one
+ * child isn't reused even if there was no alive vma, thus rmap walker has a
+ * good chance of avoiding scanning the whole hierarchy when it searches where
+ * page is mapped.
*/
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
@@ -256,7 +268,21 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
anon_vma = pavc->anon_vma;
root = lock_anon_vma_root(root, anon_vma);
anon_vma_chain_link(dst, avc, anon_vma);
+
+ /*
+ * Reuse existing anon_vma if its degree lower than two,
+ * that means it has no vma and only one anon_vma child.
+ *
+ * Do not chose parent anon_vma, otherwise first child
+ * will always reuse it. Root anon_vma is never reused:
+ * it has self-parent reference and at least one child.
+ */
+ if (!dst->anon_vma && anon_vma != src->anon_vma &&
+ anon_vma->degree < 2)
+ dst->anon_vma = anon_vma;
}
+ if (dst->anon_vma)
+ dst->anon_vma->degree++;
unlock_anon_vma_root(root);
return 0;
@@ -280,6 +306,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
if (!pvma->anon_vma)
return 0;
+ /* Drop inherited anon_vma, we'll reuse existing or allocate new. */
+ vma->anon_vma = NULL;
+
/*
* First, attach the new VMA to the parent VMA's anon_vmas,
* so rmap can find non-COWed pages in child processes.
@@ -288,6 +317,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
if (error)
return error;
+ /* An existing anon_vma has been reused, all done then. */
+ if (vma->anon_vma)
+ return 0;
+
/* Then add our own anon_vma. */
anon_vma = anon_vma_alloc();
if (!anon_vma)
@@ -301,6 +334,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
* lock any of the anon_vmas in this anon_vma tree.
*/
anon_vma->root = pvma->anon_vma->root;
+ anon_vma->parent = pvma->anon_vma;
/*
* With refcounts, an anon_vma can stay around longer than the
* process it belongs to. The root anon_vma needs to be pinned until
@@ -311,6 +345,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
vma->anon_vma = anon_vma;
anon_vma_lock_write(anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
+ anon_vma->parent->degree++;
anon_vma_unlock_write(anon_vma);
return 0;
@@ -341,12 +376,16 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
* Leave empty anon_vmas on the list - we'll need
* to free them outside the lock.
*/
- if (RB_EMPTY_ROOT(&anon_vma->rb_root))
+ if (RB_EMPTY_ROOT(&anon_vma->rb_root)) {
+ anon_vma->parent->degree--;
continue;
+ }
list_del(&avc->same_vma);
anon_vma_chain_free(avc);
}
+ if (vma->anon_vma)
+ vma->anon_vma->degree--;
unlock_anon_vma_root(root);
/*
@@ -357,6 +396,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
+ BUG_ON(anon_vma->degree);
put_anon_vma(anon_vma);
list_del(&avc->same_vma);
@@ -550,9 +590,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
if (!vma->anon_vma || !page__anon_vma ||
vma->anon_vma->root != page__anon_vma->root)
return -EFAULT;
- } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
- if (!vma->vm_file ||
- vma->vm_file->f_mapping != page->mapping)
+ } else if (page->mapping) {
+ if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
return -EFAULT;
} else
return -EFAULT;
@@ -666,6 +705,7 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
}
struct page_referenced_arg {
+ int dirtied;
int mapcount;
int referenced;
unsigned long vm_flags;
@@ -680,6 +720,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
int referenced = 0;
+ int dirty = 0;
struct page_referenced_arg *pra = arg;
if (unlikely(PageTransHuge(page))) {
@@ -703,6 +744,15 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
/* go ahead even if the pmd is pmd_trans_splitting() */
if (pmdp_clear_flush_young_notify(vma, address, pmd))
referenced++;
+
+ /*
+ * Use pmd_freeable instead of raw pmd_dirty because in some
+ * of architecture, pmd_dirty is not defined unless
+ * CONFIG_TRANSPARENT_HUGEPAGE is enabled
+ */
+ if (!pmd_freeable(*pmd))
+ dirty++;
+
spin_unlock(ptl);
} else {
pte_t *pte;
@@ -732,6 +782,10 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
if (likely(!(vma->vm_flags & VM_SEQ_READ)))
referenced++;
}
+
+ if (pte_dirty(*pte))
+ dirty++;
+
pte_unmap_unlock(pte, ptl);
}
@@ -740,6 +794,9 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
pra->vm_flags |= vma->vm_flags;
}
+ if (dirty)
+ pra->dirtied++;
+
pra->mapcount--;
if (!pra->mapcount)
return SWAP_SUCCESS; /* To break the loop */
@@ -764,6 +821,7 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
* @is_locked: caller holds lock on the page
* @memcg: target memory cgroup
* @vm_flags: collect encountered vma->vm_flags who actually referenced the page
+ * @is_pte_dirty: ptes which have marked dirty bit - used for lazyfree page
*
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of ptes which referenced the page.
@@ -771,7 +829,8 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
int page_referenced(struct page *page,
int is_locked,
struct mem_cgroup *memcg,
- unsigned long *vm_flags)
+ unsigned long *vm_flags,
+ int *is_pte_dirty)
{
int ret;
int we_locked = 0;
@@ -786,6 +845,9 @@ int page_referenced(struct page *page,
};
*vm_flags = 0;
+ if (is_pte_dirty)
+ *is_pte_dirty = 0;
+
if (!page_mapped(page))
return 0;
@@ -813,6 +875,9 @@ int page_referenced(struct page *page,
if (we_locked)
unlock_page(page);
+ if (is_pte_dirty)
+ *is_pte_dirty = pra.dirtied;
+
return pra.referenced;
}
@@ -1046,24 +1111,20 @@ void page_add_new_anon_rmap(struct page *page,
void page_add_file_rmap(struct page *page)
{
struct mem_cgroup *memcg;
- unsigned long flags;
- bool locked;
- memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ memcg = mem_cgroup_begin_page_stat(page);
if (atomic_inc_and_test(&page->_mapcount)) {
__inc_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
}
- mem_cgroup_end_page_stat(memcg, &locked, &flags);
+ mem_cgroup_end_page_stat(memcg);
}
static void page_remove_file_rmap(struct page *page)
{
struct mem_cgroup *memcg;
- unsigned long flags;
- bool locked;
- memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ memcg = mem_cgroup_begin_page_stat(page);
/* page still mapped by someone else? */
if (!atomic_add_negative(-1, &page->_mapcount))
@@ -1084,7 +1145,7 @@ static void page_remove_file_rmap(struct page *page)
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
out:
- mem_cgroup_end_page_stat(memcg, &locked, &flags);
+ mem_cgroup_end_page_stat(memcg);
}
/**
@@ -1145,6 +1206,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
spinlock_t *ptl;
int ret = SWAP_AGAIN;
enum ttu_flags flags = (enum ttu_flags)arg;
+ int dirty = 0;
pte = page_check_address(page, mm, address, &ptl, 0);
if (!pte)
@@ -1174,7 +1236,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
pteval = ptep_clear_flush(vma, address, pte);
/* Move the dirty bit to the physical page now the pte is gone. */
- if (pte_dirty(pteval))
+ dirty = pte_dirty(pteval);
+ if (dirty)
set_page_dirty(page);
/* Update high watermark before we lower rss */
@@ -1203,6 +1266,19 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
swp_entry_t entry = { .val = page_private(page) };
pte_t swp_pte;
+ if (flags & TTU_FREE) {
+ VM_BUG_ON_PAGE(PageSwapCache(page), page);
+ if (!dirty && !PageDirty(page)) {
+ /* It's a freeable page by MADV_FREE */
+ dec_mm_counter(mm, MM_ANONPAGES);
+ goto discard;
+ } else {
+ set_pte_at(mm, address, pte, pteval);
+ ret = SWAP_FAIL;
+ goto out_unmap;
+ }
+ }
+
if (PageSwapCache(page)) {
/*
* Store the swap location in the pte.
@@ -1234,7 +1310,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
set_pte_at(mm, address, pte, swp_pte);
- BUG_ON(pte_file(*pte));
} else if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION)) {
/* Establish migration entry for a file page */
@@ -1244,6 +1319,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
} else
dec_mm_counter(mm, MM_FILEPAGES);
+discard:
page_remove_rmap(page);
page_cache_release(page);
@@ -1276,211 +1352,6 @@ out_mlock:
return ret;
}
-/*
- * objrmap doesn't work for nonlinear VMAs because the assumption that
- * offset-into-file correlates with offset-into-virtual-addresses does not hold.
- * Consequently, given a particular page and its ->index, we cannot locate the
- * ptes which are mapping that page without an exhaustive linear search.
- *
- * So what this code does is a mini "virtual scan" of each nonlinear VMA which
- * maps the file to which the target page belongs. The ->vm_private_data field
- * holds the current cursor into that scan. Successive searches will circulate
- * around the vma's virtual address space.
- *
- * So as more replacement pressure is applied to the pages in a nonlinear VMA,
- * more scanning pressure is placed against them as well. Eventually pages
- * will become fully unmapped and are eligible for eviction.
- *
- * For very sparsely populated VMAs this is a little inefficient - chances are
- * there there won't be many ptes located within the scan cluster. In this case
- * maybe we could scan further - to the end of the pte page, perhaps.
- *
- * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
- * acquire it without blocking. If vma locked, mlock the pages in the cluster,
- * rather than unmapping them. If we encounter the "check_page" that vmscan is
- * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
- */
-#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
-#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
-
-static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
- struct vm_area_struct *vma, struct page *check_page)
-{
- struct mm_struct *mm = vma->vm_mm;
- pmd_t *pmd;
- pte_t *pte;
- pte_t pteval;
- spinlock_t *ptl;
- struct page *page;
- unsigned long address;
- unsigned long mmun_start; /* For mmu_notifiers */
- unsigned long mmun_end; /* For mmu_notifiers */
- unsigned long end;
- int ret = SWAP_AGAIN;
- int locked_vma = 0;
-
- address = (vma->vm_start + cursor) & CLUSTER_MASK;
- end = address + CLUSTER_SIZE;
- if (address < vma->vm_start)
- address = vma->vm_start;
- if (end > vma->vm_end)
- end = vma->vm_end;
-
- pmd = mm_find_pmd(mm, address);
- if (!pmd)
- return ret;
-
- mmun_start = address;
- mmun_end = end;
- mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-
- /*
- * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
- * keep the sem while scanning the cluster for mlocking pages.
- */
- if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
- locked_vma = (vma->vm_flags & VM_LOCKED);
- if (!locked_vma)
- up_read(&vma->vm_mm->mmap_sem); /* don't need it */
- }
-
- pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-
- /* Update high watermark before we lower rss */
- update_hiwater_rss(mm);
-
- for (; address < end; pte++, address += PAGE_SIZE) {
- if (!pte_present(*pte))
- continue;
- page = vm_normal_page(vma, address, *pte);
- BUG_ON(!page || PageAnon(page));
-
- if (locked_vma) {
- if (page == check_page) {
- /* we know we have check_page locked */
- mlock_vma_page(page);
- ret = SWAP_MLOCK;
- } else if (trylock_page(page)) {
- /*
- * If we can lock the page, perform mlock.
- * Otherwise leave the page alone, it will be
- * eventually encountered again later.
- */
- mlock_vma_page(page);
- unlock_page(page);
- }
- continue; /* don't unmap */
- }
-
- /*
- * No need for _notify because we're within an
- * mmu_notifier_invalidate_range_ {start|end} scope.
- */
- if (ptep_clear_flush_young(vma, address, pte))
- continue;
-
- /* Nuke the page table entry. */
- flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush_notify(vma, address, pte);
-
- /* If nonlinear, store the file page offset in the pte. */
- if (page->index != linear_page_index(vma, address)) {
- pte_t ptfile = pgoff_to_pte(page->index);
- if (pte_soft_dirty(pteval))
- ptfile = pte_file_mksoft_dirty(ptfile);
- set_pte_at(mm, address, pte, ptfile);
- }
-
- /* Move the dirty bit to the physical page now the pte is gone. */
- if (pte_dirty(pteval))
- set_page_dirty(page);
-
- page_remove_rmap(page);
- page_cache_release(page);
- dec_mm_counter(mm, MM_FILEPAGES);
- (*mapcount)--;
- }
- pte_unmap_unlock(pte - 1, ptl);
- mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- if (locked_vma)
- up_read(&vma->vm_mm->mmap_sem);
- return ret;
-}
-
-static int try_to_unmap_nonlinear(struct page *page,
- struct address_space *mapping, void *arg)
-{
- struct vm_area_struct *vma;
- int ret = SWAP_AGAIN;
- unsigned long cursor;
- unsigned long max_nl_cursor = 0;
- unsigned long max_nl_size = 0;
- unsigned int mapcount;
-
- list_for_each_entry(vma,
- &mapping->i_mmap_nonlinear, shared.nonlinear) {
-
- cursor = (unsigned long) vma->vm_private_data;
- if (cursor > max_nl_cursor)
- max_nl_cursor = cursor;
- cursor = vma->vm_end - vma->vm_start;
- if (cursor > max_nl_size)
- max_nl_size = cursor;
- }
-
- if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
- return SWAP_FAIL;
- }
-
- /*
- * We don't try to search for this page in the nonlinear vmas,
- * and page_referenced wouldn't have found it anyway. Instead
- * just walk the nonlinear vmas trying to age and unmap some.
- * The mapcount of the page we came in with is irrelevant,
- * but even so use it as a guide to how hard we should try?
- */
- mapcount = page_mapcount(page);
- if (!mapcount)
- return ret;
-
- cond_resched();
-
- max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
- if (max_nl_cursor == 0)
- max_nl_cursor = CLUSTER_SIZE;
-
- do {
- list_for_each_entry(vma,
- &mapping->i_mmap_nonlinear, shared.nonlinear) {
-
- cursor = (unsigned long) vma->vm_private_data;
- while (cursor < max_nl_cursor &&
- cursor < vma->vm_end - vma->vm_start) {
- if (try_to_unmap_cluster(cursor, &mapcount,
- vma, page) == SWAP_MLOCK)
- ret = SWAP_MLOCK;
- cursor += CLUSTER_SIZE;
- vma->vm_private_data = (void *) cursor;
- if ((int)mapcount <= 0)
- return ret;
- }
- vma->vm_private_data = (void *) max_nl_cursor;
- }
- cond_resched();
- max_nl_cursor += CLUSTER_SIZE;
- } while (max_nl_cursor <= max_nl_size);
-
- /*
- * Don't loop forever (perhaps all the remaining pages are
- * in locked vmas). Reset cursor on all unreserved nonlinear
- * vmas, now forgetting on which ones it had fallen behind.
- */
- list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
- vma->vm_private_data = NULL;
-
- return ret;
-}
-
bool is_vma_temporary_stack(struct vm_area_struct *vma)
{
int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
@@ -1526,7 +1397,6 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
.rmap_one = try_to_unmap_one,
.arg = (void *)flags,
.done = page_not_mapped,
- .file_nonlinear = try_to_unmap_nonlinear,
.anon_lock = page_lock_anon_vma_read,
};
@@ -1572,12 +1442,6 @@ int try_to_munlock(struct page *page)
.rmap_one = try_to_unmap_one,
.arg = (void *)TTU_MUNLOCK,
.done = page_not_mapped,
- /*
- * We don't bother to try to find the munlocked page in
- * nonlinears. It's costly. Instead, later, page reclaim logic
- * may call try_to_unmap() and recover PG_mlocked lazily.
- */
- .file_nonlinear = NULL,
.anon_lock = page_lock_anon_vma_read,
};
@@ -1708,13 +1572,6 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
goto done;
}
- if (!rwc->file_nonlinear)
- goto done;
-
- if (list_empty(&mapping->i_mmap_nonlinear))
- goto done;
-
- ret = rwc->file_nonlinear(page, mapping, rwc->arg);
done:
i_mmap_unlock_read(mapping);
return ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index 73ba1df7c8ba..f69d296bd0a3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1131,7 +1131,7 @@ repeat:
* truncated or holepunched since swap was confirmed.
* shmem_undo_range() will have done some of the
* unaccounting, now delete_from_swap_cache() will do
- * the rest (including mem_cgroup_uncharge_swapcache).
+ * the rest.
* Reset swap.val? No, leave it so "failed" goes back to
* "repeat": reading a hole and writing should succeed.
*/
@@ -3201,7 +3201,6 @@ static const struct vm_operations_struct shmem_vm_ops = {
.set_policy = shmem_set_policy,
.get_policy = shmem_get_policy,
#endif
- .remap_pages = generic_file_remap_pages,
};
static struct dentry *shmem_mount(struct file_system_type *fs_type,
diff --git a/mm/slab.h b/mm/slab.h
index 1cf4005482dd..90430d6f665e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -235,7 +235,7 @@ static __always_inline int memcg_charge_slab(struct kmem_cache *s,
return 0;
if (is_root_cache(s))
return 0;
- return __memcg_charge_slab(s, gfp, order);
+ return memcg_charge_kmem(s->memcg_params->memcg, gfp, 1 << order);
}
static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
@@ -244,7 +244,7 @@ static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
return;
if (is_root_cache(s))
return;
- __memcg_uncharge_slab(s, order);
+ memcg_uncharge_kmem(s->memcg_params->memcg, 1 << order);
}
#else
static inline bool is_root_cache(struct kmem_cache *s)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index e03dd6f2a272..481cf81eadc3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -425,21 +425,64 @@ out_unlock:
}
EXPORT_SYMBOL(kmem_cache_create);
+static int do_kmem_cache_shutdown(struct kmem_cache *s,
+ struct list_head *release, bool *need_rcu_barrier)
+{
+ if (__kmem_cache_shutdown(s) != 0) {
+ printk(KERN_ERR "kmem_cache_destroy %s: "
+ "Slab cache still has objects\n", s->name);
+ dump_stack();
+ return -EBUSY;
+ }
+
+ if (s->flags & SLAB_DESTROY_BY_RCU)
+ *need_rcu_barrier = true;
+
+#ifdef CONFIG_MEMCG_KMEM
+ if (!is_root_cache(s)) {
+ struct kmem_cache *root_cache = s->memcg_params->root_cache;
+ int memcg_id = memcg_cache_id(s->memcg_params->memcg);
+
+ BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s);
+ root_cache->memcg_params->memcg_caches[memcg_id] = NULL;
+ }
+#endif
+ list_move(&s->list, release);
+ return 0;
+}
+
+static void do_kmem_cache_release(struct list_head *release,
+ bool need_rcu_barrier)
+{
+ struct kmem_cache *s, *s2;
+
+ if (need_rcu_barrier)
+ rcu_barrier();
+
+ list_for_each_entry_safe(s, s2, release, list) {
+#ifdef SLAB_SUPPORTS_SYSFS
+ sysfs_slab_remove(s);
+#else
+ slab_kmem_cache_release(s);
+#endif
+ }
+}
+
#ifdef CONFIG_MEMCG_KMEM
/*
* memcg_create_kmem_cache - Create a cache for a memory cgroup.
* @memcg: The memory cgroup the new cache is for.
* @root_cache: The parent of the new cache.
- * @memcg_name: The name of the memory cgroup (used for naming the new cache).
*
* This function attempts to create a kmem cache that will serve allocation
* requests going from @memcg to @root_cache. The new cache inherits properties
* from its parent.
*/
-struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
- struct kmem_cache *root_cache,
- const char *memcg_name)
+void memcg_create_kmem_cache(struct mem_cgroup *memcg,
+ struct kmem_cache *root_cache)
{
+ static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
+ int memcg_id = memcg_cache_id(memcg);
struct kmem_cache *s = NULL;
char *cache_name;
@@ -448,8 +491,18 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
mutex_lock(&slab_mutex);
+ /*
+ * Since per-memcg caches are created asynchronously on first
+ * allocation (see memcg_kmem_get_cache()), several threads can try to
+ * create the same cache, but only one of them may succeed.
+ */
+ if (cache_from_memcg_idx(root_cache, memcg_id))
+ goto out_unlock;
+
+ cgroup_name(mem_cgroup_css(memcg)->cgroup,
+ memcg_name_buf, sizeof(memcg_name_buf));
cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
- memcg_cache_id(memcg), memcg_name);
+ memcg_cache_id(memcg), memcg_name_buf);
if (!cache_name)
goto out_unlock;
@@ -457,49 +510,73 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
root_cache->size, root_cache->align,
root_cache->flags, root_cache->ctor,
memcg, root_cache);
+ /*
+ * If we could not create a memcg cache, do not complain, because
+ * that's not critical at all as we can always proceed with the root
+ * cache.
+ */
if (IS_ERR(s)) {
kfree(cache_name);
- s = NULL;
+ goto out_unlock;
}
+ /*
+ * Since readers won't lock (see cache_from_memcg_idx()), we need a
+ * barrier here to ensure nobody will see the kmem_cache partially
+ * initialized.
+ */
+ smp_wmb();
+ root_cache->memcg_params->memcg_caches[memcg_id] = s;
+
out_unlock:
mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus();
-
- return s;
}
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
+void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
- int rc;
+ LIST_HEAD(release);
+ bool need_rcu_barrier = false;
+ struct kmem_cache *s, *s2;
- if (!s->memcg_params ||
- !s->memcg_params->is_root_cache)
- return 0;
+ get_online_cpus();
+ get_online_mems();
- mutex_unlock(&slab_mutex);
- rc = __memcg_cleanup_cache_params(s);
mutex_lock(&slab_mutex);
+ list_for_each_entry_safe(s, s2, &slab_caches, list) {
+ if (is_root_cache(s) || s->memcg_params->memcg != memcg)
+ continue;
+ /*
+ * The cgroup is about to be freed and therefore has no charges
+ * left. Hence, all its caches must be empty by now.
+ */
+ BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier));
+ }
+ mutex_unlock(&slab_mutex);
- return rc;
-}
-#else
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
-{
- return 0;
+ put_online_mems();
+ put_online_cpus();
+
+ do_kmem_cache_release(&release, need_rcu_barrier);
}
#endif /* CONFIG_MEMCG_KMEM */
void slab_kmem_cache_release(struct kmem_cache *s)
{
+ memcg_free_cache_params(s);
kfree(s->name);
kmem_cache_free(kmem_cache, s);
}
void kmem_cache_destroy(struct kmem_cache *s)
{
+ int i;
+ LIST_HEAD(release);
+ bool need_rcu_barrier = false;
+ bool busy = false;
+
get_online_cpus();
get_online_mems();
@@ -509,35 +586,23 @@ void kmem_cache_destroy(struct kmem_cache *s)
if (s->refcount)
goto out_unlock;
- if (memcg_cleanup_cache_params(s) != 0)
- goto out_unlock;
+ for_each_memcg_cache_index(i) {
+ struct kmem_cache *c = cache_from_memcg_idx(s, i);
- if (__kmem_cache_shutdown(s) != 0) {
- printk(KERN_ERR "kmem_cache_destroy %s: "
- "Slab cache still has objects\n", s->name);
- dump_stack();
- goto out_unlock;
+ if (c && do_kmem_cache_shutdown(c, &release, &need_rcu_barrier))
+ busy = true;
}
- list_del(&s->list);
-
- mutex_unlock(&slab_mutex);
- if (s->flags & SLAB_DESTROY_BY_RCU)
- rcu_barrier();
-
- memcg_free_cache_params(s);
-#ifdef SLAB_SUPPORTS_SYSFS
- sysfs_slab_remove(s);
-#else
- slab_kmem_cache_release(s);
-#endif
- goto out;
+ if (!busy)
+ do_kmem_cache_shutdown(s, &release, &need_rcu_barrier);
out_unlock:
mutex_unlock(&slab_mutex);
-out:
+
put_online_mems();
put_online_cpus();
+
+ do_kmem_cache_release(&release, need_rcu_barrier);
}
EXPORT_SYMBOL(kmem_cache_destroy);
diff --git a/mm/swap.c b/mm/swap.c
index 8a12b33936b4..5b3087228b99 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1140,10 +1140,8 @@ void __init swap_setup(void)
if (bdi_init(swapper_spaces[0].backing_dev_info))
panic("Failed to init swap bdi");
- for (i = 0; i < MAX_SWAPFILES; i++) {
+ for (i = 0; i < MAX_SWAPFILES; i++)
spin_lock_init(&swapper_spaces[i].tree_lock);
- INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
- }
#endif
/* Use a smaller cluster for small-memory machines */
diff --git a/mm/util.c b/mm/util.c
index fec39d4509a9..d25558ba661c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -3,6 +3,7 @@
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/ctype.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
@@ -62,6 +63,35 @@ char *kstrndup(const char *s, size_t max, gfp_t gfp)
EXPORT_SYMBOL(kstrndup);
/**
+ * kstrimdup - Trim and copy a %NUL terminated string.
+ * @s: the string to trim and duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Returns an address, which the caller must kfree, containing
+ * a duplicate of the passed string with leading and/or trailing
+ * whitespace (as defined by isspace) removed.
+ */
+char *kstrimdup(const char *s, gfp_t gfp)
+{
+ char *buf;
+ char *begin = skip_spaces(s);
+ size_t len = strlen(begin);
+
+ while (len && isspace(begin[len - 1]))
+ len--;
+
+ buf = kmalloc_track_caller(len + 1, gfp);
+ if (!buf)
+ return NULL;
+
+ memcpy(buf, begin, len);
+ buf[len] = '\0';
+
+ return buf;
+}
+EXPORT_SYMBOL(kstrimdup);
+
+/**
* kmemdup - duplicate region of memory
*
* @src: memory region to duplicate
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd9a72bc4a1b..e29f411b38ac 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -713,13 +713,17 @@ enum page_references {
};
static enum page_references page_check_references(struct page *page,
- struct scan_control *sc)
+ struct scan_control *sc,
+ bool *freeable)
{
int referenced_ptes, referenced_page;
unsigned long vm_flags;
+ int pte_dirty;
+
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
- &vm_flags);
+ &vm_flags, &pte_dirty);
referenced_page = TestClearPageReferenced(page);
/*
@@ -760,6 +764,10 @@ static enum page_references page_check_references(struct page *page,
return PAGEREF_KEEP;
}
+ if (PageAnon(page) && !pte_dirty && !PageSwapCache(page) &&
+ !PageDirty(page))
+ *freeable = true;
+
/* Reclaim if clean, defer dirty pages to writeback */
if (referenced_page && !PageSwapBacked(page))
return PAGEREF_RECLAIM_CLEAN;
@@ -828,6 +836,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
int may_enter_fs;
enum page_references references = PAGEREF_RECLAIM_CLEAN;
bool dirty, writeback;
+ bool freeable = false;
cond_resched();
@@ -951,7 +960,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
}
if (!force_reclaim)
- references = page_check_references(page, sc);
+ references = page_check_references(page, sc,
+ &freeable);
switch (references) {
case PAGEREF_ACTIVATE:
@@ -968,22 +978,31 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* Try to allocate it some swap space here.
*/
if (PageAnon(page) && !PageSwapCache(page)) {
- if (!(sc->gfp_mask & __GFP_IO))
- goto keep_locked;
- if (!add_to_swap(page, page_list))
- goto activate_locked;
- may_enter_fs = 1;
-
- /* Adding to swap updated mapping */
- mapping = page_mapping(page);
+ if (!freeable) {
+ if (!(sc->gfp_mask & __GFP_IO))
+ goto keep_locked;
+ if (!add_to_swap(page, page_list))
+ goto activate_locked;
+ may_enter_fs = 1;
+ /* Adding to swap updated mapping */
+ mapping = page_mapping(page);
+ } else {
+ if (likely(!PageTransHuge(page)))
+ goto unmap;
+ /* try_to_unmap isn't aware of THP page */
+ if (unlikely(split_huge_page_to_list(page,
+ page_list)))
+ goto keep_locked;
+ }
}
-
+unmap:
/*
* The page is mapped into the page tables of one or more
* processes. Try to unmap it here.
*/
- if (page_mapped(page) && mapping) {
- switch (try_to_unmap(page, ttu_flags)) {
+ if (page_mapped(page) && (mapping || freeable)) {
+ switch (try_to_unmap(page,
+ freeable ? TTU_FREE : ttu_flags)) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
@@ -991,7 +1010,20 @@ static unsigned long shrink_page_list(struct list_head *page_list,
case SWAP_MLOCK:
goto cull_mlocked;
case SWAP_SUCCESS:
- ; /* try to free the page below */
+ /* try to free the page below */
+ if (!freeable)
+ break;
+ /*
+ * Freeable anon page doesn't have mapping
+ * due to skipping of swapcache so we free
+ * page in here rather than __remove_mapping.
+ */
+ VM_BUG_ON_PAGE(PageSwapCache(page), page);
+ if (!page_freeze_refs(page, 1))
+ goto keep_locked;
+ __clear_page_locked(page);
+ count_vm_event(PGLAZYFREED);
+ goto free_it;
}
}
@@ -1731,7 +1763,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
}
if (page_referenced(page, 0, sc->target_mem_cgroup,
- &vm_flags)) {
+ &vm_flags, NULL)) {
nr_rotated += hpage_nr_pages(page);
/*
* Identify referenced, file-backed active pages and
@@ -2921,18 +2953,20 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
return false;
/*
- * There is a potential race between when kswapd checks its watermarks
- * and a process gets throttled. There is also a potential race if
- * processes get throttled, kswapd wakes, a large process exits therby
- * balancing the zones that causes kswapd to miss a wakeup. If kswapd
- * is going to sleep, no process should be sleeping on pfmemalloc_wait
- * so wake them now if necessary. If necessary, processes will wake
- * kswapd and get throttled again
+ * The throttled processes are normally woken up in balance_pgdat() as
+ * soon as pfmemalloc_watermark_ok() is true. But there is a potential
+ * race between when kswapd checks the watermarks and a process gets
+ * throttled. There is also a potential race if processes get
+ * throttled, kswapd wakes, a large process exits thereby balancing the
+ * zones, which causes kswapd to exit balance_pgdat() before reaching
+ * the wake up checks. If kswapd is going to sleep, no process should
+ * be sleeping on pfmemalloc_wait, so wake them now if necessary. If
+ * the wake up is premature, processes will wake kswapd and get
+ * throttled again. The difference from wake ups in balance_pgdat() is
+ * that here we are under prepare_to_wait().
*/
- if (waitqueue_active(&pgdat->pfmemalloc_wait)) {
- wake_up(&pgdat->pfmemalloc_wait);
- return false;
- }
+ if (waitqueue_active(&pgdat->pfmemalloc_wait))
+ wake_up_all(&pgdat->pfmemalloc_wait);
return pgdat_balanced(pgdat, order, classzone_idx);
}
@@ -3173,7 +3207,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
pfmemalloc_watermark_ok(pgdat))
- wake_up(&pgdat->pfmemalloc_wait);
+ wake_up_all(&pgdat->pfmemalloc_wait);
/*
* Fragmentation may mean that the system cannot be rebalanced
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1284f89fca08..5fba97d122aa 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -816,6 +816,7 @@ const char * const vmstat_text[] = {
"pgfault",
"pgmajfault",
+ "pglazyfreed",
TEXTS_FOR_ZONES("pgrefill")
TEXTS_FOR_ZONES("pgsteal_kswapd")
diff --git a/mm/zbud.c b/mm/zbud.c
index 4e387bea702e..2ee4e4520493 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -130,7 +130,8 @@ static struct zbud_ops zbud_zpool_ops = {
.evict = zbud_zpool_evict
};
-static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+static void *zbud_zpool_create(char *name, gfp_t gfp,
+ struct zpool_ops *zpool_ops)
{
return zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL);
}
diff --git a/mm/zpool.c b/mm/zpool.c
index 739cdf0d183a..bacdab6e47de 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -129,6 +129,7 @@ static void zpool_put_driver(struct zpool_driver *driver)
/**
* zpool_create_pool() - Create a new zpool
* @type The type of the zpool to create (e.g. zbud, zsmalloc)
+ * @name The name of the zpool (e.g. zram0, zswap)
* @gfp The GFP flags to use when allocating the pool.
* @ops The optional ops callback.
*
@@ -140,7 +141,8 @@ static void zpool_put_driver(struct zpool_driver *driver)
*
* Returns: New zpool on success, NULL on failure.
*/
-struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
+struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
+ struct zpool_ops *ops)
{
struct zpool_driver *driver;
struct zpool *zpool;
@@ -168,7 +170,7 @@ struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
zpool->type = driver->type;
zpool->driver = driver;
- zpool->pool = driver->create(gfp, ops);
+ zpool->pool = driver->create(name, gfp, ops);
zpool->ops = ops;
if (!zpool->pool) {
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b72403927aa4..0dec1fa5f656 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -91,6 +91,7 @@
#include <linux/hardirq.h>
#include <linux/spinlock.h>
#include <linux/types.h>
+#include <linux/debugfs.h>
#include <linux/zsmalloc.h>
#include <linux/zpool.h>
@@ -168,6 +169,22 @@ enum fullness_group {
ZS_FULL
};
+enum zs_stat_type {
+ OBJ_ALLOCATED,
+ OBJ_USED,
+ NR_ZS_STAT_TYPE,
+};
+
+#ifdef CONFIG_ZSMALLOC_STAT
+
+static struct dentry *zs_stat_root;
+
+struct zs_size_stat {
+ unsigned long objs[NR_ZS_STAT_TYPE];
+};
+
+#endif
+
/*
* number of size_classes
*/
@@ -200,6 +217,10 @@ struct size_class {
/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
int pages_per_zspage;
+#ifdef CONFIG_ZSMALLOC_STAT
+ struct zs_size_stat stats;
+#endif
+
spinlock_t lock;
struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
@@ -217,10 +238,16 @@ struct link_free {
};
struct zs_pool {
+ char *name;
+
struct size_class **size_class;
gfp_t flags; /* allocation flags used when growing pool */
atomic_long_t pages_allocated;
+
+#ifdef CONFIG_ZSMALLOC_STAT
+ struct dentry *stat_dentry;
+#endif
};
/*
@@ -246,9 +273,9 @@ struct mapping_area {
#ifdef CONFIG_ZPOOL
-static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+static void *zs_zpool_create(char *name, gfp_t gfp, struct zpool_ops *zpool_ops)
{
- return zs_create_pool(gfp);
+ return zs_create_pool(name, gfp);
}
static void zs_zpool_destroy(void *pool)
@@ -942,6 +969,166 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
return true;
}
+#ifdef CONFIG_ZSMALLOC_STAT
+
+static inline void zs_stat_inc(struct size_class *class,
+ enum zs_stat_type type, unsigned long cnt)
+{
+ class->stats.objs[type] += cnt;
+}
+
+static inline void zs_stat_dec(struct size_class *class,
+ enum zs_stat_type type, unsigned long cnt)
+{
+ class->stats.objs[type] -= cnt;
+}
+
+static inline unsigned long zs_stat_get(struct size_class *class,
+ enum zs_stat_type type)
+{
+ return class->stats.objs[type];
+}
+
+static int __init zs_stat_init(void)
+{
+ if (!debugfs_initialized())
+ return -ENODEV;
+
+ zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
+ if (!zs_stat_root)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void __exit zs_stat_exit(void)
+{
+ debugfs_remove_recursive(zs_stat_root);
+}
+
+static int zs_stats_size_show(struct seq_file *s, void *v)
+{
+ int i;
+ struct zs_pool *pool = s->private;
+ struct size_class *class;
+ int objs_per_zspage;
+ unsigned long obj_allocated, obj_used, pages_used;
+ unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
+
+ seq_printf(s, " %5s %5s %13s %10s %10s\n", "class", "size",
+ "obj_allocated", "obj_used", "pages_used");
+
+ for (i = 0; i < zs_size_classes; i++) {
+ class = pool->size_class[i];
+
+ if (class->index != i)
+ continue;
+
+ spin_lock(&class->lock);
+ obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
+ obj_used = zs_stat_get(class, OBJ_USED);
+ spin_unlock(&class->lock);
+
+ objs_per_zspage = get_maxobj_per_zspage(class->size,
+ class->pages_per_zspage);
+ pages_used = obj_allocated / objs_per_zspage *
+ class->pages_per_zspage;
+
+ seq_printf(s, " %5u %5u %10lu %10lu %10lu\n", i,
+ class->size, obj_allocated, obj_used, pages_used);
+
+ total_objs += obj_allocated;
+ total_used_objs += obj_used;
+ total_pages += pages_used;
+ }
+
+ seq_puts(s, "\n");
+ seq_printf(s, " %5s %5s %10lu %10lu %10lu\n", "Total", "",
+ total_objs, total_used_objs, total_pages);
+
+ return 0;
+}
+
+static int zs_stats_size_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, zs_stats_size_show, inode->i_private);
+}
+
+static const struct file_operations zs_stat_size_ops = {
+ .open = zs_stats_size_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int zs_pool_stat_create(char *name, struct zs_pool *pool)
+{
+ struct dentry *entry;
+
+ if (!zs_stat_root)
+ return -ENODEV;
+
+ entry = debugfs_create_dir(name, zs_stat_root);
+ if (!entry) {
+ pr_warn("debugfs dir <%s> creation failed\n", name);
+ return -ENOMEM;
+ }
+ pool->stat_dentry = entry;
+
+ entry = debugfs_create_file("obj_in_classes", S_IFREG | S_IRUGO,
+ pool->stat_dentry, pool, &zs_stat_size_ops);
+ if (!entry) {
+ pr_warn("%s: debugfs file entry <%s> creation failed\n",
+ name, "obj_in_classes");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void zs_pool_stat_destroy(struct zs_pool *pool)
+{
+ debugfs_remove_recursive(pool->stat_dentry);
+}
+
+#else /* CONFIG_ZSMALLOC_STAT */
+
+static inline void zs_stat_inc(struct size_class *class,
+ enum zs_stat_type type, unsigned long cnt)
+{
+}
+
+static inline void zs_stat_dec(struct size_class *class,
+ enum zs_stat_type type, unsigned long cnt)
+{
+}
+
+static inline unsigned long zs_stat_get(struct size_class *class,
+ enum zs_stat_type type)
+{
+ return 0;
+}
+
+static int __init zs_stat_init(void)
+{
+ return 0;
+}
+
+static void __exit zs_stat_exit(void)
+{
+}
+
+static inline int zs_pool_stat_create(char *name, struct zs_pool *pool)
+{
+ return 0;
+}
+
+static inline void zs_pool_stat_destroy(struct zs_pool *pool)
+{
+}
+
+#endif
+
unsigned long zs_get_total_pages(struct zs_pool *pool)
{
return atomic_long_read(&pool->pages_allocated);
@@ -1074,7 +1261,10 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
set_zspage_mapping(first_page, class->index, ZS_EMPTY);
atomic_long_add(class->pages_per_zspage,
&pool->pages_allocated);
+
spin_lock(&class->lock);
+ zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
+ class->size, class->pages_per_zspage));
}
obj = (unsigned long)first_page->freelist;
@@ -1088,6 +1278,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
kunmap_atomic(vaddr);
first_page->inuse++;
+ zs_stat_inc(class, OBJ_USED, 1);
/* Now move the zspage to another fullness group, if required */
fix_fullness_group(pool, first_page);
spin_unlock(&class->lock);
@@ -1128,6 +1319,12 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
first_page->inuse--;
fullness = fix_fullness_group(pool, first_page);
+
+ zs_stat_dec(class, OBJ_USED, 1);
+ if (fullness == ZS_EMPTY)
+ zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
+ class->size, class->pages_per_zspage));
+
spin_unlock(&class->lock);
if (fullness == ZS_EMPTY) {
@@ -1148,7 +1345,7 @@ EXPORT_SYMBOL_GPL(zs_free);
* On success, a pointer to the newly created pool is returned,
* otherwise NULL.
*/
-struct zs_pool *zs_create_pool(gfp_t flags)
+struct zs_pool *zs_create_pool(char *name, gfp_t flags)
{
int i;
struct zs_pool *pool;
@@ -1158,9 +1355,16 @@ struct zs_pool *zs_create_pool(gfp_t flags)
if (!pool)
return NULL;
+ pool->name = kstrdup(name, GFP_KERNEL);
+ if (!pool->name) {
+ kfree(pool);
+ return NULL;
+ }
+
pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
GFP_KERNEL);
if (!pool->size_class) {
+ kfree(pool->name);
kfree(pool);
return NULL;
}
@@ -1210,6 +1414,9 @@ struct zs_pool *zs_create_pool(gfp_t flags)
pool->flags = flags;
+ if (zs_pool_stat_create(name, pool))
+ goto err;
+
return pool;
err:
@@ -1222,6 +1429,8 @@ void zs_destroy_pool(struct zs_pool *pool)
{
int i;
+ zs_pool_stat_destroy(pool);
+
for (i = 0; i < zs_size_classes; i++) {
int fg;
struct size_class *class = pool->size_class[i];
@@ -1242,6 +1451,7 @@ void zs_destroy_pool(struct zs_pool *pool)
}
kfree(pool->size_class);
+ kfree(pool->name);
kfree(pool);
}
EXPORT_SYMBOL_GPL(zs_destroy_pool);
@@ -1250,17 +1460,30 @@ static int __init zs_init(void)
{
int ret = zs_register_cpu_notifier();
- if (ret) {
- zs_unregister_cpu_notifier();
- return ret;
- }
+ if (ret)
+ goto notifier_fail;
init_zs_size_classes();
#ifdef CONFIG_ZPOOL
zpool_register_driver(&zs_zpool_driver);
#endif
+
+ ret = zs_stat_init();
+ if (ret) {
+ pr_err("zs stat initialization failed\n");
+ goto stat_fail;
+ }
return 0;
+
+stat_fail:
+#ifdef CONFIG_ZPOOL
+ zpool_unregister_driver(&zs_zpool_driver);
+#endif
+notifier_fail:
+ zs_unregister_cpu_notifier();
+
+ return ret;
}
static void __exit zs_exit(void)
@@ -1269,6 +1492,8 @@ static void __exit zs_exit(void)
zpool_unregister_driver(&zs_zpool_driver);
#endif
zs_unregister_cpu_notifier();
+
+ zs_stat_exit();
}
module_init(zs_init);
diff --git a/mm/zswap.c b/mm/zswap.c
index 0cfce9bc51e4..4249e82ff934 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -906,11 +906,12 @@ static int __init init_zswap(void)
pr_info("loading zswap\n");
- zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
+ zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
+ &zswap_zpool_ops);
if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
pr_info("%s zpool not available\n", zswap_zpool_type);
zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
- zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
+ zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
&zswap_zpool_ops);
}
if (!zswap_pool) {
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index f0bb6d60c07b..6afc24ba77a6 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -278,6 +278,7 @@ our $Attribute = qr{
__noreturn|
__used|
__cold|
+ __pure|
__noclone|
__deprecated|
__read_mostly|
@@ -298,6 +299,7 @@ our $Binary = qr{(?i)0b[01]+$Int_type?};
our $Hex = qr{(?i)0x[0-9a-f]+$Int_type?};
our $Int = qr{[0-9]+$Int_type?};
our $Octal = qr{0[0-7]+$Int_type?};
+our $String = qr{"[X\t]*"};
our $Float_hex = qr{(?i)0x[0-9a-f]+p-?[0-9]+[fl]?};
our $Float_dec = qr{(?i)(?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?};
our $Float_int = qr{(?i)[0-9]+e-?[0-9]+[fl]?};
@@ -517,7 +519,7 @@ our $Typecast = qr{\s*(\(\s*$NonptrType\s*\)){0,1}\s*};
our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*};
-our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)};
+our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)};
our $declaration_macros = qr{(?x:
(?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(|
@@ -2355,6 +2357,13 @@ sub process {
"Use of CONFIG_EXPERIMENTAL is deprecated. For alternatives, see https://lkml.org/lkml/2012/10/23/580\n");
}
+# discourage the use of boolean for type definition attributes of Kconfig options
+ if ($realfile =~ /Kconfig/ &&
+ $line =~ /^\+\s*\bboolean\b/) {
+ WARN("CONFIG_TYPE_BOOLEAN",
+ "Use of boolean is deprecated, please use bool instead.\n" . $herecurr);
+ }
+
if (($realfile =~ /Makefile.*/ || $realfile =~ /Kbuild.*/) &&
($line =~ /\+(EXTRA_[A-Z]+FLAGS).*/)) {
my $flag = $1;
@@ -4545,7 +4554,7 @@ sub process {
}
# check for logging functions with KERN_<LEVEL>
- if ($line !~ /printk\s*\(/ &&
+ if ($line !~ /printk(?:_ratelimited|_once)?\s*\(/ &&
$line =~ /\b$logFunctions\s*\(.*\b(KERN_[A-Z]+)\b/) {
my $level = $1;
if (WARN("UNNECESSARY_KERN_LEVEL",
@@ -5089,6 +5098,12 @@ sub process {
}
}
+# check for uses of __DATE__, __TIME__, __TIMESTAMP__
+ while ($line =~ /\b(__(?:DATE|TIME|TIMESTAMP)__)\b/g) {
+ ERROR("DATE_TIME",
+ "Use of the '$1' macro makes the build non-deterministic\n" . $herecurr);
+ }
+
# check for use of yield()
if ($line =~ /\byield\s*\(\s*\)/) {
WARN("YIELD",
@@ -5255,6 +5270,9 @@ sub process {
length($val) ne 4)) {
ERROR("NON_OCTAL_PERMISSIONS",
"Use 4 digit octal (0777) not decimal permissions\n" . $herecurr);
+ } elsif ($val =~ /^$Octal$/ && (oct($val) & 02)) {
+ ERROR("EXPORTED_WORLD_WRITABLE",
+ "Exporting writable files is usually an error. Consider more restrictive permissions.\n" . $herecurr);
}
}
}
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 264fbc297e0b..8bdf16b8ba60 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -133,6 +133,7 @@ static const char * const page_flag_names[] = {
[KPF_KSM] = "x:ksm",
[KPF_THP] = "t:thp",
[KPF_BALLOON] = "o:balloon",
+ [KPF_ZERO_PAGE] = "z:zero_page",
[KPF_RESERVED] = "r:reserved",
[KPF_MLOCKED] = "m:mlocked",