diff options
265 files changed, 9536 insertions, 4845 deletions
diff --git a/Documentation/ABI/obsolete/sysfs-block-zram b/Documentation/ABI/obsolete/sysfs-block-zram deleted file mode 100644 index 720ea92cfb2e..000000000000 --- a/Documentation/ABI/obsolete/sysfs-block-zram +++ /dev/null @@ -1,119 +0,0 @@ -What: /sys/block/zram<id>/num_reads -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The num_reads file is read-only and specifies the number of - reads (failed or successful) done on this device. - Now accessible via zram<id>/stat node. - -What: /sys/block/zram<id>/num_writes -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The num_writes file is read-only and specifies the number of - writes (failed or successful) done on this device. - Now accessible via zram<id>/stat node. - -What: /sys/block/zram<id>/invalid_io -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The invalid_io file is read-only and specifies the number of - non-page-size-aligned I/O requests issued to this device. - Now accessible via zram<id>/io_stat node. - -What: /sys/block/zram<id>/failed_reads -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The failed_reads file is read-only and specifies the number of - failed reads happened on this device. - Now accessible via zram<id>/io_stat node. - -What: /sys/block/zram<id>/failed_writes -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The failed_writes file is read-only and specifies the number of - failed writes happened on this device. - Now accessible via zram<id>/io_stat node. - -What: /sys/block/zram<id>/notify_free -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The notify_free file is read-only. Depending on device usage - scenario it may account a) the number of pages freed because - of swap slot free notifications or b) the number of pages freed - because of REQ_DISCARD requests sent by bio. The former ones - are sent to a swap block device when a swap slot is freed, which - implies that this disk is being used as a swap disk. The latter - ones are sent by filesystem mounted with discard option, - whenever some data blocks are getting discarded. - Now accessible via zram<id>/io_stat node. - -What: /sys/block/zram<id>/zero_pages -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The zero_pages file is read-only and specifies number of zero - filled pages written to this disk. No memory is allocated for - such pages. - Now accessible via zram<id>/mm_stat node. - -What: /sys/block/zram<id>/orig_data_size -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The orig_data_size file is read-only and specifies uncompressed - size of data stored in this disk. This excludes zero-filled - pages (zero_pages) since no memory is allocated for them. - Unit: bytes - Now accessible via zram<id>/mm_stat node. - -What: /sys/block/zram<id>/compr_data_size -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The compr_data_size file is read-only and specifies compressed - size of data stored in this disk. So, compression ratio can be - calculated using orig_data_size and this statistic. - Unit: bytes - Now accessible via zram<id>/mm_stat node. - -What: /sys/block/zram<id>/mem_used_total -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The mem_used_total file is read-only and specifies the amount - of memory, including allocator fragmentation and metadata - overhead, allocated for this disk. So, allocator space - efficiency can be calculated using compr_data_size and this - statistic. - Unit: bytes - Now accessible via zram<id>/mm_stat node. - -What: /sys/block/zram<id>/mem_used_max -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The mem_used_max file is read/write and specifies the amount - of maximum memory zram have consumed to store compressed data. - For resetting the value, you should write "0". Otherwise, - you could see -EINVAL. - Unit: bytes - Downgraded to write-only node: so it's possible to set new - value only; its current value is stored in zram<id>/mm_stat - node. - -What: /sys/block/zram<id>/mem_limit -Date: August 2015 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The mem_limit file is read/write and specifies the maximum - amount of memory ZRAM can use to store the compressed data. - The limit could be changed in run time and "0" means disable - the limit. No limit is the initial state. Unit: bytes - Downgraded to write-only node: so it's possible to set new - value only; its current value is stored in zram<id>/mm_stat - node. diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index 4518d30b8c2e..451b6d882b2c 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -22,41 +22,6 @@ Description: device. The reset operation frees all the memory associated with this device. -What: /sys/block/zram<id>/num_reads -Date: August 2010 -Contact: Nitin Gupta <ngupta@vflare.org> -Description: - The num_reads file is read-only and specifies the number of - reads (failed or successful) done on this device. - -What: /sys/block/zram<id>/num_writes -Date: August 2010 -Contact: Nitin Gupta <ngupta@vflare.org> -Description: - The num_writes file is read-only and specifies the number of - writes (failed or successful) done on this device. - -What: /sys/block/zram<id>/invalid_io -Date: August 2010 -Contact: Nitin Gupta <ngupta@vflare.org> -Description: - The invalid_io file is read-only and specifies the number of - non-page-size-aligned I/O requests issued to this device. - -What: /sys/block/zram<id>/failed_reads -Date: February 2014 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The failed_reads file is read-only and specifies the number of - failed reads happened on this device. - -What: /sys/block/zram<id>/failed_writes -Date: February 2014 -Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Description: - The failed_writes file is read-only and specifies the number of - failed writes happened on this device. - What: /sys/block/zram<id>/max_comp_streams Date: February 2014 Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> @@ -73,74 +38,24 @@ Description: available and selected compression algorithms, change compression algorithm selection. -What: /sys/block/zram<id>/notify_free -Date: August 2010 -Contact: Nitin Gupta <ngupta@vflare.org> -Description: - The notify_free file is read-only. Depending on device usage - scenario it may account a) the number of pages freed because - of swap slot free notifications or b) the number of pages freed - because of REQ_DISCARD requests sent by bio. The former ones - are sent to a swap block device when a swap slot is freed, which - implies that this disk is being used as a swap disk. The latter - ones are sent by filesystem mounted with discard option, - whenever some data blocks are getting discarded. - -What: /sys/block/zram<id>/zero_pages -Date: August 2010 -Contact: Nitin Gupta <ngupta@vflare.org> -Description: - The zero_pages file is read-only and specifies number of zero - filled pages written to this disk. No memory is allocated for - such pages. - -What: /sys/block/zram<id>/orig_data_size -Date: August 2010 -Contact: Nitin Gupta <ngupta@vflare.org> -Description: - The orig_data_size file is read-only and specifies uncompressed - size of data stored in this disk. This excludes zero-filled - pages (zero_pages) since no memory is allocated for them. - Unit: bytes - -What: /sys/block/zram<id>/compr_data_size -Date: August 2010 -Contact: Nitin Gupta <ngupta@vflare.org> -Description: - The compr_data_size file is read-only and specifies compressed - size of data stored in this disk. So, compression ratio can be - calculated using orig_data_size and this statistic. - Unit: bytes - -What: /sys/block/zram<id>/mem_used_total -Date: August 2010 -Contact: Nitin Gupta <ngupta@vflare.org> -Description: - The mem_used_total file is read-only and specifies the amount - of memory, including allocator fragmentation and metadata - overhead, allocated for this disk. So, allocator space - efficiency can be calculated using compr_data_size and this - statistic. - Unit: bytes - What: /sys/block/zram<id>/mem_used_max Date: August 2014 Contact: Minchan Kim <minchan@kernel.org> Description: - The mem_used_max file is read/write and specifies the amount - of maximum memory zram have consumed to store compressed data. - For resetting the value, you should write "0". Otherwise, - you could see -EINVAL. + The mem_used_max file is write-only and is used to reset + the counter of maximum memory zram have consumed to store + compressed data. For resetting the value, you should write + "0". Otherwise, you could see -EINVAL. Unit: bytes What: /sys/block/zram<id>/mem_limit Date: August 2014 Contact: Minchan Kim <minchan@kernel.org> Description: - The mem_limit file is read/write and specifies the maximum - amount of memory ZRAM can use to store the compressed data. The - limit could be changed in run time and "0" means disable the - limit. No limit is the initial state. Unit: bytes + The mem_limit file is write-only and specifies the maximum + amount of memory ZRAM can use to store the compressed data. + The limit could be changed in run time and "0" means disable + the limit. No limit is the initial state. Unit: bytes What: /sys/block/zram<id>/compact Date: August 2015 diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 0535ae1f73e5..1c0c08d9206b 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -161,42 +161,14 @@ Name access description disksize RW show and set the device's disk size initstate RO shows the initialization state of the device reset WO trigger device reset -num_reads RO the number of reads -failed_reads RO the number of failed reads -num_write RO the number of writes -failed_writes RO the number of failed writes -invalid_io RO the number of non-page-size-aligned I/O requests +mem_used_max WO reset the `mem_used_max' counter (see later) +mem_limit WO specifies the maximum amount of memory ZRAM can use + to store the compressed data max_comp_streams RW the number of possible concurrent compress operations comp_algorithm RW show and change the compression algorithm -notify_free RO the number of notifications to free pages (either - slot free notifications or REQ_DISCARD requests) -zero_pages RO the number of zero filled pages written to this disk -orig_data_size RO uncompressed size of data stored in this disk -compr_data_size RO compressed size of data stored in this disk -mem_used_total RO the amount of memory allocated for this disk -mem_used_max RW the maximum amount of memory zram have consumed to - store the data (to reset this counter to the actual - current value, write 1 to this attribute) -mem_limit RW the maximum amount of memory ZRAM can use to store - the compressed data -pages_compacted RO the number of pages freed during compaction - (available only via zram<id>/mm_stat node) compact WO trigger memory compaction debug_stat RO this file is used for zram debugging purposes -WARNING -======= -per-stat sysfs attributes are considered to be deprecated. -The basic strategy is: --- the existing RW nodes will be downgraded to WO nodes (in linux 4.11) --- deprecated RO sysfs nodes will eventually be removed (in linux 4.11) - -The list of deprecated attributes can be found here: -Documentation/ABI/obsolete/sysfs-block-zram - -Basically, every attribute that has its own read accessible sysfs node -(e.g. num_reads) *AND* is accessible via one of the stat files (zram<id>/stat -or zram<id>/io_stat or zram<id>/mm_stat) is considered to be deprecated. User space is advised to use the following files to read the device statistics. @@ -211,22 +183,40 @@ The stat file represents device's I/O statistics not accounted by block layer and, thus, not available in zram<id>/stat file. It consists of a single line of text and contains the following stats separated by whitespace: - failed_reads - failed_writes - invalid_io - notify_free + failed_reads the number of failed reads + failed_writes the number of failed writes + invalid_io the number of non-page-size-aligned I/O requests + notify_free Depending on device usage scenario it may account + a) the number of pages freed because of swap slot free + notifications or b) the number of pages freed because of + REQ_DISCARD requests sent by bio. The former ones are + sent to a swap block device when a swap slot is freed, + which implies that this disk is being used as a swap disk. + The latter ones are sent by filesystem mounted with + discard option, whenever some data blocks are getting + discarded. File /sys/block/zram<id>/mm_stat The stat file represents device's mm statistics. It consists of a single line of text and contains the following stats separated by whitespace: - orig_data_size - compr_data_size - mem_used_total - mem_limit - mem_used_max - zero_pages - num_migrated + orig_data_size uncompressed size of data stored in this disk. + This excludes zero-filled pages (zero_pages) since no + memory is allocated for them. + Unit: bytes + compr_data_size compressed size of data stored in this disk + mem_used_total the amount of memory allocated for this disk. This + includes allocator fragmentation and metadata overhead, + allocated for this disk. So, allocator space efficiency + can be calculated using compr_data_size and this statistic. + Unit: bytes + mem_limit the maximum amount of memory ZRAM can use to store + the compressed data + mem_used_max the maximum amount of memory zram have consumed to + store the data + zero_pages the number of zero filled pages written to this disk. + No memory is allocated for such pages. + pages_compacted the number of pages freed during compaction 9) Deactivate: swapoff /dev/zram0 diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 8f961ef2b457..ba976805853a 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -112,8 +112,8 @@ my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*)'; -my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_taken=([0-9]*) file=([0-9]*)'; -my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) zid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; +my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; +my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)'; my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)'; @@ -205,15 +205,15 @@ $regex_wakeup_kswapd = generate_traceevent_regex( $regex_lru_isolate = generate_traceevent_regex( "vmscan/mm_vmscan_lru_isolate", $regex_lru_isolate_default, - "isolate_mode", "order", - "nr_requested", "nr_scanned", "nr_taken", - "file"); + "isolate_mode", "classzone_idx", "order", + "nr_requested", "nr_scanned", "nr_skipped", "nr_taken", + "lru"); $regex_lru_shrink_inactive = generate_traceevent_regex( "vmscan/mm_vmscan_lru_shrink_inactive", $regex_lru_shrink_inactive_default, - "nid", "zid", - "nr_scanned", "nr_reclaimed", "priority", - "flags"); + "nid", "nr_scanned", "nr_reclaimed", "nr_dirty", "nr_writeback", + "nr_congested", "nr_immediate", "nr_activate", "nr_ref_keep", + "nr_unmap_fail", "priority", "flags"); $regex_lru_shrink_active = generate_traceevent_regex( "vmscan/mm_vmscan_lru_shrink_active", $regex_lru_shrink_active_default, @@ -381,8 +381,8 @@ EVENT_PROCESS: next; } my $isolate_mode = $1; - my $nr_scanned = $4; - my $file = $6; + my $nr_scanned = $5; + my $file = $8; # To closer match vmstat scanning statistics, only count isolate_both # and isolate_inactive as scanning. isolate_active is rotation @@ -391,7 +391,7 @@ EVENT_PROCESS: # isolate_both == 3 if ($isolate_mode != 2) { $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; - if ($file == 1) { + if ($file =~ /_file/) { $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned; } else { $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned; @@ -406,8 +406,8 @@ EVENT_PROCESS: next; } - my $nr_reclaimed = $4; - my $flags = $6; + my $nr_reclaimed = $3; + my $flags = $12; my $file = 0; if ($flags =~ /RECLAIM_WB_FILE/) { $file = 1; diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt index f34a8ee6f860..0c64a81d6808 100644 --- a/Documentation/vm/ksm.txt +++ b/Documentation/vm/ksm.txt @@ -80,6 +80,20 @@ run - set 0 to stop ksmd from running but keep merged pages, Default: 0 (must be changed to 1 to activate KSM, except if CONFIG_SYSFS is disabled) +use_zero_pages - specifies whether empty pages (i.e. allocated pages + that only contain zeroes) should be treated specially. + When set to 1, empty pages are merged with the kernel + zero page(s) instead of with each other as it would + happen normally. This can improve the performance on + architectures with coloured zero pages, depending on + the workload. Care should be taken when enabling this + setting, as it can potentially degrade the performance + of KSM for some workloads, for example if the checksums + of pages candidate for merging match the checksum of + an empty page. This setting can be changed at any time, + it is only effective for pages merged after the change. + Default: 0 (normal KSM behaviour as in earlier releases) + The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/: pages_shared - how many shared pages are being used diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index f2e739545e74..cd28d5ee5273 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -110,6 +110,7 @@ MADV_HUGEPAGE region. echo always >/sys/kernel/mm/transparent_hugepage/defrag echo defer >/sys/kernel/mm/transparent_hugepage/defrag +echo defer+madvise >/sys/kernel/mm/transparent_hugepage/defrag echo madvise >/sys/kernel/mm/transparent_hugepage/defrag echo never >/sys/kernel/mm/transparent_hugepage/defrag @@ -120,10 +121,15 @@ that benefit heavily from THP use and are willing to delay the VM start to utilise them. "defer" means that an application will wake kswapd in the background -to reclaim pages and wake kcompact to compact memory so that THP is +to reclaim pages and wake kcompactd to compact memory so that THP is available in the near future. It's the responsibility of khugepaged to then install the THP pages later. +"defer+madvise" will enter direct reclaim and compaction like "always", but +only for regions that have used madvise(MADV_HUGEPAGE); all other regions +will wake kswapd in the background to reclaim pages and wake kcompactd to +compact memory so that THP is available in the near future. + "madvise" will enter direct reclaim like "always" but only for regions that are have used madvise(MADV_HUGEPAGE). This is the default behaviour. diff --git a/MAINTAINERS b/MAINTAINERS index e90bfece8aef..d93513516599 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3924,10 +3924,13 @@ S: Maintained F: drivers/i2c/busses/i2c-diolan-u2c.c DIRECT ACCESS (DAX) -M: Matthew Wilcox <willy@linux.intel.com> +M: Matthew Wilcox <mawilcox@microsoft.com> +M: Ross Zwisler <ross.zwisler@linux.intel.com> L: linux-fsdevel@vger.kernel.org S: Supported F: fs/dax.c +F: include/linux/dax.h +F: include/trace/events/fs_dax.h DIRECTORY NOTIFICATION (DNOTIFY) M: Eric Paris <eparis@parisplace.org> diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 4355f0ec44d6..f98baaec0a15 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -17,6 +17,8 @@ #ifndef __ASSEMBLY__ +#include <linux/personality.h> /* For READ_IMPLIES_EXEC */ + #ifndef CONFIG_MMU #include <asm/page-nommu.h> diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c index 34bb4b13e079..c452ddb5620f 100644 --- a/arch/frv/mb93090-mb00/pci-frv.c +++ b/arch/frv/mb93090-mb00/pci-frv.c @@ -147,7 +147,7 @@ static void __init pcibios_allocate_resources(int pass) static void __init pcibios_assign_resources(void) { struct pci_dev *dev = NULL; - int idx; + int idx, err; struct resource *r; for_each_pci_dev(dev) { @@ -172,8 +172,13 @@ static void __init pcibios_assign_resources(void) * the BIOS forgot to do so or because we have decided the old * address was unusable for some reason. */ - if (!r->start && r->end) - pci_assign_resource(dev, idx); + if (!r->start && r->end) { + err = pci_assign_resource(dev, idx); + if (err) + dev_err(&dev->dev, + "Failed to assign new address to %d\n", + idx); + } } } } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index bb4610faca84..06cdaef54b2e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -684,51 +684,3 @@ int arch_remove_memory(u64 start, u64 size) } #endif #endif - -/** - * show_mem - give short summary of memory stats - * - * Shows a simple page count of reserved and used pages in the system. - * For discontig machines, it does this on a per-pgdat basis. - */ -void show_mem(unsigned int filter) -{ - int total_reserved = 0; - unsigned long total_present = 0; - pg_data_t *pgdat; - - printk(KERN_INFO "Mem-info:\n"); - show_free_areas(filter); - printk(KERN_INFO "Node memory in pages:\n"); - for_each_online_pgdat(pgdat) { - unsigned long present; - unsigned long flags; - int reserved = 0; - int nid = pgdat->node_id; - int zoneid; - - if (skip_free_areas_node(filter, nid)) - continue; - pgdat_resize_lock(pgdat, &flags); - - for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { - struct zone *zone = &pgdat->node_zones[zoneid]; - if (!populated_zone(zone)) - continue; - - reserved += zone->present_pages - zone->managed_pages; - } - present = pgdat->node_present_pages; - - pgdat_resize_unlock(pgdat, &flags); - total_present += present; - total_reserved += reserved; - printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, ", - nid, present, reserved); - } - printk(KERN_INFO "%ld pages of RAM\n", total_present); - printk(KERN_INFO "%d reserved pages\n", total_reserved); - printk(KERN_INFO "Total of %ld pages in page table cache\n", - quicklist_total_size()); - printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages()); -} diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 860e440611c9..6e69d2c42eee 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += cputime.h +generic-y += current.h generic-y += exec.h generic-y += irq_work.h generic-y += kvm_para.h diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h index 14bf9b739dd2..23c9d0537201 100644 --- a/arch/m32r/include/asm/cmpxchg.h +++ b/arch/m32r/include/asm/cmpxchg.h @@ -64,8 +64,10 @@ __xchg(unsigned long x, volatile void *ptr, int size) return (tmp); } -#define xchg(ptr, x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) ({ \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), \ + sizeof(*(ptr)))); \ +}) static __always_inline unsigned long __xchg_local(unsigned long x, volatile void *ptr, int size) @@ -187,9 +189,12 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) return old; } -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) +#define cmpxchg(ptr, o, n) ({ \ + ((__typeof__(*(ptr))) \ + __cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))); \ +}) #include <asm-generic/cmpxchg-local.h> diff --git a/arch/m32r/include/asm/current.h b/arch/m32r/include/asm/current.h deleted file mode 100644 index 7859d864f2c2..000000000000 --- a/arch/m32r/include/asm/current.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _ASM_M32R_CURRENT_H -#define _ASM_M32R_CURRENT_H - -#include <linux/thread_info.h> - -struct task_struct; - -static __inline__ struct task_struct *get_current(void) -{ - return current_thread_info()->task; -} - -#define current (get_current()) - -#endif /* _ASM_M32R_CURRENT_H */ diff --git a/arch/m68k/68000/bootlogo-vz.h b/arch/m68k/68000/bootlogo-vz.h index b38e2b255142..6ff09beba1ba 100644 --- a/arch/m68k/68000/bootlogo-vz.h +++ b/arch/m68k/68000/bootlogo-vz.h @@ -1,6 +1,8 @@ +#include <linux/compiler.h> + #define splash_width 640 #define splash_height 480 -unsigned char __attribute__ ((aligned(16))) bootlogo_bits[] = { +unsigned char __aligned(16) bootlogo_bits[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/arch/m68k/68000/bootlogo.h b/arch/m68k/68000/bootlogo.h index b896c933fafc..c466db3ca3a8 100644 --- a/arch/m68k/68000/bootlogo.h +++ b/arch/m68k/68000/bootlogo.h @@ -1,6 +1,8 @@ +#include <linux/compiler.h> + #define bootlogo_width 160 #define bootlogo_height 160 -unsigned char __attribute__ ((aligned(16))) bootlogo_bits[] = { +unsigned char __aligned(16) bootlogo_bits[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/arch/m68k/include/asm/MC68328.h b/arch/m68k/include/asm/MC68328.h index 1a8080c4cc40..b61230e74e63 100644 --- a/arch/m68k/include/asm/MC68328.h +++ b/arch/m68k/include/asm/MC68328.h @@ -8,6 +8,7 @@ * Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>, * */ +#include <linux/compiler.h> #ifndef _MC68328_H_ #define _MC68328_H_ @@ -993,7 +994,7 @@ typedef volatile struct { volatile unsigned short int pad1; volatile unsigned short int pad2; volatile unsigned short int pad3; -} __attribute__((packed)) m68328_uart; +} __packed m68328_uart; /********** diff --git a/arch/m68k/include/asm/MC68EZ328.h b/arch/m68k/include/asm/MC68EZ328.h index fedac87c5d13..703331ece328 100644 --- a/arch/m68k/include/asm/MC68EZ328.h +++ b/arch/m68k/include/asm/MC68EZ328.h @@ -9,6 +9,7 @@ * The Silver Hammer Group, Ltd. * */ +#include <linux/compiler.h> #ifndef _MC68EZ328_H_ #define _MC68EZ328_H_ @@ -815,7 +816,7 @@ typedef volatile struct { volatile unsigned short int nipr; volatile unsigned short int pad1; volatile unsigned short int pad2; -} __attribute__((packed)) m68328_uart; +} __packed m68328_uart; /********** diff --git a/arch/m68k/include/asm/MC68VZ328.h b/arch/m68k/include/asm/MC68VZ328.h index 34a51b2c784f..fbaed7ddfb41 100644 --- a/arch/m68k/include/asm/MC68VZ328.h +++ b/arch/m68k/include/asm/MC68VZ328.h @@ -909,7 +909,7 @@ typedef struct { volatile unsigned short int nipr; volatile unsigned short int hmark; volatile unsigned short int unused; -} __attribute__((packed)) m68328_uart; +} __packed m68328_uart; diff --git a/arch/m68k/include/asm/natfeat.h b/arch/m68k/include/asm/natfeat.h index a3521b80c3b9..1feceb31d564 100644 --- a/arch/m68k/include/asm/natfeat.h +++ b/arch/m68k/include/asm/natfeat.h @@ -6,6 +6,7 @@ * This software may be used and distributed according to the terms of * the GNU General Public License (GPL), incorporated herein by reference. */ +#include <linux/compiler.h> #ifndef _NATFEAT_H #define _NATFEAT_H @@ -17,6 +18,6 @@ void nf_init(void); void nf_shutdown(void); void nfprint(const char *fmt, ...) - __attribute__ ((format (printf, 1, 2))); + __print(1, 2); # endif /* _NATFEAT_H */ diff --git a/arch/m68k/lib/ashldi3.c b/arch/m68k/lib/ashldi3.c index 8dffd36ec4f2..ac08f8141390 100644 --- a/arch/m68k/lib/ashldi3.c +++ b/arch/m68k/lib/ashldi3.c @@ -18,10 +18,10 @@ GNU General Public License for more details. */ #define BITS_PER_UNIT 8 -typedef int SItype __attribute__ ((mode (SI))); -typedef unsigned int USItype __attribute__ ((mode (SI))); -typedef int DItype __attribute__ ((mode (DI))); -typedef int word_type __attribute__ ((mode (__word__))); +typedef int SItype __mode(SI); +typedef unsigned int USItype __mode(SI); +typedef int DItype __mode(DI); +typedef int word_type __mode(__word__); struct DIstruct {SItype high, low;}; diff --git a/arch/m68k/lib/ashrdi3.c b/arch/m68k/lib/ashrdi3.c index e6565a3ee2c3..5837b1dd3334 100644 --- a/arch/m68k/lib/ashrdi3.c +++ b/arch/m68k/lib/ashrdi3.c @@ -18,10 +18,10 @@ GNU General Public License for more details. */ #define BITS_PER_UNIT 8 -typedef int SItype __attribute__ ((mode (SI))); -typedef unsigned int USItype __attribute__ ((mode (SI))); -typedef int DItype __attribute__ ((mode (DI))); -typedef int word_type __attribute__ ((mode (__word__))); +typedef int SItype __mode(SI); +typedef unsigned int USItype __mode(SI); +typedef int DItype __mode(DI); +typedef int word_type __mode(__word__); struct DIstruct {SItype high, low;}; diff --git a/arch/m68k/lib/lshrdi3.c b/arch/m68k/lib/lshrdi3.c index 039779737c7d..7f40566be6c8 100644 --- a/arch/m68k/lib/lshrdi3.c +++ b/arch/m68k/lib/lshrdi3.c @@ -18,10 +18,10 @@ GNU General Public License for more details. */ #define BITS_PER_UNIT 8 -typedef int SItype __attribute__ ((mode (SI))); -typedef unsigned int USItype __attribute__ ((mode (SI))); -typedef int DItype __attribute__ ((mode (DI))); -typedef int word_type __attribute__ ((mode (__word__))); +typedef int SItype __mode(SI); +typedef unsigned int USItype __mode(SI); +typedef int DItype __mode(DI); +typedef int word_type __mode(__word__); struct DIstruct {SItype high, low;}; diff --git a/arch/m68k/lib/muldi3.c b/arch/m68k/lib/muldi3.c index 6459af5b2af0..3fb05c698c41 100644 --- a/arch/m68k/lib/muldi3.c +++ b/arch/m68k/lib/muldi3.c @@ -65,10 +65,10 @@ GNU General Public License for more details. */ umul_ppmm (__w.s.high, __w.s.low, u, v); \ __w.ll; }) -typedef int SItype __attribute__ ((mode (SI))); -typedef unsigned int USItype __attribute__ ((mode (SI))); -typedef int DItype __attribute__ ((mode (DI))); -typedef int word_type __attribute__ ((mode (__word__))); +typedef int SItype __mode(SI); +typedef unsigned int USItype __mode(SI); +typedef int DItype __mode(DI); +typedef int word_type __mode(__word__); struct DIstruct {SItype high, low;}; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index e02ada312be8..64bfdf636f39 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -653,55 +653,6 @@ void __init mem_init(void) unsigned long *empty_zero_page __read_mostly; EXPORT_SYMBOL(empty_zero_page); -void show_mem(unsigned int filter) -{ - int total = 0,reserved = 0; - pg_data_t *pgdat; - - printk(KERN_INFO "Mem-info:\n"); - show_free_areas(filter); - - for_each_online_pgdat(pgdat) { - unsigned long flags; - int zoneid; - - pgdat_resize_lock(pgdat, &flags); - for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { - struct zone *zone = &pgdat->node_zones[zoneid]; - if (!populated_zone(zone)) - continue; - - total += zone->present_pages; - reserved = zone->present_pages - zone->managed_pages; - } - pgdat_resize_unlock(pgdat, &flags); - } - - printk(KERN_INFO "%d pages of RAM\n", total); - printk(KERN_INFO "%d reserved pages\n", reserved); - -#ifdef CONFIG_DISCONTIGMEM - { - struct zonelist *zl; - int i, j; - - for (i = 0; i < npmem_ranges; i++) { - zl = node_zonelist(i, 0); - for (j = 0; j < MAX_NR_ZONES; j++) { - struct zoneref *z; - struct zone *zone; - - printk("Zone list for zone %d on node %d: ", j, i); - for_each_zone_zonelist(zone, z, zl, j) - printk("[%d/%s] ", zone_to_nid(zone), - zone->name); - printk("\n"); - } - } - } -#endif -} - /* * pagetable_init() sets up the page tables * diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 47120bf2670c..2a32483c7b6c 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -230,7 +230,9 @@ extern long long virt_phys_offset; * and needs to be executable. This means the whole heap ends * up being executable. */ -#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ +#define VM_DATA_DEFAULT_FLAGS32 \ + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ + VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index c379ff5a4438..d71f872c9414 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -102,9 +102,9 @@ static void release_spapr_tce_table(struct rcu_head *head) kfree(stt); } -static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int kvm_spapr_tce_fault(struct vm_fault *vmf) { - struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; + struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; struct page *page; if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index a35e2c29d7ee..e5ec1368f0cd 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -233,8 +233,9 @@ spufs_mem_write(struct file *file, const char __user *buffer, } static int -spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +spufs_mem_mmap_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct spu_context *ctx = vma->vm_file->private_data; unsigned long pfn, offset; @@ -311,12 +312,11 @@ static const struct file_operations spufs_mem_fops = { .mmap = spufs_mem_mmap, }; -static int spufs_ps_fault(struct vm_area_struct *vma, - struct vm_fault *vmf, +static int spufs_ps_fault(struct vm_fault *vmf, unsigned long ps_offs, unsigned long ps_size) { - struct spu_context *ctx = vma->vm_file->private_data; + struct spu_context *ctx = vmf->vma->vm_file->private_data; unsigned long area, offset = vmf->pgoff << PAGE_SHIFT; int ret = 0; @@ -354,7 +354,7 @@ static int spufs_ps_fault(struct vm_area_struct *vma, down_read(¤t->mm->mmap_sem); } else { area = ctx->spu->problem_phys + ps_offs; - vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT); + vm_insert_pfn(vmf->vma, vmf->address, (area + offset) >> PAGE_SHIFT); spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu); } @@ -367,10 +367,9 @@ refault: } #if SPUFS_MMAP_4K -static int spufs_cntl_mmap_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int spufs_cntl_mmap_fault(struct vm_fault *vmf) { - return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE); + return spufs_ps_fault(vmf, 0x4000, SPUFS_CNTL_MAP_SIZE); } static const struct vm_operations_struct spufs_cntl_mmap_vmops = { @@ -1067,15 +1066,15 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, } static int -spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +spufs_signal1_mmap_fault(struct vm_fault *vmf) { #if SPUFS_SIGNAL_MAP_SIZE == 0x1000 - return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE); + return spufs_ps_fault(vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE); #elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 /* For 64k pages, both signal1 and signal2 can be used to mmap the whole * signal 1 and 2 area */ - return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); + return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); #else #error unsupported page size #endif @@ -1205,15 +1204,15 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, #if SPUFS_MMAP_4K static int -spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +spufs_signal2_mmap_fault(struct vm_fault *vmf) { #if SPUFS_SIGNAL_MAP_SIZE == 0x1000 - return spufs_ps_fault(vma, vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE); + return spufs_ps_fault(vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE); #elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 /* For 64k pages, both signal1 and signal2 can be used to mmap the whole * signal 1 and 2 area */ - return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); + return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); #else #error unsupported page size #endif @@ -1334,9 +1333,9 @@ DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, #if SPUFS_MMAP_4K static int -spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +spufs_mss_mmap_fault(struct vm_fault *vmf) { - return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE); + return spufs_ps_fault(vmf, 0x0000, SPUFS_MSS_MAP_SIZE); } static const struct vm_operations_struct spufs_mss_mmap_vmops = { @@ -1396,9 +1395,9 @@ static const struct file_operations spufs_mss_fops = { }; static int -spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +spufs_psmap_mmap_fault(struct vm_fault *vmf) { - return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_PS_MAP_SIZE); + return spufs_ps_fault(vmf, 0x0000, SPUFS_PS_MAP_SIZE); } static const struct vm_operations_struct spufs_psmap_mmap_vmops = { @@ -1456,9 +1455,9 @@ static const struct file_operations spufs_psmap_fops = { #if SPUFS_MMAP_4K static int -spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +spufs_mfc_mmap_fault(struct vm_fault *vmf) { - return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE); + return spufs_ps_fault(vmf, 0x3000, SPUFS_MFC_MAP_SIZE); } static const struct vm_operations_struct spufs_mfc_mmap_vmops = { diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 3f864c36d847..509456c0b08c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -916,7 +916,7 @@ cmds(struct pt_regs *excp) memzcan(); break; case 'i': - show_mem(0); + show_mem(0, NULL); break; default: termch = cmd; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f9293bfefb7f..9c9440dc253a 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -31,6 +31,7 @@ static struct memblock_type oldmem_type = { .max = 1, .total_size = 0, .regions = &oldmem_region, + .name = "oldmem", }; struct save_area { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index ec1f0dedb948..59ac93714fa4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -687,7 +687,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, vmaddr, size, NULL); + zap_page_range(vma, vmaddr, size); } up_read(&gmap->mm->mmap_sem); } diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index c4e65cb3280f..6f06058c5ae7 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -82,7 +82,7 @@ static void prom_sync_me(void) "nop\n\t" : : "r" (&trapbase)); prom_printf("PROM SYNC COMMAND...\n"); - show_free_areas(0); + show_free_areas(0, NULL); if (!is_idle_task(current)) { local_irq_enable(); sys_sync(); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index eb8287155279..c6afe98de4d9 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -55,17 +55,6 @@ extern unsigned int sparc_ramdisk_size; unsigned long highstart_pfn, highend_pfn; -void show_mem(unsigned int filter) -{ - printk("Mem-info:\n"); - show_free_areas(filter); - printk("Free swap: %6ldkB\n", - get_nr_swap_pages() << (PAGE_SHIFT-10)); - printk("%ld pages of RAM\n", totalram_pages); - printk("%ld free pages\n", nr_free_pages()); -} - - unsigned long last_valid_pfn; unsigned long calc_highpages(void) diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 7cc6ee7f1a58..492a7361e58e 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -36,51 +36,6 @@ #define K(x) ((x) << (PAGE_SHIFT-10)) -/* - * The normal show_free_areas() is too verbose on Tile, with dozens - * of processors and often four NUMA zones each with high and lowmem. - */ -void show_mem(unsigned int filter) -{ - struct zone *zone; - - pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n", - (global_node_page_state(NR_ACTIVE_ANON) + - global_node_page_state(NR_ACTIVE_FILE)), - (global_node_page_state(NR_INACTIVE_ANON) + - global_node_page_state(NR_INACTIVE_FILE)), - global_node_page_state(NR_FILE_DIRTY), - global_node_page_state(NR_WRITEBACK), - global_node_page_state(NR_UNSTABLE_NFS), - global_page_state(NR_FREE_PAGES), - (global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE)), - global_node_page_state(NR_FILE_MAPPED), - global_page_state(NR_PAGETABLE), - global_page_state(NR_BOUNCE), - global_node_page_state(NR_FILE_PAGES), - get_nr_swap_pages()); - - for_each_zone(zone) { - unsigned long flags, order, total = 0, largest_order = -1; - - if (!populated_zone(zone)) - continue; - - spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order < MAX_ORDER; order++) { - int nr = zone->free_area[order].nr_free; - total += nr << order; - if (nr) - largest_order = order; - } - spin_unlock_irqrestore(&zone->lock, flags); - pr_err("Node %d %7s: %lukB (largest %luKb)\n", - zone_to_nid(zone), zone->name, - K(total), largest_order ? K(1UL) << largest_order : 0); - } -} - /** * shatter_huge_page() - ensure a given address is mapped by a small page. * diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index be2bde9b07cf..f4950fbfe574 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -57,50 +57,6 @@ early_param("initrd", early_initrd); */ struct meminfo meminfo; -void show_mem(unsigned int filter) -{ - int free = 0, total = 0, reserved = 0; - int shared = 0, cached = 0, slab = 0, i; - struct meminfo *mi = &meminfo; - - printk(KERN_DEFAULT "Mem-info:\n"); - show_free_areas(filter); - - for_each_bank(i, mi) { - struct membank *bank = &mi->bank[i]; - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = bank_pfn_start(bank); - pfn2 = bank_pfn_end(bank); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (PageSlab(page)) - slab++; - else if (!page_count(page)) - free++; - else - shared += page_count(page) - 1; - page++; - } while (page < end); - } - - printk(KERN_DEFAULT "%d pages of RAM\n", total); - printk(KERN_DEFAULT "%d free pages\n", free); - printk(KERN_DEFAULT "%d reserved pages\n", reserved); - printk(KERN_DEFAULT "%d slab pages\n", slab); - printk(KERN_DEFAULT "%d pages shared\n", shared); - printk(KERN_DEFAULT "%d pages swap cached\n", cached); -} - static void __init find_limits(unsigned long *min, unsigned long *max_low, unsigned long *max_high) { diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 307b1f4543de..2e3c34b1df37 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -338,6 +338,7 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); + VMCOREINFO_PHYS_BASE(phys_base); } /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index af85b686a7b0..5fff9138c442 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -650,6 +650,17 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + /* + * Only allow partial section hotplug for ZONE_DEVICE ranges, + * since register_new_memory() requires section alignment, and + * CONFIG_SPARSEMEM_VMEMMAP=n requires sections to be fully + * populated. + */ + if ((!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) || !for_device) + && ((start & ~PA_SECTION_MASK) + || (size & ~PA_SECTION_MASK))) + return -EINVAL; + init_memory_mapping(start, start + size); ret = __add_pages(nid, zone, start_pfn, nr_pages); @@ -679,7 +690,7 @@ static void __meminit free_pagetable(struct page *page, int order) if (PageReserved(page)) { __ClearPageReserved(page); - magic = (unsigned long)page->lru.next; + magic = (unsigned long)page->freelist; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); @@ -1220,7 +1231,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) struct vmem_altmap *altmap = to_vmem_altmap(start); int err; - if (boot_cpu_has(X86_FEATURE_PSE)) + if (end - start < PAGES_PER_SECTION * sizeof(struct page)) + err = vmemmap_populate_basepages(start, end, node); + else if (boot_cpu_has(X86_FEATURE_PSE)) err = vmemmap_populate_hugepages(start, end, node, altmap); else if (altmap) { pr_err_once("%s: no cpu support for altmap allocations\n", diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index af59f808742f..aad4ac386f98 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -796,7 +796,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm, return -EINVAL; len = min(vma->vm_end, end) - addr; - zap_page_range(vma, addr, len, NULL); + zap_page_range(vma, addr, len); trace_mpx_unmap_zap(addr, addr+len); vma = vma->vm_next; diff --git a/block/genhd.c b/block/genhd.c index fcd6d4fae657..a178c8e59492 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -878,7 +878,7 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || (!disk_max_parts(sgp) && + if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) && (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) diff --git a/crypto/lz4.c b/crypto/lz4.c index 99c1b2cc2976..40fd2c2d1479 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm) static int __lz4_compress_crypto(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { - size_t tmp_len = *dlen; - int err; + int out_len = LZ4_compress_default(src, dst, + slen, (int)((size_t)dlen), ctx); - err = lz4_compress(src, slen, dst, &tmp_len, ctx); - - if (err < 0) + if (!out_len) return -EINVAL; - *dlen = tmp_len; + *dlen = out_len; return 0; } @@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, static int __lz4_decompress_crypto(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { - int err; - size_t tmp_len = *dlen; - size_t __slen = slen; + int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen)); - err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len); - if (err < 0) + if (out_len < 0) return -EINVAL; - *dlen = tmp_len; - return err; + *dlen = out_len; + return out_len; } static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src, diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c index 75ffc4a3f786..6f16f96e69cc 100644 --- a/crypto/lz4hc.c +++ b/crypto/lz4hc.c @@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm) static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { - size_t tmp_len = *dlen; - int err; + int out_len = LZ4_compress_HC(src, dst, slen, + (int)((size_t)dlen), LZ4HC_DEFAULT_CLEVEL, ctx); - err = lz4hc_compress(src, slen, dst, &tmp_len, ctx); - - if (err < 0) + if (out_len == 0) return -EINVAL; - *dlen = tmp_len; + *dlen = out_len; return 0; } @@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { - int err; - size_t tmp_len = *dlen; - size_t __slen = slen; + int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen)); - err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len); - if (err < 0) + if (out_len < 0) return -EINVAL; - *dlen = tmp_len; - return err; + *dlen = out_len; + return out_len; } static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3c71b982bf2a..79d1727ae3eb 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -629,7 +629,7 @@ free_range: page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; if (vma) zap_page_range(vma, (uintptr_t)page_addr + - proc->user_buffer_offset, PAGE_SIZE, NULL); + proc->user_buffer_offset, PAGE_SIZE); err_vm_insert_page_failed: unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); err_map_kernel_failed: @@ -2856,7 +2856,7 @@ static void binder_vma_close(struct vm_area_struct *vma) binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } -static int binder_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int binder_vm_fault(struct vm_fault *vmf) { return VM_FAULT_SIGBUS; } diff --git a/drivers/base/memory.c b/drivers/base/memory.c index dacb6a8418aa..730d1cfcf8da 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -685,24 +685,16 @@ static int add_memory_block(int base_section_nr) return 0; } -static bool is_zone_device_section(struct mem_section *ms) -{ - struct page *page; - - page = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms)); - return is_zone_device_page(page); -} - /* * need an interface for the VM to add new memory regions, * but without onlining it. */ -int register_new_memory(int nid, struct mem_section *section) +int register_new_memory(struct zone *zone, int nid, struct mem_section *section) { int ret = 0; struct memory_block *mem; - if (is_zone_device_section(section)) + if (is_dev_zone(zone)) return 0; mutex_lock(&mem_sysfs_mutex); @@ -736,14 +728,11 @@ unregister_memory(struct memory_block *memory) device_unregister(&memory->dev); } -static int remove_memory_section(unsigned long node_id, - struct mem_section *section, int phys_device) +static int remove_memory_section(struct zone *zone, unsigned long node_id, + struct mem_section *section, int phys_device) { struct memory_block *mem; - if (is_zone_device_section(section)) - return 0; - mutex_lock(&mem_sysfs_mutex); mem = find_memory_block(section); unregister_mem_sect_under_nodes(mem, __section_nr(section)); @@ -758,12 +747,15 @@ static int remove_memory_section(unsigned long node_id, return 0; } -int unregister_memory_section(struct mem_section *section) +int unregister_memory_section(struct zone *zone, struct mem_section *section) { + if (is_dev_zone(zone)) + return 0; + if (!present_section(section)) return -EINVAL; - return remove_memory_section(0, section, 0); + return remove_memory_section(zone, 0, section, 0); } #endif /* CONFIG_MEMORY_HOTREMOVE */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e5ab7d9e8c45..eaeff0696420 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -45,27 +45,6 @@ static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ static unsigned int num_devices = 1; -static inline void deprecated_attr_warn(const char *name) -{ - pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n", - task_pid_nr(current), - current->comm, - name, - "See zram documentation."); -} - -#define ZRAM_ATTR_RO(name) \ -static ssize_t name##_show(struct device *d, \ - struct device_attribute *attr, char *b) \ -{ \ - struct zram *zram = dev_to_zram(d); \ - \ - deprecated_attr_warn(__stringify(name)); \ - return scnprintf(b, PAGE_SIZE, "%llu\n", \ - (u64)atomic64_read(&zram->stats.name)); \ -} \ -static DEVICE_ATTR_RO(name); - static inline bool init_done(struct zram *zram) { return zram->disksize; @@ -218,47 +197,6 @@ static ssize_t disksize_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); } -static ssize_t orig_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - deprecated_attr_warn("orig_data_size"); - return scnprintf(buf, PAGE_SIZE, "%llu\n", - (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); -} - -static ssize_t mem_used_total_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val = 0; - struct zram *zram = dev_to_zram(dev); - - deprecated_attr_warn("mem_used_total"); - down_read(&zram->init_lock); - if (init_done(zram)) { - struct zram_meta *meta = zram->meta; - val = zs_get_total_pages(meta->mem_pool); - } - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); -} - -static ssize_t mem_limit_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val; - struct zram *zram = dev_to_zram(dev); - - deprecated_attr_warn("mem_limit"); - down_read(&zram->init_lock); - val = zram->limit_pages; - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); -} - static ssize_t mem_limit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -277,21 +215,6 @@ static ssize_t mem_limit_store(struct device *dev, return len; } -static ssize_t mem_used_max_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val = 0; - struct zram *zram = dev_to_zram(dev); - - deprecated_attr_warn("mem_used_max"); - down_read(&zram->init_lock); - if (init_done(zram)) - val = atomic_long_read(&zram->stats.max_used_pages); - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); -} - static ssize_t mem_used_max_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -467,26 +390,6 @@ static ssize_t debug_stat_show(struct device *dev, static DEVICE_ATTR_RO(io_stat); static DEVICE_ATTR_RO(mm_stat); static DEVICE_ATTR_RO(debug_stat); -ZRAM_ATTR_RO(num_reads); -ZRAM_ATTR_RO(num_writes); -ZRAM_ATTR_RO(failed_reads); -ZRAM_ATTR_RO(failed_writes); -ZRAM_ATTR_RO(invalid_io); -ZRAM_ATTR_RO(notify_free); -ZRAM_ATTR_RO(zero_pages); -ZRAM_ATTR_RO(compr_data_size); - -static inline bool zram_meta_get(struct zram *zram) -{ - if (atomic_inc_not_zero(&zram->refcount)) - return true; - return false; -} - -static inline void zram_meta_put(struct zram *zram) -{ - atomic_dec(&zram->refcount); -} static void zram_meta_free(struct zram_meta *meta, u64 disksize) { @@ -944,22 +847,17 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; - if (unlikely(!zram_meta_get(zram))) - goto error; - blk_queue_split(queue, &bio, queue->bio_split); if (!valid_io_request(zram, bio->bi_iter.bi_sector, bio->bi_iter.bi_size)) { atomic64_inc(&zram->stats.invalid_io); - goto put_zram; + goto error; } __zram_make_request(zram, bio); - zram_meta_put(zram); return BLK_QC_T_NONE; -put_zram: - zram_meta_put(zram); + error: bio_io_error(bio); return BLK_QC_T_NONE; @@ -989,13 +887,11 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, struct bio_vec bv; zram = bdev->bd_disk->private_data; - if (unlikely(!zram_meta_get(zram))) - goto out; if (!valid_io_request(zram, sector, PAGE_SIZE)) { atomic64_inc(&zram->stats.invalid_io); err = -EINVAL; - goto put_zram; + goto out; } index = sector >> SECTORS_PER_PAGE_SHIFT; @@ -1006,8 +902,6 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_offset = 0; err = zram_bvec_rw(zram, &bv, index, offset, is_write); -put_zram: - zram_meta_put(zram); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -1040,17 +934,6 @@ static void zram_reset_device(struct zram *zram) meta = zram->meta; comp = zram->comp; disksize = zram->disksize; - /* - * Refcount will go down to 0 eventually and r/w handler - * cannot handle further I/O so it will bail out by - * check zram_meta_get. - */ - zram_meta_put(zram); - /* - * We want to free zram_meta in process context to avoid - * deadlock between reclaim path and any other locks. - */ - wait_event(zram->io_done, atomic_read(&zram->refcount) == 0); /* Reset stats */ memset(&zram->stats, 0, sizeof(zram->stats)); @@ -1098,8 +981,6 @@ static ssize_t disksize_store(struct device *dev, goto out_destroy_comp; } - init_waitqueue_head(&zram->io_done); - atomic_set(&zram->refcount, 1); zram->meta = meta; zram->comp = comp; zram->disksize = disksize; @@ -1188,10 +1069,8 @@ static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RO(initstate); static DEVICE_ATTR_WO(reset); -static DEVICE_ATTR_RO(orig_data_size); -static DEVICE_ATTR_RO(mem_used_total); -static DEVICE_ATTR_RW(mem_limit); -static DEVICE_ATTR_RW(mem_used_max); +static DEVICE_ATTR_WO(mem_limit); +static DEVICE_ATTR_WO(mem_used_max); static DEVICE_ATTR_RW(max_comp_streams); static DEVICE_ATTR_RW(comp_algorithm); @@ -1199,17 +1078,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, &dev_attr_initstate.attr, &dev_attr_reset.attr, - &dev_attr_num_reads.attr, - &dev_attr_num_writes.attr, - &dev_attr_failed_reads.attr, - &dev_attr_failed_writes.attr, &dev_attr_compact.attr, - &dev_attr_invalid_io.attr, - &dev_attr_notify_free.attr, - &dev_attr_zero_pages.attr, - &dev_attr_orig_data_size.attr, - &dev_attr_compr_data_size.attr, - &dev_attr_mem_used_total.attr, &dev_attr_mem_limit.attr, &dev_attr_mem_used_max.attr, &dev_attr_max_comp_streams.attr, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 74fcf10da374..2692554b7737 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -106,9 +106,6 @@ struct zram { unsigned long limit_pages; struct zram_stats stats; - atomic_t refcount; /* refcount for zram_meta */ - /* wait all IO under all of cpu are done */ - wait_queue_head_t io_done; /* * This is the limit on amount of *uncompressed* worth of data * we can store in a disk. diff --git a/drivers/char/agp/alpha-agp.c b/drivers/char/agp/alpha-agp.c index 737187865269..53fe633df1e8 100644 --- a/drivers/char/agp/alpha-agp.c +++ b/drivers/char/agp/alpha-agp.c @@ -11,15 +11,14 @@ #include "agp.h" -static int alpha_core_agp_vm_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int alpha_core_agp_vm_fault(struct vm_fault *vmf) { alpha_agp_info *agp = agp_bridge->dev_private_data; dma_addr_t dma_addr; unsigned long pa; struct page *page; - dma_addr = vmf->address - vma->vm_start + agp->aperture.bus_base; + dma_addr = vmf->address - vmf->vma->vm_start + agp->aperture.bus_base; pa = agp->ops->translate(agp, dma_addr); if (pa == (unsigned long)-EINVAL) diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index a697ca0cab1e..a9c2fa3c81e5 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -191,12 +191,12 @@ mspec_close(struct vm_area_struct *vma) * Creates a mspec page and maps it to user space. */ static int -mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +mspec_fault(struct vm_fault *vmf) { unsigned long paddr, maddr; unsigned long pfn; pgoff_t index = vmf->pgoff; - struct vma_data *vdata = vma->vm_private_data; + struct vma_data *vdata = vmf->vma->vm_private_data; maddr = (volatile unsigned long) vdata->maddr[index]; if (maddr == 0) { @@ -227,7 +227,7 @@ mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * be because another thread has installed the pte first, so it * is no problem. */ - vm_insert_pfn(vma, vmf->address, pfn); + vm_insert_pfn(vmf->vma, vmf->address, pfn); return VM_FAULT_NOPAGE; } diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index ed758b74ddf0..0261f332bf3e 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -419,8 +419,7 @@ static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff, return -1; } -static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma, - struct vm_fault *vmf) +static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) { struct device *dev = &dax_dev->dev; struct dax_region *dax_region; @@ -428,7 +427,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma, phys_addr_t phys; pfn_t pfn; - if (check_vma(dax_dev, vma, __func__)) + if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; dax_region = dax_dev->region; @@ -446,7 +445,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma, pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - rc = vm_insert_mixed(vma, vmf->address, pfn); + rc = vm_insert_mixed(vmf->vma, vmf->address, pfn); if (rc == -ENOMEM) return VM_FAULT_OOM; @@ -456,8 +455,9 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } -static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int dax_dev_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; int rc; struct file *filp = vma->vm_file; struct dax_dev *dax_dev = filp->private_data; @@ -466,24 +466,22 @@ static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf) current->comm, (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read", vma->vm_start, vma->vm_end); rcu_read_lock(); - rc = __dax_dev_fault(dax_dev, vma, vmf); + rc = __dax_dev_fault(dax_dev, vmf); rcu_read_unlock(); return rc; } -static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, - struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, - unsigned int flags) +static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) { - unsigned long pmd_addr = addr & PMD_MASK; + unsigned long pmd_addr = vmf->address & PMD_MASK; struct device *dev = &dax_dev->dev; struct dax_region *dax_region; phys_addr_t phys; pgoff_t pgoff; pfn_t pfn; - if (check_vma(dax_dev, vma, __func__)) + if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; dax_region = dax_dev->region; @@ -498,7 +496,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, return VM_FAULT_SIGBUS; } - pgoff = linear_page_index(vma, pmd_addr); + pgoff = linear_page_index(vmf->vma, pmd_addr); phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); if (phys == -1) { dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, @@ -508,23 +506,23 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pmd(vma, addr, pmd, pfn, - flags & FAULT_FLAG_WRITE); + return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn, + vmf->flags & FAULT_FLAG_WRITE); } -static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, unsigned int flags) +static int dax_dev_pmd_fault(struct vm_fault *vmf) { int rc; - struct file *filp = vma->vm_file; + struct file *filp = vmf->vma->vm_file; struct dax_dev *dax_dev = filp->private_data; dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, - current->comm, (flags & FAULT_FLAG_WRITE) - ? "write" : "read", vma->vm_start, vma->vm_end); + current->comm, (vmf->flags & FAULT_FLAG_WRITE) + ? "write" : "read", + vmf->vma->vm_start, vmf->vma->vm_end); rcu_read_lock(); - rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags); + rc = __dax_dev_pmd_fault(dax_dev, vmf); rcu_read_unlock(); return rc; diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index bd311c77c254..bae6e26038ee 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -96,8 +96,9 @@ static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma) * map, get the page, increment the use count and return it. */ #if IS_ENABLED(CONFIG_AGP) -static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_do_vm_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_file *priv = vma->vm_file->private_data; struct drm_device *dev = priv->minor->dev; struct drm_local_map *map = NULL; @@ -168,7 +169,7 @@ vm_fault_error: return VM_FAULT_SIGBUS; /* Disallow mremap */ } #else -static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_do_vm_fault(struct vm_fault *vmf) { return VM_FAULT_SIGBUS; } @@ -184,8 +185,9 @@ static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * Get the mapping, find the real physical page to map, get the page, and * return it. */ -static int drm_do_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_do_vm_shm_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_local_map *map = vma->vm_private_data; unsigned long offset; unsigned long i; @@ -280,14 +282,14 @@ static void drm_vm_shm_close(struct vm_area_struct *vma) /** * \c fault method for DMA virtual memory. * - * \param vma virtual memory area. * \param address access address. * \return pointer to the page structure. * * Determine the page number from the page offset and get it from drm_device_dma::pagelist. */ -static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_do_vm_dma_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_file *priv = vma->vm_file->private_data; struct drm_device *dev = priv->minor->dev; struct drm_device_dma *dma = dev->dma; @@ -315,14 +317,14 @@ static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /** * \c fault method for scatter-gather virtual memory. * - * \param vma virtual memory area. * \param address access address. * \return pointer to the page structure. * * Determine the map offset from the page offset and get it from drm_sg_mem::pagelist. */ -static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_do_vm_sg_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_local_map *map = vma->vm_private_data; struct drm_file *priv = vma->vm_file->private_data; struct drm_device *dev = priv->minor->dev; @@ -347,24 +349,24 @@ static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return 0; } -static int drm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_vm_fault(struct vm_fault *vmf) { - return drm_do_vm_fault(vma, vmf); + return drm_do_vm_fault(vmf); } -static int drm_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_vm_shm_fault(struct vm_fault *vmf) { - return drm_do_vm_shm_fault(vma, vmf); + return drm_do_vm_shm_fault(vmf); } -static int drm_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_vm_dma_fault(struct vm_fault *vmf) { - return drm_do_vm_dma_fault(vma, vmf); + return drm_do_vm_dma_fault(vmf); } -static int drm_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int drm_vm_sg_fault(struct vm_fault *vmf) { - return drm_do_vm_sg_fault(vma, vmf); + return drm_do_vm_sg_fault(vmf); } /** AGP virtual memory operations */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index aa6e35ddc87f..e78f1406885d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -175,8 +175,9 @@ int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma) return obj->ops->mmap(obj, vma); } -int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int etnaviv_gem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); struct page **pages, *page; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 57b81460fec8..4c28f7ffcc4d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -447,8 +447,9 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv, return ret; } -int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int exynos_drm_gem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj); unsigned long pfn; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index df7c543d6558..85457255fcd1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -116,7 +116,7 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv, uint64_t *offset); /* page fault handler and mmap fault address(virtual) to physical memory. */ -int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +int exynos_drm_gem_fault(struct vm_fault *vmf); /* set vm_flags and we can change the vm attribute to other one at here. */ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index fd1488bf5189..3eb32953fc1a 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -111,8 +111,9 @@ static int psbfb_pan(struct fb_var_screeninfo *var, struct fb_info *info) return 0; } -static int psbfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int psbfb_vm_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct psb_framebuffer *psbfb = vma->vm_private_data; struct drm_device *dev = psbfb->base.dev; struct drm_psb_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c index 527c62917660..7da061aab729 100644 --- a/drivers/gpu/drm/gma500/gem.c +++ b/drivers/gpu/drm/gma500/gem.c @@ -164,8 +164,9 @@ int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev, * vma->vm_private_data points to the GEM object that is backing this * mapping. */ -int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int psb_gem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj; struct gtt_range *r; int ret; diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index 05d7aaf47eea..83e22fd4cfc0 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -752,7 +752,7 @@ extern int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args); extern int psb_gem_dumb_map_gtt(struct drm_file *file, struct drm_device *dev, uint32_t handle, uint64_t *offset); -extern int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +extern int psb_gem_fault(struct vm_fault *vmf); /* psb_device.c */ extern const struct psb_ops psb_chip_ops; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 033efc99a526..b3137c107099 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3352,7 +3352,7 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, unsigned int flags); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); -int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +int i915_gem_fault(struct vm_fault *vmf); int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ab358cb0edc4..32a41e0f02f2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1774,7 +1774,6 @@ compute_partial_view(struct drm_i915_gem_object *obj, /** * i915_gem_fault - fault a page into the GTT - * @area: CPU VMA in question * @vmf: fault info * * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped @@ -1791,9 +1790,10 @@ compute_partial_view(struct drm_i915_gem_object *obj, * The current feature set supported by i915_gem_fault() and thus GTT mmaps * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). */ -int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) +int i915_gem_fault(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */ + struct vm_area_struct *area = vmf->vma; struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index ed4dad3ca133..577079b6eaa7 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -206,7 +206,7 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev); int msm_gem_mmap_obj(struct drm_gem_object *obj, struct vm_area_struct *vma); int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); -int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +int msm_gem_fault(struct vm_fault *vmf); uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id, uint64_t *iova); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 1974ccb781de..5daa7cae5873 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -192,8 +192,9 @@ int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma) return msm_gem_mmap_obj(vma->vm_private_data, vma); } -int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int msm_gem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct drm_device *dev = obj->dev; struct msm_drm_private *priv = dev->dev_private; diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 74a9968df421..5d5a9f517c30 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -518,7 +518,6 @@ static int fault_2d(struct drm_gem_object *obj, /** * omap_gem_fault - pagefault handler for GEM objects - * @vma: the VMA of the GEM object * @vmf: fault detail * * Invoked when a fault occurs on an mmap of a GEM managed area. GEM @@ -529,8 +528,9 @@ static int fault_2d(struct drm_gem_object *obj, * vma->vm_private_data points to the GEM object that is backing this * mapping. */ -int omap_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int omap_gem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct omap_gem_object *omap_obj = to_omap_bo(obj); struct drm_device *dev = obj->dev; diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index bc1c896bc5e1..1d140c39e91c 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -105,15 +105,15 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev) static struct vm_operations_struct qxl_ttm_vm_ops; static const struct vm_operations_struct *ttm_vm_ops; -static int qxl_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int qxl_ttm_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo; int r; - bo = (struct ttm_buffer_object *)vma->vm_private_data; + bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data; if (bo == NULL) return VM_FAULT_NOPAGE; - r = ttm_vm_ops->fault(vma, vmf); + r = ttm_vm_ops->fault(vmf); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1888144d0fed..a0646dd8bc89 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -979,19 +979,19 @@ void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size) static struct vm_operations_struct radeon_ttm_vm_ops; static const struct vm_operations_struct *ttm_vm_ops = NULL; -static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int radeon_ttm_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo; struct radeon_device *rdev; int r; - bo = (struct ttm_buffer_object *)vma->vm_private_data; + bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data; if (bo == NULL) { return VM_FAULT_NOPAGE; } rdev = radeon_get_rdev(bo->bdev); down_read(&rdev->pm.mclk_lock); - r = ttm_vm_ops->fault(vma, vmf); + r = ttm_vm_ops->fault(vmf); up_read(&rdev->pm.mclk_lock); return r; } diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 7d853e6b5ff0..9f34662f5140 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -441,8 +441,9 @@ int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm, return 0; } -static int tegra_bo_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int tegra_bo_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *gem = vma->vm_private_data; struct tegra_bo *bo = to_tegra_bo(gem); struct page *page; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 68ef993ab431..247986a1d097 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -43,7 +43,6 @@ #define TTM_BO_VM_NUM_PREFAULT 16 static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, - struct vm_area_struct *vma, struct vm_fault *vmf) { int ret = 0; @@ -66,7 +65,7 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) goto out_unlock; - up_read(&vma->vm_mm->mmap_sem); + up_read(&vmf->vma->vm_mm->mmap_sem); (void) dma_fence_wait(bo->moving, true); goto out_unlock; } @@ -89,8 +88,9 @@ out_unlock: return ret; } -static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ttm_bo_vm_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct ttm_buffer_object *bo = (struct ttm_buffer_object *) vma->vm_private_data; struct ttm_bo_device *bdev = bo->bdev; @@ -163,7 +163,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * Wait for buffer data in transit, due to a pipelined * move. */ - ret = ttm_bo_vm_fault_idle(bo, vma, vmf); + ret = ttm_bo_vm_fault_idle(bo, vmf); if (unlikely(ret != 0)) { retval = ret; goto out_unlock; diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 6c4286e57362..2a75ab80527a 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -134,7 +134,7 @@ void udl_gem_put_pages(struct udl_gem_object *obj); int udl_gem_vmap(struct udl_gem_object *obj); void udl_gem_vunmap(struct udl_gem_object *obj); int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); -int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +int udl_gem_fault(struct vm_fault *vmf); int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, int width, int height); diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index 3c0c4bd3f750..775c50e4f02c 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -100,8 +100,9 @@ int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) return ret; } -int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int udl_gem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct udl_gem_object *obj = to_udl_bo(vma->vm_private_data); struct page *page; unsigned int page_offset; diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 477e07f0ecb6..7ccbb03e98de 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -50,8 +50,9 @@ static void vgem_gem_free_object(struct drm_gem_object *obj) kfree(vgem_obj); } -static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int vgem_gem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct drm_vgem_gem_object *obj = vma->vm_private_data; /* We don't use vmf->pgoff since that has the fake offset */ unsigned long vaddr = vmf->address; diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index 63b3d5d35cf6..ffcbeb14ece6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -114,18 +114,17 @@ static void virtio_gpu_ttm_global_fini(struct virtio_gpu_device *vgdev) static struct vm_operations_struct virtio_gpu_ttm_vm_ops; static const struct vm_operations_struct *ttm_vm_ops; -static int virtio_gpu_ttm_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int virtio_gpu_ttm_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo; struct virtio_gpu_device *vgdev; int r; - bo = (struct ttm_buffer_object *)vma->vm_private_data; + bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data; if (bo == NULL) return VM_FAULT_NOPAGE; vgdev = virtio_gpu_get_vgdev(bo->bdev); - r = ttm_vm_ops->fault(vma, vmf); + r = ttm_vm_ops->fault(vmf); return r; } #endif diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index e8d55a153a65..e88afe1a435c 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -1188,9 +1188,9 @@ static void msc_mmap_close(struct vm_area_struct *vma) mutex_unlock(&msc->buf_mutex); } -static int msc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int msc_mmap_fault(struct vm_fault *vmf) { - struct msc_iter *iter = vma->vm_file->private_data; + struct msc_iter *iter = vmf->vma->vm_file->private_data; struct msc *msc = iter->msc; vmf->page = msc_buffer_get_page(msc, vmf->pgoff); @@ -1198,7 +1198,7 @@ static int msc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; get_page(vmf->page); - vmf->page->mapping = vma->vm_file->f_mapping; + vmf->page->mapping = vmf->vma->vm_file->f_mapping; vmf->page->index = vmf->pgoff; return 0; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index bd786b7bd30b..f46033984d07 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -92,7 +92,7 @@ static unsigned int poll_next(struct file *, struct poll_table_struct *); static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); -static int vma_fault(struct vm_area_struct *, struct vm_fault *); +static int vma_fault(struct vm_fault *); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); @@ -695,7 +695,7 @@ done: * Local (non-chip) user memory is not mapped right away but as it is * accessed by the user-level code. */ -static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int vma_fault(struct vm_fault *vmf) { struct page *page; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 2d1eacf1dfed..9396c1807cc3 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -893,7 +893,7 @@ bail: /* * qib_file_vma_fault - handle a VMA page fault. */ -static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int qib_file_vma_fault(struct vm_fault *vmf) { struct page *page; diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index ba63ca57ed7e..36bd904946bd 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -434,8 +434,9 @@ static void videobuf_vm_close(struct vm_area_struct *vma) * now ...). Bounce buffers don't work very well for the data rates * video capture has. */ -static int videobuf_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int videobuf_vm_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct page *page; dprintk(3, "fault: fault @ %08lx [vma %08lx-%08lx]\n", diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 3907387b6d15..062bf6ca2625 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -121,8 +121,9 @@ void cxl_context_set_mapping(struct cxl_context *ctx, mutex_unlock(&ctx->mapping_lock); } -static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int cxl_mmap_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct cxl_context *ctx = vma->vm_file->private_data; u64 area, offset; diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index af2e077da4b8..3641f1334cf0 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -926,8 +926,9 @@ again: * * Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries. */ -int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int gru_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct gru_thread_state *gts; unsigned long paddr, vaddr; unsigned long expires; diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 5c3ce2459675..b5e308b50ed1 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -665,7 +665,7 @@ extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, char *cbmap); extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count, char *dsmap); -extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf); +extern int gru_fault(struct vm_fault *vmf); extern struct gru_mm_struct *gru_register_mmu_notifier(void); extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index d390b9663dc3..57e6cef81ebe 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -914,7 +914,7 @@ static void ioc3_alloc_rings(struct net_device *dev) skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); if (!skb) { - show_free_areas(0); + show_free_areas(0, NULL); continue; } diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index a2ac9e641aa9..5d17710f7402 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -538,7 +538,7 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE; + nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset)); if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, "number of pfns truncated from %lld to %ld\n", @@ -557,7 +557,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) { u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; - u32 start_pad = 0, end_trunc = 0; resource_size_t start, size; struct nd_namespace_io *nsio; struct nd_region *nd_region; @@ -590,42 +589,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) return -ENXIO; } - memset(pfn_sb, 0, sizeof(*pfn_sb)); - - /* - * Check if pmem collides with 'System RAM' when section aligned and - * trim it accordingly - */ - nsio = to_nd_namespace_io(&ndns->dev); - start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start); - size = resource_size(&nsio->res); - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { - start = nsio->res.start; - start_pad = PHYS_SECTION_ALIGN_UP(start) - start; - } - - start = nsio->res.start; - size = PHYS_SECTION_ALIGN_UP(start + size) - start; - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { - size = resource_size(&nsio->res); - end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); - } - - if (start_pad + end_trunc) - dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", - dev_name(&ndns->dev), start_pad + end_trunc); - /* * Note, we use 64 here for the standard size of struct page, * debugging options may cause it to be larger in which case the * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. */ - start += start_pad; + nsio = to_nd_namespace_io(&ndns->dev); + start = nsio->res.start; size = resource_size(&nsio->res); - npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K; + npfns = PHYS_PFN(size - SZ_8K); if (nd_pfn->mode == PFN_MODE_PMEM) { unsigned long memmap_size; @@ -642,13 +615,14 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) else return -ENXIO; - if (offset + start_pad + end_trunc >= size) { + if (offset >= size) { dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", dev_name(&ndns->dev)); return -ENXIO; } - npfns = (size - offset - start_pad - end_trunc) / SZ_4K; + memset(pfn_sb, 0, sizeof(*pfn_sb)); + npfns = PHYS_PFN(size - offset); pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -657,8 +631,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); pfn_sb->version_minor = cpu_to_le16(2); - pfn_sb->start_pad = cpu_to_le32(start_pad); - pfn_sb->end_trunc = cpu_to_le32(end_trunc); pfn_sb->align = cpu_to_le32(nd_pfn->align); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 9013a585507e..50b617af81bd 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -889,17 +889,16 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, goto err_req; } - down_read(¤t->mm->mmap_sem); - pinned = get_user_pages( + pinned = get_user_pages_unlocked( (unsigned long)xfer->loc_addr & PAGE_MASK, nr_pages, - dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0, - page_list, NULL); - up_read(¤t->mm->mmap_sem); + page_list, + dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0); if (pinned != nr_pages) { if (pinned < 0) { - rmcd_error("get_user_pages err=%ld", pinned); + rmcd_error("get_user_pages_unlocked err=%ld", + pinned); nr_pages = 0; } else rmcd_error("pinned %ld out of %ld pages", diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c index 90869cee2b20..ef5bf55f08a4 100644 --- a/drivers/scsi/cxlflash/superpipe.c +++ b/drivers/scsi/cxlflash/superpipe.c @@ -1053,7 +1053,6 @@ out: /** * cxlflash_mmap_fault() - mmap fault handler for adapter file descriptor - * @vma: VM area associated with mapping. * @vmf: VM fault associated with current fault. * * To support error notification via MMIO, faults are 'caught' by this routine @@ -1067,8 +1066,9 @@ out: * * Return: 0 on success, VM_FAULT_SIGBUS on failure */ -static int cxlflash_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int cxlflash_mmap_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct file *file = vma->vm_file; struct cxl_context *ctx = cxl_fops_get_context(file); struct cxlflash_cfg *cfg = container_of(file->f_op, struct cxlflash_cfg, @@ -1097,7 +1097,7 @@ static int cxlflash_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (likely(!ctxi->err_recovery_active)) { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - rc = ctxi->cxl_mmap_vmops->fault(vma, vmf); + rc = ctxi->cxl_mmap_vmops->fault(vmf); } else { dev_dbg(dev, "%s: err recovery active, use err_page\n", __func__); diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 226a8def7a8b..648e5bc1137f 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1185,8 +1185,9 @@ sg_fasync(int fd, struct file *filp, int mode) } static int -sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +sg_vma_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; Sg_fd *sfp; unsigned long offset, len, sa; Sg_scatter_hold *rsv_schp; diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 937c2d5d7ec3..2c3ffbcbd621 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -865,15 +865,14 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer, list_for_each_entry(vma_list, &buffer->vmas, list) { struct vm_area_struct *vma = vma_list->vma; - zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, - NULL); + zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start); } mutex_unlock(&buffer->lock); } -static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ion_vm_fault(struct vm_fault *vmf) { - struct ion_buffer *buffer = vma->vm_private_data; + struct ion_buffer *buffer = vmf->vma->vm_private_data; unsigned long pfn; int ret; @@ -882,7 +881,7 @@ static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) BUG_ON(!buffer->pages || !buffer->pages[vmf->pgoff]); pfn = page_to_pfn(ion_buffer_page(buffer->pages[vmf->pgoff])); - ret = vm_insert_pfn(vma, vmf->address, pfn); + ret = vm_insert_pfn(vmf->vma, vmf->address, pfn); mutex_unlock(&buffer->lock); if (ret) return VM_FAULT_ERROR; diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index ee01f20d8b11..3c151e1ace8f 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -321,7 +321,7 @@ out: return fault_ret; } -static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ll_fault(struct vm_fault *vmf) { int count = 0; bool printed = false; @@ -335,7 +335,7 @@ static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf) set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM)); restart: - result = ll_fault0(vma, vmf); + result = ll_fault0(vmf->vma, vmf); LASSERT(!(result & VM_FAULT_LOCKED)); if (result == 0) { struct page *vmpage = vmf->page; @@ -362,8 +362,9 @@ restart: return result; } -static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ll_page_mkwrite(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; int count = 0; bool printed = false; bool retry; diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 697cbfbe9374..99f995c27f15 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -1006,7 +1006,7 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) { struct vm_fault *vmf = cfio->ft_vmf; - cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf); + cfio->ft_flags = filemap_fault(vmf); cfio->ft_flags_valid = 1; if (vmf->page) { diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 18f0ec2e1f9c..c3adefe95e50 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -781,15 +781,15 @@ static int tcmu_find_mem_index(struct vm_area_struct *vma) return -1; } -static int tcmu_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int tcmu_vma_fault(struct vm_fault *vmf) { - struct tcmu_dev *udev = vma->vm_private_data; + struct tcmu_dev *udev = vmf->vma->vm_private_data; struct uio_info *info = &udev->uio_info; struct page *page; unsigned long offset; void *addr; - int mi = tcmu_find_mem_index(vma); + int mi = tcmu_find_mem_index(vmf->vma); if (mi < 0) return VM_FAULT_SIGBUS; diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 701c085bb19b..71136742e606 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -317,7 +317,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = { static void sysrq_handle_showmem(int key) { - show_mem(0); + show_mem(0, NULL); } static struct sysrq_key_op sysrq_showmem_op = { .handler = sysrq_handle_showmem, diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 3dd6a491cdba..397e1509fe51 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -572,7 +572,7 @@ static void fn_scroll_back(struct vc_data *vc) static void fn_show_mem(struct vc_data *vc) { - show_mem(0); + show_mem(0, NULL); } static void fn_show_state(struct vc_data *vc) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index fba021f5736a..31d95dc9c202 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -597,14 +597,14 @@ static int uio_find_mem_index(struct vm_area_struct *vma) return -1; } -static int uio_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int uio_vma_fault(struct vm_fault *vmf) { - struct uio_device *idev = vma->vm_private_data; + struct uio_device *idev = vmf->vma->vm_private_data; struct page *page; unsigned long offset; void *addr; - int mi = uio_find_mem_index(vma); + int mi = uio_find_mem_index(vmf->vma); if (mi < 0) return VM_FAULT_SIGBUS; diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 91c22276c03b..9fb8b1e6ecc2 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1223,9 +1223,9 @@ static void mon_bin_vma_close(struct vm_area_struct *vma) /* * Map ring pages to user space. */ -static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int mon_bin_vma_fault(struct vm_fault *vmf) { - struct mon_reader_bin *rp = vma->vm_private_data; + struct mon_reader_bin *rp = vmf->vma->vm_private_data; unsigned long offset, chunk_idx; struct page *pageptr; diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 74b5bcac8bf2..37f69c061210 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -37,12 +37,11 @@ static struct page *fb_deferred_io_page(struct fb_info *info, unsigned long offs } /* this is to find and return the vmalloc-ed fb pages */ -static int fb_deferred_io_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int fb_deferred_io_fault(struct vm_fault *vmf) { unsigned long offset; struct page *page; - struct fb_info *info = vma->vm_private_data; + struct fb_info *info = vmf->vma->vm_private_data; offset = vmf->pgoff << PAGE_SHIFT; if (offset >= info->fix.smem_len) @@ -54,8 +53,8 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma, get_page(page); - if (vma->vm_file) - page->mapping = vma->vm_file->f_mapping; + if (vmf->vma->vm_file) + page->mapping = vmf->vma->vm_file->f_mapping; else printk(KERN_ERR "no mapping available\n"); @@ -91,11 +90,10 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); /* vm_ops->page_mkwrite handler */ -static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int fb_deferred_io_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct fb_info *info = vma->vm_private_data; + struct fb_info *info = vmf->vma->vm_private_data; struct fb_deferred_io *fbdefio = info->fbdefio; struct page *cur; @@ -105,7 +103,7 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, deferred framebuffer IO. then if userspace touches a page again, we repeat the same scheme */ - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); /* protect against the workqueue changing the page list */ mutex_lock(&fbdefio->lock); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 6e3306f4a525..fae21a0f1cee 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -598,10 +598,10 @@ static void privcmd_close(struct vm_area_struct *vma) kfree(pages); } -static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int privcmd_fault(struct vm_fault *vmf) { printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", - vma, vma->vm_start, vma->vm_end, + vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end, vmf->pgoff, (void *)vmf->address); return VM_FAULT_SIGBUS; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 6a0f3fa85ef7..3de3b4a89d89 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -534,11 +534,11 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) } static int -v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +v9fs_vm_page_mkwrite(struct vm_fault *vmf) { struct v9fs_inode *v9inode; struct page *page = vmf->page; - struct file *filp = vma->vm_file; + struct file *filp = vmf->vma->vm_file; struct inode *inode = file_inode(filp); diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 2f088773f1c0..2f8bab390d13 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -138,9 +138,9 @@ extern int affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh); extern int affs_remove_header(struct dentry *dentry); extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh); extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh); -extern void secs_to_datestamp(time64_t secs, struct affs_date *ds); -extern umode_t prot_to_mode(u32 prot); -extern void mode_to_prot(struct inode *inode); +extern void affs_secs_to_datestamp(time64_t secs, struct affs_date *ds); +extern umode_t affs_prot_to_mode(u32 prot); +extern void affs_mode_to_prot(struct inode *inode); __printf(3, 4) extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...); @@ -162,6 +162,7 @@ extern void affs_free_bitmap(struct super_block *sb); /* namei.c */ +extern const struct export_operations affs_export_ops; extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int); extern int affs_unlink(struct inode *dir, struct dentry *dentry); @@ -178,7 +179,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry, /* inode.c */ -extern unsigned long affs_parent_ino(struct inode *dir); extern struct inode *affs_new_inode(struct inode *dir); extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); extern void affs_evict_inode(struct inode *inode); @@ -213,6 +213,12 @@ extern const struct address_space_operations affs_aops_ofs; extern const struct dentry_operations affs_dentry_operations; extern const struct dentry_operations affs_intl_dentry_operations; +static inline bool affs_validblock(struct super_block *sb, int block) +{ + return(block >= AFFS_SB(sb)->s_reserved && + block < AFFS_SB(sb)->s_partition_size); +} + static inline void affs_set_blocksize(struct super_block *sb, int size) { @@ -222,7 +228,7 @@ static inline struct buffer_head * affs_bread(struct super_block *sb, int block) { pr_debug("%s: %d\n", __func__, block); - if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) + if (affs_validblock(sb, block)) return sb_bread(sb, block); return NULL; } @@ -230,7 +236,7 @@ static inline struct buffer_head * affs_getblk(struct super_block *sb, int block) { pr_debug("%s: %d\n", __func__, block); - if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) + if (affs_validblock(sb, block)) return sb_getblk(sb, block); return NULL; } @@ -239,7 +245,7 @@ affs_getzeroblk(struct super_block *sb, int block) { struct buffer_head *bh; pr_debug("%s: %d\n", __func__, block); - if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) { + if (affs_validblock(sb, block)) { bh = sb_getblk(sb, block); lock_buffer(bh); memset(bh->b_data, 0 , sb->s_blocksize); @@ -254,7 +260,7 @@ affs_getemptyblk(struct super_block *sb, int block) { struct buffer_head *bh; pr_debug("%s: %d\n", __func__, block); - if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) { + if (affs_validblock(sb, block)) { bh = sb_getblk(sb, block); wait_on_buffer(bh); set_buffer_uptodate(bh); diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 0ec65c133b93..b573c3b9a328 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -367,7 +367,7 @@ affs_fix_checksum(struct super_block *sb, struct buffer_head *bh) } void -secs_to_datestamp(time64_t secs, struct affs_date *ds) +affs_secs_to_datestamp(time64_t secs, struct affs_date *ds) { u32 days; u32 minute; @@ -386,55 +386,55 @@ secs_to_datestamp(time64_t secs, struct affs_date *ds) } umode_t -prot_to_mode(u32 prot) +affs_prot_to_mode(u32 prot) { umode_t mode = 0; if (!(prot & FIBF_NOWRITE)) - mode |= S_IWUSR; + mode |= 0200; if (!(prot & FIBF_NOREAD)) - mode |= S_IRUSR; + mode |= 0400; if (!(prot & FIBF_NOEXECUTE)) - mode |= S_IXUSR; + mode |= 0100; if (prot & FIBF_GRP_WRITE) - mode |= S_IWGRP; + mode |= 0020; if (prot & FIBF_GRP_READ) - mode |= S_IRGRP; + mode |= 0040; if (prot & FIBF_GRP_EXECUTE) - mode |= S_IXGRP; + mode |= 0010; if (prot & FIBF_OTR_WRITE) - mode |= S_IWOTH; + mode |= 0002; if (prot & FIBF_OTR_READ) - mode |= S_IROTH; + mode |= 0004; if (prot & FIBF_OTR_EXECUTE) - mode |= S_IXOTH; + mode |= 0001; return mode; } void -mode_to_prot(struct inode *inode) +affs_mode_to_prot(struct inode *inode) { u32 prot = AFFS_I(inode)->i_protect; umode_t mode = inode->i_mode; - if (!(mode & S_IXUSR)) + if (!(mode & 0100)) prot |= FIBF_NOEXECUTE; - if (!(mode & S_IRUSR)) + if (!(mode & 0400)) prot |= FIBF_NOREAD; - if (!(mode & S_IWUSR)) + if (!(mode & 0200)) prot |= FIBF_NOWRITE; - if (mode & S_IXGRP) + if (mode & 0010) prot |= FIBF_GRP_EXECUTE; - if (mode & S_IRGRP) + if (mode & 0040) prot |= FIBF_GRP_READ; - if (mode & S_IWGRP) + if (mode & 0020) prot |= FIBF_GRP_WRITE; - if (mode & S_IXOTH) + if (mode & 0001) prot |= FIBF_OTR_EXECUTE; - if (mode & S_IROTH) + if (mode & 0004) prot |= FIBF_OTR_READ; - if (mode & S_IWOTH) + if (mode & 0002) prot |= FIBF_OTR_WRITE; AFFS_I(inode)->i_protect = prot; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index fe4e1290dbb5..a5e6097eb5a9 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -69,7 +69,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) if (affs_test_opt(sbi->s_flags, SF_SETMODE)) inode->i_mode = sbi->s_mode; else - inode->i_mode = prot_to_mode(prot); + inode->i_mode = affs_prot_to_mode(prot); id = be16_to_cpu(tail->uid); if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID)) @@ -184,11 +184,12 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc) } tail = AFFS_TAIL(sb, bh); if (tail->stype == cpu_to_be32(ST_ROOT)) { - secs_to_datestamp(inode->i_mtime.tv_sec,&AFFS_ROOT_TAIL(sb, bh)->root_change); + affs_secs_to_datestamp(inode->i_mtime.tv_sec, + &AFFS_ROOT_TAIL(sb, bh)->root_change); } else { tail->protect = cpu_to_be32(AFFS_I(inode)->i_protect); tail->size = cpu_to_be32(inode->i_size); - secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change); + affs_secs_to_datestamp(inode->i_mtime.tv_sec, &tail->change); if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) { uid = i_uid_read(inode); gid = i_gid_read(inode); @@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr) mark_inode_dirty(inode); if (attr->ia_valid & ATTR_MODE) - mode_to_prot(inode); + affs_mode_to_prot(inode); out: return error; } diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 29186d29a3b6..96dd1d09a273 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -9,29 +9,10 @@ */ #include "affs.h" +#include <linux/exportfs.h> typedef int (*toupper_t)(int); -static int affs_toupper(int ch); -static int affs_hash_dentry(const struct dentry *, struct qstr *); -static int affs_compare_dentry(const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name); -static int affs_intl_toupper(int ch); -static int affs_intl_hash_dentry(const struct dentry *, struct qstr *); -static int affs_intl_compare_dentry(const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name); - -const struct dentry_operations affs_dentry_operations = { - .d_hash = affs_hash_dentry, - .d_compare = affs_compare_dentry, -}; - -const struct dentry_operations affs_intl_dentry_operations = { - .d_hash = affs_intl_hash_dentry, - .d_compare = affs_intl_compare_dentry, -}; - - /* Simple toupper() for DOS\1 */ static int @@ -271,7 +252,7 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) return -ENOSPC; inode->i_mode = mode; - mode_to_prot(inode); + affs_mode_to_prot(inode); mark_inode_dirty(inode); inode->i_op = &affs_file_inode_operations; @@ -301,7 +282,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return -ENOSPC; inode->i_mode = S_IFDIR | mode; - mode_to_prot(inode); + affs_mode_to_prot(inode); inode->i_op = &affs_dir_inode_operations; inode->i_fop = &affs_dir_operations; @@ -347,7 +328,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) inode_nohighmem(inode); inode->i_data.a_ops = &affs_symlink_aops; inode->i_mode = S_IFLNK | 0777; - mode_to_prot(inode); + affs_mode_to_prot(inode); error = -EIO; bh = affs_bread(sb, inode->i_ino); @@ -465,3 +446,71 @@ done: affs_brelse(bh); return retval; } + +static struct dentry *affs_get_parent(struct dentry *child) +{ + struct inode *parent; + struct buffer_head *bh; + + bh = affs_bread(child->d_sb, d_inode(child)->i_ino); + if (!bh) + return ERR_PTR(-EIO); + + parent = affs_iget(child->d_sb, + be32_to_cpu(AFFS_TAIL(child->d_sb, bh)->parent)); + brelse(bh); + if (IS_ERR(parent)) + return ERR_CAST(parent); + + return d_obtain_alias(parent); +} + +static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino, + u32 generation) +{ + struct inode *inode; + + if (!affs_validblock(sb, ino)) + return ERR_PTR(-ESTALE); + + inode = affs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + if (generation && inode->i_generation != generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + return inode; +} + +static struct dentry *affs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + affs_nfs_get_inode); +} + +static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + affs_nfs_get_inode); +} + +const struct export_operations affs_export_ops = { + .fh_to_dentry = affs_fh_to_dentry, + .fh_to_parent = affs_fh_to_parent, + .get_parent = affs_get_parent, +}; + +const struct dentry_operations affs_dentry_operations = { + .d_hash = affs_hash_dentry, + .d_compare = affs_compare_dentry, +}; + +const struct dentry_operations affs_intl_dentry_operations = { + .d_hash = affs_intl_hash_dentry, + .d_compare = affs_intl_compare_dentry, +}; diff --git a/fs/affs/super.c b/fs/affs/super.c index d6384863192c..37532538e8ab 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -32,7 +32,7 @@ affs_commit_super(struct super_block *sb, int wait) struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); lock_buffer(bh); - secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change); + affs_secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); unlock_buffer(bh); @@ -507,6 +507,7 @@ got_root: return -ENOMEM; } + sb->s_export_op = &affs_export_ops; pr_debug("s_flags=%lX\n", sb->s_flags); return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 422370293cfd..cacb05644c2b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -91,12 +91,18 @@ static struct linux_binfmt elf_format = { #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) -static int set_brk(unsigned long start, unsigned long end) +static int set_brk(unsigned long start, unsigned long end, int prot) { start = ELF_PAGEALIGN(start); end = ELF_PAGEALIGN(end); if (end > start) { - int error = vm_brk(start, end - start); + /* + * Map the last of the bss segment. + * If the header is requesting these pages to be + * executable, honour that (ppc32 needs this). + */ + int error = vm_brk_flags(start, end - start, + prot & PROT_EXEC ? VM_EXEC : 0); if (error) return error; } @@ -524,6 +530,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, unsigned long load_addr = 0; int load_addr_set = 0; unsigned long last_bss = 0, elf_bss = 0; + int bss_prot = 0; unsigned long error = ~0UL; unsigned long total_size; int i; @@ -606,8 +613,10 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, * elf_bss and last_bss is the bss section. */ k = load_addr + eppnt->p_vaddr + eppnt->p_memsz; - if (k > last_bss) + if (k > last_bss) { last_bss = k; + bss_prot = elf_prot; + } } } @@ -623,13 +632,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, /* * Next, align both the file and mem bss up to the page size, * since this is where elf_bss was just zeroed up to, and where - * last_bss will end after the vm_brk() below. + * last_bss will end after the vm_brk_flags() below. */ elf_bss = ELF_PAGEALIGN(elf_bss); last_bss = ELF_PAGEALIGN(last_bss); /* Finally, if there is still more bss to allocate, do it. */ if (last_bss > elf_bss) { - error = vm_brk(elf_bss, last_bss - elf_bss); + error = vm_brk_flags(elf_bss, last_bss - elf_bss, + bss_prot & PROT_EXEC ? VM_EXEC : 0); if (error) goto out; } @@ -674,6 +684,7 @@ static int load_elf_binary(struct linux_binprm *bprm) unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; unsigned long elf_bss, elf_brk; + int bss_prot = 0; int retval, i; unsigned long elf_entry; unsigned long interp_load_addr = 0; @@ -882,7 +893,8 @@ static int load_elf_binary(struct linux_binprm *bprm) before this one. Map anonymous pages, if needed, and clear the area. */ retval = set_brk(elf_bss + load_bias, - elf_brk + load_bias); + elf_brk + load_bias, + bss_prot); if (retval) goto out_free_dentry; nbyte = ELF_PAGEOFFSET(elf_bss); @@ -976,8 +988,10 @@ static int load_elf_binary(struct linux_binprm *bprm) if (end_data < k) end_data = k; k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz; - if (k > elf_brk) + if (k > elf_brk) { + bss_prot = elf_prot; elf_brk = k; + } } loc->elf_ex.e_entry += load_bias; @@ -993,7 +1007,7 @@ static int load_elf_binary(struct linux_binprm *bprm) * mapping in the interpreter, to make sure it doesn't wind * up getting placed where the bss needs to go. */ - retval = set_brk(elf_bss, elf_brk); + retval = set_brk(elf_bss, elf_brk, bss_prot); if (retval) goto out_free_dentry; if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 56b8595eacbb..be4d78031e09 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3157,7 +3157,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); +int btrfs_page_mkwrite(struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ac5f8c404268..edda32917a9e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8970,10 +8970,10 @@ again: * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. */ -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int btrfs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; @@ -9006,7 +9006,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = btrfs_delalloc_reserve_space(inode, page_start, reserved_space); if (!ret) { - ret = file_update_time(vma->vm_file); + ret = file_update_time(vmf->vma->vm_file); reserved = 1; } if (ret) { diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e4b066cd912a..09860c0ec7c1 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1386,8 +1386,9 @@ static void ceph_restore_sigs(sigset_t *oldset) /* * vm ops */ -static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ceph_filemap_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; @@ -1416,7 +1417,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || ci->i_inline_version == CEPH_INLINE_NONE) { current->journal_info = vma->vm_file; - ret = filemap_fault(vma, vmf); + ret = filemap_fault(vmf); current->journal_info = NULL; } else ret = -EAGAIN; @@ -1477,8 +1478,9 @@ out_restore: /* * Reuse write_begin here for simplicity. */ -static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ceph_page_mkwrite(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 18a1e1d6671f..82b9f4053603 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3254,7 +3254,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) * sure that it doesn't change while being written back. */ static int -cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +cifs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; @@ -35,6 +35,9 @@ #include <linux/iomap.h> #include "internal.h" +#define CREATE_TRACE_POINTS +#include <trace/events/fs_dax.h> + /* We choose 4096 entries - same as per-zone page wait tables */ #define DAX_WAIT_TABLE_BITS 12 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS) @@ -922,12 +925,11 @@ static int dax_insert_mapping(struct address_space *mapping, /** * dax_pfn_mkwrite - handle first write to DAX page - * @vma: The virtual memory area where the fault occurred * @vmf: The description of the fault */ -int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int dax_pfn_mkwrite(struct vm_fault *vmf) { - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct address_space *mapping = file->f_mapping; void *entry, **slot; pgoff_t index = vmf->pgoff; @@ -1078,12 +1080,24 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; + struct gendisk *disk = inode->i_sb->s_bdev->bd_disk; loff_t pos = iocb->ki_pos, ret = 0, done = 0; unsigned flags = 0; + unsigned long start = 0; + bool do_acct = blk_queue_io_stat(disk->queue); if (iov_iter_rw(iter) == WRITE) flags |= IOMAP_WRITE; + if (do_acct) { + size_t sec = iov_iter_count(iter) >> 9; + + start = jiffies; + generic_start_io_acct(iov_iter_rw(iter), + max_t(unsigned long, 1, sec), + &disk->part0); + } + while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); @@ -1093,6 +1107,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, done += ret; } + if (do_acct) + generic_end_io_acct(iov_iter_rw(iter), &disk->part0, start); + iocb->ki_pos += done; return done ? done : ret; } @@ -1109,7 +1126,6 @@ static int dax_fault_return(int error) /** * dax_iomap_fault - handle a page fault on a DAX file - * @vma: The virtual memory area where the fault occurred * @vmf: The description of the fault * @ops: iomap ops passed from the file system * @@ -1117,10 +1133,9 @@ static int dax_fault_return(int error) * or mkwrite handler for DAX files. Assumes the caller has done all the * necessary locking for the page fault to proceed successfully. */ -int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops) +int dax_iomap_fault(struct vm_fault *vmf, struct iomap_ops *ops) { - struct address_space *mapping = vma->vm_file->f_mapping; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct inode *inode = mapping->host; unsigned long vaddr = vmf->address; loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; @@ -1193,11 +1208,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, case IOMAP_MAPPED: if (iomap.flags & IOMAP_F_NEW) { count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); + mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } error = dax_insert_mapping(mapping, iomap.bdev, sector, - PAGE_SIZE, &entry, vma, vmf); + PAGE_SIZE, &entry, vmf->vma, vmf); /* -EBUSY is fine, somebody else faulted on the same PTE */ if (error == -EBUSY) error = 0; @@ -1244,21 +1259,21 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault); */ #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) -static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd, - struct vm_fault *vmf, unsigned long address, - struct iomap *iomap, loff_t pos, bool write, void **entryp) +static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, + loff_t pos, void **entryp) { - struct address_space *mapping = vma->vm_file->f_mapping; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct block_device *bdev = iomap->bdev; + struct inode *inode = mapping->host; struct blk_dax_ctl dax = { .sector = dax_iomap_sector(iomap, pos), .size = PMD_SIZE, }; long length = dax_map_atomic(bdev, &dax); - void *ret; + void *ret = NULL; if (length < 0) /* dax_map_atomic() failed */ - return VM_FAULT_FALLBACK; + goto fallback; if (length < PMD_SIZE) goto unmap_fallback; if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) @@ -1271,67 +1286,86 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd, ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector, RADIX_DAX_PMD); if (IS_ERR(ret)) - return VM_FAULT_FALLBACK; + goto fallback; *entryp = ret; - return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write); + trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret); + return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, + dax.pfn, vmf->flags & FAULT_FLAG_WRITE); unmap_fallback: dax_unmap_atomic(bdev, &dax); +fallback: + trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, + dax.pfn, ret); return VM_FAULT_FALLBACK; } -static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd, - struct vm_fault *vmf, unsigned long address, - struct iomap *iomap, void **entryp) +static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, + void **entryp) { - struct address_space *mapping = vma->vm_file->f_mapping; - unsigned long pmd_addr = address & PMD_MASK; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; + unsigned long pmd_addr = vmf->address & PMD_MASK; + struct inode *inode = mapping->host; struct page *zero_page; + void *ret = NULL; spinlock_t *ptl; pmd_t pmd_entry; - void *ret; - zero_page = mm_get_huge_zero_page(vma->vm_mm); + zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm); if (unlikely(!zero_page)) - return VM_FAULT_FALLBACK; + goto fallback; ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0, RADIX_DAX_PMD | RADIX_DAX_HZP); if (IS_ERR(ret)) - return VM_FAULT_FALLBACK; + goto fallback; *entryp = ret; - ptl = pmd_lock(vma->vm_mm, pmd); - if (!pmd_none(*pmd)) { + ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); + if (!pmd_none(*(vmf->pmd))) { spin_unlock(ptl); - return VM_FAULT_FALLBACK; + goto fallback; } - pmd_entry = mk_pmd(zero_page, vma->vm_page_prot); + pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot); pmd_entry = pmd_mkhuge(pmd_entry); - set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry); + set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry); spin_unlock(ptl); + trace_dax_pmd_load_hole(inode, vmf, zero_page, ret); return VM_FAULT_NOPAGE; + +fallback: + trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret); + return VM_FAULT_FALLBACK; } -int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops) +int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops) { + struct vm_area_struct *vma = vmf->vma; struct address_space *mapping = vma->vm_file->f_mapping; - unsigned long pmd_addr = address & PMD_MASK; - bool write = flags & FAULT_FLAG_WRITE; + unsigned long pmd_addr = vmf->address & PMD_MASK; + bool write = vmf->flags & FAULT_FLAG_WRITE; unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; struct inode *inode = mapping->host; int result = VM_FAULT_FALLBACK; struct iomap iomap = { 0 }; pgoff_t max_pgoff, pgoff; - struct vm_fault vmf; void *entry; loff_t pos; int error; + /* + * Check whether offset isn't beyond end of file now. Caller is + * supposed to hold locks serializing us with truncate / punch hole so + * this is a reliable test. + */ + pgoff = linear_page_index(vma, pmd_addr); + max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; + + trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) goto fallback; @@ -1342,16 +1376,10 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, if ((pmd_addr + PMD_SIZE) > vma->vm_end) goto fallback; - /* - * Check whether offset isn't beyond end of file now. Caller is - * supposed to hold locks serializing us with truncate / punch hole so - * this is a reliable test. - */ - pgoff = linear_page_index(vma, pmd_addr); - max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; - - if (pgoff > max_pgoff) - return VM_FAULT_SIGBUS; + if (pgoff > max_pgoff) { + result = VM_FAULT_SIGBUS; + goto out; + } /* If the PMD would extend beyond the file size */ if ((pgoff | PG_PMD_COLOUR) > max_pgoff) @@ -1380,21 +1408,15 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, if (IS_ERR(entry)) goto finish_iomap; - vmf.pgoff = pgoff; - vmf.flags = flags; - vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO; - switch (iomap.type) { case IOMAP_MAPPED: - result = dax_pmd_insert_mapping(vma, pmd, &vmf, address, - &iomap, pos, write, &entry); + result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry); break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (WARN_ON_ONCE(write)) goto unlock_entry; - result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap, - &entry); + result = dax_pmd_load_hole(vmf, &iomap, &entry); break; default: WARN_ON_ONCE(1); @@ -1420,9 +1442,11 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, } fallback: if (result == VM_FAULT_FALLBACK) { - split_huge_pmd(vma, pmd, address); + split_huge_pmd(vma, vmf->pmd, vmf->address); count_vm_event(THP_FAULT_FALLBACK); } +out: + trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result); return result; } EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index b0f241528a30..0bf0d971205a 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -87,19 +87,19 @@ out_unlock: * The default page_lock and i_size verification done by non-DAX fault paths * is sufficient because ext2 doesn't support hole punching. */ -static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ext2_dax_fault(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct ext2_inode_info *ei = EXT2_I(inode); int ret; if (vmf->flags & FAULT_FLAG_WRITE) { sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); } down_read(&ei->dax_sem); - ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops); + ret = dax_iomap_fault(vmf, &ext2_iomap_ops); up_read(&ei->dax_sem); if (vmf->flags & FAULT_FLAG_WRITE) @@ -107,16 +107,15 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return ret; } -static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int ext2_dax_pfn_mkwrite(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct ext2_inode_info *ei = EXT2_I(inode); loff_t size; int ret; sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); down_read(&ei->dax_sem); /* check that the faulting page hasn't raced with truncate */ @@ -124,7 +123,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else - ret = dax_pfn_mkwrite(vma, vmf); + ret = dax_pfn_mkwrite(vmf); up_read(&ei->dax_sem); sb_end_pagefault(inode->i_sb); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6bcb9622fdf9..164a2e003729 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2480,8 +2480,8 @@ extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); -extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +extern int ext4_page_mkwrite(struct vm_fault *vmf); +extern int ext4_filemap_fault(struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int ext4_get_projid(struct inode *inode, kprojid_t *projid); extern void ext4_da_update_reserve_space(struct inode *inode, diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d663d3d7c81c..cc0b1113276d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -255,19 +255,19 @@ out: } #ifdef CONFIG_FS_DAX -static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ext4_dax_fault(struct vm_fault *vmf) { int result; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; if (write) { sb_start_pagefault(sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); } down_read(&EXT4_I(inode)->i_mmap_sem); - result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); + result = dax_iomap_fault(vmf, &ext4_iomap_ops); up_read(&EXT4_I(inode)->i_mmap_sem); if (write) sb_end_pagefault(sb); @@ -275,21 +275,20 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return result; } -static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, unsigned int flags) +static int +ext4_dax_pmd_fault(struct vm_fault *vmf) { int result; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; - bool write = flags & FAULT_FLAG_WRITE; + bool write = vmf->flags & FAULT_FLAG_WRITE; if (write) { sb_start_pagefault(sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); } down_read(&EXT4_I(inode)->i_mmap_sem); - result = dax_iomap_pmd_fault(vma, addr, pmd, flags, - &ext4_iomap_ops); + result = dax_iomap_pmd_fault(vmf, &ext4_iomap_ops); up_read(&EXT4_I(inode)->i_mmap_sem); if (write) sb_end_pagefault(sb); @@ -306,22 +305,21 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, * wp_pfn_shared() fails. Thus fault gets retried and things work out as * desired. */ -static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int ext4_dax_pfn_mkwrite(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; loff_t size; int ret; sb_start_pagefault(sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); down_read(&EXT4_I(inode)->i_mmap_sem); size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else - ret = dax_pfn_mkwrite(vma, vmf); + ret = dax_pfn_mkwrite(vmf); up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 88d57af1b516..fb6fd97b4cfc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5776,8 +5776,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) return !buffer_mapped(bh); } -int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int ext4_page_mkwrite(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct page *page = vmf->page; loff_t size; unsigned long len; @@ -5867,13 +5868,13 @@ out: return ret; } -int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int ext4_filemap_fault(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int err; down_read(&EXT4_I(inode)->i_mmap_sem); - err = filemap_fault(vma, vmf); + err = filemap_fault(vmf); up_read(&EXT4_I(inode)->i_mmap_sem); return err; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9c0f469cde13..3ee9abf0369d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -33,11 +33,10 @@ #include "trace.h" #include <trace/events/f2fs.h> -static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; int err; @@ -59,7 +58,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_balance_fs(sbi, dn.node_changed); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2401c5dabb2a..e80bfd06daf5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2043,12 +2043,12 @@ static void fuse_vma_close(struct vm_area_struct *vma) * - sync(2) * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER */ -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int fuse_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); lock_page(page); if (page->mapping != inode->i_mapping) { unlock_page(page); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 016c11eaca7c..6fe2a59c6a9a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -379,10 +379,10 @@ static int gfs2_allocate_page_backing(struct page *page) * blocks allocated on disk to back that page. */ -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int gfs2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; @@ -399,7 +399,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out; - gfs2_size_hint(vma->vm_file, pos, PAGE_SIZE); + gfs2_size_hint(vmf->vma->vm_file, pos, PAGE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); @@ -407,7 +407,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out_uninit; /* Update file times before taking page lock */ - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 5de5c48b418d..75b254280ff6 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -169,7 +169,7 @@ static int hfs_readdir(struct file *file, struct dir_context *ctx) * Can be done after the list insertion; exclusion with * hfs_delete_cat() is provided by directory lock. */ - memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key)); + memcpy(&rd->key, &fd.key->cat, sizeof(struct hfs_cat_key)); out: hfs_find_exit(&fd); return err; diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index a3ec3ae7d347..482081bcdf70 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -38,7 +38,7 @@ static int hfs_get_last_session(struct super_block *sb, /* default values */ *start = 0; - *size = sb->s_bdev->bd_inode->i_size >> 9; + *size = i_size_read(sb->s_bdev->bd_inode) >> 9; if (HFS_SB(sb)->session >= 0) { te.cdte_track = HFS_SB(sb)->session; diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index ebb85e5f6549..e254fa0f0697 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -132,7 +132,7 @@ static int hfsplus_get_last_session(struct super_block *sb, /* default values */ *start = 0; - *size = sb->s_bdev->bd_inode->i_size >> 9; + *size = i_size_read(sb->s_bdev->bd_inode) >> 9; if (HFSPLUS_SB(sb)->session >= 0) { te.cdte_track = HFSPLUS_SB(sb)->session; diff --git a/fs/iomap.c b/fs/iomap.c index 354a123f170e..9b58cbd10973 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -442,11 +442,10 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, return length; } -int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops) +int iomap_page_mkwrite(struct vm_fault *vmf, struct iomap_ops *ops) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); unsigned long length; loff_t offset, size; ssize_t ret; diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 20c396291ac1..fb3ab62b9c54 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -348,9 +348,9 @@ static void kernfs_vma_open(struct vm_area_struct *vma) kernfs_put_active(of->kn); } -static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int kernfs_vma_fault(struct vm_fault *vmf) { - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); int ret; @@ -362,16 +362,15 @@ static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = VM_FAULT_SIGBUS; if (of->vm_ops->fault) - ret = of->vm_ops->fault(vma, vmf); + ret = of->vm_ops->fault(vmf); kernfs_put_active(of->kn); return ret; } -static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int kernfs_vma_page_mkwrite(struct vm_fault *vmf) { - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); int ret; @@ -383,7 +382,7 @@ static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma, ret = 0; if (of->vm_ops->page_mkwrite) - ret = of->vm_ops->page_mkwrite(vma, vmf); + ret = of->vm_ops->page_mkwrite(vmf); else file_update_time(file); diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 39f57bef8531..0c3905e0542e 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -27,10 +27,9 @@ * XXX: how are we excluding truncate/invalidate here? Maybe need to lock * page? */ -static int ncp_file_mmap_fault(struct vm_area_struct *area, - struct vm_fault *vmf) +static int ncp_file_mmap_fault(struct vm_fault *vmf) { - struct inode *inode = file_inode(area->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); char *pg_addr; unsigned int already_read; unsigned int count; @@ -90,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, * -- nyc */ count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT); + mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); return VM_FAULT_MAJOR; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 26dbe8b0c10d..668213984d68 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -528,10 +528,10 @@ const struct address_space_operations nfs_file_aops = { * writable, implying that someone is about to modify the page through a * shared-writable mapping */ -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int nfs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct file *filp = vma->vm_file; + struct file *filp = vmf->vma->vm_file; struct inode *inode = file_inode(filp); unsigned pagelen; int ret = VM_FAULT_NOPAGE; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b00d53d13d47..006068526542 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -728,8 +728,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { set_page_private(head->wb_page, 0); ClearPagePrivate(head->wb_page); - smp_mb__after_atomic(); - wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } nfsi->nrequests--; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 547381f3ce13..c5fa3dee72fc 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -51,8 +51,9 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return err; } -static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int nilfs_page_mkwrite(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct nilfs_transaction_info ti; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index bed1fcb63088..dc22ba8c710f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -283,16 +283,14 @@ int ocfs2_set_acl(handle_t *handle, int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type) { struct buffer_head *bh = NULL; - int status = 0; + int status, had_lock; + struct ocfs2_lock_holder oh; - status = ocfs2_inode_lock(inode, &bh, 1); - if (status < 0) { - if (status != -ENOENT) - mlog_errno(status); - return status; - } + had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); + if (had_lock < 0) + return had_lock; status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL); - ocfs2_inode_unlock(inode, 1); + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); brelse(bh); return status; } @@ -302,21 +300,20 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; struct posix_acl *acl; - int ret; + int had_lock; + struct ocfs2_lock_holder oh; osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return NULL; - ret = ocfs2_inode_lock(inode, &di_bh, 0); - if (ret < 0) { - if (ret != -ENOENT) - mlog_errno(ret); - return ERR_PTR(ret); - } + + had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); + if (had_lock < 0) + return ERR_PTR(had_lock); acl = ocfs2_get_acl_nolock(inode, type, di_bh); - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); brelse(di_bh); return acl; } diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 7025d8c27999..d5af152b94e0 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2612,20 +2612,48 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, spin_lock(&dlm->master_lock); ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, target, dlm->node_num); + if (ret == -EEXIST) { + if (oldmle) + __dlm_put_mle(oldmle); + + spin_unlock(&dlm->master_lock); + spin_unlock(&dlm->spinlock); + mlog(0, "another process is already migrating it\n"); + goto fail; + } + + /* + * If an old mle is found, it should be put. If its type is BLOCK, + * it should be put again. Because it has been unhasded from the map + * in the function dlm_add_migration_mle. + * Otherwise the memory will be leaked. It will not be found again from + * the hash map. + */ + if (oldmle) { + /* master is known, detach if not already detached */ + __dlm_mle_detach_hb_events(dlm, oldmle); + __dlm_put_mle(oldmle); + + /* + * If the type of the mle is BLOCK, it should be put once for + * release. Otherwise a memory leak may be caused because + * oldmle has been unhashed from the hash map and it will not + * be found any more. + */ + if (oldmle->type == DLM_MLE_BLOCK) + __dlm_put_mle(oldmle); + } + /* get an extra reference on the mle. * otherwise the assert_master from the new * master will destroy this. */ dlm_get_mle_inuse(mle); + mle_added = 1; + spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); - if (ret == -EEXIST) { - mlog(0, "another process is already migrating it\n"); - goto fail; - } - mle_added = 1; - /* * set the MIGRATING flag and flush asts * if we fail after this we need to re-dirty the lockres @@ -2642,12 +2670,6 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, } fail: - if (ret != -EEXIST && oldmle) { - /* master is known, detach if not already detached */ - dlm_mle_detach_hb_events(dlm, oldmle); - dlm_put_mle(oldmle); - } - if (ret < 0) { if (mle_added) { dlm_mle_detach_hb_events(dlm, mle); @@ -3182,16 +3204,24 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, if (ret < 0) kmem_cache_free(dlm_mle_cache, mle); + /* + * If an old mle is found, it should be put. If its type is BLOCK, + * it should be put again because it has been unhashed from the map + * in the dlm_add_migration_mle(). + * Otherwise the memory will be leaked. It will not be found again from + * the hash map. + */ + if (oldmle) { + __dlm_mle_detach_hb_events(dlm, oldmle); + __dlm_put_mle(oldmle); + if (ret >= 0 && oldmle->type == DLM_MLE_BLOCK) + __dlm_put_mle(oldmle); + } + spin_unlock(&dlm->master_lock); unlock: spin_unlock(&dlm->spinlock); - if (oldmle) { - /* master is known, detach if not already detached */ - dlm_mle_detach_hb_events(dlm, oldmle); - dlm_put_mle(oldmle); - } - if (res) dlm_lockres_put(res); leave: diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 74407c6dd592..908b05942282 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2268,6 +2268,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, { struct dlm_lock *lock, *next; unsigned int freed = 0; + struct list_head *queue = NULL; + int i; /* this node is the lockres master: * 1) remove any stale locks for the dead node @@ -2280,31 +2282,19 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */ /* TODO: check pending_asts, pending_basts here */ - list_for_each_entry_safe(lock, next, &res->granted, list) { - if (lock->ml.node == dead_node) { - list_del_init(&lock->list); - dlm_lock_put(lock); - /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */ - dlm_lock_put(lock); - freed++; - } - } - list_for_each_entry_safe(lock, next, &res->converting, list) { - if (lock->ml.node == dead_node) { - list_del_init(&lock->list); - dlm_lock_put(lock); - /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */ - dlm_lock_put(lock); - freed++; - } - } - list_for_each_entry_safe(lock, next, &res->blocked, list) { - if (lock->ml.node == dead_node) { - list_del_init(&lock->list); - dlm_lock_put(lock); - /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */ - dlm_lock_put(lock); - freed++; + for (i = DLM_GRANTED_LIST; i <= DLM_BLOCKED_LIST; i++) { + queue = dlm_list_idx_to_ptr(res, i); + list_for_each_entry_safe(lock, next, queue, list) { + if (lock->ml.node == dead_node) { + list_del_init(&lock->list); + dlm_lock_put(lock); + /* + * Can't schedule DLM_UNLOCK_FREE_LOCK: do + * manually + */ + dlm_lock_put(lock); + freed++; + } } } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 77d1632e905d..8dce4099a6ca 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -532,6 +532,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) init_waitqueue_head(&res->l_event); INIT_LIST_HEAD(&res->l_blocked_list); INIT_LIST_HEAD(&res->l_mask_waiters); + INIT_LIST_HEAD(&res->l_holders); } void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, @@ -749,6 +750,50 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res) res->l_flags = 0UL; } +/* + * Keep a list of processes who have interest in a lockres. + * Note: this is now only uesed for check recursive cluster locking. + */ +static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres, + struct ocfs2_lock_holder *oh) +{ + INIT_LIST_HEAD(&oh->oh_list); + oh->oh_owner_pid = get_pid(task_pid(current)); + + spin_lock(&lockres->l_lock); + list_add_tail(&oh->oh_list, &lockres->l_holders); + spin_unlock(&lockres->l_lock); +} + +static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, + struct ocfs2_lock_holder *oh) +{ + spin_lock(&lockres->l_lock); + list_del(&oh->oh_list); + spin_unlock(&lockres->l_lock); + + put_pid(oh->oh_owner_pid); +} + +static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_lock_holder *oh; + struct pid *pid; + + /* look in the list of holders for one with the current task as owner */ + spin_lock(&lockres->l_lock); + pid = task_pid(current); + list_for_each_entry(oh, &lockres->l_holders, oh_list) { + if (oh->oh_owner_pid == pid) { + spin_unlock(&lockres->l_lock); + return 1; + } + } + spin_unlock(&lockres->l_lock); + + return 0; +} + static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, int level) { @@ -2333,8 +2378,9 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, goto getbh; } - if (ocfs2_mount_local(osb)) - goto local; + if ((arg_flags & OCFS2_META_LOCK_GETBH) || + ocfs2_mount_local(osb)) + goto update; if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) ocfs2_wait_for_recovery(osb); @@ -2363,7 +2409,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) ocfs2_wait_for_recovery(osb); -local: +update: /* * We only see this flag if we're being called from * ocfs2_read_locked_inode(). It means we're locking an inode @@ -2497,6 +2543,59 @@ void ocfs2_inode_unlock(struct inode *inode, ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); } +/* + * This _tracker variantes are introduced to deal with the recursive cluster + * locking issue. The idea is to keep track of a lock holder on the stack of + * the current process. If there's a lock holder on the stack, we know the + * task context is already protected by cluster locking. Currently, they're + * used in some VFS entry routines. + * + * return < 0 on error, return == 0 if there's no lock holder on the stack + * before this call, return == 1 if this call would be a recursive locking. + */ +int ocfs2_inode_lock_tracker(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + struct ocfs2_lock_holder *oh) +{ + int status; + int arg_flags = 0, has_locked; + struct ocfs2_lock_res *lockres; + + lockres = &OCFS2_I(inode)->ip_inode_lockres; + has_locked = ocfs2_is_locked_by_me(lockres); + /* Just get buffer head if the cluster lock has been taken */ + if (has_locked) + arg_flags = OCFS2_META_LOCK_GETBH; + + if (likely(!has_locked || ret_bh)) { + status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + return status; + } + } + if (!has_locked) + ocfs2_add_holder(lockres, oh); + + return has_locked; +} + +void ocfs2_inode_unlock_tracker(struct inode *inode, + int ex, + struct ocfs2_lock_holder *oh, + int had_lock) +{ + struct ocfs2_lock_res *lockres; + + lockres = &OCFS2_I(inode)->ip_inode_lockres; + if (!had_lock) { + ocfs2_remove_holder(lockres, oh); + ocfs2_inode_unlock(inode, ex); + } +} + int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) { struct ocfs2_lock_res *lockres; diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d293a22c32c5..a7fc18ba0dc1 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -70,6 +70,11 @@ struct ocfs2_orphan_scan_lvb { __be32 lvb_os_seqno; }; +struct ocfs2_lock_holder { + struct list_head oh_list; + struct pid *oh_owner_pid; +}; + /* ocfs2_inode_lock_full() 'arg_flags' flags */ /* don't wait on recovery. */ #define OCFS2_META_LOCK_RECOVERY (0x01) @@ -77,6 +82,8 @@ struct ocfs2_orphan_scan_lvb { #define OCFS2_META_LOCK_NOQUEUE (0x02) /* don't block waiting for the downconvert thread, instead return -EAGAIN */ #define OCFS2_LOCK_NONBLOCK (0x04) +/* just get back disk inode bh if we've got cluster lock. */ +#define OCFS2_META_LOCK_GETBH (0x08) /* Locking subclasses of inode cluster lock */ enum { @@ -170,4 +177,15 @@ void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); /* To set the locking protocol on module initialization */ void ocfs2_set_locking_protocol(void); + +/* The _tracker pair is used to avoid cluster recursive locking */ +int ocfs2_inode_lock_tracker(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + struct ocfs2_lock_holder *oh); +void ocfs2_inode_unlock_tracker(struct inode *inode, + int ex, + struct ocfs2_lock_holder *oh, + int had_lock); + #endif /* DLMGLUE_H */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c4889655d32b..7b6a146327d7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1138,6 +1138,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) handle_t *handle = NULL; struct dquot *transfer_to[MAXQUOTAS] = { }; int qtype; + int had_lock; + struct ocfs2_lock_holder oh; trace_ocfs2_setattr(inode, dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -1173,11 +1175,30 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } } - status = ocfs2_inode_lock(inode, &bh, 1); - if (status < 0) { - if (status != -ENOENT) - mlog_errno(status); + had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); + if (had_lock < 0) { + status = had_lock; goto bail_unlock_rw; + } else if (had_lock) { + /* + * As far as we know, ocfs2_setattr() could only be the first + * VFS entry point in the call chain of recursive cluster + * locking issue. + * + * For instance: + * chmod_common() + * notify_change() + * ocfs2_setattr() + * posix_acl_chmod() + * ocfs2_iop_get_acl() + * + * But, we're not 100% sure if it's always true, because the + * ordering of the VFS entry points in the call chain is out + * of our control. So, we'd better dump the stack here to + * catch the other cases of recursive locking. + */ + mlog(ML_ERROR, "Another case of recursive locking:\n"); + dump_stack(); } inode_locked = 1; @@ -1260,8 +1281,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: - if (status) { - ocfs2_inode_unlock(inode, 1); + if (status && inode_locked) { + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); inode_locked = 0; } bail_unlock_rw: @@ -1279,7 +1300,7 @@ bail: mlog_errno(status); } if (inode_locked) - ocfs2_inode_unlock(inode, 1); + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); brelse(bh); return status; @@ -1320,21 +1341,32 @@ bail: int ocfs2_permission(struct inode *inode, int mask) { - int ret; + int ret, had_lock; + struct ocfs2_lock_holder oh; if (mask & MAY_NOT_BLOCK) return -ECHILD; - ret = ocfs2_inode_lock(inode, NULL, 0); - if (ret) { - if (ret != -ENOENT) - mlog_errno(ret); + had_lock = ocfs2_inode_lock_tracker(inode, NULL, 0, &oh); + if (had_lock < 0) { + ret = had_lock; goto out; + } else if (had_lock) { + /* See comments in ocfs2_setattr() for details. + * The call chain of this case could be: + * do_sys_open() + * may_open() + * inode_permission() + * ocfs2_permission() + * ocfs2_iop_get_acl() + */ + mlog(ML_ERROR, "Another case of recursive locking:\n"); + dump_stack(); } ret = generic_permission(inode, mask); - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); out: return ret; } diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 429088786e93..098f5c712569 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -44,17 +44,18 @@ #include "ocfs2_trace.h" -static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) +static int ocfs2_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; sigset_t oldset; int ret; ocfs2_block_signals(&oldset); - ret = filemap_fault(area, vmf); + ret = filemap_fault(vmf); ocfs2_unblock_signals(&oldset); - trace_ocfs2_fault(OCFS2_I(area->vm_file->f_mapping->host)->ip_blkno, - area, vmf->page, vmf->pgoff); + trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno, + vma, vmf->page, vmf->pgoff); return ret; } @@ -127,10 +128,10 @@ out: return ret; } -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ocfs2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct buffer_head *di_bh = NULL; sigset_t oldset; int ret; @@ -160,7 +161,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ down_write(&OCFS2_I(inode)->ip_alloc_sem); - ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page); + ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page); up_write(&OCFS2_I(inode)->ip_alloc_sem); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7e5958b0be6b..0c39d71c67a1 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -172,6 +172,7 @@ struct ocfs2_lock_res { struct list_head l_blocked_list; struct list_head l_mask_waiters; + struct list_head l_holders; unsigned long l_flags; char l_name[OCFS2_LOCK_ID_MAX_LEN]; diff --git a/fs/proc/base.c b/fs/proc/base.c index b73b4de8fb36..4ecb5edc3c61 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -292,101 +292,69 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, } } else { /* - * Command line (1 string) occupies ARGV and maybe - * extends into ENVP. - */ - if (len1 + len2 <= *pos) - goto skip_argv_envp; - if (len1 <= *pos) - goto skip_argv; - - p = arg_start + *pos; - len = len1 - *pos; - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* - * Command line can be shorter than whole ARGV - * even if last "marker" byte says it is not. - */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; - } - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; - } - - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; - } -skip_argv: - /* * Command line (1 string) occupies ARGV and * extends into ENVP. */ - if (len1 <= *pos) { - p = env_start + *pos - len1; - len = len1 + len2 - *pos; - } else { - p = env_start; - len = len2; + struct { + unsigned long p; + unsigned long len; + } cmdline[2] = { + { .p = arg_start, .len = len1 }, + { .p = env_start, .len = len2 }, + }; + loff_t pos1 = *pos; + unsigned int i; + + i = 0; + while (i < 2 && pos1 >= cmdline[i].len) { + pos1 -= cmdline[i].len; + i++; } - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* Find EOS. */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; - } - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; + while (i < 2) { + p = cmdline[i].p + pos1; + len = cmdline[i].len - pos1; + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* + * Command line can be shorter than whole ARGV + * even if last "marker" byte says it is not. + */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; } - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; + /* Only first chunk can be read partially. */ + pos1 = 0; + i++; } -skip_argv_envp: - ; } out_free_page: diff --git a/fs/proc/generic.c b/fs/proc/generic.c index f6a01f09f79d..06c73904d497 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -57,9 +57,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, struct rb_node *node = dir->subdir.rb_node; while (node) { - struct proc_dir_entry *de = container_of(node, - struct proc_dir_entry, - subdir_node); + struct proc_dir_entry *de = rb_entry(node, + struct proc_dir_entry, + subdir_node); int result = proc_match(len, name, de); if (result < 0) @@ -80,8 +80,9 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, /* Figure out where to put new node */ while (*new) { - struct proc_dir_entry *this = - container_of(*new, struct proc_dir_entry, subdir_node); + struct proc_dir_entry *this = rb_entry(*new, + struct proc_dir_entry, + subdir_node); int result = proc_match(de->namelen, de->name, this); parent = *new; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 5105b1599981..f52d8e857ff7 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -265,10 +265,10 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, * On s390 the fault handler is used for memory regions that can't be mapped * directly with remap_pfn_range(). */ -static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int mmap_vmcore_fault(struct vm_fault *vmf) { #ifdef CONFIG_S390 - struct address_space *mapping = vma->vm_file->f_mapping; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; pgoff_t index = vmf->pgoff; struct page *page; loff_t offset; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 729677e18e36..85c4c58a3409 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -342,31 +342,31 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen) { int ret; - ret = lz4_compress(in, inlen, out, &outlen, workspace); - if (ret) { + ret = LZ4_compress_default(in, out, inlen, outlen, workspace); + if (!ret) { pr_err("lz4_compress error, ret = %d!\n", ret); return -EIO; } - return outlen; + return ret; } static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen) { int ret; - ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen); - if (ret) { + ret = LZ4_decompress_safe(in, out, inlen, outlen); + if (ret < 0) { pr_err("lz4_decompress error, ret = %d!\n", ret); return -EIO; } - return outlen; + return ret; } static void allocate_lz4(void) { - big_oops_buf_sz = lz4_compressbound(psinfo->bufsize); + big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize); big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); if (big_oops_buf) { workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index e314cb30a181..feabcde0290d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1166,7 +1166,7 @@ static int reiserfs_parse_options(struct super_block *s, if (!strcmp(arg, "auto")) { /* From JFS code, to auto-get the size. */ *blocks = - s->s_bdev->bd_inode->i_size >> s-> + i_size_read(s->s_bdev->bd_inode) >> s-> s_blocksize_bits; } else { *blocks = simple_strtoul(arg, &p, 0); diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index ff4468bd18b0..95da65366548 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, struct squashfs_lz4 *stream = strm; void *buff = stream->input, *data; int avail, i, bytes = length, res; - size_t dest_len = output->length; for (i = 0; i < b; i++) { avail = min(bytes, msblk->devblksize - offset); @@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, put_bh(bh[i]); } - res = lz4_decompress_unknownoutputsize(stream->input, length, - stream->output, &dest_len); - if (res) + res = LZ4_decompress_safe(stream->input, stream->output, + length, output->length); + + if (res < 0) return -EIO; - bytes = dest_len; + bytes = res; data = squashfs_first_page(output); buff = stream->output; while (data) { @@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, } squashfs_finish_page(output); - return dest_len; + return res; } const struct squashfs_decompressor squashfs_lz4_comp_ops = { diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b0d783774c96..d9ae86f96df7 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1506,11 +1506,10 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) * mmap()d file has taken write protection fault and is being made writable. * UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int ubifs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 43953e03c356..8fe601b4875e 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -12,6 +12,7 @@ * mm/ksm.c (mm hashing). */ +#include <linux/list.h> #include <linux/hashtable.h> #include <linux/sched.h> #include <linux/mm.h> @@ -26,6 +27,7 @@ #include <linux/mempolicy.h> #include <linux/ioctl.h> #include <linux/security.h> +#include <linux/hugetlb.h> static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; @@ -45,12 +47,16 @@ struct userfaultfd_ctx { wait_queue_head_t fault_wqh; /* waitqueue head for the pseudo fd to wakeup poll/read */ wait_queue_head_t fd_wqh; + /* waitqueue head for events */ + wait_queue_head_t event_wqh; /* a refile sequence protected by fault_pending_wqh lock */ struct seqcount refile_seq; /* pseudo fd refcounting */ atomic_t refcount; /* userfaultfd syscall flags */ unsigned int flags; + /* features requested from the userspace */ + unsigned int features; /* state machine */ enum userfaultfd_state state; /* released */ @@ -59,6 +65,12 @@ struct userfaultfd_ctx { struct mm_struct *mm; }; +struct userfaultfd_fork_ctx { + struct userfaultfd_ctx *orig; + struct userfaultfd_ctx *new; + struct list_head list; +}; + struct userfaultfd_wait_queue { struct uffd_msg msg; wait_queue_t wq; @@ -142,6 +154,8 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx) VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh)); VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock)); VM_BUG_ON(waitqueue_active(&ctx->fault_wqh)); + VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock)); + VM_BUG_ON(waitqueue_active(&ctx->event_wqh)); VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock)); VM_BUG_ON(waitqueue_active(&ctx->fd_wqh)); mmdrop(ctx->mm); @@ -169,7 +183,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address, msg.arg.pagefault.address = address; if (flags & FAULT_FLAG_WRITE) /* - * If UFFD_FEATURE_PAGEFAULT_FLAG_WRITE was set in the + * If UFFD_FEATURE_PAGEFAULT_FLAG_WP was set in the * uffdio_api.features and UFFD_PAGEFAULT_FLAG_WRITE * was not set in a UFFD_EVENT_PAGEFAULT, it means it * was a read fault, otherwise if set it means it's @@ -188,6 +202,49 @@ static inline struct uffd_msg userfault_msg(unsigned long address, return msg; } +#ifdef CONFIG_HUGETLB_PAGE +/* + * Same functionality as userfaultfd_must_wait below with modifications for + * hugepmd ranges. + */ +static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, + unsigned long address, + unsigned long flags, + unsigned long reason) +{ + struct mm_struct *mm = ctx->mm; + pte_t *pte; + bool ret = true; + + VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); + + pte = huge_pte_offset(mm, address); + if (!pte) + goto out; + + ret = false; + + /* + * Lockless access: we're in a wait_event so it's ok if it + * changes under us. + */ + if (huge_pte_none(*pte)) + ret = true; + if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP)) + ret = true; +out: + return ret; +} +#else +static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, + unsigned long address, + unsigned long flags, + unsigned long reason) +{ + return false; /* should never get here */ +} +#endif /* CONFIG_HUGETLB_PAGE */ + /* * Verify the pagetables are still not ok after having reigstered into * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any @@ -364,8 +421,12 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) set_current_state(blocking_state); spin_unlock(&ctx->fault_pending_wqh.lock); - must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, - reason); + if (!is_vm_hugetlb_page(vmf->vma)) + must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, + reason); + else + must_wait = userfaultfd_huge_must_wait(ctx, vmf->address, + vmf->flags, reason); up_read(&mm->mmap_sem); if (likely(must_wait && !ACCESS_ONCE(ctx->released) && @@ -458,6 +519,196 @@ out: return ret; } +static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, + struct userfaultfd_wait_queue *ewq) +{ + int ret = 0; + + ewq->ctx = ctx; + init_waitqueue_entry(&ewq->wq, current); + + spin_lock(&ctx->event_wqh.lock); + /* + * After the __add_wait_queue the uwq is visible to userland + * through poll/read(). + */ + __add_wait_queue(&ctx->event_wqh, &ewq->wq); + for (;;) { + set_current_state(TASK_KILLABLE); + if (ewq->msg.event == 0) + break; + if (ACCESS_ONCE(ctx->released) || + fatal_signal_pending(current)) { + ret = -1; + __remove_wait_queue(&ctx->event_wqh, &ewq->wq); + break; + } + + spin_unlock(&ctx->event_wqh.lock); + + wake_up_poll(&ctx->fd_wqh, POLLIN); + schedule(); + + spin_lock(&ctx->event_wqh.lock); + } + __set_current_state(TASK_RUNNING); + spin_unlock(&ctx->event_wqh.lock); + + /* + * ctx may go away after this if the userfault pseudo fd is + * already released. + */ + + userfaultfd_ctx_put(ctx); + return ret; +} + +static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, + struct userfaultfd_wait_queue *ewq) +{ + ewq->msg.event = 0; + wake_up_locked(&ctx->event_wqh); + __remove_wait_queue(&ctx->event_wqh, &ewq->wq); +} + +int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) +{ + struct userfaultfd_ctx *ctx = NULL, *octx; + struct userfaultfd_fork_ctx *fctx; + + octx = vma->vm_userfaultfd_ctx.ctx; + if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) { + vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); + return 0; + } + + list_for_each_entry(fctx, fcs, list) + if (fctx->orig == octx) { + ctx = fctx->new; + break; + } + + if (!ctx) { + fctx = kmalloc(sizeof(*fctx), GFP_KERNEL); + if (!fctx) + return -ENOMEM; + + ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL); + if (!ctx) { + kfree(fctx); + return -ENOMEM; + } + + atomic_set(&ctx->refcount, 1); + ctx->flags = octx->flags; + ctx->state = UFFD_STATE_RUNNING; + ctx->features = octx->features; + ctx->released = false; + ctx->mm = vma->vm_mm; + atomic_inc(&ctx->mm->mm_count); + + userfaultfd_ctx_get(octx); + fctx->orig = octx; + fctx->new = ctx; + list_add_tail(&fctx->list, fcs); + } + + vma->vm_userfaultfd_ctx.ctx = ctx; + return 0; +} + +static int dup_fctx(struct userfaultfd_fork_ctx *fctx) +{ + struct userfaultfd_ctx *ctx = fctx->orig; + struct userfaultfd_wait_queue ewq; + + msg_init(&ewq.msg); + + ewq.msg.event = UFFD_EVENT_FORK; + ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; + + return userfaultfd_event_wait_completion(ctx, &ewq); +} + +void dup_userfaultfd_complete(struct list_head *fcs) +{ + int ret = 0; + struct userfaultfd_fork_ctx *fctx, *n; + + list_for_each_entry_safe(fctx, n, fcs, list) { + if (!ret) + ret = dup_fctx(fctx); + list_del(&fctx->list); + kfree(fctx); + } +} + +void mremap_userfaultfd_prep(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx *vm_ctx) +{ + struct userfaultfd_ctx *ctx; + + ctx = vma->vm_userfaultfd_ctx.ctx; + if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) { + vm_ctx->ctx = ctx; + userfaultfd_ctx_get(ctx); + } +} + +void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, + unsigned long from, unsigned long to, + unsigned long len) +{ + struct userfaultfd_ctx *ctx = vm_ctx->ctx; + struct userfaultfd_wait_queue ewq; + + if (!ctx) + return; + + if (to & ~PAGE_MASK) { + userfaultfd_ctx_put(ctx); + return; + } + + msg_init(&ewq.msg); + + ewq.msg.event = UFFD_EVENT_REMAP; + ewq.msg.arg.remap.from = from; + ewq.msg.arg.remap.to = to; + ewq.msg.arg.remap.len = len; + + userfaultfd_event_wait_completion(ctx, &ewq); +} + +void userfaultfd_remove(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + struct userfaultfd_ctx *ctx; + struct userfaultfd_wait_queue ewq; + + ctx = vma->vm_userfaultfd_ctx.ctx; + if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) + return; + + userfaultfd_ctx_get(ctx); + up_read(&mm->mmap_sem); + + *prev = NULL; /* We wait for ACK w/o the mmap semaphore */ + + msg_init(&ewq.msg); + + ewq.msg.event = UFFD_EVENT_REMOVE; + ewq.msg.arg.remove.start = start; + ewq.msg.arg.remove.end = end; + + userfaultfd_event_wait_completion(ctx, &ewq); + + down_read(&mm->mmap_sem); +} + static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; @@ -522,25 +773,36 @@ wakeup: } /* fault_pending_wqh.lock must be hold by the caller */ -static inline struct userfaultfd_wait_queue *find_userfault( - struct userfaultfd_ctx *ctx) +static inline struct userfaultfd_wait_queue *find_userfault_in( + wait_queue_head_t *wqh) { wait_queue_t *wq; struct userfaultfd_wait_queue *uwq; - VM_BUG_ON(!spin_is_locked(&ctx->fault_pending_wqh.lock)); + VM_BUG_ON(!spin_is_locked(&wqh->lock)); uwq = NULL; - if (!waitqueue_active(&ctx->fault_pending_wqh)) + if (!waitqueue_active(wqh)) goto out; /* walk in reverse to provide FIFO behavior to read userfaults */ - wq = list_last_entry(&ctx->fault_pending_wqh.task_list, - typeof(*wq), task_list); + wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list); uwq = container_of(wq, struct userfaultfd_wait_queue, wq); out: return uwq; } +static inline struct userfaultfd_wait_queue *find_userfault( + struct userfaultfd_ctx *ctx) +{ + return find_userfault_in(&ctx->fault_pending_wqh); +} + +static inline struct userfaultfd_wait_queue *find_userfault_evt( + struct userfaultfd_ctx *ctx) +{ + return find_userfault_in(&ctx->event_wqh); +} + static unsigned int userfaultfd_poll(struct file *file, poll_table *wait) { struct userfaultfd_ctx *ctx = file->private_data; @@ -572,10 +834,42 @@ static unsigned int userfaultfd_poll(struct file *file, poll_table *wait) smp_mb(); if (waitqueue_active(&ctx->fault_pending_wqh)) ret = POLLIN; + else if (waitqueue_active(&ctx->event_wqh)) + ret = POLLIN; + return ret; default: - BUG(); + WARN_ON_ONCE(1); + return POLLERR; + } +} + +static const struct file_operations userfaultfd_fops; + +static int resolve_userfault_fork(struct userfaultfd_ctx *ctx, + struct userfaultfd_ctx *new, + struct uffd_msg *msg) +{ + int fd; + struct file *file; + unsigned int flags = new->flags & UFFD_SHARED_FCNTL_FLAGS; + + fd = get_unused_fd_flags(flags); + if (fd < 0) + return fd; + + file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, new, + O_RDWR | flags); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); } + + fd_install(fd, file); + msg->arg.reserved.reserved1 = 0; + msg->arg.fork.ufd = fd; + + return 0; } static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, @@ -584,6 +878,15 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, ssize_t ret; DECLARE_WAITQUEUE(wait, current); struct userfaultfd_wait_queue *uwq; + /* + * Handling fork event requires sleeping operations, so + * we drop the event_wqh lock, then do these ops, then + * lock it back and wake up the waiter. While the lock is + * dropped the ewq may go away so we keep track of it + * carefully. + */ + LIST_HEAD(fork_event); + struct userfaultfd_ctx *fork_nctx = NULL; /* always take the fd_wqh lock before the fault_pending_wqh lock */ spin_lock(&ctx->fd_wqh.lock); @@ -635,6 +938,29 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, break; } spin_unlock(&ctx->fault_pending_wqh.lock); + + spin_lock(&ctx->event_wqh.lock); + uwq = find_userfault_evt(ctx); + if (uwq) { + *msg = uwq->msg; + + if (uwq->msg.event == UFFD_EVENT_FORK) { + fork_nctx = (struct userfaultfd_ctx *) + (unsigned long) + uwq->msg.arg.reserved.reserved1; + list_move(&uwq->wq.task_list, &fork_event); + spin_unlock(&ctx->event_wqh.lock); + ret = 0; + break; + } + + userfaultfd_event_complete(ctx, uwq); + spin_unlock(&ctx->event_wqh.lock); + ret = 0; + break; + } + spin_unlock(&ctx->event_wqh.lock); + if (signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -651,6 +977,23 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, __set_current_state(TASK_RUNNING); spin_unlock(&ctx->fd_wqh.lock); + if (!ret && msg->event == UFFD_EVENT_FORK) { + ret = resolve_userfault_fork(ctx, fork_nctx, msg); + + if (!ret) { + spin_lock(&ctx->event_wqh.lock); + if (!list_empty(&fork_event)) { + uwq = list_first_entry(&fork_event, + typeof(*uwq), + wq.task_list); + list_del(&uwq->wq.task_list); + __add_wait_queue(&ctx->event_wqh, &uwq->wq); + userfaultfd_event_complete(ctx, uwq); + } + spin_unlock(&ctx->event_wqh.lock); + } + } + return ret; } @@ -753,6 +1096,12 @@ static __always_inline int validate_range(struct mm_struct *mm, return 0; } +static inline bool vma_can_userfault(struct vm_area_struct *vma) +{ + return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || + vma_is_shmem(vma); +} + static int userfaultfd_register(struct userfaultfd_ctx *ctx, unsigned long arg) { @@ -763,6 +1112,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, struct uffdio_register __user *user_uffdio_register; unsigned long vm_flags, new_flags; bool found; + bool non_anon_pages; unsigned long start, end, vma_end; user_uffdio_register = (struct uffdio_register __user *) arg; @@ -814,13 +1164,21 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, goto out_unlock; /* + * If the first vma contains huge pages, make sure start address + * is aligned to huge page size. + */ + if (is_vm_hugetlb_page(vma)) { + unsigned long vma_hpagesize = vma_kernel_pagesize(vma); + + if (start & (vma_hpagesize - 1)) + goto out_unlock; + } + + /* * Search for not compatible vmas. - * - * FIXME: this shall be relaxed later so that it doesn't fail - * on tmpfs backed vmas (in addition to the current allowance - * on anonymous vmas). */ found = false; + non_anon_pages = false; for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) { cond_resched(); @@ -829,8 +1187,21 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, /* check not compatible vmas */ ret = -EINVAL; - if (cur->vm_ops) + if (!vma_can_userfault(cur)) goto out_unlock; + /* + * If this vma contains ending address, and huge pages + * check alignment. + */ + if (is_vm_hugetlb_page(cur) && end <= cur->vm_end && + end > cur->vm_start) { + unsigned long vma_hpagesize = vma_kernel_pagesize(cur); + + ret = -EINVAL; + + if (end & (vma_hpagesize - 1)) + goto out_unlock; + } /* * Check that this vma isn't already owned by a @@ -843,6 +1214,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, cur->vm_userfaultfd_ctx.ctx != ctx) goto out_unlock; + /* + * Note vmas containing huge pages + */ + if (is_vm_hugetlb_page(cur) || vma_is_shmem(cur)) + non_anon_pages = true; + found = true; } BUG_ON(!found); @@ -854,7 +1231,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, do { cond_resched(); - BUG_ON(vma->vm_ops); + BUG_ON(!vma_can_userfault(vma)); BUG_ON(vma->vm_userfaultfd_ctx.ctx && vma->vm_userfaultfd_ctx.ctx != ctx); @@ -912,7 +1289,8 @@ out_unlock: * userland which ioctls methods are guaranteed to * succeed on this range. */ - if (put_user(UFFD_API_RANGE_IOCTLS, + if (put_user(non_anon_pages ? UFFD_API_RANGE_IOCTLS_BASIC : + UFFD_API_RANGE_IOCTLS, &user_uffdio_register->ioctls)) ret = -EFAULT; } @@ -959,11 +1337,18 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, goto out_unlock; /* + * If the first vma contains huge pages, make sure start address + * is aligned to huge page size. + */ + if (is_vm_hugetlb_page(vma)) { + unsigned long vma_hpagesize = vma_kernel_pagesize(vma); + + if (start & (vma_hpagesize - 1)) + goto out_unlock; + } + + /* * Search for not compatible vmas. - * - * FIXME: this shall be relaxed later so that it doesn't fail - * on tmpfs backed vmas (in addition to the current allowance - * on anonymous vmas). */ found = false; ret = -EINVAL; @@ -980,7 +1365,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * provides for more strict behavior to notice * unregistration errors. */ - if (cur->vm_ops) + if (!vma_can_userfault(cur)) goto out_unlock; found = true; @@ -994,7 +1379,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, do { cond_resched(); - BUG_ON(vma->vm_ops); + BUG_ON(!vma_can_userfault(vma)); /* * Nothing to do: this vma is already registered into this @@ -1007,6 +1392,19 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, start = vma->vm_start; vma_end = min(end, vma->vm_end); + if (userfaultfd_missing(vma)) { + /* + * Wake any concurrent pending userfault while + * we unregister, so they will not hang + * permanently and it avoids userland to call + * UFFDIO_WAKE explicitly. + */ + struct userfaultfd_wake_range range; + range.start = start; + range.len = vma_end - start; + wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); + } + new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP); prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, @@ -1178,6 +1576,14 @@ out: return ret; } +static inline unsigned int uffd_ctx_features(__u64 user_features) +{ + /* + * For the current set of features the bits just coincide + */ + return (unsigned int)user_features; +} + /* * userland asks for a certain API version and we return which bits * and ioctl commands are implemented in this kernel for such API @@ -1189,6 +1595,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, struct uffdio_api uffdio_api; void __user *buf = (void __user *)arg; int ret; + __u64 features; ret = -EINVAL; if (ctx->state != UFFD_STATE_WAIT_API) @@ -1196,19 +1603,23 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, ret = -EFAULT; if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; - if (uffdio_api.api != UFFD_API || uffdio_api.features) { + features = uffdio_api.features; + if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) { memset(&uffdio_api, 0, sizeof(uffdio_api)); if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) goto out; ret = -EINVAL; goto out; } + /* report all available features and ioctls to userland */ uffdio_api.features = UFFD_API_FEATURES; uffdio_api.ioctls = UFFD_API_IOCTLS; ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) goto out; ctx->state = UFFD_STATE_RUNNING; + /* only enable the requested features for this uffd context */ + ctx->features = uffd_ctx_features(features); ret = 0; out: return ret; @@ -1295,6 +1706,7 @@ static void init_once_userfaultfd_ctx(void *mem) init_waitqueue_head(&ctx->fault_pending_wqh); init_waitqueue_head(&ctx->fault_wqh); + init_waitqueue_head(&ctx->event_wqh); init_waitqueue_head(&ctx->fd_wqh); seqcount_init(&ctx->refile_seq); } @@ -1335,6 +1747,7 @@ static struct file *userfaultfd_file_create(int flags) atomic_set(&ctx->refcount, 1); ctx->flags = flags; + ctx->features = 0; ctx->state = UFFD_STATE_WAIT_API; ctx->released = false; ctx->mm = current->mm; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index bbb9eb6811b2..f1849817d237 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1373,22 +1373,21 @@ xfs_file_llseek( */ STATIC int xfs_filemap_page_mkwrite( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret; trace_xfs_filemap_page_mkwrite(XFS_I(inode)); sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, &xfs_iomap_ops); } else { - ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); + ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); } @@ -1400,23 +1399,22 @@ xfs_filemap_page_mkwrite( STATIC int xfs_filemap_fault( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret; trace_xfs_filemap_fault(XFS_I(inode)); /* DAX can shortcut the normal fault path on write faults! */ if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode)) - return xfs_filemap_page_mkwrite(vma, vmf); + return xfs_filemap_page_mkwrite(vmf); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) - ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, &xfs_iomap_ops); else - ret = filemap_fault(vma, vmf); + ret = filemap_fault(vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); return ret; @@ -1431,12 +1429,9 @@ xfs_filemap_fault( */ STATIC int xfs_filemap_pmd_fault( - struct vm_area_struct *vma, - unsigned long addr, - pmd_t *pmd, - unsigned int flags) + struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); int ret; @@ -1445,16 +1440,16 @@ xfs_filemap_pmd_fault( trace_xfs_filemap_pmd_fault(ip); - if (flags & FAULT_FLAG_WRITE) { + if (vmf->flags & FAULT_FLAG_WRITE) { sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); } xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops); + ret = dax_iomap_pmd_fault(vmf, &xfs_iomap_ops); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - if (flags & FAULT_FLAG_WRITE) + if (vmf->flags & FAULT_FLAG_WRITE) sb_end_pagefault(inode->i_sb); return ret; @@ -1468,11 +1463,10 @@ xfs_filemap_pmd_fault( */ static int xfs_filemap_pfn_mkwrite( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); int ret = VM_FAULT_NOPAGE; loff_t size; @@ -1480,7 +1474,7 @@ xfs_filemap_pfn_mkwrite( trace_xfs_filemap_pfn_mkwrite(ip); sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); /* check if the faulting page hasn't raced with truncate */ xfs_ilock(ip, XFS_MMAPLOCK_SHARED); @@ -1488,7 +1482,7 @@ xfs_filemap_pfn_mkwrite( if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else if (IS_DAX(inode)) - ret = dax_pfn_mkwrite(vma, vmf); + ret = dax_pfn_mkwrite(vmf); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); sb_end_pagefault(inode->i_sb); return ret; diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index fddd1a5eb322..c5dde8f3d622 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -122,6 +122,7 @@ #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) #define __always_unused __attribute__((unused)) +#define __mode __attribute__((mode(x))) /* gcc version specific checks */ diff --git a/include/linux/dax.h b/include/linux/dax.h index 24ad71173995..44177005262d 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -38,8 +38,7 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags) ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops); -int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops); +int dax_iomap_fault(struct vm_fault *vmf, struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, @@ -71,21 +70,19 @@ static inline unsigned int dax_radix_order(void *entry) return PMD_SHIFT - PAGE_SHIFT; return 0; } -int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops); +int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops); #else static inline unsigned int dax_radix_order(void *entry) { return 0; } -static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, unsigned int flags, +static inline int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops) { return VM_FAULT_FALLBACK; } #endif -int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); +int dax_pfn_mkwrite(struct vm_fault *vmf); static inline bool vma_is_dax(struct vm_area_struct *vma) { diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 97e478d6b690..f0029e786205 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -33,6 +33,7 @@ enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 48c76d612d40..503099d8aada 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -65,7 +65,8 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, - unsigned long *, unsigned long *, long, unsigned int); + unsigned long *, unsigned long *, long, unsigned int, + int *); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *); void __unmap_hugepage_range_final(struct mmu_gather *tlb, @@ -81,6 +82,11 @@ void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); +int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep); int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); @@ -131,7 +137,7 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } -#define follow_hugetlb_page(m,v,p,vs,a,b,i,w) ({ BUG(); 0; }) +#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) static inline void hugetlb_report_meminfo(struct seq_file *m) @@ -149,6 +155,8 @@ static inline void hugetlb_show_meminfo(void) #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) +#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ + src_addr, pagep) ({ BUG(); 0; }) #define huge_pte_offset(mm, address) 0 static inline int dequeue_hwpoisoned_huge_page(struct page *page) { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index a4c94b86401e..857e440af9d5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -79,8 +79,7 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, struct iomap_ops *ops); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, struct iomap_ops *ops); -int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops); +int iomap_page_mkwrite(struct vm_fault *vmf, struct iomap_ops *ops); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, struct iomap_ops *ops); diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 1c30014ed176..d29e1e21bf3f 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -17,7 +17,7 @@ #include <linux/kernel.h> #include <linux/types.h> -#include <linux/hrtimer.h> +#include <linux/ktime.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/io.h> diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 820c0ad54a01..c908b25bf5a5 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags); void kasan_cache_shrink(struct kmem_cache *cache); -void kasan_cache_destroy(struct kmem_cache *cache); +void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags) {} static inline void kasan_cache_shrink(struct kmem_cache *cache) {} -static inline void kasan_cache_destroy(struct kmem_cache *cache) {} +static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cb09238f6d32..4c26dc3a8295 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -100,16 +100,18 @@ ) /* - * Divide positive or negative dividend by positive divisor and round - * to closest integer. Result is undefined for negative divisors and - * for negative dividends if the divisor variable type is unsigned. + * Divide positive or negative dividend by positive or negative divisor + * and round to closest integer. Result is undefined for negative + * divisors if he dividend variable type is unsigned and for negative + * dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ { \ typeof(x) __x = x; \ typeof(divisor) __d = divisor; \ (((typeof(x))-1) > 0 || \ - ((typeof(divisor))-1) > 0 || (__x) > 0) ? \ + ((typeof(divisor))-1) > 0 || \ + (((__x) > 0) == ((__d) > 0))) ? \ (((__x) + ((__d) / 2)) / (__d)) : \ (((__x) - ((__d) / 2)) / (__d)); \ } \ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d419d0e51fe5..e98e546b543c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -283,6 +283,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) #define VMCOREINFO_CONFIG(name) \ vmcoreinfo_append_str("CONFIG_%s=y\n", #name) +#define VMCOREINFO_PHYS_BASE(value) \ + vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value) extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 6b784c59f321..207225509540 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -1,87 +1,554 @@ -#ifndef __LZ4_H__ -#define __LZ4_H__ -/* - * LZ4 Kernel Interface +/* LZ4 Kernel Interface * * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * + * This file is based on the original header file + * for LZ4 - Fast LZ compression algorithm. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2016, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 + */ + +#ifndef __LZ4_H__ +#define __LZ4_H__ + +#include <linux/types.h> +#include <linux/string.h> /* memset, memcpy */ + +/*-************************************************************************ + * CONSTANTS + **************************************************************************/ +/* + * LZ4_MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes + * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#define LZ4_MEMORY_USAGE 10 + +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) (\ + (unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \ + ? 0 \ + : (isize) + ((isize)/255) + 16) + +#define LZ4_ACCELERATION_DEFAULT 1 +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) +#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) + +#define LZ4HC_MIN_CLEVEL 3 +#define LZ4HC_DEFAULT_CLEVEL 9 +#define LZ4HC_MAX_CLEVEL 16 + +#define LZ4HC_DICTIONARY_LOGSIZE 16 +#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE) +#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1) +#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE-1) +#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG) +#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1) + +/*-************************************************************************ + * STREAMING CONSTANTS AND STRUCTURES + **************************************************************************/ +#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4) +#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long)) + +#define LZ4_STREAMHCSIZE 262192 +#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t)) + +#define LZ4_STREAMDECODESIZE_U64 4 +#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * \ + sizeof(unsigned long long)) + +/* + * LZ4_stream_t : + * information structure to track an LZ4 stream. + */ +typedef struct { + uint32_t hashTable[LZ4_HASH_SIZE_U32]; + uint32_t currentOffset; + uint32_t initCheck; + const uint8_t *dictionary; + uint8_t *bufferStart; + uint32_t dictSize; +} LZ4_stream_t_internal; +typedef union { + unsigned long long table[LZ4_STREAMSIZE_U64]; + LZ4_stream_t_internal internal_donotuse; +} LZ4_stream_t; + +/* + * LZ4_streamHC_t : + * information structure to track an LZ4HC stream. */ -#define LZ4_MEM_COMPRESS (16384) -#define LZ4HC_MEM_COMPRESS (262144 + (2 * sizeof(unsigned char *))) +typedef struct { + unsigned int hashTable[LZ4HC_HASHTABLESIZE]; + unsigned short chainTable[LZ4HC_MAXD]; + /* next block to continue on current prefix */ + const unsigned char *end; + /* All index relative to this position */ + const unsigned char *base; + /* alternate base for extDict */ + const unsigned char *dictBase; + /* below that point, need extDict */ + unsigned int dictLimit; + /* below that point, no more dict */ + unsigned int lowLimit; + /* index from which to continue dict update */ + unsigned int nextToUpdate; + unsigned int compressionLevel; +} LZ4HC_CCtx_internal; +typedef union { + size_t table[LZ4_STREAMHCSIZE_SIZET]; + LZ4HC_CCtx_internal internal_donotuse; +} LZ4_streamHC_t; /* - * lz4_compressbound() - * Provides the maximum size that LZ4 may output in a "worst case" scenario - * (input data not compressible) + * LZ4_streamDecode_t : + * information structure to track an LZ4 stream during decompression. + * init this structure using LZ4_setStreamDecode + * (or memset()) before first use */ -static inline size_t lz4_compressbound(size_t isize) +typedef struct { + const uint8_t *externalDict; + size_t extDictSize; + const uint8_t *prefixEnd; + size_t prefixSize; +} LZ4_streamDecode_t_internal; +typedef union { + unsigned long long table[LZ4_STREAMDECODESIZE_U64]; + LZ4_streamDecode_t_internal internal_donotuse; +} LZ4_streamDecode_t; + +/*-************************************************************************ + * SIZE OF STATE + **************************************************************************/ +#define LZ4_MEM_COMPRESS LZ4_STREAMSIZE +#define LZ4HC_MEM_COMPRESS LZ4_STREAMHCSIZE + +/*-************************************************************************ + * Compression Functions + **************************************************************************/ + +/* + * LZ4_compressBound() : + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +static inline int LZ4_compressBound(size_t isize) +{ + return LZ4_COMPRESSBOUND(isize); +} + +/* + * For backward compatibility + */ +static inline int lz4_compressbound(size_t isize) { - return isize + (isize / 255) + 16; + return LZ4_COMPRESSBOUND(isize); } /* + * LZ4_compress_default() + * Compresses 'sourceSize' bytes from buffer 'source' + * into already allocated 'dest' buffer of size 'maxOutputSize'. + * Compression is guaranteed to succeed if + * 'maxOutputSize' >= LZ4_compressBound(inputSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'source' + * into a more limited 'dest' budget, + * compression stops *immediately*, + * and the function result is zero. + * As a consequence, 'dest' content is not valid. + * + * source : source address of the original data + * dest : output buffer address + * of the compressed data + * inputSize : Max supported value is + * LZ4_MAX_INPUT_SIZE + * maxOutputSize: full or partial size of buffer 'dest' + * (which must be already allocated) + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : the number of bytes written into buffer 'dest' + * (necessarily <= maxOutputSize) or 0 if compression fails + */ +int LZ4_compress_default(const char *source, char *dest, int inputSize, + int maxOutputSize, void *wrkmem); + +/* + * LZ4_compress_fast() : + * Same as LZ4_compress_default(), + * but allows to select an "acceleration" factor. + * The larger the acceleration value, + * the faster the algorithm, + * but also the lesser the compression. + * It's a trade-off. It can be fine tuned, + * with each successive value + * providing roughly +~3% to speed. + * An acceleration value of "1" + * is the same as regular LZ4_compress_default() + * Values <= 0 will be replaced by + * LZ4_ACCELERATION_DEFAULT, which is 1. + */ +int LZ4_compress_fast(const char *source, char *dest, int inputSize, + int maxOutputSize, void *wrkmem, int acceleration); + +/* + * LZ4_compress_destSize() + * Reverse the logic, by compressing as much data as possible + * from 'source' buffer into already allocated buffer 'dest' + * of size 'targetDestSize'. + * This function either compresses the entire 'source' content into 'dest' + * if it's large enough, or fill 'dest' buffer completely with as much data as + * ossible from 'source'. + * + * source : source address of the original data + * dest : output buffer address of the compressed data + * inputSize : Max supported value is LZ4_MAX_INPUT_SIZE + * *sourceSizePtr: will be modified to indicate how many bytes where read + * from 'source' to fill 'dest'. + * New value is necessarily <= old value. + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Nb bytes written into 'dest' (necessarily <= targetDestSize) + or 0 if compression fails + */ +int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr, + int targetDestSize, void *wrkmem); + +/* * lz4_compress() - * src : source address of the original data - * src_len : size of the original data - * dst : output buffer address of the compressed data - * This requires 'dst' of size LZ4_COMPRESSBOUND. - * dst_len : is the output size, which is returned after compress done - * workmem : address of the working memory. - * This requires 'workmem' of size LZ4_MEM_COMPRESS. - * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer and workmem must be already allocated with + * src : source address of the original data + * src_len: size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len: is the output size, which is returned after compress done + * workmem: address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with * the defined size. */ -int lz4_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem); - - /* - * lz4hc_compress() - * src : source address of the original data - * src_len : size of the original data - * dst : output buffer address of the compressed data - * This requires 'dst' of size LZ4_COMPRESSBOUND. - * dst_len : is the output size, which is returned after compress done - * workmem : address of the working memory. - * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. - * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer and workmem must be already allocated with - * the defined size. - */ -int lz4hc_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem); - -/* - * lz4_decompress() - * src : source address of the compressed data - * src_len : is the input size, whcih is returned after decompress done - * dest : output buffer address of the decompressed data - * actual_dest_len: is the size of uncompressed data, supposing it's known - * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer must be already allocated. - * slightly faster than lz4_decompress_unknownoutputsize() +int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst, + size_t *dst_len, void *wrkmem); + +/*-************************************************************************ + * Decompression Functions + **************************************************************************/ + +/* + * LZ4_decompress_fast() + * source : source address of the compressed data + * dst : output buffer address of the decompressed data + * originalSize: is the original and therefore uncompressed size + * return : the number of bytes read from the source buffer + * (in other words, the compressed size) + * If the source stream is detected malformed, the function will + * stop decoding and return a negative result. + * Destination buffer must be already allocated. + * Its size must be a minimum of 'originalSize' bytes. + * note : This function fully respect memory boundaries + * for properly formed compressed data. + * It is a bit faster than LZ4_decompress_safe(). + * However, it does not provide any protection against intentionally + * modified data stream (malicious input). + * Use this function in trusted environment only + * (data to decode comes from a trusted source). */ -int lz4_decompress(const unsigned char *src, size_t *src_len, - unsigned char *dest, size_t actual_dest_len); +int LZ4_decompress_fast(const char *source, char *dest, int originalSize); + +/* + * LZ4_decompress_safe() + * source : source address of the compressed data + * dest : output buffer address of the decompressed data + * compressedSize : is the precise full size of the compressed block. + * maxDecompressedSize: is the size of destination buffer, + * which must be already allocated. + * return : the number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * If destination buffer is not large enough, decoding will stop + * and output an error code (<0). + * If the source stream is detected malformed, the function will + * stop decoding and return a negative result. + * This function is protected against buffer overflow exploits, + * including malicious data packets. It never writes outside + * output buffer, nor reads outside input buffer. + */ +int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, + int maxDecompressedSize); /* - * lz4_decompress_unknownoutputsize() + * LZ4_decompress_safe_partial() : + * This function decompress a compressed block of size 'compressedSize' + * at position 'source' into destination buffer 'dest' + * of size 'maxDecompressedSize'. + * The function tries to stop decompressing operation as soon as + * 'targetOutputSize' has been reached, reducing decompression time. + * + * source : source address of the compressed data + * dest : output buffer address of the decompressed data + * compressedSize : is the precise full size of the compressed block. + * targetOutputSize : the decompression operation will try + * to stop as soon as 'targetOutputSize' + * has been reached + * maxDecompressedSize: is the size of destination buffer, + * which must be already allocated. + * return : the number of bytes decoded in the destination buffer + * (necessarily <= maxDecompressedSize) + * Note : this number can be < 'targetOutputSize' should the compressed + * block to decode be smaller. Always control how many bytes + * were decoded. If the source stream is detected malformed, + * the function will stop decoding and return a negative result. + * This function never writes outside of output buffer, + * and never reads outside of input buffer. + * It is therefore protected against malicious data packets + */ +int LZ4_decompress_safe_partial(const char *source, char *dest, + int compressedSize, int targetOutputSize, int maxDecompressedSize); + + +/* + * lz4_decompress_unknownoutputsize() : * src : source address of the compressed data * src_len : is the input size, therefore the compressed size - * dest : output buffer address of the decompressed data + * dest : output buffer address of the decompressed data * dest_len: is the max size of the destination buffer, which is - * returned with actual size of decompressed data after - * decompress done - * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer must be already allocated. + * returned with actual size of decompressed data after + * decompress done + * return: Success if return 0 + * Error if return (< 0) + * note: Destination buffer must be already allocated. */ int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, - unsigned char *dest, size_t *dest_len); + unsigned char *dest, size_t *dest_len); + +/* + * lz4_decompress() : + * src : source address of the compressed data + * src_len : is the input size, + * which is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return: Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + * slightly faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); + +/*-************************************************************************ + * LZ4 HC Compression + **************************************************************************/ + +/*! LZ4_compress_HC() : + * Compress data from `src` into `dst`, using the more powerful + * but slower "HC" algorithm. dst` must be already allocated. + * Compression is guaranteed to succeed if + * `dstCapacity >= LZ4_compressBound(srcSize) + * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE + * + * src : source address of the original data + * dst : output buffer address of the compressed data + * srcSize : Max supported value is LZ4_MAX_INPUT_SIZE + * dstCapacity : full or partial size of buffer 'dst' + * (which must be already allocated) + * compressionLevel: Recommended values are between 4 and 9, although any + * value between 1 and LZ4HC_MAX_CLEVEL will work. + * Values >LZ4HC_MAX_CLEVEL behave the same as 16. + * wrkmem : address of the working memory. + * This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS. + * return : the number of bytes written into 'dst' + * or 0 if compression fails. + */ +int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity, + int compressionLevel, void *wrkmem); + +/* + * lz4hc_compress() + * src : source address of the original data + * src_len: size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len: is the output size, which is returned after compress done + * workmem: address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst, + size_t *dst_len, void *wrkmem); + +/* + * These functions compress data in successive blocks of any size, + * using previous blocks as dictionary. One key assumption is that previous + * blocks (up to 64 KB) remain read-accessible while + * compressing next blocks. + * There is an exception for ring buffers, which can be smaller than 64 KB. + * Ring buffers scenario is automatically detected and handled by + * LZ4_compress_HC_continue(). + * Before starting compression, state must be properly initialized, + * using LZ4_resetStreamHC(). + * A first "fictional block" can then be designated as initial dictionary, + * using LZ4_loadDictHC() (Optional). + * Then, use LZ4_compress_HC_continue() to compress each successive block. + * Previous memory blocks (including initial dictionary when present) must + * remain accessible and unmodified during compression. + * 'dst' buffer should be sized to handle worst case scenarios, using + * LZ4_compressBound(), to ensure operation success. + * If, for any reason, previous data blocks can't be preserved unmodified + * in memory during next compression block, + * you must save it to a safer memory space, using LZ4_saveDictHC(). + * Return value of LZ4_saveDictHC() is the size of dictionary + * effectively saved into 'safeBuffer'. + */ +void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel); +int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary, + int dictSize); +int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src, + char *dst, int srcSize, int maxDstSize); +int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer, + int maxDictSize); + +/*-********************************************* + * Streaming Compression Functions + ***********************************************/ +/* + * LZ4_resetStream() : + * An LZ4_stream_t structure can be allocated once + * and re-used multiple times. + * Use this function to init an allocated `LZ4_stream_t` structure + * and start a new compression. + */ +void LZ4_resetStream(LZ4_stream_t *LZ4_stream); + +/* + * LZ4_loadDict() : + * Use this function to load a static dictionary into LZ4_stream. + * Any previous data will be forgotten, only 'dictionary' + * will remain in memory. + * Loading a size of 0 is allowed. + * Return : dictionary size, in bytes (necessarily <= 64 KB) + */ +int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary, + int dictSize); + +/* + * LZ4_compress_fast_continue() : + * Compress buffer content 'src', + * using data from previously compressed blocks + * as dictionary to improve compression ratio. + * Important : Previous data blocks are assumed to still + * be present and unmodified ! + * 'dst' buffer must be already allocated. + * If maxDstSize >= LZ4_compressBound(srcSize), + * compression is guaranteed to succeed, and runs faster. + * If not, and if compressed data cannot fit into 'dst' buffer size, + * compression stops, and function returns a zero. + */ +int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src, + char *dst, int srcSize, int maxDstSize, int acceleration); + +/* + * LZ4_saveDict() : + * If previously compressed data block is not guaranteed + * to remain available at its memory location, + * save it into a safer place (char *safeBuffer). + * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call + * LZ4_compress_fast_continue(). + * Return : saved dictionary size in bytes (necessarily <= dictSize), + * or 0 if error. + */ +int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize); + +/* + * LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * Setting a size of 0 is allowed (same effect as reset). + * @return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, + const char *dictionary, int dictSize); + +/* + * LZ4_decompress_*_continue() : + * These decoding functions allow decompression of multiple blocks + * in "streaming" mode. + * Previously decoded blocks *must* remain available at the memory position + * where they were decoded (up to 64 KB) + * In the case of a ring buffers, decoding buffer must be either : + * - Exactly same size as encoding buffer, with same update rule + * (block boundaries at same positions) In which case, + * the decoding & encoding ring buffer can have any size, + * including very small ones ( < 64 KB). + * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * maxBlockSize is implementation dependent. + * It's the maximum size you intend to compress into a single block. + * In which case, encoding and decoding buffers do not need + * to be synchronized, and encoding ring buffer can have any size, + * including small ones ( < 64 KB). + * - _At least_ 64 KB + 8 bytes + maxBlockSize. + * In which case, encoding and decoding buffers do not need to be + * synchronized, and encoding ring buffer can have any size, + * including larger than decoding buffer. W + * Whenever these conditions are not possible, save the last 64KB of decoded + * data into a safe buffer, and indicate where it is saved + * using LZ4_setStreamDecode() + */ +int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int compressedSize, + int maxDecompressedSize); +int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int originalSize); + +/* + * LZ4_decompress_*_usingDict() : + * These decoding functions work the same as + * a combination of LZ4_setStreamDecode() followed by + * LZ4_decompress_*_continue() + * They are stand-alone, and don't need an LZ4_streamDecode_t structure. + */ +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxDecompressedSize, const char *dictStart, + int dictSize); +int LZ4_decompress_fast_usingDict(const char *source, char *dest, + int originalSize, const char *dictStart, int dictSize); + #endif diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 5b759c9acf97..bdfc65af4152 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -42,6 +42,7 @@ struct memblock_type { unsigned long max; /* size of the allocated array */ phys_addr_t total_size; /* size of all regions */ struct memblock_region *regions; + char *name; }; struct memblock { @@ -203,6 +204,7 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); +unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn); /** * for_each_mem_pfn_range - early memory pfn range iterator diff --git a/include/linux/memory.h b/include/linux/memory.h index 093607f90b91..301dfb03ecb7 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -108,12 +108,12 @@ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); -extern int register_new_memory(int, struct mem_section *); +extern int register_new_memory(struct zone *, int, struct mem_section *); extern int memory_block_change_state(struct memory_block *mem, unsigned long to_state, unsigned long from_state_req); #ifdef CONFIG_MEMORY_HOTREMOVE -extern int unregister_memory_section(struct mem_section *); +extern int unregister_memory_section(struct zone *, struct mem_section *); #endif extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c1784c0b4f35..08856a6fd3b1 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -279,8 +279,10 @@ extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); -extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn); -extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, +extern int sparse_add_section(struct zone *zone, unsigned long pfn, + unsigned long nr_pages); +extern void sparse_remove_section(struct zone *zone, struct mem_section *ms, + unsigned long pfn, unsigned long nr_pages, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); diff --git a/include/linux/mm.h b/include/linux/mm.h index 6ff66d6fe8e2..a6b9ab0945c4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -285,6 +285,17 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ +#define FAULT_FLAG_TRACE \ + { FAULT_FLAG_WRITE, "WRITE" }, \ + { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ + { FAULT_FLAG_ALLOW_RETRY, "ALLOW_RETRY" }, \ + { FAULT_FLAG_RETRY_NOWAIT, "RETRY_NOWAIT" }, \ + { FAULT_FLAG_KILLABLE, "KILLABLE" }, \ + { FAULT_FLAG_TRIED, "TRIED" }, \ + { FAULT_FLAG_USER, "USER" }, \ + { FAULT_FLAG_REMOTE, "REMOTE" }, \ + { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" } + /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask @@ -339,18 +350,17 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); int (*mremap)(struct vm_area_struct * area); - int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); - int (*pmd_fault)(struct vm_area_struct *, unsigned long address, - pmd_t *, unsigned int flags); + int (*fault)(struct vm_fault *vmf); + int (*pmd_fault)(struct vm_fault *vmf); void (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*page_mkwrite)(struct vm_fault *vmf); /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ - int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*pfn_mkwrite)(struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware @@ -1111,6 +1121,20 @@ static inline void clear_page_pfmemalloc(struct page *page) VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ VM_FAULT_FALLBACK) +#define VM_FAULT_RESULT_TRACE \ + { VM_FAULT_OOM, "OOM" }, \ + { VM_FAULT_SIGBUS, "SIGBUS" }, \ + { VM_FAULT_MAJOR, "MAJOR" }, \ + { VM_FAULT_WRITE, "WRITE" }, \ + { VM_FAULT_HWPOISON, "HWPOISON" }, \ + { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ + { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ + { VM_FAULT_NOPAGE, "NOPAGE" }, \ + { VM_FAULT_LOCKED, "LOCKED" }, \ + { VM_FAULT_RETRY, "RETRY" }, \ + { VM_FAULT_FALLBACK, "FALLBACK" }, \ + { VM_FAULT_DONE_COW, "DONE_COW" } + /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) #define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf) @@ -1128,8 +1152,7 @@ extern void pagefault_out_of_memory(void); */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ -extern void show_free_areas(unsigned int flags); -extern bool skip_free_areas_node(unsigned int flags, int nid); +extern void show_free_areas(unsigned int flags, nodemask_t *nodemask); int shmem_zero_setup(struct vm_area_struct *); #ifdef CONFIG_SHMEM @@ -1152,8 +1175,6 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ - bool ignore_dirty; /* Ignore dirty pages */ - bool check_swap_entries; /* Check also swap entries */ }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, @@ -1164,7 +1185,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, - unsigned long size, struct zap_details *); + unsigned long size); void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -1359,6 +1380,16 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) return !vma->vm_ops; } +#ifdef CONFIG_SHMEM +/* + * The vma_is_shmem is not inline because it is used only by slow + * paths in userfault. + */ +bool vma_is_shmem(struct vm_area_struct *vma); +#else +static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } +#endif + static inline int stack_guard_page_start(struct vm_area_struct *vma, unsigned long addr) { @@ -1900,7 +1931,7 @@ extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); -extern void show_mem(unsigned int flags); +extern void show_mem(unsigned int flags, nodemask_t *nodemask); extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -1908,8 +1939,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid); extern unsigned long arch_reserved_kernel_pages(void); #endif -extern __printf(2, 3) -void warn_alloc(gfp_t gfp_mask, const char *fmt, ...); +extern __printf(3, 4) +void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...); extern void setup_per_cpu_pageset(void); @@ -2049,6 +2080,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {} /* These take the mm semaphore themselves */ extern int __must_check vm_brk(unsigned long, unsigned long); +extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, unsigned long, unsigned long, @@ -2092,10 +2124,10 @@ extern void truncate_inode_pages_range(struct address_space *, extern void truncate_inode_pages_final(struct address_space *); /* generic vm_area_ops exported for stackable file systems */ -extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); +extern int filemap_fault(struct vm_fault *vmf); extern void filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); -extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); +extern int filemap_page_mkwrite(struct vm_fault *vmf); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); @@ -2317,7 +2349,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, unsigned long map_count, int nodeid); -struct page *sparse_mem_map_populate(unsigned long pnum, int nid); +struct page *__populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); @@ -2400,6 +2433,10 @@ extern void clear_huge_page(struct page *page, extern void copy_user_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma, unsigned int pages_per_huge_page); +extern long copy_huge_page_from_user(struct page *dst_page, + const void __user *usr_src, + unsigned int pages_per_huge_page, + bool allow_pagefault); #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ extern struct page_ext_operations debug_guardpage_ops; diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 41d376e7116d..e030a68ead7e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -50,6 +50,13 @@ static __always_inline void add_page_to_lru_list(struct page *page, list_add(&page->lru, &lruvec->lists[lru]); } +static __always_inline void add_page_to_lru_list_tail(struct page *page, + struct lruvec *lruvec, enum lru_list lru) +{ + update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); + list_add_tail(&page->lru, &lruvec->lists[lru]); +} + static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f4aac87adcc3..338a786a993f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -236,8 +236,6 @@ struct lruvec { #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON)) #define LRU_ALL ((1 << NR_LRU_LISTS) - 1) -/* Isolate clean file */ -#define ISOLATE_CLEAN ((__force isolate_mode_t)0x1) /* Isolate unmapped file */ #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ @@ -779,7 +777,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) #endif } -extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); +extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); #ifdef CONFIG_HAVE_MEMORY_PRESENT void memory_present(int nid, unsigned long start, unsigned long end); @@ -1052,6 +1050,8 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) * PFN_SECTION_SHIFT pfn to/from section number */ #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) +#define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT) +#define PA_SECTION_MASK (~(PA_SECTION_SIZE-1)) #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) @@ -1066,12 +1066,27 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif -#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) -#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) +#define pfn_to_section_nr(pfn) ((unsigned long)(pfn) >> PFN_SECTION_SHIFT) +#define section_nr_to_pfn(sec) ((unsigned long)(sec) << PFN_SECTION_SHIFT) #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) +#define SECTION_ACTIVE_SIZE ((1UL << SECTION_SIZE_BITS) / BITS_PER_LONG) +#define SECTION_ACTIVE_MASK (~(SECTION_ACTIVE_SIZE - 1)) + +struct mem_section_usage { + /* + * SECTION_ACTIVE_SIZE portions of the section that are populated in + * the memmap + */ + unsigned long map_active; + /* See declaration of similar field in struct zone */ + unsigned long pageblock_flags[0]; +}; + +void __init section_active_init(unsigned long pfn, unsigned long nr_pages); + struct page; struct page_ext; struct mem_section { @@ -1089,8 +1104,7 @@ struct mem_section { */ unsigned long section_mem_map; - /* See declaration of similar field in struct zone */ - unsigned long *pageblock_flags; + struct mem_section_usage *usage; #ifdef CONFIG_PAGE_EXTENSION /* * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use @@ -1121,6 +1135,11 @@ extern struct mem_section *mem_section[NR_SECTION_ROOTS]; extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif +static inline unsigned long *section_to_usemap(struct mem_section *ms) +{ + return ms->usage->pageblock_flags; +} + static inline struct mem_section *__nr_to_section(unsigned long nr) { if (!mem_section[SECTION_NR_TO_ROOT(nr)]) @@ -1209,6 +1228,11 @@ void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) +static inline void section_active_init(unsigned long pfn, + unsigned long nr_pages) +{ +} +#define section_active_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ /* diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 324c8dbad1e1..84943e8057ef 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -266,7 +266,6 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, /** * find_lock_page - locate, pin and lock a pagecache page - * pagecache_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index * @@ -482,19 +481,11 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, } /* - * This is exported only for wait_on_page_locked/wait_on_page_writeback, - * and for filesystems which need to wait on PG_private. + * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc., + * and should not be used directly. */ extern void wait_on_page_bit(struct page *page, int bit_nr); extern int wait_on_page_bit_killable(struct page *page, int bit_nr); -extern void wake_up_page_bit(struct page *page, int bit_nr); - -static inline void wake_up_page(struct page *page, int bit) -{ - if (!PageWaiters(page)) - return; - wake_up_page_bit(page, bit); -} /* * Wait for a page to be unlocked. diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index a3d90b9da18d..033fc7bbcefa 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -15,6 +15,12 @@ #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) +#define PFN_FLAGS_TRACE \ + { PFN_SG_CHAIN, "SG_CHAIN" }, \ + { PFN_SG_LAST, "SG_LAST" }, \ + { PFN_DEV, "DEV" }, \ + { PFN_MAP, "MAP" } + static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags) { pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), }; diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index d076183e49be..9702b6e183bc 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -90,7 +90,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ old->rbaugmented = rbcompute(old); \ } \ rbstatic const struct rb_augment_callbacks rbname = { \ - rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ + .propagate = rbname ## _propagate, \ + .copy = rbname ## _copy, \ + .rotate = rbname ## _rotate \ }; diff --git a/include/linux/sem.h b/include/linux/sem.h index d0efd6e6c20a..4fc222f8755d 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -21,7 +21,7 @@ struct sem_array { struct list_head list_id; /* undo requests on this array */ int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ - bool complex_mode; /* no parallel simple ops */ + unsigned int use_global_lock;/* >0: global lock required */ }; #ifdef CONFIG_SYSVIPC diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index ff078e7043b6..fdaac9d4d46d 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -124,4 +124,15 @@ static inline bool shmem_huge_enabled(struct vm_area_struct *vma) } #endif +#ifdef CONFIG_SHMEM +extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep); +#else +#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ + src_addr, pagep) ({ BUG(); 0; }) +#endif + #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index 7f47b7098b1b..45e91dd6716d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -27,6 +27,7 @@ struct bio; #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \ SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ SWAP_FLAG_DISCARD_PAGES) +#define SWAP_BATCH 64 static inline int current_is_kswapd(void) { @@ -176,6 +177,12 @@ enum { * protected by swap_info_struct.lock. */ struct swap_cluster_info { + spinlock_t lock; /* + * Protect swap_cluster_info fields + * and swap_info_struct->swap_map + * elements correspond to the swap + * cluster + */ unsigned int data:24; unsigned int flags:8; }; @@ -337,8 +344,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); /* linux/mm/swap_state.c */ -extern struct address_space swapper_spaces[]; -#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) +/* One swap address space for each 64M swap space */ +#define SWAP_ADDRESS_SPACE_SHIFT 14 +#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) +extern struct address_space *swapper_spaces[]; +#define swap_address_space(entry) \ + (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ + >> SWAP_ADDRESS_SPACE_SHIFT]) extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *, struct list_head *list); @@ -360,6 +372,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; +extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) @@ -375,23 +388,31 @@ static inline long get_nr_swap_pages(void) extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); extern swp_entry_t get_swap_page_of_type(int); +extern int get_swap_pages(int n, swp_entry_t swp_entries[]); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free(swp_entry_t); +extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); +extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; +extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); +extern void exit_swap_address_space(unsigned int type); + +extern int get_swap_slots(int n, swp_entry_t *slots); +extern void swapcache_free_batch(swp_entry_t *entries, int n); #else /* CONFIG_SWAP */ @@ -479,6 +500,11 @@ static inline int page_swapcount(struct page *page) return 0; } +static inline int __swp_swapcount(swp_entry_t entry) +{ + return 0; +} + static inline int swp_swapcount(swp_entry_t entry) { return 0; diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h new file mode 100644 index 000000000000..6ef92d17633d --- /dev/null +++ b/include/linux/swap_slots.h @@ -0,0 +1,30 @@ +#ifndef _LINUX_SWAP_SLOTS_H +#define _LINUX_SWAP_SLOTS_H + +#include <linux/swap.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> + +#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH +#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE) +#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE) + +struct swap_slots_cache { + bool lock_initialized; + struct mutex alloc_lock; /* protects slots, nr, cur */ + swp_entry_t *slots; + int nr; + int cur; + spinlock_t free_lock; /* protects slots_ret, n_ret */ + swp_entry_t *slots_ret; + int n_ret; +}; + +void disable_swap_slots_cache_lock(void); +void reenable_swap_slots_cache_unlock(void); +int enable_swap_slots_cache(void); +int free_swap_slot(swp_entry_t entry); + +extern bool swap_slot_cache_enabled; + +#endif /* _LINUX_SWAP_SLOTS_H */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index cfa475a0e9ca..6edbccad8bd3 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -23,6 +23,10 @@ const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array); #if BITS_PER_LONG == 32 +const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim, + unsigned long long flags, + const struct trace_print_flags_u64 *flag_array); + const char *trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, const struct trace_print_flags_u64 diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 11b92b047a1e..2521542f6c07 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -52,6 +52,20 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP); } +extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); +extern void dup_userfaultfd_complete(struct list_head *); + +extern void mremap_userfaultfd_prep(struct vm_area_struct *, + struct vm_userfaultfd_ctx *); +extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, + unsigned long from, unsigned long to, + unsigned long len); + +extern void userfaultfd_remove(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, + unsigned long end); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -76,6 +90,34 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) return false; } +static inline int dup_userfaultfd(struct vm_area_struct *vma, + struct list_head *l) +{ + return 0; +} + +static inline void dup_userfaultfd_complete(struct list_head *l) +{ +} + +static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx *ctx) +{ +} + +static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, + unsigned long from, + unsigned long to, + unsigned long len) +{ +} + +static inline void userfaultfd_remove(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, + unsigned long end) +{ +} #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 4d6ec58a8d45..6aa1b6cb5828 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -56,6 +56,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, COMPACTISOLATED, COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, KCOMPACTD_WAKE, + KCOMPACTD_MIGRATE_SCANNED, KCOMPACTD_FREE_SCANNED, #endif #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 5527d910ba3d..a3c0cbd7c888 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -46,7 +46,7 @@ enum writeback_sync_modes { */ enum wb_reason { WB_REASON_BACKGROUND, - WB_REASON_TRY_TO_FREE_PAGES, + WB_REASON_VMSCAN, WB_REASON_SYNC, WB_REASON_PERIODIC, WB_REASON_LAPTOP_TIMER, diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index cbdb90b6b308..0a18ab6483ff 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -9,62 +9,6 @@ #include <linux/tracepoint.h> #include <trace/events/mmflags.h> -#define COMPACTION_STATUS \ - EM( COMPACT_SKIPPED, "skipped") \ - EM( COMPACT_DEFERRED, "deferred") \ - EM( COMPACT_CONTINUE, "continue") \ - EM( COMPACT_SUCCESS, "success") \ - EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \ - EM( COMPACT_COMPLETE, "complete") \ - EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \ - EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \ - EMe(COMPACT_CONTENDED, "contended") - -#ifdef CONFIG_ZONE_DMA -#define IFDEF_ZONE_DMA(X) X -#else -#define IFDEF_ZONE_DMA(X) -#endif - -#ifdef CONFIG_ZONE_DMA32 -#define IFDEF_ZONE_DMA32(X) X -#else -#define IFDEF_ZONE_DMA32(X) -#endif - -#ifdef CONFIG_HIGHMEM -#define IFDEF_ZONE_HIGHMEM(X) X -#else -#define IFDEF_ZONE_HIGHMEM(X) -#endif - -#define ZONE_TYPE \ - IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \ - IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \ - EM (ZONE_NORMAL, "Normal") \ - IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \ - EMe(ZONE_MOVABLE,"Movable") - -/* - * First define the enums in the above macros to be exported to userspace - * via TRACE_DEFINE_ENUM(). - */ -#undef EM -#undef EMe -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -COMPACTION_STATUS -ZONE_TYPE - -/* - * Now redefine the EM() and EMe() macros to map the enums to the strings - * that will be printed in the output. - */ -#undef EM -#undef EMe -#define EM(a, b) {a, b}, -#define EMe(a, b) {a, b} DECLARE_EVENT_CLASS(mm_compaction_isolate_template, @@ -187,6 +131,7 @@ TRACE_EVENT(mm_compaction_begin, __entry->sync ? "sync" : "async") ); +#ifdef CONFIG_COMPACTION TRACE_EVENT(mm_compaction_end, TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn, unsigned long free_pfn, unsigned long zone_end, bool sync, @@ -220,6 +165,7 @@ TRACE_EVENT(mm_compaction_end, __entry->sync ? "sync" : "async", __print_symbolic(__entry->status, COMPACTION_STATUS)) ); +#endif TRACE_EVENT(mm_compaction_try_to_compact_pages, @@ -248,6 +194,7 @@ TRACE_EVENT(mm_compaction_try_to_compact_pages, __entry->prio) ); +#ifdef CONFIG_COMPACTION DECLARE_EVENT_CLASS(mm_compaction_suitable_template, TP_PROTO(struct zone *zone, @@ -295,7 +242,6 @@ DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable, TP_ARGS(zone, order, ret) ); -#ifdef CONFIG_COMPACTION DECLARE_EVENT_CLASS(mm_compaction_defer_template, TP_PROTO(struct zone *zone, int order), diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h new file mode 100644 index 000000000000..c566ddc87f73 --- /dev/null +++ b/include/trace/events/fs_dax.h @@ -0,0 +1,156 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fs_dax + +#if !defined(_TRACE_FS_DAX_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FS_DAX_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(dax_pmd_fault_class, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, + pgoff_t max_pgoff, int result), + TP_ARGS(inode, vmf, max_pgoff, result), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_start) + __field(unsigned long, vm_end) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(pgoff_t, pgoff) + __field(pgoff_t, max_pgoff) + __field(dev_t, dev) + __field(unsigned int, flags) + __field(int, result) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_start = vmf->vma->vm_start; + __entry->vm_end = vmf->vma->vm_end; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->flags = vmf->flags; + __entry->pgoff = vmf->pgoff; + __entry->max_pgoff = max_pgoff; + __entry->result = result; + ), + TP_printk("dev %d:%d ino %#lx %s %s address %#lx vm_start " + "%#lx vm_end %#lx pgoff %#lx max_pgoff %#lx %s", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __print_flags(__entry->flags, "|", FAULT_FLAG_TRACE), + __entry->address, + __entry->vm_start, + __entry->vm_end, + __entry->pgoff, + __entry->max_pgoff, + __print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE) + ) +) + +#define DEFINE_PMD_FAULT_EVENT(name) \ +DEFINE_EVENT(dax_pmd_fault_class, name, \ + TP_PROTO(struct inode *inode, struct vm_fault *vmf, \ + pgoff_t max_pgoff, int result), \ + TP_ARGS(inode, vmf, max_pgoff, result)) + +DEFINE_PMD_FAULT_EVENT(dax_pmd_fault); +DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done); + +DECLARE_EVENT_CLASS(dax_pmd_load_hole_class, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, + struct page *zero_page, + void *radix_entry), + TP_ARGS(inode, vmf, zero_page, radix_entry), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(struct page *, zero_page) + __field(void *, radix_entry) + __field(dev_t, dev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->zero_page = zero_page; + __entry->radix_entry = radix_entry; + ), + TP_printk("dev %d:%d ino %#lx %s address %#lx zero_page %p " + "radix_entry %#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __entry->address, + __entry->zero_page, + (unsigned long)__entry->radix_entry + ) +) + +#define DEFINE_PMD_LOAD_HOLE_EVENT(name) \ +DEFINE_EVENT(dax_pmd_load_hole_class, name, \ + TP_PROTO(struct inode *inode, struct vm_fault *vmf, \ + struct page *zero_page, void *radix_entry), \ + TP_ARGS(inode, vmf, zero_page, radix_entry)) + +DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole); +DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole_fallback); + +DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, + long length, pfn_t pfn, void *radix_entry), + TP_ARGS(inode, vmf, length, pfn, radix_entry), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(long, length) + __field(u64, pfn_val) + __field(void *, radix_entry) + __field(dev_t, dev) + __field(int, write) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->write = vmf->flags & FAULT_FLAG_WRITE; + __entry->length = length; + __entry->pfn_val = pfn.val; + __entry->radix_entry = radix_entry; + ), + TP_printk("dev %d:%d ino %#lx %s %s address %#lx length %#lx " + "pfn %#llx %s radix_entry %#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __entry->write ? "write" : "read", + __entry->address, + __entry->length, + __entry->pfn_val & ~PFN_FLAGS_MASK, + __print_flags_u64(__entry->pfn_val & PFN_FLAGS_MASK, "|", + PFN_FLAGS_TRACE), + (unsigned long)__entry->radix_entry + ) +) + +#define DEFINE_PMD_INSERT_MAPPING_EVENT(name) \ +DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \ + TP_PROTO(struct inode *inode, struct vm_fault *vmf, \ + long length, pfn_t pfn, void *radix_entry), \ + TP_ARGS(inode, vmf, length, pfn, radix_entry)) + +DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping); +DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback); + +#endif /* _TRACE_FS_DAX_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 15bf875d0e4a..12cd88c86b66 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -1,3 +1,6 @@ +#include <linux/node.h> +#include <linux/mmzone.h> +#include <linux/compaction.h> /* * The order of these masks is important. Matching masks will be seen * first and the left over flags will end up showing by themselves. @@ -171,3 +174,98 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ (flags) ? __print_flags(flags, "|", \ __def_vmaflag_names \ ) : "none" + +#ifdef CONFIG_COMPACTION +#define COMPACTION_STATUS \ + EM( COMPACT_SKIPPED, "skipped") \ + EM( COMPACT_DEFERRED, "deferred") \ + EM( COMPACT_CONTINUE, "continue") \ + EM( COMPACT_SUCCESS, "success") \ + EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \ + EM( COMPACT_COMPLETE, "complete") \ + EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \ + EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \ + EMe(COMPACT_CONTENDED, "contended") + +/* High-level compaction status feedback */ +#define COMPACTION_FAILED 1 +#define COMPACTION_WITHDRAWN 2 +#define COMPACTION_PROGRESS 3 + +#define compact_result_to_feedback(result) \ +({ \ + enum compact_result __result = result; \ + (compaction_failed(__result)) ? COMPACTION_FAILED : \ + (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \ +}) + +#define COMPACTION_FEEDBACK \ + EM(COMPACTION_FAILED, "failed") \ + EM(COMPACTION_WITHDRAWN, "withdrawn") \ + EMe(COMPACTION_PROGRESS, "progress") + +#define COMPACTION_PRIORITY \ + EM(COMPACT_PRIO_SYNC_FULL, "COMPACT_PRIO_SYNC_FULL") \ + EM(COMPACT_PRIO_SYNC_LIGHT, "COMPACT_PRIO_SYNC_LIGHT") \ + EMe(COMPACT_PRIO_ASYNC, "COMPACT_PRIO_ASYNC") +#else +#define COMPACTION_STATUS +#define COMPACTION_PRIORITY +#define COMPACTION_FEEDBACK +#endif + +#ifdef CONFIG_ZONE_DMA +#define IFDEF_ZONE_DMA(X) X +#else +#define IFDEF_ZONE_DMA(X) +#endif + +#ifdef CONFIG_ZONE_DMA32 +#define IFDEF_ZONE_DMA32(X) X +#else +#define IFDEF_ZONE_DMA32(X) +#endif + +#ifdef CONFIG_HIGHMEM +#define IFDEF_ZONE_HIGHMEM(X) X +#else +#define IFDEF_ZONE_HIGHMEM(X) +#endif + +#define ZONE_TYPE \ + IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \ + IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \ + EM (ZONE_NORMAL, "Normal") \ + IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \ + EMe(ZONE_MOVABLE,"Movable") + +#define LRU_NAMES \ + EM (LRU_INACTIVE_ANON, "inactive_anon") \ + EM (LRU_ACTIVE_ANON, "active_anon") \ + EM (LRU_INACTIVE_FILE, "inactive_file") \ + EM (LRU_ACTIVE_FILE, "active_file") \ + EMe(LRU_UNEVICTABLE, "unevictable") + +/* + * First define the enums in the above macros to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +COMPACTION_STATUS +COMPACTION_PRIORITY +COMPACTION_FEEDBACK +ZONE_TYPE +LRU_NAMES + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index 1e974983757e..38baeb27221a 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -4,6 +4,7 @@ #if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_OOM_H #include <linux/tracepoint.h> +#include <trace/events/mmflags.h> TRACE_EVENT(oom_score_adj_update, @@ -27,6 +28,86 @@ TRACE_EVENT(oom_score_adj_update, __entry->pid, __entry->comm, __entry->oom_score_adj) ); +TRACE_EVENT(reclaim_retry_zone, + + TP_PROTO(struct zoneref *zoneref, + int order, + unsigned long reclaimable, + unsigned long available, + unsigned long min_wmark, + int no_progress_loops, + bool wmark_check), + + TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check), + + TP_STRUCT__entry( + __field( int, node) + __field( int, zone_idx) + __field( int, order) + __field( unsigned long, reclaimable) + __field( unsigned long, available) + __field( unsigned long, min_wmark) + __field( int, no_progress_loops) + __field( bool, wmark_check) + ), + + TP_fast_assign( + __entry->node = zone_to_nid(zoneref->zone); + __entry->zone_idx = zoneref->zone_idx; + __entry->order = order; + __entry->reclaimable = reclaimable; + __entry->available = available; + __entry->min_wmark = min_wmark; + __entry->no_progress_loops = no_progress_loops; + __entry->wmark_check = wmark_check; + ), + + TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d", + __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE), + __entry->order, + __entry->reclaimable, __entry->available, __entry->min_wmark, + __entry->no_progress_loops, + __entry->wmark_check) +); + +#ifdef CONFIG_COMPACTION +TRACE_EVENT(compact_retry, + + TP_PROTO(int order, + enum compact_priority priority, + enum compact_result result, + int retries, + int max_retries, + bool ret), + + TP_ARGS(order, priority, result, retries, max_retries, ret), + + TP_STRUCT__entry( + __field( int, order) + __field( int, priority) + __field( int, result) + __field( int, retries) + __field( int, max_retries) + __field( bool, ret) + ), + + TP_fast_assign( + __entry->order = order; + __entry->priority = priority; + __entry->result = compact_result_to_feedback(result); + __entry->retries = retries; + __entry->max_retries = max_retries; + __entry->ret = ret; + ), + + TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d", + __entry->order, + __print_symbolic(__entry->priority, COMPACTION_PRIORITY), + __print_symbolic(__entry->result, COMPACTION_FEEDBACK), + __entry->retries, __entry->max_retries, + __entry->ret) +); +#endif /* CONFIG_COMPACTION */ #endif /* This part must be outside protection */ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index c88fd0934e7e..27e8a5c77579 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -15,6 +15,7 @@ #define RECLAIM_WB_MIXED 0x0010u #define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */ #define RECLAIM_WB_ASYNC 0x0008u +#define RECLAIM_WB_LRU (RECLAIM_WB_ANON|RECLAIM_WB_FILE) #define show_reclaim_flags(flags) \ (flags) ? __print_flags(flags, "|", \ @@ -269,26 +270,27 @@ TRACE_EVENT(mm_shrink_slab_end, __entry->retval) ); -DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, - +TRACE_EVENT(mm_vmscan_lru_isolate, TP_PROTO(int classzone_idx, int order, unsigned long nr_requested, unsigned long nr_scanned, + unsigned long nr_skipped, unsigned long nr_taken, isolate_mode_t isolate_mode, - int file), + int lru), - TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file), + TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru), TP_STRUCT__entry( __field(int, classzone_idx) __field(int, order) __field(unsigned long, nr_requested) __field(unsigned long, nr_scanned) + __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) __field(isolate_mode_t, isolate_mode) - __field(int, file) + __field(int, lru) ), TP_fast_assign( @@ -296,47 +298,21 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __entry->order = order; __entry->nr_requested = nr_requested; __entry->nr_scanned = nr_scanned; + __entry->nr_skipped = nr_skipped; __entry->nr_taken = nr_taken; __entry->isolate_mode = isolate_mode; - __entry->file = file; + __entry->lru = lru; ), - TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d", + TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", __entry->isolate_mode, __entry->classzone_idx, __entry->order, __entry->nr_requested, __entry->nr_scanned, + __entry->nr_skipped, __entry->nr_taken, - __entry->file) -); - -DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, - - TP_PROTO(int classzone_idx, - int order, - unsigned long nr_requested, - unsigned long nr_scanned, - unsigned long nr_taken, - isolate_mode_t isolate_mode, - int file), - - TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file) - -); - -DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, - - TP_PROTO(int classzone_idx, - int order, - unsigned long nr_requested, - unsigned long nr_scanned, - unsigned long nr_taken, - isolate_mode_t isolate_mode, - int file), - - TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file) - + __print_symbolic(__entry->lru, LRU_NAMES)) ); TRACE_EVENT(mm_vmscan_writepage, @@ -365,14 +341,27 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive, TP_PROTO(int nid, unsigned long nr_scanned, unsigned long nr_reclaimed, + unsigned long nr_dirty, unsigned long nr_writeback, + unsigned long nr_congested, unsigned long nr_immediate, + unsigned long nr_activate, unsigned long nr_ref_keep, + unsigned long nr_unmap_fail, int priority, int file), - TP_ARGS(nid, nr_scanned, nr_reclaimed, priority, file), + TP_ARGS(nid, nr_scanned, nr_reclaimed, nr_dirty, nr_writeback, + nr_congested, nr_immediate, nr_activate, nr_ref_keep, + nr_unmap_fail, priority, file), TP_STRUCT__entry( __field(int, nid) __field(unsigned long, nr_scanned) __field(unsigned long, nr_reclaimed) + __field(unsigned long, nr_dirty) + __field(unsigned long, nr_writeback) + __field(unsigned long, nr_congested) + __field(unsigned long, nr_immediate) + __field(unsigned long, nr_activate) + __field(unsigned long, nr_ref_keep) + __field(unsigned long, nr_unmap_fail) __field(int, priority) __field(int, reclaim_flags) ), @@ -381,17 +370,102 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive, __entry->nid = nid; __entry->nr_scanned = nr_scanned; __entry->nr_reclaimed = nr_reclaimed; + __entry->nr_dirty = nr_dirty; + __entry->nr_writeback = nr_writeback; + __entry->nr_congested = nr_congested; + __entry->nr_immediate = nr_immediate; + __entry->nr_activate = nr_activate; + __entry->nr_ref_keep = nr_ref_keep; + __entry->nr_unmap_fail = nr_unmap_fail; __entry->priority = priority; __entry->reclaim_flags = trace_shrink_flags(file); ), - TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s", + TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate=%ld nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s", __entry->nid, __entry->nr_scanned, __entry->nr_reclaimed, + __entry->nr_dirty, __entry->nr_writeback, + __entry->nr_congested, __entry->nr_immediate, + __entry->nr_activate, __entry->nr_ref_keep, + __entry->nr_unmap_fail, __entry->priority, + show_reclaim_flags(__entry->reclaim_flags)) +); + +TRACE_EVENT(mm_vmscan_lru_shrink_active, + + TP_PROTO(int nid, unsigned long nr_taken, + unsigned long nr_active, unsigned long nr_deactivated, + unsigned long nr_referenced, int priority, int file), + + TP_ARGS(nid, nr_taken, nr_active, nr_deactivated, nr_referenced, priority, file), + + TP_STRUCT__entry( + __field(int, nid) + __field(unsigned long, nr_taken) + __field(unsigned long, nr_active) + __field(unsigned long, nr_deactivated) + __field(unsigned long, nr_referenced) + __field(int, priority) + __field(int, reclaim_flags) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->nr_taken = nr_taken; + __entry->nr_active = nr_active; + __entry->nr_deactivated = nr_deactivated; + __entry->nr_referenced = nr_referenced; + __entry->priority = priority; + __entry->reclaim_flags = trace_shrink_flags(file); + ), + + TP_printk("nid=%d nr_taken=%ld nr_active=%ld nr_deactivated=%ld nr_referenced=%ld priority=%d flags=%s", + __entry->nid, + __entry->nr_taken, + __entry->nr_active, __entry->nr_deactivated, __entry->nr_referenced, __entry->priority, show_reclaim_flags(__entry->reclaim_flags)) ); +TRACE_EVENT(mm_vmscan_inactive_list_is_low, + + TP_PROTO(int nid, int reclaim_idx, + unsigned long total_inactive, unsigned long inactive, + unsigned long total_active, unsigned long active, + unsigned long ratio, int file), + + TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file), + + TP_STRUCT__entry( + __field(int, nid) + __field(int, reclaim_idx) + __field(unsigned long, total_inactive) + __field(unsigned long, inactive) + __field(unsigned long, total_active) + __field(unsigned long, active) + __field(unsigned long, ratio) + __field(int, reclaim_flags) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->reclaim_idx = reclaim_idx; + __entry->total_inactive = total_inactive; + __entry->inactive = inactive; + __entry->total_active = total_active; + __entry->active = active; + __entry->ratio = ratio; + __entry->reclaim_flags = trace_shrink_flags(file) & RECLAIM_WB_LRU; + ), + + TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s", + __entry->nid, + __entry->reclaim_idx, + __entry->total_inactive, __entry->inactive, + __entry->total_active, __entry->active, + __entry->ratio, + show_reclaim_flags(__entry->reclaim_flags)) +); #endif /* _TRACE_VMSCAN_H */ /* This part must be outside protection */ diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 2ccd9ccbf9ef..7bd8783a590f 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -31,7 +31,7 @@ #define WB_WORK_REASON \ EM( WB_REASON_BACKGROUND, "background") \ - EM( WB_REASON_TRY_TO_FREE_PAGES, "try_to_free_pages") \ + EM( WB_REASON_VMSCAN, "vmscan") \ EM( WB_REASON_SYNC, "sync") \ EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 9f684629c3cf..1aa57cb93d7c 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -283,8 +283,16 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_symbols_seq(p, value, symbols); \ }) +#undef __print_flags_u64 #undef __print_symbolic_u64 #if BITS_PER_LONG == 32 +#define __print_flags_u64(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags_u64 __flags[] = \ + { flag_array, { -1, NULL } }; \ + trace_print_flags_seq_u64(p, delim, flag, __flags); \ + }) + #define __print_symbolic_u64(value, symbol_array...) \ ({ \ static const struct trace_print_flags_u64 symbols[] = \ @@ -292,6 +300,9 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_symbols_seq_u64(p, value, symbols); \ }) #else +#define __print_flags_u64(flag, delim, flag_array...) \ + __print_flags(flag, delim, flag_array) + #define __print_symbolic_u64(value, symbol_array...) \ __print_symbolic(value, symbol_array) #endif diff --git a/include/uapi/asm-generic/ioctl.h b/include/uapi/asm-generic/ioctl.h index 7e7c11b52143..749b32fe5623 100644 --- a/include/uapi/asm-generic/ioctl.h +++ b/include/uapi/asm-generic/ioctl.h @@ -48,6 +48,9 @@ /* * Direction bits, which any architecture can choose to override * before including this file. + * + * NOTE: _IOC_WRITE means userland is writing and kernel is + * reading. _IOC_READ means userland is reading and kernel is writing. */ #ifndef _IOC_NONE @@ -72,7 +75,12 @@ #define _IOC_TYPECHECK(t) (sizeof(t)) #endif -/* used to create numbers */ +/* + * Used to create numbers. + * + * NOTE: _IOW means userland is writing and kernel is reading. _IOR + * means userland is reading and kernel is writing. + */ #define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) #define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) #define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) diff --git a/include/uapi/linux/mqueue.h b/include/uapi/linux/mqueue.h index d0a2b8e89813..bbd5116ea739 100644 --- a/include/uapi/linux/mqueue.h +++ b/include/uapi/linux/mqueue.h @@ -18,6 +18,8 @@ #ifndef _LINUX_MQUEUE_H #define _LINUX_MQUEUE_H +#include <linux/types.h> + #define MQ_PRIO_MAX 32768 /* per-uid limit of kernel memory used by mqueue, in bytes */ #define MQ_BYTES_MAX 819200 diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 9057d7af3ae1..b742c40c2880 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -11,13 +11,18 @@ #include <linux/types.h> -#define UFFD_API ((__u64)0xAA) /* - * After implementing the respective features it will become: - * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ - * UFFD_FEATURE_EVENT_FORK) + * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In + * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ + * means the userland is reading). */ -#define UFFD_API_FEATURES (0) +#define UFFD_API ((__u64)0xAA) +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ + UFFD_FEATURE_EVENT_REMAP | \ + UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_MISSING_HUGETLBFS | \ + UFFD_FEATURE_MISSING_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -26,6 +31,9 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE) +#define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY) /* * Valid ioctl command number range with this API is from 0x00 to @@ -72,6 +80,21 @@ struct uffd_msg { } pagefault; struct { + __u32 ufd; + } fork; + + struct { + __u64 from; + __u64 to; + __u64 len; + } remap; + + struct { + __u64 start; + __u64 end; + } remove; + + struct { /* unused reserved fields */ __u64 reserved1; __u64 reserved2; @@ -84,9 +107,9 @@ struct uffd_msg { * Start at 0x12 and not at 0 to be more strict against bugs. */ #define UFFD_EVENT_PAGEFAULT 0x12 -#if 0 /* not available yet */ #define UFFD_EVENT_FORK 0x13 -#endif +#define UFFD_EVENT_REMAP 0x14 +#define UFFD_EVENT_REMOVE 0x15 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -104,11 +127,37 @@ struct uffdio_api { * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE * are to be considered implicitly always enabled in all kernels as * long as the uffdio_api.api requested matches UFFD_API. + * + * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER + * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on + * hugetlbfs virtual memory ranges. Adding or not adding + * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has + * no real functional effect after UFFDIO_API returns, but + * it's only useful for an initial feature set probe at + * UFFDIO_API time. There are two ways to use it: + * + * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the + * uffdio_api.features before calling UFFDIO_API, an error + * will be returned by UFFDIO_API on a kernel without + * hugetlbfs missing support + * + * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in + * uffdio_api.features and instead it will be set by the + * kernel in the uffdio_api.features if the kernel supports + * it, so userland can later check if the feature flag is + * present in uffdio_api.features after UFFDIO_API + * succeeded. + * + * UFFD_FEATURE_MISSING_SHMEM works the same as + * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem + * (i.e. tmpfs and other shmem based APIs). */ -#if 0 /* not available yet */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) -#endif +#define UFFD_FEATURE_EVENT_REMAP (1<<2) +#define UFFD_FEATURE_EVENT_REMOVE (1<<3) +#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) +#define UFFD_FEATURE_MISSING_SHMEM (1<<5) __u64 features; __u64 ioctls; diff --git a/ipc/sem.c b/ipc/sem.c index 3ec5742b5640..e468cd1c12f0 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -159,22 +159,42 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ /* + * Switching from the mode suitable for simple ops + * to the mode for complex ops is costly. Therefore: + * use some hysteresis + */ +#define USE_GLOBAL_LOCK_HYSTERESIS 10 + +/* * Locking: * a) global sem_lock() for read/write * sem_undo.id_next, * sem_array.complex_count, - * sem_array.complex_mode * sem_array.pending{_alter,_const}, * sem_array.sem_undo * * b) global or semaphore sem_lock() for read/write: * sem_array.sem_base[i].pending_{const,alter}: - * sem_array.complex_mode (for read) * * c) special: * sem_undo_list.list_proc: * * undo_list->lock for write * * rcu for read + * use_global_lock: + * * global sem_lock() for write + * * either local or global sem_lock() for read. + * + * Memory ordering: + * Most ordering is enforced by using spin_lock() and spin_unlock(). + * The special case is use_global_lock: + * Setting it from non-zero to 0 is a RELEASE, this is ensured by + * using smp_store_release(). + * Testing if it is non-zero is an ACQUIRE, this is ensured by using + * smp_load_acquire(). + * Setting it from 0 to non-zero must be ordered with regards to + * this smp_load_acquire(), this is guaranteed because the smp_load_acquire() + * is inside a spin_lock() and after a write from 0 to non-zero a + * spin_lock()+spin_unlock() is done. */ #define sc_semmsl sem_ctls[0] @@ -273,29 +293,22 @@ static void complexmode_enter(struct sem_array *sma) int i; struct sem *sem; - if (sma->complex_mode) { - /* We are already in complex_mode. Nothing to do */ + if (sma->use_global_lock > 0) { + /* + * We are already in global lock mode. + * Nothing to do, just reset the + * counter until we return to simple mode. + */ + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; return; } - - /* We need a full barrier after seting complex_mode: - * The write to complex_mode must be visible - * before we read the first sem->lock spinlock state. - */ - smp_store_mb(sma->complex_mode, true); + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; for (i = 0; i < sma->sem_nsems; i++) { sem = sma->sem_base + i; - spin_unlock_wait(&sem->lock); + spin_lock(&sem->lock); + spin_unlock(&sem->lock); } - /* - * spin_unlock_wait() is not a memory barriers, it is only a - * control barrier. The code must pair with spin_unlock(&sem->lock), - * thus just the control barrier is insufficient. - * - * smp_rmb() is sufficient, as writes cannot pass the control barrier. - */ - smp_rmb(); } /* @@ -310,13 +323,17 @@ static void complexmode_tryleave(struct sem_array *sma) */ return; } - /* - * Immediately after setting complex_mode to false, - * a simple op can start. Thus: all memory writes - * performed by the current operation must be visible - * before we set complex_mode to false. - */ - smp_store_release(&sma->complex_mode, false); + if (sma->use_global_lock == 1) { + /* + * Immediately after setting use_global_lock to 0, + * a simple op can start. Thus: all memory writes + * performed by the current operation must be visible + * before we set use_global_lock to 0. + */ + smp_store_release(&sma->use_global_lock, 0); + } else { + sma->use_global_lock--; + } } #define SEM_GLOBAL_LOCK (-1) @@ -346,30 +363,23 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, * Optimized locking is possible if no complex operation * is either enqueued or processed right now. * - * Both facts are tracked by complex_mode. + * Both facts are tracked by use_global_mode. */ sem = sma->sem_base + sops->sem_num; /* - * Initial check for complex_mode. Just an optimization, + * Initial check for use_global_lock. Just an optimization, * no locking, no memory barrier. */ - if (!sma->complex_mode) { + if (!sma->use_global_lock) { /* * It appears that no complex operation is around. * Acquire the per-semaphore lock. */ spin_lock(&sem->lock); - /* - * See 51d7d5205d33 - * ("powerpc: Add smp_mb() to arch_spin_is_locked()"): - * A full barrier is required: the write of sem->lock - * must be visible before the read is executed - */ - smp_mb(); - - if (!smp_load_acquire(&sma->complex_mode)) { + /* pairs with smp_store_release() */ + if (!smp_load_acquire(&sma->use_global_lock)) { /* fast path successful! */ return sops->sem_num; } @@ -379,19 +389,26 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, /* slow path: acquire the full lock */ ipc_lock_object(&sma->sem_perm); - if (sma->complex_count == 0) { - /* False alarm: - * There is no complex operation, thus we can switch - * back to the fast path. + if (sma->use_global_lock == 0) { + /* + * The use_global_lock mode ended while we waited for + * sma->sem_perm.lock. Thus we must switch to locking + * with sem->lock. + * Unlike in the fast path, there is no need to recheck + * sma->use_global_lock after we have acquired sem->lock: + * We own sma->sem_perm.lock, thus use_global_lock cannot + * change. */ spin_lock(&sem->lock); + ipc_unlock_object(&sma->sem_perm); return sops->sem_num; } else { - /* Not a false alarm, thus complete the sequence for a - * full lock. + /* + * Not a false alarm, thus continue to use the global lock + * mode. No need for complexmode_enter(), this was done by + * the caller that has set use_global_mode to non-zero. */ - complexmode_enter(sma); return SEM_GLOBAL_LOCK; } } @@ -495,7 +512,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) } sma->complex_count = 0; - sma->complex_mode = true; /* dropped by sem_unlock below */ + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; INIT_LIST_HEAD(&sma->pending_alter); INIT_LIST_HEAD(&sma->pending_const); INIT_LIST_HEAD(&sma->list_id); diff --git a/ipc/shm.c b/ipc/shm.c index 81203e8ba013..7f6537b84ef5 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -374,12 +374,12 @@ void exit_shm(struct task_struct *task) up_write(&shm_ids(ns).rwsem); } -static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int shm_fault(struct vm_fault *vmf) { - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); - return sfd->vm_ops->fault(vma, vmf); + return sfd->vm_ops->fault(vmf); } #ifdef CONFIG_NUMA diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index 1a8f34f63601..26a06e09a5bd 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -21,6 +21,7 @@ CONFIG_CP15_BARRIER_EMULATION=y CONFIG_DEFAULT_SECURITY_SELINUX=y CONFIG_EMBEDDED=y CONFIG_FB=y +CONFIG_HARDENED_USERCOPY=y CONFIG_HIGH_RES_TIMERS=y CONFIG_INET6_AH=y CONFIG_INET6_ESP=y @@ -129,6 +130,7 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y CONFIG_QUOTA=y +CONFIG_RANDOMIZE_BASE=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y CONFIG_SECCOMP=y diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 297756be369c..4719871a35f6 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -1,4 +1,5 @@ # KEEP ALPHABETICALLY SORTED +# CONFIG_AIO is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_LEGACY_PTYS is not set diff --git a/kernel/events/core.c b/kernel/events/core.c index 88676ff98c0f..a70f1d0911df 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4920,9 +4920,9 @@ unlock: rcu_read_unlock(); } -static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int perf_mmap_fault(struct vm_fault *vmf) { - struct perf_event *event = vma->vm_file->private_data; + struct perf_event *event = vmf->vma->vm_file->private_data; struct ring_buffer *rb; int ret = VM_FAULT_SIGBUS; @@ -4945,7 +4945,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto unlock; get_page(vmf->page); - vmf->page->mapping = vma->vm_file->f_mapping; + vmf->page->mapping = vmf->vma->vm_file->f_mapping; vmf->page->index = vmf->pgoff; ret = 0; diff --git a/kernel/fork.c b/kernel/fork.c index 84a907592d34..47ecfb56bcd3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -55,6 +55,7 @@ #include <linux/rmap.h> #include <linux/ksm.h> #include <linux/acct.h> +#include <linux/userfaultfd_k.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/freezer.h> @@ -561,6 +562,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; + LIST_HEAD(uf); uprobe_start_dup_mmap(); if (down_write_killable(&oldmm->mmap_sem)) { @@ -617,12 +619,13 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (retval) goto fail_nomem_policy; tmp->vm_mm = mm; + retval = dup_userfaultfd(tmp, &uf); + if (retval) + goto fail_nomem_anon_vma_fork; if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; - tmp->vm_flags &= - ~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP); + tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); tmp->vm_next = tmp->vm_prev = NULL; - tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; file = tmp->vm_file; if (file) { struct inode *inode = file_inode(file); @@ -678,6 +681,7 @@ out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); up_write(&oldmm->mmap_sem); + dup_userfaultfd_complete(&uf); fail_uprobe_end: uprobe_end_dup_mmap(); return retval; diff --git a/kernel/memremap.c b/kernel/memremap.c index 9ecedc28b928..e6476a8e8b6a 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -172,8 +172,6 @@ EXPORT_SYMBOL(devm_memunmap); #ifdef CONFIG_ZONE_DEVICE static DEFINE_MUTEX(pgmap_lock); static RADIX_TREE(pgmap_radix, GFP_KERNEL); -#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) -#define SECTION_SIZE (1UL << PA_SECTION_SHIFT) struct page_map { struct resource res; @@ -194,18 +192,41 @@ void put_zone_device_page(struct page *page) } EXPORT_SYMBOL(put_zone_device_page); -static void pgmap_radix_release(struct resource *res) +static unsigned long order_at(struct resource *res, unsigned long pgoff) { - resource_size_t key, align_start, align_size, align_end; + unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff; + unsigned long nr_pages, mask; + + nr_pages = PHYS_PFN(resource_size(res)); + if (nr_pages == pgoff) + return ULONG_MAX; + + /* + * What is the largest aligned power-of-2 range available from + * this resource pgoff to the end of the resource range, + * considering the alignment of the current pgoff? + */ + mask = phys_pgoff | rounddown_pow_of_two(nr_pages - pgoff); + if (!mask) + return ULONG_MAX; + + return find_first_bit(&mask, BITS_PER_LONG); +} - align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(resource_size(res), SECTION_SIZE); - align_end = align_start + align_size - 1; +#define foreach_order_pgoff(res, order, pgoff) \ + for (pgoff = 0, order = order_at((res), pgoff); order < ULONG_MAX; \ + pgoff += 1UL << order, order = order_at((res), pgoff)) + +static void pgmap_radix_release(struct resource *res) +{ + unsigned long pgoff, order; mutex_lock(&pgmap_lock); - for (key = res->start; key <= res->end; key += SECTION_SIZE) - radix_tree_delete(&pgmap_radix, key >> PA_SECTION_SHIFT); + foreach_order_pgoff(res, order, pgoff) + radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff); mutex_unlock(&pgmap_lock); + + synchronize_rcu(); } static unsigned long pfn_first(struct page_map *page_map) @@ -235,7 +256,6 @@ static void devm_memremap_pages_release(struct device *dev, void *data) { struct page_map *page_map = data; struct resource *res = &page_map->res; - resource_size_t align_start, align_size; struct dev_pagemap *pgmap = &page_map->pgmap; if (percpu_ref_tryget_live(pgmap->ref)) { @@ -244,12 +264,10 @@ static void devm_memremap_pages_release(struct device *dev, void *data) } /* pages are dead and unused, undo the arch mapping */ - align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(resource_size(res), SECTION_SIZE); mem_hotplug_begin(); - arch_remove_memory(align_start, align_size); + arch_remove_memory(res->start, resource_size(res)); mem_hotplug_done(); - untrack_pfn(NULL, PHYS_PFN(align_start), align_size); + untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res)); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, "%s: failed to free all reserved pages\n", __func__); @@ -262,7 +280,7 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys) WARN_ON_ONCE(!rcu_read_lock_held()); - page_map = radix_tree_lookup(&pgmap_radix, phys >> PA_SECTION_SHIFT); + page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); return page_map ? &page_map->pgmap : NULL; } @@ -284,17 +302,13 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys) void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap) { - resource_size_t key, align_start, align_size, align_end; + unsigned long pfn, pgoff, order; pgprot_t pgprot = PAGE_KERNEL; struct dev_pagemap *pgmap; struct page_map *page_map; int error, nid, is_ram; - unsigned long pfn; - align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) - - align_start; - is_ram = region_intersects(align_start, align_size, + is_ram = region_intersects(res->start, resource_size(res), IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); if (is_ram == REGION_MIXED) { @@ -327,12 +341,12 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, mutex_lock(&pgmap_lock); error = 0; - align_end = align_start + align_size - 1; - for (key = align_start; key <= align_end; key += SECTION_SIZE) { + + foreach_order_pgoff(res, order, pgoff) { struct dev_pagemap *dup; rcu_read_lock(); - dup = find_dev_pagemap(key); + dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff)); rcu_read_unlock(); if (dup) { dev_err(dev, "%s: %pr collides with mapping for %s\n", @@ -340,8 +354,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, error = -EBUSY; break; } - error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT, - page_map); + error = __radix_tree_insert(&pgmap_radix, + PHYS_PFN(res->start) + pgoff, order, page_map); if (error) { dev_err(dev, "%s: failed: %d\n", __func__, error); break; @@ -355,13 +369,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (nid < 0) nid = numa_mem_id(); - error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0, - align_size); + error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0, + resource_size(res)); if (error) goto err_pfn_remap; mem_hotplug_begin(); - error = arch_add_memory(nid, align_start, align_size, true); + error = arch_add_memory(nid, res->start, resource_size(res), true); mem_hotplug_done(); if (error) goto err_add_memory; @@ -382,7 +396,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, return __va(res->start); err_add_memory: - untrack_pfn(NULL, PHYS_PFN(align_start), align_size); + untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res)); err_pfn_remap: err_radix: pgmap_radix_release(res); diff --git a/kernel/notifier.c b/kernel/notifier.c index fd2c9acbcc19..6196af8a8223 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -95,7 +95,7 @@ static int notifier_call_chain(struct notifier_block **nl, if (nr_calls) (*nr_calls)++; - if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) + if (ret & NOTIFY_STOP_MASK) break; nb = next_nb; nr_to_call--; diff --git a/kernel/relay.c b/kernel/relay.c index 9b48284eac56..8a7d57ec1a4d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -39,10 +39,10 @@ static void relay_file_mmap_close(struct vm_area_struct *vma) /* * fault() vm_op implementation for relay file mapping. */ -static int relay_buf_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int relay_buf_fault(struct vm_fault *vmf) { struct page *page; - struct rchan_buf *buf = vma->vm_private_data; + struct rchan_buf *buf = vmf->vma->vm_private_data; pgoff_t pgoff = vmf->pgoff; if (!buf) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 30a144b1b9ee..d006f870c249 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -124,6 +124,44 @@ EXPORT_SYMBOL(trace_print_symbols_seq); #if BITS_PER_LONG == 32 const char * +trace_print_flags_seq_u64(struct trace_seq *p, const char *delim, + unsigned long long flags, + const struct trace_print_flags_u64 *flag_array) +{ + unsigned long long mask; + const char *str; + const char *ret = trace_seq_buffer_ptr(p); + int i, first = 1; + + for (i = 0; flag_array[i].name && flags; i++) { + + mask = flag_array[i].mask; + if ((flags & mask) != mask) + continue; + + str = flag_array[i].name; + flags &= ~mask; + if (!first && delim) + trace_seq_puts(p, delim); + else + first = 0; + trace_seq_puts(p, str); + } + + /* check for left over flags */ + if (flags) { + if (!first && delim) + trace_seq_puts(p, delim); + trace_seq_printf(p, "0x%llx", flags); + } + + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL(trace_print_flags_seq_u64); + +const char * trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, const struct trace_print_flags_u64 *symbol_array) { diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 12b8dd640786..b5de262a9eb9 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -137,12 +137,14 @@ static void watchdog_overflow_callback(struct perf_event *event, * Reduce the watchdog noise by only printing messages * that are different from what cpu0 displayed. */ -static unsigned long cpu0_err; +static unsigned long firstcpu_err; +static atomic_t watchdog_cpus; int watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); + int firstcpu = 0; /* nothing to do if the hard lockup detector is disabled */ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) @@ -156,19 +158,22 @@ int watchdog_nmi_enable(unsigned int cpu) if (event != NULL) goto out_enable; + if (atomic_inc_return(&watchdog_cpus) == 1) + firstcpu = 1; + wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); - /* save cpu0 error for future comparision */ - if (cpu == 0 && IS_ERR(event)) - cpu0_err = PTR_ERR(event); + /* save the first cpu's error for future comparision */ + if (firstcpu && IS_ERR(event)) + firstcpu_err = PTR_ERR(event); if (!IS_ERR(event)) { - /* only print for cpu0 or different than cpu0 */ - if (cpu == 0 || cpu0_err) + /* only print for the first cpu initialized */ + if (firstcpu || firstcpu_err) pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n"); goto out_save; } @@ -186,7 +191,7 @@ int watchdog_nmi_enable(unsigned int cpu) smp_mb__after_atomic(); /* skip displaying the same error again */ - if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) + if (!firstcpu && (PTR_ERR(event) == firstcpu_err)) return PTR_ERR(event); /* vary the KERN level based on the returned errno */ @@ -222,9 +227,9 @@ void watchdog_nmi_disable(unsigned int cpu) /* should be in cleanup, but blocks oprofile */ perf_event_release_kernel(event); - } - if (cpu == 0) { + /* watchdog_nmi_enable() expects this to be zero initially. */ - cpu0_err = 0; + if (atomic_dec_and_test(&watchdog_cpus)) + firstcpu_err = 0; } } diff --git a/lib/Kconfig b/lib/Kconfig index 1788a1f50d28..013fc0e6c5e5 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -103,8 +103,7 @@ config CRC32 functions require M here. config CRC32_SELFTEST - bool "CRC32 perform self test on init" - default n + tristate "CRC32 perform self test on init" depends on CRC32 help This option enables the CRC32 library functions to perform a @@ -432,8 +431,7 @@ config GLOB depends on this. config GLOB_SELFTEST - bool "glob self-test on init" - default n + tristate "glob self-test on init" depends on GLOB help This option enables a simple self-test of the glob_match diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8d68ebc2ff3c..930856ce9f7e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1740,6 +1740,14 @@ config TEST_LIST_SORT If unsure, say N. +config TEST_SORT + bool "Array-based sort test" + depends on DEBUG_KERNEL + help + This option enables the self-test function of 'sort()' at boot. + + If unsure, say N. + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL @@ -1791,9 +1799,10 @@ config PERCPU_TEST If unsure, say N. config ATOMIC64_SELFTEST - bool "Perform an atomic64_t self-test at boot" + tristate "Perform an atomic64_t self-test" help - Enable this option to test the atomic64_t functions at boot. + Enable this option to test the atomic64_t functions at boot or + at module load time. If unsure, say N. diff --git a/lib/Makefile b/lib/Makefile index 4581f09fb10f..54d814a462da 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_TEST_KASAN) += test_kasan.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o +obj-$(CONFIG_TEST_SORT) += test_sort.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o @@ -91,6 +92,7 @@ obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o obj-$(CONFIG_CRC32) += crc32.o +obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o obj-$(CONFIG_CRC7) += crc7.o obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o @@ -160,6 +162,7 @@ obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_DQL) += dynamic_queue_limits.o obj-$(CONFIG_GLOB) += glob.o +obj-$(CONFIG_GLOB_SELFTEST) += globtest.o obj-$(CONFIG_MPILIB) += mpi/ obj-$(CONFIG_SIGNATURE) += digsig.o diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 46042901130f..fd70c0e0e673 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -15,6 +15,7 @@ #include <linux/bug.h> #include <linux/kernel.h> #include <linux/atomic.h> +#include <linux/module.h> #ifdef CONFIG_X86 #include <asm/cpufeature.h> /* for boot_cpu_has below */ @@ -241,7 +242,7 @@ static __init void test_atomic64(void) BUG_ON(v.counter != r); } -static __init int test_atomics(void) +static __init int test_atomics_init(void) { test_atomic(); test_atomic64(); @@ -264,4 +265,9 @@ static __init int test_atomics(void) return 0; } -core_initcall(test_atomics); +static __exit void test_atomics_exit(void) {} + +module_init(test_atomics_init); +module_exit(test_atomics_exit); + +MODULE_LICENSE("GPL"); diff --git a/lib/crc32.c b/lib/crc32.c index 7fbd1a112b9d..6ddc92bc1460 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -340,827 +340,3 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) } #endif EXPORT_SYMBOL(crc32_be); - -#ifdef CONFIG_CRC32_SELFTEST - -/* 4096 random bytes */ -static u8 const __aligned(8) test_buf[] __initconst = -{ - 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30, - 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4, - 0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60, - 0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c, - 0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4, - 0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a, - 0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a, - 0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4, - 0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9, - 0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4, - 0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca, - 0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61, - 0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e, - 0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a, - 0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f, - 0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd, - 0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c, - 0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88, - 0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53, - 0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f, - 0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4, - 0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74, - 0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60, - 0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09, - 0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07, - 0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1, - 0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f, - 0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2, - 0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0, - 0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95, - 0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22, - 0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93, - 0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86, - 0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d, - 0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40, - 0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b, - 0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35, - 0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40, - 0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63, - 0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b, - 0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8, - 0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72, - 0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86, - 0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff, - 0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed, - 0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c, - 0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed, - 0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30, - 0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99, - 0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4, - 0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80, - 0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37, - 0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04, - 0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e, - 0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd, - 0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c, - 0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09, - 0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb, - 0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b, - 0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53, - 0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b, - 0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f, - 0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff, - 0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40, - 0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6, - 0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb, - 0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73, - 0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f, - 0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4, - 0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66, - 0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1, - 0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80, - 0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f, - 0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5, - 0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7, - 0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce, - 0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff, - 0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48, - 0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26, - 0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72, - 0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88, - 0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9, - 0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc, - 0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8, - 0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09, - 0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8, - 0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c, - 0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48, - 0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d, - 0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f, - 0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae, - 0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97, - 0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8, - 0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75, - 0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc, - 0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27, - 0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf, - 0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7, - 0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0, - 0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8, - 0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c, - 0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44, - 0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54, - 0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38, - 0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f, - 0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b, - 0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7, - 0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef, - 0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e, - 0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c, - 0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c, - 0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0, - 0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37, - 0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf, - 0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e, - 0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4, - 0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60, - 0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe, - 0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61, - 0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3, - 0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe, - 0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40, - 0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec, - 0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f, - 0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7, - 0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79, - 0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c, - 0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f, - 0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21, - 0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9, - 0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30, - 0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b, - 0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee, - 0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6, - 0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3, - 0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09, - 0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd, - 0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f, - 0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9, - 0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc, - 0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59, - 0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60, - 0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5, - 0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1, - 0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8, - 0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9, - 0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab, - 0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80, - 0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01, - 0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e, - 0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d, - 0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35, - 0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38, - 0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a, - 0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac, - 0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca, - 0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57, - 0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed, - 0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20, - 0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef, - 0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c, - 0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a, - 0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64, - 0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4, - 0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54, - 0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16, - 0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26, - 0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc, - 0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87, - 0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60, - 0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d, - 0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54, - 0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13, - 0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59, - 0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb, - 0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f, - 0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15, - 0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78, - 0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93, - 0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e, - 0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31, - 0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1, - 0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37, - 0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15, - 0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78, - 0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f, - 0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31, - 0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f, - 0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc, - 0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9, - 0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3, - 0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe, - 0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4, - 0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24, - 0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1, - 0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85, - 0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8, - 0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09, - 0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c, - 0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46, - 0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5, - 0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39, - 0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2, - 0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc, - 0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35, - 0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde, - 0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80, - 0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15, - 0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63, - 0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58, - 0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d, - 0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf, - 0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12, - 0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c, - 0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b, - 0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1, - 0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6, - 0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73, - 0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9, - 0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e, - 0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22, - 0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb, - 0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2, - 0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c, - 0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c, - 0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93, - 0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f, - 0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38, - 0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57, - 0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03, - 0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90, - 0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8, - 0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4, - 0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36, - 0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7, - 0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47, - 0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46, - 0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73, - 0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72, - 0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23, - 0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a, - 0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58, - 0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f, - 0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96, - 0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9, - 0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b, - 0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c, - 0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef, - 0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3, - 0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4, - 0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f, - 0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17, - 0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18, - 0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8, - 0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98, - 0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42, - 0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97, - 0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97, - 0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1, - 0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77, - 0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb, - 0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c, - 0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb, - 0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56, - 0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04, - 0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48, - 0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe, - 0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d, - 0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97, - 0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8, - 0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f, - 0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e, - 0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca, - 0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44, - 0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f, - 0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6, - 0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63, - 0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19, - 0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58, - 0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b, - 0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28, - 0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf, - 0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6, - 0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3, - 0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe, - 0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f, - 0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf, - 0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9, - 0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e, - 0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7, - 0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70, - 0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0, - 0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d, - 0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4, - 0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5, - 0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85, - 0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc, - 0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f, - 0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56, - 0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb, - 0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b, - 0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5, - 0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03, - 0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23, - 0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03, - 0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87, - 0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4, - 0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43, - 0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11, - 0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40, - 0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59, - 0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9, - 0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30, - 0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd, - 0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45, - 0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83, - 0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b, - 0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5, - 0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3, - 0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84, - 0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8, - 0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34, - 0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b, - 0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31, - 0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b, - 0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40, - 0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b, - 0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e, - 0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38, - 0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb, - 0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2, - 0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c, - 0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1, - 0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc, - 0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec, - 0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34, - 0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95, - 0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92, - 0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f, - 0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c, - 0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b, - 0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c, - 0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5, - 0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb, - 0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4, - 0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9, - 0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4, - 0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41, - 0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a, - 0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8, - 0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06, - 0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62, - 0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47, - 0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4, - 0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00, - 0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67, - 0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81, - 0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0, - 0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10, - 0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79, - 0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19, - 0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8, - 0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1, - 0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83, - 0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86, - 0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55, - 0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66, - 0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0, - 0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49, - 0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea, - 0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24, - 0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e, - 0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88, - 0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87, - 0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34, - 0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f, - 0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a, - 0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a, - 0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93, - 0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37, - 0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38, - 0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4, - 0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48, - 0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65, - 0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09, - 0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e, - 0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5, - 0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b, - 0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4, - 0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e, - 0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d, - 0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0, - 0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5, - 0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48, - 0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e, - 0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f, - 0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a, - 0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d, - 0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14, - 0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69, - 0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53, - 0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56, - 0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48, - 0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4, - 0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26, - 0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e, - 0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40, - 0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7, - 0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62, - 0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe, - 0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf, - 0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2, - 0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d, - 0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32, - 0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa, - 0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45, - 0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04, - 0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33, - 0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad, - 0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4, - 0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c, - 0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b, - 0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36, - 0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa, - 0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9, - 0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28, - 0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b, - 0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03, - 0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d, - 0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff, - 0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39, - 0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b, - 0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2, - 0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34, - 0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe, - 0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0, - 0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27, - 0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86, - 0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90, - 0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03, - 0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb, - 0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57, - 0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9, - 0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5, - 0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16, - 0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5, - 0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a, - 0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d, - 0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0, - 0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f, - 0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48, - 0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1, - 0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09, - 0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51, - 0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b, - 0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf, - 0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe, - 0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad, - 0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e, - 0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57, - 0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f, - 0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef, - 0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8, - 0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69, - 0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d, - 0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59, - 0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9, - 0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d, - 0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea, - 0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56, - 0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4, - 0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8, - 0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78, - 0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f, - 0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4, - 0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91, - 0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f, - 0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c, - 0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57, - 0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4, - 0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23, - 0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17, - 0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66, - 0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39, - 0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36, - 0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00, - 0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7, - 0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60, - 0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c, - 0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e, - 0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7, - 0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a, - 0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d, - 0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37, - 0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82, - 0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8, - 0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e, - 0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85, - 0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98, - 0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22, - 0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7, - 0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49, - 0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33, - 0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc, - 0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8, - 0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f, - 0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3, - 0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98, - 0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c, - 0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6, - 0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc, - 0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d, -}; - -/* 100 test cases */ -static struct crc_test { - u32 crc; /* random starting crc */ - u32 start; /* random 6 bit offset in buf */ - u32 length; /* random 11 bit length of test */ - u32 crc_le; /* expected crc32_le result */ - u32 crc_be; /* expected crc32_be result */ - u32 crc32c_le; /* expected crc32c_le result */ -} const test[] __initconst = -{ - {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c}, - {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca}, - {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8}, - {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a}, - {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152}, - {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7}, - {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc}, - {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2}, - {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d}, - {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5}, - {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f}, - {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a}, - {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8}, - {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa}, - {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801}, - {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597}, - {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b}, - {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a}, - {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d}, - {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982}, - {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18}, - {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7}, - {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3}, - {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5}, - {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59}, - {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e}, - {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603}, - {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060}, - {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072}, - {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59}, - {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213}, - {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41}, - {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5}, - {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2}, - {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a}, - {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2}, - {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b}, - {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1}, - {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba}, - {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62}, - {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe}, - {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988}, - {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be}, - {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546}, - {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc}, - {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69}, - {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a}, - {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2}, - {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd}, - {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb}, - {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b}, - {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76}, - {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339}, - {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9}, - {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548}, - {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de}, - {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59}, - {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b}, - {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73}, - {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11}, - {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c}, - {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b}, - {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb}, - {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc}, - {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196}, - {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a}, - {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de}, - {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9}, - {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0}, - {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60}, - {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6}, - {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c}, - {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73}, - {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7}, - {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf}, - {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83}, - {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867}, - {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211}, - {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2}, - {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874}, - {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f}, - {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff}, - {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95}, - {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd}, - {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06}, - {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784}, - {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616}, - {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c}, - {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c}, - {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d}, - {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d}, - {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272}, - {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb}, - {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b}, - {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e}, - {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23}, - {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672}, - {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86}, - {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd}, - {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48}, -}; - -#include <linux/time.h> - -static int __init crc32c_test(void) -{ - int i; - int errors = 0; - int bytes = 0; - u64 nsec; - unsigned long flags; - - /* keep static to prevent cache warming code from - * getting eliminated by the compiler */ - static u32 crc; - - /* pre-warm the cache */ - for (i = 0; i < 100; i++) { - bytes += 2*test[i].length; - - crc ^= __crc32c_le(test[i].crc, test_buf + - test[i].start, test[i].length); - } - - /* reduce OS noise */ - local_irq_save(flags); - local_irq_disable(); - - nsec = ktime_get_ns(); - for (i = 0; i < 100; i++) { - if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf + - test[i].start, test[i].length)) - errors++; - } - nsec = ktime_get_ns() - nsec; - - local_irq_restore(flags); - local_irq_enable(); - - pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); - - if (errors) - pr_warn("crc32c: %d self tests failed\n", errors); - else { - pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n", - bytes, nsec); - } - - return 0; -} - -static int __init crc32c_combine_test(void) -{ - int i, j; - int errors = 0, runs = 0; - - for (i = 0; i < 10; i++) { - u32 crc_full; - - crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start, - test[i].length); - for (j = 0; j <= test[i].length; ++j) { - u32 crc1, crc2; - u32 len1 = j, len2 = test[i].length - j; - - crc1 = __crc32c_le(test[i].crc, test_buf + - test[i].start, len1); - crc2 = __crc32c_le(0, test_buf + test[i].start + - len1, len2); - - if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) && - crc_full == test[i].crc32c_le)) - errors++; - runs++; - cond_resched(); - } - } - - if (errors) - pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs); - else - pr_info("crc32c_combine: %d self tests passed\n", runs); - - return 0; -} - -static int __init crc32_test(void) -{ - int i; - int errors = 0; - int bytes = 0; - u64 nsec; - unsigned long flags; - - /* keep static to prevent cache warming code from - * getting eliminated by the compiler */ - static u32 crc; - - /* pre-warm the cache */ - for (i = 0; i < 100; i++) { - bytes += 2*test[i].length; - - crc ^= crc32_le(test[i].crc, test_buf + - test[i].start, test[i].length); - - crc ^= crc32_be(test[i].crc, test_buf + - test[i].start, test[i].length); - } - - /* reduce OS noise */ - local_irq_save(flags); - local_irq_disable(); - - nsec = ktime_get_ns(); - for (i = 0; i < 100; i++) { - if (test[i].crc_le != crc32_le(test[i].crc, test_buf + - test[i].start, test[i].length)) - errors++; - - if (test[i].crc_be != crc32_be(test[i].crc, test_buf + - test[i].start, test[i].length)) - errors++; - } - nsec = ktime_get_ns() - nsec; - - local_irq_restore(flags); - local_irq_enable(); - - pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", - CRC_LE_BITS, CRC_BE_BITS); - - if (errors) - pr_warn("crc32: %d self tests failed\n", errors); - else { - pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n", - bytes, nsec); - } - - return 0; -} - -static int __init crc32_combine_test(void) -{ - int i, j; - int errors = 0, runs = 0; - - for (i = 0; i < 10; i++) { - u32 crc_full; - - crc_full = crc32_le(test[i].crc, test_buf + test[i].start, - test[i].length); - for (j = 0; j <= test[i].length; ++j) { - u32 crc1, crc2; - u32 len1 = j, len2 = test[i].length - j; - - crc1 = crc32_le(test[i].crc, test_buf + - test[i].start, len1); - crc2 = crc32_le(0, test_buf + test[i].start + - len1, len2); - - if (!(crc_full == crc32_le_combine(crc1, crc2, len2) && - crc_full == test[i].crc_le)) - errors++; - runs++; - cond_resched(); - } - } - - if (errors) - pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs); - else - pr_info("crc32_combine: %d self tests passed\n", runs); - - return 0; -} - -static int __init crc32test_init(void) -{ - crc32_test(); - crc32c_test(); - - crc32_combine_test(); - crc32c_combine_test(); - - return 0; -} - -static void __exit crc32_exit(void) -{ -} - -module_init(crc32test_init); -module_exit(crc32_exit); -#endif /* CONFIG_CRC32_SELFTEST */ diff --git a/lib/crc32test.c b/lib/crc32test.c new file mode 100644 index 000000000000..97d6a57cefcc --- /dev/null +++ b/lib/crc32test.c @@ -0,0 +1,856 @@ +/* + * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin + * cleaned up code to current version of sparse and added the slicing-by-8 + * algorithm to the closely similar existing slicing-by-4 algorithm. + * + * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> + * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! + * Code was from the public domain, copyright abandoned. Code was + * subsequently included in the kernel, thus was re-licensed under the + * GNU GPL v2. + * + * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> + * Same crc32 function was used in 5 other places in the kernel. + * I made one version, and deleted the others. + * There are various incantations of crc32(). Some use a seed of 0 or ~0. + * Some xor at the end with ~0. The generic crc32() function takes + * seed as an argument, and doesn't xor at the end. Then individual + * users can do whatever they need. + * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. + * fs/jffs2 uses seed 0, doesn't xor with ~0. + * fs/partitions/efi.c uses seed ~0, xor's with ~0. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/crc32.h> +#include <linux/module.h> +#include <linux/sched.h> + +#include "crc32defs.h" + +/* 4096 random bytes */ +static u8 const __aligned(8) test_buf[] __initconst = +{ + 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30, + 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4, + 0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60, + 0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c, + 0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4, + 0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a, + 0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a, + 0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4, + 0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9, + 0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4, + 0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca, + 0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61, + 0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e, + 0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a, + 0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f, + 0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd, + 0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c, + 0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88, + 0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53, + 0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f, + 0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4, + 0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74, + 0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60, + 0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09, + 0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07, + 0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1, + 0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f, + 0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2, + 0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0, + 0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95, + 0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22, + 0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93, + 0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86, + 0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d, + 0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40, + 0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b, + 0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35, + 0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40, + 0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63, + 0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b, + 0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8, + 0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72, + 0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86, + 0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff, + 0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed, + 0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c, + 0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed, + 0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30, + 0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99, + 0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4, + 0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80, + 0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37, + 0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04, + 0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e, + 0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd, + 0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c, + 0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09, + 0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb, + 0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b, + 0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53, + 0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b, + 0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f, + 0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff, + 0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40, + 0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6, + 0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb, + 0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73, + 0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f, + 0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4, + 0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66, + 0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1, + 0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80, + 0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f, + 0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5, + 0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7, + 0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce, + 0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff, + 0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48, + 0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26, + 0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72, + 0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88, + 0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9, + 0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc, + 0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8, + 0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09, + 0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8, + 0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c, + 0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48, + 0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d, + 0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f, + 0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae, + 0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97, + 0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8, + 0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75, + 0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc, + 0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27, + 0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf, + 0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7, + 0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0, + 0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8, + 0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c, + 0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44, + 0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54, + 0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38, + 0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f, + 0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b, + 0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7, + 0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef, + 0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e, + 0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c, + 0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c, + 0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0, + 0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37, + 0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf, + 0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e, + 0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4, + 0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60, + 0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe, + 0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61, + 0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3, + 0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe, + 0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40, + 0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec, + 0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f, + 0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7, + 0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79, + 0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c, + 0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f, + 0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21, + 0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9, + 0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30, + 0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b, + 0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee, + 0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6, + 0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3, + 0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09, + 0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd, + 0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f, + 0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9, + 0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc, + 0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59, + 0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60, + 0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5, + 0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1, + 0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8, + 0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9, + 0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab, + 0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80, + 0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01, + 0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e, + 0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d, + 0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35, + 0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38, + 0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a, + 0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac, + 0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca, + 0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57, + 0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed, + 0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20, + 0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef, + 0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c, + 0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a, + 0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64, + 0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4, + 0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54, + 0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16, + 0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26, + 0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc, + 0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87, + 0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60, + 0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d, + 0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54, + 0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13, + 0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59, + 0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb, + 0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f, + 0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15, + 0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78, + 0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93, + 0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e, + 0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31, + 0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1, + 0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37, + 0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15, + 0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78, + 0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f, + 0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31, + 0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f, + 0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc, + 0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9, + 0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3, + 0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe, + 0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4, + 0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24, + 0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1, + 0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85, + 0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8, + 0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09, + 0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c, + 0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46, + 0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5, + 0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39, + 0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2, + 0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc, + 0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35, + 0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde, + 0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80, + 0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15, + 0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63, + 0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58, + 0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d, + 0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf, + 0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12, + 0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c, + 0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b, + 0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1, + 0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6, + 0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73, + 0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9, + 0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e, + 0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22, + 0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb, + 0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2, + 0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c, + 0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c, + 0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93, + 0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f, + 0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38, + 0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57, + 0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03, + 0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90, + 0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8, + 0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4, + 0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36, + 0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7, + 0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47, + 0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46, + 0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73, + 0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72, + 0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23, + 0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a, + 0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58, + 0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f, + 0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96, + 0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9, + 0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b, + 0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c, + 0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef, + 0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3, + 0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4, + 0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f, + 0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17, + 0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18, + 0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8, + 0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98, + 0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42, + 0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97, + 0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97, + 0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1, + 0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77, + 0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb, + 0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c, + 0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb, + 0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56, + 0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04, + 0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48, + 0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe, + 0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d, + 0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97, + 0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8, + 0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f, + 0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e, + 0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca, + 0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44, + 0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f, + 0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6, + 0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63, + 0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19, + 0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58, + 0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b, + 0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28, + 0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf, + 0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6, + 0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3, + 0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe, + 0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f, + 0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf, + 0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9, + 0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e, + 0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7, + 0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70, + 0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0, + 0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d, + 0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4, + 0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5, + 0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85, + 0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc, + 0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f, + 0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56, + 0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb, + 0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b, + 0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5, + 0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03, + 0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23, + 0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03, + 0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87, + 0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4, + 0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43, + 0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11, + 0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40, + 0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59, + 0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9, + 0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30, + 0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd, + 0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45, + 0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83, + 0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b, + 0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5, + 0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3, + 0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84, + 0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8, + 0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34, + 0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b, + 0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31, + 0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b, + 0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40, + 0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b, + 0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e, + 0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38, + 0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb, + 0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2, + 0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c, + 0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1, + 0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc, + 0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec, + 0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34, + 0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95, + 0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92, + 0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f, + 0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c, + 0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b, + 0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c, + 0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5, + 0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb, + 0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4, + 0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9, + 0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4, + 0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41, + 0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a, + 0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8, + 0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06, + 0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62, + 0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47, + 0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4, + 0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00, + 0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67, + 0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81, + 0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0, + 0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10, + 0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79, + 0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19, + 0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8, + 0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1, + 0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83, + 0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86, + 0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55, + 0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66, + 0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0, + 0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49, + 0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea, + 0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24, + 0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e, + 0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88, + 0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87, + 0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34, + 0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f, + 0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a, + 0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a, + 0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93, + 0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37, + 0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38, + 0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4, + 0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48, + 0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65, + 0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09, + 0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e, + 0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5, + 0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b, + 0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4, + 0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e, + 0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d, + 0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0, + 0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5, + 0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48, + 0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e, + 0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f, + 0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a, + 0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d, + 0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14, + 0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69, + 0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53, + 0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56, + 0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48, + 0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4, + 0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26, + 0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e, + 0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40, + 0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7, + 0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62, + 0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe, + 0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf, + 0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2, + 0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d, + 0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32, + 0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa, + 0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45, + 0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04, + 0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33, + 0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad, + 0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4, + 0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c, + 0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b, + 0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36, + 0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa, + 0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9, + 0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28, + 0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b, + 0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03, + 0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d, + 0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff, + 0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39, + 0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b, + 0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2, + 0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34, + 0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe, + 0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0, + 0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27, + 0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86, + 0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90, + 0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03, + 0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb, + 0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57, + 0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9, + 0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5, + 0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16, + 0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5, + 0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a, + 0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d, + 0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0, + 0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f, + 0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48, + 0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1, + 0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09, + 0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51, + 0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b, + 0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf, + 0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe, + 0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad, + 0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e, + 0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57, + 0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f, + 0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef, + 0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8, + 0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69, + 0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d, + 0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59, + 0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9, + 0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d, + 0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea, + 0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56, + 0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4, + 0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8, + 0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78, + 0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f, + 0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4, + 0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91, + 0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f, + 0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c, + 0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57, + 0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4, + 0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23, + 0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17, + 0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66, + 0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39, + 0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36, + 0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00, + 0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7, + 0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60, + 0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c, + 0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e, + 0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7, + 0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a, + 0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d, + 0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37, + 0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82, + 0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8, + 0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e, + 0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85, + 0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98, + 0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22, + 0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7, + 0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49, + 0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33, + 0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc, + 0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8, + 0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f, + 0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3, + 0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98, + 0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c, + 0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6, + 0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc, + 0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d, +}; + +/* 100 test cases */ +static struct crc_test { + u32 crc; /* random starting crc */ + u32 start; /* random 6 bit offset in buf */ + u32 length; /* random 11 bit length of test */ + u32 crc_le; /* expected crc32_le result */ + u32 crc_be; /* expected crc32_be result */ + u32 crc32c_le; /* expected crc32c_le result */ +} const test[] __initconst = +{ + {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c}, + {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca}, + {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8}, + {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a}, + {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152}, + {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7}, + {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc}, + {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2}, + {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d}, + {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5}, + {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f}, + {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a}, + {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8}, + {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa}, + {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801}, + {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597}, + {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b}, + {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a}, + {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d}, + {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982}, + {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18}, + {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7}, + {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3}, + {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5}, + {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59}, + {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e}, + {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603}, + {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060}, + {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072}, + {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59}, + {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213}, + {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41}, + {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5}, + {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2}, + {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a}, + {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2}, + {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b}, + {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1}, + {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba}, + {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62}, + {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe}, + {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988}, + {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be}, + {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546}, + {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc}, + {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69}, + {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a}, + {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2}, + {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd}, + {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb}, + {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b}, + {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76}, + {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339}, + {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9}, + {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548}, + {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de}, + {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59}, + {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b}, + {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73}, + {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11}, + {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c}, + {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b}, + {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb}, + {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc}, + {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196}, + {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a}, + {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de}, + {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9}, + {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0}, + {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60}, + {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6}, + {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c}, + {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73}, + {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7}, + {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf}, + {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83}, + {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867}, + {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211}, + {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2}, + {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874}, + {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f}, + {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff}, + {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95}, + {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd}, + {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06}, + {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784}, + {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616}, + {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c}, + {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c}, + {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d}, + {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d}, + {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272}, + {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb}, + {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b}, + {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e}, + {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23}, + {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672}, + {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86}, + {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd}, + {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48}, +}; + +#include <linux/time.h> + +static int __init crc32c_test(void) +{ + int i; + int errors = 0; + int bytes = 0; + u64 nsec; + unsigned long flags; + + /* keep static to prevent cache warming code from + * getting eliminated by the compiler */ + static u32 crc; + + /* pre-warm the cache */ + for (i = 0; i < 100; i++) { + bytes += 2*test[i].length; + + crc ^= __crc32c_le(test[i].crc, test_buf + + test[i].start, test[i].length); + } + + /* reduce OS noise */ + local_irq_save(flags); + local_irq_disable(); + + nsec = ktime_get_ns(); + for (i = 0; i < 100; i++) { + if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf + + test[i].start, test[i].length)) + errors++; + } + nsec = ktime_get_ns() - nsec; + + local_irq_restore(flags); + local_irq_enable(); + + pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); + + if (errors) + pr_warn("crc32c: %d self tests failed\n", errors); + else { + pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n", + bytes, nsec); + } + + return 0; +} + +static int __init crc32c_combine_test(void) +{ + int i, j; + int errors = 0, runs = 0; + + for (i = 0; i < 10; i++) { + u32 crc_full; + + crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start, + test[i].length); + for (j = 0; j <= test[i].length; ++j) { + u32 crc1, crc2; + u32 len1 = j, len2 = test[i].length - j; + + crc1 = __crc32c_le(test[i].crc, test_buf + + test[i].start, len1); + crc2 = __crc32c_le(0, test_buf + test[i].start + + len1, len2); + + if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) && + crc_full == test[i].crc32c_le)) + errors++; + runs++; + cond_resched(); + } + } + + if (errors) + pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs); + else + pr_info("crc32c_combine: %d self tests passed\n", runs); + + return 0; +} + +static int __init crc32_test(void) +{ + int i; + int errors = 0; + int bytes = 0; + u64 nsec; + unsigned long flags; + + /* keep static to prevent cache warming code from + * getting eliminated by the compiler */ + static u32 crc; + + /* pre-warm the cache */ + for (i = 0; i < 100; i++) { + bytes += 2*test[i].length; + + crc ^= crc32_le(test[i].crc, test_buf + + test[i].start, test[i].length); + + crc ^= crc32_be(test[i].crc, test_buf + + test[i].start, test[i].length); + } + + /* reduce OS noise */ + local_irq_save(flags); + local_irq_disable(); + + nsec = ktime_get_ns(); + for (i = 0; i < 100; i++) { + if (test[i].crc_le != crc32_le(test[i].crc, test_buf + + test[i].start, test[i].length)) + errors++; + + if (test[i].crc_be != crc32_be(test[i].crc, test_buf + + test[i].start, test[i].length)) + errors++; + } + nsec = ktime_get_ns() - nsec; + + local_irq_restore(flags); + local_irq_enable(); + + pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", + CRC_LE_BITS, CRC_BE_BITS); + + if (errors) + pr_warn("crc32: %d self tests failed\n", errors); + else { + pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n", + bytes, nsec); + } + + return 0; +} + +static int __init crc32_combine_test(void) +{ + int i, j; + int errors = 0, runs = 0; + + for (i = 0; i < 10; i++) { + u32 crc_full; + + crc_full = crc32_le(test[i].crc, test_buf + test[i].start, + test[i].length); + for (j = 0; j <= test[i].length; ++j) { + u32 crc1, crc2; + u32 len1 = j, len2 = test[i].length - j; + + crc1 = crc32_le(test[i].crc, test_buf + + test[i].start, len1); + crc2 = crc32_le(0, test_buf + test[i].start + + len1, len2); + + if (!(crc_full == crc32_le_combine(crc1, crc2, len2) && + crc_full == test[i].crc_le)) + errors++; + runs++; + cond_resched(); + } + } + + if (errors) + pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs); + else + pr_info("crc32_combine: %d self tests passed\n", runs); + + return 0; +} + +static int __init crc32test_init(void) +{ + crc32_test(); + crc32c_test(); + + crc32_combine_test(); + crc32c_combine_test(); + + return 0; +} + +static void __exit crc32_exit(void) +{ +} + +module_init(crc32test_init); +module_exit(crc32_exit); + +MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); +MODULE_DESCRIPTION("CRC32 selftest"); +MODULE_LICENSE("GPL"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 04c1ef717fe0..5476bbe318e4 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -52,9 +52,18 @@ static int debug_objects_fixups __read_mostly; static int debug_objects_warnings __read_mostly; static int debug_objects_enabled __read_mostly = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT; - +static int debug_objects_pool_size __read_mostly + = ODEBUG_POOL_SIZE; +static int debug_objects_pool_min_level __read_mostly + = ODEBUG_POOL_MIN_LEVEL; static struct debug_obj_descr *descr_test __read_mostly; +/* + * Track numbers of kmem_cache_alloc and kmem_cache_free done. + */ +static int debug_objects_alloc; +static int debug_objects_freed; + static void free_obj_work(struct work_struct *work); static DECLARE_WORK(debug_obj_work, free_obj_work); @@ -88,13 +97,13 @@ static void fill_pool(void) struct debug_obj *new; unsigned long flags; - if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL)) + if (likely(obj_pool_free >= debug_objects_pool_min_level)) return; if (unlikely(!obj_cache)) return; - while (obj_pool_free < ODEBUG_POOL_MIN_LEVEL) { + while (obj_pool_free < debug_objects_pool_min_level) { new = kmem_cache_zalloc(obj_cache, gfp); if (!new) @@ -102,6 +111,7 @@ static void fill_pool(void) raw_spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); + debug_objects_alloc++; obj_pool_free++; raw_spin_unlock_irqrestore(&pool_lock, flags); } @@ -162,24 +172,38 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) /* * workqueue function to free objects. + * + * To reduce contention on the global pool_lock, the actual freeing of + * debug objects will be delayed if the pool_lock is busy. We also free + * the objects in a batch of 4 for each lock/unlock cycle. */ +#define ODEBUG_FREE_BATCH 4 static void free_obj_work(struct work_struct *work) { - struct debug_obj *obj; + struct debug_obj *objs[ODEBUG_FREE_BATCH]; unsigned long flags; + int i; - raw_spin_lock_irqsave(&pool_lock, flags); - while (obj_pool_free > ODEBUG_POOL_SIZE) { - obj = hlist_entry(obj_pool.first, typeof(*obj), node); - hlist_del(&obj->node); - obj_pool_free--; + if (!raw_spin_trylock_irqsave(&pool_lock, flags)) + return; + while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) { + for (i = 0; i < ODEBUG_FREE_BATCH; i++) { + objs[i] = hlist_entry(obj_pool.first, + typeof(*objs[0]), node); + hlist_del(&objs[i]->node); + } + + obj_pool_free -= ODEBUG_FREE_BATCH; + debug_objects_freed += ODEBUG_FREE_BATCH; /* * We release pool_lock across kmem_cache_free() to * avoid contention on pool_lock. */ raw_spin_unlock_irqrestore(&pool_lock, flags); - kmem_cache_free(obj_cache, obj); - raw_spin_lock_irqsave(&pool_lock, flags); + for (i = 0; i < ODEBUG_FREE_BATCH; i++) + kmem_cache_free(obj_cache, objs[i]); + if (!raw_spin_trylock_irqsave(&pool_lock, flags)) + return; } raw_spin_unlock_irqrestore(&pool_lock, flags); } @@ -198,7 +222,7 @@ static void free_object(struct debug_obj *obj) * schedule work when the pool is filled and the cache is * initialized: */ - if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) + if (obj_pool_free > debug_objects_pool_size && obj_cache) sched = 1; hlist_add_head(&obj->node, &obj_pool); obj_pool_free++; @@ -758,6 +782,8 @@ static int debug_stats_show(struct seq_file *m, void *v) seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); seq_printf(m, "pool_used :%d\n", obj_pool_used); seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); + seq_printf(m, "objects_alloc :%d\n", debug_objects_alloc); + seq_printf(m, "objects_freed :%d\n", debug_objects_freed); return 0; } @@ -1116,4 +1142,11 @@ void __init debug_objects_mem_init(void) pr_warn("out of memory.\n"); } else debug_objects_selftest(); + + /* + * Increase the thresholds for allocating and freeing objects + * according to the number of possible CPUs available in the system. + */ + debug_objects_pool_size += num_possible_cpus() * 32; + debug_objects_pool_min_level += num_possible_cpus() * 4; } diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 036fc882cd72..1b0baf3008ea 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len, error("NULL input pointer and missing fill function"); goto exit_1; } else { - inp = large_malloc(lz4_compressbound(uncomp_chunksize)); + inp = large_malloc(LZ4_compressBound(uncomp_chunksize)); if (!inp) { error("Could not allocate input buffer"); goto exit_1; @@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len, inp += 4; size -= 4; } else { - if (chunksize > lz4_compressbound(uncomp_chunksize)) { + if (chunksize > LZ4_compressBound(uncomp_chunksize)) { error("chunk length is longer than allocated"); goto exit_2; } @@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len, out_len -= dest_len; } else dest_len = out_len; - ret = lz4_decompress(inp, &chunksize, outp, dest_len); + + ret = LZ4_decompress_fast(inp, outp, dest_len); + chunksize = ret; #else dest_len = uncomp_chunksize; - ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp, - &dest_len); + + ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len); + dest_len = ret; #endif if (ret < 0) { error("Decoding failed"); diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 8971370bfb16..60c57ec936db 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1155,6 +1155,11 @@ static void check_unmap(struct dma_debug_entry *ref) dir2name[ref->direction]); } + /* + * Drivers should use dma_mapping_error() to check the returned + * addresses of dma_map_single() and dma_map_page(). + * If not, print this warning message. See Documentation/DMA-API.txt. + */ if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { err_printk(ref->dev, entry, "DMA-API: device driver failed to check map error" diff --git a/lib/find_bit.c b/lib/find_bit.c index 18072ea9c20e..6ed74f78380c 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -33,7 +33,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, { unsigned long tmp; - if (!nbits || start >= nbits) + if (unlikely(start >= nbits)) return nbits; tmp = addr[start / BITS_PER_LONG] ^ invert; @@ -151,7 +151,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, { unsigned long tmp; - if (!nbits || start >= nbits) + if (unlikely(start >= nbits)) return nbits; tmp = addr[start / BITS_PER_LONG] ^ invert; diff --git a/lib/fonts/Kconfig b/lib/fonts/Kconfig index e77dfe00de36..8fa0791e8a1e 100644 --- a/lib/fonts/Kconfig +++ b/lib/fonts/Kconfig @@ -87,6 +87,14 @@ config FONT_6x10 embedded devices with a 320x240 screen, to get a reasonable number of characters (53x24) that are still at a readable size. +config FONT_10x18 + bool "console 10x18 font (not supported by all drivers)" if FONTS + depends on FRAMEBUFFER_CONSOLE + help + This is a high resolution console font for machines with very + big letters. It fits between the sun 12x22 and the normal 8x16 font. + If other fonts are too big or too small for you, say Y, otherwise say N. + config FONT_SUN8x16 bool "Sparc console 8x16 font" depends on FRAMEBUFFER_CONSOLE && (!SPARC && FONTS || SPARC) @@ -101,14 +109,6 @@ config FONT_SUN12x22 big letters (like the letters used in the SPARC PROM). If the standard font is unreadable for you, say Y, otherwise say N. -config FONT_10x18 - bool "console 10x18 font (not supported by all drivers)" if FONTS - depends on FRAMEBUFFER_CONSOLE - help - This is a high resolution console font for machines with very - big letters. It fits between the sun 12x22 and the normal 8x16 font. - If other fonts are too big or too small for you, say Y, otherwise say N. - config FONT_AUTOSELECT def_bool y depends on !FONT_8x8 diff --git a/lib/glob.c b/lib/glob.c index 500fc80d23e1..0ba3ea86b546 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -121,167 +121,3 @@ backtrack: } } EXPORT_SYMBOL(glob_match); - - -#ifdef CONFIG_GLOB_SELFTEST - -#include <linux/printk.h> -#include <linux/moduleparam.h> - -/* Boot with "glob.verbose=1" to show successful tests, too */ -static bool verbose = false; -module_param(verbose, bool, 0); - -struct glob_test { - char const *pat, *str; - bool expected; -}; - -static bool __pure __init test(char const *pat, char const *str, bool expected) -{ - bool match = glob_match(pat, str); - bool success = match == expected; - - /* Can't get string literals into a particular section, so... */ - static char const msg_error[] __initconst = - KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; - static char const msg_ok[] __initconst = - KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; - static char const mismatch[] __initconst = "mismatch"; - char const *message; - - if (!success) - message = msg_error; - else if (verbose) - message = msg_ok; - else - return success; - - printk(message, pat, str, mismatch + 3*match); - return success; -} - -/* - * The tests are all jammed together in one array to make it simpler - * to place that array in the .init.rodata section. The obvious - * "array of structures containing char *" has no way to force the - * pointed-to strings to be in a particular section. - * - * Anyway, a test consists of: - * 1. Expected glob_match result: '1' or '0'. - * 2. Pattern to match: null-terminated string - * 3. String to match against: null-terminated string - * - * The list of tests is terminated with a final '\0' instead of - * a glob_match result character. - */ -static char const glob_tests[] __initconst = - /* Some basic tests */ - "1" "a\0" "a\0" - "0" "a\0" "b\0" - "0" "a\0" "aa\0" - "0" "a\0" "\0" - "1" "\0" "\0" - "0" "\0" "a\0" - /* Simple character class tests */ - "1" "[a]\0" "a\0" - "0" "[a]\0" "b\0" - "0" "[!a]\0" "a\0" - "1" "[!a]\0" "b\0" - "1" "[ab]\0" "a\0" - "1" "[ab]\0" "b\0" - "0" "[ab]\0" "c\0" - "1" "[!ab]\0" "c\0" - "1" "[a-c]\0" "b\0" - "0" "[a-c]\0" "d\0" - /* Corner cases in character class parsing */ - "1" "[a-c-e-g]\0" "-\0" - "0" "[a-c-e-g]\0" "d\0" - "1" "[a-c-e-g]\0" "f\0" - "1" "[]a-ceg-ik[]\0" "a\0" - "1" "[]a-ceg-ik[]\0" "]\0" - "1" "[]a-ceg-ik[]\0" "[\0" - "1" "[]a-ceg-ik[]\0" "h\0" - "0" "[]a-ceg-ik[]\0" "f\0" - "0" "[!]a-ceg-ik[]\0" "h\0" - "0" "[!]a-ceg-ik[]\0" "]\0" - "1" "[!]a-ceg-ik[]\0" "f\0" - /* Simple wild cards */ - "1" "?\0" "a\0" - "0" "?\0" "aa\0" - "0" "??\0" "a\0" - "1" "?x?\0" "axb\0" - "0" "?x?\0" "abx\0" - "0" "?x?\0" "xab\0" - /* Asterisk wild cards (backtracking) */ - "0" "*??\0" "a\0" - "1" "*??\0" "ab\0" - "1" "*??\0" "abc\0" - "1" "*??\0" "abcd\0" - "0" "??*\0" "a\0" - "1" "??*\0" "ab\0" - "1" "??*\0" "abc\0" - "1" "??*\0" "abcd\0" - "0" "?*?\0" "a\0" - "1" "?*?\0" "ab\0" - "1" "?*?\0" "abc\0" - "1" "?*?\0" "abcd\0" - "1" "*b\0" "b\0" - "1" "*b\0" "ab\0" - "0" "*b\0" "ba\0" - "1" "*b\0" "bb\0" - "1" "*b\0" "abb\0" - "1" "*b\0" "bab\0" - "1" "*bc\0" "abbc\0" - "1" "*bc\0" "bc\0" - "1" "*bc\0" "bbc\0" - "1" "*bc\0" "bcbc\0" - /* Multiple asterisks (complex backtracking) */ - "1" "*ac*\0" "abacadaeafag\0" - "1" "*ac*ae*ag*\0" "abacadaeafag\0" - "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" - "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" - "1" "*abcd*\0" "abcabcabcabcdefg\0" - "1" "*ab*cd*\0" "abcabcabcabcdefg\0" - "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" - "0" "*abcd*\0" "abcabcabcabcefg\0" - "0" "*ab*cd*\0" "abcabcabcabcefg\0"; - -static int __init glob_init(void) -{ - unsigned successes = 0; - unsigned n = 0; - char const *p = glob_tests; - static char const message[] __initconst = - KERN_INFO "glob: %u self-tests passed, %u failed\n"; - - /* - * Tests are jammed together in a string. The first byte is '1' - * or '0' to indicate the expected outcome, or '\0' to indicate the - * end of the tests. Then come two null-terminated strings: the - * pattern and the string to match it against. - */ - while (*p) { - bool expected = *p++ & 1; - char const *pat = p; - - p += strlen(p) + 1; - successes += test(pat, p, expected); - p += strlen(p) + 1; - n++; - } - - n -= successes; - printk(message, successes, n); - - /* What's the errno for "kernel bug detected"? Guess... */ - return n ? -ECANCELED : 0; -} - -/* We need a dummy exit function to allow unload */ -static void __exit glob_fini(void) { } - -module_init(glob_init); -module_exit(glob_fini); - -#endif /* CONFIG_GLOB_SELFTEST */ diff --git a/lib/globtest.c b/lib/globtest.c new file mode 100644 index 000000000000..d8e97d43b905 --- /dev/null +++ b/lib/globtest.c @@ -0,0 +1,167 @@ +/* + * Extracted fronm glob.c + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/glob.h> +#include <linux/printk.h> + +/* Boot with "glob.verbose=1" to show successful tests, too */ +static bool verbose = false; +module_param(verbose, bool, 0); + +struct glob_test { + char const *pat, *str; + bool expected; +}; + +static bool __pure __init test(char const *pat, char const *str, bool expected) +{ + bool match = glob_match(pat, str); + bool success = match == expected; + + /* Can't get string literals into a particular section, so... */ + static char const msg_error[] __initconst = + KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; + static char const msg_ok[] __initconst = + KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; + static char const mismatch[] __initconst = "mismatch"; + char const *message; + + if (!success) + message = msg_error; + else if (verbose) + message = msg_ok; + else + return success; + + printk(message, pat, str, mismatch + 3*match); + return success; +} + +/* + * The tests are all jammed together in one array to make it simpler + * to place that array in the .init.rodata section. The obvious + * "array of structures containing char *" has no way to force the + * pointed-to strings to be in a particular section. + * + * Anyway, a test consists of: + * 1. Expected glob_match result: '1' or '0'. + * 2. Pattern to match: null-terminated string + * 3. String to match against: null-terminated string + * + * The list of tests is terminated with a final '\0' instead of + * a glob_match result character. + */ +static char const glob_tests[] __initconst = + /* Some basic tests */ + "1" "a\0" "a\0" + "0" "a\0" "b\0" + "0" "a\0" "aa\0" + "0" "a\0" "\0" + "1" "\0" "\0" + "0" "\0" "a\0" + /* Simple character class tests */ + "1" "[a]\0" "a\0" + "0" "[a]\0" "b\0" + "0" "[!a]\0" "a\0" + "1" "[!a]\0" "b\0" + "1" "[ab]\0" "a\0" + "1" "[ab]\0" "b\0" + "0" "[ab]\0" "c\0" + "1" "[!ab]\0" "c\0" + "1" "[a-c]\0" "b\0" + "0" "[a-c]\0" "d\0" + /* Corner cases in character class parsing */ + "1" "[a-c-e-g]\0" "-\0" + "0" "[a-c-e-g]\0" "d\0" + "1" "[a-c-e-g]\0" "f\0" + "1" "[]a-ceg-ik[]\0" "a\0" + "1" "[]a-ceg-ik[]\0" "]\0" + "1" "[]a-ceg-ik[]\0" "[\0" + "1" "[]a-ceg-ik[]\0" "h\0" + "0" "[]a-ceg-ik[]\0" "f\0" + "0" "[!]a-ceg-ik[]\0" "h\0" + "0" "[!]a-ceg-ik[]\0" "]\0" + "1" "[!]a-ceg-ik[]\0" "f\0" + /* Simple wild cards */ + "1" "?\0" "a\0" + "0" "?\0" "aa\0" + "0" "??\0" "a\0" + "1" "?x?\0" "axb\0" + "0" "?x?\0" "abx\0" + "0" "?x?\0" "xab\0" + /* Asterisk wild cards (backtracking) */ + "0" "*??\0" "a\0" + "1" "*??\0" "ab\0" + "1" "*??\0" "abc\0" + "1" "*??\0" "abcd\0" + "0" "??*\0" "a\0" + "1" "??*\0" "ab\0" + "1" "??*\0" "abc\0" + "1" "??*\0" "abcd\0" + "0" "?*?\0" "a\0" + "1" "?*?\0" "ab\0" + "1" "?*?\0" "abc\0" + "1" "?*?\0" "abcd\0" + "1" "*b\0" "b\0" + "1" "*b\0" "ab\0" + "0" "*b\0" "ba\0" + "1" "*b\0" "bb\0" + "1" "*b\0" "abb\0" + "1" "*b\0" "bab\0" + "1" "*bc\0" "abbc\0" + "1" "*bc\0" "bc\0" + "1" "*bc\0" "bbc\0" + "1" "*bc\0" "bcbc\0" + /* Multiple asterisks (complex backtracking) */ + "1" "*ac*\0" "abacadaeafag\0" + "1" "*ac*ae*ag*\0" "abacadaeafag\0" + "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" + "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" + "1" "*abcd*\0" "abcabcabcabcdefg\0" + "1" "*ab*cd*\0" "abcabcabcabcdefg\0" + "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" + "0" "*abcd*\0" "abcabcabcabcefg\0" + "0" "*ab*cd*\0" "abcabcabcabcefg\0"; + +static int __init glob_init(void) +{ + unsigned successes = 0; + unsigned n = 0; + char const *p = glob_tests; + static char const message[] __initconst = + KERN_INFO "glob: %u self-tests passed, %u failed\n"; + + /* + * Tests are jammed together in a string. The first byte is '1' + * or '0' to indicate the expected outcome, or '\0' to indicate the + * end of the tests. Then come two null-terminated strings: the + * pattern and the string to match it against. + */ + while (*p) { + bool expected = *p++ & 1; + char const *pat = p; + + p += strlen(p) + 1; + successes += test(pat, p, expected); + p += strlen(p) + 1; + n++; + } + + n -= successes; + printk(message, successes, n); + + /* What's the errno for "kernel bug detected"? Guess... */ + return n ? -ECANCELED : 0; +} + +/* We need a dummy exit function to allow unload */ +static void __exit glob_fini(void) { } + +module_init(glob_init); +module_exit(glob_fini); + +MODULE_DESCRIPTION("glob(7) matching tests"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index 28321d8f75ef..e59589626053 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -1,19 +1,16 @@ /* * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011-2012, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - + * Copyright (C) 2011 - 2016, Yann Collet. + * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * - * * Redistributions of source code must retain the above copyright + * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above + * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. - * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -25,417 +22,875 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * * You can contact the author at : - * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - * - LZ4 source repository : http://code.google.com/p/lz4/ + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 * - * Changed for kernel use by: - * Chanho Min <chanho.min@lge.com> + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ +/*-************************************ + * Dependencies + **************************************/ +#include <linux/lz4.h> +#include "lz4defs.h" #include <linux/module.h> #include <linux/kernel.h> -#include <linux/lz4.h> #include <asm/unaligned.h> -#include "lz4defs.h" -/* - * LZ4_compressCtx : - * ----------------- - * Compress 'isize' bytes from 'source' into an output buffer 'dest' of - * maximum size 'maxOutputSize'. * If it cannot achieve it, compression - * will stop, and result of the function will be zero. - * return : the number of bytes written in buffer 'dest', or 0 if the - * compression fails - */ -static inline int lz4_compressctx(void *ctx, - const char *source, - char *dest, - int isize, - int maxoutputsize) +/*-****************************** + * Compression functions + ********************************/ +static U32 LZ4_hash4(U32 sequence, tableType_t const tableType) { - HTYPE *hashtable = (HTYPE *)ctx; - const u8 *ip = (u8 *)source; + if (tableType == byU16) + return ((sequence * 2654435761U) + >> ((MINMATCH*8) - (LZ4_HASHLOG + 1))); + else + return ((sequence * 2654435761U) + >> ((MINMATCH*8) - LZ4_HASHLOG)); +} + #if LZ4_ARCH64 - const BYTE * const base = ip; +static U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) + ? LZ4_HASHLOG + 1 + : LZ4_HASHLOG; + +#ifdef __LITTLE_ENDIAN__ + static const U64 prime5bytes = 889523592379ULL; + + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); #else - const int base = 0; + static const U64 prime8bytes = 11400714785074694791ULL; + + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); #endif - const u8 *anchor = ip; - const u8 *const iend = ip + isize; - const u8 *const mflimit = iend - MFLIMIT; - #define MATCHLIMIT (iend - LASTLITERALS) - - u8 *op = (u8 *) dest; - u8 *const oend = op + maxoutputsize; - int length; - const int skipstrength = SKIPSTRENGTH; - u32 forwardh; - int lastrun; - - /* Init */ - if (isize < MINLENGTH) - goto _last_literals; +} +#endif + +static U32 LZ4_hashPosition(const void *p, tableType_t tableType) +{ +#if LZ4_ARCH64 + if (tableType == byU32) + return LZ4_hash5(LZ4_read_ARCH(p), tableType); +#endif + + return LZ4_hash4(LZ4_read32(p), tableType); +} + +static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase, + tableType_t const tableType, const BYTE *srcBase) +{ + switch (tableType) { + case byPtr: + { + const BYTE **hashTable = (const BYTE **)tableBase; + + hashTable[h] = p; + return; + } + case byU32: + { + U32 *hashTable = (U32 *) tableBase; + + hashTable[h] = (U32)(p - srcBase); + return; + } + case byU16: + { + U16 *hashTable = (U16 *) tableBase; + + hashTable[h] = (U16)(p - srcBase); + return; + } + } +} + +static inline void LZ4_putPosition(const BYTE *p, void *tableBase, + tableType_t tableType, const BYTE *srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase, + tableType_t tableType, const BYTE *srcBase) +{ + if (tableType == byPtr) { + const BYTE **hashTable = (const BYTE **) tableBase; + + return hashTable[h]; + } + + if (tableType == byU32) { + const U32 * const hashTable = (U32 *) tableBase; + + return hashTable[h] + srcBase; + } + + { + /* default, to ensure a return */ + const U16 * const hashTable = (U16 *) tableBase; + return hashTable[h] + srcBase; + } +} + +static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase, + tableType_t tableType, const BYTE *srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + + +/* + * LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time + */ +static inline int LZ4_compress_generic( + LZ4_stream_t_internal * const dictPtr, + const char * const source, + char * const dest, + const int inputSize, + const int maxOutputSize, + const limitedOutput_directive outputLimited, + const tableType_t tableType, + const dict_directive dict, + const dictIssue_directive dictIssue, + const U32 acceleration) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *base; + const BYTE *lowLimit; + const BYTE * const lowRefLimit = ip - dictPtr->dictSize; + const BYTE * const dictionary = dictPtr->dictionary; + const BYTE * const dictEnd = dictionary + dictPtr->dictSize; + const size_t dictDelta = dictEnd - (const BYTE *)source; + const BYTE *anchor = (const BYTE *) source; + const BYTE * const iend = ip + inputSize; + const BYTE * const mflimit = iend - MFLIMIT; + const BYTE * const matchlimit = iend - LASTLITERALS; + + BYTE *op = (BYTE *) dest; + BYTE * const olimit = op + maxOutputSize; + + U32 forwardH; + size_t refDelta = 0; + + /* Init conditions */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { + /* Unsupported inputSize, too large (or negative) */ + return 0; + } + switch (dict) { + case noDict: + default: + base = (const BYTE *)source; + lowLimit = (const BYTE *)source; + break; + case withPrefix64k: + base = (const BYTE *)source - dictPtr->currentOffset; + lowLimit = (const BYTE *)source - dictPtr->dictSize; + break; + case usingExtDict: + base = (const BYTE *)source - dictPtr->currentOffset; + lowLimit = (const BYTE *)source; + break; + } + + if ((tableType == byU16) + && (inputSize >= LZ4_64Klimit)) { + /* Size too large (not within 64K limit) */ + return 0; + } - memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + if (inputSize < LZ4_minLength) { + /* Input too small, no compression (all literals) */ + goto _last_literals; + } /* First Byte */ - hashtable[LZ4_HASH_VALUE(ip)] = ip - base; - ip++; - forwardh = LZ4_HASH_VALUE(ip); + LZ4_putPosition(ip, dictPtr->hashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); /* Main Loop */ - for (;;) { - int findmatchattempts = (1U << skipstrength) + 3; - const u8 *forwardip = ip; - const u8 *ref; - u8 *token; + for ( ; ; ) { + const BYTE *match; + BYTE *token; /* Find a match */ - do { - u32 h = forwardh; - int step = findmatchattempts++ >> skipstrength; - ip = forwardip; - forwardip = ip + step; - - if (unlikely(forwardip > mflimit)) - goto _last_literals; - - forwardh = LZ4_HASH_VALUE(forwardip); - ref = base + hashtable[h]; - hashtable[h] = ip - base; - } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + { const BYTE *forwardIp = ip; + unsigned int step = 1; + unsigned int searchMatchNb = acceleration + << LZ4_skipTrigger; + + do { + U32 const h = forwardH; + + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimit)) + goto _last_literals; + + match = LZ4_getPositionOnHash(h, + dictPtr->hashTable, + tableType, base); + if (dict == usingExtDict) { + if (match < (const BYTE *)source) { + refDelta = dictDelta; + lowLimit = dictionary; + } else { + refDelta = 0; + lowLimit = (const BYTE *)source; + } } + forwardH = LZ4_hashPosition(forwardIp, + tableType); + LZ4_putPositionOnHash(ip, h, dictPtr->hashTable, + tableType, base); + + } while (((dictIssue == dictSmall) + ? (match < lowRefLimit) + : 0) + || ((tableType == byU16) + ? 0 + : (match + MAX_DISTANCE < ip)) + || (LZ4_read32(match + refDelta) + != LZ4_read32(ip))); + } /* Catch up */ - while ((ip > anchor) && (ref > (u8 *)source) && - unlikely(ip[-1] == ref[-1])) { + while (((ip > anchor) & (match + refDelta > lowLimit)) + && (unlikely(ip[-1] == match[refDelta - 1]))) { ip--; - ref--; - } + match--; + } - /* Encode Literal length */ - length = (int)(ip - anchor); - token = op++; - /* check output limit */ - if (unlikely(op + length + (2 + 1 + LASTLITERALS) + - (length >> 8) > oend)) - return 0; + /* Encode Literals */ + { unsigned const int litLength = (unsigned int)(ip - anchor); - if (length >= (int)RUN_MASK) { - int len; - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254 ; len -= 255) - *op++ = 255; - *op++ = (u8)len; - } else - *token = (length << ML_BITS); + token = op++; + if ((outputLimited) && + /* Check output buffer overflow */ + (unlikely(op + litLength + + (2 + 1 + LASTLITERALS) + + (litLength/255) > olimit))) + return 0; + if (litLength >= RUN_MASK) { + int len = (int)litLength - RUN_MASK; + + *token = (RUN_MASK<<ML_BITS); + for (; len >= 255 ; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (BYTE)(litLength<<ML_BITS); + + /* Copy Literals */ + LZ4_wildCopy(op, anchor, op + litLength); + op += litLength; + } - /* Copy Literals */ - LZ4_BLINDCOPY(anchor, op, length); _next_match: /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + LZ4_writeLE16(op, (U16)(ip - match)); op += 2; - /* Start Counting */ - ip += MINMATCH; - /* MinMatch verified */ - ref += MINMATCH; - anchor = ip; - while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { - #if LZ4_ARCH64 - u64 diff = A64(ref) ^ A64(ip); - #else - u32 diff = A32(ref) ^ A32(ip); - #endif - if (!diff) { - ip += STEPSIZE; - ref += STEPSIZE; - continue; - } - ip += LZ4_NBCOMMONBYTES(diff); - goto _endcount; - } - #if LZ4_ARCH64 - if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { - ip += 4; - ref += 4; - } - #endif - if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { - ip += 2; - ref += 2; - } - if ((ip < MATCHLIMIT) && (*ref == *ip)) - ip++; -_endcount: /* Encode MatchLength */ - length = (int)(ip - anchor); - /* Check output limit */ - if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) - return 0; - if (length >= (int)ML_MASK) { - *token += ML_MASK; - length -= ML_MASK; - for (; length > 509 ; length -= 510) { - *op++ = 255; - *op++ = 255; - } - if (length > 254) { - length -= 255; - *op++ = 255; + { unsigned int matchCode; + + if ((dict == usingExtDict) + && (lowLimit == dictionary)) { + const BYTE *limit; + + match += refDelta; + limit = ip + (dictEnd - match); + if (limit > matchlimit) + limit = matchlimit; + matchCode = LZ4_count(ip + MINMATCH, + match + MINMATCH, limit); + ip += MINMATCH + matchCode; + if (ip == limit) { + unsigned const int more = LZ4_count(ip, + (const BYTE *)source, + matchlimit); + + matchCode += more; + ip += more; + } + } else { + matchCode = LZ4_count(ip + MINMATCH, + match + MINMATCH, matchlimit); + ip += MINMATCH + matchCode; } - *op++ = (u8)length; - } else - *token += length; + + if (outputLimited && + /* Check output buffer overflow */ + (unlikely(op + + (1 + LASTLITERALS) + + (matchCode>>8) > olimit))) + return 0; + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op += 4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + + anchor = ip; /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; + if (ip > mflimit) break; - } /* Fill table */ - hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base); /* Test next position */ - ref = base + hashtable[LZ4_HASH_VALUE(ip)]; - hashtable[LZ4_HASH_VALUE(ip)] = ip - base; - if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + match = LZ4_getPosition(ip, dictPtr->hashTable, + tableType, base); + if (dict == usingExtDict) { + if (match < (const BYTE *)source) { + refDelta = dictDelta; + lowLimit = dictionary; + } else { + refDelta = 0; + lowLimit = (const BYTE *)source; + } + } + LZ4_putPosition(ip, dictPtr->hashTable, tableType, base); + if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1) + && (match + MAX_DISTANCE >= ip) + && (LZ4_read32(match + refDelta) == LZ4_read32(ip))) { token = op++; *token = 0; goto _next_match; } /* Prepare next loop */ - anchor = ip++; - forwardh = LZ4_HASH_VALUE(ip); + forwardH = LZ4_hashPosition(++ip, tableType); } _last_literals: /* Encode Last Literals */ - lastrun = (int)(iend - anchor); - if (((char *)op - dest) + lastrun + 1 - + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) - return 0; - - if (lastrun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastrun -= RUN_MASK; - for (; lastrun > 254 ; lastrun -= 255) - *op++ = 255; - *op++ = (u8)lastrun; - } else - *op++ = (lastrun << ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend - anchor; + { size_t const lastRun = (size_t)(iend - anchor); + if ((outputLimited) && + /* Check output buffer overflow */ + ((op - (BYTE *)dest) + lastRun + 1 + + ((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize)) + return 0; + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for (; accumulator >= 255 ; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun<<ML_BITS); + } + memcpy(op, anchor, lastRun); + op += lastRun; + } /* End */ - return (int)(((char *)op) - dest); + return (int) (((char *)op) - dest); } -static inline int lz4_compress64kctx(void *ctx, - const char *source, - char *dest, - int isize, - int maxoutputsize) +int LZ4_compress_fast_extState(void *state, const char *source, char *dest, + int inputSize, int maxOutputSize, int acceleration) { - u16 *hashtable = (u16 *)ctx; - const u8 *ip = (u8 *) source; - const u8 *anchor = ip; - const u8 *const base = ip; - const u8 *const iend = ip + isize; - const u8 *const mflimit = iend - MFLIMIT; - #define MATCHLIMIT (iend - LASTLITERALS) - - u8 *op = (u8 *) dest; - u8 *const oend = op + maxoutputsize; - int len, length; - const int skipstrength = SKIPSTRENGTH; - u32 forwardh; - int lastrun; - - /* Init */ - if (isize < MINLENGTH) - goto _last_literals; + #if LZ4_ARCH64 + tableType_t tableType = byU32; + #else + tableType_t tableType = byPtr; + #endif + + LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse; + + LZ4_resetStream((LZ4_stream_t *)state); + + if (acceleration < 1) + acceleration = LZ4_ACCELERATION_DEFAULT; + + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) + return LZ4_compress_generic(ctx, source, + dest, inputSize, 0, + noLimit, byU16, noDict, + noDictIssue, acceleration); + else + return LZ4_compress_generic(ctx, source, + dest, inputSize, 0, + noLimit, tableType, noDict, + noDictIssue, acceleration); + } else { + if (inputSize < LZ4_64Klimit) + return LZ4_compress_generic(ctx, source, + dest, inputSize, + maxOutputSize, limitedOutput, byU16, noDict, + noDictIssue, acceleration); + else + return LZ4_compress_generic(ctx, source, + dest, inputSize, + maxOutputSize, limitedOutput, tableType, noDict, + noDictIssue, acceleration); + } +} - memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); +int LZ4_compress_fast(const char *source, char *dest, int inputSize, + int maxOutputSize, void *wrkmem, int acceleration) +{ + return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize, + maxOutputSize, acceleration); +} +EXPORT_SYMBOL(LZ4_compress_fast); + +int LZ4_compress_default(const char *source, char *dest, int inputSize, + int maxOutputSize, void *wrkmem) +{ + return LZ4_compress_fast(source, dest, inputSize, + maxOutputSize, wrkmem, 1); +} +EXPORT_SYMBOL(LZ4_compress_default); + +/*-****************************** + * *_destSize() variant + ********************************/ + +static int LZ4_compress_destSize_generic( + LZ4_stream_t_internal * const ctx, + const char * const src, + char * const dst, + int * const srcSizePtr, + const int targetDstSize, + const tableType_t tableType) +{ + const BYTE *ip = (const BYTE *) src; + const BYTE *base = (const BYTE *) src; + const BYTE *lowLimit = (const BYTE *) src; + const BYTE *anchor = ip; + const BYTE * const iend = ip + *srcSizePtr; + const BYTE * const mflimit = iend - MFLIMIT; + const BYTE * const matchlimit = iend - LASTLITERALS; + + BYTE *op = (BYTE *) dst; + BYTE * const oend = op + targetDstSize; + BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */ + - 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */; + BYTE * const oMaxMatch = op + targetDstSize + - (LASTLITERALS + 1 /* token */); + BYTE * const oMaxSeq = oMaxLit - 1 /* token */; + + U32 forwardH; + + /* Init conditions */ + /* Impossible to store anything */ + if (targetDstSize < 1) + return 0; + /* Unsupported input size, too large (or negative) */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) + return 0; + /* Size too large (not within 64K limit) */ + if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit)) + return 0; + /* Input too small, no compression (all literals) */ + if (*srcSizePtr < LZ4_minLength) + goto _last_literals; /* First Byte */ - ip++; - forwardh = LZ4_HASH64K_VALUE(ip); + *srcSizePtr = 0; + LZ4_putPosition(ip, ctx->hashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); /* Main Loop */ - for (;;) { - int findmatchattempts = (1U << skipstrength) + 3; - const u8 *forwardip = ip; - const u8 *ref; - u8 *token; + for ( ; ; ) { + const BYTE *match; + BYTE *token; /* Find a match */ - do { - u32 h = forwardh; - int step = findmatchattempts++ >> skipstrength; - ip = forwardip; - forwardip = ip + step; - - if (forwardip > mflimit) - goto _last_literals; - - forwardh = LZ4_HASH64K_VALUE(forwardip); - ref = base + hashtable[h]; - hashtable[h] = (u16)(ip - base); - } while (A32(ref) != A32(ip)); + { const BYTE *forwardIp = ip; + unsigned int step = 1; + unsigned int searchMatchNb = 1 << LZ4_skipTrigger; + + do { + U32 h = forwardH; + + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimit)) + goto _last_literals; + + match = LZ4_getPositionOnHash(h, ctx->hashTable, + tableType, base); + forwardH = LZ4_hashPosition(forwardIp, + tableType); + LZ4_putPositionOnHash(ip, h, + ctx->hashTable, tableType, + base); + + } while (((tableType == byU16) + ? 0 + : (match + MAX_DISTANCE < ip)) + || (LZ4_read32(match) != LZ4_read32(ip))); + } /* Catch up */ - while ((ip > anchor) && (ref > (u8 *)source) - && (ip[-1] == ref[-1])) { - ip--; - ref--; - } + while ((ip > anchor) + && (match > lowLimit) + && (unlikely(ip[-1] == match[-1]))) { + ip--; match--; + } /* Encode Literal length */ - length = (int)(ip - anchor); - token = op++; - /* Check output limit */ - if (unlikely(op + length + (2 + 1 + LASTLITERALS) - + (length >> 8) > oend)) - return 0; - if (length >= (int)RUN_MASK) { - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254 ; len -= 255) - *op++ = 255; - *op++ = (u8)len; - } else - *token = (length << ML_BITS); + { unsigned int litLength = (unsigned int)(ip - anchor); - /* Copy Literals */ - LZ4_BLINDCOPY(anchor, op, length); + token = op++; + if (op + ((litLength + 240)/255) + + litLength > oMaxLit) { + /* Not enough space for a last match */ + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + unsigned int len = litLength - RUN_MASK; + *token = (RUN_MASK<<ML_BITS); + for (; len >= 255 ; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (BYTE)(litLength<<ML_BITS); + + /* Copy Literals */ + LZ4_wildCopy(op, anchor, op + litLength); + op += litLength; + } _next_match: /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + LZ4_writeLE16(op, (U16)(ip - match)); op += 2; - /* Start Counting */ - ip += MINMATCH; - /* MinMatch verified */ - ref += MINMATCH; - anchor = ip; + /* Encode MatchLength */ + { size_t matchLength = LZ4_count(ip + MINMATCH, + match + MINMATCH, matchlimit); - while (ip < MATCHLIMIT - (STEPSIZE - 1)) { - #if LZ4_ARCH64 - u64 diff = A64(ref) ^ A64(ip); - #else - u32 diff = A32(ref) ^ A32(ip); - #endif - - if (!diff) { - ip += STEPSIZE; - ref += STEPSIZE; - continue; + if (op + ((matchLength + 240)/255) > oMaxMatch) { + /* Match description too long : reduce it */ + matchLength = (15 - 1) + (oMaxMatch - op) * 255; } - ip += LZ4_NBCOMMONBYTES(diff); - goto _endcount; - } - #if LZ4_ARCH64 - if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { - ip += 4; - ref += 4; + ip += MINMATCH + matchLength; + + if (matchLength >= ML_MASK) { + *token += ML_MASK; + matchLength -= ML_MASK; + while (matchLength >= 255) { + matchLength -= 255; *op++ = 255; + } + *op++ = (BYTE)matchLength; + } else + *token += (BYTE)(matchLength); } - #endif - if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { - ip += 2; - ref += 2; - } - if ((ip < MATCHLIMIT) && (*ref == *ip)) - ip++; -_endcount: - /* Encode MatchLength */ - len = (int)(ip - anchor); - /* Check output limit */ - if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) - return 0; - if (len >= (int)ML_MASK) { - *token += ML_MASK; - len -= ML_MASK; - for (; len > 509 ; len -= 510) { - *op++ = 255; - *op++ = 255; - } - if (len > 254) { - len -= 255; - *op++ = 255; - } - *op++ = (u8)len; - } else - *token += len; + anchor = ip; - /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; + /* Test end of block */ + if (ip > mflimit) + break; + if (op > oMaxSeq) break; - } /* Fill table */ - hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base); /* Test next position */ - ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; - hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); - if (A32(ref) == A32(ip)) { - token = op++; - *token = 0; + match = LZ4_getPosition(ip, ctx->hashTable, tableType, base); + LZ4_putPosition(ip, ctx->hashTable, tableType, base); + + if ((match + MAX_DISTANCE >= ip) + && (LZ4_read32(match) == LZ4_read32(ip))) { + token = op++; *token = 0; goto _next_match; } /* Prepare next loop */ - anchor = ip++; - forwardh = LZ4_HASH64K_VALUE(ip); + forwardH = LZ4_hashPosition(++ip, tableType); } _last_literals: /* Encode Last Literals */ - lastrun = (int)(iend - anchor); - if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) - return 0; - if (lastrun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastrun -= RUN_MASK; - for (; lastrun > 254 ; lastrun -= 255) - *op++ = 255; - *op++ = (u8)lastrun; - } else - *op++ = (lastrun << ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend - anchor; + { size_t lastRunSize = (size_t)(iend - anchor); + + if (op + 1 /* token */ + + ((lastRunSize + 240)/255) /* litLength */ + + lastRunSize /* literals */ > oend) { + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (oend - op) - 1; + lastRunSize -= (lastRunSize + 240)/255; + } + ip = anchor + lastRunSize; + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + + *op++ = RUN_MASK << ML_BITS; + for (; accumulator >= 255 ; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize<<ML_BITS); + } + memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + /* End */ - return (int)(((char *)op) - dest); + *srcSizePtr = (int) (((const char *)ip) - src); + return (int) (((char *)op) - dst); } -int lz4_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem) +static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src, + char *dst, int *srcSizePtr, int targetDstSize) { - int ret = -1; - int out_len = 0; + #if LZ4_ARCH64 + tableType_t tableType = byU32; + #else + tableType_t tableType = byPtr; + #endif + + LZ4_resetStream(state); + + if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { + /* compression success is guaranteed */ + return LZ4_compress_fast_extState( + state, src, dst, *srcSizePtr, + targetDstSize, 1); + } else { + if (*srcSizePtr < LZ4_64Klimit) + return LZ4_compress_destSize_generic( + &state->internal_donotuse, + src, dst, srcSizePtr, + targetDstSize, byU16); + else + return LZ4_compress_destSize_generic( + &state->internal_donotuse, + src, dst, srcSizePtr, + targetDstSize, tableType); + } +} - if (src_len < LZ4_64KLIMIT) - out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, - lz4_compressbound(src_len)); - else - out_len = lz4_compressctx(wrkmem, src, dst, src_len, - lz4_compressbound(src_len)); - if (out_len < 0) - goto exit; +int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr, + int targetDstSize, void *wrkmem) +{ + return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr, + targetDstSize); +} +EXPORT_SYMBOL(LZ4_compress_destSize); + +/*-****************************** + * Streaming functions + ********************************/ +void LZ4_resetStream(LZ4_stream_t *LZ4_stream) +{ + memset(LZ4_stream, 0, sizeof(LZ4_stream_t)); +} + +#define HASH_UNIT sizeof(reg_t) +int LZ4_loadDict(LZ4_stream_t *LZ4_dict, + const char *dictionary, int dictSize) +{ + LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse; + const BYTE *p = (const BYTE *)dictionary; + const BYTE * const dictEnd = p + dictSize; + const BYTE *base; + + if ((dict->initCheck) + || (dict->currentOffset > 1 * GB)) { + /* Uninitialized structure, or reuse overflow */ + LZ4_resetStream(LZ4_dict); + } + + if (dictSize < (int)HASH_UNIT) { + dict->dictionary = NULL; + dict->dictSize = 0; + return 0; + } + + if ((dictEnd - p) > 64 * KB) + p = dictEnd - 64 * KB; + dict->currentOffset += 64 * KB; + base = p - dict->currentOffset; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->currentOffset += dict->dictSize; + + while (p <= dictEnd - HASH_UNIT) { + LZ4_putPosition(p, dict->hashTable, byU32, base); + p += 3; + } - *dst_len = out_len; + return dict->dictSize; +} +EXPORT_SYMBOL(LZ4_loadDict); + +static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict, + const BYTE *src) +{ + if ((LZ4_dict->currentOffset > 0x80000000) || + ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) { + /* address space overflow */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 * KB; + const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + + for (i = 0; i < LZ4_HASH_SIZE_U32; i++) { + if (LZ4_dict->hashTable[i] < delta) + LZ4_dict->hashTable[i] = 0; + else + LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 * KB; + if (LZ4_dict->dictSize > 64 * KB) + LZ4_dict->dictSize = 64 * KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } +} + +int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize) +{ + LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse; + const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize; + + if ((U32)dictSize > 64 * KB) { + /* useless to define a dictionary > 64 * KB */ + dictSize = 64 * KB; + } + if ((U32)dictSize > dict->dictSize) + dictSize = dict->dictSize; - return 0; -exit: - return ret; + memmove(safeBuffer, previousDictEnd - dictSize, dictSize); + + dict->dictionary = (const BYTE *)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} +EXPORT_SYMBOL(LZ4_saveDict); + +int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source, + char *dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse; + const BYTE * const dictEnd = streamPtr->dictionary + + streamPtr->dictSize; + + const BYTE *smallest = (const BYTE *) source; + + if (streamPtr->initCheck) { + /* Uninitialized structure detected */ + return 0; + } + + if ((streamPtr->dictSize > 0) && (smallest > dictEnd)) + smallest = dictEnd; + + LZ4_renormDictT(streamPtr, smallest); + + if (acceleration < 1) + acceleration = LZ4_ACCELERATION_DEFAULT; + + /* Check overlapping input/dictionary space */ + { const BYTE *sourceEnd = (const BYTE *) source + inputSize; + + if ((sourceEnd > streamPtr->dictionary) + && (sourceEnd < dictEnd)) { + streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 * KB) + streamPtr->dictSize = 64 * KB; + if (streamPtr->dictSize < 4) + streamPtr->dictSize = 0; + streamPtr->dictionary = dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == (const BYTE *)source) { + int result; + + if ((streamPtr->dictSize < 64 * KB) && + (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + maxOutputSize, limitedOutput, byU32, + withPrefix64k, dictSmall, acceleration); + } else { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + maxOutputSize, limitedOutput, byU32, + withPrefix64k, noDictIssue, acceleration); + } + streamPtr->dictSize += (U32)inputSize; + streamPtr->currentOffset += (U32)inputSize; + return result; + } + + /* external dictionary mode */ + { int result; + + if ((streamPtr->dictSize < 64 * KB) && + (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + maxOutputSize, limitedOutput, byU32, + usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + maxOutputSize, limitedOutput, byU32, + usingExtDict, noDictIssue, acceleration); + } + streamPtr->dictionary = (const BYTE *)source; + streamPtr->dictSize = (U32)inputSize; + streamPtr->currentOffset += (U32)inputSize; + return result; + } +} +EXPORT_SYMBOL(LZ4_compress_fast_continue); + +/*-****************************** + * For backwards compatibility + ********************************/ +int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst, + size_t *dst_len, void *wrkmem) { + *dst_len = LZ4_compress_default(src, dst, (int)src_len, + (int)((size_t)dst_len), wrkmem); + + /* + * Prior lz4_compress will return -1 in case of error + * and 0 on success + * while new LZ4_compress_fast/default + * returns 0 in case of error + * and the output length on success + */ + if (!dst_len) + return -1; + else + return 0; } EXPORT_SYMBOL(lz4_compress); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 6d940c72b5fc..ca71375a6c40 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -1,25 +1,16 @@ /* - * LZ4 Decompressor for Linux kernel - * - * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> - * - * Based on LZ4 implementation by Yann Collet. - * * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011-2012, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * + * Copyright (C) 2011 - 2016, Yann Collet. + * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * - * * Redistributions of source code must retain the above copyright + * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above + * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. - * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -31,313 +22,508 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 * - * You can contact the author at : - * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - * - LZ4 source repository : http://code.google.com/p/lz4/ + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ -#ifndef STATIC +/*-************************************ + * Dependencies + **************************************/ +#include <linux/lz4.h> +#include "lz4defs.h" +#include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> -#endif -#include <linux/lz4.h> - #include <asm/unaligned.h> -#include "lz4defs.h" - -static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 -static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; -#endif - -static int lz4_uncompress(const char *source, char *dest, int osize) +/*-***************************** + * Decompression functions + *******************************/ +/* LZ4_decompress_generic() : + * This generic decompression function cover all use cases. + * It shall be instantiated several times, using different sets of directives + * Note that it is important this generic function is really inlined, + * in order to remove useless branches during compilation optimization. + */ +static inline int LZ4_decompress_generic( + const char *const source, + char * const dest, + int inputSize, + /* + * If endOnInput == endOnInputSize, + * this value is the max size of Output Buffer. + */ + int outputSize, + /* endOnOutputSize, endOnInputSize */ + int endOnInput, + /* full, partial */ + int partialDecoding, + /* only used if partialDecoding == partial */ + int targetOutputSize, + /* noDict, withPrefix64k, usingExtDict */ + int dict, + /* == dest when no prefix */ + const BYTE * const lowPrefix, + /* only if dict == usingExtDict */ + const BYTE * const dictStart, + /* note : = 0 if noDict */ + const size_t dictSize + ) { + /* Local Variables */ const BYTE *ip = (const BYTE *) source; - const BYTE *ref; + const BYTE * const iend = ip + inputSize; + BYTE *op = (BYTE *) dest; - BYTE * const oend = op + osize; + BYTE * const oend = op + outputSize; BYTE *cpy; - unsigned token; - size_t length; + BYTE *oexit = op + targetOutputSize; + const BYTE * const lowLimit = lowPrefix - dictSize; + + const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize; + const unsigned int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; + const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; + + const int safeDecode = (endOnInput == endOnInputSize); + const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB))); + /* Special cases */ + /* targetOutputSize too high => decode everything */ + if ((partialDecoding) && (oexit > oend - MFLIMIT)) + oexit = oend - MFLIMIT; + + /* Empty output buffer */ + if ((endOnInput) && (unlikely(outputSize == 0))) + return ((inputSize == 1) && (*ip == 0)) ? 0 : -1; + + if ((!endOnInput) && (unlikely(outputSize == 0))) + return (*ip == 0 ? 1 : -1); + + /* Main Loop : decode sequences */ while (1) { + size_t length; + const BYTE *match; + size_t offset; + + /* get literal length */ + unsigned int const token = *ip++; + + length = token>>ML_BITS; - /* get runlength */ - token = *ip++; - length = (token >> ML_BITS); if (length == RUN_MASK) { - size_t len; + unsigned int s; - len = *ip++; - for (; len == 255; length += 255) - len = *ip++; - if (unlikely(length > (size_t)(length + len))) + do { + s = *ip++; + length += s; + } while (likely(endOnInput + ? ip < iend - RUN_MASK + : 1) & (s == 255)); + + if ((safeDecode) + && unlikely( + (size_t)(op + length) < (size_t)(op))) { + /* overflow detection */ goto _output_error; - length += len; + } + if ((safeDecode) + && unlikely( + (size_t)(ip + length) < (size_t)(ip))) { + /* overflow detection */ + goto _output_error; + } } /* copy literals */ cpy = op + length; - if (unlikely(cpy > oend - COPYLENGTH)) { - /* - * Error: not enough place for another match - * (min 4) + 5 literals - */ - if (cpy != oend) - goto _output_error; - + if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT)) + || (ip + length > iend - (2 + 1 + LASTLITERALS)))) + || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) { + if (partialDecoding) { + if (cpy > oend) { + /* + * Error : + * write attempt beyond end of output buffer + */ + goto _output_error; + } + if ((endOnInput) + && (ip + length > iend)) { + /* + * Error : + * read attempt beyond + * end of input buffer + */ + goto _output_error; + } + } else { + if ((!endOnInput) + && (cpy != oend)) { + /* + * Error : + * block decoding must + * stop exactly there + */ + goto _output_error; + } + if ((endOnInput) + && ((ip + length != iend) + || (cpy > oend))) { + /* + * Error : + * input must be consumed + */ + goto _output_error; + } + } memcpy(op, ip, length); ip += length; - break; /* EOF */ + op += length; + /* Necessarily EOF, due to parsing restrictions */ + break; } - LZ4_WILDCOPY(ip, op, cpy); - ip -= (op - cpy); - op = cpy; + LZ4_wildCopy(op, ip, cpy); + ip += length; op = cpy; /* get offset */ - LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); - ip += 2; - - /* Error: offset create reference outside destination buffer */ - if (unlikely(ref < (BYTE *const) dest)) + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + if ((checkOffset) && (unlikely(match < lowLimit))) { + /* Error : offset outside buffers */ goto _output_error; + } + /* costs ~1%; silence an msan warning when offset == 0 */ + LZ4_write32(op, (U32)offset); /* get matchlength */ length = token & ML_MASK; if (length == ML_MASK) { - for (; *ip == 255; length += 255) - ip++; - if (unlikely(length > (size_t)(length + *ip))) + unsigned int s; + + do { + s = *ip++; + if ((endOnInput) && (ip > iend - LASTLITERALS)) + goto _output_error; + length += s; + } while (s == 255); + if ((safeDecode) + && unlikely( + (size_t)(op + length) < (size_t)op)) { + /* overflow detection */ goto _output_error; - length += *ip++; + } } + length += MINMATCH; + + /* check external dictionary */ + if ((dict == usingExtDict) && (match < lowPrefix)) { + if (unlikely(op + length > oend - LASTLITERALS)) { + /* doesn't respect parsing restriction */ + goto _output_error; + } - /* copy repeated sequence */ - if (unlikely((op - ref) < STEPSIZE)) { -#if LZ4_ARCH64 - int dec64 = dec64table[op - ref]; -#else - const int dec64 = 0; -#endif - op[0] = ref[0]; - op[1] = ref[1]; - op[2] = ref[2]; - op[3] = ref[3]; - op += 4; - ref += 4; - ref -= dec32table[op-ref]; - PUT4(ref, op); - op += STEPSIZE - 4; - ref -= dec64; + if (length <= (size_t)(lowPrefix - match)) { + /* + * match can be copied as a single segment + * from external dictionary + */ + memmove(op, dictEnd - (lowPrefix - match), length); + op += length; + } else { + /* + * match encompass external + * dictionary and current block + */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + + memcpy(op, dictEnd - copySize, copySize); + op += copySize; + + if (restSize > (size_t)(op - lowPrefix)) { + /* overlap copy */ + BYTE * const endOfMatch = op + restSize; + const BYTE *copyFrom = lowPrefix; + + while (op < endOfMatch) + *op++ = *copyFrom++; + } else { + memcpy(op, lowPrefix, restSize); + op += restSize; + } + } + continue; + } + + /* copy match within block */ + cpy = op + length; + if (unlikely(offset < 8)) { + const int dec64 = dec64table[offset]; + + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[offset]; + memcpy(op + 4, match, 4); + match -= dec64; } else { - LZ4_COPYSTEP(ref, op); + LZ4_copy8(op, match); match += 8; } - cpy = op + length - (STEPSIZE - 4); - if (cpy > (oend - COPYLENGTH)) { - /* Error: request to write beyond destination buffer */ - if (cpy > oend) - goto _output_error; -#if LZ4_ARCH64 - if ((ref + COPYLENGTH) > oend) -#else - if ((ref + COPYLENGTH) > oend || - (op + COPYLENGTH) > oend) -#endif + op += 8; + + if (unlikely(cpy > oend - 12)) { + BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1); + + if (cpy > oend - LASTLITERALS) { + /* + * Error : last LASTLITERALS bytes + * must be literals (uncompressed) + */ goto _output_error; - LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + } + if (op < oCopyLimit) { + LZ4_wildCopy(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } while (op < cpy) - *op++ = *ref++; - op = cpy; - /* - * Check EOF (should never happen, since last 5 bytes - * are supposed to be literals) - */ - if (op == oend) - goto _output_error; - continue; + *op++ = *match++; + } else { + LZ4_copy8(op, match); + if (length > 16) + LZ4_wildCopy(op + 8, match + 8, cpy); } - LZ4_SECURECOPY(ref, op, cpy); op = cpy; /* correction */ } + /* end of decoding */ - return (int) (((char *)ip) - source); + if (endOnInput) { + /* Nb of output bytes decoded */ + return (int) (((char *)op) - dest); + } else { + /* Nb of input bytes read */ + return (int) (((const char *)ip) - source); + } - /* write overflow error detected */ + /* Overflow error detected */ _output_error: return -1; } -static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, - int isize, size_t maxoutputsize) +int LZ4_decompress_safe(const char *source, char *dest, + int compressedSize, int maxDecompressedSize) { - const BYTE *ip = (const BYTE *) source; - const BYTE *const iend = ip + isize; - const BYTE *ref; - + return LZ4_decompress_generic(source, dest, compressedSize, + maxDecompressedSize, endOnInputSize, full, 0, + noDict, (BYTE *)dest, NULL, 0); +} +EXPORT_SYMBOL(LZ4_decompress_safe); - BYTE *op = (BYTE *) dest; - BYTE * const oend = op + maxoutputsize; - BYTE *cpy; +int LZ4_decompress_safe_partial(const char *source, char *dest, + int compressedSize, int targetOutputSize, int maxDecompressedSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, + maxDecompressedSize, endOnInputSize, partial, + targetOutputSize, noDict, (BYTE *)dest, NULL, 0); +} +EXPORT_SYMBOL(LZ4_decompress_safe_partial); - /* Main Loop */ - while (ip < iend) { +int LZ4_decompress_fast(const char *source, char *dest, int originalSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, withPrefix64k, + (BYTE *)(dest - 64 * KB), NULL, 64 * KB); +} +EXPORT_SYMBOL(LZ4_decompress_fast); - unsigned token; - size_t length; +int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, + const char *dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode; - /* get runlength */ - token = *ip++; - length = (token >> ML_BITS); - if (length == RUN_MASK) { - int s = 255; - while ((ip < iend) && (s == 255)) { - s = *ip++; - if (unlikely(length > (size_t)(length + s))) - goto _output_error; - length += s; - } - } - /* copy literals */ - cpy = op + length; - if ((cpy > oend - COPYLENGTH) || - (ip + length > iend - COPYLENGTH)) { - - if (cpy > oend) - goto _output_error;/* writes beyond buffer */ - - if (ip + length != iend) - goto _output_error;/* - * Error: LZ4 format requires - * to consume all input - * at this stage - */ - memcpy(op, ip, length); - op += length; - break;/* Necessarily EOF, due to parsing restrictions */ - } - LZ4_WILDCOPY(ip, op, cpy); - ip -= (op - cpy); - op = cpy; + lz4sd->prefixSize = (size_t) dictSize; + lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize; + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} +EXPORT_SYMBOL(LZ4_setStreamDecode); - /* get offset */ - LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); - ip += 2; - if (ref < (BYTE * const) dest) - goto _output_error; - /* - * Error : offset creates reference - * outside of destination buffer - */ +/* + * *_continue() : + * These decoding functions allow decompression of multiple blocks + * in "streaming" mode. + * Previously decoded blocks must still be available at the memory + * position where they were decoded. + * If it's not possible, save the relevant part of + * decoded data into a safe buffer, + * and indicate where it stands using LZ4_setStreamDecode() + */ +int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixEnd == (BYTE *)dest) { + result = LZ4_decompress_generic(source, dest, + compressedSize, + maxOutputSize, + endOnInputSize, full, 0, + usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, + lz4sd->externalDict, + lz4sd->extDictSize); + + if (result <= 0) + return result; + + lz4sd->prefixSize += result; + lz4sd->prefixEnd += result; + } else { + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, full, 0, + usingExtDict, (BYTE *)dest, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize = result; + lz4sd->prefixEnd = (BYTE *)dest + result; + } - /* get matchlength */ - length = (token & ML_MASK); - if (length == ML_MASK) { - while (ip < iend) { - int s = *ip++; - if (unlikely(length > (size_t)(length + s))) - goto _output_error; - length += s; - if (s == 255) - continue; - break; - } - } + return result; +} +EXPORT_SYMBOL(LZ4_decompress_safe_continue); - /* copy repeated sequence */ - if (unlikely((op - ref) < STEPSIZE)) { -#if LZ4_ARCH64 - int dec64 = dec64table[op - ref]; -#else - const int dec64 = 0; -#endif - op[0] = ref[0]; - op[1] = ref[1]; - op[2] = ref[2]; - op[3] = ref[3]; - op += 4; - ref += 4; - ref -= dec32table[op - ref]; - PUT4(ref, op); - op += STEPSIZE - 4; - ref -= dec64; - } else { - LZ4_COPYSTEP(ref, op); - } - cpy = op + length - (STEPSIZE-4); - if (cpy > oend - COPYLENGTH) { - if (cpy > oend) - goto _output_error; /* write outside of buf */ -#if LZ4_ARCH64 - if ((ref + COPYLENGTH) > oend) -#else - if ((ref + COPYLENGTH) > oend || - (op + COPYLENGTH) > oend) -#endif - goto _output_error; - LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); - while (op < cpy) - *op++ = *ref++; - op = cpy; - /* - * Check EOF (should never happen, since last 5 bytes - * are supposed to be literals) - */ - if (op == oend) - goto _output_error; - continue; - } - LZ4_SECURECOPY(ref, op, cpy); - op = cpy; /* correction */ +int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int originalSize) +{ + LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixEnd == (BYTE *)dest) { + result = LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, + usingExtDict, + lz4sd->prefixEnd - lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); + + if (result <= 0) + return result; + + lz4sd->prefixSize += originalSize; + lz4sd->prefixEnd += originalSize; + } else { + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, + usingExtDict, (BYTE *)dest, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize = originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; } - /* end of decoding */ - return (int) (((char *) op) - dest); - /* write overflow error detected */ -_output_error: - return -1; + return result; } +EXPORT_SYMBOL(LZ4_decompress_fast_continue); -int lz4_decompress(const unsigned char *src, size_t *src_len, - unsigned char *dest, size_t actual_dest_len) +/* + * Advanced decoding functions : + * *_usingDict() : + * These decoding functions work the same as "_continue" ones, + * the dictionary must be explicitly provided within parameters + */ +static inline int LZ4_decompress_usingDict_generic(const char *source, + char *dest, int compressedSize, int maxOutputSize, int safe, + const char *dictStart, int dictSize) { - int ret = -1; - int input_len = 0; - - input_len = lz4_uncompress(src, dest, actual_dest_len); - if (input_len < 0) - goto exit_0; - *src_len = input_len; + if (dictSize == 0) + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, safe, full, 0, + noDict, (BYTE *)dest, NULL, 0); + if (dictStart + dictSize == dest) { + if (dictSize >= (int)(64 * KB - 1)) + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, safe, full, 0, + withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0); + return LZ4_decompress_generic(source, dest, compressedSize, + maxOutputSize, safe, full, 0, noDict, + (BYTE *)dest - dictSize, NULL, 0); + } + return LZ4_decompress_generic(source, dest, compressedSize, + maxOutputSize, safe, full, 0, usingExtDict, + (BYTE *)dest, (const BYTE *)dictStart, dictSize); +} - return 0; -exit_0: - return ret; +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const char *dictStart, int dictSize) +{ + return LZ4_decompress_usingDict_generic(source, dest, + compressedSize, maxOutputSize, 1, dictStart, dictSize); } -#ifndef STATIC -EXPORT_SYMBOL(lz4_decompress); -#endif +EXPORT_SYMBOL(LZ4_decompress_safe_usingDict); -int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, - unsigned char *dest, size_t *dest_len) +int LZ4_decompress_fast_usingDict(const char *source, char *dest, + int originalSize, const char *dictStart, int dictSize) { - int ret = -1; - int out_len = 0; - - out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, - *dest_len); - if (out_len < 0) - goto exit_0; - *dest_len = out_len; - - return 0; -exit_0: - return ret; + return LZ4_decompress_usingDict_generic(source, dest, 0, + originalSize, 0, dictStart, dictSize); +} +EXPORT_SYMBOL(LZ4_decompress_fast_usingDict); + +/*-****************************** + * For backwards compatibility + ********************************/ +int lz4_decompress_unknownoutputsize(const unsigned char *src, + size_t src_len, unsigned char *dest, size_t *dest_len) { + *dest_len = LZ4_decompress_safe(src, dest, + (int)src_len, (int)((size_t)dest_len)); + + /* + * Prior lz4_decompress_unknownoutputsize will return + * 0 for success and a negative result for error + * new LZ4_decompress_safe returns + * - the length of data read on success + * - and also a negative result on error + * meaning when result > 0, we just return 0 here + */ + if (src_len > 0) { + return 0; + } else + return -1; } -#ifndef STATIC EXPORT_SYMBOL(lz4_decompress_unknownoutputsize); +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len) { + *src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len); + + /* + * Prior lz4_decompress will return + * 0 for success and a negative result for error + * new LZ4_decompress_fast returns + * - the length of data read on success + * - and also a negative result on error + * meaning when result > 0, we just return 0 here + */ + if ((int)((size_t)src_len) > 0) + return 0; + else + return -1; +} +EXPORT_SYMBOL(lz4_decompress); + MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("LZ4 Decompressor"); -#endif +MODULE_DESCRIPTION("LZ4 decompressor"); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index c79d7ea8a38e..b0d21b5f7d58 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -1,157 +1,219 @@ +#ifndef __LZ4DEFS_H__ +#define __LZ4DEFS_H__ + /* - * lz4defs.h -- architecture specific defines - * - * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * lz4defs.h -- common and architecture specific defines for the kernel usage + + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2016, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ +#include <asm/unaligned.h> +#include <linux/string.h> /* memset, memcpy */ + /* * Detects 64 bits mode - */ +*/ #if defined(CONFIG_64BIT) #define LZ4_ARCH64 1 #else #define LZ4_ARCH64 0 #endif -/* - * Architecture-specific macros - */ -#define BYTE u8 -typedef struct _U16_S { u16 v; } U16_S; -typedef struct _U32_S { u32 v; } U32_S; -typedef struct _U64_S { u64 v; } U64_S; -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - -#define A16(x) (((U16_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A64(x) (((U64_S *)(x))->v) - -#define PUT4(s, d) (A32(d) = A32(s)) -#define PUT8(s, d) (A64(d) = A64(s)) - -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ - (d = s - A16(p)) - -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ - do { \ - A16(p) = v; \ - p += 2; \ - } while (0) -#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ - -#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) -#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) -#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) - -#define PUT4(s, d) \ - put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) -#define PUT8(s, d) \ - put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) - -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ - (d = s - get_unaligned_le16(p)) - -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ - do { \ - put_unaligned_le16(v, (u16 *)(p)); \ - p += 2; \ - } while (0) +/*-************************************ + * Basic Types + **************************************/ +#include <linux/types.h> + +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +typedef uintptr_t uptrval; + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ #endif -#define COPYLENGTH 8 -#define ML_BITS 4 -#define ML_MASK ((1U << ML_BITS) - 1) -#define RUN_BITS (8 - ML_BITS) -#define RUN_MASK ((1U << RUN_BITS) - 1) -#define MEMORY_USAGE 14 -#define MINMATCH 4 -#define SKIPSTRENGTH 6 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH + MINMATCH) -#define MINLENGTH (MFLIMIT + 1) -#define MAXD_LOG 16 -#define MAXD (1 << MAXD_LOG) -#define MAXD_MASK (u32)(MAXD - 1) -#define MAX_DISTANCE (MAXD - 1) -#define HASH_LOG (MAXD_LOG - 1) -#define HASHTABLESIZE (1 << HASH_LOG) -#define MAX_NB_ATTEMPTS 256 -#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) -#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) -#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) -#define HASH64KTABLESIZE (1U << HASHLOG64K) -#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ - ((MINMATCH * 8) - (MEMORY_USAGE-2))) -#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ - ((MINMATCH * 8) - HASHLOG64K)) -#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ - ((MINMATCH * 8) - HASH_LOG)) - -#if LZ4_ARCH64/* 64-bit */ -#define STEPSIZE 8 - -#define LZ4_COPYSTEP(s, d) \ - do { \ - PUT8(s, d); \ - d += 8; \ - s += 8; \ - } while (0) - -#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) - -#define LZ4_SECURECOPY(s, d, e) \ - do { \ - if (d < e) { \ - LZ4_WILDCOPY(s, d, e); \ - } \ - } while (0) -#define HTYPE u32 - -#ifdef __BIG_ENDIAN -#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +/*-************************************ + * Constants + **************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (WILDCOPYLENGTH+MINMATCH) +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB (1<<10) +#define MB (1<<20) +#define GB (1U<<30) + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1<<MAXD_LOG) - 1) +#define STEPSIZE sizeof(size_t) + +#define ML_BITS 4 +#define ML_MASK ((1U<<ML_BITS)-1) +#define RUN_BITS (8-ML_BITS) +#define RUN_MASK ((1U<<RUN_BITS)-1) + +static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1)); +static const U32 LZ4_skipTrigger = 6; + +/*-************************************ + * Reading and writing into memory + **************************************/ + +static inline U16 LZ4_read16(const void *memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static inline U32 LZ4_read32(const void *memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static inline size_t LZ4_read_ARCH(const void *memPtr) +{ + size_t val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static inline void LZ4_write16(void *memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +static inline void LZ4_write32(void *memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +static inline U16 LZ4_readLE16(const void *memPtr) +{ +#ifdef __LITTLE_ENDIAN__ + return LZ4_read16(memPtr); #else -#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) + const BYTE *p = (const BYTE *)memPtr; + + return (U16)((U16)p[0] + (p[1] << 8)); #endif +} + +static inline void LZ4_writeLE16(void *memPtr, U16 value) +{ +#ifdef __LITTLE_ENDIAN__ + LZ4_write16(memPtr, value); +#else + BYTE *p = (BYTE *)memPtr; -#else /* 32-bit */ -#define STEPSIZE 4 + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); +#endif +} -#define LZ4_COPYSTEP(s, d) \ - do { \ - PUT4(s, d); \ - d += 4; \ - s += 4; \ - } while (0) +static inline void LZ4_copy8(void *dst, const void *src) +{ + memcpy(dst, src, 8); +} -#define LZ4_COPYPACKET(s, d) \ - do { \ - LZ4_COPYSTEP(s, d); \ - LZ4_COPYSTEP(s, d); \ - } while (0) +/* + * customized variant of memcpy, + * which can overwrite up to 7 bytes beyond dstEnd + */ +static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd) +{ + BYTE *d = (BYTE *)dstPtr; + const BYTE *s = (const BYTE *)srcPtr; + BYTE *const e = (BYTE *)dstEnd; -#define LZ4_SECURECOPY LZ4_WILDCOPY -#define HTYPE const u8* + do { LZ4_copy8(d, s); d += 8; s += 8; } while (d < e); +} -#ifdef __BIG_ENDIAN +#if LZ4_ARCH64 +#ifdef __BIG_ENDIAN__ +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#endif +#else +#ifdef __BIG_ENDIAN__ #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) #else #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) #endif +#endif +static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch, + const BYTE *pInLimit) +{ + const BYTE *const pStart = pIn; + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { + size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + + if (!diff) { + pIn += STEPSIZE; + pMatch += STEPSIZE; + continue; + } + pIn += LZ4_NBCOMMONBYTES(diff); + return (unsigned int)(pIn - pStart); + } + +#ifdef LZ4_ARCH64 + if ((pIn < (pInLimit-3)) + && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { + pIn += 4; pMatch += 4; + } #endif + if ((pIn < (pInLimit-1)) + && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { + pIn += 2; pMatch += 2; + } + if ((pIn < pInLimit) && (*pMatch == *pIn)) + pIn++; + return (unsigned int)(pIn - pStart); +} + +typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; +typedef enum { byPtr, byU32, byU16 } tableType_t; -#define LZ4_WILDCOPY(s, d, e) \ - do { \ - LZ4_COPYPACKET(s, d); \ - } while (d < e) - -#define LZ4_BLINDCOPY(s, d, l) \ - do { \ - u8 *e = (d) + l; \ - LZ4_WILDCOPY(s, d, e); \ - d = e; \ - } while (0) +typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { full = 0, partial = 1 } earlyEnd_directive; + +#endif diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index f344f76b6559..880c778b6e64 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -1,19 +1,17 @@ /* * LZ4 HC - High Compression Mode of LZ4 - * Copyright (C) 2011-2012, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * Copyright (C) 2011-2015, Yann Collet. * + * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * - * * Redistributions of source code must retain the above copyright + * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above + * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. - * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -25,323 +23,353 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * * You can contact the author at : - * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - * - LZ4 source repository : http://code.google.com/p/lz4/ + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 * - * Changed for kernel use by: - * Chanho Min <chanho.min@lge.com> + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ -#include <linux/module.h> -#include <linux/kernel.h> +/*-************************************ + * Dependencies + **************************************/ #include <linux/lz4.h> -#include <asm/unaligned.h> #include "lz4defs.h" +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/string.h> /* memset */ -struct lz4hc_data { - const u8 *base; - HTYPE hashtable[HASHTABLESIZE]; - u16 chaintable[MAXD]; - const u8 *nexttoupdate; -} __attribute__((__packed__)); +/* ************************************* + * Local Constants and types + ***************************************/ -static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) +#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH) + +#define HASH_FUNCTION(i) (((i) * 2654435761U) \ + >> ((MINMATCH*8) - LZ4HC_HASH_LOG)) +#define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */ + +static U32 LZ4HC_hashPtr(const void *ptr) { - memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); - memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); - -#if LZ4_ARCH64 - hc4->nexttoupdate = base + 1; -#else - hc4->nexttoupdate = base; -#endif - hc4->base = base; - return 1; + return HASH_FUNCTION(LZ4_read32(ptr)); +} + +/************************************** + * HC Compression + **************************************/ +static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start) +{ + memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable)); + memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); + hc4->nextToUpdate = 64 * KB; + hc4->base = start - 64 * KB; + hc4->end = start; + hc4->dictBase = start - 64 * KB; + hc4->dictLimit = 64 * KB; + hc4->lowLimit = 64 * KB; } /* Update chains up to ip (excluded) */ -static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) +static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4, + const BYTE *ip) { - u16 *chaintable = hc4->chaintable; - HTYPE *hashtable = hc4->hashtable; -#if LZ4_ARCH64 + U16 * const chainTable = hc4->chainTable; + U32 * const hashTable = hc4->hashTable; const BYTE * const base = hc4->base; -#else - const int base = 0; -#endif + U32 const target = (U32)(ip - base); + U32 idx = hc4->nextToUpdate; + + while (idx < target) { + U32 const h = LZ4HC_hashPtr(base + idx); + size_t delta = idx - hashTable[h]; - while (hc4->nexttoupdate < ip) { - const u8 *p = hc4->nexttoupdate; - size_t delta = p - (hashtable[HASH_VALUE(p)] + base); if (delta > MAX_DISTANCE) delta = MAX_DISTANCE; - chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; - hashtable[HASH_VALUE(p)] = (p) - base; - hc4->nexttoupdate++; - } -} -static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, - const u8 *const matchlimit) -{ - const u8 *p1t = p1; - - while (p1t < matchlimit - (STEPSIZE - 1)) { -#if LZ4_ARCH64 - u64 diff = A64(p2) ^ A64(p1t); -#else - u32 diff = A32(p2) ^ A32(p1t); -#endif - if (!diff) { - p1t += STEPSIZE; - p2 += STEPSIZE; - continue; - } - p1t += LZ4_NBCOMMONBYTES(diff); - return p1t - p1; - } -#if LZ4_ARCH64 - if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { - p1t += 4; - p2 += 4; - } -#endif + DELTANEXTU16(idx) = (U16)delta; - if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { - p1t += 2; - p2 += 2; + hashTable[h] = idx; + idx++; } - if ((p1t < matchlimit) && (*p2 == *p1t)) - p1t++; - return p1t - p1; + + hc4->nextToUpdate = target; } -static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, - const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) +static inline int LZ4HC_InsertAndFindBestMatch( + LZ4HC_CCtx_internal *hc4, /* Index table will be updated */ + const BYTE *ip, + const BYTE * const iLimit, + const BYTE **matchpos, + const int maxNbAttempts) { - u16 *const chaintable = hc4->chaintable; - HTYPE *const hashtable = hc4->hashtable; - const u8 *ref; -#if LZ4_ARCH64 + U16 * const chainTable = hc4->chainTable; + U32 * const HashTable = hc4->hashTable; const BYTE * const base = hc4->base; -#else - const int base = 0; -#endif - int nbattempts = MAX_NB_ATTEMPTS; - size_t repl = 0, ml = 0; - u16 delta; + const BYTE * const dictBase = hc4->dictBase; + const U32 dictLimit = hc4->dictLimit; + const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base)) + ? hc4->lowLimit + : (U32)(ip - base) - (64 * KB - 1); + U32 matchIndex; + int nbAttempts = maxNbAttempts; + size_t ml = 0; /* HC4 match finder */ - lz4hc_insert(hc4, ip); - ref = hashtable[HASH_VALUE(ip)] + base; - - /* potential repetition */ - if (ref >= ip-4) { - /* confirmed */ - if (A32(ref) == A32(ip)) { - delta = (u16)(ip-ref); - repl = ml = lz4hc_commonlength(ip + MINMATCH, - ref + MINMATCH, matchlimit) + MINMATCH; - *matchpos = ref; - } - ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; - } + LZ4HC_Insert(hc4, ip); + matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + + while ((matchIndex >= lowLimit) + && (nbAttempts)) { + nbAttempts--; + if (matchIndex >= dictLimit) { + const BYTE * const match = base + matchIndex; + + if (*(match + ml) == *(ip + ml) + && (LZ4_read32(match) == LZ4_read32(ip))) { + size_t const mlt = LZ4_count(ip + MINMATCH, + match + MINMATCH, iLimit) + MINMATCH; - while ((ref >= ip - MAX_DISTANCE) && nbattempts) { - nbattempts--; - if (*(ref + ml) == *(ip + ml)) { - if (A32(ref) == A32(ip)) { - size_t mlt = - lz4hc_commonlength(ip + MINMATCH, - ref + MINMATCH, matchlimit) + MINMATCH; if (mlt > ml) { ml = mlt; - *matchpos = ref; + *matchpos = match; + } + } + } else { + const BYTE * const match = dictBase + matchIndex; + + if (LZ4_read32(match) == LZ4_read32(ip)) { + size_t mlt; + const BYTE *vLimit = ip + + (dictLimit - matchIndex); + + if (vLimit > iLimit) + vLimit = iLimit; + mlt = LZ4_count(ip + MINMATCH, + match + MINMATCH, vLimit) + MINMATCH; + if ((ip + mlt == vLimit) + && (vLimit < iLimit)) + mlt += LZ4_count(ip + mlt, + base + dictLimit, + iLimit); + if (mlt > ml) { + /* virtual matchpos */ + ml = mlt; + *matchpos = base + matchIndex; } } } - ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; - } - - /* Complete table */ - if (repl) { - const BYTE *ptr = ip; - const BYTE *end; - end = ip + repl - (MINMATCH-1); - /* Pre-Load */ - while (ptr < end - delta) { - chaintable[(size_t)(ptr) & MAXD_MASK] = delta; - ptr++; - } - do { - chaintable[(size_t)(ptr) & MAXD_MASK] = delta; - /* Head of chain */ - hashtable[HASH_VALUE(ptr)] = (ptr) - base; - ptr++; - } while (ptr < end); - hc4->nexttoupdate = end; + matchIndex -= DELTANEXTU16(matchIndex); } return (int)ml; } -static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, - const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, - const u8 **matchpos, const u8 **startpos) +static inline int LZ4HC_InsertAndGetWiderMatch( + LZ4HC_CCtx_internal *hc4, + const BYTE * const ip, + const BYTE * const iLowLimit, + const BYTE * const iHighLimit, + int longest, + const BYTE **matchpos, + const BYTE **startpos, + const int maxNbAttempts) { - u16 *const chaintable = hc4->chaintable; - HTYPE *const hashtable = hc4->hashtable; -#if LZ4_ARCH64 + U16 * const chainTable = hc4->chainTable; + U32 * const HashTable = hc4->hashTable; const BYTE * const base = hc4->base; -#else - const int base = 0; -#endif - const u8 *ref; - int nbattempts = MAX_NB_ATTEMPTS; - int delta = (int)(ip - startlimit); + const U32 dictLimit = hc4->dictLimit; + const BYTE * const lowPrefixPtr = base + dictLimit; + const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base)) + ? hc4->lowLimit + : (U32)(ip - base) - (64 * KB - 1); + const BYTE * const dictBase = hc4->dictBase; + U32 matchIndex; + int nbAttempts = maxNbAttempts; + int delta = (int)(ip - iLowLimit); /* First Match */ - lz4hc_insert(hc4, ip); - ref = hashtable[HASH_VALUE(ip)] + base; - - while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) - && (nbattempts)) { - nbattempts--; - if (*(startlimit + longest) == *(ref - delta + longest)) { - if (A32(ref) == A32(ip)) { - const u8 *reft = ref + MINMATCH; - const u8 *ipt = ip + MINMATCH; - const u8 *startt = ip; - - while (ipt < matchlimit-(STEPSIZE - 1)) { - #if LZ4_ARCH64 - u64 diff = A64(reft) ^ A64(ipt); - #else - u32 diff = A32(reft) ^ A32(ipt); - #endif - - if (!diff) { - ipt += STEPSIZE; - reft += STEPSIZE; - continue; + LZ4HC_Insert(hc4, ip); + matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + + while ((matchIndex >= lowLimit) + && (nbAttempts)) { + nbAttempts--; + if (matchIndex >= dictLimit) { + const BYTE *matchPtr = base + matchIndex; + + if (*(iLowLimit + longest) + == *(matchPtr - delta + longest)) { + if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { + int mlt = MINMATCH + + LZ4_count(ip + MINMATCH, + matchPtr + MINMATCH, + iHighLimit); + int back = 0; + + while ((ip + back > iLowLimit) + && (matchPtr + back > lowPrefixPtr) + && (ip[back - 1] == matchPtr[back - 1])) + back--; + + mlt -= back; + + if (mlt > longest) { + longest = (int)mlt; + *matchpos = matchPtr + back; + *startpos = ip + back; } - ipt += LZ4_NBCOMMONBYTES(diff); - goto _endcount; - } - #if LZ4_ARCH64 - if ((ipt < (matchlimit - 3)) - && (A32(reft) == A32(ipt))) { - ipt += 4; - reft += 4; - } - ipt += 2; - #endif - if ((ipt < (matchlimit - 1)) - && (A16(reft) == A16(ipt))) { - reft += 2; } - if ((ipt < matchlimit) && (*reft == *ipt)) - ipt++; -_endcount: - reft = ref; - - while ((startt > startlimit) - && (reft > hc4->base) - && (startt[-1] == reft[-1])) { - startt--; - reft--; - } - - if ((ipt - startt) > longest) { - longest = (int)(ipt - startt); - *matchpos = reft; - *startpos = startt; + } + } else { + const BYTE * const matchPtr = dictBase + matchIndex; + + if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { + size_t mlt; + int back = 0; + const BYTE *vLimit = ip + (dictLimit - matchIndex); + + if (vLimit > iHighLimit) + vLimit = iHighLimit; + + mlt = LZ4_count(ip + MINMATCH, + matchPtr + MINMATCH, vLimit) + MINMATCH; + + if ((ip + mlt == vLimit) && (vLimit < iHighLimit)) + mlt += LZ4_count(ip + mlt, base + dictLimit, + iHighLimit); + while ((ip + back > iLowLimit) + && (matchIndex + back > lowLimit) + && (ip[back - 1] == matchPtr[back - 1])) + back--; + mlt -= back; + if ((int)mlt > longest) { + longest = (int)mlt; + *matchpos = base + matchIndex + back; + *startpos = ip + back; } } } - ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + matchIndex -= DELTANEXTU16(matchIndex); } + return longest; } -static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, - int ml, const u8 *ref) +static inline int LZ4HC_encodeSequence( + const BYTE **ip, + BYTE **op, + const BYTE **anchor, + int matchLength, + const BYTE * const match, + limitedOutput_directive limitedOutputBuffer, + BYTE *oend) { - int length, len; - u8 *token; + int length; + BYTE *token; /* Encode Literal length */ length = (int)(*ip - *anchor); token = (*op)++; + + if ((limitedOutputBuffer) + && ((*op + (length>>8) + + length + (2 + 1 + LASTLITERALS)) > oend)) { + /* Check output limit */ + return 1; + } if (length >= (int)RUN_MASK) { - *token = (RUN_MASK << ML_BITS); + int len; + + *token = (RUN_MASK<<ML_BITS); len = length - RUN_MASK; - for (; len > 254 ; len -= 255) + for (; len > 254 ; len -= 255) { *(*op)++ = 255; - *(*op)++ = (u8)len; + *(*op)++ = (BYTE)len; + } } else - *token = (length << ML_BITS); + *token = (BYTE)(length<<ML_BITS); /* Copy Literals */ - LZ4_BLINDCOPY(*anchor, *op, length); + LZ4_wildCopy(*op, *anchor, (*op) + length); + *op += length; /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); + LZ4_writeLE16(*op, (U16)(*ip - match)); *op += 2; /* Encode MatchLength */ - len = (int)(ml - MINMATCH); - if (len >= (int)ML_MASK) { + length = (int)(matchLength - MINMATCH); + if ((limitedOutputBuffer) + && (*op + (length>>8) + + (1 + LASTLITERALS) > oend)) { + /* Check output limit */ + return 1; + } + if (length >= (int)ML_MASK) { *token += ML_MASK; - len -= ML_MASK; - for (; len > 509 ; len -= 510) { + length -= ML_MASK; + for (; length > 509 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; } - if (len > 254) { - len -= 255; + if (length > 254) { + length -= 255; *(*op)++ = 255; } - *(*op)++ = (u8)len; + *(*op)++ = (BYTE)length; } else - *token += len; + *token += (BYTE)(length); /* Prepare next loop */ - *ip += ml; + *ip += matchLength; *anchor = *ip; return 0; } -static int lz4_compresshcctx(struct lz4hc_data *ctx, - const char *source, - char *dest, - int isize) +static int LZ4HC_compress_generic( + LZ4HC_CCtx_internal *const ctx, + const char * const source, + char * const dest, + int const inputSize, + int const maxOutputSize, + int compressionLevel, + limitedOutput_directive limit + ) { - const u8 *ip = (const u8 *)source; - const u8 *anchor = ip; - const u8 *const iend = ip + isize; - const u8 *const mflimit = iend - MFLIMIT; - const u8 *const matchlimit = (iend - LASTLITERALS); + const BYTE *ip = (const BYTE *) source; + const BYTE *anchor = ip; + const BYTE * const iend = ip + inputSize; + const BYTE * const mflimit = iend - MFLIMIT; + const BYTE * const matchlimit = (iend - LASTLITERALS); - u8 *op = (u8 *)dest; + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + maxOutputSize; + unsigned int maxNbAttempts; int ml, ml2, ml3, ml0; - const u8 *ref = NULL; - const u8 *start2 = NULL; - const u8 *ref2 = NULL; - const u8 *start3 = NULL; - const u8 *ref3 = NULL; - const u8 *start0; - const u8 *ref0; - int lastrun; + const BYTE *ref = NULL; + const BYTE *start2 = NULL; + const BYTE *ref2 = NULL; + const BYTE *start3 = NULL; + const BYTE *ref3 = NULL; + const BYTE *start0; + const BYTE *ref0; + + /* init */ + if (compressionLevel > LZ4HC_MAX_CLEVEL) + compressionLevel = LZ4HC_MAX_CLEVEL; + if (compressionLevel < 1) + compressionLevel = LZ4HC_DEFAULT_CLEVEL; + maxNbAttempts = 1 << (compressionLevel - 1); + ctx->end += inputSize; ip++; /* Main Loop */ while (ip < mflimit) { - ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); + ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, + matchlimit, (&ref), maxNbAttempts); if (!ml) { ip++; continue; @@ -351,46 +379,52 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx, start0 = ip; ref0 = ref; ml0 = ml; -_search2: - if (ip+ml < mflimit) - ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, - ip + 1, matchlimit, ml, &ref2, &start2); + +_Search2: + if (ip + ml < mflimit) + ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, + ip + ml - 2, ip + 0, + matchlimit, ml, &ref2, + &start2, maxNbAttempts); else ml2 = ml; - /* No better match */ + if (ml2 == ml) { - lz4_encodesequence(&ip, &op, &anchor, ml, ref); + /* No better match */ + if (LZ4HC_encodeSequence(&ip, &op, + &anchor, ml, ref, limit, oend)) + return 0; continue; } if (start0 < ip) { - /* empirical */ if (start2 < ip + ml0) { + /* empirical */ ip = start0; ref = ref0; ml = ml0; } } - /* - * Here, start0==ip - * First Match too small : removed - */ + + /* Here, start0 == ip */ if ((start2 - ip) < 3) { + /* First Match too small : removed */ ml = ml2; ip = start2; ref = ref2; - goto _search2; + goto _Search2; } -_search3: +_Search3: /* - * Currently we have : - * ml2 > ml1, and - * ip1+3 <= ip2 (usually < ip1+ml1) - */ + * Currently we have : + * ml2 > ml1, and + * ip1 + 3 <= ip2 (usually < ip1 + ml1) + */ if ((start2 - ip) < OPTIMAL_ML) { int correction; int new_ml = ml; + if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; if (ip + new_ml > start2 + ml2 - MINMATCH) @@ -403,39 +437,44 @@ _search3: } } /* - * Now, we have start2 = ip+new_ml, - * with new_ml=min(ml, OPTIMAL_ML=18) + * Now, we have start2 = ip + new_ml, + * with new_ml = min(ml, OPTIMAL_ML = 18) */ + if (start2 + ml2 < mflimit) - ml3 = lz4hc_insertandgetwidermatch(ctx, - start2 + ml2 - 3, start2, matchlimit, - ml2, &ref3, &start3); + ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, + start2 + ml2 - 3, start2, + matchlimit, ml2, &ref3, &start3, + maxNbAttempts); else ml3 = ml2; - /* No better match : 2 sequences to encode */ if (ml3 == ml2) { + /* No better match : 2 sequences to encode */ /* ip & ref are known; Now for ml */ - if (start2 < ip+ml) + if (start2 < ip + ml) ml = (int)(start2 - ip); - /* Now, encode 2 sequences */ - lz4_encodesequence(&ip, &op, &anchor, ml, ref); + if (LZ4HC_encodeSequence(&ip, &op, &anchor, + ml, ref, limit, oend)) + return 0; ip = start2; - lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); + if (LZ4HC_encodeSequence(&ip, &op, &anchor, + ml2, ref2, limit, oend)) + return 0; continue; } - /* Not enough space for match 2 : remove it */ if (start3 < ip + ml + 3) { - /* - * can write Seq1 immediately ==> Seq2 is removed, - * so Seq3 becomes Seq1 - */ + /* Not enough space for match 2 : remove it */ if (start3 >= (ip + ml)) { + /* can write Seq1 immediately + * ==> Seq2 is removed, + * so Seq3 becomes Seq1 + */ if (start2 < ip + ml) { - int correction = - (int)(ip + ml - start2); + int correction = (int)(ip + ml - start2); + start2 += correction; ref2 += correction; ml2 -= correction; @@ -446,35 +485,38 @@ _search3: } } - lz4_encodesequence(&ip, &op, &anchor, ml, ref); - ip = start3; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, + ml, ref, limit, oend)) + return 0; + ip = start3; ref = ref3; - ml = ml3; + ml = ml3; start0 = start2; ref0 = ref2; ml0 = ml2; - goto _search2; + goto _Search2; } start2 = start3; ref2 = ref3; ml2 = ml3; - goto _search3; + goto _Search3; } /* - * OK, now we have 3 ascending matches; let's write at least - * the first one ip & ref are known; Now for ml - */ + * OK, now we have 3 ascending matches; + * let's write at least the first one + * ip & ref are known; Now for ml + */ if (start2 < ip + ml) { if ((start2 - ip) < (int)ML_MASK) { int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; if (ip + ml > start2 + ml2 - MINMATCH) - ml = (int)(start2 - ip) + ml2 - - MINMATCH; + ml = (int)(start2 - ip) + ml2 - MINMATCH; correction = ml - (int)(start2 - ip); if (correction > 0) { start2 += correction; @@ -484,7 +526,9 @@ _search3: } else ml = (int)(start2 - ip); } - lz4_encodesequence(&ip, &op, &anchor, ml, ref); + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, + ref, limit, oend)) + return 0; ip = start2; ref = ref2; @@ -494,46 +538,243 @@ _search3: ref2 = ref3; ml2 = ml3; - goto _search3; + goto _Search3; } /* Encode Last Literals */ - lastrun = (int)(iend - anchor); - if (lastrun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastrun -= RUN_MASK; - for (; lastrun > 254 ; lastrun -= 255) - *op++ = 255; - *op++ = (u8) lastrun; - } else - *op++ = (lastrun << ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend - anchor; + { int lastRun = (int)(iend - anchor); + + if ((limit) + && (((char *)op - dest) + lastRun + 1 + + ((lastRun + 255 - RUN_MASK)/255) + > (U32)maxOutputSize)) { + /* Check output limit */ + return 0; + } + if (lastRun >= (int)RUN_MASK) { + *op++ = (RUN_MASK<<ML_BITS); + lastRun -= RUN_MASK; + for (; lastRun > 254 ; lastRun -= 255) { + *op++ = 255; + *op++ = (BYTE) lastRun; + } + } else + *op++ = (BYTE)(lastRun<<ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + } + /* End */ return (int) (((char *)op) - dest); } -int lz4hc_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem) +static int LZ4_compress_HC_extStateHC( + void *state, + const char *src, + char *dst, + int srcSize, + int maxDstSize, + int compressionLevel) { - int ret = -1; - int out_len = 0; + LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse; - struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; - lz4hc_init(hc4, (const u8 *)src); - out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, - (char *)dst, (int)src_len); + if (((size_t)(state)&(sizeof(void *) - 1)) != 0) { + /* Error : state is not aligned + * for pointers (32 or 64 bits) + */ + return 0; + } - if (out_len < 0) - goto exit; + LZ4HC_init(ctx, (const BYTE *)src); - *dst_len = out_len; - return 0; + if (maxDstSize < LZ4_compressBound(srcSize)) + return LZ4HC_compress_generic(ctx, src, dst, + srcSize, maxDstSize, compressionLevel, limitedOutput); + else + return LZ4HC_compress_generic(ctx, src, dst, + srcSize, maxDstSize, compressionLevel, noLimit); +} -exit: - return ret; +int LZ4_compress_HC(const char *src, char *dst, int srcSize, + int maxDstSize, int compressionLevel, void *wrkmem) +{ + return LZ4_compress_HC_extStateHC(wrkmem, src, dst, + srcSize, maxDstSize, compressionLevel); +} +EXPORT_SYMBOL(LZ4_compress_HC); + +/*-****************************** + * For backwards compatibility + ********************************/ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + *dst_len = LZ4_compress_HC(src, dst, (int)src_len, + (int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem); + + /* + * Prior lz4hc_compress will return -1 in case of error + * and 0 on success + * while new LZ4_compress_HC + * returns 0 in case of error + * and the output length on success + */ + if (!dst_len) + return -1; + else + return 0; } EXPORT_SYMBOL(lz4hc_compress); +/************************************** + * Streaming Functions + **************************************/ +void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel) +{ + LZ4_streamHCPtr->internal_donotuse.base = NULL; + LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel; +} + +int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr, + const char *dictionary, + int dictSize) +{ + LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + + if (dictSize > 64 * KB) { + dictionary += dictSize - 64 * KB; + dictSize = 64 * KB; + } + LZ4HC_init(ctxPtr, (const BYTE *)dictionary); + if (dictSize >= 4) + LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3)); + ctxPtr->end = (const BYTE *)dictionary + dictSize; + return dictSize; +} +EXPORT_SYMBOL(LZ4_loadDictHC); + +/* compression */ + +static void LZ4HC_setExternalDict( + LZ4HC_CCtx_internal *ctxPtr, + const BYTE *newBlock) +{ + if (ctxPtr->end >= ctxPtr->base + 4) { + /* Referencing remaining dictionary content */ + LZ4HC_Insert(ctxPtr, ctxPtr->end - 3); + } + + /* + * Only one memory segment for extDict, + * so any previous extDict is lost at this stage + */ + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base); + ctxPtr->dictBase = ctxPtr->base; + ctxPtr->base = newBlock - ctxPtr->dictLimit; + ctxPtr->end = newBlock; + /* match referencing will resume from there */ + ctxPtr->nextToUpdate = ctxPtr->dictLimit; +} +EXPORT_SYMBOL(LZ4HC_setExternalDict); + +static int LZ4_compressHC_continue_generic( + LZ4_streamHC_t *LZ4_streamHCPtr, + const char *source, + char *dest, + int inputSize, + int maxOutputSize, + limitedOutput_directive limit) +{ + LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + + /* auto - init if forgotten */ + if (ctxPtr->base == NULL) + LZ4HC_init(ctxPtr, (const BYTE *) source); + + /* Check overflow */ + if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) { + size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) + - ctxPtr->dictLimit; + if (dictSize > 64 * KB) + dictSize = 64 * KB; + LZ4_loadDictHC(LZ4_streamHCPtr, + (const char *)(ctxPtr->end) - dictSize, (int)dictSize); + } + + /* Check if blocks follow each other */ + if ((const BYTE *)source != ctxPtr->end) + LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source); + + /* Check overlapping input/dictionary space */ + { const BYTE *sourceEnd = (const BYTE *) source + inputSize; + const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit; + const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit; + + if ((sourceEnd > dictBegin) + && ((const BYTE *)source < dictEnd)) { + if (sourceEnd > dictEnd) + sourceEnd = dictEnd; + ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); + + if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) + ctxPtr->lowLimit = ctxPtr->dictLimit; + } + } + + return LZ4HC_compress_generic(ctxPtr, source, dest, + inputSize, maxOutputSize, ctxPtr->compressionLevel, limit); +} + +int LZ4_compress_HC_continue( + LZ4_streamHC_t *LZ4_streamHCPtr, + const char *source, + char *dest, + int inputSize, + int maxOutputSize) +{ + if (maxOutputSize < LZ4_compressBound(inputSize)) + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, + source, dest, inputSize, maxOutputSize, limitedOutput); + else + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, + source, dest, inputSize, maxOutputSize, noLimit); +} +EXPORT_SYMBOL(LZ4_compress_HC_continue); + +/* dictionary saving */ + +int LZ4_saveDictHC( + LZ4_streamHC_t *LZ4_streamHCPtr, + char *safeBuffer, + int dictSize) +{ + LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse; + int const prefixSize = (int)(streamPtr->end + - (streamPtr->base + streamPtr->dictLimit)); + + if (dictSize > 64 * KB) + dictSize = 64 * KB; + if (dictSize < 4) + dictSize = 0; + if (dictSize > prefixSize) + dictSize = prefixSize; + + memmove(safeBuffer, streamPtr->end - dictSize, dictSize); + + { U32 const endIndex = (U32)(streamPtr->end - streamPtr->base); + + streamPtr->end = (const BYTE *)safeBuffer + dictSize; + streamPtr->base = streamPtr->end - endIndex; + streamPtr->dictLimit = endIndex - dictSize; + streamPtr->lowLimit = endIndex - dictSize; + + if (streamPtr->nextToUpdate < streamPtr->dictLimit) + streamPtr->nextToUpdate = streamPtr->dictLimit; + } + return dictSize; +} +EXPORT_SYMBOL(LZ4_saveDictHC); + MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("LZ4HC compressor"); +MODULE_DESCRIPTION("LZ4 HC compressor"); diff --git a/lib/rbtree.c b/lib/rbtree.c index 1f8b112a7c35..4ba2828a67c0 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -427,7 +427,9 @@ static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} static const struct rb_augment_callbacks dummy_callbacks = { - dummy_propagate, dummy_copy, dummy_rotate + .propagate = dummy_propagate, + .copy = dummy_copy, + .rotate = dummy_rotate }; void rb_insert_color(struct rb_node *node, struct rb_root *root) diff --git a/lib/show_mem.c b/lib/show_mem.c index 1feed6a2b12a..0beaa1d899aa 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -9,13 +9,13 @@ #include <linux/quicklist.h> #include <linux/cma.h> -void show_mem(unsigned int filter) +void show_mem(unsigned int filter, nodemask_t *nodemask) { pg_data_t *pgdat; unsigned long total = 0, reserved = 0, highmem = 0; printk("Mem-Info:\n"); - show_free_areas(filter); + show_free_areas(filter, nodemask); for_each_online_pgdat(pgdat) { unsigned long flags; diff --git a/lib/sort.c b/lib/sort.c index fc20df42aa6f..975c6ef6fec7 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -4,6 +4,8 @@ * Jan 23 2005 Matt Mackall <mpm@selenic.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/export.h> #include <linux/sort.h> @@ -101,42 +103,3 @@ void sort(void *base, size_t num, size_t size, } EXPORT_SYMBOL(sort); - -#if 0 -#include <linux/slab.h> -/* a simple boot-time regression test */ - -int cmpint(const void *a, const void *b) -{ - return *(int *)a - *(int *)b; -} - -static int sort_test(void) -{ - int *a, i, r = 1; - - a = kmalloc(1000 * sizeof(int), GFP_KERNEL); - BUG_ON(!a); - - printk("testing sort()\n"); - - for (i = 0; i < 1000; i++) { - r = (r * 725861) % 6599; - a[i] = r; - } - - sort(a, 1000, sizeof(int), cmpint, NULL); - - for (i = 0; i < 999; i++) - if (a[i] > a[i+1]) { - printk("sort() failed!\n"); - break; - } - - kfree(a); - - return 0; -} - -module_init(sort_test); -#endif diff --git a/lib/test_kasan.c b/lib/test_kasan.c index fbdf87920093..0b1d3140fbb8 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) "kasan test: %s " fmt, __func__ +#include <linux/delay.h> #include <linux/kernel.h> #include <linux/mman.h> #include <linux/mm.h> @@ -331,6 +332,38 @@ static noinline void __init kmem_cache_oob(void) kmem_cache_destroy(cache); } +static noinline void __init memcg_accounted_kmem_cache(void) +{ + int i; + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + + pr_info("allocate memcg accounted object\n"); + /* + * Several allocations with a delay to allow for lazy per memcg kmem + * cache creation. + */ + for (i = 0; i < 5; i++) { + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + goto free_cache; + } + kmem_cache_free(cache, p); + msleep(100); + } + +free_cache: + kmem_cache_destroy(cache); +} + static char global_array[10]; static noinline void __init kasan_global_oob(void) @@ -460,6 +493,7 @@ static int __init kmalloc_tests_init(void) kmalloc_uaf_memset(); kmalloc_uaf2(); kmem_cache_oob(); + memcg_accounted_kmem_cache(); kasan_stack_oob(); kasan_global_oob(); ksize_unpoisons_memory(); diff --git a/lib/test_sort.c b/lib/test_sort.c new file mode 100644 index 000000000000..d389c1cc2f6c --- /dev/null +++ b/lib/test_sort.c @@ -0,0 +1,43 @@ +#include <linux/sort.h> +#include <linux/slab.h> +#include <linux/module.h> + +/* a simple boot-time regression test */ + +#define TEST_LEN 1000 + +static int __init cmpint(const void *a, const void *b) +{ + return *(int *)a - *(int *)b; +} + +static int __init test_sort_init(void) +{ + int *a, i, r = 1, err = -ENOMEM; + + a = kmalloc_array(TEST_LEN, sizeof(*a), GFP_KERNEL); + if (!a) + return err; + + for (i = 0; i < TEST_LEN; i++) { + r = (r * 725861) % 6599; + a[i] = r; + } + + sort(a, TEST_LEN, sizeof(*a), cmpint, NULL); + + err = -EINVAL; + for (i = 0; i < TEST_LEN-1; i++) + if (a[i] > a[i+1]) { + pr_err("test has failed\n"); + goto exit; + } + err = 0; + pr_info("test passed\n"); +exit: + kfree(a); + return err; +} + +module_init(test_sort_init); +MODULE_LICENSE("GPL"); diff --git a/mm/Kconfig b/mm/Kconfig index 9b8fccb969dc..4aff67e042f1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -690,6 +690,7 @@ config ZONE_DEVICE depends on MEMORY_HOTREMOVE depends on SPARSEMEM_VMEMMAP depends on X86_64 #arch_add_memory() comprehends device memory + select RADIX_TREE_MULTIORDER help Device memory hotplug support allows for establishing pmem, diff --git a/mm/Makefile b/mm/Makefile index 295bd7a9f76b..433eaf9a876e 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -35,7 +35,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o vmacache.o \ + compaction.o vmacache.o swap_slots.o \ interval_tree.o list_lru.o workingset.o \ debug.o $(mmu-y) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 3bfed5ab2475..ffb77a1130af 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -410,8 +410,8 @@ retry: while (*node != NULL) { parent = *node; - congested = container_of(parent, struct bdi_writeback_congested, - rb_node); + congested = rb_entry(parent, struct bdi_writeback_congested, + rb_node); if (congested->blkcg_id < blkcg_id) node = &parent->rb_left; else if (congested->blkcg_id > blkcg_id) diff --git a/mm/bootmem.c b/mm/bootmem.c index e8a55a3c9feb..9fedb27c6451 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -53,7 +53,7 @@ early_param("bootmem_debug", bootmem_debug_setup); static unsigned long __init bootmap_bytes(unsigned long pages) { - unsigned long bytes = DIV_ROUND_UP(pages, 8); + unsigned long bytes = DIV_ROUND_UP(pages, BITS_PER_BYTE); return ALIGN(bytes, sizeof(long)); } diff --git a/mm/compaction.c b/mm/compaction.c index 949198d01260..0aa2757399ee 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -548,7 +548,7 @@ isolate_fail: if (blockpfn == end_pfn) update_pageblock_skip(cc, valid_page, total_isolated, false); - count_compact_events(COMPACTFREE_SCANNED, nr_scanned); + cc->total_free_scanned += nr_scanned; if (total_isolated) count_compact_events(COMPACTISOLATED, total_isolated); return total_isolated; @@ -931,7 +931,7 @@ isolate_fail: trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn, nr_scanned, nr_isolated); - count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned); + cc->total_migrate_scanned += nr_scanned; if (nr_isolated) count_compact_events(COMPACTISOLATED, nr_isolated); @@ -1631,6 +1631,9 @@ out: zone->compact_cached_free_pfn = free_pfn; } + count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned); + count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned); + trace_mm_compaction_end(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn, sync, ret); @@ -1645,6 +1648,8 @@ static enum compact_result compact_zone_order(struct zone *zone, int order, struct compact_control cc = { .nr_freepages = 0, .nr_migratepages = 0, + .total_migrate_scanned = 0, + .total_free_scanned = 0, .order = order, .gfp_mask = gfp_mask, .zone = zone, @@ -1757,6 +1762,8 @@ static void compact_node(int nid) struct zone *zone; struct compact_control cc = { .order = -1, + .total_migrate_scanned = 0, + .total_free_scanned = 0, .mode = MIGRATE_SYNC, .ignore_skip_hint = true, .whole_zone = true, @@ -1883,6 +1890,8 @@ static void kcompactd_do_work(pg_data_t *pgdat) struct zone *zone; struct compact_control cc = { .order = pgdat->kcompactd_max_order, + .total_migrate_scanned = 0, + .total_free_scanned = 0, .classzone_idx = pgdat->kcompactd_classzone_idx, .mode = MIGRATE_SYNC_LIGHT, .ignore_skip_hint = true, @@ -1891,7 +1900,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) }; trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order, cc.classzone_idx); - count_vm_event(KCOMPACTD_WAKE); + count_compact_event(KCOMPACTD_WAKE); for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) { int status; @@ -1909,6 +1918,8 @@ static void kcompactd_do_work(pg_data_t *pgdat) cc.nr_freepages = 0; cc.nr_migratepages = 0; + cc.total_migrate_scanned = 0; + cc.total_free_scanned = 0; cc.zone = zone; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -1927,6 +1938,11 @@ static void kcompactd_do_work(pg_data_t *pgdat) defer_compaction(zone, cc.order); } + count_compact_events(KCOMPACTD_MIGRATE_SCANNED, + cc.total_migrate_scanned); + count_compact_events(KCOMPACTD_FREE_SCANNED, + cc.total_free_scanned); + VM_BUG_ON(!list_empty(&cc.freepages)); VM_BUG_ON(!list_empty(&cc.migratepages)); } @@ -1950,6 +1966,13 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx) if (pgdat->kcompactd_max_order < order) pgdat->kcompactd_max_order = order; + /* + * Pairs with implicit barrier in wait_event_freezable() + * such that wakeups are not missed in the lockless + * waitqueue_active() call. + */ + smp_acquire__after_ctrl_dep(); + if (pgdat->kcompactd_classzone_idx > classzone_idx) pgdat->kcompactd_classzone_idx = classzone_idx; diff --git a/mm/filemap.c b/mm/filemap.c index b772a33ef640..14bddd0d7fa4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -788,7 +788,7 @@ static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void return autoremove_wake_function(wait, mode, sync, key); } -void wake_up_page_bit(struct page *page, int bit_nr) +static void wake_up_page_bit(struct page *page, int bit_nr) { wait_queue_head_t *q = page_waitqueue(page); struct wait_page_key key; @@ -821,7 +821,13 @@ void wake_up_page_bit(struct page *page, int bit_nr) } spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL(wake_up_page_bit); + +static void wake_up_page(struct page *page, int bit) +{ + if (!PageWaiters(page)) + return; + wake_up_page_bit(page, bit); +} static inline int wait_on_page_bit_common(wait_queue_head_t *q, struct page *page, int bit_nr, int state, bool lock) @@ -1013,7 +1019,7 @@ EXPORT_SYMBOL_GPL(page_endio); /** * __lock_page - get a lock on the page, assuming we need to sleep to get it - * @page: the page to lock + * @__page: the page to lock */ void __lock_page(struct page *__page) { @@ -2158,7 +2164,6 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, /** * filemap_fault - read in file data for page fault handling - * @vma: vma in which the fault was taken * @vmf: struct vm_fault containing details of the fault * * filemap_fault() is invoked via the vma operations vector for a @@ -2180,10 +2185,10 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, * * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. */ -int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int filemap_fault(struct vm_fault *vmf) { int error; - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct address_space *mapping = file->f_mapping; struct file_ra_state *ra = &file->f_ra; struct inode *inode = mapping->host; @@ -2205,12 +2210,12 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * We found the page, so try async readahead before * waiting for the lock. */ - do_async_mmap_readahead(vma, ra, file, page, offset); + do_async_mmap_readahead(vmf->vma, ra, file, page, offset); } else if (!page) { /* No page in the page cache at all */ - do_sync_mmap_readahead(vma, ra, file, offset); + do_sync_mmap_readahead(vmf->vma, ra, file, offset); count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); + mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); ret = VM_FAULT_MAJOR; retry_find: page = find_get_page(mapping, offset); @@ -2218,7 +2223,7 @@ retry_find: goto no_cached_page; } - if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) { + if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) { put_page(page); return ret | VM_FAULT_RETRY; } @@ -2385,14 +2390,14 @@ next: } EXPORT_SYMBOL(filemap_map_pages); -int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int filemap_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret = VM_FAULT_LOCKED; sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); lock_page(page); if (page->mapping != inode->i_mapping) { unlock_page(page); @@ -572,7 +572,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, &start, &nr_pages, i, - gup_flags); + gup_flags, nonblocking); continue; } } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5f3ad65c85de..40bd376dcb79 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -142,42 +142,6 @@ static struct shrinker huge_zero_page_shrinker = { }; #ifdef CONFIG_SYSFS - -static ssize_t triple_flag_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count, - enum transparent_hugepage_flag enabled, - enum transparent_hugepage_flag deferred, - enum transparent_hugepage_flag req_madv) -{ - if (!memcmp("defer", buf, - min(sizeof("defer")-1, count))) { - if (enabled == deferred) - return -EINVAL; - clear_bit(enabled, &transparent_hugepage_flags); - clear_bit(req_madv, &transparent_hugepage_flags); - set_bit(deferred, &transparent_hugepage_flags); - } else if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { - clear_bit(deferred, &transparent_hugepage_flags); - clear_bit(req_madv, &transparent_hugepage_flags); - set_bit(enabled, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { - clear_bit(enabled, &transparent_hugepage_flags); - clear_bit(deferred, &transparent_hugepage_flags); - set_bit(req_madv, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { - clear_bit(enabled, &transparent_hugepage_flags); - clear_bit(req_madv, &transparent_hugepage_flags); - clear_bit(deferred, &transparent_hugepage_flags); - } else - return -EINVAL; - - return count; -} - static ssize_t enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -193,19 +157,28 @@ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - ssize_t ret; + ssize_t ret = count; - ret = triple_flag_store(kobj, attr, buf, count, - TRANSPARENT_HUGEPAGE_FLAG, - TRANSPARENT_HUGEPAGE_FLAG, - TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG); + if (!memcmp("always", buf, + min(sizeof("always")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); + } else if (!memcmp("madvise", buf, + min(sizeof("madvise")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); + } else if (!memcmp("never", buf, + min(sizeof("never")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); + } else + ret = -EINVAL; if (ret > 0) { int err = start_stop_khugepaged(); if (err) ret = err; } - return ret; } static struct kobj_attribute enabled_attr = @@ -241,32 +214,58 @@ ssize_t single_hugepage_flag_store(struct kobject *kobj, return count; } -/* - * Currently defrag only disables __GFP_NOWAIT for allocation. A blind - * __GFP_REPEAT is too aggressive, it's never worth swapping tons of - * memory just to allocate one more hugepage. - */ static ssize_t defrag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) - return sprintf(buf, "[always] defer madvise never\n"); + return sprintf(buf, "[always] defer defer+madvise madvise never\n"); if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) - return sprintf(buf, "always [defer] madvise never\n"); - else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) - return sprintf(buf, "always defer [madvise] never\n"); - else - return sprintf(buf, "always defer madvise [never]\n"); - + return sprintf(buf, "always [defer] defer+madvise madvise never\n"); + if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) + return sprintf(buf, "always defer [defer+madvise] madvise never\n"); + if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) + return sprintf(buf, "always defer defer+madvise [madvise] never\n"); + return sprintf(buf, "always defer defer+madvise madvise [never]\n"); } + static ssize_t defrag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - return triple_flag_store(kobj, attr, buf, count, - TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, - TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, - TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG); + if (!memcmp("always", buf, + min(sizeof("always")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + } else if (!memcmp("defer", buf, + min(sizeof("defer")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + } else if (!memcmp("defer+madvise", buf, + min(sizeof("defer+madvise")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + } else if (!memcmp("madvise", buf, + min(sizeof("madvise")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + } else if (!memcmp("never", buf, + min(sizeof("never")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + } else + return -EINVAL; + + return count; } static struct kobj_attribute defrag_attr = __ATTR(defrag, 0644, defrag_show, defrag_store); @@ -612,25 +611,28 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, } /* - * If THP defrag is set to always then directly reclaim/compact as necessary - * If set to defer then do only background reclaim/compact and defer to khugepaged - * If set to madvise and the VMA is flagged then directly reclaim/compact - * When direct reclaim/compact is allowed, don't retry except for flagged VMA's + * always: directly stall for all thp allocations + * defer: wake kswapd and fail if not immediately available + * defer+madvise: wake kswapd and directly stall for MADV_HUGEPAGE, otherwise + * fail if not immediately available + * madvise: directly stall for MADV_HUGEPAGE, otherwise fail if not immediately + * available + * never: never stall for any thp allocation */ static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma) { - bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); + const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); - if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, - &transparent_hugepage_flags) && vma_madvised) - return GFP_TRANSHUGE; - else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, - &transparent_hugepage_flags)) - return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM; - else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, - &transparent_hugepage_flags)) + if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY); - + if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) + return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM; + if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) + return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : + __GFP_KSWAPD_RECLAIM); + if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) + return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : + 0); return GFP_TRANSHUGE_LIGHT; } @@ -1397,6 +1399,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb->fullmm); orig_pmd = pmd_mkold(orig_pmd); orig_pmd = pmd_mkclean(orig_pmd); + orig_pmd = pmd_wrprotect(orig_pmd); set_pmd_at(mm, addr, pmd, orig_pmd); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c7025c132670..1b8789a2395d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -32,6 +32,7 @@ #include <linux/hugetlb.h> #include <linux/hugetlb_cgroup.h> #include <linux/node.h> +#include <linux/userfaultfd_k.h> #include "internal.h" int hugepages_treat_as_movable; @@ -3141,7 +3142,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get * this far. */ -static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int hugetlb_vm_op_fault(struct vm_fault *vmf) { BUG(); return 0; @@ -3680,6 +3681,38 @@ retry: size = i_size_read(mapping->host) >> huge_page_shift(h); if (idx >= size) goto out; + + /* + * Check for page in userfault range + */ + if (userfaultfd_missing(vma)) { + u32 hash; + struct vm_fault vmf = { + .vma = vma, + .address = address, + .flags = flags, + /* + * Hard to debug if it ends up being + * used by a callee that assumes + * something about the other + * uninitialized fields... same as in + * memory.c + */ + }; + + /* + * hugetlb_fault_mutex must be dropped before + * handling userfault. Reacquire after handling + * fault to make calling code simpler. + */ + hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, + idx, address); + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + ret = handle_userfault(&vmf, VM_UFFD_MISSING); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + goto out; + } + page = alloc_huge_page(vma, address, 0); if (IS_ERR(page)) { ret = PTR_ERR(page); @@ -3948,10 +3981,91 @@ out_mutex: return ret; } +/* + * Used by userfaultfd UFFDIO_COPY. Based on mcopy_atomic_pte with + * modifications for huge pages. + */ +int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, + pte_t *dst_pte, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep) +{ + struct hstate *h = hstate_vma(dst_vma); + pte_t _dst_pte; + spinlock_t *ptl; + int ret; + struct page *page; + + if (!*pagep) { + ret = -ENOMEM; + page = alloc_huge_page(dst_vma, dst_addr, 0); + if (IS_ERR(page)) + goto out; + + ret = copy_huge_page_from_user(page, + (const void __user *) src_addr, + pages_per_huge_page(h), false); + + /* fallback to copy_from_user outside mmap_sem */ + if (unlikely(ret)) { + ret = -EFAULT; + *pagep = page; + /* don't free the page */ + goto out; + } + } else { + page = *pagep; + *pagep = NULL; + } + + /* + * The memory barrier inside __SetPageUptodate makes sure that + * preceding stores to the page contents become visible before + * the set_pte_at() write. + */ + __SetPageUptodate(page); + set_page_huge_active(page); + + ptl = huge_pte_lockptr(h, dst_mm, dst_pte); + spin_lock(ptl); + + ret = -EEXIST; + if (!huge_pte_none(huge_ptep_get(dst_pte))) + goto out_release_unlock; + + ClearPagePrivate(page); + hugepage_add_new_anon_rmap(page, dst_vma, dst_addr); + + _dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE); + if (dst_vma->vm_flags & VM_WRITE) + _dst_pte = huge_pte_mkdirty(_dst_pte); + _dst_pte = pte_mkyoung(_dst_pte); + + set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + + (void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte, + dst_vma->vm_flags & VM_WRITE); + hugetlb_count_add(pages_per_huge_page(h), dst_mm); + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(dst_vma, dst_addr, dst_pte); + + spin_unlock(ptl); + ret = 0; +out: + return ret; +out_release_unlock: + spin_unlock(ptl); + put_page(page); + goto out; +} + long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, - long i, unsigned int flags) + long i, unsigned int flags, int *nonblocking) { unsigned long pfn_offset; unsigned long vaddr = *position; @@ -4014,16 +4128,43 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, ((flags & FOLL_WRITE) && !huge_pte_write(huge_ptep_get(pte)))) { int ret; + unsigned int fault_flags = 0; if (pte) spin_unlock(ptl); - ret = hugetlb_fault(mm, vma, vaddr, - (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0); - if (!(ret & VM_FAULT_ERROR)) - continue; - - remainder = 0; - break; + if (flags & FOLL_WRITE) + fault_flags |= FAULT_FLAG_WRITE; + if (nonblocking) + fault_flags |= FAULT_FLAG_ALLOW_RETRY; + if (flags & FOLL_NOWAIT) + fault_flags |= FAULT_FLAG_ALLOW_RETRY | + FAULT_FLAG_RETRY_NOWAIT; + if (flags & FOLL_TRIED) { + VM_WARN_ON_ONCE(fault_flags & + FAULT_FLAG_ALLOW_RETRY); + fault_flags |= FAULT_FLAG_TRIED; + } + ret = hugetlb_fault(mm, vma, vaddr, fault_flags); + if (ret & VM_FAULT_ERROR) { + remainder = 0; + break; + } + if (ret & VM_FAULT_RETRY) { + if (nonblocking) + *nonblocking = 0; + *nr_pages = 0; + /* + * VM_FAULT_RETRY must not return an + * error, it will return zero + * instead. + * + * No need to update "position" as the + * caller will not check it after + * *nr_pages is set to 0. + */ + return i; + } + continue; } pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT; @@ -4052,6 +4193,11 @@ same_page: spin_unlock(ptl); } *nr_pages = remainder; + /* + * setting position is actually required only if remainder is + * not zero but it's faster not to add a "if (remainder)" + * branch. + */ *position = vaddr; return i ? i : -EFAULT; diff --git a/mm/internal.h b/mm/internal.h index 7aa2ea0a8623..8ab72f4374e0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -43,6 +43,11 @@ int do_swap_page(struct vm_fault *vmf); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); +static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma) +{ + return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)); +} + void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, @@ -133,9 +138,9 @@ struct alloc_context { * Assumption: *_mem_map is contiguous at least up to MAX_ORDER */ static inline unsigned long -__find_buddy_index(unsigned long page_idx, unsigned int order) +__find_buddy_pfn(unsigned long page_pfn, unsigned int order) { - return page_idx ^ (1 << order); + return page_pfn ^ (1 << order); } extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, @@ -175,6 +180,8 @@ struct compact_control { struct list_head migratepages; /* List of pages being migrated */ unsigned long nr_freepages; /* Number of isolated free pages */ unsigned long nr_migratepages; /* Number of pages to migrate */ + unsigned long total_migrate_scanned; + unsigned long total_free_scanned; unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ unsigned long last_migrated_pfn;/* Not yet flushed page being freed */ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 8f380e4e5cfe..5f6e09c88d25 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -435,7 +435,7 @@ void kasan_cache_shrink(struct kmem_cache *cache) quarantine_remove_cache(cache); } -void kasan_cache_destroy(struct kmem_cache *cache) +void kasan_cache_shutdown(struct kmem_cache *cache) { quarantine_remove_cache(cache); } diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index dae929c02bbb..6f1ed1630873 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -274,6 +274,7 @@ static void per_cpu_remove_cache(void *arg) qlist_free_all(&to_free, cache); } +/* Free all quarantined objects belonging to cache. */ void quarantine_remove_cache(struct kmem_cache *cache) { unsigned long flags, i; @@ -223,6 +223,12 @@ static unsigned int ksm_thread_pages_to_scan = 100; /* Milliseconds ksmd should sleep between batches */ static unsigned int ksm_thread_sleep_millisecs = 20; +/* Checksum of an empty (zeroed) page */ +static unsigned int zero_checksum __read_mostly; + +/* Whether to merge empty (zeroed) pages with actual zero pages */ +static bool ksm_use_zero_pages __read_mostly; + #ifdef CONFIG_NUMA /* Zeroed when merging across nodes is not allowed */ static unsigned int ksm_merge_across_nodes = 1; @@ -926,6 +932,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, struct mm_struct *mm = vma->vm_mm; pmd_t *pmd; pte_t *ptep; + pte_t newpte; spinlock_t *ptl; unsigned long addr; int err = -EFAULT; @@ -950,12 +957,22 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, goto out_mn; } - get_page(kpage); - page_add_anon_rmap(kpage, vma, addr, false); + /* + * No need to check ksm_use_zero_pages here: we can only have a + * zero_page here if ksm_use_zero_pages was enabled alreaady. + */ + if (!is_zero_pfn(page_to_pfn(kpage))) { + get_page(kpage); + page_add_anon_rmap(kpage, vma, addr, false); + newpte = mk_pte(kpage, vma->vm_page_prot); + } else { + newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage), + vma->vm_page_prot)); + } flush_cache_page(vma, addr, pte_pfn(*ptep)); ptep_clear_flush_notify(vma, addr, ptep); - set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); + set_pte_at_notify(mm, addr, ptep, newpte); page_remove_rmap(page, false); if (!page_mapped(page)) @@ -1467,6 +1484,23 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) return; } + /* + * Same checksum as an empty page. We attempt to merge it with the + * appropriate zero page if the user enabled this via sysfs. + */ + if (ksm_use_zero_pages && (checksum == zero_checksum)) { + struct vm_area_struct *vma; + + vma = find_mergeable_vma(rmap_item->mm, rmap_item->address); + err = try_to_merge_one_page(vma, page, + ZERO_PAGE(rmap_item->address)); + /* + * In case of failure, the page was not really empty, so we + * need to continue. Otherwise we're done. + */ + if (!err) + return; + } tree_rmap_item = unstable_tree_search_insert(rmap_item, page, &tree_page); if (tree_rmap_item) { @@ -2233,6 +2267,28 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj, KSM_ATTR(merge_across_nodes); #endif +static ssize_t use_zero_pages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", ksm_use_zero_pages); +} +static ssize_t use_zero_pages_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int err; + bool value; + + err = kstrtobool(buf, &value); + if (err) + return -EINVAL; + + ksm_use_zero_pages = value; + + return count; +} +KSM_ATTR(use_zero_pages); + static ssize_t pages_shared_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -2290,6 +2346,7 @@ static struct attribute *ksm_attrs[] = { #ifdef CONFIG_NUMA &merge_across_nodes_attr.attr, #endif + &use_zero_pages_attr.attr, NULL, }; @@ -2304,6 +2361,11 @@ static int __init ksm_init(void) struct task_struct *ksm_thread; int err; + /* The correct value depends on page size and endianness */ + zero_checksum = calc_checksum(ZERO_PAGE(0)); + /* Default to false for backwards compatibility */ + ksm_use_zero_pages = false; + err = ksm_slab_init(); if (err) goto out; diff --git a/mm/madvise.c b/mm/madvise.c index 0e3828eae9f8..c867d888e362 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -10,6 +10,7 @@ #include <linux/syscalls.h> #include <linux/mempolicy.h> #include <linux/page-isolation.h> +#include <linux/userfaultfd_k.h> #include <linux/hugetlb.h> #include <linux/falloc.h> #include <linux/sched.h> @@ -24,6 +25,8 @@ #include <asm/tlb.h> +#include "internal.h" + /* * Any behaviour which results in changes to the vma->vm_flags needs to * take mmap_sem for writing. Others, which simply traverse vmas, need @@ -373,6 +376,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, ptent = pte_mkold(ptent); ptent = pte_mkclean(ptent); + ptent = pte_wrprotect(ptent); set_pte_at(mm, addr, pte, ptent); if (PageActive(page)) deactivate_page(page); @@ -473,10 +477,11 @@ static long madvise_dontneed(struct vm_area_struct *vma, unsigned long start, unsigned long end) { *prev = vma; - if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) + if (!can_madv_dontneed_vma(vma)) return -EINVAL; - zap_page_range(vma, start, end - start, NULL); + userfaultfd_remove(vma, prev, start, end); + zap_page_range(vma, start, end - start); return 0; } @@ -516,6 +521,7 @@ static long madvise_remove(struct vm_area_struct *vma, * mmap_sem. */ get_file(f); + userfaultfd_remove(vma, prev, start, end); up_read(¤t->mm->mmap_sem); error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, diff --git a/mm/memblock.c b/mm/memblock.c index 7608bc305936..b64b47803e52 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -35,15 +35,18 @@ struct memblock memblock __initdata_memblock = { .memory.regions = memblock_memory_init_regions, .memory.cnt = 1, /* empty dummy entry */ .memory.max = INIT_MEMBLOCK_REGIONS, + .memory.name = "memory", .reserved.regions = memblock_reserved_init_regions, .reserved.cnt = 1, /* empty dummy entry */ .reserved.max = INIT_MEMBLOCK_REGIONS, + .reserved.name = "reserved", #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP .physmem.regions = memblock_physmem_init_regions, .physmem.cnt = 1, /* empty dummy entry */ .physmem.max = INIT_PHYSMEM_REGIONS, + .physmem.name = "physmem", #endif .bottom_up = false, @@ -64,18 +67,6 @@ ulong __init_memblock choose_memblock_flags(void) return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE; } -/* inline so we don't get a warning when pr_debug is compiled out */ -static __init_memblock const char * -memblock_type_name(struct memblock_type *type) -{ - if (type == &memblock.memory) - return "memory"; - else if (type == &memblock.reserved) - return "reserved"; - else - return "unknown"; -} - /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) { @@ -402,12 +393,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, } if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", - memblock_type_name(type), type->max, type->max * 2); + type->name, type->max, type->max * 2); return -1; } memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]", - memblock_type_name(type), type->max * 2, (u64)addr, + type->name, type->max * 2, (u64)addr, (u64)addr + new_size - 1); /* @@ -611,10 +602,10 @@ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { - memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n", - (unsigned long long)base, - (unsigned long long)base + size - 1, - 0UL, (void *)_RET_IP_); + phys_addr_t end = base + size - 1; + + memblock_dbg("memblock_add: [%pa-%pa] %pF\n", + &base, &end, (void *)_RET_IP_); return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); } @@ -718,10 +709,10 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) { - memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n", - (unsigned long long)base, - (unsigned long long)base + size - 1, - (void *)_RET_IP_); + phys_addr_t end = base + size - 1; + + memblock_dbg(" memblock_free: [%pa-%pa] %pF\n", + &base, &end, (void *)_RET_IP_); kmemleak_free_part_phys(base, size); return memblock_remove_range(&memblock.reserved, base, size); @@ -729,10 +720,10 @@ int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) { - memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", - (unsigned long long)base, - (unsigned long long)base + size - 1, - 0UL, (void *)_RET_IP_); + phys_addr_t end = base + size - 1; + + memblock_dbg("memblock_reserve: [%pa-%pa] %pF\n", + &base, &end, (void *)_RET_IP_); return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0); } @@ -1105,6 +1096,31 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, *out_nid = r->nid; } +unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn, + unsigned long max_pfn) +{ + struct memblock_type *type = &memblock.memory; + unsigned int right = type->cnt; + unsigned int mid, left = 0; + phys_addr_t addr = PFN_PHYS(pfn + 1); + + do { + mid = (right + left) / 2; + + if (addr < type->regions[mid].base) + right = mid; + else if (addr >= (type->regions[mid].base + + type->regions[mid].size)) + left = mid + 1; + else { + /* addr is within the region, so pfn + 1 is valid */ + return min(pfn + 1, max_pfn); + } + } while (left < right); + + return min(PHYS_PFN(type->regions[right].base), max_pfn); +} + /** * memblock_set_node - set node ID on memblock regions * @base: base of area to set node ID for @@ -1202,8 +1218,8 @@ phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys alloc = __memblock_alloc_base(size, align, max_addr); if (alloc == 0) - panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", - (unsigned long long) size, (unsigned long long) max_addr); + panic("ERROR: Failed to allocate %pa bytes below %pa.\n", + &size, &max_addr); return alloc; } @@ -1274,18 +1290,17 @@ static void * __init memblock_virt_alloc_internal( if (max_addr > memblock.current_limit) max_addr = memblock.current_limit; - again: alloc = memblock_find_in_range_node(size, align, min_addr, max_addr, nid, flags); - if (alloc) + if (alloc && !memblock_reserve(alloc, size)) goto done; if (nid != NUMA_NO_NODE) { alloc = memblock_find_in_range_node(size, align, min_addr, max_addr, NUMA_NO_NODE, flags); - if (alloc) + if (alloc && !memblock_reserve(alloc, size)) goto done; } @@ -1303,7 +1318,6 @@ again: return NULL; done: - memblock_reserve(alloc, size); ptr = phys_to_virt(alloc); memset(ptr, 0, size); @@ -1615,8 +1629,7 @@ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size if (idx == -1) return 0; - return memblock.memory.regions[idx].base <= base && - (memblock.memory.regions[idx].base + + return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size) >= end; } @@ -1671,40 +1684,44 @@ phys_addr_t __init_memblock memblock_get_current_limit(void) return memblock.current_limit; } -static void __init_memblock memblock_dump(struct memblock_type *type, char *name) +static void __init_memblock memblock_dump(struct memblock_type *type) { - unsigned long long base, size; + phys_addr_t base, end, size; unsigned long flags; int idx; struct memblock_region *rgn; - pr_info(" %s.cnt = 0x%lx\n", name, type->cnt); + pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); for_each_memblock_type(type, rgn) { char nid_buf[32] = ""; base = rgn->base; size = rgn->size; + end = base + size - 1; flags = rgn->flags; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP if (memblock_get_region_node(rgn) != MAX_NUMNODES) snprintf(nid_buf, sizeof(nid_buf), " on node %d", memblock_get_region_node(rgn)); #endif - pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s flags: %#lx\n", - name, idx, base, base + size - 1, size, nid_buf, flags); + pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#lx\n", + type->name, idx, &base, &end, &size, nid_buf, flags); } } void __init_memblock __memblock_dump_all(void) { pr_info("MEMBLOCK configuration:\n"); - pr_info(" memory size = %#llx reserved size = %#llx\n", - (unsigned long long)memblock.memory.total_size, - (unsigned long long)memblock.reserved.total_size); + pr_info(" memory size = %pa reserved size = %pa\n", + &memblock.memory.total_size, + &memblock.reserved.total_size); - memblock_dump(&memblock.memory, "memory"); - memblock_dump(&memblock.reserved, "reserved"); + memblock_dump(&memblock.memory); + memblock_dump(&memblock.reserved); +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP + memblock_dump(&memblock.physmem); +#endif } void __init memblock_allow_resize(void) @@ -1727,19 +1744,14 @@ static int memblock_debug_show(struct seq_file *m, void *private) struct memblock_type *type = m->private; struct memblock_region *reg; int i; + phys_addr_t end; for (i = 0; i < type->cnt; i++) { reg = &type->regions[i]; - seq_printf(m, "%4d: ", i); - if (sizeof(phys_addr_t) == 4) - seq_printf(m, "0x%08lx..0x%08lx\n", - (unsigned long)reg->base, - (unsigned long)(reg->base + reg->size - 1)); - else - seq_printf(m, "0x%016llx..0x%016llx\n", - (unsigned long long)reg->base, - (unsigned long long)(reg->base + reg->size - 1)); + end = reg->base + reg->size - 1; + seq_printf(m, "%4d: ", i); + seq_printf(m, "%pa..%pa\n", ®->base, &end); } return 0; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index f283c7e0a2a3..3f3cfd4e1901 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1527,7 +1527,8 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags) { int ret = __get_any_page(page, pfn, flags); - if (ret == 1 && !PageHuge(page) && !PageLRU(page)) { + if (ret == 1 && !PageHuge(page) && + !PageLRU(page) && !__PageMovable(page)) { /* * Try to free it. */ @@ -1649,7 +1650,10 @@ static int __soft_offline_page(struct page *page, int flags) * Try to migrate to a new page instead. migrate.c * handles a large number of cases for us. */ - ret = isolate_lru_page(page); + if (PageLRU(page)) + ret = isolate_lru_page(page); + else + ret = !isolate_movable_page(page, ISOLATE_UNEVICTABLE); /* * Drop page reference which is came from get_any_page() * successful isolate_lru_page() already took another one. @@ -1657,18 +1661,20 @@ static int __soft_offline_page(struct page *page, int flags) put_hwpoison_page(page); if (!ret) { LIST_HEAD(pagelist); - inc_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + /* + * After isolated lru page, the PageLRU will be cleared, + * so use !__PageMovable instead for LRU page's mapping + * cannot have PAGE_MAPPING_MOVABLE. + */ + if (!__PageMovable(page)) + inc_node_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { - if (!list_empty(&pagelist)) { - list_del(&page->lru); - dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); - putback_lru_page(page); - } + if (!list_empty(&pagelist)) + putback_movable_pages(&pagelist); pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); diff --git a/mm/memory.c b/mm/memory.c index 6bf2b471e30c..cf97d88158cd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1155,12 +1155,6 @@ again: if (!PageAnon(page)) { if (pte_dirty(ptent)) { - /* - * oom_reaper cannot tear down dirty - * pages - */ - if (unlikely(details && details->ignore_dirty)) - continue; force_flush = 1; set_page_dirty(page); } @@ -1179,8 +1173,8 @@ again: } continue; } - /* only check swap_entries if explicitly asked for in details */ - if (unlikely(details && !details->check_swap_entries)) + /* If details->check_mapping, we leave swap entries. */ + if (unlikely(details)) continue; entry = pte_to_swp_entry(ptent); @@ -1376,12 +1370,11 @@ void unmap_vmas(struct mmu_gather *tlb, * @vma: vm_area_struct holding the applicable pages * @start: starting address of pages to zap * @size: number of bytes to zap - * @details: details of shared cache invalidation * * Caller must protect the VMA list */ void zap_page_range(struct vm_area_struct *vma, unsigned long start, - unsigned long size, struct zap_details *details) + unsigned long size) { struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; @@ -1392,7 +1385,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, start, end); for ( ; vma && vma->vm_start < end; vma = vma->vm_next) - unmap_single_vma(&tlb, vma, start, end, details); + unmap_single_vma(&tlb, vma, start, end, NULL); mmu_notifier_invalidate_range_end(mm, start, end); tlb_finish_mmu(&tlb, start, end); } @@ -2042,7 +2035,7 @@ static int do_page_mkwrite(struct vm_fault *vmf) vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; - ret = vmf->vma->vm_ops->page_mkwrite(vmf->vma, vmf); + ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) @@ -2314,7 +2307,7 @@ static int wp_pfn_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; - ret = vma->vm_ops->pfn_mkwrite(vma, vmf); + ret = vma->vm_ops->pfn_mkwrite(vmf); if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) return ret; return finish_mkwrite_fault(vmf); @@ -2868,7 +2861,7 @@ static int __do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; int ret; - ret = vma->vm_ops->fault(vma, vmf); + ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) return ret; @@ -3471,12 +3464,10 @@ out: static int create_huge_pmd(struct vm_fault *vmf) { - struct vm_area_struct *vma = vmf->vma; - if (vma_is_anonymous(vma)) + if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_anonymous_page(vmf); - if (vma->vm_ops->pmd_fault) - return vma->vm_ops->pmd_fault(vma, vmf->address, vmf->pmd, - vmf->flags); + if (vmf->vma->vm_ops->pmd_fault) + return vmf->vma->vm_ops->pmd_fault(vmf); return VM_FAULT_FALLBACK; } @@ -3485,8 +3476,7 @@ static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_wp_page(vmf, orig_pmd); if (vmf->vma->vm_ops->pmd_fault) - return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf->address, - vmf->pmd, vmf->flags); + return vmf->vma->vm_ops->pmd_fault(vmf); /* COW handled on pte level: split pmd */ VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma); @@ -4155,6 +4145,38 @@ void copy_user_huge_page(struct page *dst, struct page *src, copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma); } } + +long copy_huge_page_from_user(struct page *dst_page, + const void __user *usr_src, + unsigned int pages_per_huge_page, + bool allow_pagefault) +{ + void *src = (void *)usr_src; + void *page_kaddr; + unsigned long i, rc = 0; + unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; + + for (i = 0; i < pages_per_huge_page; i++) { + if (allow_pagefault) + page_kaddr = kmap(dst_page + i); + else + page_kaddr = kmap_atomic(dst_page + i); + rc = copy_from_user(page_kaddr, + (const void __user *)(src + i * PAGE_SIZE), + PAGE_SIZE); + if (allow_pagefault) + kunmap(dst_page + i); + else + kunmap_atomic(page_kaddr); + + ret_val -= (PAGE_SIZE - rc); + if (rc) + break; + + cond_resched(); + } + return ret_val; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ca2723d47338..3e3db7a1f7ba 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -179,7 +179,8 @@ static void release_memory_resource(struct resource *res) void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) { - page->lru.next = (struct list_head *) type; + page->lru.next = (struct list_head *)type; + page->freelist = (void *)type; SetPagePrivate(page); set_page_private(page, info); page_ref_inc(page); @@ -189,11 +190,12 @@ void put_page_bootmem(struct page *page) { unsigned long type; - type = (unsigned long) page->lru.next; + type = (unsigned long) page->freelist; BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); if (page_ref_dec_return(page) == 1) { + page->freelist = NULL; ClearPagePrivate(page); set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); @@ -227,7 +229,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) for (i = 0; i < mapsize; i++, page++) get_page_bootmem(section_nr, page, SECTION_INFO); - usemap = __nr_to_section(section_nr)->pageblock_flags; + usemap = section_to_usemap(__nr_to_section(section_nr)); page = virt_to_page(usemap); mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; @@ -253,7 +255,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); - usemap = __nr_to_section(section_nr)->pageblock_flags; + usemap = section_to_usemap(__nr_to_section(section_nr)); page = virt_to_page(usemap); mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; @@ -466,10 +468,10 @@ static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long s pgdat->node_start_pfn; } -static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn) +static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn, + unsigned long nr_pages) { struct pglist_data *pgdat = zone->zone_pgdat; - int nr_pages = PAGES_PER_SECTION; int nid = pgdat->node_id; int zone_type; unsigned long flags, pfn; @@ -499,24 +501,21 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn) } static int __meminit __add_section(int nid, struct zone *zone, - unsigned long phys_start_pfn) + unsigned long pfn, unsigned long nr_pages) { int ret; - if (pfn_valid(phys_start_pfn)) - return -EEXIST; - - ret = sparse_add_one_section(zone, phys_start_pfn); + ret = sparse_add_section(zone, pfn, nr_pages); if (ret < 0) return ret; - ret = __add_zone(zone, phys_start_pfn); + ret = __add_zone(zone, pfn, nr_pages); if (ret < 0) return ret; - return register_new_memory(nid, __pfn_to_section(phys_start_pfn)); + return register_new_memory(zone, nid, __pfn_to_section(pfn)); } /* @@ -525,26 +524,20 @@ static int __meminit __add_section(int nid, struct zone *zone, * call this function after deciding the zone to which to * add the new pages. */ -int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, +int __ref __add_pages(int nid, struct zone *zone, unsigned long pfn, unsigned long nr_pages) { - unsigned long i; - int err = 0; - int start_sec, end_sec; + int err = 0, i, start_sec, end_sec; struct vmem_altmap *altmap; clear_zone_contiguous(zone); - /* during initialize mem_map, align hot-added range to section */ - start_sec = pfn_to_section_nr(phys_start_pfn); - end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - - altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn)); + altmap = to_vmem_altmap((unsigned long) pfn_to_page(pfn)); if (altmap) { /* * Validate altmap is within bounds of the total request */ - if (altmap->base_pfn != phys_start_pfn + if (altmap->base_pfn != pfn || vmem_altmap_offset(altmap) > nr_pages) { pr_warn_once("memory add fail, invalid altmap\n"); err = -EINVAL; @@ -553,8 +546,16 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, altmap->alloc = 0; } + start_sec = pfn_to_section_nr(pfn); + end_sec = pfn_to_section_nr(pfn + nr_pages - 1); for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, zone, section_nr_to_pfn(i)); + unsigned long pfns; + + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + err = __add_section(nid, zone, pfn, pfns); + pfn += pfns; + nr_pages -= pfns; /* * EEXIST is finally dealt with by ioresource collision @@ -760,10 +761,10 @@ static void shrink_pgdat_span(struct pglist_data *pgdat, pgdat->node_spanned_pages = 0; } -static void __remove_zone(struct zone *zone, unsigned long start_pfn) +static void __remove_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages) { struct pglist_data *pgdat = zone->zone_pgdat; - int nr_pages = PAGES_PER_SECTION; int zone_type; unsigned long flags; @@ -775,25 +776,22 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) pgdat_resize_unlock(zone->zone_pgdat, &flags); } -static int __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) +static int __remove_section(struct zone *zone, unsigned long pfn, + unsigned long nr_pages, unsigned long map_offset) { - unsigned long start_pfn; - int scn_nr; + struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn)); int ret = -EINVAL; if (!valid_section(ms)) return ret; - ret = unregister_memory_section(ms); + ret = unregister_memory_section(zone, ms); if (ret) return ret; - scn_nr = __section_nr(ms); - start_pfn = section_nr_to_pfn(scn_nr); - __remove_zone(zone, start_pfn); + __remove_zone(zone, pfn, nr_pages); - sparse_remove_one_section(zone, ms, map_offset); + sparse_remove_section(zone, ms, pfn, nr_pages, map_offset); return 0; } @@ -808,16 +806,15 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, * sure that pages are marked reserved and zones are adjust properly by * calling offline_pages(). */ -int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, +int __remove_pages(struct zone *zone, unsigned long pfn, unsigned long nr_pages) { - unsigned long i; unsigned long map_offset = 0; - int sections_to_remove, ret = 0; + int i, start_sec, end_sec, ret = 0; /* In the ZONE_DEVICE case device driver owns the memory region */ if (is_dev_zone(zone)) { - struct page *page = pfn_to_page(phys_start_pfn); + struct page *page = pfn_to_page(pfn); struct vmem_altmap *altmap; altmap = to_vmem_altmap((unsigned long) page); @@ -826,7 +823,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, } else { resource_size_t start, size; - start = phys_start_pfn << PAGE_SHIFT; + start = pfn << PAGE_SHIFT; size = nr_pages * PAGE_SIZE; ret = release_mem_region_adjustable(&iomem_resource, start, @@ -842,16 +839,26 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, clear_zone_contiguous(zone); /* - * We can only remove entire sections + * Only ZONE_DEVICE memory is enabled to remove + * section-unaligned ranges. See register_new_memory() which + * assumes section alignment and is skipped for ZONE_DEVICE + * ranges. */ - BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK); - BUG_ON(nr_pages % PAGES_PER_SECTION); + if (!is_dev_zone(zone) && ((pfn | nr_pages) & ~PAGE_SECTION_MASK)) { + WARN(1, "section unaligned removal not supported\n"); + return -EINVAL; + } - sections_to_remove = nr_pages / PAGES_PER_SECTION; - for (i = 0; i < sections_to_remove; i++) { - unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; + start_sec = pfn_to_section_nr(pfn); + end_sec = pfn_to_section_nr(pfn + nr_pages - 1); + for (i = start_sec; i <= end_sec; i++) { + unsigned long pfns; - ret = __remove_section(zone, __pfn_to_section(pfn), map_offset); + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + ret = __remove_section(zone, pfn, pfns, map_offset); + pfn += pfns; + nr_pages -= pfns; map_offset = 0; if (ret) break; @@ -861,7 +868,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, return ret; } -EXPORT_SYMBOL_GPL(__remove_pages); #endif /* CONFIG_MEMORY_HOTREMOVE */ int set_online_page_callback(online_page_callback_t callback) diff --git a/mm/mmap.c b/mm/mmap.c index dc4291dcc99b..46059b7a84e4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2806,11 +2806,11 @@ static inline void verify_mm_writelocked(struct mm_struct *mm) * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ -static int do_brk(unsigned long addr, unsigned long request) +static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - unsigned long flags, len; + unsigned long len; struct rb_node **rb_link, *rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; @@ -2821,7 +2821,10 @@ static int do_brk(unsigned long addr, unsigned long request) if (!len) return 0; - flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; + /* Until we need other flags, refuse anything except VM_EXEC. */ + if ((flags & (~VM_EXEC)) != 0) + return -EINVAL; + flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); if (offset_in_page(error)) @@ -2889,7 +2892,12 @@ out: return 0; } -int vm_brk(unsigned long addr, unsigned long len) +static int do_brk(unsigned long addr, unsigned long len) +{ + return do_brk_flags(addr, len, 0); +} + +int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags) { struct mm_struct *mm = current->mm; int ret; @@ -2898,13 +2906,19 @@ int vm_brk(unsigned long addr, unsigned long len) if (down_write_killable(&mm->mmap_sem)) return -EINTR; - ret = do_brk(addr, len); + ret = do_brk_flags(addr, len, flags); populate = ((mm->def_flags & VM_LOCKED) != 0); up_write(&mm->mmap_sem); if (populate && !ret) mm_populate(addr, len); return ret; } +EXPORT_SYMBOL(vm_brk_flags); + +int vm_brk(unsigned long addr, unsigned long len) +{ + return vm_brk_flags(addr, len, 0); +} EXPORT_SYMBOL(vm_brk); /* Release all mmaps. */ @@ -3111,8 +3125,7 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) mm->data_vm += npages; } -static int special_mapping_fault(struct vm_area_struct *vma, - struct vm_fault *vmf); +static int special_mapping_fault(struct vm_fault *vmf); /* * Having a close hook prevents vma merging regardless of flags. @@ -3147,9 +3160,9 @@ static const struct vm_operations_struct legacy_special_mapping_vmops = { .fault = special_mapping_fault, }; -static int special_mapping_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int special_mapping_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; pgoff_t pgoff; struct page **pages; @@ -3159,7 +3172,7 @@ static int special_mapping_fault(struct vm_area_struct *vma, struct vm_special_mapping *sm = vma->vm_private_data; if (sm->fault) - return sm->fault(sm, vma, vmf); + return sm->fault(sm, vmf->vma, vmf); pages = sm->pages; } @@ -3433,7 +3446,7 @@ void mm_drop_all_locks(struct mm_struct *mm) } /* - * initialise the VMA slab + * initialise the percpu counter for VM */ void __init mmap_init(void) { diff --git a/mm/mmzone.c b/mm/mmzone.c index 5652be858e5e..a51c0a67ea3d 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -60,7 +60,7 @@ struct zoneref *__next_zones_zonelist(struct zoneref *z, * Find the next suitable zone to use for the allocation. * Only filter based on nodemask if it's set */ - if (likely(nodes == NULL)) + if (unlikely(nodes == NULL)) while (zonelist_zone_idx(z) > highest_zoneidx) z++; else diff --git a/mm/mprotect.c b/mm/mprotect.c index f9c07f54dd62..77115bb7c953 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -33,34 +33,6 @@ #include "internal.h" -/* - * For a prot_numa update we only hold mmap_sem for read so there is a - * potential race with faulting where a pmd was temporarily none. This - * function checks for a transhuge pmd under the appropriate lock. It - * returns a pte if it was successfully locked or NULL if it raced with - * a transhuge insertion. - */ -static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, int prot_numa, spinlock_t **ptl) -{ - pte_t *pte; - spinlock_t *pmdl; - - /* !prot_numa is protected by mmap_sem held for write */ - if (!prot_numa) - return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl); - - pmdl = pmd_lock(vma->vm_mm, pmd); - if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) { - spin_unlock(pmdl); - return NULL; - } - - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl); - spin_unlock(pmdl); - return pte; -} - static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -71,7 +43,21 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long pages = 0; int target_node = NUMA_NO_NODE; - pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl); + /* + * Can be called with only the mmap_sem for reading by + * prot_numa so we must check the pmd isn't constantly + * changing from under us from pmd_none to pmd_trans_huge + * and/or the other way around. + */ + if (pmd_trans_unstable(pmd)) + return 0; + + /* + * The pmd points to a regular pte so the pmd can't change + * from under us even if the mmap_sem is only hold for + * reading. + */ + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!pte) return 0; diff --git a/mm/mremap.c b/mm/mremap.c index 30d7d2482eea..8779928d6a70 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -22,6 +22,7 @@ #include <linux/mmu_notifier.h> #include <linux/uaccess.h> #include <linux/mm-arch-hooks.h> +#include <linux/userfaultfd_k.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> @@ -250,7 +251,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, static unsigned long move_vma(struct vm_area_struct *vma, unsigned long old_addr, unsigned long old_len, - unsigned long new_len, unsigned long new_addr, bool *locked) + unsigned long new_len, unsigned long new_addr, + bool *locked, struct vm_userfaultfd_ctx *uf) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma; @@ -309,6 +311,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, old_addr = new_addr; new_addr = err; } else { + mremap_userfaultfd_prep(new_vma, uf); arch_remap(mm, old_addr, old_addr + old_len, new_addr, new_addr + new_len); } @@ -413,7 +416,8 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, } static unsigned long mremap_to(unsigned long addr, unsigned long old_len, - unsigned long new_addr, unsigned long new_len, bool *locked) + unsigned long new_addr, unsigned long new_len, bool *locked, + struct vm_userfaultfd_ctx *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -458,7 +462,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (offset_in_page(ret)) goto out1; - ret = move_vma(vma, addr, old_len, new_len, new_addr, locked); + ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf); if (!(offset_in_page(ret))) goto out; out1: @@ -497,6 +501,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long ret = -EINVAL; unsigned long charged = 0; bool locked = false; + struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) return ret; @@ -523,7 +528,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (flags & MREMAP_FIXED) { ret = mremap_to(addr, old_len, new_addr, new_len, - &locked); + &locked, &uf); goto out; } @@ -592,7 +597,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, goto out; } - ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked); + ret = move_vma(vma, addr, old_len, new_len, new_addr, + &locked, &uf); } out: if (offset_in_page(ret)) { @@ -602,5 +608,6 @@ out: up_write(¤t->mm->mmap_sem); if (locked && new_len > old_len) mm_populate(new_addr + old_len, new_len - old_len); + mremap_userfaultfd_complete(&uf, addr, new_addr, old_len); return ret; } diff --git a/mm/nommu.c b/mm/nommu.c index 24f9f5f39145..215c62296028 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -517,7 +517,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) } /* - * initialise the VMA and region record slabs + * initialise the percpu counter for VM and region record slabs */ void __init mmap_init(void) { @@ -1191,7 +1191,7 @@ error_free: enomem: pr_err("Allocation of length %lu from process %d (%s) failed\n", len, current->pid, current->comm); - show_free_areas(0); + show_free_areas(0, NULL); return -ENOMEM; } @@ -1412,13 +1412,13 @@ error_getting_vma: kmem_cache_free(vm_region_jar, region); pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", len, current->pid); - show_free_areas(0); + show_free_areas(0, NULL); return -ENOMEM; error_getting_region: pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", len, current->pid); - show_free_areas(0); + show_free_areas(0, NULL); return -ENOMEM; } @@ -1794,7 +1794,7 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int filemap_fault(struct vm_fault *vmf) { BUG(); return 0; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ec9f11d4f094..578321f1c070 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -403,12 +403,14 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) static void dump_header(struct oom_control *oc, struct task_struct *p) { - nodemask_t *nm = (oc->nodemask) ? oc->nodemask : &cpuset_current_mems_allowed; - - pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n", - current->comm, oc->gfp_mask, &oc->gfp_mask, - nodemask_pr_args(nm), oc->order, - current->signal->oom_score_adj); + pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=", + current->comm, oc->gfp_mask, &oc->gfp_mask); + if (oc->nodemask) + pr_cont("%*pbl", nodemask_pr_args(oc->nodemask)); + else + pr_cont("(null)"); + pr_cont(", order=%d, oom_score_adj=%hd\n", + oc->order, current->signal->oom_score_adj); if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) pr_warn("COMPACTION is disabled!!!\n"); @@ -417,7 +419,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p) if (oc->memcg) mem_cgroup_print_oom_info(oc->memcg, p); else - show_mem(SHOW_MEM_FILTER_NODES); + show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask); if (sysctl_oom_dump_tasks) dump_tasks(oc->memcg, oc->nodemask); } @@ -465,8 +467,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) { struct mmu_gather tlb; struct vm_area_struct *vma; - struct zap_details details = {.check_swap_entries = true, - .ignore_dirty = true}; bool ret = true; /* @@ -510,14 +510,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) tlb_gather_mmu(&tlb, mm, 0, -1); for (vma = mm->mmap ; vma; vma = vma->vm_next) { - if (is_vm_hugetlb_page(vma)) - continue; - - /* - * mlocked VMAs require explicit munlocking before unmap. - * Let's keep it simple here and skip such VMAs. - */ - if (vma->vm_flags & VM_LOCKED) + if (!can_madv_dontneed_vma(vma)) continue; /* @@ -532,7 +525,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) */ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, - &details); + NULL); } tlb_finish_mmu(&tlb, 0, -1); pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", @@ -1013,7 +1006,7 @@ bool out_of_memory(struct oom_control *oc) * make sure exclude 0 mask - all other users should have at least * ___GFP_DIRECT_RECLAIM to get here. */ - if (oc->gfp_mask && !(oc->gfp_mask & (__GFP_FS|__GFP_NOFAIL))) + if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS)) return true; /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f3e0c69a97b7..a1576952e6a2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -55,6 +55,7 @@ #include <linux/kmemleak.h> #include <linux/compaction.h> #include <trace/events/kmem.h> +#include <trace/events/oom.h> #include <linux/prefetch.h> #include <linux/mm_inline.h> #include <linux/migrate.h> @@ -352,7 +353,7 @@ static inline unsigned long *get_pageblock_bitmap(struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM - return __pfn_to_section(pfn)->pageblock_flags; + return section_to_usemap(__pfn_to_section(pfn)); #else return page_zone(page)->pageblock_flags; #endif /* CONFIG_SPARSEMEM */ @@ -714,7 +715,7 @@ static inline void rmv_page_order(struct page *page) /* * This function checks whether a page is free && is the buddy * we can do coalesce a page and its buddy if - * (a) the buddy is not in a hole && + * (a) the buddy is not in a hole (check before calling!) && * (b) the buddy is in the buddy system && * (c) a page and its buddy have the same order && * (d) a page and its buddy are in the same zone. @@ -729,9 +730,6 @@ static inline void rmv_page_order(struct page *page) static inline int page_is_buddy(struct page *page, struct page *buddy, unsigned int order) { - if (!pfn_valid_within(page_to_pfn(buddy))) - return 0; - if (page_is_guard(buddy) && page_order(buddy) == order) { if (page_zone_id(page) != page_zone_id(buddy)) return 0; @@ -787,9 +785,8 @@ static inline void __free_one_page(struct page *page, struct zone *zone, unsigned int order, int migratetype) { - unsigned long page_idx; - unsigned long combined_idx; - unsigned long uninitialized_var(buddy_idx); + unsigned long combined_pfn; + unsigned long uninitialized_var(buddy_pfn); struct page *buddy; unsigned int max_order; @@ -802,15 +799,16 @@ static inline void __free_one_page(struct page *page, if (likely(!is_migrate_isolate(migratetype))) __mod_zone_freepage_state(zone, 1 << order, migratetype); - page_idx = pfn & ((1 << MAX_ORDER) - 1); - - VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); + VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); continue_merging: while (order < max_order - 1) { - buddy_idx = __find_buddy_index(page_idx, order); - buddy = page + (buddy_idx - page_idx); + buddy_pfn = __find_buddy_pfn(pfn, order); + buddy = page + (buddy_pfn - pfn); + + if (!pfn_valid_within(buddy_pfn)) + goto done_merging; if (!page_is_buddy(page, buddy, order)) goto done_merging; /* @@ -824,9 +822,9 @@ continue_merging: zone->free_area[order].nr_free--; rmv_page_order(buddy); } - combined_idx = buddy_idx & page_idx; - page = page + (combined_idx - page_idx); - page_idx = combined_idx; + combined_pfn = buddy_pfn & pfn; + page = page + (combined_pfn - pfn); + pfn = combined_pfn; order++; } if (max_order < MAX_ORDER) { @@ -841,8 +839,8 @@ continue_merging: if (unlikely(has_isolate_pageblock(zone))) { int buddy_mt; - buddy_idx = __find_buddy_index(page_idx, order); - buddy = page + (buddy_idx - page_idx); + buddy_pfn = __find_buddy_pfn(pfn, order); + buddy = page + (buddy_pfn - pfn); buddy_mt = get_pageblock_migratetype(buddy); if (migratetype != buddy_mt @@ -865,12 +863,12 @@ done_merging: * so it's less likely to be used soon and more likely to be merged * as a higher order page */ - if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) { + if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)) { struct page *higher_page, *higher_buddy; - combined_idx = buddy_idx & page_idx; - higher_page = page + (combined_idx - page_idx); - buddy_idx = __find_buddy_index(combined_idx, order + 1); - higher_buddy = higher_page + (buddy_idx - combined_idx); + combined_pfn = buddy_pfn & pfn; + higher_page = page + (combined_pfn - pfn); + buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); + higher_buddy = higher_page + (buddy_pfn - combined_pfn); if (page_is_buddy(higher_page, higher_buddy, order + 1)) { list_add_tail(&page->lru, &zone->free_area[order].free_list[migratetype]); @@ -1087,10 +1085,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, { int migratetype = 0; int batch_free = 0; - unsigned long nr_scanned; + unsigned long nr_scanned, flags; bool isolated_pageblocks; - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); isolated_pageblocks = has_isolate_pageblock(zone); nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); if (nr_scanned) @@ -1139,7 +1137,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, trace_mm_page_pcpu_drain(page, 0, mt); } while (--count && --batch_free && !list_empty(list)); } - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static void free_one_page(struct zone *zone, @@ -1147,8 +1145,9 @@ static void free_one_page(struct zone *zone, unsigned int order, int migratetype) { - unsigned long nr_scanned; - spin_lock(&zone->lock); + unsigned long nr_scanned, flags; + spin_lock_irqsave(&zone->lock, flags); + __count_vm_events(PGFREE, 1 << order); nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); if (nr_scanned) __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); @@ -1158,7 +1157,7 @@ static void free_one_page(struct zone *zone, migratetype = get_pfnblock_migratetype(page, pfn); } __free_one_page(page, pfn, zone, order, migratetype); - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -1236,7 +1235,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end) static void __free_pages_ok(struct page *page, unsigned int order) { - unsigned long flags; int migratetype; unsigned long pfn = page_to_pfn(page); @@ -1244,10 +1242,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) return; migratetype = get_pfnblock_migratetype(page, pfn); - local_irq_save(flags); - __count_vm_events(PGFREE, 1 << order); free_one_page(page_zone(page), page, pfn, order, migratetype); - local_irq_restore(flags); } static void __init __free_pages_boot_core(struct page *page, unsigned int order) @@ -2219,8 +2214,9 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, int migratetype, bool cold) { int i, alloced = 0; + unsigned long flags; - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype); if (unlikely(page == NULL)) @@ -2256,7 +2252,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages added to the pcp list. */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); return alloced; } @@ -2341,19 +2337,21 @@ void drain_local_pages(struct zone *zone) drain_pages(cpu); } +static void drain_local_pages_wq(struct work_struct *work) +{ + drain_local_pages(NULL); +} + /* * Spill all the per-cpu pages from all CPUs back into the buddy allocator. * * When zone parameter is non-NULL, spill just the single zone's pages. * - * Note that this code is protected against sending an IPI to an offline - * CPU but does not guarantee sending an IPI to newly hotplugged CPUs: - * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but - * nothing keeps CPUs from showing up after we populated the cpumask and - * before the call to on_each_cpu_mask(). + * Note that this can be extremely slow as the draining happens in a workqueue. */ void drain_all_pages(struct zone *zone) { + struct work_struct __percpu *works; int cpu; /* @@ -2362,6 +2360,17 @@ void drain_all_pages(struct zone *zone) */ static cpumask_t cpus_with_pcps; + /* Workqueues cannot recurse */ + if (current->flags & PF_WQ_WORKER) + return; + + /* + * As this can be called from reclaim context, do not reenter reclaim. + * An allocation failure can be handled, it's simply slower + */ + get_online_cpus(); + works = alloc_percpu_gfp(struct work_struct, GFP_ATOMIC); + /* * We don't care about racing with CPU hotplug event * as offline notification will cause the notified @@ -2392,8 +2401,25 @@ void drain_all_pages(struct zone *zone) else cpumask_clear_cpu(cpu, &cpus_with_pcps); } - on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages, - zone, 1); + + if (works) { + for_each_cpu(cpu, &cpus_with_pcps) { + struct work_struct *work = per_cpu_ptr(works, cpu); + INIT_WORK(work, drain_local_pages_wq); + schedule_work_on(cpu, work); + } + for_each_cpu(cpu, &cpus_with_pcps) + flush_work(per_cpu_ptr(works, cpu)); + } else { + for_each_cpu(cpu, &cpus_with_pcps) { + struct work_struct work; + + INIT_WORK(&work, drain_local_pages_wq); + schedule_work_on(cpu, &work); + flush_work(&work); + } + } + put_online_cpus(); } #ifdef CONFIG_HIBERNATION @@ -2444,17 +2470,20 @@ void free_hot_cold_page(struct page *page, bool cold) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; - unsigned long flags; unsigned long pfn = page_to_pfn(page); int migratetype; + if (in_interrupt()) { + __free_pages_ok(page, 0); + return; + } + if (!free_pcp_prepare(page)) return; migratetype = get_pfnblock_migratetype(page, pfn); set_pcppage_migratetype(page, migratetype); - local_irq_save(flags); - __count_vm_event(PGFREE); + preempt_disable(); /* * We only track unmovable, reclaimable and movable on pcp lists. @@ -2471,6 +2500,7 @@ void free_hot_cold_page(struct page *page, bool cold) migratetype = MIGRATE_MOVABLE; } + __count_vm_event(PGFREE); pcp = &this_cpu_ptr(zone->pageset)->pcp; if (!cold) list_add(&page->lru, &pcp->lists[migratetype]); @@ -2484,7 +2514,7 @@ void free_hot_cold_page(struct page *page, bool cold) } out: - local_irq_restore(flags); + preempt_enable_no_resched(); } /* @@ -2602,74 +2632,105 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) #endif } +/* Remove page from the per-cpu list, caller must protect the list */ +static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, + bool cold, struct per_cpu_pages *pcp, + struct list_head *list) +{ + struct page *page; + + VM_BUG_ON(in_interrupt()); + + do { + if (list_empty(list)) { + pcp->count += rmqueue_bulk(zone, 0, + pcp->batch, list, + migratetype, cold); + if (unlikely(list_empty(list))) + return NULL; + } + + if (cold) + page = list_last_entry(list, struct page, lru); + else + page = list_first_entry(list, struct page, lru); + + list_del(&page->lru); + pcp->count--; + } while (check_new_pcp(page)); + + return page; +} + +/* Lock and remove page from the per-cpu list */ +static struct page *rmqueue_pcplist(struct zone *preferred_zone, + struct zone *zone, unsigned int order, + gfp_t gfp_flags, int migratetype) +{ + struct per_cpu_pages *pcp; + struct list_head *list; + bool cold = ((gfp_flags & __GFP_COLD) != 0); + struct page *page; + + preempt_disable(); + pcp = &this_cpu_ptr(zone->pageset)->pcp; + list = &pcp->lists[migratetype]; + page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); + if (page) { + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); + zone_statistics(preferred_zone, zone); + } + preempt_enable_no_resched(); + return page; +} + /* * Allocate a page from the given zone. Use pcplists for order-0 allocations. */ static inline -struct page *buffered_rmqueue(struct zone *preferred_zone, +struct page *rmqueue(struct zone *preferred_zone, struct zone *zone, unsigned int order, gfp_t gfp_flags, unsigned int alloc_flags, int migratetype) { unsigned long flags; struct page *page; - bool cold = ((gfp_flags & __GFP_COLD) != 0); - - if (likely(order == 0)) { - struct per_cpu_pages *pcp; - struct list_head *list; - local_irq_save(flags); - do { - pcp = &this_cpu_ptr(zone->pageset)->pcp; - list = &pcp->lists[migratetype]; - if (list_empty(list)) { - pcp->count += rmqueue_bulk(zone, 0, - pcp->batch, list, - migratetype, cold); - if (unlikely(list_empty(list))) - goto failed; - } - - if (cold) - page = list_last_entry(list, struct page, lru); - else - page = list_first_entry(list, struct page, lru); - - list_del(&page->lru); - pcp->count--; + if (likely(order == 0) && !in_interrupt()) { + page = rmqueue_pcplist(preferred_zone, zone, order, + gfp_flags, migratetype); + goto out; + } - } while (check_new_pcp(page)); - } else { - /* - * We most definitely don't want callers attempting to - * allocate greater than order-1 page units with __GFP_NOFAIL. - */ - WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); - spin_lock_irqsave(&zone->lock, flags); + /* + * We most definitely don't want callers attempting to + * allocate greater than order-1 page units with __GFP_NOFAIL. + */ + WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); + spin_lock_irqsave(&zone->lock, flags); - do { - page = NULL; - if (alloc_flags & ALLOC_HARDER) { - page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); - if (page) - trace_mm_page_alloc_zone_locked(page, order, migratetype); - } - if (!page) - page = __rmqueue(zone, order, migratetype); - } while (page && check_new_pages(page, order)); - spin_unlock(&zone->lock); + do { + page = NULL; + if (alloc_flags & ALLOC_HARDER) { + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); + if (page) + trace_mm_page_alloc_zone_locked(page, order, migratetype); + } if (!page) - goto failed; - __mod_zone_freepage_state(zone, -(1 << order), - get_pcppage_migratetype(page)); - } + page = __rmqueue(zone, order, migratetype); + } while (page && check_new_pages(page, order)); + spin_unlock(&zone->lock); + if (!page) + goto failed; + __mod_zone_freepage_state(zone, -(1 << order), + get_pcppage_migratetype(page)); __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); local_irq_restore(flags); - VM_BUG_ON_PAGE(bad_range(zone, page), page); +out: + VM_BUG_ON_PAGE(page && bad_range(zone, page), page); return page; failed: @@ -2974,7 +3035,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, } try_this_zone: - page = buffered_rmqueue(ac->preferred_zoneref->zone, zone, order, + page = rmqueue(ac->preferred_zoneref->zone, zone, order, gfp_mask, alloc_flags, ac->migratetype); if (page) { prep_new_page(page, order, gfp_mask, alloc_flags); @@ -3007,18 +3068,12 @@ static inline bool should_suppress_show_mem(void) return ret; } -static DEFINE_RATELIMIT_STATE(nopage_rs, - DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); - -void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) +static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask) { unsigned int filter = SHOW_MEM_FILTER_NODES; - struct va_format vaf; - va_list args; + static DEFINE_RATELIMIT_STATE(show_mem_rs, HZ, 1); - if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) || - debug_guardpage_minorder() > 0) + if (should_suppress_show_mem() || !__ratelimit(&show_mem_rs)) return; /* @@ -3033,6 +3088,20 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM)) filter &= ~SHOW_MEM_FILTER_NODES; + show_mem(filter, nodemask); +} + +void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) || + debug_guardpage_minorder() > 0) + return; + pr_warn("%s: ", current->comm); va_start(args, fmt); @@ -3041,11 +3110,36 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) pr_cont("%pV", &vaf); va_end(args); - pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask); + pr_cont(", mode:%#x(%pGg), nodemask=", gfp_mask, &gfp_mask); + if (nodemask) + pr_cont("%*pbl\n", nodemask_pr_args(nodemask)); + else + pr_cont("(null)\n"); + + cpuset_print_current_mems_allowed(); dump_stack(); - if (!should_suppress_show_mem()) - show_mem(filter); + warn_alloc_show_mem(gfp_mask, nodemask); +} + +static inline struct page * +__alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order, + unsigned int alloc_flags, + const struct alloc_context *ac) +{ + struct page *page; + + page = get_page_from_freelist(gfp_mask, order, + alloc_flags|ALLOC_CPUSET, ac); + /* + * fallback to ignore cpuset restriction if our nodes + * are depleted + */ + if (!page) + page = get_page_from_freelist(gfp_mask, order, + alloc_flags, ac); + + return page; } static inline struct page * @@ -3083,47 +3177,42 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, if (page) goto out; - if (!(gfp_mask & __GFP_NOFAIL)) { - /* Coredumps can quickly deplete all memory reserves */ - if (current->flags & PF_DUMPCORE) - goto out; - /* The OOM killer will not help higher order allocs */ - if (order > PAGE_ALLOC_COSTLY_ORDER) - goto out; - /* The OOM killer does not needlessly kill tasks for lowmem */ - if (ac->high_zoneidx < ZONE_NORMAL) - goto out; - if (pm_suspended_storage()) - goto out; - /* - * XXX: GFP_NOFS allocations should rather fail than rely on - * other request to make a forward progress. - * We are in an unfortunate situation where out_of_memory cannot - * do much for this context but let's try it to at least get - * access to memory reserved if the current task is killed (see - * out_of_memory). Once filesystems are ready to handle allocation - * failures more gracefully we should just bail out here. - */ + /* Coredumps can quickly deplete all memory reserves */ + if (current->flags & PF_DUMPCORE) + goto out; + /* The OOM killer will not help higher order allocs */ + if (order > PAGE_ALLOC_COSTLY_ORDER) + goto out; + /* The OOM killer does not needlessly kill tasks for lowmem */ + if (ac->high_zoneidx < ZONE_NORMAL) + goto out; + if (pm_suspended_storage()) + goto out; + /* + * XXX: GFP_NOFS allocations should rather fail than rely on + * other request to make a forward progress. + * We are in an unfortunate situation where out_of_memory cannot + * do much for this context but let's try it to at least get + * access to memory reserved if the current task is killed (see + * out_of_memory). Once filesystems are ready to handle allocation + * failures more gracefully we should just bail out here. + */ + + /* The OOM killer may not free memory on a specific node */ + if (gfp_mask & __GFP_THISNODE) + goto out; - /* The OOM killer may not free memory on a specific node */ - if (gfp_mask & __GFP_THISNODE) - goto out; - } /* Exhausted what can be done so it's blamo time */ if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { *did_some_progress = 1; - if (gfp_mask & __GFP_NOFAIL) { - page = get_page_from_freelist(gfp_mask, order, - ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac); - /* - * fallback to ignore cpuset restriction if our nodes - * are depleted - */ - if (!page) - page = get_page_from_freelist(gfp_mask, order, + /* + * Help non-failing allocations by giving them access to memory + * reserves + */ + if (gfp_mask & __GFP_NOFAIL) + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_NO_WATERMARKS, ac); - } } out: mutex_unlock(&oom_lock); @@ -3192,6 +3281,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, { int max_retries = MAX_COMPACT_RETRIES; int min_priority; + bool ret = false; + int retries = *compaction_retries; + enum compact_priority priority = *compact_priority; if (!order) return false; @@ -3213,8 +3305,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, * But do not retry if the given zonelist is not suitable for * compaction. */ - if (compaction_withdrawn(compact_result)) - return compaction_zonelist_suitable(ac, order, alloc_flags); + if (compaction_withdrawn(compact_result)) { + ret = compaction_zonelist_suitable(ac, order, alloc_flags); + goto out; + } /* * !costly requests are much more important than __GFP_REPEAT @@ -3226,8 +3320,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, */ if (order > PAGE_ALLOC_COSTLY_ORDER) max_retries /= 4; - if (*compaction_retries <= max_retries) - return true; + if (*compaction_retries <= max_retries) { + ret = true; + goto out; + } /* * Make sure there are attempts at the highest priority if we exhausted @@ -3236,12 +3332,15 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, check_priority: min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ? MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY; + if (*compact_priority > min_priority) { (*compact_priority)--; *compaction_retries = 0; - return true; + ret = true; } - return false; +out: + trace_compact_retry(order, priority, compact_result, retries, max_retries, ret); + return ret; } #else static inline struct page * @@ -3464,6 +3563,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, ac->nodemask) { unsigned long available; unsigned long reclaimable; + unsigned long min_wmark = min_wmark_pages(zone); + bool wmark; available = reclaimable = zone_reclaimable_pages(zone); available -= DIV_ROUND_UP((*no_progress_loops) * available, @@ -3474,8 +3575,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, * Would the allocation succeed if we reclaimed the whole * available? */ - if (__zone_watermark_ok(zone, order, min_wmark_pages(zone), - ac_classzone_idx(ac), alloc_flags, available)) { + wmark = __zone_watermark_ok(zone, order, min_wmark, + ac_classzone_idx(ac), alloc_flags, available); + trace_reclaim_retry_zone(z, order, reclaimable, + available, min_wmark, *no_progress_loops, wmark); + if (wmark) { /* * If we didn't make any progress and have a lot of * dirty + writeback pages then we should wait for @@ -3555,6 +3659,14 @@ retry_cpuset: no_progress_loops = 0; compact_priority = DEF_COMPACT_PRIORITY; cpuset_mems_cookie = read_mems_allowed_begin(); + + /* + * The fast path uses conservative alloc_flags to succeed only until + * kswapd needs to be woken up, and to avoid the cost of setting up + * alloc_flags precisely. So we do that now. + */ + alloc_flags = gfp_to_alloc_flags(gfp_mask); + /* * We need to recalculate the starting point for the zonelist iterator * because we might have used different nodemask in the fast path, or @@ -3566,14 +3678,6 @@ retry_cpuset: if (!ac->preferred_zoneref->zone) goto nopage; - - /* - * The fast path uses conservative alloc_flags to succeed only until - * kswapd needs to be woken up, and to avoid the cost of setting up - * alloc_flags precisely. So we do that now. - */ - alloc_flags = gfp_to_alloc_flags(gfp_mask); - if (gfp_mask & __GFP_KSWAPD_RECLAIM) wake_all_kswapds(order, ac); @@ -3650,35 +3754,21 @@ retry: goto got_pg; /* Caller is not willing to reclaim, we can't balance anything */ - if (!can_direct_reclaim) { - /* - * All existing users of the __GFP_NOFAIL are blockable, so warn - * of any new users that actually allow this type of allocation - * to fail. - */ - WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL); + if (!can_direct_reclaim) goto nopage; - } - /* Avoid recursion of direct reclaim */ - if (current->flags & PF_MEMALLOC) { - /* - * __GFP_NOFAIL request from this context is rather bizarre - * because we cannot reclaim anything and only can loop waiting - * for somebody to do a work for us. - */ - if (WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { - cond_resched(); - goto retry; - } - goto nopage; + /* Make sure we know about allocations which stall for too long */ + if (time_after(jiffies, alloc_start + stall_timeout)) { + warn_alloc(gfp_mask, ac->nodemask, + "page allocation stalls for %ums, order:%u", + jiffies_to_msecs(jiffies-alloc_start), order); + stall_timeout += 10 * HZ; } - /* Avoid allocations with no watermarks from looping endlessly */ - if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) + /* Avoid recursion of direct reclaim */ + if (current->flags & PF_MEMALLOC) goto nopage; - /* Try direct reclaim and then allocating */ page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac, &did_some_progress); @@ -3702,14 +3792,6 @@ retry: if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT)) goto nopage; - /* Make sure we know about allocations which stall for too long */ - if (time_after(jiffies, alloc_start + stall_timeout)) { - warn_alloc(gfp_mask, - "page allocation stalls for %ums, order:%u", - jiffies_to_msecs(jiffies-alloc_start), order); - stall_timeout += 10 * HZ; - } - if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, did_some_progress > 0, &no_progress_loops)) goto retry; @@ -3738,6 +3820,10 @@ retry: if (page) goto got_pg; + /* Avoid allocations with no watermarks from looping endlessly */ + if (test_thread_flag(TIF_MEMDIE)) + goto nopage; + /* Retry as long as the OOM killer is making progress */ if (did_some_progress) { no_progress_loops = 0; @@ -3755,82 +3841,123 @@ nopage: if (read_mems_allowed_retry(cpuset_mems_cookie)) goto retry_cpuset; - warn_alloc(gfp_mask, + /* + * Make sure that __GFP_NOFAIL request doesn't leak out and make sure + * we always retry + */ + if (gfp_mask & __GFP_NOFAIL) { + /* + * All existing users of the __GFP_NOFAIL are blockable, so warn + * of any new users that actually require GFP_NOWAIT + */ + if (WARN_ON_ONCE(!can_direct_reclaim)) + goto fail; + + /* + * PF_MEMALLOC request from this context is rather bizarre + * because we cannot reclaim anything and only can loop waiting + * for somebody to do a work for us + */ + WARN_ON_ONCE(current->flags & PF_MEMALLOC); + + /* + * non failing costly orders are a hard requirement which we + * are not prepared for much so let's warn about these users + * so that we can identify them and convert them to something + * else. + */ + WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER); + + /* + * Help non-failing allocations by giving them access to memory + * reserves but do not use ALLOC_NO_WATERMARKS because this + * could deplete whole memory reserves which would just make + * the situation worse + */ + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); + if (page) + goto got_pg; + + cond_resched(); + goto retry; + } +fail: + warn_alloc(gfp_mask, ac->nodemask, "page allocation failure: order:%u", order); got_pg: return page; } -/* - * This is the 'heart' of the zoned buddy allocator. - */ -struct page * -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, nodemask_t *nodemask) +static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, nodemask_t *nodemask, + struct alloc_context *ac, gfp_t *alloc_mask, + unsigned int *alloc_flags) { - struct page *page; - unsigned int alloc_flags = ALLOC_WMARK_LOW; - gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ - struct alloc_context ac = { - .high_zoneidx = gfp_zone(gfp_mask), - .zonelist = zonelist, - .nodemask = nodemask, - .migratetype = gfpflags_to_migratetype(gfp_mask), - }; + ac->high_zoneidx = gfp_zone(gfp_mask); + ac->zonelist = zonelist; + ac->nodemask = nodemask; + ac->migratetype = gfpflags_to_migratetype(gfp_mask); if (cpusets_enabled()) { - alloc_mask |= __GFP_HARDWALL; - alloc_flags |= ALLOC_CPUSET; - if (!ac.nodemask) - ac.nodemask = &cpuset_current_mems_allowed; + *alloc_mask |= __GFP_HARDWALL; + if (!ac->nodemask) + ac->nodemask = &cpuset_current_mems_allowed; + else + *alloc_flags |= ALLOC_CPUSET; } - gfp_mask &= gfp_allowed_mask; - lockdep_trace_alloc(gfp_mask); might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); if (should_fail_alloc_page(gfp_mask, order)) - return NULL; + return false; - /* - * Check the zones suitable for the gfp_mask contain at least one - * valid zone. It's possible to have an empty zonelist as a result - * of __GFP_THISNODE and a memoryless node - */ - if (unlikely(!zonelist->_zonerefs->zone)) - return NULL; + if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE) + *alloc_flags |= ALLOC_CMA; - if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE) - alloc_flags |= ALLOC_CMA; + return true; +} +/* Determine whether to spread dirty pages and what the first usable zone */ +static inline void finalise_ac(gfp_t gfp_mask, + unsigned int order, struct alloc_context *ac) +{ /* Dirty zone balancing only done in the fast path */ - ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); + ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE); /* * The preferred zone is used for statistics but crucially it is * also used as the starting point for the zonelist iterator. It * may get reset for allocations that ignore memory policies. */ - ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, - ac.high_zoneidx, ac.nodemask); - if (!ac.preferred_zoneref->zone) { - page = NULL; - /* - * This might be due to race with cpuset_current_mems_allowed - * update, so make sure we retry with original nodemask in the - * slow path. - */ - goto no_zone; - } + ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, + ac->high_zoneidx, ac->nodemask); +} + +/* + * This is the 'heart' of the zoned buddy allocator. + */ +struct page * +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, nodemask_t *nodemask) +{ + struct page *page; + unsigned int alloc_flags = ALLOC_WMARK_LOW; + gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ + struct alloc_context ac = { }; + + gfp_mask &= gfp_allowed_mask; + if (!prepare_alloc_pages(gfp_mask, order, zonelist, nodemask, &ac, &alloc_mask, &alloc_flags)) + return NULL; + + finalise_ac(gfp_mask, order, &ac); /* First allocation attempt */ page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); if (likely(page)) goto out; -no_zone: /* * Runtime PM, block IO and its error handling path can deadlock * because I/O on the device might not complete. @@ -4252,20 +4379,20 @@ void si_meminfo_node(struct sysinfo *val, int nid) * Determine whether the node should be displayed or not, depending on whether * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). */ -bool skip_free_areas_node(unsigned int flags, int nid) +static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask) { - bool ret = false; - unsigned int cpuset_mems_cookie; - if (!(flags & SHOW_MEM_FILTER_NODES)) - goto out; + return false; - do { - cpuset_mems_cookie = read_mems_allowed_begin(); - ret = !node_isset(nid, cpuset_current_mems_allowed); - } while (read_mems_allowed_retry(cpuset_mems_cookie)); -out: - return ret; + /* + * no node mask - aka implicit memory numa policy. Do not bother with + * the synchronization - read_mems_allowed_begin - because we do not + * have to be precise here. + */ + if (!nodemask) + nodemask = &cpuset_current_mems_allowed; + + return !node_isset(nid, *nodemask); } #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -4306,7 +4433,7 @@ static void show_migration_types(unsigned char type) * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's * cpuset. */ -void show_free_areas(unsigned int filter) +void show_free_areas(unsigned int filter, nodemask_t *nodemask) { unsigned long free_pcp = 0; int cpu; @@ -4314,7 +4441,7 @@ void show_free_areas(unsigned int filter) pg_data_t *pgdat; for_each_populated_zone(zone) { - if (skip_free_areas_node(filter, zone_to_nid(zone))) + if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; for_each_online_cpu(cpu) @@ -4348,6 +4475,9 @@ void show_free_areas(unsigned int filter) global_page_state(NR_FREE_CMA_PAGES)); for_each_online_pgdat(pgdat) { + if (show_mem_node_skip(filter, pgdat->node_id, nodemask)) + continue; + printk("Node %d" " active_anon:%lukB" " inactive_anon:%lukB" @@ -4397,7 +4527,7 @@ void show_free_areas(unsigned int filter) for_each_populated_zone(zone) { int i; - if (skip_free_areas_node(filter, zone_to_nid(zone))) + if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; free_pcp = 0; @@ -4462,7 +4592,7 @@ void show_free_areas(unsigned int filter) unsigned long nr[MAX_ORDER], flags, total = 0; unsigned char types[MAX_ORDER]; - if (skip_free_areas_node(filter, zone_to_nid(zone))) + if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; show_node(zone); printk(KERN_CONT "%s: ", zone->name); @@ -5083,8 +5213,17 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, if (context != MEMMAP_EARLY) goto not_early; - if (!early_pfn_valid(pfn)) + if (!early_pfn_valid(pfn)) { +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + /* + * Skip to the pfn preceding the next valid one (or + * end_pfn), such that we hit a valid pfn (or end_pfn) + * on our next iteration of the loop. + */ + pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1; +#endif continue; + } if (!early_pfn_in_nid(pfn, nid)) continue; if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) @@ -6378,10 +6517,12 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Print out the early node map */ pr_info("Early memory node ranges\n"); - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, (u64)start_pfn << PAGE_SHIFT, ((u64)end_pfn << PAGE_SHIFT) - 1); + section_active_init(start_pfn, end_pfn - start_pfn); + } /* Initialise every node */ mminit_verify_pageflags_layout(); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index a5594bfcc5ed..f4e17a57926a 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -83,7 +83,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) unsigned long flags, nr_pages; bool isolated_page = false; unsigned int order; - unsigned long page_idx, buddy_idx; + unsigned long pfn, buddy_pfn; struct page *buddy; zone = page_zone(page); @@ -102,11 +102,11 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) if (PageBuddy(page)) { order = page_order(page); if (order >= pageblock_order) { - page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); - buddy_idx = __find_buddy_index(page_idx, order); - buddy = page + (buddy_idx - page_idx); + pfn = page_to_pfn(page); + buddy_pfn = __find_buddy_pfn(pfn, order); + buddy = page + (buddy_pfn - pfn); - if (pfn_valid_within(page_to_pfn(buddy)) && + if (pfn_valid_within(buddy_pfn) && !is_migrate_isolate_page(buddy)) { __isolate_free_page(page, order); isolated_page = true; diff --git a/mm/page_owner.c b/mm/page_owner.c index 60634dc53a88..c3cee247f2e6 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -261,7 +261,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, */ for (; pfn < end_pfn; ) { if (!pfn_valid(pfn)) { - pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); + pfn = ALIGN(pfn + 1, pageblock_nr_pages); continue; } @@ -527,7 +527,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) */ for (; pfn < end_pfn; ) { if (!pfn_valid(pfn)) { - pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); + pfn = ALIGN(pfn + 1, pageblock_nr_pages); continue; } diff --git a/mm/shmem.c b/mm/shmem.c index bb53285a1d99..c437a672c56b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -34,6 +34,8 @@ #include <linux/uio.h> #include <linux/khugepaged.h> +#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */ + static struct vfsmount *shm_mnt; #ifdef CONFIG_SHMEM @@ -70,6 +72,8 @@ static struct vfsmount *shm_mnt; #include <linux/syscalls.h> #include <linux/fcntl.h> #include <uapi/linux/memfd.h> +#include <linux/userfaultfd_k.h> +#include <linux/rmap.h> #include <linux/uaccess.h> #include <asm/pgtable.h> @@ -115,13 +119,14 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index); static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, - gfp_t gfp, struct mm_struct *fault_mm, int *fault_type); + gfp_t gfp, struct vm_area_struct *vma, + struct vm_fault *vmf, int *fault_type); int shmem_getpage(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp) { return shmem_getpage_gfp(inode, index, pagep, sgp, - mapping_gfp_mask(inode->i_mapping), NULL, NULL); + mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL); } static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) @@ -190,6 +195,11 @@ static const struct inode_operations shmem_special_inode_operations; static const struct vm_operations_struct shmem_vm_ops; static struct file_system_type shmem_fs_type; +bool vma_is_shmem(struct vm_area_struct *vma) +{ + return vma->vm_ops == &shmem_vm_ops; +} + static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); @@ -1563,7 +1573,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, */ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, - struct mm_struct *fault_mm, int *fault_type) + struct vm_area_struct *vma, struct vm_fault *vmf, int *fault_type) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); @@ -1617,7 +1627,7 @@ repeat: * bring it back from swap or allocate. */ sbinfo = SHMEM_SB(inode->i_sb); - charge_mm = fault_mm ? : current->mm; + charge_mm = vma ? vma->vm_mm : current->mm; if (swap.val) { /* Look it up and read it in.. */ @@ -1627,7 +1637,8 @@ repeat: if (fault_type) { *fault_type |= VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT); + mem_cgroup_count_vm_event(charge_mm, + PGMAJFAULT); } /* Here we actually start the io */ page = shmem_swapin(swap, gfp, info, index); @@ -1696,6 +1707,11 @@ repeat: swap_free(swap); } else { + if (vma && userfaultfd_missing(vma)) { + *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); + return 0; + } + /* shmem_symlink() */ if (mapping->a_ops != &shmem_aops) goto alloc_nohuge; @@ -1885,8 +1901,9 @@ static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync return ret; } -static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int shmem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); gfp_t gfp = mapping_gfp_mask(inode->i_mapping); enum sgp_type sgp; @@ -1958,7 +1975,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) sgp = SGP_NOHUGE; error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp, - gfp, vma->vm_mm, &ret); + gfp, vma, vmf, &ret); if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); return ret; @@ -2168,10 +2185,123 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode bool shmem_mapping(struct address_space *mapping) { - if (!mapping->host) - return false; + return mapping->a_ops == &shmem_aops; +} - return mapping->host->i_sb->s_op == &shmem_ops; +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep) +{ + struct inode *inode = file_inode(dst_vma->vm_file); + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + gfp_t gfp = mapping_gfp_mask(mapping); + pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); + struct mem_cgroup *memcg; + spinlock_t *ptl; + void *page_kaddr; + struct page *page; + pte_t _dst_pte, *dst_pte; + int ret; + + ret = -ENOMEM; + if (shmem_acct_block(info->flags, 1)) + goto out; + if (sbinfo->max_blocks) { + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks) >= 0) + goto out_unacct_blocks; + percpu_counter_inc(&sbinfo->used_blocks); + } + + if (!*pagep) { + page = shmem_alloc_page(gfp, info, pgoff); + if (!page) + goto out_dec_used_blocks; + + page_kaddr = kmap_atomic(page); + ret = copy_from_user(page_kaddr, (const void __user *)src_addr, + PAGE_SIZE); + kunmap_atomic(page_kaddr); + + /* fallback to copy_from_user outside mmap_sem */ + if (unlikely(ret)) { + *pagep = page; + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -1); + shmem_unacct_blocks(info->flags, 1); + /* don't free the page */ + return -EFAULT; + } + } else { + page = *pagep; + *pagep = NULL; + } + + VM_BUG_ON(PageLocked(page) || PageSwapBacked(page)); + __SetPageLocked(page); + __SetPageSwapBacked(page); + __SetPageUptodate(page); + + ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false); + if (ret) + goto out_release; + + ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); + if (!ret) { + ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL); + radix_tree_preload_end(); + } + if (ret) + goto out_release_uncharge; + + mem_cgroup_commit_charge(page, memcg, false, false); + + _dst_pte = mk_pte(page, dst_vma->vm_page_prot); + if (dst_vma->vm_flags & VM_WRITE) + _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); + + ret = -EEXIST; + dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + if (!pte_none(*dst_pte)) + goto out_release_uncharge_unlock; + + lru_cache_add_anon(page); + + spin_lock(&info->lock); + info->alloced++; + inode->i_blocks += BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); + spin_unlock(&info->lock); + + inc_mm_counter(dst_mm, mm_counter_file(page)); + page_add_file_rmap(page, false); + set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(dst_vma, dst_addr, dst_pte); + unlock_page(page); + pte_unmap_unlock(dst_pte, ptl); + ret = 0; +out: + return ret; +out_release_uncharge_unlock: + pte_unmap_unlock(dst_pte, ptl); +out_release_uncharge: + mem_cgroup_cancel_charge(page, memcg, false); +out_release: + unlock_page(page); + put_page(page); +out_dec_used_blocks: + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -1); +out_unacct_blocks: + shmem_unacct_blocks(info->flags, 1); + goto out; } #ifdef CONFIG_TMPFS @@ -4133,7 +4263,7 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, BUG_ON(mapping->a_ops != &shmem_aops); error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, - gfp, NULL, NULL); + gfp, NULL, NULL, NULL); if (error) page = ERR_PTR(error); else diff --git a/mm/slab_common.c b/mm/slab_common.c index 296413c2bbcd..0d4fb895cfdd 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -255,7 +255,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, { struct kmem_cache *s; - if (slab_nomerge || (flags & SLAB_NEVER_MERGE)) + if (slab_nomerge) return NULL; if (ctor) @@ -266,6 +266,9 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, size = ALIGN(size, align); flags = kmem_cache_flags(size, flags, name, NULL); + if (flags & SLAB_NEVER_MERGE) + return NULL; + list_for_each_entry_reverse(s, &slab_caches, list) { if (slab_unmergeable(s)) continue; @@ -461,6 +464,9 @@ EXPORT_SYMBOL(kmem_cache_create); static int shutdown_cache(struct kmem_cache *s, struct list_head *release, bool *need_rcu_barrier) { + /* free asan quarantined objects */ + kasan_cache_shutdown(s); + if (__kmem_cache_shutdown(s) != 0) return -EBUSY; @@ -744,7 +750,6 @@ void kmem_cache_destroy(struct kmem_cache *s) get_online_cpus(); get_online_mems(); - kasan_cache_destroy(s); mutex_lock(&slab_mutex); s->refcount--; diff --git a/mm/slub.c b/mm/slub.c index 14fd2f22962c..d24e1ce2e4a7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1626,6 +1626,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) flags &= ~GFP_SLAB_BUG_MASK; pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n", invalid_mask, &invalid_mask, flags, &flags); + dump_stack(); } return allocate_slab(s, diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 574c67b663fe..8679d4a81b98 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -248,20 +248,28 @@ int __meminit vmemmap_populate_basepages(unsigned long start, return 0; } -struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) +struct page * __meminit __populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid) { unsigned long start; unsigned long end; - struct page *map; - map = pfn_to_page(pnum * PAGES_PER_SECTION); - start = (unsigned long)map; - end = (unsigned long)(map + PAGES_PER_SECTION); + /* + * The minimum granularity of memmap extensions is + * SECTION_ACTIVE_SIZE as allocations are tracked in the + * 'map_active' bitmap of the section. + */ + end = ALIGN(pfn + nr_pages, PHYS_PFN(SECTION_ACTIVE_SIZE)); + pfn &= PHYS_PFN(SECTION_ACTIVE_MASK); + nr_pages = end - pfn; + + start = (unsigned long) pfn_to_page(pfn); + end = start + nr_pages * sizeof(struct page); if (vmemmap_populate(start, end, nid)) return NULL; - return map; + return pfn_to_page(pfn); } void __init sparse_mem_maps_populate_node(struct page **map_map, @@ -284,11 +292,13 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, for (pnum = pnum_begin; pnum < pnum_end; pnum++) { struct mem_section *ms; + unsigned long pfn = section_nr_to_pfn(pnum); if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = __populate_section_memmap(pfn, + PAGES_PER_SECTION, nodeid); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); diff --git a/mm/sparse.c b/mm/sparse.c index 1e168bf2779a..3e0c1c73f814 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -24,6 +24,7 @@ #ifdef CONFIG_SPARSEMEM_EXTREME struct mem_section *mem_section[NR_SECTION_ROOTS] ____cacheline_internodealigned_in_smp; +static DEFINE_SPINLOCK(mem_section_lock); /* atomically instantiate new entries */ #else struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] ____cacheline_internodealigned_in_smp; @@ -89,7 +90,22 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) if (!section) return -ENOMEM; - mem_section[root] = section; + spin_lock(&mem_section_lock); + if (mem_section[root] == NULL) { + mem_section[root] = section; + section = NULL; + } + spin_unlock(&mem_section_lock); + + /* + * The only time we expect adding a section may race is during + * post-meminit hotplug. So, there is no expectation that 'section' + * leaks in the !slab_is_available() case. + */ + if (section && slab_is_available()) { + kfree(section); + return -EEXIST; + } return 0; } @@ -168,6 +184,66 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, } } +static int __init section_active_index(phys_addr_t phys) +{ + return (phys & ~(PA_SECTION_MASK)) / SECTION_ACTIVE_SIZE; +} + +static unsigned long section_active_mask(unsigned long pfn, + unsigned long nr_pages) +{ + int idx_start, idx_size; + phys_addr_t start, size; + + WARN_ON((pfn & ~PAGE_SECTION_MASK) + nr_pages > PAGES_PER_SECTION); + if (!nr_pages) + return 0; + + /* + * The size is the number of pages left in the section or + * nr_pages, whichever is smaller. The size will be rounded up + * to the next SECTION_ACTIVE_SIZE boundary, the start will be + * rounded down. + */ + start = PFN_PHYS(pfn); + size = PFN_PHYS(min_not_zero(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK))); + size = ALIGN(size, SECTION_ACTIVE_SIZE); + + idx_start = section_active_index(start); + idx_size = section_active_index(size); + + if (idx_size == 0) + return ULONG_MAX; /* full section */ + return ((1UL << idx_size) - 1) << idx_start; +} + +void __init section_active_init(unsigned long pfn, unsigned long nr_pages) +{ + int end_sec = pfn_to_section_nr(pfn + nr_pages - 1); + int i, start_sec = pfn_to_section_nr(pfn); + + if (!nr_pages) + return; + + for (i = start_sec; i <= end_sec; i++) { + struct mem_section *ms; + unsigned long mask; + unsigned long pfns; + + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + mask = section_active_mask(pfn, pfns); + + ms = __nr_to_section(i); + pr_debug("%s: sec: %d mask: %#018lx\n", __func__, i, mask); + ms->usage->map_active = mask; + + pfn += pfns; + nr_pages -= pfns; + } +} + /* Record a memory area against a node. */ void __init memory_present(int nid, unsigned long start, unsigned long end) { @@ -231,19 +307,23 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); } -static int __meminit sparse_init_one_section(struct mem_section *ms, +static void __meminit sparse_init_one_section(struct mem_section *ms, unsigned long pnum, struct page *mem_map, - unsigned long *pageblock_bitmap) + struct mem_section_usage *usage) { - if (!present_section(ms)) - return -EINVAL; + /* + * Given that SPARSEMEM_VMEMMAP=y supports sub-section hotplug, + * ->section_mem_map can not be guaranteed to point to a full + * section's worth of memory. The field is only valid / used + * in the SPARSEMEM_VMEMMAP=n case. + */ + if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) + mem_map = NULL; ms->section_mem_map &= ~SECTION_MAP_MASK; ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | - SECTION_HAS_MEM_MAP; - ms->pageblock_flags = pageblock_bitmap; - - return 1; + SECTION_HAS_MEM_MAP; + ms->usage = usage; } unsigned long usemap_size(void) @@ -255,9 +335,13 @@ unsigned long usemap_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -static unsigned long *__kmalloc_section_usemap(void) +static struct mem_section_usage *__alloc_section_usage(void) { - return kmalloc(usemap_size(), GFP_KERNEL); + struct mem_section_usage *usage; + + usage = kzalloc(sizeof(*usage) + usemap_size(), GFP_KERNEL); + /* TODO: allocate the map_active bitmap */ + return usage; } #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -293,7 +377,8 @@ again: return p; } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { unsigned long usemap_snr, pgdat_snr; static unsigned long old_usemap_snr = NR_MEM_SECTIONS; @@ -301,7 +386,7 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) struct pglist_data *pgdat = NODE_DATA(nid); int usemap_nid; - usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); + usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) return; @@ -336,7 +421,8 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, return memblock_virt_alloc_node_nopanic(size, pgdat->node_id); } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { } #endif /* CONFIG_MEMORY_HOTREMOVE */ @@ -344,31 +430,33 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) static void __init sparse_early_usemaps_alloc_node(void *data, unsigned long pnum_begin, unsigned long pnum_end, - unsigned long usemap_count, int nodeid) + unsigned long usage_count, int nodeid) { - void *usemap; + void *usage; unsigned long pnum; - unsigned long **usemap_map = (unsigned long **)data; - int size = usemap_size(); + struct mem_section_usage **usage_map = data; + int size = sizeof(struct mem_section_usage) + usemap_size(); - usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), - size * usemap_count); - if (!usemap) { + usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), + size * usage_count); + if (!usage) { pr_warn("%s: allocation failed\n", __func__); return; } + memset(usage, 0, size * usage_count); for (pnum = pnum_begin; pnum < pnum_end; pnum++) { if (!present_section_nr(pnum)) continue; - usemap_map[pnum] = usemap; - usemap += size; - check_usemap_section_nr(nodeid, usemap_map[pnum]); + usage_map[pnum] = usage; + usage += size; + check_usemap_section_nr(nodeid, usage_map[pnum]); } } #ifndef CONFIG_SPARSEMEM_VMEMMAP -struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) +struct page __init *__populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid) { struct page *map; unsigned long size; @@ -420,10 +508,12 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, /* fallback */ for (pnum = pnum_begin; pnum < pnum_end; pnum++) { struct mem_section *ms; + unsigned long pfn = section_nr_to_pfn(pnum); if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = __populate_section_memmap(pfn, + PAGES_PER_SECTION, nodeid); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); @@ -451,7 +541,8 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) struct mem_section *ms = __nr_to_section(pnum); int nid = sparse_early_nid(ms); - map = sparse_mem_map_populate(pnum, nid); + map = __populate_section_memmap(section_nr_to_pfn(pnum), + PAGES_PER_SECTION, nid); if (map) return map; @@ -468,7 +559,7 @@ void __weak __meminit vmemmap_populate_print_last(void) /** * alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap - * @map: usemap_map for pageblock flags or mmap_map for vmemmap + * @map: usage_map for mem_section_usage or mmap_map for vmemmap */ static void __init alloc_usemap_and_memmap(void (*alloc_func) (void *, unsigned long, unsigned long, @@ -521,10 +612,9 @@ static void __init alloc_usemap_and_memmap(void (*alloc_func) */ void __init sparse_init(void) { + struct mem_section_usage *usage, **usage_map; unsigned long pnum; struct page *map; - unsigned long *usemap; - unsigned long **usemap_map; int size; #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER int size2; @@ -539,21 +629,21 @@ void __init sparse_init(void) /* * map is using big page (aka 2M in x86 64 bit) - * usemap is less one page (aka 24 bytes) + * usage is less one page (aka 24 bytes) * so alloc 2M (with 2M align) and 24 bytes in turn will * make next 2M slip to one more 2M later. * then in big system, the memory will have a lot of holes... * here try to allocate 2M pages continuously. * * powerpc need to call sparse_init_one_section right after each - * sparse_early_mem_map_alloc, so allocate usemap_map at first. + * sparse_early_mem_map_alloc, so allocate usage_map at first. */ - size = sizeof(unsigned long *) * NR_MEM_SECTIONS; - usemap_map = memblock_virt_alloc(size, 0); - if (!usemap_map) - panic("can not allocate usemap_map\n"); + size = sizeof(struct mem_section_usage *) * NR_MEM_SECTIONS; + usage_map = memblock_virt_alloc(size, 0); + if (!usage_map) + panic("can not allocate usage_map\n"); alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node, - (void *)usemap_map); + (void *)usage_map); #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER size2 = sizeof(struct page *) * NR_MEM_SECTIONS; @@ -568,8 +658,8 @@ void __init sparse_init(void) if (!present_section_nr(pnum)) continue; - usemap = usemap_map[pnum]; - if (!usemap) + usage = usage_map[pnum]; + if (!usage) continue; #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER @@ -581,7 +671,7 @@ void __init sparse_init(void) continue; sparse_init_one_section(__nr_to_section(pnum), pnum, map, - usemap); + usage); } vmemmap_populate_print_last(); @@ -589,20 +679,21 @@ void __init sparse_init(void) #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER memblock_free_early(__pa(map_map), size2); #endif - memblock_free_early(__pa(usemap_map), size); + memblock_free_early(__pa(usage_map), size); } #ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static struct page *populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid) { - /* This will make the necessary allocations eventually. */ - return sparse_mem_map_populate(pnum, nid); + return __populate_section_memmap(pfn, nr_pages, nid); } -static void __kfree_section_memmap(struct page *memmap) + +static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages) { - unsigned long start = (unsigned long)memmap; - unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); + unsigned long start = (unsigned long) pfn_to_page(pfn); + unsigned long end = start + nr_pages * sizeof(struct page); vmemmap_free(start, end); } @@ -616,11 +707,18 @@ static void free_map_bootmem(struct page *memmap) } #endif /* CONFIG_MEMORY_HOTREMOVE */ #else -static struct page *__kmalloc_section_memmap(void) +struct page *populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid) { struct page *page, *ret; unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION; + if ((pfn & ~PAGE_SECTION_MASK) || nr_pages != PAGES_PER_SECTION) { + WARN(1, "%s: called with section unaligned parameters\n", + __func__); + return NULL; + } + page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size)); if (page) goto got_map_page; @@ -637,13 +735,16 @@ got_map_ptr: return ret; } -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages) { - return __kmalloc_section_memmap(); -} + struct page *memmap = pfn_to_page(pfn); + + if ((pfn & ~PAGE_SECTION_MASK) || nr_pages != PAGES_PER_SECTION) { + WARN(1, "%s: called with section unaligned parameters\n", + __func__); + return; + } -static void __kfree_section_memmap(struct page *memmap) -{ if (is_vmalloc_addr(memmap)) vfree(memmap); else @@ -662,12 +763,12 @@ static void free_map_bootmem(struct page *memmap) >> PAGE_SHIFT; for (i = 0; i < nr_pages; i++, page++) { - magic = (unsigned long) page->lru.next; + magic = (unsigned long) page->freelist; BUG_ON(magic == NODE_INFO); maps_section_nr = pfn_to_section_nr(page_to_pfn(page)); - removing_section_nr = page->private; + removing_section_nr = page_private(page); /* * When this function is called, the removing section is @@ -684,18 +785,179 @@ static void free_map_bootmem(struct page *memmap) #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -/* - * returns the number of sections whose mem_maps were properly - * set. If this is <=0, then that means that the passed-in - * map was not consumed and must be freed. +static bool is_early_section(struct mem_section *ms) +{ + struct page *usemap_page; + + usemap_page = virt_to_page(ms->usage->pageblock_flags); + if (PageSlab(usemap_page) || PageCompound(usemap_page)) + return false; + else + return true; + +} + +#ifndef CONFIG_MEMORY_HOTREMOVE +static void free_map_bootmem(struct page *memmap) +{ +} +#endif + +static void section_deactivate(struct pglist_data *pgdat, unsigned long pfn, + unsigned long nr_pages) +{ + bool early_section; + struct page *memmap = NULL; + struct mem_section_usage *usage = NULL; + int section_nr = pfn_to_section_nr(pfn); + struct mem_section *ms = __nr_to_section(section_nr); + unsigned long mask = section_active_mask(pfn, nr_pages), flags; + + pgdat_resize_lock(pgdat, &flags); + if (!ms->usage || + WARN((ms->usage->map_active & mask) != mask, + "section already deactivated active: %#lx mask: %#lx\n", + ms->usage->map_active, mask)) { + pgdat_resize_unlock(pgdat, &flags); + return; + } + + early_section = is_early_section(ms); + ms->usage->map_active ^= mask; + if (ms->usage->map_active == 0) { + usage = ms->usage; + ms->usage = NULL; + memmap = sparse_decode_mem_map(ms->section_mem_map, + section_nr); + ms->section_mem_map = 0; + } + + pgdat_resize_unlock(pgdat, &flags); + + /* + * There are 3 cases to handle across two configurations + * (SPARSEMEM_VMEMMAP={y,n}): + * + * 1/ deactivation of a partial hot-added section (only possible + * in the SPARSEMEM_VMEMMAP=y case). + * a/ section was present at memory init + * b/ section was hot-added post memory init + * 2/ deactivation of a complete hot-added section + * 3/ deactivation of a complete section from memory init + * + * For 1/, when map_active does not go to zero we will not be + * freeing the usage map, but still need to free the vmemmap + * range. + * + * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified + */ + if (!mask) + return; + if (nr_pages < PAGES_PER_SECTION) { + if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) { + WARN(1, "partial memory section removal not supported\n"); + return; + } + if (!early_section) + depopulate_section_memmap(pfn, nr_pages); + memmap = 0; + } + + if (usage) { + if (!early_section) { + /* + * 'memmap' may be zero in the SPARSEMEM_VMEMMAP=y case + * (see sparse_init_one_section()), so we can't rely on + * it to determine if we need to depopulate the memmap. + * Instead, we uncoditionally depopulate due to 'usage' + * being valid. + */ + if (memmap || (nr_pages >= PAGES_PER_SECTION + && IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))) + depopulate_section_memmap(pfn, nr_pages); + kfree(usage); + return; + } + } + + /* + * The usemap came from bootmem. This is packed with other usemaps + * on the section which has pgdat at boot time. Just keep it as is now. + */ + if (memmap) + free_map_bootmem(memmap); +} + +static struct page * __meminit section_activate(struct pglist_data *pgdat, + unsigned long pfn, unsigned nr_pages) +{ + struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn)); + unsigned long mask = section_active_mask(pfn, nr_pages), flags; + struct mem_section_usage *usage; + bool early_section = false; + struct page *memmap; + int rc = 0; + + usage = __alloc_section_usage(); + if (!usage) + return ERR_PTR(-ENOMEM); + + pgdat_resize_lock(pgdat, &flags); + if (!ms->usage) { + ms->usage = usage; + usage = NULL; + } else + early_section = is_early_section(ms); + + if (!mask) + rc = -EINVAL; + else if (mask & ms->usage->map_active) + rc = -EBUSY; + else + ms->usage->map_active |= mask; + pgdat_resize_unlock(pgdat, &flags); + + kfree(usage); + + if (rc) + return ERR_PTR(rc); + + + /* + * The early init code does not consider partially populated + * initial sections, it simply assumes that memory will never be + * referenced. If we hot-add memory into such a section then we + * do not need to populate the memmap and can simply reuse what + * is already there. + */ + if (nr_pages < PAGES_PER_SECTION && early_section) + return pfn_to_page(pfn); + + memmap = populate_section_memmap(pfn, nr_pages, pgdat->node_id); + if (!memmap) { + section_deactivate(pgdat, pfn, nr_pages); + return ERR_PTR(-ENOMEM); + } + + return memmap; +} + +/** + * sparse_add_section() - create a new memmap section, or populate an + * existing one + * @zone: host zone for the new memory mapping + * @start_pfn: first pfn to add (section aligned if zone != ZONE_DEVICE) + * @nr_pages: number of new pages to add + * + * Returns 0 on success. */ -int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn) +int __meminit sparse_add_section(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct pglist_data *pgdat = zone->zone_pgdat; struct mem_section *ms; struct page *memmap; - unsigned long *usemap; unsigned long flags; int ret; @@ -706,36 +968,28 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn) ret = sparse_index_init(section_nr, pgdat->node_id); if (ret < 0 && ret != -EEXIST) return ret; - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id); - if (!memmap) - return -ENOMEM; - usemap = __kmalloc_section_usemap(); - if (!usemap) { - __kfree_section_memmap(memmap); - return -ENOMEM; - } - pgdat_resize_lock(pgdat, &flags); + memmap = section_activate(pgdat, start_pfn, nr_pages); + if (IS_ERR(memmap)) + return PTR_ERR(memmap); + pgdat_resize_lock(pgdat, &flags); ms = __pfn_to_section(start_pfn); - if (ms->section_mem_map & SECTION_MARKED_PRESENT) { - ret = -EEXIST; + if (nr_pages == PAGES_PER_SECTION && (ms->section_mem_map + & SECTION_MARKED_PRESENT)) { + ret = -EBUSY; goto out; } - - memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION); - ms->section_mem_map |= SECTION_MARKED_PRESENT; - - ret = sparse_init_one_section(ms, section_nr, memmap, usemap); - + sparse_init_one_section(ms, section_nr, memmap, ms->usage); out: pgdat_resize_unlock(pgdat, &flags); - if (ret <= 0) { - kfree(usemap); - __kfree_section_memmap(memmap); + if (nr_pages == PAGES_PER_SECTION && ret < 0 && ret != -EEXIST) { + section_deactivate(pgdat, start_pfn, nr_pages); + return ret; } - return ret; + memset(memmap, 0, sizeof(struct page) * nr_pages); + return 0; } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -760,53 +1014,15 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap) -{ - struct page *usemap_page; - - if (!usemap) - return; - - usemap_page = virt_to_page(usemap); - /* - * Check to see if allocation came from hot-plug-add - */ - if (PageSlab(usemap_page) || PageCompound(usemap_page)) { - kfree(usemap); - if (memmap) - __kfree_section_memmap(memmap); - return; - } - - /* - * The usemap came from bootmem. This is packed with other usemaps - * on the section which has pgdat at boot time. Just keep it as is now. - */ - - if (memmap) - free_map_bootmem(memmap); -} - -void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, +void sparse_remove_section(struct zone *zone, struct mem_section *ms, + unsigned long pfn, unsigned long nr_pages, unsigned long map_offset) { - struct page *memmap = NULL; - unsigned long *usemap = NULL, flags; struct pglist_data *pgdat = zone->zone_pgdat; - pgdat_resize_lock(pgdat, &flags); - if (ms->section_mem_map) { - usemap = ms->pageblock_flags; - memmap = sparse_decode_mem_map(ms->section_mem_map, - __section_nr(ms)); - ms->section_mem_map = 0; - ms->pageblock_flags = NULL; - } - pgdat_resize_unlock(pgdat, &flags); - - clear_hwpoisoned_pages(memmap + map_offset, - PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap); + clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset, + nr_pages - map_offset); + section_deactivate(pgdat, pfn, nr_pages); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/mm/swap.c b/mm/swap.c index 844baedd2429..c4910f14f957 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -209,9 +209,10 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, { int *pgmoved = arg; - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { - enum lru_list lru = page_lru_base_type(page); - list_move_tail(&page->lru, &lruvec->lists[lru]); + if (PageLRU(page) && !PageUnevictable(page)) { + del_page_from_lru_list(page, lruvec, page_lru(page)); + ClearPageActive(page); + add_page_to_lru_list_tail(page, lruvec, page_lru(page)); (*pgmoved)++; } } @@ -235,7 +236,7 @@ static void pagevec_move_tail(struct pagevec *pvec) */ void rotate_reclaimable_page(struct page *page) { - if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && + if (!PageLocked(page) && !PageDirty(page) && !PageUnevictable(page) && PageLRU(page)) { struct pagevec *pvec; unsigned long flags; @@ -971,12 +972,6 @@ EXPORT_SYMBOL(pagevec_lookup_tag); void __init swap_setup(void) { unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); -#ifdef CONFIG_SWAP - int i; - - for (i = 0; i < MAX_SWAPFILES; i++) - spin_lock_init(&swapper_spaces[i].tree_lock); -#endif /* Use a smaller cluster for small-memory machines */ if (megs < 16) diff --git a/mm/swap_slots.c b/mm/swap_slots.c new file mode 100644 index 000000000000..9b5bc86f96ad --- /dev/null +++ b/mm/swap_slots.c @@ -0,0 +1,342 @@ +/* + * Manage cache of swap slots to be used for and returned from + * swap. + * + * Copyright(c) 2016 Intel Corporation. + * + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * We allocate the swap slots from the global pool and put + * it into local per cpu caches. This has the advantage + * of no needing to acquire the swap_info lock every time + * we need a new slot. + * + * There is also opportunity to simply return the slot + * to local caches without needing to acquire swap_info + * lock. We do not reuse the returned slots directly but + * move them back to the global pool in a batch. This + * allows the slots to coaellesce and reduce fragmentation. + * + * The swap entry allocated is marked with SWAP_HAS_CACHE + * flag in map_count that prevents it from being allocated + * again from the global pool. + * + * The swap slots cache is protected by a mutex instead of + * a spin lock as when we search for slots with scan_swap_map, + * we can possibly sleep. + */ + +#include <linux/swap_slots.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/vmalloc.h> +#include <linux/mutex.h> + +#ifdef CONFIG_SWAP + +static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots); +static bool swap_slot_cache_active; +bool swap_slot_cache_enabled; +static bool swap_slot_cache_initialized; +DEFINE_MUTEX(swap_slots_cache_mutex); +/* Serialize swap slots cache enable/disable operations */ +DEFINE_MUTEX(swap_slots_cache_enable_mutex); + +static void __drain_swap_slots_cache(unsigned int type); +static void deactivate_swap_slots_cache(void); +static void reactivate_swap_slots_cache(void); + +#define use_swap_slot_cache (swap_slot_cache_active && \ + swap_slot_cache_enabled && swap_slot_cache_initialized) +#define SLOTS_CACHE 0x1 +#define SLOTS_CACHE_RET 0x2 + +static void deactivate_swap_slots_cache(void) +{ + mutex_lock(&swap_slots_cache_mutex); + swap_slot_cache_active = false; + __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET); + mutex_unlock(&swap_slots_cache_mutex); +} + +static void reactivate_swap_slots_cache(void) +{ + mutex_lock(&swap_slots_cache_mutex); + swap_slot_cache_active = true; + mutex_unlock(&swap_slots_cache_mutex); +} + +/* Must not be called with cpu hot plug lock */ +void disable_swap_slots_cache_lock(void) +{ + mutex_lock(&swap_slots_cache_enable_mutex); + swap_slot_cache_enabled = false; + if (swap_slot_cache_initialized) { + /* serialize with cpu hotplug operations */ + get_online_cpus(); + __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET); + put_online_cpus(); + } +} + +static void __reenable_swap_slots_cache(void) +{ + swap_slot_cache_enabled = has_usable_swap(); +} + +void reenable_swap_slots_cache_unlock(void) +{ + __reenable_swap_slots_cache(); + mutex_unlock(&swap_slots_cache_enable_mutex); +} + +static bool check_cache_active(void) +{ + long pages; + + if (!swap_slot_cache_enabled || !swap_slot_cache_initialized) + return false; + + pages = get_nr_swap_pages(); + if (!swap_slot_cache_active) { + if (pages > num_online_cpus() * + THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE) + reactivate_swap_slots_cache(); + goto out; + } + + /* if global pool of slot caches too low, deactivate cache */ + if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE) + deactivate_swap_slots_cache(); +out: + return swap_slot_cache_active; +} + +static int alloc_swap_slot_cache(unsigned int cpu) +{ + struct swap_slots_cache *cache; + swp_entry_t *slots, *slots_ret; + + /* + * Do allocation outside swap_slots_cache_mutex + * as vzalloc could trigger reclaim and get_swap_page, + * which can lock swap_slots_cache_mutex. + */ + slots = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE); + if (!slots) + return -ENOMEM; + + slots_ret = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE); + if (!slots_ret) { + vfree(slots); + return -ENOMEM; + } + + mutex_lock(&swap_slots_cache_mutex); + cache = &per_cpu(swp_slots, cpu); + if (cache->slots || cache->slots_ret) + /* cache already allocated */ + goto out; + if (!cache->lock_initialized) { + mutex_init(&cache->alloc_lock); + spin_lock_init(&cache->free_lock); + cache->lock_initialized = true; + } + cache->nr = 0; + cache->cur = 0; + cache->n_ret = 0; + cache->slots = slots; + slots = NULL; + cache->slots_ret = slots_ret; + slots_ret = NULL; +out: + mutex_unlock(&swap_slots_cache_mutex); + if (slots) + vfree(slots); + if (slots_ret) + vfree(slots_ret); + return 0; +} + +static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type, + bool free_slots) +{ + struct swap_slots_cache *cache; + swp_entry_t *slots = NULL; + + cache = &per_cpu(swp_slots, cpu); + if ((type & SLOTS_CACHE) && cache->slots) { + mutex_lock(&cache->alloc_lock); + swapcache_free_entries(cache->slots + cache->cur, cache->nr); + cache->cur = 0; + cache->nr = 0; + if (free_slots && cache->slots) { + vfree(cache->slots); + cache->slots = NULL; + } + mutex_unlock(&cache->alloc_lock); + } + if ((type & SLOTS_CACHE_RET) && cache->slots_ret) { + spin_lock_irq(&cache->free_lock); + swapcache_free_entries(cache->slots_ret, cache->n_ret); + cache->n_ret = 0; + if (free_slots && cache->slots_ret) { + slots = cache->slots_ret; + cache->slots_ret = NULL; + } + spin_unlock_irq(&cache->free_lock); + if (slots) + vfree(slots); + } +} + +static void __drain_swap_slots_cache(unsigned int type) +{ + unsigned int cpu; + + /* + * This function is called during + * 1) swapoff, when we have to make sure no + * left over slots are in cache when we remove + * a swap device; + * 2) disabling of swap slot cache, when we run low + * on swap slots when allocating memory and need + * to return swap slots to global pool. + * + * We cannot acquire cpu hot plug lock here as + * this function can be invoked in the cpu + * hot plug path: + * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback + * -> memory allocation -> direct reclaim -> get_swap_page + * -> drain_swap_slots_cache + * + * Hence the loop over current online cpu below could miss cpu that + * is being brought online but not yet marked as online. + * That is okay as we do not schedule and run anything on a + * cpu before it has been marked online. Hence, we will not + * fill any swap slots in slots cache of such cpu. + * There are no slots on such cpu that need to be drained. + */ + for_each_online_cpu(cpu) + drain_slots_cache_cpu(cpu, type, false); +} + +static int free_slot_cache(unsigned int cpu) +{ + mutex_lock(&swap_slots_cache_mutex); + drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true); + mutex_unlock(&swap_slots_cache_mutex); + return 0; +} + +int enable_swap_slots_cache(void) +{ + int ret = 0; + + mutex_lock(&swap_slots_cache_enable_mutex); + if (swap_slot_cache_initialized) { + __reenable_swap_slots_cache(); + goto out_unlock; + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache", + alloc_swap_slot_cache, free_slot_cache); + if (ret < 0) + goto out_unlock; + swap_slot_cache_initialized = true; + __reenable_swap_slots_cache(); +out_unlock: + mutex_unlock(&swap_slots_cache_enable_mutex); + return 0; +} + +/* called with swap slot cache's alloc lock held */ +static int refill_swap_slots_cache(struct swap_slots_cache *cache) +{ + if (!use_swap_slot_cache || cache->nr) + return 0; + + cache->cur = 0; + if (swap_slot_cache_active) + cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots); + + return cache->nr; +} + +int free_swap_slot(swp_entry_t entry) +{ + struct swap_slots_cache *cache; + + BUG_ON(!swap_slot_cache_initialized); + + cache = &get_cpu_var(swp_slots); + if (use_swap_slot_cache && cache->slots_ret) { + spin_lock_irq(&cache->free_lock); + /* Swap slots cache may be deactivated before acquiring lock */ + if (!use_swap_slot_cache) { + spin_unlock_irq(&cache->free_lock); + goto direct_free; + } + if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) { + /* + * Return slots to global pool. + * The current swap_map value is SWAP_HAS_CACHE. + * Set it to 0 to indicate it is available for + * allocation in global pool + */ + swapcache_free_entries(cache->slots_ret, cache->n_ret); + cache->n_ret = 0; + } + cache->slots_ret[cache->n_ret++] = entry; + spin_unlock_irq(&cache->free_lock); + } else { +direct_free: + swapcache_free_entries(&entry, 1); + } + put_cpu_var(swp_slots); + + return 0; +} + +swp_entry_t get_swap_page(void) +{ + swp_entry_t entry, *pentry; + struct swap_slots_cache *cache; + + /* + * Preemption is allowed here, because we may sleep + * in refill_swap_slots_cache(). But it is safe, because + * accesses to the per-CPU data structure are protected by the + * mutex cache->alloc_lock. + * + * The alloc path here does not touch cache->slots_ret + * so cache->free_lock is not taken. + */ + cache = raw_cpu_ptr(&swp_slots); + + entry.val = 0; + if (check_cache_active()) { + mutex_lock(&cache->alloc_lock); + if (cache->slots) { +repeat: + if (cache->nr) { + pentry = &cache->slots[cache->cur++]; + entry = *pentry; + pentry->val = 0; + cache->nr--; + } else { + if (refill_swap_slots_cache(cache)) + goto repeat; + } + } + mutex_unlock(&cache->alloc_lock); + if (entry.val) + return entry; + } + + get_swap_pages(1, &entry); + + return entry; +} + +#endif /* CONFIG_SWAP */ diff --git a/mm/swap_state.c b/mm/swap_state.c index 35d7e0ee1c77..2126e9ba23b2 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -17,6 +17,8 @@ #include <linux/blkdev.h> #include <linux/pagevec.h> #include <linux/migrate.h> +#include <linux/vmalloc.h> +#include <linux/swap_slots.h> #include <asm/pgtable.h> @@ -32,15 +34,8 @@ static const struct address_space_operations swap_aops = { #endif }; -struct address_space swapper_spaces[MAX_SWAPFILES] = { - [0 ... MAX_SWAPFILES - 1] = { - .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .i_mmap_writable = ATOMIC_INIT(0), - .a_ops = &swap_aops, - /* swap cache doesn't use writeback related tags */ - .flags = 1 << AS_NO_WRITEBACK_TAGS, - } -}; +struct address_space *swapper_spaces[MAX_SWAPFILES]; +static unsigned int nr_swapper_spaces[MAX_SWAPFILES]; #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) @@ -53,11 +48,26 @@ static struct { unsigned long total_swapcache_pages(void) { - int i; + unsigned int i, j, nr; unsigned long ret = 0; + struct address_space *spaces; - for (i = 0; i < MAX_SWAPFILES; i++) - ret += swapper_spaces[i].nrpages; + rcu_read_lock(); + for (i = 0; i < MAX_SWAPFILES; i++) { + /* + * The corresponding entries in nr_swapper_spaces and + * swapper_spaces will be reused only after at least + * one grace period. So it is impossible for them + * belongs to different usage. + */ + nr = nr_swapper_spaces[i]; + spaces = rcu_dereference(swapper_spaces[i]); + if (!nr || !spaces) + continue; + for (j = 0; j < nr; j++) + ret += spaces[j].nrpages; + } + rcu_read_unlock(); return ret; } @@ -315,6 +325,17 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, break; /* + * Just skip read ahead for unused swap slot. + * During swap_off when swap_slot_cache is disabled, + * we have to handle the race between putting + * swap entry in swap cache and marking swap slot + * as SWAP_HAS_CACHE. That's done in later part of code or + * else swap_off will be aborted if we return NULL. + */ + if (!__swp_swapcount(entry) && swap_slot_cache_enabled) + return NULL; + + /* * Get a new page to read into from swap. */ if (!new_page) { @@ -505,3 +526,38 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, skip: return read_swap_cache_async(entry, gfp_mask, vma, addr); } + +int init_swap_address_space(unsigned int type, unsigned long nr_pages) +{ + struct address_space *spaces, *space; + unsigned int i, nr; + + nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); + spaces = vzalloc(sizeof(struct address_space) * nr); + if (!spaces) + return -ENOMEM; + for (i = 0; i < nr; i++) { + space = spaces + i; + INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN); + atomic_set(&space->i_mmap_writable, 0); + space->a_ops = &swap_aops; + /* swap cache doesn't use writeback related tags */ + mapping_set_no_writeback_tags(space); + spin_lock_init(&space->tree_lock); + } + nr_swapper_spaces[type] = nr; + rcu_assign_pointer(swapper_spaces[type], spaces); + + return 0; +} + +void exit_swap_address_space(unsigned int type) +{ + struct address_space *spaces; + + spaces = swapper_spaces[type]; + nr_swapper_spaces[type] = 0; + rcu_assign_pointer(swapper_spaces[type], NULL); + synchronize_rcu(); + kvfree(spaces); +} diff --git a/mm/swapfile.c b/mm/swapfile.c index 4761701d1721..32baef0473a8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -34,6 +34,7 @@ #include <linux/frontswap.h> #include <linux/swapfile.h> #include <linux/export.h> +#include <linux/swap_slots.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -257,6 +258,52 @@ static inline void cluster_set_null(struct swap_cluster_info *info) info->data = 0; } +static inline void __lock_cluster(struct swap_cluster_info *ci) +{ + spin_lock(&ci->lock); +} + +static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si, + unsigned long offset) +{ + struct swap_cluster_info *ci; + + ci = si->cluster_info; + if (ci) { + ci += offset / SWAPFILE_CLUSTER; + __lock_cluster(ci); + } + return ci; +} + +static inline void unlock_cluster(struct swap_cluster_info *ci) +{ + if (ci) + spin_unlock(&ci->lock); +} + +static inline struct swap_cluster_info *lock_cluster_or_swap_info( + struct swap_info_struct *si, + unsigned long offset) +{ + struct swap_cluster_info *ci; + + ci = lock_cluster(si, offset); + if (!ci) + spin_lock(&si->lock); + + return ci; +} + +static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si, + struct swap_cluster_info *ci) +{ + if (ci) + unlock_cluster(ci); + else + spin_unlock(&si->lock); +} + static inline bool cluster_list_empty(struct swap_cluster_list *list) { return cluster_is_null(&list->head); @@ -281,9 +328,17 @@ static void cluster_list_add_tail(struct swap_cluster_list *list, cluster_set_next_flag(&list->head, idx, 0); cluster_set_next_flag(&list->tail, idx, 0); } else { + struct swap_cluster_info *ci_tail; unsigned int tail = cluster_next(&list->tail); - cluster_set_next(&ci[tail], idx); + /* + * Nested cluster lock, but both cluster locks are + * only acquired when we held swap_info_struct->lock + */ + ci_tail = ci + tail; + __lock_cluster(ci_tail); + cluster_set_next(ci_tail, idx); + unlock_cluster(ci_tail); cluster_set_next_flag(&list->tail, idx, 0); } } @@ -328,7 +383,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si, */ static void swap_do_scheduled_discard(struct swap_info_struct *si) { - struct swap_cluster_info *info; + struct swap_cluster_info *info, *ci; unsigned int idx; info = si->cluster_info; @@ -341,10 +396,14 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si) SWAPFILE_CLUSTER); spin_lock(&si->lock); - cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE); + ci = lock_cluster(si, idx * SWAPFILE_CLUSTER); + cluster_set_flag(ci, CLUSTER_FLAG_FREE); + unlock_cluster(ci); cluster_list_add_tail(&si->free_clusters, info, idx); + ci = lock_cluster(si, idx * SWAPFILE_CLUSTER); memset(si->swap_map + idx * SWAPFILE_CLUSTER, 0, SWAPFILE_CLUSTER); + unlock_cluster(ci); } } @@ -443,12 +502,13 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si, * Try to get a swap entry from current cpu's swap entry pool (a cluster). This * might involve allocating a new cluster for current CPU too. */ -static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, +static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, unsigned long *offset, unsigned long *scan_base) { struct percpu_cluster *cluster; + struct swap_cluster_info *ci; bool found_free; - unsigned long tmp; + unsigned long tmp, max; new_cluster: cluster = this_cpu_ptr(si->percpu_cluster); @@ -466,7 +526,7 @@ new_cluster: *scan_base = *offset = si->cluster_next; goto new_cluster; } else - return; + return false; } found_free = false; @@ -476,14 +536,21 @@ new_cluster: * check if there is still free entry in the cluster */ tmp = cluster->next; - while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) * - SWAPFILE_CLUSTER) { + max = min_t(unsigned long, si->max, + (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER); + if (tmp >= max) { + cluster_set_null(&cluster->index); + goto new_cluster; + } + ci = lock_cluster(si, tmp); + while (tmp < max) { if (!si->swap_map[tmp]) { found_free = true; break; } tmp++; } + unlock_cluster(ci); if (!found_free) { cluster_set_null(&cluster->index); goto new_cluster; @@ -491,15 +558,22 @@ new_cluster: cluster->next = tmp + 1; *offset = tmp; *scan_base = tmp; + return found_free; } -static unsigned long scan_swap_map(struct swap_info_struct *si, - unsigned char usage) +static int scan_swap_map_slots(struct swap_info_struct *si, + unsigned char usage, int nr, + swp_entry_t slots[]) { + struct swap_cluster_info *ci; unsigned long offset; unsigned long scan_base; unsigned long last_in_cluster = 0; int latency_ration = LATENCY_LIMIT; + int n_ret = 0; + + if (nr > SWAP_BATCH) + nr = SWAP_BATCH; /* * We try to cluster swap pages by allocating them sequentially @@ -517,8 +591,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, /* SSD algorithm */ if (si->cluster_info) { - scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); - goto checks; + if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) + goto checks; + else + goto scan; } if (unlikely(!si->cluster_nr--)) { @@ -562,8 +638,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, checks: if (si->cluster_info) { - while (scan_swap_map_ssd_cluster_conflict(si, offset)) - scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); + while (scan_swap_map_ssd_cluster_conflict(si, offset)) { + /* take a break if we already got some slots */ + if (n_ret) + goto done; + if (!scan_swap_map_try_ssd_cluster(si, &offset, + &scan_base)) + goto scan; + } } if (!(si->flags & SWP_WRITEOK)) goto no_page; @@ -572,9 +654,11 @@ checks: if (offset > si->highest_bit) scan_base = offset = si->lowest_bit; + ci = lock_cluster(si, offset); /* reuse swap entry of cache-only swap if not busy. */ if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { int swap_was_freed; + unlock_cluster(ci); spin_unlock(&si->lock); swap_was_freed = __try_to_reclaim_swap(si, offset); spin_lock(&si->lock); @@ -584,8 +668,13 @@ checks: goto scan; /* check next one */ } - if (si->swap_map[offset]) - goto scan; + if (si->swap_map[offset]) { + unlock_cluster(ci); + if (!n_ret) + goto scan; + else + goto done; + } if (offset == si->lowest_bit) si->lowest_bit++; @@ -601,10 +690,45 @@ checks: } si->swap_map[offset] = usage; inc_cluster_info_page(si, si->cluster_info, offset); + unlock_cluster(ci); si->cluster_next = offset + 1; - si->flags -= SWP_SCANNING; + slots[n_ret++] = swp_entry(si->type, offset); + + /* got enough slots or reach max slots? */ + if ((n_ret == nr) || (offset >= si->highest_bit)) + goto done; - return offset; + /* search for next available slot */ + + /* time to take a break? */ + if (unlikely(--latency_ration < 0)) { + if (n_ret) + goto done; + spin_unlock(&si->lock); + cond_resched(); + spin_lock(&si->lock); + latency_ration = LATENCY_LIMIT; + } + + /* try to get more slots in cluster */ + if (si->cluster_info) { + if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) + goto checks; + else + goto done; + } + /* non-ssd case */ + ++offset; + + /* non-ssd case, still more slots in cluster? */ + if (si->cluster_nr && !si->swap_map[offset]) { + --si->cluster_nr; + goto checks; + } + +done: + si->flags -= SWP_SCANNING; + return n_ret; scan: spin_unlock(&si->lock); @@ -642,17 +766,41 @@ scan: no_page: si->flags -= SWP_SCANNING; - return 0; + return n_ret; +} + +static unsigned long scan_swap_map(struct swap_info_struct *si, + unsigned char usage) +{ + swp_entry_t entry; + int n_ret; + + n_ret = scan_swap_map_slots(si, usage, 1, &entry); + + if (n_ret) + return swp_offset(entry); + else + return 0; + } -swp_entry_t get_swap_page(void) +int get_swap_pages(int n_goal, swp_entry_t swp_entries[]) { struct swap_info_struct *si, *next; - pgoff_t offset; + long avail_pgs; + int n_ret = 0; - if (atomic_long_read(&nr_swap_pages) <= 0) + avail_pgs = atomic_long_read(&nr_swap_pages); + if (avail_pgs <= 0) goto noswap; - atomic_long_dec(&nr_swap_pages); + + if (n_goal > SWAP_BATCH) + n_goal = SWAP_BATCH; + + if (n_goal > avail_pgs) + n_goal = avail_pgs; + + atomic_long_sub(n_goal, &nr_swap_pages); spin_lock(&swap_avail_lock); @@ -678,14 +826,14 @@ start_over: spin_unlock(&si->lock); goto nextsi; } - - /* This is called for allocating swap entry for cache */ - offset = scan_swap_map(si, SWAP_HAS_CACHE); + n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, + n_goal, swp_entries); spin_unlock(&si->lock); - if (offset) - return swp_entry(si->type, offset); + if (n_ret) + goto check_out; pr_debug("scan_swap_map of si %d failed to find offset\n", - si->type); + si->type); + spin_lock(&swap_avail_lock); nextsi: /* @@ -696,7 +844,8 @@ nextsi: * up between us dropping swap_avail_lock and taking si->lock. * Since we dropped the swap_avail_lock, the swap_avail_head * list may have been modified; so if next is still in the - * swap_avail_head list then try it, otherwise start over. + * swap_avail_head list then try it, otherwise start over + * if we have not gotten any slots. */ if (plist_node_empty(&next->avail_list)) goto start_over; @@ -704,9 +853,11 @@ nextsi: spin_unlock(&swap_avail_lock); - atomic_long_inc(&nr_swap_pages); +check_out: + if (n_ret < n_goal) + atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages); noswap: - return (swp_entry_t) {0}; + return n_ret; } /* The only caller of this function is now suspend routine */ @@ -731,7 +882,7 @@ swp_entry_t get_swap_page_of_type(int type) return (swp_entry_t) {0}; } -static struct swap_info_struct *swap_info_get(swp_entry_t entry) +static struct swap_info_struct *__swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; unsigned long offset, type; @@ -747,34 +898,76 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry) offset = swp_offset(entry); if (offset >= p->max) goto bad_offset; - if (!p->swap_map[offset]) - goto bad_free; - spin_lock(&p->lock); return p; -bad_free: - pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val); - goto out; bad_offset: - pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val); + pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val); goto out; bad_device: - pr_err("swap_free: %s%08lx\n", Unused_file, entry.val); + pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val); goto out; bad_nofile: - pr_err("swap_free: %s%08lx\n", Bad_file, entry.val); + pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val); +out: + return NULL; +} + +static struct swap_info_struct *_swap_info_get(swp_entry_t entry) +{ + struct swap_info_struct *p; + + p = __swap_info_get(entry); + if (!p) + goto out; + if (!p->swap_map[swp_offset(entry)]) + goto bad_free; + return p; + +bad_free: + pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val); + goto out; out: return NULL; } -static unsigned char swap_entry_free(struct swap_info_struct *p, - swp_entry_t entry, unsigned char usage) +static struct swap_info_struct *swap_info_get(swp_entry_t entry) { + struct swap_info_struct *p; + + p = _swap_info_get(entry); + if (p) + spin_lock(&p->lock); + return p; +} + +static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry, + struct swap_info_struct *q) +{ + struct swap_info_struct *p; + + p = _swap_info_get(entry); + + if (p != q) { + if (q != NULL) + spin_unlock(&q->lock); + if (p != NULL) + spin_lock(&p->lock); + } + return p; +} + +static unsigned char __swap_entry_free(struct swap_info_struct *p, + swp_entry_t entry, unsigned char usage) +{ + struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char count; unsigned char has_cache; + ci = lock_cluster_or_swap_info(p, offset); + count = p->swap_map[offset]; + has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; @@ -798,38 +991,52 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, } usage = count | has_cache; - p->swap_map[offset] = usage; - - /* free if no reference */ - if (!usage) { - mem_cgroup_uncharge_swap(entry); - dec_cluster_info_page(p, p->cluster_info, offset); - if (offset < p->lowest_bit) - p->lowest_bit = offset; - if (offset > p->highest_bit) { - bool was_full = !p->highest_bit; - p->highest_bit = offset; - if (was_full && (p->flags & SWP_WRITEOK)) { - spin_lock(&swap_avail_lock); - WARN_ON(!plist_node_empty(&p->avail_list)); - if (plist_node_empty(&p->avail_list)) - plist_add(&p->avail_list, - &swap_avail_head); - spin_unlock(&swap_avail_lock); - } - } - atomic_long_inc(&nr_swap_pages); - p->inuse_pages--; - frontswap_invalidate_page(p->type, offset); - if (p->flags & SWP_BLKDEV) { - struct gendisk *disk = p->bdev->bd_disk; - if (disk->fops->swap_slot_free_notify) - disk->fops->swap_slot_free_notify(p->bdev, - offset); + p->swap_map[offset] = usage ? : SWAP_HAS_CACHE; + + unlock_cluster_or_swap_info(p, ci); + + return usage; +} + +static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char count; + + ci = lock_cluster(p, offset); + count = p->swap_map[offset]; + VM_BUG_ON(count != SWAP_HAS_CACHE); + p->swap_map[offset] = 0; + dec_cluster_info_page(p, p->cluster_info, offset); + unlock_cluster(ci); + + mem_cgroup_uncharge_swap(entry); + if (offset < p->lowest_bit) + p->lowest_bit = offset; + if (offset > p->highest_bit) { + bool was_full = !p->highest_bit; + + p->highest_bit = offset; + if (was_full && (p->flags & SWP_WRITEOK)) { + spin_lock(&swap_avail_lock); + WARN_ON(!plist_node_empty(&p->avail_list)); + if (plist_node_empty(&p->avail_list)) + plist_add(&p->avail_list, + &swap_avail_head); + spin_unlock(&swap_avail_lock); } } + atomic_long_inc(&nr_swap_pages); + p->inuse_pages--; + frontswap_invalidate_page(p->type, offset); + if (p->flags & SWP_BLKDEV) { + struct gendisk *disk = p->bdev->bd_disk; - return usage; + if (disk->fops->swap_slot_free_notify) + disk->fops->swap_slot_free_notify(p->bdev, + offset); + } } /* @@ -840,10 +1047,10 @@ void swap_free(swp_entry_t entry) { struct swap_info_struct *p; - p = swap_info_get(entry); + p = _swap_info_get(entry); if (p) { - swap_entry_free(p, entry, 1); - spin_unlock(&p->lock); + if (!__swap_entry_free(p, entry, 1)) + free_swap_slot(entry); } } @@ -854,11 +1061,33 @@ void swapcache_free(swp_entry_t entry) { struct swap_info_struct *p; - p = swap_info_get(entry); + p = _swap_info_get(entry); if (p) { - swap_entry_free(p, entry, SWAP_HAS_CACHE); - spin_unlock(&p->lock); + if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE)) + free_swap_slot(entry); + } +} + +void swapcache_free_entries(swp_entry_t *entries, int n) +{ + struct swap_info_struct *p, *prev; + int i; + + if (n <= 0) + return; + + prev = NULL; + p = NULL; + for (i = 0; i < n; ++i) { + p = swap_info_get_cont(entries[i], prev); + if (p) + swap_entry_free(p, entries[i]); + else + break; + prev = p; } + if (p) + spin_unlock(&p->lock); } /* @@ -870,13 +1099,39 @@ int page_swapcount(struct page *page) { int count = 0; struct swap_info_struct *p; + struct swap_cluster_info *ci; swp_entry_t entry; + unsigned long offset; entry.val = page_private(page); - p = swap_info_get(entry); + p = _swap_info_get(entry); if (p) { - count = swap_count(p->swap_map[swp_offset(entry)]); - spin_unlock(&p->lock); + offset = swp_offset(entry); + ci = lock_cluster_or_swap_info(p, offset); + count = swap_count(p->swap_map[offset]); + unlock_cluster_or_swap_info(p, ci); + } + return count; +} + +/* + * How many references to @entry are currently swapped out? + * This does not give an exact answer when swap count is continued, + * but does include the high COUNT_CONTINUED flag to allow for that. + */ +int __swp_swapcount(swp_entry_t entry) +{ + int count = 0; + pgoff_t offset; + struct swap_info_struct *si; + struct swap_cluster_info *ci; + + si = __swap_info_get(entry); + if (si) { + offset = swp_offset(entry); + ci = lock_cluster_or_swap_info(si, offset); + count = swap_count(si->swap_map[offset]); + unlock_cluster_or_swap_info(si, ci); } return count; } @@ -889,22 +1144,26 @@ int swp_swapcount(swp_entry_t entry) { int count, tmp_count, n; struct swap_info_struct *p; + struct swap_cluster_info *ci; struct page *page; pgoff_t offset; unsigned char *map; - p = swap_info_get(entry); + p = _swap_info_get(entry); if (!p) return 0; - count = swap_count(p->swap_map[swp_offset(entry)]); + offset = swp_offset(entry); + + ci = lock_cluster_or_swap_info(p, offset); + + count = swap_count(p->swap_map[offset]); if (!(count & COUNT_CONTINUED)) goto out; count &= ~COUNT_CONTINUED; n = SWAP_MAP_MAX + 1; - offset = swp_offset(entry); page = vmalloc_to_page(p->swap_map + offset); offset &= ~PAGE_MASK; VM_BUG_ON(page_private(page) != SWP_CONTINUED); @@ -919,7 +1178,7 @@ int swp_swapcount(swp_entry_t entry) n *= (SWAP_CONT_MAX + 1); } while (tmp_count & COUNT_CONTINUED); out: - spin_unlock(&p->lock); + unlock_cluster_or_swap_info(p, ci); return count; } @@ -1011,21 +1270,23 @@ int free_swap_and_cache(swp_entry_t entry) { struct swap_info_struct *p; struct page *page = NULL; + unsigned char count; if (non_swap_entry(entry)) return 1; - p = swap_info_get(entry); + p = _swap_info_get(entry); if (p) { - if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { + count = __swap_entry_free(p, entry, 1); + if (count == SWAP_HAS_CACHE) { page = find_get_page(swap_address_space(entry), swp_offset(entry)); if (page && !trylock_page(page)) { put_page(page); page = NULL; } - } - spin_unlock(&p->lock); + } else if (!count) + free_swap_slot(entry); } if (page) { /* @@ -1853,6 +2114,17 @@ static void reinsert_swap_info(struct swap_info_struct *p) spin_unlock(&swap_lock); } +bool has_usable_swap(void) +{ + bool ret = true; + + spin_lock(&swap_lock); + if (plist_head_empty(&swap_active_head)) + ret = false; + spin_unlock(&swap_lock); + return ret; +} + SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; @@ -1923,6 +2195,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&p->lock); spin_unlock(&swap_lock); + disable_swap_slots_cache_lock(); + set_current_oom_origin(); err = try_to_unuse(p->type, false, 0); /* force unuse all pages */ clear_current_oom_origin(); @@ -1930,9 +2204,12 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) if (err) { /* re-insert swap space back into swap_list */ reinsert_swap_info(p); + reenable_swap_slots_cache_unlock(); goto out_dput; } + reenable_swap_slots_cache_unlock(); + flush_work(&p->discard_work); destroy_swap_extents(p); @@ -1975,6 +2252,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) vfree(frontswap_map); /* Destroy swap account information */ swap_cgroup_swapoff(p->type); + exit_swap_address_space(p->type); inode = mapping->host; if (S_ISBLK(inode->i_mode)) { @@ -2298,6 +2576,13 @@ static unsigned long read_swap_header(struct swap_info_struct *p, return maxpages; } +#define SWAP_CLUSTER_INFO_COLS \ + DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) +#define SWAP_CLUSTER_SPACE_COLS \ + DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) +#define SWAP_CLUSTER_COLS \ + max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) + static int setup_swap_map_and_extents(struct swap_info_struct *p, union swap_header *swap_header, unsigned char *swap_map, @@ -2305,11 +2590,12 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, unsigned long maxpages, sector_t *span) { - int i; + unsigned int j, k; unsigned int nr_good_pages; int nr_extents; unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); - unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER; + unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS; + unsigned long i, idx; nr_good_pages = maxpages - 1; /* omit header page */ @@ -2357,15 +2643,23 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, if (!cluster_info) return nr_extents; - for (i = 0; i < nr_clusters; i++) { - if (!cluster_count(&cluster_info[idx])) { + + /* + * Reduce false cache line sharing between cluster_info and + * sharing same address space. + */ + for (k = 0; k < SWAP_CLUSTER_COLS; k++) { + j = (k + col) % SWAP_CLUSTER_COLS; + for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { + idx = i * SWAP_CLUSTER_COLS + j; + if (idx >= nr_clusters) + continue; + if (cluster_count(&cluster_info[idx])) + continue; cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE); cluster_list_add_tail(&p->free_clusters, cluster_info, idx); } - idx++; - if (idx == nr_clusters) - idx = 0; } return nr_extents; } @@ -2538,6 +2832,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) } } + error = init_swap_address_space(p->type, maxpages); + if (error) + goto bad_swap; + mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) @@ -2593,6 +2891,8 @@ out: putname(name); if (inode && S_ISREG(inode->i_mode)) inode_unlock(inode); + if (!error) + enable_swap_slots_cache(); return error; } @@ -2627,6 +2927,7 @@ void si_swapinfo(struct sysinfo *val) static int __swap_duplicate(swp_entry_t entry, unsigned char usage) { struct swap_info_struct *p; + struct swap_cluster_info *ci; unsigned long offset, type; unsigned char count; unsigned char has_cache; @@ -2640,10 +2941,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) goto bad_file; p = swap_info[type]; offset = swp_offset(entry); - - spin_lock(&p->lock); if (unlikely(offset >= p->max)) - goto unlock_out; + goto out; + + ci = lock_cluster_or_swap_info(p, offset); count = p->swap_map[offset]; @@ -2686,7 +2987,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) p->swap_map[offset] = count | has_cache; unlock_out: - spin_unlock(&p->lock); + unlock_cluster_or_swap_info(p, ci); out: return err; @@ -2775,6 +3076,7 @@ EXPORT_SYMBOL_GPL(__page_file_index); int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) { struct swap_info_struct *si; + struct swap_cluster_info *ci; struct page *head; struct page *page; struct page *list_page; @@ -2798,6 +3100,9 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) } offset = swp_offset(entry); + + ci = lock_cluster(si, offset); + count = si->swap_map[offset] & ~SWAP_HAS_CACHE; if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) { @@ -2810,6 +3115,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) } if (!page) { + unlock_cluster(ci); spin_unlock(&si->lock); return -ENOMEM; } @@ -2858,6 +3164,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) list_add_tail(&page->lru, &head->lru); page = NULL; /* now it's attached, don't free it */ out: + unlock_cluster(ci); spin_unlock(&si->lock); outer: if (page) @@ -2871,7 +3178,8 @@ outer: * into, carry if so, or else fail until a new continuation page is allocated; * when the original swap_map count is decremented from 0 with continuation, * borrow from the continuation and report whether it still holds more. - * Called while __swap_duplicate() or swap_entry_free() holds swap_lock. + * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster + * lock. */ static bool swap_count_continued(struct swap_info_struct *si, pgoff_t offset, unsigned char count) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index af817e5060fb..a0817cc470b0 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -14,6 +14,9 @@ #include <linux/swapops.h> #include <linux/userfaultfd_k.h> #include <linux/mmu_notifier.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/shmem_fs.h> #include <asm/tlbflush.h> #include "internal.h" @@ -139,6 +142,198 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) return pmd; } +#ifdef CONFIG_HUGETLB_PAGE +/* + * __mcopy_atomic processing for HUGETLB vmas. Note that this routine is + * called with mmap_sem held, it will release mmap_sem before returning. + */ +static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, + struct vm_area_struct *dst_vma, + unsigned long dst_start, + unsigned long src_start, + unsigned long len, + bool zeropage) +{ + ssize_t err; + pte_t *dst_pte; + unsigned long src_addr, dst_addr; + long copied; + struct page *page; + struct hstate *h; + unsigned long vma_hpagesize; + pgoff_t idx; + u32 hash; + struct address_space *mapping; + + /* + * There is no default zero huge page for all huge page sizes as + * supported by hugetlb. A PMD_SIZE huge pages may exist as used + * by THP. Since we can not reliably insert a zero page, this + * feature is not supported. + */ + if (zeropage) { + up_read(&dst_mm->mmap_sem); + return -EINVAL; + } + + src_addr = src_start; + dst_addr = dst_start; + copied = 0; + page = NULL; + vma_hpagesize = vma_kernel_pagesize(dst_vma); + + /* + * Validate alignment based on huge page size + */ + err = -EINVAL; + if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1)) + goto out_unlock; + +retry: + /* + * On routine entry dst_vma is set. If we had to drop mmap_sem and + * retry, dst_vma will be set to NULL and we must lookup again. + */ + if (!dst_vma) { + err = -EINVAL; + dst_vma = find_vma(dst_mm, dst_start); + if (!dst_vma || !is_vm_hugetlb_page(dst_vma)) + goto out_unlock; + + if (vma_hpagesize != vma_kernel_pagesize(dst_vma)) + goto out_unlock; + + /* + * Make sure the vma is not shared, that the remaining dst + * range is both valid and fully within a single existing vma. + */ + if (dst_vma->vm_flags & VM_SHARED) + goto out_unlock; + if (dst_start < dst_vma->vm_start || + dst_start + len > dst_vma->vm_end) + goto out_unlock; + } + + if (WARN_ON(dst_addr & (vma_hpagesize - 1) || + (len - copied) & (vma_hpagesize - 1))) + goto out_unlock; + + /* + * Only allow __mcopy_atomic_hugetlb on userfaultfd registered ranges. + */ + if (!dst_vma->vm_userfaultfd_ctx.ctx) + goto out_unlock; + + /* + * Ensure the dst_vma has a anon_vma. + */ + err = -ENOMEM; + if (unlikely(anon_vma_prepare(dst_vma))) + goto out_unlock; + + h = hstate_vma(dst_vma); + + while (src_addr < src_start + len) { + pte_t dst_pteval; + + BUG_ON(dst_addr >= dst_start + len); + VM_BUG_ON(dst_addr & ~huge_page_mask(h)); + + /* + * Serialize via hugetlb_fault_mutex + */ + idx = linear_page_index(dst_vma, dst_addr); + mapping = dst_vma->vm_file->f_mapping; + hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping, + idx, dst_addr); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + + err = -ENOMEM; + dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h)); + if (!dst_pte) { + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + goto out_unlock; + } + + err = -EEXIST; + dst_pteval = huge_ptep_get(dst_pte); + if (!huge_pte_none(dst_pteval)) { + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + goto out_unlock; + } + + err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, + dst_addr, src_addr, &page); + + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + + cond_resched(); + + if (unlikely(err == -EFAULT)) { + up_read(&dst_mm->mmap_sem); + BUG_ON(!page); + + err = copy_huge_page_from_user(page, + (const void __user *)src_addr, + pages_per_huge_page(h), true); + if (unlikely(err)) { + err = -EFAULT; + goto out; + } + down_read(&dst_mm->mmap_sem); + + dst_vma = NULL; + goto retry; + } else + BUG_ON(page); + + if (!err) { + dst_addr += vma_hpagesize; + src_addr += vma_hpagesize; + copied += vma_hpagesize; + + if (fatal_signal_pending(current)) + err = -EINTR; + } + if (err) + break; + } + +out_unlock: + up_read(&dst_mm->mmap_sem); +out: + if (page) { + /* + * We encountered an error and are about to free a newly + * allocated huge page. It is possible that there was a + * reservation associated with the page that has been + * consumed. See the routine restore_reserve_on_error + * for details. Unfortunately, we can not call + * restore_reserve_on_error now as it would require holding + * mmap_sem. Clear the PagePrivate flag so that the global + * reserve count will not be incremented in free_huge_page. + * The reservation map will still indicate the reservation + * was consumed and possibly prevent later page allocation. + * This is better than leaking a global reservation. + */ + ClearPagePrivate(page); + put_page(page); + } + BUG_ON(copied < 0); + BUG_ON(err > 0); + BUG_ON(!copied && !err); + return copied ? copied : err; +} +#else /* !CONFIG_HUGETLB_PAGE */ +/* fail at build time if gcc attempts to use this */ +extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, + struct vm_area_struct *dst_vma, + unsigned long dst_start, + unsigned long src_start, + unsigned long len, + bool zeropage); +#endif /* CONFIG_HUGETLB_PAGE */ + static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, @@ -175,13 +370,22 @@ retry: */ err = -EINVAL; dst_vma = find_vma(dst_mm, dst_start); - if (!dst_vma || (dst_vma->vm_flags & VM_SHARED)) + if (!dst_vma) + goto out_unlock; + if (!vma_is_shmem(dst_vma) && dst_vma->vm_flags & VM_SHARED) goto out_unlock; if (dst_start < dst_vma->vm_start || dst_start + len > dst_vma->vm_end) goto out_unlock; /* + * If this is a HUGETLB vma, pass off to appropriate routine + */ + if (is_vm_hugetlb_page(dst_vma)) + return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, + src_start, len, zeropage); + + /* * Be strict and only allow __mcopy_atomic on userfaultfd * registered ranges to prevent userland errors going * unnoticed. As far as the VM consistency is concerned, it @@ -193,11 +397,7 @@ retry: if (!dst_vma->vm_userfaultfd_ctx.ctx) goto out_unlock; - /* - * FIXME: only allow copying on anonymous vmas, tmpfs should - * be added. - */ - if (dst_vma->vm_ops) + if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; /* @@ -206,7 +406,7 @@ retry: * dst_vma. */ err = -ENOMEM; - if (unlikely(anon_vma_prepare(dst_vma))) + if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma))) goto out_unlock; while (src_addr < src_start + len) { @@ -243,12 +443,21 @@ retry: BUG_ON(pmd_none(*dst_pmd)); BUG_ON(pmd_trans_huge(*dst_pmd)); - if (!zeropage) - err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, - dst_addr, src_addr, &page); - else - err = mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, - dst_addr); + if (vma_is_anonymous(dst_vma)) { + if (!zeropage) + err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, + dst_addr, src_addr, + &page); + else + err = mfill_zeropage_pte(dst_mm, dst_pmd, + dst_vma, dst_addr); + } else { + err = -EINVAL; /* if zeropage is true return -EINVAL */ + if (likely(!zeropage)) + err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, + dst_vma, dst_addr, + src_addr, &page); + } cond_resched(); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3ca82d44edd3..d89034a393f2 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1662,7 +1662,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, return area->addr; fail: - warn_alloc(gfp_mask, + warn_alloc(gfp_mask, NULL, "vmalloc: allocation failure, allocated %ld of %ld bytes", (area->nr_pages*PAGE_SIZE), area->size); vfree(area->addr); @@ -1724,7 +1724,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, return addr; fail: - warn_alloc(gfp_mask, + warn_alloc(gfp_mask, NULL, "vmalloc: allocation failure: %lu bytes", real_size); return NULL; } @@ -2309,7 +2309,7 @@ EXPORT_SYMBOL_GPL(free_vm_area); #ifdef CONFIG_SMP static struct vmap_area *node_to_va(struct rb_node *n) { - return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; + return rb_entry_safe(n, struct vmap_area, rb_node); } /** diff --git a/mm/vmscan.c b/mm/vmscan.c index 532a2a750952..947ab6f4db10 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -87,6 +87,7 @@ struct scan_control { /* The highest zone to isolate pages for reclaim from */ enum zone_type reclaim_idx; + /* Writepage batching in laptop mode; RECLAIM_WRITE */ unsigned int may_writepage:1; /* Can mapped pages be reclaimed? */ @@ -234,22 +235,39 @@ bool pgdat_reclaimable(struct pglist_data *pgdat) pgdat_reclaimable_pages(pgdat) * 6; } -unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) +/** + * lruvec_lru_size - Returns the number of pages on the given LRU list. + * @lruvec: lru vector + * @lru: lru to use + * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list) + */ +unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) { + unsigned long lru_size; + int zid; + if (!mem_cgroup_disabled()) - return mem_cgroup_get_lru_size(lruvec, lru); + lru_size = mem_cgroup_get_lru_size(lruvec, lru); + else + lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); - return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); -} + for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) { + struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; + unsigned long size; -unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru, - int zone_idx) -{ - if (!mem_cgroup_disabled()) - return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx); + if (!managed_zone(zone)) + continue; + + if (!mem_cgroup_disabled()) + size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid); + else + size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid], + NR_ZONE_LRU_BASE + lru); + lru_size -= min(size, lru_size); + } + + return lru_size; - return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx], - NR_ZONE_LRU_BASE + lru); } /* @@ -912,6 +930,17 @@ static void page_check_dirty_writeback(struct page *page, mapping->a_ops->is_dirty_writeback(page, dirty, writeback); } +struct reclaim_stat { + unsigned nr_dirty; + unsigned nr_unqueued_dirty; + unsigned nr_congested; + unsigned nr_writeback; + unsigned nr_immediate; + unsigned nr_activate; + unsigned nr_ref_keep; + unsigned nr_unmap_fail; +}; + /* * shrink_page_list() returns the number of reclaimed pages */ @@ -919,22 +948,20 @@ static unsigned long shrink_page_list(struct list_head *page_list, struct pglist_data *pgdat, struct scan_control *sc, enum ttu_flags ttu_flags, - unsigned long *ret_nr_dirty, - unsigned long *ret_nr_unqueued_dirty, - unsigned long *ret_nr_congested, - unsigned long *ret_nr_writeback, - unsigned long *ret_nr_immediate, + struct reclaim_stat *stat, bool force_reclaim) { LIST_HEAD(ret_pages); LIST_HEAD(free_pages); int pgactivate = 0; - unsigned long nr_unqueued_dirty = 0; - unsigned long nr_dirty = 0; - unsigned long nr_congested = 0; - unsigned long nr_reclaimed = 0; - unsigned long nr_writeback = 0; - unsigned long nr_immediate = 0; + unsigned nr_unqueued_dirty = 0; + unsigned nr_dirty = 0; + unsigned nr_congested = 0; + unsigned nr_reclaimed = 0; + unsigned nr_writeback = 0; + unsigned nr_immediate = 0; + unsigned nr_ref_keep = 0; + unsigned nr_unmap_fail = 0; cond_resched(); @@ -1036,7 +1063,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, PageReclaim(page) && test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { nr_immediate++; - goto keep_locked; + goto activate_locked; /* Case 2 above */ } else if (sane_reclaim(sc) || @@ -1054,7 +1081,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ SetPageReclaim(page); nr_writeback++; - goto keep_locked; + goto activate_locked; /* Case 3 above */ } else { @@ -1073,6 +1100,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, case PAGEREF_ACTIVATE: goto activate_locked; case PAGEREF_KEEP: + nr_ref_keep++; goto keep_locked; case PAGEREF_RECLAIM: case PAGEREF_RECLAIM_CLEAN: @@ -1110,6 +1138,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, (ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) : (ttu_flags | TTU_BATCH_FLUSH))) { case SWAP_FAIL: + nr_unmap_fail++; goto activate_locked; case SWAP_AGAIN: goto keep_locked; @@ -1124,13 +1153,18 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageDirty(page)) { /* - * Only kswapd can writeback filesystem pages to - * avoid risk of stack overflow but only writeback - * if many dirty pages have been encountered. + * Only kswapd can writeback filesystem pages + * to avoid risk of stack overflow. But avoid + * injecting inefficient single-page IO into + * flusher writeback as much as possible: only + * write pages when we've encountered many + * dirty pages, and when we've already scanned + * the rest of the LRU for clean pages and see + * the same dirty pages again (PageReclaim). */ if (page_is_file_cache(page) && - (!current_is_kswapd() || - !test_bit(PGDAT_DIRTY, &pgdat->flags))) { + (!current_is_kswapd() || !PageReclaim(page) || + !test_bit(PGDAT_DIRTY, &pgdat->flags))) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -1140,7 +1174,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, inc_node_page_state(page, NR_VMSCAN_IMMEDIATE); SetPageReclaim(page); - goto keep_locked; + goto activate_locked; } if (references == PAGEREF_RECLAIM_CLEAN) @@ -1276,11 +1310,16 @@ keep: list_splice(&ret_pages, page_list); count_vm_events(PGACTIVATE, pgactivate); - *ret_nr_dirty += nr_dirty; - *ret_nr_congested += nr_congested; - *ret_nr_unqueued_dirty += nr_unqueued_dirty; - *ret_nr_writeback += nr_writeback; - *ret_nr_immediate += nr_immediate; + if (stat) { + stat->nr_dirty = nr_dirty; + stat->nr_congested = nr_congested; + stat->nr_unqueued_dirty = nr_unqueued_dirty; + stat->nr_writeback = nr_writeback; + stat->nr_immediate = nr_immediate; + stat->nr_activate = pgactivate; + stat->nr_ref_keep = nr_ref_keep; + stat->nr_unmap_fail = nr_unmap_fail; + } return nr_reclaimed; } @@ -1292,7 +1331,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, .priority = DEF_PRIORITY, .may_unmap = 1, }; - unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5; + unsigned long ret; struct page *page, *next; LIST_HEAD(clean_pages); @@ -1305,8 +1344,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, } ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, - TTU_UNMAP|TTU_IGNORE_ACCESS, - &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true); + TTU_UNMAP|TTU_IGNORE_ACCESS, NULL, true); list_splice(&clean_pages, page_list); mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret); return ret; @@ -1341,13 +1379,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) * wants to isolate pages it will be able to operate on without * blocking - clean pages for the most part. * - * ISOLATE_CLEAN means that only clean pages should be isolated. This - * is used by reclaim when it is cannot write to backing storage - * * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages * that it is possible to migrate without blocking */ - if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) { + if (mode & ISOLATE_ASYNC_MIGRATE) { /* All the caller can do on PageWriteback is block */ if (PageWriteback(page)) return ret; @@ -1355,10 +1390,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) if (PageDirty(page)) { struct address_space *mapping; - /* ISOLATE_CLEAN means only clean pages */ - if (mode & ISOLATE_CLEAN) - return ret; - /* * Only pages without mappings or that have a * ->migratepage callback are possible to migrate @@ -1437,6 +1468,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, unsigned long nr_taken = 0; unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 }; unsigned long nr_skipped[MAX_NR_ZONES] = { 0, }; + unsigned long skipped = 0, total_skipped = 0; unsigned long scan, nr_pages; LIST_HEAD(pages_skipped); @@ -1488,14 +1520,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, */ if (!list_empty(&pages_skipped)) { int zid; - unsigned long total_skipped = 0; for (zid = 0; zid < MAX_NR_ZONES; zid++) { if (!nr_skipped[zid]) continue; __count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]); - total_skipped += nr_skipped[zid]; + skipped += nr_skipped[zid]; } /* @@ -1503,13 +1534,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, * close to unreclaimable. If the LRU list is empty, account * skipped pages as a full scan. */ - scan += list_empty(src) ? total_skipped : total_skipped >> 2; + total_skipped = list_empty(src) ? skipped : skipped >> 2; list_splice(&pages_skipped, src); } - *nr_scanned = scan; - trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan, - nr_taken, mode, is_file_lru(lru)); + *nr_scanned = scan + total_skipped; + trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, + scan, skipped, nr_taken, mode, lru); update_lru_sizes(lruvec, lru, nr_zone_taken); return nr_taken; } @@ -1669,30 +1700,6 @@ static int current_may_throttle(void) bdi_write_congested(current->backing_dev_info); } -static bool inactive_reclaimable_pages(struct lruvec *lruvec, - struct scan_control *sc, enum lru_list lru) -{ - int zid; - struct zone *zone; - int file = is_file_lru(lru); - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - - if (!global_reclaim(sc)) - return true; - - for (zid = sc->reclaim_idx; zid >= 0; zid--) { - zone = &pgdat->node_zones[zid]; - if (!managed_zone(zone)) - continue; - - if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE + - LRU_FILE * file) >= SWAP_CLUSTER_MAX) - return true; - } - - return false; -} - /* * shrink_inactive_list() is a helper for shrink_node(). It returns the number * of reclaimed pages @@ -1705,19 +1712,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, unsigned long nr_scanned; unsigned long nr_reclaimed = 0; unsigned long nr_taken; - unsigned long nr_dirty = 0; - unsigned long nr_congested = 0; - unsigned long nr_unqueued_dirty = 0; - unsigned long nr_writeback = 0; - unsigned long nr_immediate = 0; + struct reclaim_stat stat = {}; isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; - if (!inactive_reclaimable_pages(lruvec, sc, lru)) - return 0; - while (unlikely(too_many_isolated(pgdat, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1730,8 +1730,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (!sc->may_unmap) isolate_mode |= ISOLATE_UNMAPPED; - if (!sc->may_writepage) - isolate_mode |= ISOLATE_CLEAN; spin_lock_irq(&pgdat->lru_lock); @@ -1754,9 +1752,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, return 0; nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, TTU_UNMAP, - &nr_dirty, &nr_unqueued_dirty, &nr_congested, - &nr_writeback, &nr_immediate, - false); + &stat, false); spin_lock_irq(&pgdat->lru_lock); @@ -1790,7 +1786,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * of pages under pages flagged for immediate reclaim and stall if any * are encountered in the nr_immediate check below. */ - if (nr_writeback && nr_writeback == nr_taken) + if (stat.nr_writeback && stat.nr_writeback == nr_taken) set_bit(PGDAT_WRITEBACK, &pgdat->flags); /* @@ -1802,17 +1798,25 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * Tag a zone as congested if all the dirty pages scanned were * backed by a congested BDI and wait_iff_congested will stall. */ - if (nr_dirty && nr_dirty == nr_congested) + if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested) set_bit(PGDAT_CONGESTED, &pgdat->flags); /* * If dirty pages are scanned that are not queued for IO, it - * implies that flushers are not keeping up. In this case, flag - * the pgdat PGDAT_DIRTY and kswapd will start writing pages from - * reclaim context. + * implies that flushers are not doing their job. This can + * happen when memory pressure pushes dirty pages to the end + * of the LRU without the dirty limits being breached. It can + * also happen when the proportion of dirty pages grows not + * through writes but through memory pressure reclaiming all + * the clean cache. And in some cases, the flushers simply + * cannot keep up with the allocation rate. Nudge the flusher + * threads in case they are asleep, but also allow kswapd to + * start writing pages during reclaim. */ - if (nr_unqueued_dirty == nr_taken) + if (stat.nr_unqueued_dirty == nr_taken) { + wakeup_flusher_threads(0, WB_REASON_VMSCAN); set_bit(PGDAT_DIRTY, &pgdat->flags); + } /* * If kswapd scans pages marked marked for immediate @@ -1820,7 +1824,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * that pages are cycling through the LRU faster than * they are written so also forcibly stall. */ - if (nr_immediate && current_may_throttle()) + if (stat.nr_immediate && current_may_throttle()) congestion_wait(BLK_RW_ASYNC, HZ/10); } @@ -1835,6 +1839,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, nr_scanned, nr_reclaimed, + stat.nr_dirty, stat.nr_writeback, + stat.nr_congested, stat.nr_immediate, + stat.nr_activate, stat.nr_ref_keep, + stat.nr_unmap_fail, sc->priority, file); return nr_reclaimed; } @@ -1855,17 +1863,19 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * * The downside is that we have to touch page->_refcount against each page. * But we had to alter page->flags anyway. + * + * Returns the number of pages moved to the given lru. */ -static void move_active_pages_to_lru(struct lruvec *lruvec, +static unsigned move_active_pages_to_lru(struct lruvec *lruvec, struct list_head *list, struct list_head *pages_to_free, enum lru_list lru) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); - unsigned long pgmoved = 0; struct page *page; int nr_pages; + int nr_moved = 0; while (!list_empty(list)) { page = lru_to_page(list); @@ -1877,7 +1887,6 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, nr_pages = hpage_nr_pages(page); update_lru_size(lruvec, lru, page_zonenum(page), nr_pages); list_move(&page->lru, &lruvec->lists[lru]); - pgmoved += nr_pages; if (put_page_testzero(page)) { __ClearPageLRU(page); @@ -1891,11 +1900,15 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, spin_lock_irq(&pgdat->lru_lock); } else list_add(&page->lru, pages_to_free); + } else { + nr_moved += nr_pages; } } if (!is_active_lru(lru)) - __count_vm_events(PGDEACTIVATE, pgmoved); + __count_vm_events(PGDEACTIVATE, nr_moved); + + return nr_moved; } static void shrink_active_list(unsigned long nr_to_scan, @@ -1911,7 +1924,8 @@ static void shrink_active_list(unsigned long nr_to_scan, LIST_HEAD(l_inactive); struct page *page; struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; - unsigned long nr_rotated = 0; + unsigned nr_deactivate, nr_activate; + unsigned nr_rotated = 0; isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -1920,8 +1934,6 @@ static void shrink_active_list(unsigned long nr_to_scan, if (!sc->may_unmap) isolate_mode |= ISOLATE_UNMAPPED; - if (!sc->may_writepage) - isolate_mode |= ISOLATE_CLEAN; spin_lock_irq(&pgdat->lru_lock); @@ -1989,13 +2001,15 @@ static void shrink_active_list(unsigned long nr_to_scan, */ reclaim_stat->recent_rotated[file] += nr_rotated; - move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); - move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); + nr_activate = move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); + nr_deactivate = move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&pgdat->lru_lock); mem_cgroup_uncharge_list(&l_hold); free_hot_cold_page_list(&l_hold, true); + trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, + nr_deactivate, nr_rotated, sc->priority, file); } /* @@ -2025,14 +2039,13 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct scan_control *sc) + struct scan_control *sc, bool trace) { unsigned long inactive_ratio; - unsigned long inactive; - unsigned long active; + unsigned long inactive, active; + enum lru_list inactive_lru = file * LRU_FILE; + enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; unsigned long gb; - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - int zid; /* * If we don't have swap space, anonymous page deactivation @@ -2041,27 +2054,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, if (!file && !total_swap_pages) return false; - inactive = lruvec_lru_size(lruvec, file * LRU_FILE); - active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE); - - /* - * For zone-constrained allocations, it is necessary to check if - * deactivations are required for lowmem to be reclaimed. This - * calculates the inactive/active pages available in eligible zones. - */ - for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) { - struct zone *zone = &pgdat->node_zones[zid]; - unsigned long inactive_zone, active_zone; - - if (!managed_zone(zone)) - continue; - - inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid); - active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid); - - inactive -= min(inactive, inactive_zone); - active -= min(active, active_zone); - } + inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); + active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) @@ -2069,6 +2063,13 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, else inactive_ratio = 1; + if (trace) + trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id, + sc->reclaim_idx, + lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive, + lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active, + inactive_ratio, file); + return inactive * inactive_ratio < active; } @@ -2076,7 +2077,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), sc)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2207,8 +2208,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, sc) && - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { + if (!inactive_list_is_low(lruvec, true, sc, false) && + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; } @@ -2234,10 +2235,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anon in [0], file in [1] */ - anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) + - lruvec_lru_size(lruvec, LRU_INACTIVE_ANON); - file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); + anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES); spin_lock_irq(&pgdat->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { @@ -2275,7 +2276,7 @@ out: unsigned long size; unsigned long scan; - size = lruvec_lru_size(lruvec, lru); + size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); scan = size >> sc->priority; if (!scan && pass && force_scan) @@ -2432,7 +2433,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, sc)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2761,8 +2762,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int initial_priority = sc->priority; - unsigned long total_scanned = 0; - unsigned long writeback_threshold; retry: delayacct_freepages_start(); @@ -2775,7 +2774,6 @@ retry: sc->nr_scanned = 0; shrink_zones(zonelist, sc); - total_scanned += sc->nr_scanned; if (sc->nr_reclaimed >= sc->nr_to_reclaim) break; @@ -2788,20 +2786,6 @@ retry: */ if (sc->priority < DEF_PRIORITY - 2) sc->may_writepage = 1; - - /* - * Try to write back as many pages as we just scanned. This - * tends to cause slow streaming writers to write data to the - * disk smoothly, at the dirtying rate, which is nice. But - * that's undesirable in laptop mode, where we *want* lumpy - * writeout. So in laptop mode, write out the whole world. - */ - writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2; - if (total_scanned > writeback_threshold) { - wakeup_flusher_threads(laptop_mode ? 0 : total_scanned, - WB_REASON_TRY_TO_FREE_PAGES); - sc->may_writepage = 1; - } } while (--sc->priority >= 0); delayacct_freepages_end(); @@ -3082,7 +3066,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); - if (inactive_list_is_low(lruvec, false, sc)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); diff --git a/mm/vmstat.c b/mm/vmstat.c index 7c28df36f50f..69f9aff39a2e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1038,6 +1038,8 @@ const char * const vmstat_text[] = { "compact_fail", "compact_success", "compact_daemon_wake", + "compact_daemon_migrate_scanned", + "compact_daemon_free_scanned", #endif #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/workingset.c b/mm/workingset.c index 80c913c89f11..c573cb24d1c5 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -267,7 +267,7 @@ bool workingset_refault(void *shadow) } lruvec = mem_cgroup_lruvec(pgdat, memcg); refault = atomic_long_read(&lruvec->inactive_age); - active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); + active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES); rcu_read_unlock(); /* diff --git a/mm/z3fold.c b/mm/z3fold.c index 8f9e89ca1d31..207e5ddc87a2 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -50,7 +50,7 @@ #define ZHDR_SIZE_ALIGNED CHUNK_SIZE #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) -#define BUDDY_MASK ((1 << NCHUNKS_ORDER) - 1) +#define BUDDY_MASK (0x3) struct z3fold_pool; struct z3fold_ops { @@ -109,7 +109,7 @@ struct z3fold_header { unsigned short middle_chunks; unsigned short last_chunks; unsigned short start_middle; - unsigned short first_num:NCHUNKS_ORDER; + unsigned short first_num:2; }; /* @@ -179,7 +179,11 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) return (struct z3fold_header *)(handle & PAGE_MASK); } -/* Returns buddy number */ +/* + * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle + * but that doesn't matter. because the masking will result in the + * correct buddy number. + */ static enum buddy handle_to_buddy(unsigned long handle) { struct z3fold_header *zhdr = handle_to_z3fold_header(handle); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 9cc3c0b2c2c1..a1f24989ac23 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -25,7 +25,7 @@ * Usage of struct page flags: * PG_private: identifies the first component page * PG_private2: identifies the last component page - * PG_owner_priv_1: indentifies the huge component page + * PG_owner_priv_1: identifies the huge component page * */ @@ -364,7 +364,7 @@ static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags) { return kmem_cache_alloc(pool->zspage_cachep, flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); -}; +} static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) { @@ -2383,7 +2383,7 @@ struct zs_pool *zs_create_pool(const char *name) goto err; /* - * Iterate reversly, because, size of size_class that we want to use + * Iterate reversely, because, size of size_class that we want to use * for merging should be larger or equal to current size. */ for (i = zs_size_classes - 1; i >= 0; i--) { diff --git a/mm/zswap.c b/mm/zswap.c index 067a0d62f318..9e8565d9c66b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -76,9 +76,17 @@ static u64 zswap_duplicate_entry; * tunables **********************************/ +#define ZSWAP_PARAM_UNSET "" + /* Enable/disable zswap (disabled by default) */ static bool zswap_enabled; -module_param_named(enabled, zswap_enabled, bool, 0644); +static int zswap_enabled_param_set(const char *, + const struct kernel_param *); +static struct kernel_param_ops zswap_enabled_param_ops = { + .set = zswap_enabled_param_set, + .get = param_get_bool, +}; +module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); /* Crypto compressor to use */ #define ZSWAP_COMPRESSOR_DEFAULT "lzo" @@ -176,6 +184,12 @@ static atomic_t zswap_pools_count = ATOMIC_INIT(0); /* used by param callback function */ static bool zswap_init_started; +/* fatal error during init */ +static bool zswap_init_failed; + +/* init completed, but couldn't create the initial pool */ +static bool zswap_has_pool; + /********************************* * helpers and fwd declarations **********************************/ @@ -415,7 +429,8 @@ static struct zswap_pool *__zswap_pool_current(void) struct zswap_pool *pool; pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list); - WARN_ON(!pool); + WARN_ONCE(!pool && zswap_has_pool, + "%s: no page storage pool!\n", __func__); return pool; } @@ -434,7 +449,7 @@ static struct zswap_pool *zswap_pool_current_get(void) rcu_read_lock(); pool = __zswap_pool_current(); - if (!pool || !zswap_pool_get(pool)) + if (!zswap_pool_get(pool)) pool = NULL; rcu_read_unlock(); @@ -450,7 +465,9 @@ static struct zswap_pool *zswap_pool_last_get(void) list_for_each_entry_rcu(pool, &zswap_pools, list) last = pool; - if (!WARN_ON(!last) && !zswap_pool_get(last)) + WARN_ONCE(!last && zswap_has_pool, + "%s: no page storage pool!\n", __func__); + if (!zswap_pool_get(last)) last = NULL; rcu_read_unlock(); @@ -486,6 +503,17 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; int ret; + if (!zswap_has_pool) { + /* if either are unset, pool initialization failed, and we + * need both params to be set correctly before trying to + * create a pool. + */ + if (!strcmp(type, ZSWAP_PARAM_UNSET)) + return NULL; + if (!strcmp(compressor, ZSWAP_PARAM_UNSET)) + return NULL; + } + pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) { pr_err("pool alloc failed\n"); @@ -535,29 +563,41 @@ error: static __init struct zswap_pool *__zswap_pool_create_fallback(void) { - if (!crypto_has_comp(zswap_compressor, 0, 0)) { - if (!strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) { - pr_err("default compressor %s not available\n", - zswap_compressor); - return NULL; - } + bool has_comp, has_zpool; + + has_comp = crypto_has_comp(zswap_compressor, 0, 0); + if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) { pr_err("compressor %s not available, using default %s\n", zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT); param_free_charp(&zswap_compressor); zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; + has_comp = crypto_has_comp(zswap_compressor, 0, 0); } - if (!zpool_has_pool(zswap_zpool_type)) { - if (!strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { - pr_err("default zpool %s not available\n", - zswap_zpool_type); - return NULL; - } + if (!has_comp) { + pr_err("default compressor %s not available\n", + zswap_compressor); + param_free_charp(&zswap_compressor); + zswap_compressor = ZSWAP_PARAM_UNSET; + } + + has_zpool = zpool_has_pool(zswap_zpool_type); + if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { pr_err("zpool %s not available, using default %s\n", zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT); param_free_charp(&zswap_zpool_type); zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; + has_zpool = zpool_has_pool(zswap_zpool_type); + } + if (!has_zpool) { + pr_err("default zpool %s not available\n", + zswap_zpool_type); + param_free_charp(&zswap_zpool_type); + zswap_zpool_type = ZSWAP_PARAM_UNSET; } + if (!has_comp || !has_zpool) + return NULL; + return zswap_pool_create(zswap_zpool_type, zswap_compressor); } @@ -573,6 +613,9 @@ static void zswap_pool_destroy(struct zswap_pool *pool) static int __must_check zswap_pool_get(struct zswap_pool *pool) { + if (!pool) + return 0; + return kref_get_unless_zero(&pool->kref); } @@ -624,8 +667,13 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, char *s = strstrip((char *)val); int ret; + if (zswap_init_failed) { + pr_err("can't set param, initialization failed\n"); + return -ENODEV; + } + /* no change required */ - if (!strcmp(s, *(char **)kp->arg)) + if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool) return 0; /* if this is load-time (pre-init) param setting, @@ -671,6 +719,7 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, if (!ret) { put_pool = zswap_pool_current(); list_add_rcu(&pool->list, &zswap_pools); + zswap_has_pool = true; } else if (pool) { /* add the possibly pre-existing pool to the end of the pools * list; if it's new (and empty) then it'll be removed and @@ -678,6 +727,15 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, */ list_add_tail_rcu(&pool->list, &zswap_pools); put_pool = pool; + } else if (!zswap_has_pool) { + /* if initial pool creation failed, and this pool creation also + * failed, maybe both compressor and zpool params were bad. + * Allow changing this param, so pool creation will succeed + * when the other param is changed. We already verified this + * param is ok in the zpool_has_pool() or crypto_has_comp() + * checks above. + */ + ret = param_set_charp(s, kp); } spin_unlock(&zswap_pools_lock); @@ -703,6 +761,21 @@ static int zswap_zpool_param_set(const char *val, return __zswap_param_set(val, kp, NULL, zswap_compressor); } +static int zswap_enabled_param_set(const char *val, + const struct kernel_param *kp) +{ + if (zswap_init_failed) { + pr_err("can't enable, initialization failed\n"); + return -ENODEV; + } + if (!zswap_has_pool && zswap_init_started) { + pr_err("can't enable, no pool configured\n"); + return -ENODEV; + } + + return param_set_bool(val, kp); +} + /********************************* * writeback code **********************************/ @@ -1180,27 +1253,29 @@ static int __init init_zswap(void) goto hp_fail; pool = __zswap_pool_create_fallback(); - if (!pool) { + if (pool) { + pr_info("loaded using pool %s/%s\n", pool->tfm_name, + zpool_get_type(pool->zpool)); + list_add(&pool->list, &zswap_pools); + zswap_has_pool = true; + } else { pr_err("pool creation failed\n"); - goto pool_fail; + zswap_enabled = false; } - pr_info("loaded using pool %s/%s\n", pool->tfm_name, - zpool_get_type(pool->zpool)); - - list_add(&pool->list, &zswap_pools); frontswap_register_ops(&zswap_frontswap_ops); if (zswap_debugfs_init()) pr_warn("debugfs initialization failed\n"); return 0; -pool_fail: - cpuhp_remove_state_nocalls(CPUHP_MM_ZSWP_POOL_PREPARE); hp_fail: cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE); dstmem_fail: zswap_entry_cache_destroy(); cache_fail: + /* if built-in, we aren't unloaded on failure; don't allow use */ + zswap_init_failed = true; + zswap_enabled = false; return -ENOMEM; } /* must be late so crypto has time to come up */ diff --git a/scripts/Lindent b/scripts/Lindent index 6d889de4e70b..57b564c24d61 100755 --- a/scripts/Lindent +++ b/scripts/Lindent @@ -1,21 +1,25 @@ #!/bin/sh + PARAM="-npro -kr -i8 -ts8 -sob -l80 -ss -ncs -cp1" -RES=`indent --version` + +RES=`indent --version | cut -d' ' -f3` if [ "$RES" = "" ]; then exit 1 fi -V1=`echo $RES | cut -d' ' -f3 | cut -d'.' -f1` -V2=`echo $RES | cut -d' ' -f3 | cut -d'.' -f2` -V3=`echo $RES | cut -d' ' -f3 | cut -d'.' -f3` +V1=`echo $RES | cut -d'.' -f1` +V2=`echo $RES | cut -d'.' -f2` +V3=`echo $RES | cut -d'.' -f3` + if [ $V1 -gt 2 ]; then PARAM="$PARAM -il0" elif [ $V1 -eq 2 ]; then if [ $V2 -gt 2 ]; then - PARAM="$PARAM -il0"; + PARAM="$PARAM -il0" elif [ $V2 -eq 2 ]; then if [ $V3 -ge 10 ]; then PARAM="$PARAM -il0" fi fi fi + indent $PARAM "$@" diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 982c52ca6473..a556d1a2d85e 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -424,7 +424,7 @@ our $typeTypedefs = qr{(?x: our $zero_initializer = qr{(?:(?:0[xX])?0+$Int_type?|NULL|false)\b}; our $logFunctions = qr{(?x: - printk(?:_ratelimited|_once|)| + printk(?:_ratelimited|_once|_deferred_once|_deferred|)| (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)| WARN(?:_RATELIMIT|_ONCE|)| panic| @@ -2134,7 +2134,7 @@ sub process { my $in_header_lines = $file ? 0 : 1; my $in_commit_log = 0; #Scanning lines before patch my $has_commit_log = 0; #Encountered lines before patch - my $commit_log_possible_stack_dump = 0; + my $commit_log_possible_stack_dump = 0; my $commit_log_long_line = 0; my $commit_log_has_diff = 0; my $reported_maintainer_file = 0; @@ -2154,6 +2154,7 @@ sub process { my $realline = 0; my $realcnt = 0; my $here = ''; + my $context_function; #undef'd unless there's a known function my $in_comment = 0; my $comment_edge = 0; my $first_line = 0; @@ -2192,7 +2193,8 @@ sub process { } #next; } - if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { + if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) { + my $context = $4; $realline=$1-1; if (defined $2) { $realcnt=$3+1; @@ -2201,6 +2203,12 @@ sub process { } $in_comment = 0; + if ($context =~ /\b(\w+)\s*\(/) { + $context_function = $1; + } else { + undef $context_function; + } + # Guestimate if this is a continuing comment. Run # the context looking for a comment "edge". If this # edge is a close comment then we must be in a comment @@ -5157,6 +5165,16 @@ sub process { "break quoted strings at a space character\n" . $hereprev); } +#check for an embedded function name in a string when the function is known +# as part of a diff. This does not work for -f --file checking as it +#depends on patch context providing the function name + if ($line =~ /^\+.*$String/ && + defined($context_function) && + get_quoted_string($line, $rawline) =~ /\b$context_function\b/) { + WARN("EMBEDDED_FUNCTION_NAME", + "Prefer using \"%s\", __func__ to embedded function names\n" . $herecurr); + } + # check for spaces before a quoted newline if ($rawline =~ /^.*\".*\s\\n/) { if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE", @@ -5269,6 +5287,12 @@ sub process { } } +# check for logging continuations + if ($line =~ /\bprintk\s*\(\s*KERN_CONT\b|\bpr_cont\s*\(/) { + WARN("LOGGING_CONTINUATION", + "Avoid logging continuation uses where feasible\n" . $herecurr); + } + # check for mask then right shift without a parentheses if ($^V && $^V ge 5.10.0 && $line =~ /$LvalOrFunc\s*\&\s*($LvalOrFunc)\s*>>/ && diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index eea5b782a3b7..36a8a788f5c5 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -78,6 +78,9 @@ my (@stack, $re, $dre, $x, $xs, $funcre); } elsif ($arch eq 'mips') { #88003254: 27bdffe0 addiu sp,sp,-32 $re = qr/.*addiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o; + } elsif ($arch eq 'nios2') { + #25a8: defffb04 addi sp,sp,-20 + $re = qr/.*addi.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o; } elsif ($arch eq 'parisc' || $arch eq 'parisc64') { $re = qr/.*ldo ($x{1,8})\(sp\),sp/o; } elsif ($arch eq 'ppc') { diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 7986f4e0da12..7aad82406422 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -14,6 +14,7 @@ #include <linux/fs.h> #include <linux/mount.h> +#include <linux/of_fdt.h> /* We need to stringify expanded macros so that they can be parsed */ @@ -50,3 +51,9 @@ LX_VALUE(MNT_NOEXEC) LX_VALUE(MNT_NOATIME) LX_VALUE(MNT_NODIRATIME) LX_VALUE(MNT_RELATIME) + +/* linux/of_fdt.h> */ +LX_VALUE(OF_DT_HEADER) + +/* Kernel Configs */ +LX_CONFIG(CONFIG_OF) diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py index 38b1f09d1cd9..086d27223c0c 100644 --- a/scripts/gdb/linux/proc.py +++ b/scripts/gdb/linux/proc.py @@ -16,6 +16,7 @@ from linux import constants from linux import utils from linux import tasks from linux import lists +from struct import * class LxCmdLine(gdb.Command): @@ -195,3 +196,75 @@ values of that process namespace""" info_opts(MNT_INFO, m_flags))) LxMounts() + + +class LxFdtDump(gdb.Command): + """Output Flattened Device Tree header and dump FDT blob to the filename + specified as the command argument. Equivalent to + 'cat /proc/fdt > fdtdump.dtb' on a running target""" + + def __init__(self): + super(LxFdtDump, self).__init__("lx-fdtdump", gdb.COMMAND_DATA, + gdb.COMPLETE_FILENAME) + + def fdthdr_to_cpu(self, fdt_header): + + fdt_header_be = ">IIIIIII" + fdt_header_le = "<IIIIIII" + + if utils.get_target_endianness() == 1: + output_fmt = fdt_header_le + else: + output_fmt = fdt_header_be + + return unpack(output_fmt, pack(fdt_header_be, + fdt_header['magic'], + fdt_header['totalsize'], + fdt_header['off_dt_struct'], + fdt_header['off_dt_strings'], + fdt_header['off_mem_rsvmap'], + fdt_header['version'], + fdt_header['last_comp_version'])) + + def invoke(self, arg, from_tty): + + if not constants.LX_CONFIG_OF: + raise gdb.GdbError("Kernel not compiled with CONFIG_OF\n") + + if len(arg) == 0: + filename = "fdtdump.dtb" + else: + filename = arg + + py_fdt_header_ptr = gdb.parse_and_eval( + "(const struct fdt_header *) initial_boot_params") + py_fdt_header = py_fdt_header_ptr.dereference() + + fdt_header = self.fdthdr_to_cpu(py_fdt_header) + + if fdt_header[0] != constants.LX_OF_DT_HEADER: + raise gdb.GdbError("No flattened device tree magic found\n") + + gdb.write("fdt_magic: 0x{:02X}\n".format(fdt_header[0])) + gdb.write("fdt_totalsize: 0x{:02X}\n".format(fdt_header[1])) + gdb.write("off_dt_struct: 0x{:02X}\n".format(fdt_header[2])) + gdb.write("off_dt_strings: 0x{:02X}\n".format(fdt_header[3])) + gdb.write("off_mem_rsvmap: 0x{:02X}\n".format(fdt_header[4])) + gdb.write("version: {}\n".format(fdt_header[5])) + gdb.write("last_comp_version: {}\n".format(fdt_header[6])) + + inf = gdb.inferiors()[0] + fdt_buf = utils.read_memoryview(inf, py_fdt_header_ptr, + fdt_header[1]).tobytes() + + try: + f = open(filename, 'wb') + except: + raise gdb.GdbError("Could not open file to dump fdt") + + f.write(fdt_buf) + f.close() + + gdb.write("Dumped fdt blob to " + filename + "\n") + +LxFdtDump() diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 163c720d3f2b..ac5bf8708ff0 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -16,6 +16,7 @@ absense||absence absolut||absolute absoulte||absolute acccess||access +acceess||access acceleratoin||acceleration accelleration||acceleration accesing||accessing @@ -46,6 +47,7 @@ ackowledged||acknowledged acording||according activete||activate acumulating||accumulating +acumulator||accumulator adapater||adapter addional||additional additionaly||additionally @@ -184,6 +186,7 @@ cacluated||calculated caculation||calculation calender||calendar calle||called +callibration||calibration calucate||calculate calulate||calculate cancelation||cancellation @@ -244,6 +247,7 @@ compatiblity||compatibility competion||completion compilant||compliant compleatly||completely +completition||completion completly||completely complient||compliant componnents||components @@ -257,6 +261,7 @@ conected||connected configuratoin||configuration configuraton||configuration configuretion||configuration +configutation||configuration conider||consider conjuction||conjunction connectinos||connections @@ -317,6 +322,7 @@ dependant||dependent depreacted||deprecated depreacte||deprecate desactivate||deactivate +desciptor||descriptor desciptors||descriptors descripton||description descrition||description @@ -417,9 +423,12 @@ extention||extension extracter||extractor faild||failed faill||fail +failied||failed +faillure||failure failue||failure failuer||failure faireness||fairness +falied||failed faliure||failure familar||familiar fatser||faster @@ -441,6 +450,7 @@ forseeable||foreseeable forse||force fortan||fortran forwardig||forwarding +framming||framing framwork||framework frequncy||frequency frome||from @@ -482,6 +492,7 @@ howver||however hsould||should hypter||hyper identidier||identifier +illigal||illegal imblance||imbalance immeadiately||immediately immedaite||immediate @@ -520,6 +531,7 @@ informtion||information infromation||information ingore||ignore inital||initial +initalized||initialized initalised||initialized initalise||initialize initalize||initialize @@ -532,6 +544,7 @@ initilize||initialize inofficial||unofficial insititute||institute instal||install +instanciated||instantiated inteface||interface integreated||integrated integrety||integrity @@ -557,6 +570,7 @@ intialized||initialized intialize||initialize intregral||integral intrrupt||interrupt +intterrupt||interrupt intuative||intuitive invaid||invalid invalde||invald @@ -567,6 +581,8 @@ invokations||invocations irrelevent||irrelevant isnt||isn't isssue||issue +iternations||iterations +itertation||iteration itslef||itself jave||java jeffies||jiffies @@ -621,6 +637,7 @@ messsage||message messsages||messages microprocesspr||microprocessor milliseonds||milliseconds +minium||minimum minumum||minimum miscelleneous||miscellaneous misformed||malformed @@ -668,6 +685,7 @@ occurances||occurrences occured||occurred occurence||occurrence occure||occurred +occured||occurred occuring||occurring offet||offset omitt||omit @@ -681,8 +699,10 @@ optionnal||optional optmizations||optimizations orientatied||orientated orientied||oriented +orignal||original otherise||otherwise ouput||output +oustanding||outstanding overaall||overall overhread||overhead overlaping||overlapping @@ -705,6 +725,7 @@ paramter||parameter paramters||parameters particuarly||particularly particularily||particularly +partiton||partition pased||passed passin||passing pathes||paths @@ -724,6 +745,7 @@ pleaes||please ploting||plotting plugable||pluggable poinnter||pointer +pointeur||pointer poiter||pointer posible||possible positon||position @@ -752,6 +774,7 @@ procceed||proceed proccesors||processors procesed||processed proces||process +procesing||processing processessing||processing processess||processes processpr||processor @@ -780,6 +803,7 @@ protable||portable protcol||protocol protecion||protection protocoll||protocol +promixity||proximity psudo||pseudo psuedo||pseudo psychadelic||psychedelic @@ -801,6 +825,7 @@ recommanded||recommended recyle||recycle redircet||redirect redirectrion||redirection +reename||rename refcounf||refcount refence||reference refered||referred @@ -944,7 +969,9 @@ suble||subtle substract||subtract succesfully||successfully succesful||successful +successed||succeeded successfull||successful +successfuly||successfully sucessfully||successfully sucess||success superflous||superfluous @@ -960,6 +987,7 @@ suppport||support supress||suppress surpresses||suppresses susbsystem||subsystem +suspeneded||suspended suspicously||suspiciously swaping||swapping switchs||switches @@ -967,6 +995,7 @@ symetric||symmetric synax||syntax synchonized||synchronized syncronize||synchronize +syncronized||synchronized syncronizing||synchronizing syncronus||synchronous syste||system @@ -1005,22 +1034,30 @@ ture||true tyep||type udpate||update uesd||used +uncommited||uncommitted unconditionaly||unconditionally underun||underrun unecessary||unnecessary unexecpted||unexpected +unexpcted||unexpected unexpectd||unexpected unexpeted||unexpected +unexpexted||unexpected unfortunatelly||unfortunately unifiy||unify unintialized||uninitialized +unkmown||unknown unknonw||unknown unknow||unknown unkown||unknown unneedingly||unnecessarily +unnsupported||unsupported +unmached||unmatched unresgister||unregister +unrgesiter||unregister unsinged||unsigned unstabel||unstable +unsolicitied||unsolicited unsuccessfull||unsuccessful unsuported||unsupported untill||until diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index c354807381c1..c9e8a9898ce4 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -424,10 +424,9 @@ out: return ret; } -static int sel_mmap_policy_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int sel_mmap_policy_fault(struct vm_fault *vmf) { - struct policy_load_memory *plm = vma->vm_file->private_data; + struct policy_load_memory *plm = vmf->vma->vm_file->private_data; unsigned long offset; struct page *page; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 9d33c1e85c79..aec9c92250fd 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3245,10 +3245,9 @@ static unsigned int snd_pcm_capture_poll(struct file *file, poll_table * wait) /* * mmap status record */ -static int snd_pcm_mmap_status_fault(struct vm_area_struct *area, - struct vm_fault *vmf) +static int snd_pcm_mmap_status_fault(struct vm_fault *vmf) { - struct snd_pcm_substream *substream = area->vm_private_data; + struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; if (substream == NULL) @@ -3282,10 +3281,9 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file /* * mmap control record */ -static int snd_pcm_mmap_control_fault(struct vm_area_struct *area, - struct vm_fault *vmf) +static int snd_pcm_mmap_control_fault(struct vm_fault *vmf) { - struct snd_pcm_substream *substream = area->vm_private_data; + struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; if (substream == NULL) @@ -3341,10 +3339,9 @@ snd_pcm_default_page_ops(struct snd_pcm_substream *substream, unsigned long ofs) /* * fault callback for mmapping a RAM page */ -static int snd_pcm_mmap_data_fault(struct vm_area_struct *area, - struct vm_fault *vmf) +static int snd_pcm_mmap_data_fault(struct vm_fault *vmf) { - struct snd_pcm_substream *substream = area->vm_private_data; + struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; unsigned long offset; struct page * page; diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c index cf5dc33f4a6d..cf45bf1f7ee0 100644 --- a/sound/usb/usx2y/us122l.c +++ b/sound/usb/usx2y/us122l.c @@ -137,13 +137,12 @@ static void usb_stream_hwdep_vm_open(struct vm_area_struct *area) snd_printdd(KERN_DEBUG "%i\n", atomic_read(&us122l->mmap_count)); } -static int usb_stream_hwdep_vm_fault(struct vm_area_struct *area, - struct vm_fault *vmf) +static int usb_stream_hwdep_vm_fault(struct vm_fault *vmf) { unsigned long offset; struct page *page; void *vaddr; - struct us122l *us122l = area->vm_private_data; + struct us122l *us122l = vmf->vma->vm_private_data; struct usb_stream *s; mutex_lock(&us122l->mutex); diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index 0b34dbc8f302..605e1047c01d 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -31,19 +31,18 @@ #include "usbusx2y.h" #include "usX2Yhwdep.h" -static int snd_us428ctls_vm_fault(struct vm_area_struct *area, - struct vm_fault *vmf) +static int snd_us428ctls_vm_fault(struct vm_fault *vmf) { unsigned long offset; struct page * page; void *vaddr; snd_printdd("ENTER, start %lXh, pgoff %ld\n", - area->vm_start, + vmf->vma->vm_start, vmf->pgoff); offset = vmf->pgoff << PAGE_SHIFT; - vaddr = (char*)((struct usX2Ydev *)area->vm_private_data)->us428ctls_sharedmem + offset; + vaddr = (char *)((struct usX2Ydev *)vmf->vma->vm_private_data)->us428ctls_sharedmem + offset; page = virt_to_page(vaddr); get_page(page); vmf->page = page; diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index 90766a92e7fd..f95164b91152 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -652,14 +652,13 @@ static void snd_usX2Y_hwdep_pcm_vm_close(struct vm_area_struct *area) } -static int snd_usX2Y_hwdep_pcm_vm_fault(struct vm_area_struct *area, - struct vm_fault *vmf) +static int snd_usX2Y_hwdep_pcm_vm_fault(struct vm_fault *vmf) { unsigned long offset; void *vaddr; offset = vmf->pgoff << PAGE_SHIFT; - vaddr = (char*)((struct usX2Ydev *)area->vm_private_data)->hwdep_pcm_shm + offset; + vaddr = (char *)((struct usX2Ydev *)vmf->vma->vm_private_data)->hwdep_pcm_shm + offset; vmf->page = virt_to_page(vaddr); get_page(vmf->page); return 0; diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 6d8b8f22cf55..42c15f906aac 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -34,7 +34,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, { unsigned long tmp; - if (!nbits || start >= nbits) + if (unlikely(start >= nbits)) return nbits; tmp = addr[start / BITS_PER_LONG] ^ invert; diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 983140e68661..222ee457febf 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -11,6 +11,8 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += userfaultfd_hugetlb +TEST_GEN_FILES += userfaultfd_shmem TEST_GEN_FILES += mlock-random-test TEST_PROGS := run_vmtests @@ -18,6 +20,8 @@ TEST_PROGS := run_vmtests include ../lib.mk $(OUTPUT)/userfaultfd: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h +$(OUTPUT)/userfaultfd_hugetlb: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h +$(OUTPUT)/userfaultfd_shmem: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h $(OUTPUT)/mlock-random-test: LDLIBS += -lcap ../../../../usr/include/linux/kernel.h: diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index e11968b3677e..c92f6cf31d0a 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -103,6 +103,30 @@ else echo "[PASS]" fi +echo "----------------------------" +echo "running userfaultfd_hugetlb" +echo "----------------------------" +# 258MB total huge pages == 128MB src and 128MB dst +./userfaultfd_hugetlb 128 32 $mnt/ufd_test_file +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi +rm -f $mnt/ufd_test_file + +echo "----------------------------" +echo "running userfaultfd_shmem" +echo "----------------------------" +./userfaultfd_shmem 128 32 +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + #cleanup umount $mnt rm -rf $mnt diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index d77ed41b2094..e9449c801888 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -63,6 +63,7 @@ #include <sys/mman.h> #include <sys/syscall.h> #include <sys/ioctl.h> +#include <sys/wait.h> #include <pthread.h> #include <linux/userfaultfd.h> @@ -76,8 +77,12 @@ static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; #define BOUNCE_POLL (1<<3) static int bounces; +#ifdef HUGETLB_TEST +static int huge_fd; +static char *huge_fd_off0; +#endif static unsigned long long *count_verify; -static int uffd, finished, *pipefd; +static int uffd, uffd_flags, finished, *pipefd; static char *area_src, *area_dst; static char *zeropage; pthread_attr_t attr; @@ -97,6 +102,102 @@ pthread_attr_t attr; ~(unsigned long)(sizeof(unsigned long long) \ - 1))) +#if !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) + +/* Anonymous memory */ +#define EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ + (1 << _UFFDIO_COPY) | \ + (1 << _UFFDIO_ZEROPAGE)) + +static int release_pages(char *rel_area) +{ + int ret = 0; + + if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) { + perror("madvise"); + ret = 1; + } + + return ret; +} + +static void allocate_area(void **alloc_area) +{ + if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) { + fprintf(stderr, "out of memory\n"); + *alloc_area = NULL; + } +} + +#else /* HUGETLB_TEST or SHMEM_TEST */ + +#define EXPECTED_IOCTLS UFFD_API_RANGE_IOCTLS_BASIC + +#ifdef HUGETLB_TEST + +/* HugeTLB memory */ +static int release_pages(char *rel_area) +{ + int ret = 0; + + if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + rel_area == huge_fd_off0 ? 0 : + nr_pages * page_size, + nr_pages * page_size)) { + perror("fallocate"); + ret = 1; + } + + return ret; +} + + +static void allocate_area(void **alloc_area) +{ + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_HUGETLB, huge_fd, + *alloc_area == area_src ? 0 : + nr_pages * page_size); + if (*alloc_area == MAP_FAILED) { + fprintf(stderr, "mmap of hugetlbfs file failed\n"); + *alloc_area = NULL; + } + + if (*alloc_area == area_src) + huge_fd_off0 = *alloc_area; +} + +#elif defined(SHMEM_TEST) + +/* Shared memory */ +static int release_pages(char *rel_area) +{ + int ret = 0; + + if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) { + perror("madvise"); + ret = 1; + } + + return ret; +} + +static void allocate_area(void **alloc_area) +{ + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (*alloc_area == MAP_FAILED) { + fprintf(stderr, "shared memory mmap failed\n"); + *alloc_area = NULL; + } +} + +#else /* SHMEM_TEST */ +#error "Undefined test type" +#endif /* HUGETLB_TEST */ + +#endif /* !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) */ + static int my_bcmp(char *str1, char *str2, size_t n) { unsigned long i; @@ -217,7 +318,7 @@ static void *locking_thread(void *arg) return NULL; } -static int copy_page(unsigned long offset) +static int copy_page(int ufd, unsigned long offset) { struct uffdio_copy uffdio_copy; @@ -229,7 +330,7 @@ static int copy_page(unsigned long offset) uffdio_copy.len = page_size; uffdio_copy.mode = 0; uffdio_copy.copy = 0; - if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) { + if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy.copy != -EEXIST) fprintf(stderr, "UFFDIO_COPY error %Ld\n", @@ -247,6 +348,7 @@ static void *uffd_poll_thread(void *arg) unsigned long cpu = (unsigned long) arg; struct pollfd pollfd[2]; struct uffd_msg msg; + struct uffdio_register uffd_reg; int ret; unsigned long offset; char tmp_chr; @@ -278,16 +380,35 @@ static void *uffd_poll_thread(void *arg) continue; perror("nonblocking read error"), exit(1); } - if (msg.event != UFFD_EVENT_PAGEFAULT) + switch (msg.event) { + default: fprintf(stderr, "unexpected msg event %u\n", msg.event), exit(1); - if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); - offset = (char *)(unsigned long)msg.arg.pagefault.address - - area_dst; - offset &= ~(page_size-1); - if (copy_page(offset)) - userfaults++; + break; + case UFFD_EVENT_PAGEFAULT: + if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(uffd, offset)) + userfaults++; + break; + case UFFD_EVENT_FORK: + uffd = msg.arg.fork.ufd; + pollfd[0].fd = uffd; + break; + case UFFD_EVENT_REMOVE: + uffd_reg.range.start = msg.arg.remove.start; + uffd_reg.range.len = msg.arg.remove.end - + msg.arg.remove.start; + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) + fprintf(stderr, "remove failure\n"), exit(1); + break; + case UFFD_EVENT_REMAP: + area_dst = (char *)(unsigned long)msg.arg.remap.to; + break; + } } return (void *)userfaults; } @@ -324,7 +445,7 @@ static void *uffd_read_thread(void *arg) offset = (char *)(unsigned long)msg.arg.pagefault.address - area_dst; offset &= ~(page_size-1); - if (copy_page(offset)) + if (copy_page(uffd, offset)) (*this_cpu_userfaults)++; } return (void *)NULL; @@ -338,7 +459,7 @@ static void *background_thread(void *arg) for (page_nr = cpu * nr_pages_per_cpu; page_nr < (cpu+1) * nr_pages_per_cpu; page_nr++) - copy_page(page_nr * page_size); + copy_page(uffd, page_nr * page_size); return NULL; } @@ -384,10 +505,8 @@ static int stress(unsigned long *userfaults) * UFFDIO_COPY without writing zero pages into area_dst * because the background threads already completed). */ - if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) { - perror("madvise"); + if (release_pages(area_src)) return 1; - } for (cpu = 0; cpu < nr_cpus; cpu++) { char c; @@ -414,27 +533,9 @@ static int stress(unsigned long *userfaults) return 0; } -static int userfaultfd_stress(void) +static int userfaultfd_open(int features) { - void *area; - char *tmp_area; - unsigned long nr; - struct uffdio_register uffdio_register; struct uffdio_api uffdio_api; - unsigned long cpu; - int uffd_flags, err; - unsigned long userfaults[nr_cpus]; - - if (posix_memalign(&area, page_size, nr_pages * page_size)) { - fprintf(stderr, "out of memory\n"); - return 1; - } - area_src = area; - if (posix_memalign(&area, page_size, nr_pages * page_size)) { - fprintf(stderr, "out of memory\n"); - return 1; - } - area_dst = area; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (uffd < 0) { @@ -445,7 +546,7 @@ static int userfaultfd_stress(void) uffd_flags = fcntl(uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; - uffdio_api.features = 0; + uffdio_api.features = features; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { fprintf(stderr, "UFFDIO_API\n"); return 1; @@ -455,6 +556,233 @@ static int userfaultfd_stress(void) return 1; } + return 0; +} + +/* + * For non-cooperative userfaultfd test we fork() a process that will + * generate pagefaults, will mremap the area monitored by the + * userfaultfd and at last this process will release the monitored + * area. + * For the anonymous and shared memory the area is divided into two + * parts, the first part is accessed before mremap, and the second + * part is accessed after mremap. Since hugetlbfs does not support + * mremap, the entire monitored area is accessed in a single pass for + * HUGETLB_TEST. + * The release of the pages currently generates event for shmem and + * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked + * for hugetlb. + */ +static int faulting_process(void) +{ + unsigned long nr; + unsigned long long count; + +#ifndef HUGETLB_TEST + unsigned long split_nr_pages = (nr_pages + 1) / 2; +#else + unsigned long split_nr_pages = nr_pages; +#endif + + for (nr = 0; nr < split_nr_pages; nr++) { + count = *area_count(area_dst, nr); + if (count != count_verify[nr]) { + fprintf(stderr, + "nr %lu memory corruption %Lu %Lu\n", + nr, count, + count_verify[nr]), exit(1); + } + } + +#ifndef HUGETLB_TEST + area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, area_src); + if (area_dst == MAP_FAILED) + perror("mremap"), exit(1); + + for (; nr < nr_pages; nr++) { + count = *area_count(area_dst, nr); + if (count != count_verify[nr]) { + fprintf(stderr, + "nr %lu memory corruption %Lu %Lu\n", + nr, count, + count_verify[nr]), exit(1); + } + } + + if (release_pages(area_dst)) + return 1; + + for (nr = 0; nr < nr_pages; nr++) { + if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) + fprintf(stderr, "nr %lu is not zero\n", nr), exit(1); + } + +#endif /* HUGETLB_TEST */ + + return 0; +} + +static int uffdio_zeropage(int ufd, unsigned long offset) +{ + struct uffdio_zeropage uffdio_zeropage; + int ret; + unsigned long has_zeropage = EXPECTED_IOCTLS & (1 << _UFFDIO_ZEROPAGE); + + if (offset >= nr_pages * page_size) + fprintf(stderr, "unexpected offset %lu\n", + offset), exit(1); + uffdio_zeropage.range.start = (unsigned long) area_dst + offset; + uffdio_zeropage.range.len = page_size; + uffdio_zeropage.mode = 0; + ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); + if (ret) { + /* real retval in ufdio_zeropage.zeropage */ + if (has_zeropage) { + if (uffdio_zeropage.zeropage == -EEXIST) + fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"), + exit(1); + else + fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n", + uffdio_zeropage.zeropage), exit(1); + } else { + if (uffdio_zeropage.zeropage != -EINVAL) + fprintf(stderr, + "UFFDIO_ZEROPAGE not -EINVAL %Ld\n", + uffdio_zeropage.zeropage), exit(1); + } + } else if (has_zeropage) { + if (uffdio_zeropage.zeropage != page_size) { + fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", + uffdio_zeropage.zeropage), exit(1); + } else + return 1; + } else { + fprintf(stderr, + "UFFDIO_ZEROPAGE succeeded %Ld\n", + uffdio_zeropage.zeropage), exit(1); + } + + return 0; +} + +/* exercise UFFDIO_ZEROPAGE */ +static int userfaultfd_zeropage_test(void) +{ + struct uffdio_register uffdio_register; + unsigned long expected_ioctls; + + printf("testing UFFDIO_ZEROPAGE: "); + fflush(stdout); + + if (release_pages(area_dst)) + return 1; + + if (userfaultfd_open(0) < 0) + return 1; + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + fprintf(stderr, "register failure\n"), exit(1); + + expected_ioctls = EXPECTED_IOCTLS; + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"), + exit(1); + + if (uffdio_zeropage(uffd, 0)) { + if (my_bcmp(area_dst, zeropage, page_size)) + fprintf(stderr, "zeropage is not zero\n"), exit(1); + } + + close(uffd); + printf("done.\n"); + return 0; +} + +static int userfaultfd_events_test(void) +{ + struct uffdio_register uffdio_register; + unsigned long expected_ioctls; + unsigned long userfaults; + pthread_t uffd_mon; + int err, features; + pid_t pid; + char c; + + printf("testing events (fork, remap, remove): "); + fflush(stdout); + + if (release_pages(area_dst)) + return 1; + + features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP | + UFFD_FEATURE_EVENT_REMOVE; + if (userfaultfd_open(features) < 0) + return 1; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + fprintf(stderr, "register failure\n"), exit(1); + + expected_ioctls = EXPECTED_IOCTLS; + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"), + exit(1); + + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL)) + perror("uffd_poll_thread create"), exit(1); + + pid = fork(); + if (pid < 0) + perror("fork"), exit(1); + + if (!pid) + return faulting_process(); + + waitpid(pid, &err, 0); + if (err) + fprintf(stderr, "faulting process failed\n"), exit(1); + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + perror("pipe write"), exit(1); + if (pthread_join(uffd_mon, (void **)&userfaults)) + return 1; + + close(uffd); + printf("userfaults: %ld\n", userfaults); + + return userfaults != nr_pages; +} + +static int userfaultfd_stress(void) +{ + void *area; + char *tmp_area; + unsigned long nr; + struct uffdio_register uffdio_register; + unsigned long cpu; + int err; + unsigned long userfaults[nr_cpus]; + + allocate_area((void **)&area_src); + if (!area_src) + return 1; + allocate_area((void **)&area_dst); + if (!area_dst) + return 1; + + if (userfaultfd_open(0) < 0) + return 1; + count_verify = malloc(nr_pages * sizeof(unsigned long long)); if (!count_verify) { perror("count_verify"); @@ -528,9 +856,7 @@ static int userfaultfd_stress(void) fprintf(stderr, "register failure\n"); return 1; } - expected_ioctls = (1 << _UFFDIO_WAKE) | - (1 << _UFFDIO_COPY) | - (1 << _UFFDIO_ZEROPAGE); + expected_ioctls = EXPECTED_IOCTLS; if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { fprintf(stderr, @@ -562,10 +888,8 @@ static int userfaultfd_stress(void) * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's * required to MADV_DONTNEED here. */ - if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) { - perror("madvise 2"); + if (release_pages(area_dst)) return 1; - } /* bounce pass */ if (stress(userfaults)) @@ -603,9 +927,15 @@ static int userfaultfd_stress(void) printf("\n"); } - return err; + if (err) + return err; + + close(uffd); + return userfaultfd_zeropage_test() || userfaultfd_events_test(); } +#ifndef HUGETLB_TEST + int main(int argc, char **argv) { if (argc < 3) @@ -632,6 +962,74 @@ int main(int argc, char **argv) return userfaultfd_stress(); } +#else /* HUGETLB_TEST */ + +/* + * Copied from mlock2-tests.c + */ +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + +int main(int argc, char **argv) +{ + if (argc < 4) + fprintf(stderr, "Usage: <MiB> <bounces> <hugetlbfs_file>\n"), + exit(1); + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + page_size = default_huge_page_size(); + if (!page_size) + fprintf(stderr, "Unable to determine huge page size\n"), + exit(2); + if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 + > page_size) + fprintf(stderr, "Impossible to run this test\n"), exit(2); + nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / + nr_cpus; + if (!nr_pages_per_cpu) { + fprintf(stderr, "invalid MiB\n"); + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + } + bounces = atoi(argv[2]); + if (bounces <= 0) { + fprintf(stderr, "invalid bounces\n"); + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + } + nr_pages = nr_pages_per_cpu * nr_cpus; + huge_fd = open(argv[3], O_CREAT | O_RDWR, 0755); + if (huge_fd < 0) { + fprintf(stderr, "Open of %s failed", argv[3]); + perror("open"); + exit(1); + } + if (ftruncate(huge_fd, 0)) { + fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]); + perror("ftruncate"); + exit(1); + } + printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", + nr_pages, nr_pages_per_cpu); + return userfaultfd_stress(); +} + +#endif #else /* __NR_userfaultfd */ #warning "missing __NR_userfaultfd definition" diff --git a/tools/vm/Makefile b/tools/vm/Makefile index 93aadaf7ff63..006029456988 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -9,6 +9,8 @@ CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -I../lib/ LDFLAGS = $(LIBS) +all: $(TARGETS) + $(TARGETS): $(LIBS) $(LIBS): @@ -20,3 +22,9 @@ $(LIBS): clean: $(RM) page-types slabinfo page_owner_sort make -C $(LIB_DIR) clean + +sbindir ?= /usr/sbin + +install: all + install -d $(DESTDIR)$(sbindir) + install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 482612b4e496..7556e1d56708 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2348,9 +2348,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int kvm_vcpu_fault(struct vm_fault *vmf) { - struct kvm_vcpu *vcpu = vma->vm_file->private_data; + struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; struct page *page; if (vmf->pgoff == 0) |