diff options
27 files changed, 291 insertions, 219 deletions
diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst index fa7ddc0428c8..5325c71ca877 100644 --- a/Documentation/x86/boot.rst +++ b/Documentation/x86/boot.rst @@ -1399,8 +1399,8 @@ must have read/write permission; CS must be __BOOT_CS and DS, ES, SS must be __BOOT_DS; interrupt must be disabled; %rsi must hold the base address of the struct boot_params. -EFI Handover Protocol -===================== +EFI Handover Protocol (deprecated) +================================== This protocol allows boot loaders to defer initialisation to the EFI boot stub. The boot loader is required to load the kernel/initrd(s) @@ -1408,6 +1408,12 @@ from the boot media and jump to the EFI handover protocol entry point which is hdr->handover_offset bytes from the beginning of startup_{32,64}. +The boot loader MUST respect the kernel's PE/COFF metadata when it comes +to section alignment, the memory footprint of the executable image beyond +the size of the file itself, and any other aspect of the PE/COFF header +that may affect correct operation of the image as a PE/COFF binary in the +execution context provided by the EFI firmware. + The function prototype for the handover entry point looks like this:: efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp) @@ -1419,9 +1425,18 @@ UEFI specification. 'bp' is the boot loader-allocated boot params. The boot loader *must* fill out the following fields in bp:: - - hdr.code32_start - hdr.cmd_line_ptr - hdr.ramdisk_image (if applicable) - hdr.ramdisk_size (if applicable) All other fields should be zero. + +NOTE: The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF + entry point, combined with the LINUX_EFI_INITRD_MEDIA_GUID based initrd + loading protocol (refer to [0] for an example of the bootloader side of + this), which removes the need for any knowledge on the part of the EFI + bootloader regarding the internal representation of boot_params or any + requirements/limitations regarding the placement of the command line + and ramdisk in memory, or the placement of the kernel image itself. + +[0] https://github.com/u-boot/u-boot/commit/ec80b4735a593961fe701cc3a5d717d4739b0fd0 diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index cabdd8f4a248..e8e1c866e413 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1450,7 +1450,8 @@ ENTRY(efi_enter_kernel) @ running beyond the PoU, and so calling cache_off below from @ inside the PE/COFF loader allocated region is unsafe unless @ we explicitly clean it to the PoC. - adr r0, call_cache_fn @ region of code we will + ARM( adrl r0, call_cache_fn ) + THUMB( adr r0, call_cache_fn ) @ region of code we will adr r1, 0f @ run with MMU off bl cache_clean_flush bl cache_off diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index a0765aa60ea9..1bff55aa2d54 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table.h generic-y += extable.h -generic-y += hardirq.h generic-y += kvm_para.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b0da5320bcff..624f5d9b0f79 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -20,6 +20,7 @@ #include <linux/mm.h> #include <linux/hyperv.h> #include <linux/slab.h> +#include <linux/kernel.h> #include <linux/cpuhotplug.h> #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> @@ -419,11 +420,14 @@ void hyperv_cleanup(void) } EXPORT_SYMBOL_GPL(hyperv_cleanup); -void hyperv_report_panic(struct pt_regs *regs, long err) +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) { static bool panic_reported; u64 guest_id; + if (in_die && !panic_on_oops) + return; + /* * We prefer to report panic on 'die' chain as we have proper * registers to report, but if we miss it (e.g. on BUG()) we need diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cdcf48d52a12..8391c115c0ec 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -178,8 +178,10 @@ extern void efi_free_boot_services(void); extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); +/* kexec external ABI */ struct efi_setup_data { u64 fw_vendor; + u64 __unused; u64 tables; u64 smbios; u64 reserved[8]; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index caa032ce3fe3..ebf34c7bc8bc 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -227,8 +227,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - pr_info("Hyper-V: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); @@ -263,6 +263,16 @@ static void __init ms_hyperv_init_platform(void) cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); } + /* + * Hyper-V expects to get crash register data or kmsg when + * crash enlightment is available and system crashes. Set + * crash_kexec_post_notifiers to be true to make sure that + * calling crash enlightment interface before running kdump + * kernel. + */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + crash_kexec_post_notifiers = true; + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 211bb9358b73..c5e393f8bb3f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -202,7 +202,7 @@ virt_to_phys_or_null_size(void *va, unsigned long size) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long pfn, text, pf; + unsigned long pfn, text, pf, rodata; struct page *page; unsigned npages; pgd_t *pgd = efi_mm.pgd; @@ -256,7 +256,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ - npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT; + npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; @@ -266,6 +266,14 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 1; } + npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; + rodata = __pa(__start_rodata); + pfn = rodata >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { + pr_err("Failed to map kernel rodata 1:1\n"); + return 1; + } + return 0; } @@ -638,7 +646,7 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - if (!phys_name || !phys_data) + if (!phys_name || (data && !phys_data)) status = EFI_INVALID_PARAMETER; else status = efi_thunk(set_variable, phys_name, phys_vendor, @@ -669,7 +677,7 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - if (!phys_name || !phys_data) + if (!phys_name || (data && !phys_data)) status = EFI_INVALID_PARAMETER; else status = efi_thunk(set_variable, phys_name, phys_vendor, diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index b1af0de2e100..9d2512913d25 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -101,7 +101,7 @@ void cper_print_bits(const char *pfx, unsigned int bits, if (!len) len = snprintf(buf, sizeof(buf), "%s%s", pfx, str); else - len += snprintf(buf+len, sizeof(buf)-len, ", %s", str); + len += scnprintf(buf+len, sizeof(buf)-len, ", %s", str); } if (len) printk("%s\n", buf); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index cc90a748bcf0..67d26949fd26 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -25,7 +25,7 @@ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif -#ifdef CONFIG_ARM +#if defined(CONFIG_ARM) || defined(CONFIG_X86) #define __efistub_global __section(.data) #else #define __efistub_global diff --git a/drivers/firmware/efi/libstub/file.c b/drivers/firmware/efi/libstub/file.c index d4c7e5f59d2c..ea66b1f16a79 100644 --- a/drivers/firmware/efi/libstub/file.c +++ b/drivers/firmware/efi/libstub/file.c @@ -29,30 +29,31 @@ */ #define EFI_READ_CHUNK_SIZE SZ_1M +struct finfo { + efi_file_info_t info; + efi_char16_t filename[MAX_FILENAME_SIZE]; +}; + static efi_status_t efi_open_file(efi_file_protocol_t *volume, - efi_char16_t *filename_16, + struct finfo *fi, efi_file_protocol_t **handle, unsigned long *file_size) { - struct { - efi_file_info_t info; - efi_char16_t filename[MAX_FILENAME_SIZE]; - } finfo; efi_guid_t info_guid = EFI_FILE_INFO_ID; efi_file_protocol_t *fh; unsigned long info_sz; efi_status_t status; - status = volume->open(volume, &fh, filename_16, EFI_FILE_MODE_READ, 0); + status = volume->open(volume, &fh, fi->filename, EFI_FILE_MODE_READ, 0); if (status != EFI_SUCCESS) { pr_efi_err("Failed to open file: "); - efi_char16_printk(filename_16); + efi_char16_printk(fi->filename); efi_printk("\n"); return status; } - info_sz = sizeof(finfo); - status = fh->get_info(fh, &info_guid, &info_sz, &finfo); + info_sz = sizeof(struct finfo); + status = fh->get_info(fh, &info_guid, &info_sz, fi); if (status != EFI_SUCCESS) { pr_efi_err("Failed to get file info\n"); fh->close(fh); @@ -60,7 +61,7 @@ static efi_status_t efi_open_file(efi_file_protocol_t *volume, } *handle = fh; - *file_size = finfo.info.file_size; + *file_size = fi->info.file_size; return EFI_SUCCESS; } @@ -146,13 +147,13 @@ static efi_status_t handle_cmdline_files(efi_loaded_image_t *image, alloc_addr = alloc_size = 0; do { - efi_char16_t filename[MAX_FILENAME_SIZE]; + struct finfo fi; unsigned long size; void *addr; offset = find_file_option(cmdline, cmdline_len, optstr, optstr_size, - filename, ARRAY_SIZE(filename)); + fi.filename, ARRAY_SIZE(fi.filename)); if (!offset) break; @@ -166,7 +167,7 @@ static efi_status_t handle_cmdline_files(efi_loaded_image_t *image, return status; } - status = efi_open_file(volume, filename, &file, &size); + status = efi_open_file(volume, &fi, &file, &size); if (status != EFI_SUCCESS) goto err_close_volume; diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 8d3a707789de..05ccb229fb45 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -20,7 +20,7 @@ /* Maximum physical address for 64-bit kernel with 4-level paging */ #define MAXMEM_X86_64_4LEVEL (1ull << 46) -static efi_system_table_t *sys_table; +static efi_system_table_t *sys_table __efistub_global; extern const bool efi_is64; extern u32 image_offset; @@ -392,8 +392,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, image_base = efi_table_attr(image, image_base); image_offset = (void *)startup_32 - image_base; - hdr = &((struct boot_params *)image_base)->hdr; - status = efi_allocate_pages(0x4000, (unsigned long *)&boot_params, ULONG_MAX); if (status != EFI_SUCCESS) { efi_printk("Failed to allocate lowmem for boot params\n"); @@ -742,8 +740,15 @@ unsigned long efi_main(efi_handle_t handle, * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what * KASLR uses. * - * Also relocate it if image_offset is zero, i.e. we weren't loaded by - * LoadImage, but we are not aligned correctly. + * Also relocate it if image_offset is zero, i.e. the kernel wasn't + * loaded by LoadImage, but rather by a bootloader that called the + * handover entry. The reason we must always relocate in this case is + * to handle the case of systemd-boot booting a unified kernel image, + * which is a PE executable that contains the bzImage and an initrd as + * COFF sections. The initrd section is placed after the bzImage + * without ensuring that there are at least init_size bytes available + * for the bzImage, and thus the compressed kernel's startup code may + * overwrite the initrd unless it is moved out of the way. */ buffer_start = ALIGN(bzimage_addr - image_offset, @@ -753,8 +758,7 @@ unsigned long efi_main(efi_handle_t handle, if ((buffer_start < LOAD_PHYSICAL_ADDR) || (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || - (image_offset == 0 && !IS_ALIGNED(bzimage_addr, - hdr->kernel_alignment))) { + (image_offset == 0)) { status = efi_relocate_kernel(&bzimage_addr, hdr->init_size, hdr->init_size, hdr->pref_address, diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 0370364169c4..501c43c5851d 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -839,6 +839,9 @@ void vmbus_initiate_unload(bool crash) { struct vmbus_channel_message_header hdr; + if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED) + return; + /* Pre-Win2012R2 hosts don't support reconnect */ if (vmbus_proto_version < VERSION_WIN8_1) return; diff --git a/drivers/hv/hv_debugfs.c b/drivers/hv/hv_debugfs.c index 8a2878573582..ccf752b6659a 100644 --- a/drivers/hv/hv_debugfs.c +++ b/drivers/hv/hv_debugfs.c @@ -11,7 +11,7 @@ #include "hyperv_vmbus.h" -struct dentry *hv_debug_root; +static struct dentry *hv_debug_root; static int hv_debugfs_delay_get(void *data, u64 *val) { diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index f5fa3b3c9baf..70b30e223a57 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -292,7 +292,7 @@ struct vmbus_msginfo { struct list_head msglist_entry; /* The message itself */ - unsigned char msg[0]; + unsigned char msg[]; }; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 029378c27421..a68bce4d0ddb 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -31,6 +31,7 @@ #include <linux/kdebug.h> #include <linux/efi.h> #include <linux/random.h> +#include <linux/kernel.h> #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> #include "hyperv_vmbus.h" @@ -48,14 +49,35 @@ static int hyperv_cpuhp_online; static void *hv_panic_page; +/* + * Boolean to control whether to report panic messages over Hyper-V. + * + * It can be set via /proc/sys/kernel/hyperv/record_panic_msg + */ +static int sysctl_record_panic_msg = 1; + +static int hyperv_report_reg(void) +{ + return !sysctl_record_panic_msg || !hv_panic_page; +} + static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, void *args) { struct pt_regs *regs; - regs = current_pt_regs(); + vmbus_initiate_unload(true); - hyperv_report_panic(regs, val); + /* + * Hyper-V should be notified only once about a panic. If we will be + * doing hyperv_report_panic_msg() later with kmsg data, don't do + * the notification here. + */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE + && hyperv_report_reg()) { + regs = current_pt_regs(); + hyperv_report_panic(regs, val, false); + } return NOTIFY_DONE; } @@ -65,7 +87,13 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val, struct die_args *die = (struct die_args *)args; struct pt_regs *regs = die->regs; - hyperv_report_panic(regs, val); + /* + * Hyper-V should be notified only once about a panic. If we will be + * doing hyperv_report_panic_msg() later with kmsg data, don't do + * the notification here. + */ + if (hyperv_report_reg()) + hyperv_report_panic(regs, val, true); return NOTIFY_DONE; } @@ -1253,13 +1281,6 @@ static void vmbus_isr(void) } /* - * Boolean to control whether to report panic messages over Hyper-V. - * - * It can be set via /proc/sys/kernel/hyperv/record_panic_msg - */ -static int sysctl_record_panic_msg = 1; - -/* * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg * buffer and call into Hyper-V to transfer the data. */ @@ -1382,19 +1403,29 @@ static int vmbus_bus_init(void) hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page(); if (hv_panic_page) { ret = kmsg_dump_register(&hv_kmsg_dumper); - if (ret) + if (ret) { pr_err("Hyper-V: kmsg dump register " "error 0x%x\n", ret); + hv_free_hyperv_page( + (unsigned long)hv_panic_page); + hv_panic_page = NULL; + } } else pr_err("Hyper-V: panic message page memory " "allocation failed"); } register_die_notifier(&hyperv_die_block); - atomic_notifier_chain_register(&panic_notifier_list, - &hyperv_panic_block); } + /* + * Always register the panic notifier because we need to unload + * the VMbus channel connection to prevent any VMbus + * activity after the VM panics. + */ + atomic_notifier_chain_register(&panic_notifier_list, + &hyperv_panic_block); + vmbus_request_offers(); return 0; @@ -1407,7 +1438,6 @@ err_alloc: hv_remove_vmbus_irq(); bus_unregister(&hv_bus); - hv_free_hyperv_page((unsigned long)hv_panic_page); unregister_sysctl_table(hv_ctl_table_hdr); hv_ctl_table_hdr = NULL; return ret; @@ -2204,8 +2234,6 @@ static int vmbus_bus_suspend(struct device *dev) vmbus_initiate_unload(false); - vmbus_connection.conn_state = DISCONNECTED; - /* Reset the event for the next resume. */ reinit_completion(&vmbus_connection.ready_for_resume_event); @@ -2289,7 +2317,6 @@ static void hv_kexec_handler(void) { hv_stimer_global_cleanup(); vmbus_initiate_unload(false); - vmbus_connection.conn_state = DISCONNECTED; /* Make sure conn_state is set as hv_synic_cleanup checks for it */ mb(); cpuhp_remove_state(hyperv_cpuhp_online); @@ -2306,7 +2333,6 @@ static void hv_crash_handler(struct pt_regs *regs) * doing the cleanup for current CPU only. This should be sufficient * for kdump. */ - vmbus_connection.conn_state = DISCONNECTED; cpu = smp_processor_id(); hv_stimer_cleanup(cpu); hv_synic_disable_regs(cpu); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5c794f4b051a..d1e1caa23c8b 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1032,7 +1032,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent; struct inode *inode; struct key *key; - afs_dataversion_t dir_version; + afs_dataversion_t dir_version, invalid_before; long de_version; int ret; @@ -1084,8 +1084,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (de_version == (long)dir_version) goto out_valid_noupdate; - dir_version = dir->invalid_before; - if (de_version - (long)dir_version >= 0) + invalid_before = dir->invalid_before; + if (de_version - (long)invalid_before >= 0) goto out_valid; _debug("dir modified"); @@ -1275,6 +1275,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; mode |= S_IFDIR; @@ -1295,7 +1296,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1316,10 +1317,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto error_key; } - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, - afs_edit_dir_for_create); + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) + afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, + afs_edit_dir_for_create); + up_write(&dvnode->validate_lock); + } key_put(key); kfree(scb); @@ -1360,6 +1365,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%pd}", @@ -1391,7 +1397,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1404,9 +1410,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = afs_end_vnode_operation(&fc); if (ret == 0) { afs_dir_remove_subdir(dentry); - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_remove(dvnode, &dentry->d_name, afs_edit_dir_for_rmdir); + up_write(&dvnode->validate_lock); } } @@ -1544,10 +1553,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) ret = afs_end_vnode_operation(&fc); if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) ret = afs_dir_remove_link(dvnode, dentry, key); - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_unlink); + + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); + } } if (need_rehash && ret < 0 && ret != -ENOENT) @@ -1573,6 +1587,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; mode |= S_IFREG; @@ -1597,7 +1612,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1618,9 +1633,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_create); + up_write(&dvnode->validate_lock); kfree(scb); key_put(key); @@ -1648,6 +1666,7 @@ static int afs_link(struct dentry *from, struct inode *dir, struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%llx:%llu},{%pd}", @@ -1672,7 +1691,7 @@ static int afs_link(struct dentry *from, struct inode *dir, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); @@ -1702,9 +1721,12 @@ static int afs_link(struct dentry *from, struct inode *dir, goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid, afs_edit_dir_for_link); + up_write(&dvnode->validate_lock); key_put(key); kfree(scb); @@ -1732,6 +1754,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%pd},%s", @@ -1759,7 +1782,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1780,9 +1803,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_symlink); + up_write(&dvnode->validate_lock); key_put(key); kfree(scb); @@ -1812,6 +1838,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *tmp = NULL, *rehash = NULL; struct inode *new_inode; struct key *key; + afs_dataversion_t orig_data_version; + afs_dataversion_t new_data_version; bool new_negative = d_is_negative(new_dentry); int ret; @@ -1890,10 +1918,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { - afs_dataversion_t orig_data_version; - afs_dataversion_t new_data_version; - struct afs_status_cb *new_scb = &scb[1]; - orig_data_version = orig_dvnode->status.data_version + 1; if (orig_dvnode != new_dvnode) { @@ -1904,7 +1928,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, new_data_version = new_dvnode->status.data_version + 1; } else { new_data_version = orig_data_version; - new_scb = &scb[0]; } while (afs_select_fileserver(&fc)) { @@ -1912,7 +1935,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode); afs_fs_rename(&fc, old_dentry->d_name.name, new_dvnode, new_dentry->d_name.name, - &scb[0], new_scb); + &scb[0], &scb[1]); } afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break, @@ -1930,18 +1953,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, if (ret == 0) { if (rehash) d_rehash(rehash); - if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) - afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, - afs_edit_dir_for_rename_0); + down_write(&orig_dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) && + orig_dvnode->status.data_version == orig_data_version) + afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, + afs_edit_dir_for_rename_0); + if (orig_dvnode != new_dvnode) { + up_write(&orig_dvnode->validate_lock); - if (!new_negative && - test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) - afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, - afs_edit_dir_for_rename_1); + down_write(&new_dvnode->validate_lock); + } + if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) && + orig_dvnode->status.data_version == new_data_version) { + if (!new_negative) + afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, + afs_edit_dir_for_rename_1); - if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) afs_edit_dir_add(new_dvnode, &new_dentry->d_name, &vnode->fid, afs_edit_dir_for_rename_2); + } new_inode = d_inode(new_dentry); if (new_inode) { @@ -1957,14 +1987,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, * Note that if we ever implement RENAME_EXCHANGE, we'll have * to update both dentries with opposing dir versions. */ - if (new_dvnode != orig_dvnode) { - afs_update_dentry_version(&fc, old_dentry, &scb[1]); - afs_update_dentry_version(&fc, new_dentry, &scb[1]); - } else { - afs_update_dentry_version(&fc, old_dentry, &scb[0]); - afs_update_dentry_version(&fc, new_dentry, &scb[0]); - } + afs_update_dentry_version(&fc, old_dentry, &scb[1]); + afs_update_dentry_version(&fc, new_dentry, &scb[1]); d_move(old_dentry, new_dentry); + up_write(&new_dvnode->validate_lock); goto error_tmp; } diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index 361088a5edb9..d94e2b7cddff 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode { struct afs_fs_cursor fc; struct afs_status_cb *scb; + afs_dataversion_t dir_data_version; int ret = -ERESTARTSYS; _enter("%pd,%pd", old, new); @@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode trace_afs_silly_rename(vnode, false); if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; + dir_data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -54,12 +55,15 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode dvnode->silly_key = key_get(key); } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dir_data_version) { afs_edit_dir_remove(dvnode, &old->d_name, afs_edit_dir_for_silly_0); - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_edit_dir_add(dvnode, &new->d_name, &vnode->fid, afs_edit_dir_for_silly_1); + } + up_write(&dvnode->validate_lock); } kfree(scb); @@ -181,10 +185,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } } - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_unlink); + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dir_data_version) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); + } } kfree(scb); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 1f9c5d8e6fe5..68fc46634346 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -65,6 +65,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; + int ret; abort_code = ntohl(xdr->abort_code); @@ -78,7 +79,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, */ status->abort_code = abort_code; scb->have_error = true; - return 0; + goto good; } pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); @@ -87,7 +88,8 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, if (abort_code != 0 && inline_error) { status->abort_code = abort_code; - return 0; + scb->have_error = true; + goto good; } type = ntohl(xdr->type); @@ -123,13 +125,16 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, data_version |= (u64)ntohl(xdr->data_version_hi) << 32; status->data_version = data_version; scb->have_status = true; - +good: + ret = 0; +advance: *_bp = (const void *)*_bp + sizeof(*xdr); - return 0; + return ret; bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + goto advance; } static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) @@ -981,16 +986,16 @@ static int afs_deliver_fs_rename(struct afs_call *call) if (ret < 0) return ret; - /* unmarshall the reply once we've received all of it */ + /* If the two dirs are the same, we have two copies of the same status + * report, so we just decode it twice. + */ bp = call->buffer; ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - if (call->out_dir_scb != call->out_scb) { - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - } + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); + if (ret < 0) + return ret; xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index a26126ac7bf1..b5b45c57e1b1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -165,15 +165,15 @@ static void xdr_dump_bad(const __be32 *bp) int i; pr_notice("YFS XDR: Bad status record\n"); - for (i = 0; i < 5 * 4 * 4; i += 16) { + for (i = 0; i < 6 * 4 * 4; i += 16) { memcpy(x, bp, 16); bp += 4; pr_notice("%03x: %08x %08x %08x %08x\n", i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); } - memcpy(x, bp, 4); - pr_notice("0x50: %08x\n", ntohl(x[0])); + memcpy(x, bp, 8); + pr_notice("0x60: %08x %08x\n", ntohl(x[0]), ntohl(x[1])); } /* @@ -186,13 +186,14 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; struct afs_file_status *status = &scb->status; u32 type; + int ret; status->abort_code = ntohl(xdr->abort_code); if (status->abort_code != 0) { if (status->abort_code == VNOVNODE) status->nlink = 0; scb->have_error = true; - return 0; + goto good; } type = ntohl(xdr->type); @@ -220,13 +221,16 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, status->size = xdr_to_u64(xdr->size); status->data_version = xdr_to_u64(xdr->data_version); scb->have_status = true; - +good: + ret = 0; +advance: *_bp += xdr_size(xdr); - return 0; + return ret; bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + goto advance; } /* @@ -1153,11 +1157,9 @@ static int yfs_deliver_fs_rename(struct afs_call *call) ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - if (call->out_dir_scb != call->out_scb) { - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - } + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); + if (ret < 0) + return ret; xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 786849fcc319..47f66c6a7d7f 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3370,6 +3370,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) space_info->bytes_reserved > 0 || space_info->bytes_may_use > 0)) btrfs_dump_space_info(info, space_info, 0, 0); + WARN_ON(space_info->reclaim_size > 0); list_del(&space_info->list); btrfs_sysfs_remove_space_info(space_info); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8a144f9cb7ac..719e68ab552c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2098,6 +2098,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges and races between logging and + * completion of ordered extents for adjancent ranges - both races + * could lead to file extent items in the log with overlapping ranges. + * Do this while holding the inode lock, to avoid races with other + * tasks. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + + /* * Before we acquired the inode's lock, someone may have dirtied more * pages in the target range. We need to make sure that writeback for * any such pages does not start while we are logging the inode, because diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index d1973141d3bb..040009d1cc31 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -264,6 +264,7 @@ copy_inline_extent: size); inode_add_bytes(dst, datal); set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags); + ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end); out: if (!ret && !trans) { /* diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f65595602aa8..7e362a6935fd 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -611,8 +611,8 @@ static int should_ignore_root(struct btrfs_root *root) if (!reloc_root) return 0; - if (btrfs_root_last_snapshot(&reloc_root->root_item) == - root->fs_info->running_transaction->transid - 1) + if (btrfs_header_generation(reloc_root->commit_root) == + root->fs_info->running_transaction->transid) return 0; /* * if there is reloc tree and it was created in previous diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 8b0fe053a25d..ff17a4420358 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -361,6 +361,16 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; } +static void remove_ticket(struct btrfs_space_info *space_info, + struct reserve_ticket *ticket) +{ + if (!list_empty(&ticket->list)) { + list_del_init(&ticket->list); + ASSERT(space_info->reclaim_size >= ticket->bytes); + space_info->reclaim_size -= ticket->bytes; + } +} + /* * This is for space we already have accounted in space_info->bytes_may_use, so * basically when we're returning space from block_rsv's. @@ -388,9 +398,7 @@ again: btrfs_space_info_update_bytes_may_use(fs_info, space_info, ticket->bytes); - list_del_init(&ticket->list); - ASSERT(space_info->reclaim_size >= ticket->bytes); - space_info->reclaim_size -= ticket->bytes; + remove_ticket(space_info, ticket); ticket->bytes = 0; space_info->tickets_id++; wake_up(&ticket->wait); @@ -899,7 +907,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, btrfs_info(fs_info, "failing ticket with %llu bytes", ticket->bytes); - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); ticket->error = -ENOSPC; wake_up(&ticket->wait); @@ -1063,7 +1071,7 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info, * despite getting an error, resulting in a space leak * (bytes_may_use counter of our space_info). */ - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); ticket->error = -EINTR; break; } @@ -1121,7 +1129,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, * either the async reclaim job deletes the ticket from the list * or we delete it ourselves at wait_reserve_ticket(). */ - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); if (!ret) ret = -ENOSPC; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 58c111474ba5..ec36a7c6ba3d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -96,8 +96,8 @@ enum { static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, - u64 start, - u64 end, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -4533,15 +4533,13 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, static int btrfs_log_holes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, - struct btrfs_path *path, - const u64 start, - const u64 end) + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; const u64 ino = btrfs_ino(inode); const u64 i_size = i_size_read(&inode->vfs_inode); - u64 prev_extent_end = start; + u64 prev_extent_end = 0; int ret; if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) @@ -4549,21 +4547,14 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = start; + key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; - if (ret > 0 && path->slots[0] > 0) { - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); - if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY) - path->slots[0]--; - } - while (true) { struct extent_buffer *leaf = path->nodes[0]; - u64 extent_end; if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); @@ -4580,18 +4571,9 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) break; - extent_end = btrfs_file_extent_end(path); - if (extent_end <= start) - goto next_slot; - /* We have a hole, log it. */ if (prev_extent_end < key.offset) { - u64 hole_len; - - if (key.offset >= end) - hole_len = end - prev_extent_end; - else - hole_len = key.offset - prev_extent_end; + const u64 hole_len = key.offset - prev_extent_end; /* * Release the path to avoid deadlocks with other code @@ -4621,20 +4603,16 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; } - prev_extent_end = min(extent_end, end); - if (extent_end >= end) - break; -next_slot: + prev_extent_end = btrfs_file_extent_end(path); path->slots[0]++; cond_resched(); } - if (prev_extent_end < end && prev_extent_end < i_size) { + if (prev_extent_end < i_size) { u64 hole_len; btrfs_release_path(path); - hole_len = min(ALIGN(i_size, fs_info->sectorsize), end); - hole_len -= prev_extent_end; + hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); ret = btrfs_insert_file_extent(trans, root->log_root, ino, prev_extent_end, 0, 0, hole_len, 0, hole_len, @@ -4971,8 +4949,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, const u64 logged_isize, const bool recursive_logging, const int inode_only, - const u64 start, - const u64 end, struct btrfs_log_ctx *ctx, bool *need_log_inode_item) { @@ -4981,21 +4957,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, int ins_nr = 0; int ret; - /* - * We must make sure we don't copy extent items that are entirely out of - * the range [start, end - 1]. This is not just an optimization to avoid - * copying but also needed to avoid a corruption where we end up with - * file extent items in the log tree that have overlapping ranges - this - * can happen if we race with ordered extent completion for ranges that - * are outside our target range. For example we copy an extent item and - * when we move to the next leaf, that extent was trimmed and a new one - * covering a subrange of it, but with a higher key, was inserted - we - * would then copy this other extent too, resulting in a log tree with - * 2 extent items that represent overlapping ranges. - * - * We can copy the entire extents at the range bondaries however, even - * if they cover an area outside the target range. That's ok. - */ while (1) { ret = btrfs_search_forward(root, min_key, path, trans->transid); if (ret < 0) @@ -5063,29 +5024,6 @@ again: goto next_slot; } - if (min_key->type == BTRFS_EXTENT_DATA_KEY) { - const u64 extent_end = btrfs_file_extent_end(path); - - if (extent_end <= start) { - if (ins_nr > 0) { - ret = copy_items(trans, inode, dst_path, - path, ins_start_slot, - ins_nr, inode_only, - logged_isize); - if (ret < 0) - return ret; - ins_nr = 0; - } - goto next_slot; - } - if (extent_end >= end) { - ins_nr++; - if (ins_nr == 1) - ins_start_slot = path->slots[0]; - break; - } - } - if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; goto next_slot; @@ -5151,8 +5089,8 @@ next_key: static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, - u64 start, - u64 end, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -5180,9 +5118,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, return -ENOMEM; } - start = ALIGN_DOWN(start, fs_info->sectorsize); - end = ALIGN(end, fs_info->sectorsize); - min_key.objectid = ino; min_key.type = BTRFS_INODE_ITEM_KEY; min_key.offset = 0; @@ -5298,8 +5233,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, err = copy_inode_items_to_log(trans, inode, &min_key, &max_key, path, dst_path, logged_isize, - recursive_logging, inode_only, - start, end, ctx, &need_log_inode_item); + recursive_logging, inode_only, ctx, + &need_log_inode_item); if (err) goto out_unlock; @@ -5312,7 +5247,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_holes(trans, root, inode, path, start, end); + err = btrfs_log_holes(trans, root, inode, path); if (err) goto out_unlock; } diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index b3f1082cc435..1c4fd950f091 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -163,7 +163,7 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, return nr_bank; } -void hyperv_report_panic(struct pt_regs *regs, long err); +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); void hyperv_report_panic_msg(phys_addr_t pa, size_t size); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 8134924cfc17..e6b6cb0f8bc6 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -36,12 +36,10 @@ struct btrfs_ioctl_vol_args { #define BTRFS_DEVICE_PATH_NAME_MAX 1024 #define BTRFS_SUBVOL_NAME_MAX 4039 -/* - * Deprecated since 5.7: - * - * BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) - */ - +#ifndef __KERNEL__ +/* Deprecated since 5.7 */ +# define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#endif #define BTRFS_SUBVOL_RDONLY (1ULL << 1) #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) |