diff options
-rw-r--r-- | fs/proc/Kconfig | 18 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 156 | ||||
-rw-r--r-- | include/linux/crash_dump.h | 9 |
3 files changed, 183 insertions, 0 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index d80a1431ef7b..5668620ab34d 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -61,6 +61,24 @@ config PROC_VMCORE_DEVICE_DUMP as ELF notes to /proc/vmcore. You can still disable device dump using the kernel command line option 'novmcoredd'. +config NEED_PROC_VMCORE_DEVICE_RAM + bool + +config PROC_VMCORE_DEVICE_RAM + def_bool y + depends on PROC_VMCORE && NEED_PROC_VMCORE_DEVICE_RAM + help + If the elfcore hdr is allocated and prepared by the dump kernel + ("2nd kernel") instead of the crashed kernel, RAM provided by memory + devices such as virtio-mem will not be included in the dump + image, because only the device driver can properly detect them. + + With this config enabled, these RAM ranges will be queried from the + device drivers once the device gets probed, so they can be included + in the crash dump. + + Relevant architectures should select NEED_PROC_VMCORE_DEVICE_RAM. + config PROC_SYSCTL bool "Sysctl support (/proc/sys)" if EXPERT depends on PROC_FS diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 6632d4bc4b05..8d39e238329e 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -79,6 +79,8 @@ static bool vmcore_opened; /* Whether the vmcore is currently open. */ static unsigned int vmcore_open; +static void vmcore_process_device_ram(struct vmcore_cb *cb); + void register_vmcore_cb(struct vmcore_cb *cb) { INIT_LIST_HEAD(&cb->next); @@ -90,6 +92,8 @@ void register_vmcore_cb(struct vmcore_cb *cb) */ if (vmcore_opened) pr_warn_once("Unexpected vmcore callback registration\n"); + if (!vmcore_open && cb->get_device_ram) + vmcore_process_device_ram(cb); mutex_unlock(&vmcore_mutex); } EXPORT_SYMBOL_GPL(register_vmcore_cb); @@ -1535,6 +1539,158 @@ out_err: EXPORT_SYMBOL(vmcore_add_device_dump); #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ +#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM +static int vmcore_realloc_elfcore_buffer_elf64(size_t new_size) +{ + char *elfcorebuf_new; + + if (WARN_ON_ONCE(new_size < elfcorebuf_sz)) + return -EINVAL; + if (get_order(elfcorebuf_sz_orig) == get_order(new_size)) { + elfcorebuf_sz_orig = new_size; + return 0; + } + + elfcorebuf_new = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(new_size)); + if (!elfcorebuf_new) + return -ENOMEM; + memcpy(elfcorebuf_new, elfcorebuf, elfcorebuf_sz); + free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); + elfcorebuf = elfcorebuf_new; + elfcorebuf_sz_orig = new_size; + return 0; +} + +static void vmcore_reset_offsets_elf64(void) +{ + Elf64_Phdr *phdr_start = (Elf64_Phdr *)(elfcorebuf + sizeof(Elf64_Ehdr)); + loff_t vmcore_off = elfcorebuf_sz + elfnotes_sz; + Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfcorebuf; + Elf64_Phdr *phdr; + int i; + + for (i = 0, phdr = phdr_start; i < ehdr->e_phnum; i++, phdr++) { + u64 start, end; + + /* + * After merge_note_headers_elf64() we should only have a single + * PT_NOTE entry that starts immediately after elfcorebuf_sz. + */ + if (phdr->p_type == PT_NOTE) { + phdr->p_offset = elfcorebuf_sz; + continue; + } + + start = rounddown(phdr->p_offset, PAGE_SIZE); + end = roundup(phdr->p_offset + phdr->p_memsz, PAGE_SIZE); + phdr->p_offset = vmcore_off + (phdr->p_offset - start); + vmcore_off = vmcore_off + end - start; + } + set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); +} + +static int vmcore_add_device_ram_elf64(struct list_head *list, size_t count) +{ + Elf64_Phdr *phdr_start = (Elf64_Phdr *)(elfcorebuf + sizeof(Elf64_Ehdr)); + Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfcorebuf; + struct vmcore_range *cur; + Elf64_Phdr *phdr; + size_t new_size; + int rc; + + if ((Elf32_Half)(ehdr->e_phnum + count) != ehdr->e_phnum + count) { + pr_err("too many device ram ranges\n"); + return -ENOSPC; + } + + /* elfcorebuf_sz must always cover full pages. */ + new_size = sizeof(Elf64_Ehdr) + + (ehdr->e_phnum + count) * sizeof(Elf64_Phdr); + new_size = roundup(new_size, PAGE_SIZE); + + /* + * Make sure we have sufficient space to include the new PT_LOAD + * entries. + */ + rc = vmcore_realloc_elfcore_buffer_elf64(new_size); + if (rc) { + pr_err("resizing elfcore failed\n"); + return rc; + } + + /* Modify our used elfcore buffer size to cover the new entries. */ + elfcorebuf_sz = new_size; + + /* Fill the added PT_LOAD entries. */ + phdr = phdr_start + ehdr->e_phnum; + list_for_each_entry(cur, list, list) { + WARN_ON_ONCE(!IS_ALIGNED(cur->paddr | cur->size, PAGE_SIZE)); + elfcorehdr_fill_device_ram_ptload_elf64(phdr, cur->paddr, cur->size); + + /* p_offset will be adjusted later. */ + phdr++; + ehdr->e_phnum++; + } + list_splice_tail(list, &vmcore_list); + + /* We changed elfcorebuf_sz and added new entries; reset all offsets. */ + vmcore_reset_offsets_elf64(); + + /* Finally, recalculate the total vmcore size. */ + vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, + &vmcore_list); + proc_vmcore->size = vmcore_size; + return 0; +} + +static void vmcore_process_device_ram(struct vmcore_cb *cb) +{ + unsigned char *e_ident = (unsigned char *)elfcorebuf; + struct vmcore_range *first, *m; + LIST_HEAD(list); + int count; + + /* We only support Elf64 dumps for now. */ + if (WARN_ON_ONCE(e_ident[EI_CLASS] != ELFCLASS64)) { + pr_err("device ram ranges only support Elf64\n"); + return; + } + + if (cb->get_device_ram(cb, &list)) { + pr_err("obtaining device ram ranges failed\n"); + return; + } + count = list_count_nodes(&list); + if (!count) + return; + + /* + * For some reason these ranges are already know? Might happen + * with unusual register->unregister->register sequences; we'll simply + * sanity check using the first range. + */ + first = list_first_entry(&list, struct vmcore_range, list); + list_for_each_entry(m, &vmcore_list, list) { + unsigned long long m_end = m->paddr + m->size; + unsigned long long first_end = first->paddr + first->size; + + if (first->paddr < m_end && m->paddr < first_end) + goto out_free; + } + + /* If adding the mem nodes succeeds, they must not be freed. */ + if (!vmcore_add_device_ram_elf64(&list, count)) + return; +out_free: + vmcore_free_ranges(&list); +} +#else /* !CONFIG_PROC_VMCORE_DEVICE_RAM */ +static void vmcore_process_device_ram(struct vmcore_cb *cb) +{ +} +#endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */ + /* Free all dumps in vmcore device dump list */ static void vmcore_free_device_dumps(void) { diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 5d61c7454fd6..2f2555e6407c 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -20,6 +20,8 @@ extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size); extern void elfcorehdr_free(unsigned long long addr); extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos); extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); +void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr, + unsigned long long paddr, unsigned long long size); extern int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot); @@ -99,6 +101,12 @@ static inline void vmcore_unusable(void) * indicated in the vmcore instead. For example, a ballooned page * contains no data and reading from such a page will cause high * load in the hypervisor. + * @get_device_ram: query RAM ranges that can only be detected by device + * drivers, such as the virtio-mem driver, so they can be included in + * the crash dump on architectures that allocate the elfcore hdr in the dump + * ("2nd") kernel. Indicated RAM ranges may contain holes to reduce the + * total number of ranges; such holes can be detected using the pfn_is_ram + * callback just like for other RAM. * @next: List head to manage registered callbacks internally; initialized by * register_vmcore_cb(). * @@ -109,6 +117,7 @@ static inline void vmcore_unusable(void) */ struct vmcore_cb { bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn); + int (*get_device_ram)(struct vmcore_cb *cb, struct list_head *list); struct list_head next; }; extern void register_vmcore_cb(struct vmcore_cb *cb); |