diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2016-12-20 11:54:45 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2016-12-20 11:54:45 +1100 |
commit | 09fc92cf819f627c5c6c2fe4b8921950d9f81275 (patch) | |
tree | 9d43e3644f926bc169ab2a46945d6d61a8263247 | |
parent | b8317fd612ceb1d118275e92f66ded6fdf0a2023 (diff) | |
parent | e1f8c36b6eb3b4b793064ecd4f440c02e7fdbe00 (diff) |
Merge branch 'akpm-current/current'
57 files changed, 1913 insertions, 293 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be2d6d0a03a4..21e2d8863705 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1441,6 +1441,10 @@ The builtin appraise policy appraises all files owned by uid=0. + ima_canonical_fmt [IMA] + Use the canonical format for the binary runtime + measurements, instead of host native format. + ima_hash= [IMA] Format: { md5 | sha1 | rmd160 | sha256 | sha384 | sha512 | ... } diff --git a/arch/Kconfig b/arch/Kconfig index 19483aea4bbc..99839c23d453 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -5,6 +5,9 @@ config KEXEC_CORE bool +config HAVE_IMA_KEXEC + bool + config OPROFILE tristate "OProfile system profiling" depends on PROFILING diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 4355f0ec44d6..f98baaec0a15 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -17,6 +17,8 @@ #ifndef __ASSEMBLY__ +#include <linux/personality.h> /* For READ_IMPLIES_EXEC */ + #ifndef CONFIG_MMU #include <asm/page-nommu.h> diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h new file mode 100644 index 000000000000..e7b59b930ce0 --- /dev/null +++ b/arch/arm64/include/asm/setup.h @@ -0,0 +1,19 @@ +/* + * arch/arm64/include/asm/setup.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_SETUP_H +#define __ASM_SETUP_H + +#include <uapi/asm/setup.h> + +static inline unsigned long kaslr_offset(void) +{ + return kimage_vaddr - KIMAGE_VADDR; +} + +#endif diff --git a/arch/arm64/include/uapi/asm/setup.h b/arch/arm64/include/uapi/asm/setup.h index 9cf2e46fbbdf..26631c8f0abc 100644 --- a/arch/arm64/include/uapi/asm/setup.h +++ b/arch/arm64/include/uapi/asm/setup.h @@ -16,8 +16,8 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __ASM_SETUP_H -#define __ASM_SETUP_H +#ifndef _UAPI__ASM_SETUP_H +#define _UAPI__ASM_SETUP_H #include <linux/types.h> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a53f52ac81c6..b051367e2149 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -338,11 +338,11 @@ subsys_initcall(topology_init); static int dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) { - u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; + const unsigned long offset = kaslr_offset(); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { - pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", - kaslr_offset, KIMAGE_VADDR); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) { + pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n", + offset, KIMAGE_VADDR); } else { pr_emerg("Kernel Offset: disabled\n"); } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3da87e198878..a8ee573fe610 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -469,6 +469,7 @@ config KEXEC config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE + select HAVE_IMA_KEXEC select BUILD_BIN2C depends on PPC64 depends on CRYPTO=y diff --git a/arch/powerpc/include/asm/ima.h b/arch/powerpc/include/asm/ima.h new file mode 100644 index 000000000000..2313bdface34 --- /dev/null +++ b/arch/powerpc/include/asm/ima.h @@ -0,0 +1,29 @@ +#ifndef _ASM_POWERPC_IMA_H +#define _ASM_POWERPC_IMA_H + +struct kimage; + +int ima_get_kexec_buffer(void **addr, size_t *size); +int ima_free_kexec_buffer(void); + +#ifdef CONFIG_IMA +void remove_ima_buffer(void *fdt, int chosen_node); +#else +static inline void remove_ima_buffer(void *fdt, int chosen_node) {} +#endif + +#ifdef CONFIG_IMA_KEXEC +int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, + size_t size); + +int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node); +#else +static inline int setup_ima_buffer(const struct kimage *image, void *fdt, + int chosen_node) +{ + remove_ima_buffer(fdt, chosen_node); + return 0; +} +#endif /* CONFIG_IMA_KEXEC */ + +#endif /* _ASM_POWERPC_IMA_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 6c3b71502fbc..25668bc8cb2a 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -94,11 +94,22 @@ static inline bool kdump_in_progress(void) #ifdef CONFIG_KEXEC_FILE extern struct kexec_file_ops kexec_elf64_ops; +#ifdef CONFIG_IMA_KEXEC +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + phys_addr_t ima_buffer_addr; + size_t ima_buffer_size; +}; +#endif + int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); -int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, - unsigned long initrd_len, const char *cmdline); +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline); +int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size); #endif /* CONFIG_KEXEC_FILE */ #else /* !CONFIG_KEXEC_CORE */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a3a6047fd395..23f8082d7bfa 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -112,6 +112,10 @@ obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o +ifeq ($(CONFIG_HAVE_IMA_KEXEC)$(CONFIG_IMA),yy) +obj-y += ima_kexec.o +endif + obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c new file mode 100644 index 000000000000..5ea42c937ca9 --- /dev/null +++ b/arch/powerpc/kernel/ima_kexec.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2016 IBM Corporation + * + * Authors: + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/of.h> +#include <linux/memblock.h> +#include <linux/libfdt.h> + +static int get_addr_size_cells(int *addr_cells, int *size_cells) +{ + struct device_node *root; + + root = of_find_node_by_path("/"); + if (!root) + return -EINVAL; + + *addr_cells = of_n_addr_cells(root); + *size_cells = of_n_size_cells(root); + + of_node_put(root); + + return 0; +} + +static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr, + size_t *size) +{ + int ret, addr_cells, size_cells; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + if (len < 4 * (addr_cells + size_cells)) + return -ENOENT; + + *addr = of_read_number(prop, addr_cells); + *size = of_read_number(prop + 4 * addr_cells, size_cells); + + return 0; +} + +/** + * ima_get_kexec_buffer - get IMA buffer from the previous kernel + * @addr: On successful return, set to point to the buffer contents. + * @size: On successful return, set to the buffer size. + * + * Return: 0 on success, negative errno on error. + */ +int ima_get_kexec_buffer(void **addr, size_t *size) +{ + int ret, len; + unsigned long tmp_addr; + size_t tmp_size; + const void *prop; + + prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size); + if (ret) + return ret; + + *addr = __va(tmp_addr); + *size = tmp_size; + + return 0; +} + +/** + * ima_free_kexec_buffer - free memory used by the IMA buffer + */ +int ima_free_kexec_buffer(void) +{ + int ret; + unsigned long addr; + size_t size; + struct property *prop; + + prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size); + if (ret) + return ret; + + ret = of_remove_property(of_chosen, prop); + if (ret) + return ret; + + return memblock_free(addr, size); + +} + +/** + * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt + * + * The IMA measurement buffer is of no use to a subsequent kernel, so we always + * remove it from the device tree. + */ +void remove_ima_buffer(void *fdt, int chosen_node) +{ + int ret, len; + unsigned long addr; + size_t size; + const void *prop; + + prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len); + if (!prop) + return; + + ret = do_get_kexec_buffer(prop, len, &addr, &size); + fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer"); + if (ret) + return; + + ret = delete_fdt_mem_rsv(fdt, addr, size); + if (!ret) + pr_debug("Removed old IMA buffer reservation.\n"); +} + +#ifdef CONFIG_IMA_KEXEC +/** + * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer + * + * Architectures should use this function to pass on the IMA buffer + * information to the next kernel. + * + * Return: 0 on success, negative errno on error. + */ +int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, + size_t size) +{ + image->arch.ima_buffer_addr = load_addr; + image->arch.ima_buffer_size = size; + + return 0; +} + +static int write_number(void *p, u64 value, int cells) +{ + if (cells == 1) { + u32 tmp; + + if (value > U32_MAX) + return -EINVAL; + + tmp = cpu_to_be32(value); + memcpy(p, &tmp, sizeof(tmp)); + } else if (cells == 2) { + u64 tmp; + + tmp = cpu_to_be64(value); + memcpy(p, &tmp, sizeof(tmp)); + } else + return -EINVAL; + + return 0; +} + +/** + * setup_ima_buffer - add IMA buffer information to the fdt + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @chosen_node: Offset to the chosen node. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node) +{ + int ret, addr_cells, size_cells, entry_size; + u8 value[16]; + + remove_ima_buffer(fdt, chosen_node); + if (!image->arch.ima_buffer_size) + return 0; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + entry_size = 4 * (addr_cells + size_cells); + + if (entry_size > sizeof(value)) + return -EINVAL; + + ret = write_number(value, image->arch.ima_buffer_addr, addr_cells); + if (ret) + return ret; + + ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size, + size_cells); + if (ret) + return ret; + + ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value, + entry_size); + if (ret < 0) + return -EINVAL; + + ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr, + image->arch.ima_buffer_size); + if (ret) + return -EINVAL; + + pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n", + image->arch.ima_buffer_addr, image->arch.ima_buffer_size); + + return 0; +} +#endif /* CONFIG_IMA_KEXEC */ diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c index 6acffd34a70f..9a42309b091a 100644 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -627,7 +627,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, goto out; } - ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline); + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); if (ret) goto out; diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index 7abc8a75ee48..992c0d258e5d 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -27,6 +27,7 @@ #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <asm/ima.h> #define SLAVE_CODE_SIZE 256 @@ -180,7 +181,7 @@ int setup_purgatory(struct kimage *image, const void *slave_code, * * Return: 0 on success, or negative errno on error. */ -static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) +int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) { int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); @@ -209,6 +210,7 @@ static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size /* * setup_new_fdt - modify /chosen and memory reservation for the next kernel + * @image: kexec image being loaded. * @fdt: Flattened device tree for the next kernel. * @initrd_load_addr: Address where the next initrd will be loaded. * @initrd_len: Size of the next initrd, or 0 if there will be none. @@ -217,8 +219,9 @@ static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size * * Return: 0 on success, or negative errno on error. */ -int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, - unsigned long initrd_len, const char *cmdline) +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline) { int ret, chosen_node; const void *prop; @@ -328,6 +331,12 @@ int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, } } + ret = setup_ima_buffer(image, fdt, chosen_node); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return ret; + } + ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); if (ret) { pr_err("Error setting up the new device tree.\n"); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 307b1f4543de..2e3c34b1df37 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -338,6 +338,7 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); + VMCOREINFO_PHYS_BASE(phys_base); } /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/block/genhd.c b/block/genhd.c index fcd6d4fae657..a178c8e59492 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -878,7 +878,7 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || (!disk_max_parts(sgp) && + if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) && (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a464c8088170..ae05b93fa0e1 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2612,20 +2612,48 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, spin_lock(&dlm->master_lock); ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, target, dlm->node_num); + if (ret == -EEXIST) { + if (oldmle) + __dlm_put_mle(oldmle); + + spin_unlock(&dlm->master_lock); + spin_unlock(&dlm->spinlock); + mlog(0, "another process is already migrating it\n"); + goto fail; + } + + /* + * If an old mle is found, it should be put. If its type is BLOCK, + * it should be put again. Because it has been unhasded from the map + * in the function dlm_add_migration_mle. + * Otherwise the memory will be leaked. It will not be found again from + * the hash map. + */ + if (oldmle) { + /* master is known, detach if not already detached */ + __dlm_mle_detach_hb_events(dlm, oldmle); + __dlm_put_mle(oldmle); + + /* + * If the type of the mle is BLOCK, it should be put once for + * release. Otherwise a memory leak may be caused because + * oldmle has been unhashed from the hash map and it will not + * be found any more. + */ + if (oldmle->type == DLM_MLE_BLOCK) + __dlm_put_mle(oldmle); + } + /* get an extra reference on the mle. * otherwise the assert_master from the new * master will destroy this. */ dlm_get_mle_inuse(mle); + mle_added = 1; + spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); - if (ret == -EEXIST) { - mlog(0, "another process is already migrating it\n"); - goto fail; - } - mle_added = 1; - /* * set the MIGRATING flag and flush asts * if we fail after this we need to re-dirty the lockres @@ -2642,12 +2670,6 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, } fail: - if (ret != -EEXIST && oldmle) { - /* master is known, detach if not already detached */ - dlm_mle_detach_hb_events(dlm, oldmle); - dlm_put_mle(oldmle); - } - if (ret < 0) { if (mle_added) { dlm_mle_detach_hb_events(dlm, mle); @@ -3182,16 +3204,24 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, if (ret < 0) kmem_cache_free(dlm_mle_cache, mle); + /* + * If an old mle is found, it should be put. If its type is BLOCK, + * it should be put again because it has been unhashed from the map + * in the dlm_add_migration_mle(). + * Otherwise the memory will be leaked. It will not be found again from + * the hash map. + */ + if (oldmle) { + __dlm_mle_detach_hb_events(dlm, oldmle); + __dlm_put_mle(oldmle); + if (ret >= 0 && oldmle->type == DLM_MLE_BLOCK) + __dlm_put_mle(oldmle); + } + spin_unlock(&dlm->master_lock); unlock: spin_unlock(&dlm->spinlock); - if (oldmle) { - /* master is known, detach if not already detached */ - dlm_mle_detach_hb_events(dlm, oldmle); - dlm_put_mle(oldmle); - } - if (res) dlm_lockres_put(res); leave: diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 83d576f6a287..77d1632e905d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, lockres->l_level, new_level); + /* + * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always + * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that + * we can recover correctly from node failure. Otherwise, we may get + * invalid LVB in LKB, but without DLM_SBF_VALNOTVALIDÂ being set. + */ + if (!ocfs2_is_o2cb_active() && + lockres->l_ops->flags & LOCK_TYPE_USES_LVB) + lvb = 1; + if (lvb) dlm_flags |= DLM_LKF_VALBLK; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 52c07346bea3..820359096c7a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; */ static struct ocfs2_stack_plugin *active_stack; +inline int ocfs2_is_o2cb_active(void) +{ + return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); +} +EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); + static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) { struct ocfs2_stack_plugin *p; diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index f2dce10fae54..e3036e1790e8 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); +/* In ocfs2_downconvert_lock(), we need to know which stack we are using */ +int ocfs2_is_o2cb_active(void); + extern struct kset *ocfs2_kset; #endif /* STACKGLUE_H */ diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h new file mode 100644 index 000000000000..bba7a4d692b3 --- /dev/null +++ b/include/linux/crc64_ecma.h @@ -0,0 +1,56 @@ +/* + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CRC64_ECMA_H_ +#define __CRC64_ECMA_H_ + +#include <linux/types.h> + + +#define CRC64_DEFAULT_INITVAL 0xFFFFFFFFFFFFFFFFULL + + +/* + * crc64_ecma_seed - Initializes the CRC64 ECMA seed. + */ +u64 crc64_ecma_seed(void); + +/* + * crc64_ecma - Computes the 64 bit ECMA CRC. + * + * @pdata: pointer to the data to compute checksum for. + * @nbytes: number of bytes in data buffer. + * @seed: CRC seed. + */ +u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed); + +#endif /* __CRC64_ECMA_H_ */ diff --git a/include/linux/ima.h b/include/linux/ima.h index 0eb7c2e7f0d6..7f6952f8d6aa 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -11,6 +11,7 @@ #define _LINUX_IMA_H #include <linux/fs.h> +#include <linux/kexec.h> struct linux_binprm; #ifdef CONFIG_IMA @@ -23,6 +24,10 @@ extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); extern void ima_post_path_mknod(struct dentry *dentry); +#ifdef CONFIG_IMA_KEXEC +extern void ima_add_kexec_buffer(struct kimage *image); +#endif + #else static inline int ima_bprm_check(struct linux_binprm *bprm) { @@ -62,6 +67,13 @@ static inline void ima_post_path_mknod(struct dentry *dentry) #endif /* CONFIG_IMA */ +#ifndef CONFIG_IMA_KEXEC +struct kimage; + +static inline void ima_add_kexec_buffer(struct kimage *image) +{} +#endif + #ifdef CONFIG_IMA_APPRAISE extern void ima_inode_post_setattr(struct dentry *dentry); extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d419d0e51fe5..e98e546b543c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -283,6 +283,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) #define VMCOREINFO_CONFIG(name) \ vmcoreinfo_append_str("CONFIG_%s=y\n", #name) +#define VMCOREINFO_PHYS_BASE(value) \ + vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value) extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index aacca824a6ae..0a3fadc32693 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -110,6 +110,7 @@ extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; extern unsigned long *watchdog_cpumask_bits; +extern atomic_t watchdog_park_in_progress; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 57c9e0622a38..56375edf2ed2 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -77,8 +77,11 @@ extern int ___ratelimit(struct ratelimit_state *rs, const char *func); #ifdef CONFIG_PRINTK -#define WARN_ON_RATELIMIT(condition, state) \ - WARN_ON((condition) && __ratelimit(state)) +#define WARN_ON_RATELIMIT(condition, state) ({ \ + bool __rtn_cond = !!(condition); \ + WARN_ON(__rtn_cond && __ratelimit(state)); \ + __rtn_cond; \ +}) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ diff --git a/include/linux/sem.h b/include/linux/sem.h index d0efd6e6c20a..4fc222f8755d 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -21,7 +21,7 @@ struct sem_array { struct list_head list_id; /* undo requests on this array */ int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ - bool complex_mode; /* no parallel simple ops */ + unsigned int use_global_lock;/* >0: global lock required */ }; #ifdef CONFIG_SYSVIPC diff --git a/ipc/sem.c b/ipc/sem.c index e08b94851922..31eaa87ecba4 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -159,22 +159,42 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ /* + * Switching from the mode suitable for simple ops + * to the mode for complex ops is costly. Therefore: + * use some hysteresis + */ +#define USE_GLOBAL_LOCK_HYSTERESIS 10 + +/* * Locking: * a) global sem_lock() for read/write * sem_undo.id_next, * sem_array.complex_count, - * sem_array.complex_mode * sem_array.pending{_alter,_const}, * sem_array.sem_undo * * b) global or semaphore sem_lock() for read/write: * sem_array.sem_base[i].pending_{const,alter}: - * sem_array.complex_mode (for read) * * c) special: * sem_undo_list.list_proc: * * undo_list->lock for write * * rcu for read + * use_global_lock: + * * global sem_lock() for write + * * either local or global sem_lock() for read. + * + * Memory ordering: + * Most ordering is enforced by using spin_lock() and spin_unlock(). + * The special case is use_global_lock: + * Setting it from non-zero to 0 is a RELEASE, this is ensured by + * using smp_store_release(). + * Testing if it is non-zero is an ACQUIRE, this is ensured by using + * smp_load_acquire(). + * Setting it from 0 to non-zero must be ordered with regards to + * this smp_load_acquire(), this is guaranteed because the smp_load_acquire() + * is inside a spin_lock() and after a write from 0 to non-zero a + * spin_lock()+spin_unlock() is done. */ #define sc_semmsl sem_ctls[0] @@ -273,29 +293,22 @@ static void complexmode_enter(struct sem_array *sma) int i; struct sem *sem; - if (sma->complex_mode) { - /* We are already in complex_mode. Nothing to do */ + if (sma->use_global_lock > 0) { + /* + * We are already in global lock mode. + * Nothing to do, just reset the + * counter until we return to simple mode. + */ + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; return; } - - /* We need a full barrier after seting complex_mode: - * The write to complex_mode must be visible - * before we read the first sem->lock spinlock state. - */ - smp_store_mb(sma->complex_mode, true); + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; for (i = 0; i < sma->sem_nsems; i++) { sem = sma->sem_base + i; - spin_unlock_wait(&sem->lock); + spin_lock(&sem->lock); + spin_unlock(&sem->lock); } - /* - * spin_unlock_wait() is not a memory barriers, it is only a - * control barrier. The code must pair with spin_unlock(&sem->lock), - * thus just the control barrier is insufficient. - * - * smp_rmb() is sufficient, as writes cannot pass the control barrier. - */ - smp_rmb(); } /* @@ -310,13 +323,17 @@ static void complexmode_tryleave(struct sem_array *sma) */ return; } - /* - * Immediately after setting complex_mode to false, - * a simple op can start. Thus: all memory writes - * performed by the current operation must be visible - * before we set complex_mode to false. - */ - smp_store_release(&sma->complex_mode, false); + if (sma->use_global_lock == 1) { + /* + * Immediately after setting use_global_lock to 0, + * a simple op can start. Thus: all memory writes + * performed by the current operation must be visible + * before we set use_global_lock to 0. + */ + smp_store_release(&sma->use_global_lock, 0); + } else { + sma->use_global_lock--; + } } #define SEM_GLOBAL_LOCK (-1) @@ -346,30 +363,23 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, * Optimized locking is possible if no complex operation * is either enqueued or processed right now. * - * Both facts are tracked by complex_mode. + * Both facts are tracked by use_global_mode. */ sem = sma->sem_base + sops->sem_num; /* - * Initial check for complex_mode. Just an optimization, + * Initial check for use_global_lock. Just an optimization, * no locking, no memory barrier. */ - if (!sma->complex_mode) { + if (!sma->use_global_lock) { /* * It appears that no complex operation is around. * Acquire the per-semaphore lock. */ spin_lock(&sem->lock); - /* - * See 51d7d5205d33 - * ("powerpc: Add smp_mb() to arch_spin_is_locked()"): - * A full barrier is required: the write of sem->lock - * must be visible before the read is executed - */ - smp_mb(); - - if (!smp_load_acquire(&sma->complex_mode)) { + /* pairs with smp_store_release() */ + if (!smp_load_acquire(&sma->use_global_lock)) { /* fast path successful! */ return sops->sem_num; } @@ -379,19 +389,26 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, /* slow path: acquire the full lock */ ipc_lock_object(&sma->sem_perm); - if (sma->complex_count == 0) { - /* False alarm: - * There is no complex operation, thus we can switch - * back to the fast path. + if (sma->use_global_lock == 0) { + /* + * The use_global_lock mode ended while we waited for + * sma->sem_perm.lock. Thus we must switch to locking + * with sem->lock. + * Unlike in the fast path, there is no need to recheck + * sma->use_global_lock after we have acquired sem->lock: + * We own sma->sem_perm.lock, thus use_global_lock cannot + * change. */ spin_lock(&sem->lock); + ipc_unlock_object(&sma->sem_perm); return sops->sem_num; } else { - /* Not a false alarm, thus complete the sequence for a - * full lock. + /* + * Not a false alarm, thus continue to use the global lock + * mode. No need for complexmode_enter(), this was done by + * the caller that has set use_global_mode to non-zero. */ - complexmode_enter(sma); return SEM_GLOBAL_LOCK; } } @@ -495,7 +512,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) } sma->complex_count = 0; - sma->complex_mode = true; /* dropped by sem_unlock below */ + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; INIT_LIST_HEAD(&sma->pending_alter); INIT_LIST_HEAD(&sma->pending_const); INIT_LIST_HEAD(&sma->list_id); diff --git a/kernel/kcov.c b/kernel/kcov.c index cc2fa35ca480..85e5546cd791 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -19,6 +19,7 @@ #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/kcov.h> +#include <asm/setup.h> /* * kcov descriptor (one per opened debugfs file). @@ -73,6 +74,11 @@ void notrace __sanitizer_cov_trace_pc(void) if (mode == KCOV_MODE_TRACE) { unsigned long *area; unsigned long pos; + unsigned long ip = _RET_IP_; + +#ifdef CONFIG_RANDOMIZE_BASE + ip -= kaslr_offset(); +#endif /* * There is some code that runs in interrupts but for which @@ -86,7 +92,7 @@ void notrace __sanitizer_cov_trace_pc(void) /* The first word is number of subsequent PCs. */ pos = READ_ONCE(area[0]) + 1; if (likely(pos < t->kcov_size)) { - area[pos] = _RET_IP_; + area[pos] = ip; WRITE_ONCE(area[0], pos); } } diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 0c2df7f73792..b56a558e406d 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -19,6 +19,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/fs.h> +#include <linux/ima.h> #include <crypto/hash.h> #include <crypto/sha.h> #include <linux/syscalls.h> @@ -132,6 +133,9 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, return ret; image->kernel_buf_len = size; + /* IMA needs to pass the measurement list to the next kernel. */ + ima_add_kexec_buffer(image); + /* Call arch image probe handlers */ ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, image->kernel_buf_len); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d4b0fa01cae3..63177be0159e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -49,6 +49,8 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); #define for_each_watchdog_cpu(cpu) \ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) +atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); + /* * The 'watchdog_running' variable is set to 1 when the watchdog threads * are registered/started and is set to 0 when the watchdog threads are @@ -260,6 +262,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; + if (atomic_read(&watchdog_park_in_progress) != 0) + return HRTIMER_NORESTART; + /* kick the hardlockup detector */ watchdog_interrupt_count(); @@ -467,12 +472,16 @@ static int watchdog_park_threads(void) { int cpu, ret = 0; + atomic_set(&watchdog_park_in_progress, 1); + for_each_watchdog_cpu(cpu) { ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); if (ret) break; } + atomic_set(&watchdog_park_in_progress, 0); + return ret; } diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 84016c8aee6b..12b8dd640786 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -84,6 +84,9 @@ static void watchdog_overflow_callback(struct perf_event *event, /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; + if (atomic_read(&watchdog_park_in_progress) != 0) + return; + if (__this_cpu_read(watchdog_nmi_touch) == true) { __this_cpu_write(watchdog_nmi_touch, false); return; diff --git a/lib/Kconfig b/lib/Kconfig index 260a80e313b9..924697dbd367 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -185,6 +185,13 @@ config CRC8 when they need to do cyclic redundancy check according CRC8 algorithm. Module will be called crc8. +config CRC64_ECMA + tristate "CRC64 ECMA function" + help + This option provides CRC64 ECMA function. Drivers may select this + when they need to do cyclic redundancy check according to the CRC64 + ECMA algorithm. + config AUDIT_GENERIC bool depends on AUDIT && !AUDIT_ARCH diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7446097f72bd..cb66a4648840 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -26,7 +26,7 @@ config CONSOLE_LOGLEVEL_DEFAULT the kernel bootargs. loglevel=<x> continues to override whatever value is specified here as well. - Note: This does not affect the log level of un-prefixed prink() + Note: This does not affect the log level of un-prefixed printk() usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT option. diff --git a/lib/Makefile b/lib/Makefile index 50144a3aeebd..d15e235f72ea 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -93,6 +93,7 @@ obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_CRC7) += crc7.o obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o +obj-$(CONFIG_CRC64_ECMA) += crc64_ecma.o obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o obj-$(CONFIG_842_COMPRESS) += 842/ diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c new file mode 100644 index 000000000000..41629ea5a60c --- /dev/null +++ b/lib/crc64_ecma.c @@ -0,0 +1,341 @@ +/* + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/crc64_ecma.h> + + +#define CRC64_BYTE_MASK 0xFF +#define CRC64_TABLE_SIZE 256 + + +struct crc64_table { + u64 seed; + u64 table[CRC64_TABLE_SIZE]; +}; + + +static struct crc64_table CRC64_ECMA_182 = { + CRC64_DEFAULT_INITVAL, + { + 0x0000000000000000ULL, + 0xb32e4cbe03a75f6fULL, + 0xf4843657a840a05bULL, + 0x47aa7ae9abe7ff34ULL, + 0x7bd0c384ff8f5e33ULL, + 0xc8fe8f3afc28015cULL, + 0x8f54f5d357cffe68ULL, + 0x3c7ab96d5468a107ULL, + 0xf7a18709ff1ebc66ULL, + 0x448fcbb7fcb9e309ULL, + 0x0325b15e575e1c3dULL, + 0xb00bfde054f94352ULL, + 0x8c71448d0091e255ULL, + 0x3f5f08330336bd3aULL, + 0x78f572daa8d1420eULL, + 0xcbdb3e64ab761d61ULL, + 0x7d9ba13851336649ULL, + 0xceb5ed8652943926ULL, + 0x891f976ff973c612ULL, + 0x3a31dbd1fad4997dULL, + 0x064b62bcaebc387aULL, + 0xb5652e02ad1b6715ULL, + 0xf2cf54eb06fc9821ULL, + 0x41e11855055bc74eULL, + 0x8a3a2631ae2dda2fULL, + 0x39146a8fad8a8540ULL, + 0x7ebe1066066d7a74ULL, + 0xcd905cd805ca251bULL, + 0xf1eae5b551a2841cULL, + 0x42c4a90b5205db73ULL, + 0x056ed3e2f9e22447ULL, + 0xb6409f5cfa457b28ULL, + 0xfb374270a266cc92ULL, + 0x48190ecea1c193fdULL, + 0x0fb374270a266cc9ULL, + 0xbc9d3899098133a6ULL, + 0x80e781f45de992a1ULL, + 0x33c9cd4a5e4ecdceULL, + 0x7463b7a3f5a932faULL, + 0xc74dfb1df60e6d95ULL, + 0x0c96c5795d7870f4ULL, + 0xbfb889c75edf2f9bULL, + 0xf812f32ef538d0afULL, + 0x4b3cbf90f69f8fc0ULL, + 0x774606fda2f72ec7ULL, + 0xc4684a43a15071a8ULL, + 0x83c230aa0ab78e9cULL, + 0x30ec7c140910d1f3ULL, + 0x86ace348f355aadbULL, + 0x3582aff6f0f2f5b4ULL, + 0x7228d51f5b150a80ULL, + 0xc10699a158b255efULL, + 0xfd7c20cc0cdaf4e8ULL, + 0x4e526c720f7dab87ULL, + 0x09f8169ba49a54b3ULL, + 0xbad65a25a73d0bdcULL, + 0x710d64410c4b16bdULL, + 0xc22328ff0fec49d2ULL, + 0x85895216a40bb6e6ULL, + 0x36a71ea8a7ace989ULL, + 0x0adda7c5f3c4488eULL, + 0xb9f3eb7bf06317e1ULL, + 0xfe5991925b84e8d5ULL, + 0x4d77dd2c5823b7baULL, + 0x64b62bcaebc387a1ULL, + 0xd7986774e864d8ceULL, + 0x90321d9d438327faULL, + 0x231c512340247895ULL, + 0x1f66e84e144cd992ULL, + 0xac48a4f017eb86fdULL, + 0xebe2de19bc0c79c9ULL, + 0x58cc92a7bfab26a6ULL, + 0x9317acc314dd3bc7ULL, + 0x2039e07d177a64a8ULL, + 0x67939a94bc9d9b9cULL, + 0xd4bdd62abf3ac4f3ULL, + 0xe8c76f47eb5265f4ULL, + 0x5be923f9e8f53a9bULL, + 0x1c4359104312c5afULL, + 0xaf6d15ae40b59ac0ULL, + 0x192d8af2baf0e1e8ULL, + 0xaa03c64cb957be87ULL, + 0xeda9bca512b041b3ULL, + 0x5e87f01b11171edcULL, + 0x62fd4976457fbfdbULL, + 0xd1d305c846d8e0b4ULL, + 0x96797f21ed3f1f80ULL, + 0x2557339fee9840efULL, + 0xee8c0dfb45ee5d8eULL, + 0x5da24145464902e1ULL, + 0x1a083bacedaefdd5ULL, + 0xa9267712ee09a2baULL, + 0x955cce7fba6103bdULL, + 0x267282c1b9c65cd2ULL, + 0x61d8f8281221a3e6ULL, + 0xd2f6b4961186fc89ULL, + 0x9f8169ba49a54b33ULL, + 0x2caf25044a02145cULL, + 0x6b055fede1e5eb68ULL, + 0xd82b1353e242b407ULL, + 0xe451aa3eb62a1500ULL, + 0x577fe680b58d4a6fULL, + 0x10d59c691e6ab55bULL, + 0xa3fbd0d71dcdea34ULL, + 0x6820eeb3b6bbf755ULL, + 0xdb0ea20db51ca83aULL, + 0x9ca4d8e41efb570eULL, + 0x2f8a945a1d5c0861ULL, + 0x13f02d374934a966ULL, + 0xa0de61894a93f609ULL, + 0xe7741b60e174093dULL, + 0x545a57dee2d35652ULL, + 0xe21ac88218962d7aULL, + 0x5134843c1b317215ULL, + 0x169efed5b0d68d21ULL, + 0xa5b0b26bb371d24eULL, + 0x99ca0b06e7197349ULL, + 0x2ae447b8e4be2c26ULL, + 0x6d4e3d514f59d312ULL, + 0xde6071ef4cfe8c7dULL, + 0x15bb4f8be788911cULL, + 0xa6950335e42fce73ULL, + 0xe13f79dc4fc83147ULL, + 0x521135624c6f6e28ULL, + 0x6e6b8c0f1807cf2fULL, + 0xdd45c0b11ba09040ULL, + 0x9aefba58b0476f74ULL, + 0x29c1f6e6b3e0301bULL, + 0xc96c5795d7870f42ULL, + 0x7a421b2bd420502dULL, + 0x3de861c27fc7af19ULL, + 0x8ec62d7c7c60f076ULL, + 0xb2bc941128085171ULL, + 0x0192d8af2baf0e1eULL, + 0x4638a2468048f12aULL, + 0xf516eef883efae45ULL, + 0x3ecdd09c2899b324ULL, + 0x8de39c222b3eec4bULL, + 0xca49e6cb80d9137fULL, + 0x7967aa75837e4c10ULL, + 0x451d1318d716ed17ULL, + 0xf6335fa6d4b1b278ULL, + 0xb199254f7f564d4cULL, + 0x02b769f17cf11223ULL, + 0xb4f7f6ad86b4690bULL, + 0x07d9ba1385133664ULL, + 0x4073c0fa2ef4c950ULL, + 0xf35d8c442d53963fULL, + 0xcf273529793b3738ULL, + 0x7c0979977a9c6857ULL, + 0x3ba3037ed17b9763ULL, + 0x888d4fc0d2dcc80cULL, + 0x435671a479aad56dULL, + 0xf0783d1a7a0d8a02ULL, + 0xb7d247f3d1ea7536ULL, + 0x04fc0b4dd24d2a59ULL, + 0x3886b22086258b5eULL, + 0x8ba8fe9e8582d431ULL, + 0xcc0284772e652b05ULL, + 0x7f2cc8c92dc2746aULL, + 0x325b15e575e1c3d0ULL, + 0x8175595b76469cbfULL, + 0xc6df23b2dda1638bULL, + 0x75f16f0cde063ce4ULL, + 0x498bd6618a6e9de3ULL, + 0xfaa59adf89c9c28cULL, + 0xbd0fe036222e3db8ULL, + 0x0e21ac88218962d7ULL, + 0xc5fa92ec8aff7fb6ULL, + 0x76d4de52895820d9ULL, + 0x317ea4bb22bfdfedULL, + 0x8250e80521188082ULL, + 0xbe2a516875702185ULL, + 0x0d041dd676d77eeaULL, + 0x4aae673fdd3081deULL, + 0xf9802b81de97deb1ULL, + 0x4fc0b4dd24d2a599ULL, + 0xfceef8632775faf6ULL, + 0xbb44828a8c9205c2ULL, + 0x086ace348f355aadULL, + 0x34107759db5dfbaaULL, + 0x873e3be7d8faa4c5ULL, + 0xc094410e731d5bf1ULL, + 0x73ba0db070ba049eULL, + 0xb86133d4dbcc19ffULL, + 0x0b4f7f6ad86b4690ULL, + 0x4ce50583738cb9a4ULL, + 0xffcb493d702be6cbULL, + 0xc3b1f050244347ccULL, + 0x709fbcee27e418a3ULL, + 0x3735c6078c03e797ULL, + 0x841b8ab98fa4b8f8ULL, + 0xadda7c5f3c4488e3ULL, + 0x1ef430e13fe3d78cULL, + 0x595e4a08940428b8ULL, + 0xea7006b697a377d7ULL, + 0xd60abfdbc3cbd6d0ULL, + 0x6524f365c06c89bfULL, + 0x228e898c6b8b768bULL, + 0x91a0c532682c29e4ULL, + 0x5a7bfb56c35a3485ULL, + 0xe955b7e8c0fd6beaULL, + 0xaeffcd016b1a94deULL, + 0x1dd181bf68bdcbb1ULL, + 0x21ab38d23cd56ab6ULL, + 0x9285746c3f7235d9ULL, + 0xd52f0e859495caedULL, + 0x6601423b97329582ULL, + 0xd041dd676d77eeaaULL, + 0x636f91d96ed0b1c5ULL, + 0x24c5eb30c5374ef1ULL, + 0x97eba78ec690119eULL, + 0xab911ee392f8b099ULL, + 0x18bf525d915feff6ULL, + 0x5f1528b43ab810c2ULL, + 0xec3b640a391f4fadULL, + 0x27e05a6e926952ccULL, + 0x94ce16d091ce0da3ULL, + 0xd3646c393a29f297ULL, + 0x604a2087398eadf8ULL, + 0x5c3099ea6de60cffULL, + 0xef1ed5546e415390ULL, + 0xa8b4afbdc5a6aca4ULL, + 0x1b9ae303c601f3cbULL, + 0x56ed3e2f9e224471ULL, + 0xe5c372919d851b1eULL, + 0xa26908783662e42aULL, + 0x114744c635c5bb45ULL, + 0x2d3dfdab61ad1a42ULL, + 0x9e13b115620a452dULL, + 0xd9b9cbfcc9edba19ULL, + 0x6a978742ca4ae576ULL, + 0xa14cb926613cf817ULL, + 0x1262f598629ba778ULL, + 0x55c88f71c97c584cULL, + 0xe6e6c3cfcadb0723ULL, + 0xda9c7aa29eb3a624ULL, + 0x69b2361c9d14f94bULL, + 0x2e184cf536f3067fULL, + 0x9d36004b35545910ULL, + 0x2b769f17cf112238ULL, + 0x9858d3a9ccb67d57ULL, + 0xdff2a94067518263ULL, + 0x6cdce5fe64f6dd0cULL, + 0x50a65c93309e7c0bULL, + 0xe388102d33392364ULL, + 0xa4226ac498dedc50ULL, + 0x170c267a9b79833fULL, + 0xdcd7181e300f9e5eULL, + 0x6ff954a033a8c131ULL, + 0x28532e49984f3e05ULL, + 0x9b7d62f79be8616aULL, + 0xa707db9acf80c06dULL, + 0x14299724cc279f02ULL, + 0x5383edcd67c06036ULL, + 0xe0ada17364673f59ULL + } +}; + + +/* + * crc64_ecma_seed - Initializes the CRC64 ECMA seed. + */ +u64 crc64_ecma_seed(void) +{ + return CRC64_ECMA_182.seed; +} +EXPORT_SYMBOL(crc64_ecma_seed); + +/* + * crc64_ecma - Computes the 64 bit ECMA CRC. + * + * pdata: pointer to the data to compute checksum for. + * nbytes: number of bytes in data buffer. + * seed: CRC seed. + */ +u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed) +{ + unsigned int i; + u64 crc = seed; + + for (i = 0; i < nbytes; i++) + crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^ + (crc >> 8); + + return crc; +} +EXPORT_SYMBOL(crc64_ecma); + +MODULE_DESCRIPTION("CRC64 ECMA function"); +MODULE_AUTHOR("Freescale Semiconductor Inc."); +MODULE_LICENSE("GPL"); diff --git a/mm/fadvise.c b/mm/fadvise.c index 6c707bfe02fd..a43013112581 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -139,7 +139,20 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) } if (end_index >= start_index) { - unsigned long count = invalidate_mapping_pages(mapping, + unsigned long count; + + /* + * It's common to FADV_DONTNEED right after + * the read or write that instantiates the + * pages, in which case there will be some + * sitting on the local LRU cache. Try to + * avoid the expensive remote drain and the + * second cache tree walk below by flushing + * them out right away. + */ + lru_add_drain(); + + count = invalidate_mapping_pages(mapping, start_index, end_index); /* diff --git a/mm/internal.h b/mm/internal.h index 44d68895a9b9..ebb3cbd21937 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -131,9 +131,9 @@ struct alloc_context { * Assumption: *_mem_map is contiguous at least up to MAX_ORDER */ static inline unsigned long -__find_buddy_index(unsigned long page_idx, unsigned int order) +__find_buddy_pfn(unsigned long page_pfn, unsigned int order) { - return page_idx ^ (1 << order); + return page_pfn ^ (1 << order); } extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index e32389a97030..b0924a68cc36 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1242,7 +1242,6 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) struct vm_area_struct *vma; unsigned long addr; pmd_t *pmd, _pmd; - bool deposited = false; i_mmap_lock_write(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { @@ -1267,26 +1266,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); - /* - * now deposit the pgtable for arch that need it - * otherwise free it. - */ - if (arch_needs_pgtable_deposit()) { - /* - * The deposit should be visibile only after - * collapse is seen by others. - */ - smp_wmb(); - pgtable_trans_huge_deposit(vma->vm_mm, pmd, - pmd_pgtable(_pmd)); - deposited = true; - } spin_unlock(ptl); up_write(&vma->vm_mm->mmap_sem); - if (!deposited) { - atomic_long_dec(&vma->vm_mm->nr_ptes); - pte_free(vma->vm_mm, pmd_pgtable(_pmd)); - } + atomic_long_dec(&vma->vm_mm->nr_ptes); + pte_free(vma->vm_mm, pmd_pgtable(_pmd)); } } i_mmap_unlock_write(mapping); diff --git a/mm/memory.c b/mm/memory.c index 455c3e628d52..36c774f9259e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3008,13 +3008,6 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) ret = 0; count_vm_event(THP_FILE_MAPPED); out: - /* - * If we are going to fallback to pte mapping, do a - * withdraw with pmd lock held. - */ - if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) - vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, - vmf->pmd); spin_unlock(vmf->ptl); return ret; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2c6d5f64feca..ef731757b80f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -714,7 +714,7 @@ static inline void rmv_page_order(struct page *page) /* * This function checks whether a page is free && is the buddy * we can do coalesce a page and its buddy if - * (a) the buddy is not in a hole && + * (a) the buddy is not in a hole (check before calling!) && * (b) the buddy is in the buddy system && * (c) a page and its buddy have the same order && * (d) a page and its buddy are in the same zone. @@ -729,9 +729,6 @@ static inline void rmv_page_order(struct page *page) static inline int page_is_buddy(struct page *page, struct page *buddy, unsigned int order) { - if (!pfn_valid_within(page_to_pfn(buddy))) - return 0; - if (page_is_guard(buddy) && page_order(buddy) == order) { if (page_zone_id(page) != page_zone_id(buddy)) return 0; @@ -787,9 +784,8 @@ static inline void __free_one_page(struct page *page, struct zone *zone, unsigned int order, int migratetype) { - unsigned long page_idx; - unsigned long combined_idx; - unsigned long uninitialized_var(buddy_idx); + unsigned long combined_pfn; + unsigned long uninitialized_var(buddy_pfn); struct page *buddy; unsigned int max_order; @@ -802,15 +798,16 @@ static inline void __free_one_page(struct page *page, if (likely(!is_migrate_isolate(migratetype))) __mod_zone_freepage_state(zone, 1 << order, migratetype); - page_idx = pfn & ((1 << MAX_ORDER) - 1); - - VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); + VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); continue_merging: while (order < max_order - 1) { - buddy_idx = __find_buddy_index(page_idx, order); - buddy = page + (buddy_idx - page_idx); + buddy_pfn = __find_buddy_pfn(pfn, order); + buddy = page + (buddy_pfn - pfn); + + if (!pfn_valid_within(buddy_pfn)) + goto done_merging; if (!page_is_buddy(page, buddy, order)) goto done_merging; /* @@ -824,9 +821,9 @@ continue_merging: zone->free_area[order].nr_free--; rmv_page_order(buddy); } - combined_idx = buddy_idx & page_idx; - page = page + (combined_idx - page_idx); - page_idx = combined_idx; + combined_pfn = buddy_pfn & pfn; + page = page + (combined_pfn - pfn); + pfn = combined_pfn; order++; } if (max_order < MAX_ORDER) { @@ -841,8 +838,8 @@ continue_merging: if (unlikely(has_isolate_pageblock(zone))) { int buddy_mt; - buddy_idx = __find_buddy_index(page_idx, order); - buddy = page + (buddy_idx - page_idx); + buddy_pfn = __find_buddy_pfn(pfn, order); + buddy = page + (buddy_pfn - pfn); buddy_mt = get_pageblock_migratetype(buddy); if (migratetype != buddy_mt @@ -865,12 +862,12 @@ done_merging: * so it's less likely to be used soon and more likely to be merged * as a higher order page */ - if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) { + if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)) { struct page *higher_page, *higher_buddy; - combined_idx = buddy_idx & page_idx; - higher_page = page + (combined_idx - page_idx); - buddy_idx = __find_buddy_index(combined_idx, order + 1); - higher_buddy = higher_page + (buddy_idx - combined_idx); + combined_pfn = buddy_pfn & pfn; + higher_page = page + (combined_pfn - pfn); + buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); + higher_buddy = higher_page + (buddy_pfn - combined_pfn); if (page_is_buddy(higher_page, higher_buddy, order + 1)) { list_add_tail(&page->lru, &zone->free_area[order].free_list[migratetype]); @@ -2603,6 +2600,9 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z, if (z->node == local_nid) { __inc_zone_state(z, NUMA_HIT); __inc_zone_state(z, local_stat); + } else if (z->node == preferred_zone->node) { + __inc_zone_state(z, NUMA_HIT); + __inc_zone_state(z, NUMA_OTHER); } else { __inc_zone_state(z, NUMA_MISS); __inc_zone_state(preferred_zone, NUMA_FOREIGN); @@ -3015,18 +3015,12 @@ static inline bool should_suppress_show_mem(void) return ret; } -static DEFINE_RATELIMIT_STATE(nopage_rs, - DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); - -void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) +static void warn_alloc_show_mem(gfp_t gfp_mask) { unsigned int filter = SHOW_MEM_FILTER_NODES; - struct va_format vaf; - va_list args; + static DEFINE_RATELIMIT_STATE(show_mem_rs, HZ, 1); - if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) || - debug_guardpage_minorder() > 0) + if (should_suppress_show_mem() || !__ratelimit(&show_mem_rs)) return; /* @@ -3041,6 +3035,20 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM)) filter &= ~SHOW_MEM_FILTER_NODES; + show_mem(filter); +} + +void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) || + debug_guardpage_minorder() > 0) + return; + pr_warn("%s: ", current->comm); va_start(args, fmt); @@ -3052,8 +3060,7 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask); dump_stack(); - if (!should_suppress_show_mem()) - show_mem(filter); + warn_alloc_show_mem(gfp_mask); } static inline struct page * diff --git a/mm/page_isolation.c b/mm/page_isolation.c index a5594bfcc5ed..f4e17a57926a 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -83,7 +83,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) unsigned long flags, nr_pages; bool isolated_page = false; unsigned int order; - unsigned long page_idx, buddy_idx; + unsigned long pfn, buddy_pfn; struct page *buddy; zone = page_zone(page); @@ -102,11 +102,11 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) if (PageBuddy(page)) { order = page_order(page); if (order >= pageblock_order) { - page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); - buddy_idx = __find_buddy_index(page_idx, order); - buddy = page + (buddy_idx - page_idx); + pfn = page_to_pfn(page); + buddy_pfn = __find_buddy_pfn(pfn, order); + buddy = page + (buddy_pfn - pfn); - if (pfn_valid_within(page_to_pfn(buddy)) && + if (pfn_valid_within(buddy_pfn) && !is_migrate_isolate_page(buddy)) { __isolate_free_page(page, order); isolated_page = true; diff --git a/mm/page_owner.c b/mm/page_owner.c index 60634dc53a88..c3cee247f2e6 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -261,7 +261,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, */ for (; pfn < end_pfn; ) { if (!pfn_valid(pfn)) { - pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); + pfn = ALIGN(pfn + 1, pageblock_nr_pages); continue; } @@ -527,7 +527,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) */ for (; pfn < end_pfn; ) { if (!pfn_valid(pfn)) { - pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); + pfn = ALIGN(pfn + 1, pageblock_nr_pages); continue; } diff --git a/mm/shmem.c b/mm/shmem.c index b1b20dc63265..354c340ee505 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2168,10 +2168,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode bool shmem_mapping(struct address_space *mapping) { - if (!mapping->host) - return false; - - return mapping->host->i_sb->s_op == &shmem_ops; + return mapping->a_ops == &shmem_aops; } #ifdef CONFIG_TMPFS diff --git a/mm/z3fold.c b/mm/z3fold.c index 8f9e89ca1d31..0ef0d4039066 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -34,28 +34,59 @@ /***************** * Structures *****************/ +struct z3fold_pool; +struct z3fold_ops { + int (*evict)(struct z3fold_pool *pool, unsigned long handle); +}; + +enum buddy { + HEADLESS = 0, + FIRST, + MIDDLE, + LAST, + BUDDIES_MAX +}; + +/* + * struct z3fold_header - z3fold page metadata occupying the first chunk of each + * z3fold page, except for HEADLESS pages + * @buddy: links the z3fold page into the relevant list in the pool + * @page_lock: per-page lock + * @first_chunks: the size of the first buddy in chunks, 0 if free + * @middle_chunks: the size of the middle buddy in chunks, 0 if free + * @last_chunks: the size of the last buddy in chunks, 0 if free + * @first_num: the starting number (for the first handle) + */ +struct z3fold_header { + struct list_head buddy; + raw_spinlock_t page_lock; + unsigned short first_chunks; + unsigned short middle_chunks; + unsigned short last_chunks; + unsigned short start_middle; + unsigned short first_num:2; +}; + /* * NCHUNKS_ORDER determines the internal allocation granularity, effectively * adjusting internal fragmentation. It also determines the number of * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the - * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk - * in allocated page is occupied by z3fold header, NCHUNKS will be calculated - * to 63 which shows the max number of free chunks in z3fold page, also there - * will be 63 freelists per pool. + * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks + * in the beginning of an allocated page are occupied by z3fold header, so + * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y), + * which shows the max number of free chunks in z3fold page, also there will + * be 63, or 62, respectively, freelists per pool. */ #define NCHUNKS_ORDER 6 #define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) #define CHUNK_SIZE (1 << CHUNK_SHIFT) -#define ZHDR_SIZE_ALIGNED CHUNK_SIZE +#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) +#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) +#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) -#define BUDDY_MASK ((1 << NCHUNKS_ORDER) - 1) - -struct z3fold_pool; -struct z3fold_ops { - int (*evict)(struct z3fold_pool *pool, unsigned long handle); -}; +#define BUDDY_MASK (0x3) /** * struct z3fold_pool - stores metadata for each z3fold pool @@ -80,37 +111,12 @@ struct z3fold_pool { struct list_head unbuddied[NCHUNKS]; struct list_head buddied; struct list_head lru; - u64 pages_nr; + atomic64_t pages_nr; const struct z3fold_ops *ops; struct zpool *zpool; const struct zpool_ops *zpool_ops; }; -enum buddy { - HEADLESS = 0, - FIRST, - MIDDLE, - LAST, - BUDDIES_MAX -}; - -/* - * struct z3fold_header - z3fold page metadata occupying the first chunk of each - * z3fold page, except for HEADLESS pages - * @buddy: links the z3fold page into the relevant list in the pool - * @first_chunks: the size of the first buddy in chunks, 0 if free - * @middle_chunks: the size of the middle buddy in chunks, 0 if free - * @last_chunks: the size of the last buddy in chunks, 0 if free - * @first_num: the starting number (for the first handle) - */ -struct z3fold_header { - struct list_head buddy; - unsigned short first_chunks; - unsigned short middle_chunks; - unsigned short last_chunks; - unsigned short start_middle; - unsigned short first_num:NCHUNKS_ORDER; -}; /* * Internal z3fold page flags @@ -121,6 +127,7 @@ enum z3fold_page_flags { MIDDLE_CHUNK_MAPPED, }; + /***************** * Helpers *****************/ @@ -144,6 +151,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page) clear_bit(PAGE_HEADLESS, &page->private); clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); + raw_spin_lock_init(&zhdr->page_lock); zhdr->first_chunks = 0; zhdr->middle_chunks = 0; zhdr->last_chunks = 0; @@ -159,6 +167,19 @@ static void free_z3fold_page(struct z3fold_header *zhdr) __free_page(virt_to_page(zhdr)); } +/* Lock a z3fold page */ +static inline void z3fold_page_lock(struct z3fold_header *zhdr) +{ + raw_spin_lock(&zhdr->page_lock); +} + +/* Unlock a z3fold page */ +static inline void z3fold_page_unlock(struct z3fold_header *zhdr) +{ + raw_spin_unlock(&zhdr->page_lock); +} + + /* * Encodes the handle of a particular buddy within a z3fold page * Pool lock should be held as this function accesses first_num @@ -179,7 +200,11 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) return (struct z3fold_header *)(handle & PAGE_MASK); } -/* Returns buddy number */ +/* + * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle + * but that doesn't matter. because the masking will result in the + * correct buddy number. + */ static enum buddy handle_to_buddy(unsigned long handle) { struct z3fold_header *zhdr = handle_to_z3fold_header(handle); @@ -200,9 +225,10 @@ static int num_free_chunks(struct z3fold_header *zhdr) */ if (zhdr->middle_chunks != 0) { int nfree_before = zhdr->first_chunks ? - 0 : zhdr->start_middle - 1; + 0 : zhdr->start_middle - ZHDR_CHUNKS; int nfree_after = zhdr->last_chunks ? - 0 : NCHUNKS - zhdr->start_middle - zhdr->middle_chunks; + 0 : TOTAL_CHUNKS - + (zhdr->start_middle + zhdr->middle_chunks); nfree = max(nfree_before, nfree_after); } else nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; @@ -234,7 +260,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp, INIT_LIST_HEAD(&pool->unbuddied[i]); INIT_LIST_HEAD(&pool->buddied); INIT_LIST_HEAD(&pool->lru); - pool->pages_nr = 0; + atomic64_set(&pool->pages_nr, 0); pool->ops = ops; return pool; } @@ -250,26 +276,67 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool) kfree(pool); } +static inline void *mchunk_memmove(struct z3fold_header *zhdr, + unsigned short dst_chunk) +{ + void *beg = zhdr; + return memmove(beg + (dst_chunk << CHUNK_SHIFT), + beg + (zhdr->start_middle << CHUNK_SHIFT), + zhdr->middle_chunks << CHUNK_SHIFT); +} + +#define BIG_CHUNK_GAP 3 /* Has to be called with lock held */ static int z3fold_compact_page(struct z3fold_header *zhdr) { struct page *page = virt_to_page(zhdr); - void *beg = zhdr; + int ret = 0; + + if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) + goto out; /* can't move middle chunk, it's used */ + if (zhdr->middle_chunks == 0) + goto out; /* nothing to compact */ - if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) && - zhdr->middle_chunks != 0 && - zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { - memmove(beg + ZHDR_SIZE_ALIGNED, - beg + (zhdr->start_middle << CHUNK_SHIFT), - zhdr->middle_chunks << CHUNK_SHIFT); + if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { + /* move to the beginning */ + mchunk_memmove(zhdr, ZHDR_CHUNKS); zhdr->first_chunks = zhdr->middle_chunks; zhdr->middle_chunks = 0; zhdr->start_middle = 0; zhdr->first_num++; - return 1; + ret = 1; + goto out; } - return 0; + + /* + * moving data is expensive, so let's only do that if + * there's substantial gain (at least BIG_CHUNK_GAP chunks) + */ + if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && + zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= + BIG_CHUNK_GAP) { + /* new_start: right after 1st chunk */ + unsigned short new_start = zhdr->first_chunks + ZHDR_CHUNKS; + mchunk_memmove(zhdr, new_start); + zhdr->start_middle = new_start; + ret = 1; + goto out; + } + if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && + TOTAL_CHUNKS - + (zhdr->last_chunks + zhdr->start_middle + zhdr->middle_chunks) >= + BIG_CHUNK_GAP) { + /* new_start: right before last chunk */ + unsigned short new_start = TOTAL_CHUNKS - + (zhdr->last_chunks + zhdr->middle_chunks); + mchunk_memmove(zhdr, new_start); + zhdr->start_middle = new_start; + ret = 1; + goto out; + } +out: + return ret; } /** @@ -309,50 +376,60 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, bud = HEADLESS; else { chunks = size_to_chunks(size); - spin_lock(&pool->lock); /* First, try to find an unbuddied z3fold page. */ zhdr = NULL; for_each_unbuddied_list(i, chunks) { - if (!list_empty(&pool->unbuddied[i])) { - zhdr = list_first_entry(&pool->unbuddied[i], + spin_lock(&pool->lock); + zhdr = list_first_entry_or_null(&pool->unbuddied[i], struct z3fold_header, buddy); - page = virt_to_page(zhdr); - if (zhdr->first_chunks == 0) { - if (zhdr->middle_chunks != 0 && - chunks >= zhdr->start_middle) - bud = LAST; - else - bud = FIRST; - } else if (zhdr->last_chunks == 0) + if (!zhdr) { + spin_unlock(&pool->lock); + continue; + } + list_del_init(&zhdr->buddy); + spin_unlock(&pool->lock); + + page = virt_to_page(zhdr); + z3fold_page_lock(zhdr); + if (zhdr->first_chunks == 0) { + if (zhdr->middle_chunks != 0 && + chunks >= zhdr->start_middle) bud = LAST; - else if (zhdr->middle_chunks == 0) - bud = MIDDLE; - else { - pr_err("No free chunks in unbuddied\n"); - WARN_ON(1); - continue; - } - list_del(&zhdr->buddy); - goto found; + else + bud = FIRST; + } else if (zhdr->last_chunks == 0) + bud = LAST; + else if (zhdr->middle_chunks == 0) + bud = MIDDLE; + else { + spin_lock(&pool->lock); + list_add(&zhdr->buddy, &pool->buddied); + spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); + pr_err("No free chunks in unbuddied\n"); + WARN_ON(1); + continue; } + goto found; } bud = FIRST; - spin_unlock(&pool->lock); } /* Couldn't find unbuddied z3fold page, create new one */ page = alloc_page(gfp); if (!page) return -ENOMEM; - spin_lock(&pool->lock); - pool->pages_nr++; + + atomic64_inc(&pool->pages_nr); zhdr = init_z3fold_page(page); if (bud == HEADLESS) { set_bit(PAGE_HEADLESS, &page->private); + spin_lock(&pool->lock); goto headless; } + z3fold_page_lock(zhdr); found: if (bud == FIRST) @@ -361,9 +438,10 @@ found: zhdr->last_chunks = chunks; else { zhdr->middle_chunks = chunks; - zhdr->start_middle = zhdr->first_chunks + 1; + zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; } + spin_lock(&pool->lock); if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || zhdr->middle_chunks == 0) { /* Add to unbuddied list */ @@ -383,6 +461,8 @@ headless: *handle = encode_handle(zhdr, bud); spin_unlock(&pool->lock); + if (bud != HEADLESS) + z3fold_page_unlock(zhdr); return 0; } @@ -404,7 +484,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) struct page *page; enum buddy bud; - spin_lock(&pool->lock); zhdr = handle_to_z3fold_header(handle); page = virt_to_page(zhdr); @@ -412,6 +491,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) /* HEADLESS page stored */ bud = HEADLESS; } else { + z3fold_page_lock(zhdr); bud = handle_to_buddy(handle); switch (bud) { @@ -428,38 +508,67 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) default: pr_err("%s: unknown bud %d\n", __func__, bud); WARN_ON(1); - spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); return; } } if (test_bit(UNDER_RECLAIM, &page->private)) { /* z3fold page is under reclaim, reclaim will free */ - spin_unlock(&pool->lock); + if (bud != HEADLESS) + z3fold_page_unlock(zhdr); return; } + /* Remove from existing buddy list */ if (bud != HEADLESS) { - /* Remove from existing buddy list */ - list_del(&zhdr->buddy); + spin_lock(&pool->lock); + /* + * this object may have been removed from its list by + * z3fold_alloc(). In that case we just do nothing, + * z3fold_alloc() will allocate an object and add the page + * to the relevant list. + */ + if (!list_empty(&zhdr->buddy)) { + list_del(&zhdr->buddy); + } else { + spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); + return; + } + spin_unlock(&pool->lock); } if (bud == HEADLESS || (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 && zhdr->last_chunks == 0)) { /* z3fold page is empty, free */ + spin_lock(&pool->lock); list_del(&page->lru); + spin_unlock(&pool->lock); clear_bit(PAGE_HEADLESS, &page->private); + if (bud != HEADLESS) + z3fold_page_unlock(zhdr); free_z3fold_page(zhdr); - pool->pages_nr--; + atomic64_dec(&pool->pages_nr); } else { - z3fold_compact_page(zhdr); + int compacted = z3fold_compact_page(zhdr); /* Add to the unbuddied list */ + spin_lock(&pool->lock); freechunks = num_free_chunks(zhdr); - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); + /* + * If the page has been compacted, we want to use it + * in the first place. + */ + if (compacted) + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); + else + list_add_tail(&zhdr->buddy, + &pool->unbuddied[freechunks]); + spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); } - spin_unlock(&pool->lock); } /** @@ -506,12 +615,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) unsigned long first_handle = 0, middle_handle = 0, last_handle = 0; spin_lock(&pool->lock); - if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) || - retries == 0) { + if (!pool->ops || !pool->ops->evict || retries == 0) { spin_unlock(&pool->lock); return -EINVAL; } for (i = 0; i < retries; i++) { + if (list_empty(&pool->lru)) { + spin_unlock(&pool->lock); + return -EINVAL; + } page = list_last_entry(&pool->lru, struct page, lru); list_del(&page->lru); @@ -520,6 +632,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) zhdr = page_address(page); if (!test_bit(PAGE_HEADLESS, &page->private)) { list_del(&zhdr->buddy); + spin_unlock(&pool->lock); + z3fold_page_lock(zhdr); /* * We need encode the handles before unlocking, since * we can race with free that will set @@ -534,13 +648,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) middle_handle = encode_handle(zhdr, MIDDLE); if (zhdr->last_chunks) last_handle = encode_handle(zhdr, LAST); + z3fold_page_unlock(zhdr); } else { first_handle = encode_handle(zhdr, HEADLESS); last_handle = middle_handle = 0; + spin_unlock(&pool->lock); } - spin_unlock(&pool->lock); - /* Issue the eviction callback(s) */ if (middle_handle) { ret = pool->ops->evict(pool, middle_handle); @@ -558,7 +672,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) goto next; } next: - spin_lock(&pool->lock); + if (!test_bit(PAGE_HEADLESS, &page->private)) + z3fold_page_lock(zhdr); clear_bit(UNDER_RECLAIM, &page->private); if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) || (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 && @@ -567,26 +682,38 @@ next: * All buddies are now free, free the z3fold page and * return success. */ - clear_bit(PAGE_HEADLESS, &page->private); + if (!test_and_clear_bit(PAGE_HEADLESS, &page->private)) + z3fold_page_unlock(zhdr); free_z3fold_page(zhdr); - pool->pages_nr--; - spin_unlock(&pool->lock); + atomic64_dec(&pool->pages_nr); return 0; } else if (!test_bit(PAGE_HEADLESS, &page->private)) { if (zhdr->first_chunks != 0 && zhdr->last_chunks != 0 && zhdr->middle_chunks != 0) { /* Full, add to buddied list */ + spin_lock(&pool->lock); list_add(&zhdr->buddy, &pool->buddied); + spin_unlock(&pool->lock); } else { - z3fold_compact_page(zhdr); + int compacted = z3fold_compact_page(zhdr); /* add to unbuddied list */ + spin_lock(&pool->lock); freechunks = num_free_chunks(zhdr); - list_add(&zhdr->buddy, - &pool->unbuddied[freechunks]); + if (compacted) + list_add(&zhdr->buddy, + &pool->unbuddied[freechunks]); + else + list_add_tail(&zhdr->buddy, + &pool->unbuddied[freechunks]); + spin_unlock(&pool->lock); } } + if (!test_bit(PAGE_HEADLESS, &page->private)) + z3fold_page_unlock(zhdr); + + spin_lock(&pool->lock); /* add to beginning of LRU */ list_add(&page->lru, &pool->lru); } @@ -611,7 +738,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) void *addr; enum buddy buddy; - spin_lock(&pool->lock); zhdr = handle_to_z3fold_header(handle); addr = zhdr; page = virt_to_page(zhdr); @@ -619,6 +745,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) if (test_bit(PAGE_HEADLESS, &page->private)) goto out; + z3fold_page_lock(zhdr); buddy = handle_to_buddy(handle); switch (buddy) { case FIRST: @@ -637,8 +764,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) addr = NULL; break; } + + z3fold_page_unlock(zhdr); out: - spin_unlock(&pool->lock); return addr; } @@ -653,31 +781,28 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle) struct page *page; enum buddy buddy; - spin_lock(&pool->lock); zhdr = handle_to_z3fold_header(handle); page = virt_to_page(zhdr); - if (test_bit(PAGE_HEADLESS, &page->private)) { - spin_unlock(&pool->lock); + if (test_bit(PAGE_HEADLESS, &page->private)) return; - } + z3fold_page_lock(zhdr); buddy = handle_to_buddy(handle); if (buddy == MIDDLE) clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); - spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); } /** * z3fold_get_pool_size() - gets the z3fold pool size in pages * @pool: pool whose size is being queried * - * Returns: size in pages of the given pool. The pool lock need not be - * taken to access pages_nr. + * Returns: size in pages of the given pool. */ static u64 z3fold_get_pool_size(struct z3fold_pool *pool) { - return pool->pages_nr; + return atomic64_read(&pool->pages_nr); } /***************** @@ -776,8 +901,8 @@ MODULE_ALIAS("zpool-z3fold"); static int __init init_z3fold(void) { - /* Make sure the z3fold header will fit in one chunk */ - BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED); + /* Make sure the z3fold header is not larger than the page size */ + BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE); zpool_register_driver(&z3fold_zpool_driver); return 0; diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 7986f4e0da12..7aad82406422 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -14,6 +14,7 @@ #include <linux/fs.h> #include <linux/mount.h> +#include <linux/of_fdt.h> /* We need to stringify expanded macros so that they can be parsed */ @@ -50,3 +51,9 @@ LX_VALUE(MNT_NOEXEC) LX_VALUE(MNT_NOATIME) LX_VALUE(MNT_NODIRATIME) LX_VALUE(MNT_RELATIME) + +/* linux/of_fdt.h> */ +LX_VALUE(OF_DT_HEADER) + +/* Kernel Configs */ +LX_CONFIG(CONFIG_OF) diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py index 38b1f09d1cd9..086d27223c0c 100644 --- a/scripts/gdb/linux/proc.py +++ b/scripts/gdb/linux/proc.py @@ -16,6 +16,7 @@ from linux import constants from linux import utils from linux import tasks from linux import lists +from struct import * class LxCmdLine(gdb.Command): @@ -195,3 +196,75 @@ values of that process namespace""" info_opts(MNT_INFO, m_flags))) LxMounts() + + +class LxFdtDump(gdb.Command): + """Output Flattened Device Tree header and dump FDT blob to the filename + specified as the command argument. Equivalent to + 'cat /proc/fdt > fdtdump.dtb' on a running target""" + + def __init__(self): + super(LxFdtDump, self).__init__("lx-fdtdump", gdb.COMMAND_DATA, + gdb.COMPLETE_FILENAME) + + def fdthdr_to_cpu(self, fdt_header): + + fdt_header_be = ">IIIIIII" + fdt_header_le = "<IIIIIII" + + if utils.get_target_endianness() == 1: + output_fmt = fdt_header_le + else: + output_fmt = fdt_header_be + + return unpack(output_fmt, pack(fdt_header_be, + fdt_header['magic'], + fdt_header['totalsize'], + fdt_header['off_dt_struct'], + fdt_header['off_dt_strings'], + fdt_header['off_mem_rsvmap'], + fdt_header['version'], + fdt_header['last_comp_version'])) + + def invoke(self, arg, from_tty): + + if not constants.LX_CONFIG_OF: + raise gdb.GdbError("Kernel not compiled with CONFIG_OF\n") + + if len(arg) == 0: + filename = "fdtdump.dtb" + else: + filename = arg + + py_fdt_header_ptr = gdb.parse_and_eval( + "(const struct fdt_header *) initial_boot_params") + py_fdt_header = py_fdt_header_ptr.dereference() + + fdt_header = self.fdthdr_to_cpu(py_fdt_header) + + if fdt_header[0] != constants.LX_OF_DT_HEADER: + raise gdb.GdbError("No flattened device tree magic found\n") + + gdb.write("fdt_magic: 0x{:02X}\n".format(fdt_header[0])) + gdb.write("fdt_totalsize: 0x{:02X}\n".format(fdt_header[1])) + gdb.write("off_dt_struct: 0x{:02X}\n".format(fdt_header[2])) + gdb.write("off_dt_strings: 0x{:02X}\n".format(fdt_header[3])) + gdb.write("off_mem_rsvmap: 0x{:02X}\n".format(fdt_header[4])) + gdb.write("version: {}\n".format(fdt_header[5])) + gdb.write("last_comp_version: {}\n".format(fdt_header[6])) + + inf = gdb.inferiors()[0] + fdt_buf = utils.read_memoryview(inf, py_fdt_header_ptr, + fdt_header[1]).tobytes() + + try: + f = open(filename, 'wb') + except: + raise gdb.GdbError("Could not open file to dump fdt") + + f.write(fdt_buf) + f.close() + + gdb.write("Dumped fdt blob to " + filename + "\n") + +LxFdtDump() diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 163c720d3f2b..ac5bf8708ff0 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -16,6 +16,7 @@ absense||absence absolut||absolute absoulte||absolute acccess||access +acceess||access acceleratoin||acceleration accelleration||acceleration accesing||accessing @@ -46,6 +47,7 @@ ackowledged||acknowledged acording||according activete||activate acumulating||accumulating +acumulator||accumulator adapater||adapter addional||additional additionaly||additionally @@ -184,6 +186,7 @@ cacluated||calculated caculation||calculation calender||calendar calle||called +callibration||calibration calucate||calculate calulate||calculate cancelation||cancellation @@ -244,6 +247,7 @@ compatiblity||compatibility competion||completion compilant||compliant compleatly||completely +completition||completion completly||completely complient||compliant componnents||components @@ -257,6 +261,7 @@ conected||connected configuratoin||configuration configuraton||configuration configuretion||configuration +configutation||configuration conider||consider conjuction||conjunction connectinos||connections @@ -317,6 +322,7 @@ dependant||dependent depreacted||deprecated depreacte||deprecate desactivate||deactivate +desciptor||descriptor desciptors||descriptors descripton||description descrition||description @@ -417,9 +423,12 @@ extention||extension extracter||extractor faild||failed faill||fail +failied||failed +faillure||failure failue||failure failuer||failure faireness||fairness +falied||failed faliure||failure familar||familiar fatser||faster @@ -441,6 +450,7 @@ forseeable||foreseeable forse||force fortan||fortran forwardig||forwarding +framming||framing framwork||framework frequncy||frequency frome||from @@ -482,6 +492,7 @@ howver||however hsould||should hypter||hyper identidier||identifier +illigal||illegal imblance||imbalance immeadiately||immediately immedaite||immediate @@ -520,6 +531,7 @@ informtion||information infromation||information ingore||ignore inital||initial +initalized||initialized initalised||initialized initalise||initialize initalize||initialize @@ -532,6 +544,7 @@ initilize||initialize inofficial||unofficial insititute||institute instal||install +instanciated||instantiated inteface||interface integreated||integrated integrety||integrity @@ -557,6 +570,7 @@ intialized||initialized intialize||initialize intregral||integral intrrupt||interrupt +intterrupt||interrupt intuative||intuitive invaid||invalid invalde||invald @@ -567,6 +581,8 @@ invokations||invocations irrelevent||irrelevant isnt||isn't isssue||issue +iternations||iterations +itertation||iteration itslef||itself jave||java jeffies||jiffies @@ -621,6 +637,7 @@ messsage||message messsages||messages microprocesspr||microprocessor milliseonds||milliseconds +minium||minimum minumum||minimum miscelleneous||miscellaneous misformed||malformed @@ -668,6 +685,7 @@ occurances||occurrences occured||occurred occurence||occurrence occure||occurred +occured||occurred occuring||occurring offet||offset omitt||omit @@ -681,8 +699,10 @@ optionnal||optional optmizations||optimizations orientatied||orientated orientied||oriented +orignal||original otherise||otherwise ouput||output +oustanding||outstanding overaall||overall overhread||overhead overlaping||overlapping @@ -705,6 +725,7 @@ paramter||parameter paramters||parameters particuarly||particularly particularily||particularly +partiton||partition pased||passed passin||passing pathes||paths @@ -724,6 +745,7 @@ pleaes||please ploting||plotting plugable||pluggable poinnter||pointer +pointeur||pointer poiter||pointer posible||possible positon||position @@ -752,6 +774,7 @@ procceed||proceed proccesors||processors procesed||processed proces||process +procesing||processing processessing||processing processess||processes processpr||processor @@ -780,6 +803,7 @@ protable||portable protcol||protocol protecion||protection protocoll||protocol +promixity||proximity psudo||pseudo psuedo||pseudo psychadelic||psychedelic @@ -801,6 +825,7 @@ recommanded||recommended recyle||recycle redircet||redirect redirectrion||redirection +reename||rename refcounf||refcount refence||reference refered||referred @@ -944,7 +969,9 @@ suble||subtle substract||subtract succesfully||successfully succesful||successful +successed||succeeded successfull||successful +successfuly||successfully sucessfully||successfully sucess||success superflous||superfluous @@ -960,6 +987,7 @@ suppport||support supress||suppress surpresses||suppresses susbsystem||subsystem +suspeneded||suspended suspicously||suspiciously swaping||swapping switchs||switches @@ -967,6 +995,7 @@ symetric||symmetric synax||syntax synchonized||synchronized syncronize||synchronize +syncronized||synchronized syncronizing||synchronizing syncronus||synchronous syste||system @@ -1005,22 +1034,30 @@ ture||true tyep||type udpate||update uesd||used +uncommited||uncommitted unconditionaly||unconditionally underun||underrun unecessary||unnecessary unexecpted||unexpected +unexpcted||unexpected unexpectd||unexpected unexpeted||unexpected +unexpexted||unexpected unfortunatelly||unfortunately unifiy||unify unintialized||uninitialized +unkmown||unknown unknonw||unknown unknow||unknown unkown||unknown unneedingly||unnecessarily +unnsupported||unsupported +unmached||unmatched unresgister||unregister +unrgesiter||unregister unsinged||unsigned unstabel||unstable +unsolicitied||unsolicited unsuccessfull||unsuccessful unsuported||unsupported untill||until diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 5487827fa86c..370eb2f4dd37 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -27,6 +27,18 @@ config IMA to learn more about IMA. If unsure, say N. +config IMA_KEXEC + bool "Enable carrying the IMA measurement list across a soft boot" + depends on IMA && TCG_TPM && HAVE_IMA_KEXEC + default n + help + TPM PCRs are only reset on a hard reboot. In order to validate + a TPM's quote after a soft boot, the IMA measurement list of the + running kernel must be saved and restored on boot. + + Depending on the IMA policy, the measurement list can grow to + be very large. + config IMA_MEASURE_PCR_IDX int depends on IMA diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile index 9aeaedad1e2b..29f198bde02b 100644 --- a/security/integrity/ima/Makefile +++ b/security/integrity/ima/Makefile @@ -8,4 +8,5 @@ obj-$(CONFIG_IMA) += ima.o ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \ ima_policy.o ima_template.o ima_template_lib.o ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o +ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o obj-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index db25f54a04fe..5e6180a4da7d 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -28,6 +28,10 @@ #include "../integrity.h" +#ifdef CONFIG_HAVE_IMA_KEXEC +#include <asm/ima.h> +#endif + enum ima_show_type { IMA_SHOW_BINARY, IMA_SHOW_BINARY_NO_FIELD_LEN, IMA_SHOW_BINARY_OLD_STRING_FMT, IMA_SHOW_ASCII }; enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 }; @@ -81,6 +85,7 @@ struct ima_template_field { /* IMA template descriptor definition */ struct ima_template_desc { + struct list_head list; char *name; char *fmt; int num_fields; @@ -102,6 +107,27 @@ struct ima_queue_entry { }; extern struct list_head ima_measurements; /* list of all measurements */ +/* Some details preceding the binary serialized measurement list */ +struct ima_kexec_hdr { + u16 version; + u16 _reserved0; + u32 _reserved1; + u64 buffer_size; + u64 count; +}; + +#ifdef CONFIG_HAVE_IMA_KEXEC +void ima_load_kexec_buffer(void); +#else +static inline void ima_load_kexec_buffer(void) {} +#endif /* CONFIG_HAVE_IMA_KEXEC */ + +/* + * The default binary_runtime_measurements list format is defined as the + * platform native format. The canonical format is defined as little-endian. + */ +extern bool ima_canonical_fmt; + /* Internal IMA function definitions */ int ima_init(void); int ima_fs_init(void); @@ -122,7 +148,12 @@ int ima_init_crypto(void); void ima_putc(struct seq_file *m, void *data, int datalen); void ima_print_digest(struct seq_file *m, u8 *digest, u32 size); struct ima_template_desc *ima_template_desc_current(void); +int ima_restore_measurement_entry(struct ima_template_entry *entry); +int ima_restore_measurement_list(loff_t bufsize, void *buf); +int ima_measurements_show(struct seq_file *m, void *v); +unsigned long ima_get_binary_runtime_size(void); int ima_init_template(void); +void ima_init_template_list(void); /* * used to protect h_table and sha_table diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 38f2ed830dd6..802d5d20f36f 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -477,11 +477,13 @@ static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data, u8 buffer[IMA_EVENT_NAME_LEN_MAX + 1] = { 0 }; u8 *data_to_hash = field_data[i].data; u32 datalen = field_data[i].len; + u32 datalen_to_hash = + !ima_canonical_fmt ? datalen : cpu_to_le32(datalen); if (strcmp(td->name, IMA_TEMPLATE_IMA_NAME) != 0) { rc = crypto_shash_update(shash, - (const u8 *) &field_data[i].len, - sizeof(field_data[i].len)); + (const u8 *) &datalen_to_hash, + sizeof(datalen_to_hash)); if (rc) break; } else if (strcmp(td->fields[i]->field_id, "n") == 0) { diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 3df46906492d..ca303e5d2b94 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -28,6 +28,16 @@ static DEFINE_MUTEX(ima_write_mutex); +bool ima_canonical_fmt; +static int __init default_canonical_fmt_setup(char *str) +{ +#ifdef __BIG_ENDIAN + ima_canonical_fmt = 1; +#endif + return 1; +} +__setup("ima_canonical_fmt", default_canonical_fmt_setup); + static int valid_policy = 1; #define TMPBUFLEN 12 static ssize_t ima_show_htable_value(char __user *buf, size_t count, @@ -116,13 +126,13 @@ void ima_putc(struct seq_file *m, void *data, int datalen) * [eventdata length] * eventdata[n]=template specific data */ -static int ima_measurements_show(struct seq_file *m, void *v) +int ima_measurements_show(struct seq_file *m, void *v) { /* the list never shrinks, so we don't need a lock here */ struct ima_queue_entry *qe = v; struct ima_template_entry *e; char *template_name; - int namelen; + u32 pcr, namelen, template_data_len; /* temporary fields */ bool is_ima_template = false; int i; @@ -139,25 +149,29 @@ static int ima_measurements_show(struct seq_file *m, void *v) * PCR used defaults to the same (config option) in * little-endian format, unless set in policy */ - ima_putc(m, &e->pcr, sizeof(e->pcr)); + pcr = !ima_canonical_fmt ? e->pcr : cpu_to_le32(e->pcr); + ima_putc(m, &pcr, sizeof(e->pcr)); /* 2nd: template digest */ ima_putc(m, e->digest, TPM_DIGEST_SIZE); /* 3rd: template name size */ - namelen = strlen(template_name); + namelen = !ima_canonical_fmt ? strlen(template_name) : + cpu_to_le32(strlen(template_name)); ima_putc(m, &namelen, sizeof(namelen)); /* 4th: template name */ - ima_putc(m, template_name, namelen); + ima_putc(m, template_name, strlen(template_name)); /* 5th: template length (except for 'ima' template) */ if (strcmp(template_name, IMA_TEMPLATE_IMA_NAME) == 0) is_ima_template = true; - if (!is_ima_template) - ima_putc(m, &e->template_data_len, - sizeof(e->template_data_len)); + if (!is_ima_template) { + template_data_len = !ima_canonical_fmt ? e->template_data_len : + cpu_to_le32(e->template_data_len); + ima_putc(m, &template_data_len, sizeof(e->template_data_len)); + } /* 6th: template specific data */ for (i = 0; i < e->template_desc->num_fields; i++) { diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index 2ac1f41db5c0..2967d497a665 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -129,6 +129,8 @@ int __init ima_init(void) if (rc != 0) return rc; + ima_load_kexec_buffer(); + rc = ima_add_boot_aggregate(); /* boot aggregate must be first entry */ if (rc != 0) return rc; diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c new file mode 100644 index 000000000000..e473eee913cb --- /dev/null +++ b/security/integrity/ima/ima_kexec.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2016 IBM Corporation + * + * Authors: + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> + * Mimi Zohar <zohar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include <linux/seq_file.h> +#include <linux/vmalloc.h> +#include <linux/kexec.h> +#include "ima.h" + +#ifdef CONFIG_IMA_KEXEC +static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer, + unsigned long segment_size) +{ + struct ima_queue_entry *qe; + struct seq_file file; + struct ima_kexec_hdr khdr; + int ret = 0; + + /* segment size can't change between kexec load and execute */ + file.buf = vmalloc(segment_size); + if (!file.buf) { + ret = -ENOMEM; + goto out; + } + + file.size = segment_size; + file.read_pos = 0; + file.count = sizeof(khdr); /* reserved space */ + + memset(&khdr, 0, sizeof(khdr)); + khdr.version = 1; + list_for_each_entry_rcu(qe, &ima_measurements, later) { + if (file.count < file.size) { + khdr.count++; + ima_measurements_show(&file, qe); + } else { + ret = -EINVAL; + break; + } + } + + if (ret < 0) + goto out; + + /* + * fill in reserved space with some buffer details + * (eg. version, buffer size, number of measurements) + */ + khdr.buffer_size = file.count; + if (ima_canonical_fmt) { + khdr.version = cpu_to_le16(khdr.version); + khdr.count = cpu_to_le64(khdr.count); + khdr.buffer_size = cpu_to_le64(khdr.buffer_size); + } + memcpy(file.buf, &khdr, sizeof(khdr)); + + print_hex_dump(KERN_DEBUG, "ima dump: ", DUMP_PREFIX_NONE, + 16, 1, file.buf, + file.count < 100 ? file.count : 100, true); + + *buffer_size = file.count; + *buffer = file.buf; +out: + if (ret == -EINVAL) + vfree(file.buf); + return ret; +} + +/* + * Called during kexec_file_load so that IMA can add a segment to the kexec + * image for the measurement list for the next kernel. + * + * This function assumes that kexec_mutex is held. + */ +void ima_add_kexec_buffer(struct kimage *image) +{ + struct kexec_buf kbuf = { .image = image, .buf_align = PAGE_SIZE, + .buf_min = 0, .buf_max = ULONG_MAX, + .top_down = true }; + unsigned long binary_runtime_size; + + /* use more understandable variable names than defined in kbuf */ + void *kexec_buffer = NULL; + size_t kexec_buffer_size; + size_t kexec_segment_size; + int ret; + + /* + * Reserve an extra half page of memory for additional measurements + * added during the kexec load. + */ + binary_runtime_size = ima_get_binary_runtime_size(); + if (binary_runtime_size >= ULONG_MAX - PAGE_SIZE) + kexec_segment_size = ULONG_MAX; + else + kexec_segment_size = ALIGN(ima_get_binary_runtime_size() + + PAGE_SIZE / 2, PAGE_SIZE); + if ((kexec_segment_size == ULONG_MAX) || + ((kexec_segment_size >> PAGE_SHIFT) > totalram_pages / 2)) { + pr_err("Binary measurement list too large.\n"); + return; + } + + ima_dump_measurement_list(&kexec_buffer_size, &kexec_buffer, + kexec_segment_size); + if (!kexec_buffer) { + pr_err("Not enough memory for the kexec measurement buffer.\n"); + return; + } + + kbuf.buffer = kexec_buffer; + kbuf.bufsz = kexec_buffer_size; + kbuf.memsz = kexec_segment_size; + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error passing over kexec measurement buffer.\n"); + return; + } + + ret = arch_ima_add_kexec_buffer(image, kbuf.mem, kexec_segment_size); + if (ret) { + pr_err("Error passing over kexec measurement buffer.\n"); + return; + } + + pr_debug("kexec measurement buffer for the loaded kernel at 0x%lx.\n", + kbuf.mem); +} +#endif /* IMA_KEXEC */ + +/* + * Restore the measurement list from the previous kernel. + */ +void ima_load_kexec_buffer(void) +{ + void *kexec_buffer = NULL; + size_t kexec_buffer_size = 0; + int rc; + + rc = ima_get_kexec_buffer(&kexec_buffer, &kexec_buffer_size); + switch (rc) { + case 0: + rc = ima_restore_measurement_list(kexec_buffer_size, + kexec_buffer); + if (rc != 0) + pr_err("Failed to restore the measurement list: %d\n", + rc); + + ima_free_kexec_buffer(); + break; + case -ENOTSUPP: + pr_debug("Restoring the measurement list not supported\n"); + break; + case -ENOENT: + pr_debug("No measurement list to restore\n"); + break; + default: + pr_debug("Error restoring the measurement list: %d\n", rc); + } +} diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 423d111b3b94..50818c60538b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -418,6 +418,7 @@ static int __init init_ima(void) { int error; + ima_init_template_list(); hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); if (!error) { diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c index 32f6ac0f96df..d9aa5ab71204 100644 --- a/security/integrity/ima/ima_queue.c +++ b/security/integrity/ima/ima_queue.c @@ -29,6 +29,11 @@ #define AUDIT_CAUSE_LEN_MAX 32 LIST_HEAD(ima_measurements); /* list of all measurements */ +#ifdef CONFIG_IMA_KEXEC +static unsigned long binary_runtime_size; +#else +static unsigned long binary_runtime_size = ULONG_MAX; +#endif /* key: inode (before secure-hashing a file) */ struct ima_h_table ima_htable = { @@ -64,12 +69,32 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value, return ret; } +/* + * Calculate the memory required for serializing a single + * binary_runtime_measurement list entry, which contains a + * couple of variable length fields (e.g template name and data). + */ +static int get_binary_runtime_size(struct ima_template_entry *entry) +{ + int size = 0; + + size += sizeof(u32); /* pcr */ + size += sizeof(entry->digest); + size += sizeof(int); /* template name size field */ + size += strlen(entry->template_desc->name) + 1; + size += sizeof(entry->template_data_len); + size += entry->template_data_len; + return size; +} + /* ima_add_template_entry helper function: - * - Add template entry to measurement list and hash table. + * - Add template entry to the measurement list and hash table, for + * all entries except those carried across kexec. * * (Called with ima_extend_list_mutex held.) */ -static int ima_add_digest_entry(struct ima_template_entry *entry) +static int ima_add_digest_entry(struct ima_template_entry *entry, + bool update_htable) { struct ima_queue_entry *qe; unsigned int key; @@ -85,11 +110,34 @@ static int ima_add_digest_entry(struct ima_template_entry *entry) list_add_tail_rcu(&qe->later, &ima_measurements); atomic_long_inc(&ima_htable.len); - key = ima_hash_key(entry->digest); - hlist_add_head_rcu(&qe->hnext, &ima_htable.queue[key]); + if (update_htable) { + key = ima_hash_key(entry->digest); + hlist_add_head_rcu(&qe->hnext, &ima_htable.queue[key]); + } + + if (binary_runtime_size != ULONG_MAX) { + int size; + + size = get_binary_runtime_size(entry); + binary_runtime_size = (binary_runtime_size < ULONG_MAX - size) ? + binary_runtime_size + size : ULONG_MAX; + } return 0; } +/* + * Return the amount of memory required for serializing the + * entire binary_runtime_measurement list, including the ima_kexec_hdr + * structure. + */ +unsigned long ima_get_binary_runtime_size(void) +{ + if (binary_runtime_size >= (ULONG_MAX - sizeof(struct ima_kexec_hdr))) + return ULONG_MAX; + else + return binary_runtime_size + sizeof(struct ima_kexec_hdr); +}; + static int ima_pcr_extend(const u8 *hash, int pcr) { int result = 0; @@ -103,8 +151,13 @@ static int ima_pcr_extend(const u8 *hash, int pcr) return result; } -/* Add template entry to the measurement list and hash table, - * and extend the pcr. +/* + * Add template entry to the measurement list and hash table, and + * extend the pcr. + * + * On systems which support carrying the IMA measurement list across + * kexec, maintain the total memory size required for serializing the + * binary_runtime_measurements. */ int ima_add_template_entry(struct ima_template_entry *entry, int violation, const char *op, struct inode *inode, @@ -126,7 +179,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation, } } - result = ima_add_digest_entry(entry); + result = ima_add_digest_entry(entry, 1); if (result < 0) { audit_cause = "ENOMEM"; audit_info = 0; @@ -149,3 +202,13 @@ out: op, audit_cause, result, audit_info); return result; } + +int ima_restore_measurement_entry(struct ima_template_entry *entry) +{ + int result = 0; + + mutex_lock(&ima_extend_list_mutex); + result = ima_add_digest_entry(entry, 0); + mutex_unlock(&ima_extend_list_mutex); + return result; +} diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index febd12ed9b55..cebb37c63629 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -15,16 +15,20 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/rculist.h> #include "ima.h" #include "ima_template_lib.h" -static struct ima_template_desc defined_templates[] = { +static struct ima_template_desc builtin_templates[] = { {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT}, {.name = "ima-ng", .fmt = "d-ng|n-ng"}, {.name = "ima-sig", .fmt = "d-ng|n-ng|sig"}, {.name = "", .fmt = ""}, /* placeholder for a custom format */ }; +static LIST_HEAD(defined_templates); +static DEFINE_SPINLOCK(template_list); + static struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, .field_show = ima_show_template_digest}, @@ -37,6 +41,7 @@ static struct ima_template_field supported_fields[] = { {.field_id = "sig", .field_init = ima_eventsig_init, .field_show = ima_show_template_sig}, }; +#define MAX_TEMPLATE_NAME_LEN 15 static struct ima_template_desc *ima_template; static struct ima_template_desc *lookup_template_desc(const char *name); @@ -52,6 +57,8 @@ static int __init ima_template_setup(char *str) if (ima_template) return 1; + ima_init_template_list(); + /* * Verify that a template with the supplied name exists. * If not, use CONFIG_IMA_DEFAULT_TEMPLATE. @@ -80,7 +87,7 @@ __setup("ima_template=", ima_template_setup); static int __init ima_template_fmt_setup(char *str) { - int num_templates = ARRAY_SIZE(defined_templates); + int num_templates = ARRAY_SIZE(builtin_templates); if (ima_template) return 1; @@ -91,22 +98,28 @@ static int __init ima_template_fmt_setup(char *str) return 1; } - defined_templates[num_templates - 1].fmt = str; - ima_template = defined_templates + num_templates - 1; + builtin_templates[num_templates - 1].fmt = str; + ima_template = builtin_templates + num_templates - 1; + return 1; } __setup("ima_template_fmt=", ima_template_fmt_setup); static struct ima_template_desc *lookup_template_desc(const char *name) { - int i; - - for (i = 0; i < ARRAY_SIZE(defined_templates); i++) { - if (strcmp(defined_templates[i].name, name) == 0) - return defined_templates + i; + struct ima_template_desc *template_desc; + int found = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(template_desc, &defined_templates, list) { + if ((strcmp(template_desc->name, name) == 0) || + (strcmp(template_desc->fmt, name) == 0)) { + found = 1; + break; + } } - - return NULL; + rcu_read_unlock(); + return found ? template_desc : NULL; } static struct ima_template_field *lookup_template_field(const char *field_id) @@ -142,9 +155,14 @@ static int template_desc_init_fields(const char *template_fmt, { const char *template_fmt_ptr; struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX]; - int template_num_fields = template_fmt_size(template_fmt); + int template_num_fields; int i, len; + if (num_fields && *num_fields > 0) /* already initialized? */ + return 0; + + template_num_fields = template_fmt_size(template_fmt); + if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX) { pr_err("format string '%s' contains too many fields\n", template_fmt); @@ -182,11 +200,28 @@ static int template_desc_init_fields(const char *template_fmt, return 0; } +void ima_init_template_list(void) +{ + int i; + + if (!list_empty(&defined_templates)) + return; + + spin_lock(&template_list); + for (i = 0; i < ARRAY_SIZE(builtin_templates); i++) { + list_add_tail_rcu(&builtin_templates[i].list, + &defined_templates); + } + spin_unlock(&template_list); +} + struct ima_template_desc *ima_template_desc_current(void) { - if (!ima_template) + if (!ima_template) { + ima_init_template_list(); ima_template = lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE); + } return ima_template; } @@ -205,3 +240,239 @@ int __init ima_init_template(void) return result; } + +static struct ima_template_desc *restore_template_fmt(char *template_name) +{ + struct ima_template_desc *template_desc = NULL; + int ret; + + ret = template_desc_init_fields(template_name, NULL, NULL); + if (ret < 0) { + pr_err("attempting to initialize the template \"%s\" failed\n", + template_name); + goto out; + } + + template_desc = kzalloc(sizeof(*template_desc), GFP_KERNEL); + if (!template_desc) + goto out; + + template_desc->name = ""; + template_desc->fmt = kstrdup(template_name, GFP_KERNEL); + if (!template_desc->fmt) + goto out; + + spin_lock(&template_list); + list_add_tail_rcu(&template_desc->list, &defined_templates); + spin_unlock(&template_list); +out: + return template_desc; +} + +static int ima_restore_template_data(struct ima_template_desc *template_desc, + void *template_data, + int template_data_size, + struct ima_template_entry **entry) +{ + struct binary_field_data { + u32 len; + u8 data[0]; + } __packed; + + struct binary_field_data *field_data; + int offset = 0; + int ret = 0; + int i; + + *entry = kzalloc(sizeof(**entry) + + template_desc->num_fields * sizeof(struct ima_field_data), + GFP_NOFS); + if (!*entry) + return -ENOMEM; + + (*entry)->template_desc = template_desc; + for (i = 0; i < template_desc->num_fields; i++) { + field_data = template_data + offset; + + /* Each field of the template data is prefixed with a length. */ + if (offset > (template_data_size - sizeof(*field_data))) { + pr_err("Restoring the template field failed\n"); + ret = -EINVAL; + break; + } + offset += sizeof(*field_data); + + if (ima_canonical_fmt) + field_data->len = le32_to_cpu(field_data->len); + + if (offset > (template_data_size - field_data->len)) { + pr_err("Restoring the template field data failed\n"); + ret = -EINVAL; + break; + } + offset += field_data->len; + + (*entry)->template_data[i].len = field_data->len; + (*entry)->template_data_len += sizeof(field_data->len); + + (*entry)->template_data[i].data = + kzalloc(field_data->len + 1, GFP_KERNEL); + if (!(*entry)->template_data[i].data) { + ret = -ENOMEM; + break; + } + memcpy((*entry)->template_data[i].data, field_data->data, + field_data->len); + (*entry)->template_data_len += field_data->len; + } + + if (ret < 0) { + ima_free_template_entry(*entry); + *entry = NULL; + } + + return ret; +} + +/* Restore the serialized binary measurement list without extending PCRs. */ +int ima_restore_measurement_list(loff_t size, void *buf) +{ + struct binary_hdr_v1 { + u32 pcr; + u8 digest[TPM_DIGEST_SIZE]; + u32 template_name_len; + char template_name[0]; + } __packed; + char template_name[MAX_TEMPLATE_NAME_LEN]; + + struct binary_data_v1 { + u32 template_data_size; + char template_data[0]; + } __packed; + + struct ima_kexec_hdr *khdr = buf; + struct binary_hdr_v1 *hdr_v1; + struct binary_data_v1 *data_v1; + + void *bufp = buf + sizeof(*khdr); + void *bufendp; + struct ima_template_entry *entry; + struct ima_template_desc *template_desc; + unsigned long count = 0; + int ret = 0; + + if (!buf || size < sizeof(*khdr)) + return 0; + + if (ima_canonical_fmt) { + khdr->version = le16_to_cpu(khdr->version); + khdr->count = le64_to_cpu(khdr->count); + khdr->buffer_size = le64_to_cpu(khdr->buffer_size); + } + + if (khdr->version != 1) { + pr_err("attempting to restore a incompatible measurement list"); + return -EINVAL; + } + + if (khdr->count > ULONG_MAX - 1) { + pr_err("attempting to restore too many measurements"); + return -EINVAL; + } + + /* + * ima kexec buffer prefix: version, buffer size, count + * v1 format: pcr, digest, template-name-len, template-name, + * template-data-size, template-data + */ + bufendp = buf + khdr->buffer_size; + while ((bufp < bufendp) && (count++ < khdr->count)) { + hdr_v1 = bufp; + if (bufp > (bufendp - sizeof(*hdr_v1))) { + pr_err("attempting to restore partial measurement\n"); + ret = -EINVAL; + break; + } + bufp += sizeof(*hdr_v1); + + if (ima_canonical_fmt) + hdr_v1->template_name_len = + le32_to_cpu(hdr_v1->template_name_len); + + if ((hdr_v1->template_name_len >= MAX_TEMPLATE_NAME_LEN) || + (bufp > (bufendp - hdr_v1->template_name_len))) { + pr_err("attempting to restore a template name \ + that is too long\n"); + ret = -EINVAL; + break; + } + data_v1 = bufp += (u_int8_t)hdr_v1->template_name_len; + + /* template name is not null terminated */ + memcpy(template_name, hdr_v1->template_name, + hdr_v1->template_name_len); + template_name[hdr_v1->template_name_len] = 0; + + if (strcmp(template_name, "ima") == 0) { + pr_err("attempting to restore an unsupported \ + template \"%s\" failed\n", template_name); + ret = -EINVAL; + break; + } + + template_desc = lookup_template_desc(template_name); + if (!template_desc) { + template_desc = restore_template_fmt(template_name); + if (!template_desc) + break; + } + + /* + * Only the running system's template format is initialized + * on boot. As needed, initialize the other template formats. + */ + ret = template_desc_init_fields(template_desc->fmt, + &(template_desc->fields), + &(template_desc->num_fields)); + if (ret < 0) { + pr_err("attempting to restore the template fmt \"%s\" \ + failed\n", template_desc->fmt); + ret = -EINVAL; + break; + } + + if (bufp > (bufendp - sizeof(data_v1->template_data_size))) { + pr_err("restoring the template data size failed\n"); + ret = -EINVAL; + break; + } + bufp += (u_int8_t) sizeof(data_v1->template_data_size); + + if (ima_canonical_fmt) + data_v1->template_data_size = + le32_to_cpu(data_v1->template_data_size); + + if (bufp > (bufendp - data_v1->template_data_size)) { + pr_err("restoring the template data failed\n"); + ret = -EINVAL; + break; + } + bufp += data_v1->template_data_size; + + ret = ima_restore_template_data(template_desc, + data_v1->template_data, + data_v1->template_data_size, + &entry); + if (ret < 0) + break; + + memcpy(entry->digest, hdr_v1->digest, TPM_DIGEST_SIZE); + entry->pcr = + !ima_canonical_fmt ? hdr_v1->pcr : le32_to_cpu(hdr_v1->pcr); + ret = ima_restore_measurement_entry(entry); + if (ret < 0) + break; + + } + return ret; +} diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index f9bae04ba176..f9ba37b3928d 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -103,8 +103,11 @@ static void ima_show_template_data_binary(struct seq_file *m, u32 len = (show == IMA_SHOW_BINARY_OLD_STRING_FMT) ? strlen(field_data->data) : field_data->len; - if (show != IMA_SHOW_BINARY_NO_FIELD_LEN) - ima_putc(m, &len, sizeof(len)); + if (show != IMA_SHOW_BINARY_NO_FIELD_LEN) { + u32 field_len = !ima_canonical_fmt ? len : cpu_to_le32(len); + + ima_putc(m, &field_len, sizeof(field_len)); + } if (!len) return; |