From d407bd25a204bd66b7346dde24bd3d37ef0e0b05 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 18 Jan 2017 15:14:17 +0100 Subject: bpf: don't trigger OOM killer under pressure with map alloc This patch adds two helpers, bpf_map_area_alloc() and bpf_map_area_free(), that are to be used for map allocations. Using kmalloc() for very large allocations can cause excessive work within the page allocator, so i) fall back earlier to vmalloc() when the attempt is considered costly anyway, and even more importantly ii) don't trigger OOM killer with any of the allocators. Since this is based on a user space request, for example, when creating maps with element pre-allocation, we really want such requests to fail instead of killing other user space processes. Also, don't spam the kernel log with warnings should any of the allocations fail under pressure. Given that, we can make backend selection in bpf_map_area_alloc() generic, and convert all maps over to use this API for spots with potentially large allocation requests. Note, replacing the one kmalloc_array() is fine as overflow checks happen earlier in htab_map_alloc(), since it must also protect the multiplication for vmalloc() should kmalloc_array() fail. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/stackmap.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'kernel/bpf/stackmap.c') diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 732ae16d12b7..be8519148c25 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include "percpu_freelist.h" @@ -32,7 +31,7 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; int err; - smap->elems = vzalloc(elem_size * smap->map.max_entries); + smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries); if (!smap->elems) return -ENOMEM; @@ -45,7 +44,7 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) return 0; free_elems: - vfree(smap->elems); + bpf_map_area_free(smap->elems); return err; } @@ -76,12 +75,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-E2BIG); - smap = kzalloc(cost, GFP_USER | __GFP_NOWARN); - if (!smap) { - smap = vzalloc(cost); - if (!smap) - return ERR_PTR(-ENOMEM); - } + smap = bpf_map_area_alloc(cost); + if (!smap) + return ERR_PTR(-ENOMEM); err = -E2BIG; cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); @@ -112,7 +108,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) put_buffers: put_callchain_buffers(); free_smap: - kvfree(smap); + bpf_map_area_free(smap); return ERR_PTR(err); } @@ -262,9 +258,9 @@ static void stack_map_free(struct bpf_map *map) /* wait for bpf programs to complete before freeing stack map */ synchronize_rcu(); - vfree(smap->elems); + bpf_map_area_free(smap->elems); pcpu_freelist_destroy(&smap->freelist); - kvfree(smap); + bpf_map_area_free(smap); put_callchain_buffers(); } -- cgit v1.2.3 From c78f8bdfa11fcceb9723c61212e4bd8f76c87f9e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Feb 2017 22:24:48 +0100 Subject: bpf: mark all registered map/prog types as __ro_after_init All map types and prog types are registered to the BPF core through bpf_register_map_type() and bpf_register_prog_type() during init and remain unchanged thereafter. As by design we don't (and never will) have any pluggable code that can register to that at any later point in time, lets mark all the existing bpf_{map,prog}_type_list objects in the tree as __ro_after_init, so they can be moved to read-only section from then onwards. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 10 +++++----- kernel/bpf/hashtab.c | 8 ++++---- kernel/bpf/lpm_trie.c | 2 +- kernel/bpf/stackmap.c | 2 +- kernel/trace/bpf_trace.c | 6 +++--- net/core/filter.c | 18 +++++++++--------- 6 files changed, 23 insertions(+), 23 deletions(-) (limited to 'kernel/bpf/stackmap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3d55d95dcf49..6b6f41f0b211 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -269,7 +269,7 @@ static const struct bpf_map_ops array_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map_type_list array_type __read_mostly = { +static struct bpf_map_type_list array_type __ro_after_init = { .ops = &array_ops, .type = BPF_MAP_TYPE_ARRAY, }; @@ -283,7 +283,7 @@ static const struct bpf_map_ops percpu_array_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map_type_list percpu_array_type __read_mostly = { +static struct bpf_map_type_list percpu_array_type __ro_after_init = { .ops = &percpu_array_ops, .type = BPF_MAP_TYPE_PERCPU_ARRAY, }; @@ -409,7 +409,7 @@ static const struct bpf_map_ops prog_array_ops = { .map_fd_put_ptr = prog_fd_array_put_ptr, }; -static struct bpf_map_type_list prog_array_type __read_mostly = { +static struct bpf_map_type_list prog_array_type __ro_after_init = { .ops = &prog_array_ops, .type = BPF_MAP_TYPE_PROG_ARRAY, }; @@ -522,7 +522,7 @@ static const struct bpf_map_ops perf_event_array_ops = { .map_release = perf_event_fd_array_release, }; -static struct bpf_map_type_list perf_event_array_type __read_mostly = { +static struct bpf_map_type_list perf_event_array_type __ro_after_init = { .ops = &perf_event_array_ops, .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, }; @@ -564,7 +564,7 @@ static const struct bpf_map_ops cgroup_array_ops = { .map_fd_put_ptr = cgroup_fd_array_put_ptr, }; -static struct bpf_map_type_list cgroup_array_type __read_mostly = { +static struct bpf_map_type_list cgroup_array_type __ro_after_init = { .ops = &cgroup_array_ops, .type = BPF_MAP_TYPE_CGROUP_ARRAY, }; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index a753bbe7df0a..3ea87fb19a94 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1023,7 +1023,7 @@ static const struct bpf_map_ops htab_ops = { .map_delete_elem = htab_map_delete_elem, }; -static struct bpf_map_type_list htab_type __read_mostly = { +static struct bpf_map_type_list htab_type __ro_after_init = { .ops = &htab_ops, .type = BPF_MAP_TYPE_HASH, }; @@ -1037,7 +1037,7 @@ static const struct bpf_map_ops htab_lru_ops = { .map_delete_elem = htab_lru_map_delete_elem, }; -static struct bpf_map_type_list htab_lru_type __read_mostly = { +static struct bpf_map_type_list htab_lru_type __ro_after_init = { .ops = &htab_lru_ops, .type = BPF_MAP_TYPE_LRU_HASH, }; @@ -1124,7 +1124,7 @@ static const struct bpf_map_ops htab_percpu_ops = { .map_delete_elem = htab_map_delete_elem, }; -static struct bpf_map_type_list htab_percpu_type __read_mostly = { +static struct bpf_map_type_list htab_percpu_type __ro_after_init = { .ops = &htab_percpu_ops, .type = BPF_MAP_TYPE_PERCPU_HASH, }; @@ -1138,7 +1138,7 @@ static const struct bpf_map_ops htab_lru_percpu_ops = { .map_delete_elem = htab_lru_map_delete_elem, }; -static struct bpf_map_type_list htab_lru_percpu_type __read_mostly = { +static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = { .ops = &htab_lru_percpu_ops, .type = BPF_MAP_TYPE_LRU_PERCPU_HASH, }; diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e0f6a0bd279b..8bfe0afaee10 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -508,7 +508,7 @@ static const struct bpf_map_ops trie_ops = { .map_delete_elem = trie_delete_elem, }; -static struct bpf_map_type_list trie_type __read_mostly = { +static struct bpf_map_type_list trie_type __ro_after_init = { .ops = &trie_ops, .type = BPF_MAP_TYPE_LPM_TRIE, }; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index be8519148c25..22aa45cd0324 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -273,7 +273,7 @@ static const struct bpf_map_ops stack_map_ops = { .map_delete_elem = stack_map_delete_elem, }; -static struct bpf_map_type_list stack_map_type __read_mostly = { +static struct bpf_map_type_list stack_map_type __ro_after_init = { .ops = &stack_map_ops, .type = BPF_MAP_TYPE_STACK_TRACE, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 424daa4586d1..cee9802cf3e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -506,7 +506,7 @@ static const struct bpf_verifier_ops kprobe_prog_ops = { .is_valid_access = kprobe_prog_is_valid_access, }; -static struct bpf_prog_type_list kprobe_tl = { +static struct bpf_prog_type_list kprobe_tl __ro_after_init = { .ops = &kprobe_prog_ops, .type = BPF_PROG_TYPE_KPROBE, }; @@ -589,7 +589,7 @@ static const struct bpf_verifier_ops tracepoint_prog_ops = { .is_valid_access = tp_prog_is_valid_access, }; -static struct bpf_prog_type_list tracepoint_tl = { +static struct bpf_prog_type_list tracepoint_tl __ro_after_init = { .ops = &tracepoint_prog_ops, .type = BPF_PROG_TYPE_TRACEPOINT, }; @@ -648,7 +648,7 @@ static const struct bpf_verifier_ops perf_event_prog_ops = { .convert_ctx_access = pe_prog_convert_ctx_access, }; -static struct bpf_prog_type_list perf_event_tl = { +static struct bpf_prog_type_list perf_event_tl __ro_after_init = { .ops = &perf_event_prog_ops, .type = BPF_PROG_TYPE_PERF_EVENT, }; diff --git a/net/core/filter.c b/net/core/filter.c index 0b753cbb2536..e466e0040137 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3296,47 +3296,47 @@ static const struct bpf_verifier_ops cg_sock_ops = { .convert_ctx_access = sock_filter_convert_ctx_access, }; -static struct bpf_prog_type_list sk_filter_type __read_mostly = { +static struct bpf_prog_type_list sk_filter_type __ro_after_init = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; -static struct bpf_prog_type_list sched_cls_type __read_mostly = { +static struct bpf_prog_type_list sched_cls_type __ro_after_init = { .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_CLS, }; -static struct bpf_prog_type_list sched_act_type __read_mostly = { +static struct bpf_prog_type_list sched_act_type __ro_after_init = { .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_ACT, }; -static struct bpf_prog_type_list xdp_type __read_mostly = { +static struct bpf_prog_type_list xdp_type __ro_after_init = { .ops = &xdp_ops, .type = BPF_PROG_TYPE_XDP, }; -static struct bpf_prog_type_list cg_skb_type __read_mostly = { +static struct bpf_prog_type_list cg_skb_type __ro_after_init = { .ops = &cg_skb_ops, .type = BPF_PROG_TYPE_CGROUP_SKB, }; -static struct bpf_prog_type_list lwt_in_type __read_mostly = { +static struct bpf_prog_type_list lwt_in_type __ro_after_init = { .ops = &lwt_inout_ops, .type = BPF_PROG_TYPE_LWT_IN, }; -static struct bpf_prog_type_list lwt_out_type __read_mostly = { +static struct bpf_prog_type_list lwt_out_type __ro_after_init = { .ops = &lwt_inout_ops, .type = BPF_PROG_TYPE_LWT_OUT, }; -static struct bpf_prog_type_list lwt_xmit_type __read_mostly = { +static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = { .ops = &lwt_xmit_ops, .type = BPF_PROG_TYPE_LWT_XMIT, }; -static struct bpf_prog_type_list cg_sock_type __read_mostly = { +static struct bpf_prog_type_list cg_sock_type __ro_after_init = { .ops = &cg_sock_ops, .type = BPF_PROG_TYPE_CGROUP_SOCK }; -- cgit v1.2.3