From db69718b8efac802c7cc20d5a6c7dfc913f99c43 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Apr 2024 19:13:04 -0700 Subject: bpf: inline bpf_map_lookup_elem() for PERCPU_ARRAY maps Using new per-CPU BPF instruction implement inlining for per-CPU ARRAY map lookup helper, if BPF JIT support is present. Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/r/20240402021307.1012571-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 13358675ff2e..8c1e6d7654bb 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -246,6 +246,38 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) return this_cpu_ptr(array->pptrs[index & array->index_mask]); } +/* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */ +static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_insn *insn = insn_buf; + + if (!bpf_jit_supports_percpu_insn()) + return -EOPNOTSUPP; + + if (map->map_flags & BPF_F_INNER_MAP) + return -EOPNOTSUPP; + + BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0); + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs)); + + *insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0); + if (!map->bypass_spec_v1) { + *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6); + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask); + } else { + *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5); + } + + *insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); + *insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); + *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *insn++ = BPF_MOV64_IMM(BPF_REG_0, 0); + return insn - insn_buf; +} + static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -776,6 +808,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = percpu_array_map_lookup_elem, + .map_gen_lookup = percpu_array_map_gen_lookup, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem, -- cgit v1.2.3 From 246331e3f1eac905170a923f0ec76725c2558232 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 20 Apr 2024 11:09:09 +0200 Subject: bpf: allow struct bpf_wq to be embedded in arraymaps and hashmaps Currently bpf_wq_cancel_and_free() is just a placeholder as there is no memory allocation for bpf_wq just yet. Again, duplication of the bpf_timer approach Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20240420-bpf_wq-v2-9-6c986a5a741f@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ kernel/bpf/arraymap.c | 18 ++++++++++------- kernel/bpf/hashtab.c | 55 ++++++++++++++++++++++++++++++++++++++++----------- kernel/bpf/helpers.c | 8 ++++++++ kernel/bpf/syscall.c | 9 +++++++++ 5 files changed, 73 insertions(+), 19 deletions(-) (limited to 'kernel/bpf/arraymap.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c7dcfd395555..64bf0cf3ee95 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -534,6 +534,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_wq_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, @@ -2204,6 +2205,7 @@ void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); +void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8c1e6d7654bb..580d07b15471 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -428,17 +428,21 @@ static void *array_map_vmalloc_addr(struct bpf_array *array) return (void *)round_down((unsigned long)array, PAGE_SIZE); } -static void array_map_free_timers(struct bpf_map *map) +static void array_map_free_timers_wq(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; - /* We don't reset or free fields other than timer on uref dropping to zero. */ - if (!btf_record_has_field(map->record, BPF_TIMER)) - return; + /* We don't reset or free fields other than timer and workqueue + * on uref dropping to zero. + */ + if (btf_record_has_field(map->record, BPF_TIMER)) + for (i = 0; i < array->map.max_entries; i++) + bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); - for (i = 0; i < array->map.max_entries; i++) - bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); + if (btf_record_has_field(map->record, BPF_WORKQUEUE)) + for (i = 0; i < array->map.max_entries; i++) + bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i)); } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ @@ -782,7 +786,7 @@ const struct bpf_map_ops array_map_ops = { .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, - .map_release_uref = array_map_free_timers, + .map_release_uref = array_map_free_timers_wq, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index c3e79a0b9361..0179183c543a 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -240,6 +240,26 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab) } } +static void htab_free_prealloced_wq(struct bpf_htab *htab) +{ + u32 num_entries = htab->map.max_entries; + int i; + + if (!btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) + return; + if (htab_has_extra_elems(htab)) + num_entries += num_possible_cpus(); + + for (i = 0; i < num_entries; i++) { + struct htab_elem *elem; + + elem = get_htab_elem(htab, i); + bpf_obj_free_workqueue(htab->map.record, + elem->key + round_up(htab->map.key_size, 8)); + cond_resched(); + } +} + static void htab_free_prealloced_fields(struct bpf_htab *htab) { u32 num_entries = htab->map.max_entries; @@ -1495,7 +1515,7 @@ static void delete_all_elements(struct bpf_htab *htab) migrate_enable(); } -static void htab_free_malloced_timers(struct bpf_htab *htab) +static void htab_free_malloced_timers_or_wq(struct bpf_htab *htab, bool is_timer) { int i; @@ -1507,24 +1527,35 @@ static void htab_free_malloced_timers(struct bpf_htab *htab) hlist_nulls_for_each_entry(l, n, head, hash_node) { /* We only free timer on uref dropping to zero */ - bpf_obj_free_timer(htab->map.record, l->key + round_up(htab->map.key_size, 8)); + if (is_timer) + bpf_obj_free_timer(htab->map.record, + l->key + round_up(htab->map.key_size, 8)); + else + bpf_obj_free_workqueue(htab->map.record, + l->key + round_up(htab->map.key_size, 8)); } cond_resched_rcu(); } rcu_read_unlock(); } -static void htab_map_free_timers(struct bpf_map *map) +static void htab_map_free_timers_and_wq(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - /* We only free timer on uref dropping to zero */ - if (!btf_record_has_field(htab->map.record, BPF_TIMER)) - return; - if (!htab_is_prealloc(htab)) - htab_free_malloced_timers(htab); - else - htab_free_prealloced_timers(htab); + /* We only free timer and workqueue on uref dropping to zero */ + if (btf_record_has_field(htab->map.record, BPF_TIMER)) { + if (!htab_is_prealloc(htab)) + htab_free_malloced_timers_or_wq(htab, true); + else + htab_free_prealloced_timers(htab); + } + if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) { + if (!htab_is_prealloc(htab)) + htab_free_malloced_timers_or_wq(htab, false); + else + htab_free_prealloced_wq(htab); + } } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ @@ -2260,7 +2291,7 @@ const struct bpf_map_ops htab_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, - .map_release_uref = htab_map_free_timers, + .map_release_uref = htab_map_free_timers_and_wq, .map_lookup_elem = htab_map_lookup_elem, .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, .map_update_elem = htab_map_update_elem, @@ -2281,7 +2312,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, - .map_release_uref = htab_map_free_timers, + .map_release_uref = htab_map_free_timers_and_wq, .map_lookup_elem = htab_lru_map_lookup_elem, .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 0b8ba6c81994..fe827f6e5234 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1468,6 +1468,14 @@ void bpf_timer_cancel_and_free(void *val) kfree_rcu(t, cb.rcu); } +/* This function is called by map_delete/update_elem for individual element and + * by ops->map_release_uref when the user space reference to a map reaches zero. + */ +void bpf_wq_cancel_and_free(void *val) +{ + BTF_TYPE_EMIT(struct bpf_wq); +} + BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) { unsigned long *kptr = map_value; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0848e4141b00..63e368337483 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -661,6 +661,13 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj) bpf_timer_cancel_and_free(obj + rec->timer_off); } +void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj) +{ + if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE))) + return; + bpf_wq_cancel_and_free(obj + rec->wq_off); +} + void bpf_obj_free_fields(const struct btf_record *rec, void *obj) { const struct btf_field *fields; @@ -682,6 +689,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) bpf_timer_cancel_and_free(field_ptr); break; case BPF_WORKQUEUE: + bpf_wq_cancel_and_free(field_ptr); break; case BPF_KPTR_UNREF: WRITE_ONCE(*(u64 *)field_ptr, 0); @@ -1119,6 +1127,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, } break; case BPF_TIMER: + case BPF_WORKQUEUE: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY) { -- cgit v1.2.3 From b98a5c68ccaa94e93b9e898091fe2cf21c1500e6 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 30 Apr 2024 12:43:24 +0200 Subject: bpf: Do not walk twice the map on free If someone stores both a timer and a workqueue in a map, on free we would walk it twice. Add a check in array_map_free_timers_wq and free the timers and workqueues if they are present. Fixes: 246331e3f1ea ("bpf: allow struct bpf_wq to be embedded in arraymaps and hashmaps") Signed-off-by: Benjamin Tissoires Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20240430-bpf-next-v3-1-27afe7f3b17c@kernel.org --- kernel/bpf/arraymap.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 580d07b15471..feabc0193852 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -436,13 +436,14 @@ static void array_map_free_timers_wq(struct bpf_map *map) /* We don't reset or free fields other than timer and workqueue * on uref dropping to zero. */ - if (btf_record_has_field(map->record, BPF_TIMER)) - for (i = 0; i < array->map.max_entries; i++) - bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); - - if (btf_record_has_field(map->record, BPF_WORKQUEUE)) - for (i = 0; i < array->map.max_entries; i++) - bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i)); + if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) { + for (i = 0; i < array->map.max_entries; i++) { + if (btf_record_has_field(map->record, BPF_TIMER)) + bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); + if (btf_record_has_field(map->record, BPF_WORKQUEUE)) + bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i)); + } + } } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ -- cgit v1.2.3