summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--kernel/bpf/syscall.c22
-rw-r--r--kernel/bpf/verifier.c35
-rw-r--r--tools/lib/bpf/Makefile42
-rw-r--r--tools/lib/bpf/README.rst1
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c25
5 files changed, 92 insertions, 33 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 62f6bced3a3c..afca36f53c49 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -136,21 +136,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
void *bpf_map_area_alloc(size_t size, int numa_node)
{
- /* We definitely need __GFP_NORETRY, so OOM killer doesn't
- * trigger under memory pressure as we really just want to
- * fail instead.
+ /* We really just want to fail instead of triggering OOM killer
+ * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
+ * which is used for lower order allocation requests.
+ *
+ * It has been observed that higher order allocation requests done by
+ * vmalloc with __GFP_NORETRY being set might fail due to not trying
+ * to reclaim memory from the page cache, thus we set
+ * __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+
+ const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
void *area;
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- area = kmalloc_node(size, GFP_USER | flags, numa_node);
+ area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
+ numa_node);
if (area != NULL)
return area;
}
- return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
- __builtin_return_address(0));
+ return __vmalloc_node_flags_caller(size, numa_node,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+ flags, __builtin_return_address(0));
}
void bpf_map_area_free(void *area)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 86f9cd5d1c4e..fd502c1f71eb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -352,6 +352,14 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
map_value_has_spin_lock(reg->map_ptr);
}
+static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
+{
+ return type == PTR_TO_SOCKET ||
+ type == PTR_TO_SOCKET_OR_NULL ||
+ type == PTR_TO_TCP_SOCK ||
+ type == PTR_TO_TCP_SOCK_OR_NULL;
+}
+
static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
return type == ARG_PTR_TO_SOCK_COMMON;
@@ -451,8 +459,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (t == PTR_TO_STACK)
verbose(env, ",call_%d", func(env, reg)->callsite);
} else {
- verbose(env, "(id=%d ref_obj_id=%d", reg->id,
- reg->ref_obj_id);
+ verbose(env, "(id=%d", reg->id);
+ if (reg_type_may_be_refcounted_or_null(t))
+ verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
if (t != SCALAR_VALUE)
verbose(env, ",off=%d", reg->off);
if (type_is_pkt_pointer(t))
@@ -3372,7 +3381,7 @@ do_sim:
*dst_reg = *ptr_reg;
}
ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
- if (!ptr_is_dst_reg)
+ if (!ptr_is_dst_reg && ret)
*dst_reg = tmp;
return !ret ? -EFAULT : 0;
}
@@ -6069,15 +6078,17 @@ static int propagate_liveness(struct bpf_verifier_env *env,
}
/* Propagate read liveness of registers... */
BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
- /* We don't need to worry about FP liveness because it's read-only */
- for (i = 0; i < BPF_REG_FP; i++) {
- if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
- continue;
- if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
- err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
- &vparent->frame[vstate->curframe]->regs[i]);
- if (err)
- return err;
+ for (frame = 0; frame <= vstate->curframe; frame++) {
+ /* We don't need to worry about FP liveness, it's read-only */
+ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
+ if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
+ continue;
+ if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
+ err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
+ &vparent->frame[frame]->regs[i]);
+ if (err)
+ return err;
+ }
}
}
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 61aaacf0cfa1..5bf8e52c41fc 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -3,7 +3,7 @@
BPF_VERSION = 0
BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 1
+BPF_EXTRAVERSION = 2
MAKEFLAGS += --no-print-directory
@@ -79,8 +79,6 @@ export prefix libdir src obj
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-LIB_FILE = libbpf.a libbpf.so
-
VERSION = $(BPF_VERSION)
PATCHLEVEL = $(BPF_PATCHLEVEL)
EXTRAVERSION = $(BPF_EXTRAVERSION)
@@ -88,7 +86,10 @@ EXTRAVERSION = $(BPF_EXTRAVERSION)
OBJ = $@
N =
-LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
+LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
+
+LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION)
+LIB_FILE = libbpf.a libbpf.so*
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
@@ -128,16 +129,18 @@ all:
export srctree OUTPUT CC LD CFLAGS V
include $(srctree)/tools/build/Makefile.include
-BPF_IN := $(OUTPUT)libbpf-in.o
-LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
-VERSION_SCRIPT := libbpf.map
+BPF_IN := $(OUTPUT)libbpf-in.o
+VERSION_SCRIPT := libbpf.map
+
+LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
+LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
-CMD_TARGETS = $(LIB_FILE)
+CMD_TARGETS = $(LIB_TARGET)
CXX_TEST_TARGET = $(OUTPUT)test_libbpf
@@ -170,9 +173,13 @@ $(BPF_IN): force elfdep bpfdep
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
$(Q)$(MAKE) $(build)=libbpf
-$(OUTPUT)libbpf.so: $(BPF_IN)
- $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \
- $^ -o $@
+$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
+
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN)
+ $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \
+ -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@
+ @ln -sf $(@F) $(OUTPUT)libbpf.so
+ @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION)
$(OUTPUT)libbpf.a: $(BPF_IN)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
@@ -192,6 +199,12 @@ check_abi: $(OUTPUT)libbpf.so
exit 1; \
fi
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
define do_install
if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
@@ -200,8 +213,9 @@ define do_install
endef
install_lib: all_cmd
- $(call QUIET_INSTALL, $(LIB_FILE)) \
- $(call do_install,$(LIB_FILE),$(libdir_SQ))
+ $(call QUIET_INSTALL, $(LIB_TARGET)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
install_headers:
$(call QUIET_INSTALL, headers) \
@@ -219,7 +233,7 @@ config-clean:
clean:
$(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
- *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS
+ *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
index 5788479384ca..cef7b77eab69 100644
--- a/tools/lib/bpf/README.rst
+++ b/tools/lib/bpf/README.rst
@@ -111,6 +111,7 @@ starting from ``0.0.1``.
Every time ABI is being changed, e.g. because a new symbol is added or
semantic of existing symbol is changed, ABI version should be bumped.
+This bump in ABI version is at most once per kernel development cycle.
For example, if current state of ``libbpf.map`` is:
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 4004891afa9c..f2ccae39ee66 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1940,3 +1940,28 @@
.errstr = "!read_ok",
.result = REJECT,
},
+{
+ "calls: cross frame pruning - liveness propagation",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr = "!read_ok",
+ .result = REJECT,
+},