summaryrefslogtreecommitdiff
path: root/kernel/bpf/verifier.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-25 20:02:57 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-25 20:02:57 -0800
commit386403a115f95997c2715691226e11a7b5cffcfd (patch)
treea685df70bd3d5b295683713818ddf0752c3d75b6 /kernel/bpf/verifier.c
parent642356cb5f4a8c82b5ca5ebac288c327d10df236 (diff)
parent622dc5ad8052f4f0c6b7a12787696a5caa3c6a58 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: "Another merge window, another pull full of stuff: 1) Support alternative names for network devices, from Jiri Pirko. 2) Introduce per-netns netdev notifiers, also from Jiri Pirko. 3) Support MSG_PEEK in vsock/virtio, from Matias Ezequiel Vara Larsen. 4) Allow compiling out the TLS TOE code, from Jakub Kicinski. 5) Add several new tracepoints to the kTLS code, also from Jakub. 6) Support set channels ethtool callback in ena driver, from Sameeh Jubran. 7) New SCTP events SCTP_ADDR_ADDED, SCTP_ADDR_REMOVED, SCTP_ADDR_MADE_PRIM, and SCTP_SEND_FAILED_EVENT. From Xin Long. 8) Add XDP support to mvneta driver, from Lorenzo Bianconi. 9) Lots of netfilter hw offload fixes, cleanups and enhancements, from Pablo Neira Ayuso. 10) PTP support for aquantia chips, from Egor Pomozov. 11) Add UDP segmentation offload support to igb, ixgbe, and i40e. From Josh Hunt. 12) Add smart nagle to tipc, from Jon Maloy. 13) Support L2 field rewrite by TC offloads in bnxt_en, from Venkat Duvvuru. 14) Add a flow mask cache to OVS, from Tonghao Zhang. 15) Add XDP support to ice driver, from Maciej Fijalkowski. 16) Add AF_XDP support to ice driver, from Krzysztof Kazimierczak. 17) Support UDP GSO offload in atlantic driver, from Igor Russkikh. 18) Support it in stmmac driver too, from Jose Abreu. 19) Support TIPC encryption and auth, from Tuong Lien. 20) Introduce BPF trampolines, from Alexei Starovoitov. 21) Make page_pool API more numa friendly, from Saeed Mahameed. 22) Introduce route hints to ipv4 and ipv6, from Paolo Abeni. 23) Add UDP segmentation offload to cxgb4, Rahul Lakkireddy" * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1857 commits) libbpf: Fix usage of u32 in userspace code mm: Implement no-MMU variant of vmalloc_user_node_flags slip: Fix use-after-free Read in slip_open net: dsa: sja1105: fix sja1105_parse_rgmii_delays() macvlan: schedule bc_work even if error enetc: add support Credit Based Shaper(CBS) for hardware offload net: phy: add helpers phy_(un)lock_mdio_bus mdio_bus: don't use managed reset-controller ax88179_178a: add ethtool_op_get_ts_info() mlxsw: spectrum_router: Fix use of uninitialized adjacency index mlxsw: spectrum_router: After underlay moves, demote conflicting tunnels bpf: Simplify __bpf_arch_text_poke poke type handling bpf: Introduce BPF_TRACE_x helper for the tracing tests bpf: Add bpf_jit_blinding_enabled for !CONFIG_BPF_JIT bpf, testing: Add various tail call test cases bpf, x86: Emit patchable direct jump as tail call bpf: Constant map key tracking for prog array pokes bpf: Add poke dependency tracking for prog array maps bpf: Add initial poke descriptor table for jit images bpf: Move owner type, jited info into array auxiliary data ...
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r--kernel/bpf/verifier.c543
1 files changed, 506 insertions, 37 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ffc3e53f5300..a0482e1c4a77 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -23,7 +23,7 @@
#include "disasm.h"
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -171,6 +171,9 @@ struct bpf_verifier_stack_elem {
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64
+#define BPF_MAP_KEY_POISON (1ULL << 63)
+#define BPF_MAP_KEY_SEEN (1ULL << 62)
+
#define BPF_MAP_PTR_UNPRIV 1UL
#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
POISON_POINTER_DELTA))
@@ -178,12 +181,12 @@ struct bpf_verifier_stack_elem {
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
- return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
+ return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}
static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
- return aux->map_state & BPF_MAP_PTR_UNPRIV;
+ return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}
static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
@@ -191,8 +194,31 @@ static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
{
BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
unpriv |= bpf_map_ptr_unpriv(aux);
- aux->map_state = (unsigned long)map |
- (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
+ aux->map_ptr_state = (unsigned long)map |
+ (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
+}
+
+static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_key_state & BPF_MAP_KEY_POISON;
+}
+
+static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
+{
+ return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
+}
+
+static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
+}
+
+static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
+{
+ bool poisoned = bpf_map_key_poisoned(aux);
+
+ aux->map_key_state = state | BPF_MAP_KEY_SEEN |
+ (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
struct bpf_call_arg_meta {
@@ -205,8 +231,11 @@ struct bpf_call_arg_meta {
u64 msize_umax_value;
int ref_obj_id;
int func_id;
+ u32 btf_id;
};
+struct btf *btf_vmlinux;
+
static DEFINE_MUTEX(bpf_verifier_lock);
static const struct bpf_line_info *
@@ -243,6 +272,10 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
n = min(log->len_total - log->len_used - 1, n);
log->kbuf[n] = '\0';
+ if (log->level == BPF_LOG_KERNEL) {
+ pr_err("BPF:%s\n", log->kbuf);
+ return;
+ }
if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
log->len_used += n;
else
@@ -280,6 +313,19 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
va_end(args);
}
+__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+}
+
static const char *ltrim(const char *s)
{
while (isspace(*s))
@@ -400,6 +446,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
+ [PTR_TO_BTF_ID] = "ptr_",
};
static char slot_type_char[] = {
@@ -430,6 +477,12 @@ static struct bpf_func_state *func(struct bpf_verifier_env *env,
return cur->frame[reg->frameno];
}
+const char *kernel_type_name(u32 id)
+{
+ return btf_name_by_offset(btf_vmlinux,
+ btf_type_by_id(btf_vmlinux, id)->name_off);
+}
+
static void print_verifier_state(struct bpf_verifier_env *env,
const struct bpf_func_state *state)
{
@@ -454,6 +507,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
+ if (t == PTR_TO_BTF_ID)
+ verbose(env, "%s", kernel_type_name(reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
@@ -978,6 +1033,17 @@ static void __reg_bound_offset(struct bpf_reg_state *reg)
reg->umax_value));
}
+static void __reg_bound_offset32(struct bpf_reg_state *reg)
+{
+ u64 mask = 0xffffFFFF;
+ struct tnum range = tnum_range(reg->umin_value & mask,
+ reg->umax_value & mask);
+ struct tnum lo32 = tnum_cast(reg->var_off, 4);
+ struct tnum hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32);
+
+ reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range));
+}
+
/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
@@ -2331,10 +2397,12 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
- enum bpf_access_type t, enum bpf_reg_type *reg_type)
+ enum bpf_access_type t, enum bpf_reg_type *reg_type,
+ u32 *btf_id)
{
struct bpf_insn_access_aux info = {
.reg_type = *reg_type,
+ .log = &env->log,
};
if (env->ops->is_valid_access &&
@@ -2348,7 +2416,10 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
*/
*reg_type = info.reg_type;
- env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+ if (*reg_type == PTR_TO_BTF_ID)
+ *btf_id = info.btf_id;
+ else
+ env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
/* remember the offset of last byte accessed in ctx */
if (env->prog->aux->max_ctx_offset < off + size)
env->prog->aux->max_ctx_offset = off + size;
@@ -2739,6 +2810,88 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
reg->smax_value = reg->umax_value;
}
+static bool bpf_map_is_rdonly(const struct bpf_map *map)
+{
+ return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+}
+
+static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
+{
+ void *ptr;
+ u64 addr;
+ int err;
+
+ err = map->ops->map_direct_value_addr(map, &addr, off);
+ if (err)
+ return err;
+ ptr = (void *)(long)addr + off;
+
+ switch (size) {
+ case sizeof(u8):
+ *val = (u64)*(u8 *)ptr;
+ break;
+ case sizeof(u16):
+ *val = (u64)*(u16 *)ptr;
+ break;
+ case sizeof(u32):
+ *val = (u64)*(u32 *)ptr;
+ break;
+ case sizeof(u64):
+ *val = *(u64 *)ptr;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs,
+ int regno, int off, int size,
+ enum bpf_access_type atype,
+ int value_regno)
+{
+ struct bpf_reg_state *reg = regs + regno;
+ const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
+ const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ u32 btf_id;
+ int ret;
+
+ if (atype != BPF_READ) {
+ verbose(env, "only read is supported\n");
+ return -EACCES;
+ }
+
+ if (off < 0) {
+ verbose(env,
+ "R%d is ptr_%s invalid negative access: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env,
+ "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
+ regno, tname, off, tn_buf);
+ return -EACCES;
+ }
+
+ ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
+ if (ret < 0)
+ return ret;
+
+ if (ret == SCALAR_VALUE) {
+ mark_reg_unknown(env, regs, value_regno);
+ return 0;
+ }
+ mark_reg_known_zero(env, regs, value_regno);
+ regs[value_regno].type = PTR_TO_BTF_ID;
+ regs[value_regno].btf_id = btf_id;
+ return 0;
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -2776,11 +2929,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err)
return err;
err = check_map_access(env, regno, off, size, false);
- if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown(env, regs, value_regno);
+ if (!err && t == BPF_READ && value_regno >= 0) {
+ struct bpf_map *map = reg->map_ptr;
+
+ /* if map is read-only, track its contents as scalars */
+ if (tnum_is_const(reg->var_off) &&
+ bpf_map_is_rdonly(map) &&
+ map->ops->map_direct_value_addr) {
+ int map_off = off + reg->var_off.value;
+ u64 val = 0;
+ err = bpf_map_direct_read(map, map_off, size,
+ &val);
+ if (err)
+ return err;
+
+ regs[value_regno].type = SCALAR_VALUE;
+ __mark_reg_known(&regs[value_regno], val);
+ } else {
+ mark_reg_unknown(env, regs, value_regno);
+ }
+ }
} else if (reg->type == PTR_TO_CTX) {
enum bpf_reg_type reg_type = SCALAR_VALUE;
+ u32 btf_id = 0;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
@@ -2792,7 +2964,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
+ err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
+ if (err)
+ verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
/* ctx access returns either a scalar, or a
* PTR_TO_PACKET[_META,_END]. In the latter
@@ -2811,6 +2985,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* a sub-register.
*/
regs[value_regno].subreg_def = DEF_NOT_SUBREG;
+ if (reg_type == PTR_TO_BTF_ID)
+ regs[value_regno].btf_id = btf_id;
}
regs[value_regno].type = reg_type;
}
@@ -2870,6 +3046,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_tp_buffer_access(env, reg, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_BTF_ID) {
+ err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
+ value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
@@ -3298,6 +3477,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
expected_type = PTR_TO_SOCKET;
if (type != expected_type)
goto err_type;
+ } else if (arg_type == ARG_PTR_TO_BTF_ID) {
+ expected_type = PTR_TO_BTF_ID;
+ if (type != expected_type)
+ goto err_type;
+ if (reg->btf_id != meta->btf_id) {
+ verbose(env, "Helper has type %s got %s in R%d\n",
+ kernel_type_name(meta->btf_id),
+ kernel_type_name(reg->btf_id), regno);
+
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
+ verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
+ regno);
+ return -EACCES;
+ }
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
@@ -3445,6 +3640,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
if (func_id != BPF_FUNC_perf_event_read &&
func_id != BPF_FUNC_perf_event_output &&
+ func_id != BPF_FUNC_skb_output &&
func_id != BPF_FUNC_perf_event_read_value)
goto error;
break;
@@ -3532,6 +3728,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_perf_event_read:
case BPF_FUNC_perf_event_output:
case BPF_FUNC_perf_event_read_value:
+ case BPF_FUNC_skb_output:
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
goto error;
break;
@@ -3810,6 +4007,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* only increment it after check_reg_arg() finished */
state->curframe++;
+ if (btf_check_func_arg_match(env, subprog))
+ return -EINVAL;
+
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
@@ -3916,15 +4116,49 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return -EACCES;
}
- if (!BPF_MAP_PTR(aux->map_state))
+ if (!BPF_MAP_PTR(aux->map_ptr_state))
bpf_map_ptr_store(aux, meta->map_ptr,
meta->map_ptr->unpriv_array);
- else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
+ else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
meta->map_ptr->unpriv_array);
return 0;
}
+static int
+record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ int func_id, int insn_idx)
+{
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ struct bpf_reg_state *regs = cur_regs(env), *reg;
+ struct bpf_map *map = meta->map_ptr;
+ struct tnum range;
+ u64 val;
+
+ if (func_id != BPF_FUNC_tail_call)
+ return 0;
+ if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
+ verbose(env, "kernel subsystem misconfigured verifier\n");
+ return -EINVAL;
+ }
+
+ range = tnum_range(0, map->max_entries - 1);
+ reg = &regs[BPF_REG_3];
+
+ if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+ }
+
+ val = reg->var_off.value;
+ if (bpf_map_key_unseen(aux))
+ bpf_map_key_store(aux, val);
+ else if (!bpf_map_key_poisoned(aux) &&
+ bpf_map_key_immediate(aux) != val)
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+}
+
static int check_reference_leak(struct bpf_verifier_env *env)
{
struct bpf_func_state *state = cur_func(env);
@@ -3986,23 +4220,20 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
- err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
+ for (i = 0; i < 5; i++) {
+ err = btf_resolve_helper_id(&env->log, fn, i);
+ if (err > 0)
+ meta.btf_id = err;
+ err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
+ if (err)
+ return err;
+ }
+
+ err = record_func_map(env, &meta, func_id, insn_idx);
if (err)
return err;
- err = record_func_map(env, &meta, func_id, insn_idx);
+ err = record_func_key(env, &meta, func_id, insn_idx);
if (err)
return err;
@@ -5433,6 +5664,10 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
/* We might have learned some bits from the bounds. */
__reg_bound_offset(false_reg);
__reg_bound_offset(true_reg);
+ if (is_jmp32) {
+ __reg_bound_offset32(false_reg);
+ __reg_bound_offset32(true_reg);
+ }
/* Intersecting with the old var_off might have improved our bounds
* slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
* then new var_off is (0; 0x7f...fc) which improves our umax.
@@ -5542,6 +5777,10 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
/* We might have learned some bits from the bounds. */
__reg_bound_offset(false_reg);
__reg_bound_offset(true_reg);
+ if (is_jmp32) {
+ __reg_bound_offset32(false_reg);
+ __reg_bound_offset32(true_reg);
+ }
/* Intersecting with the old var_off might have improved our bounds
* slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
* then new var_off is (0; 0x7f...fc) which improves our umax.
@@ -6124,6 +6363,11 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
break;
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ if (!env->prog->aux->attach_btf_id)
+ return 0;
+ range = tnum_const(0);
+ break;
default:
return 0;
}
@@ -6406,6 +6650,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
+ struct bpf_func_info_aux *info_aux = NULL;
const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
@@ -6439,6 +6684,9 @@ static int check_btf_func(struct bpf_verifier_env *env,
krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
if (!krecord)
return -ENOMEM;
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+ if (!info_aux)
+ goto err_free;
for (i = 0; i < nfuncs; i++) {
ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
@@ -6490,29 +6738,31 @@ static int check_btf_func(struct bpf_verifier_env *env,
ret = -EINVAL;
goto err_free;
}
-
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
prog->aux->func_info = krecord;
prog->aux->func_info_cnt = nfuncs;
+ prog->aux->func_info_aux = info_aux;
return 0;
err_free:
kvfree(krecord);
+ kfree(info_aux);
return ret;
}
static void adjust_btf_func(struct bpf_verifier_env *env)
{
+ struct bpf_prog_aux *aux = env->prog->aux;
int i;
- if (!env->prog->aux->func_info)
+ if (!aux->func_info)
return;
for (i = 0; i < env->subprog_cnt; i++)
- env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
+ aux->func_info[i].insn_off = env->subprog_info[i].start;
}
#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
@@ -7440,6 +7690,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
+ case PTR_TO_BTF_ID:
return false;
default:
return true;
@@ -7492,6 +7743,9 @@ static int do_check(struct bpf_verifier_env *env)
0 /* frameno */,
0 /* subprogno, zero == main subprog */);
+ if (btf_check_func_arg_match(env, 0))
+ return -EINVAL;
+
for (;;) {
struct bpf_insn *insn;
u8 class;
@@ -8008,11 +8262,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
* will be used by the valid program until it's unloaded
* and all maps are released in free_used_maps()
*/
- map = bpf_map_inc(map, false);
- if (IS_ERR(map)) {
- fdput(f);
- return PTR_ERR(map);
- }
+ bpf_map_inc(map);
aux->map_index = env->used_map_cnt;
env->used_maps[env->used_map_cnt++] = map;
@@ -8581,6 +8831,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
case PTR_TO_XDP_SOCK:
convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
break;
+ case PTR_TO_BTF_ID:
+ if (type == BPF_WRITE) {
+ verbose(env, "Writes through BTF pointers are not allowed\n");
+ return -EINVAL;
+ }
+ insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
+ env->prog->aux->num_exentries++;
+ continue;
default:
continue;
}
@@ -8871,6 +9129,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
static int fixup_bpf_calls(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
+ bool expect_blinding = bpf_jit_blinding_enabled(prog);
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
const int insn_cnt = prog->len;
@@ -8879,7 +9138,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
- int i, cnt, delta = 0;
+ int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
@@ -9023,6 +9282,26 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->code = BPF_JMP | BPF_TAIL_CALL;
aux = &env->insn_aux_data[i + delta];
+ if (prog->jit_requested && !expect_blinding &&
+ !bpf_map_key_poisoned(aux) &&
+ !bpf_map_ptr_poisoned(aux) &&
+ !bpf_map_ptr_unpriv(aux)) {
+ struct bpf_jit_poke_descriptor desc = {
+ .reason = BPF_POKE_REASON_TAIL_CALL,
+ .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
+ .tail_call.key = bpf_map_key_immediate(aux),
+ };
+
+ ret = bpf_jit_add_poke_descriptor(prog, &desc);
+ if (ret < 0) {
+ verbose(env, "adding tail call poke descriptor failed\n");
+ return ret;
+ }
+
+ insn->imm = ret + 1;
+ continue;
+ }
+
if (!bpf_map_ptr_unpriv(aux))
continue;
@@ -9037,7 +9316,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
return -EINVAL;
}
- map_ptr = BPF_MAP_PTR(aux->map_state);
+ map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
@@ -9071,7 +9350,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
- map_ptr = BPF_MAP_PTR(aux->map_state);
+ map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
ops = map_ptr->ops;
if (insn->imm == BPF_FUNC_map_lookup_elem &&
ops->map_gen_lookup) {
@@ -9151,6 +9430,23 @@ patch_call_imm:
insn->imm = fn->func - __bpf_call_base;
}
+ /* Since poke tab is now finalized, publish aux to tracker. */
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
+ if (!map_ptr->ops->map_poke_track ||
+ !map_ptr->ops->map_poke_untrack ||
+ !map_ptr->ops->map_poke_run) {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
+
+ ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
+ if (ret < 0) {
+ verbose(env, "tracking tail call prog failed\n");
+ return ret;
+ }
+ }
+
return 0;
}
@@ -9208,6 +9504,161 @@ static void print_verification_stats(struct bpf_verifier_env *env)
env->peak_states, env->longest_mark_read_walk);
}
+static int check_attach_btf_id(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+ u32 btf_id = prog->aux->attach_btf_id;
+ const char prefix[] = "btf_trace_";
+ int ret = 0, subprog = -1, i;
+ struct bpf_trampoline *tr;
+ const struct btf_type *t;
+ bool conservative = true;
+ const char *tname;
+ struct btf *btf;
+ long addr;
+ u64 key;
+
+ if (prog->type != BPF_PROG_TYPE_TRACING)
+ return 0;
+
+ if (!btf_id) {
+ verbose(env, "Tracing programs must provide btf_id\n");
+ return -EINVAL;
+ }
+ btf = bpf_prog_get_target_btf(prog);
+ if (!btf) {
+ verbose(env,
+ "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, btf_id);
+ if (!t) {
+ verbose(env, "attach_btf_id %u is invalid\n", btf_id);
+ return -EINVAL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname) {
+ verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
+ return -EINVAL;
+ }
+ if (tgt_prog) {
+ struct bpf_prog_aux *aux = tgt_prog->aux;
+
+ for (i = 0; i < aux->func_info_cnt; i++)
+ if (aux->func_info[i].type_id == btf_id) {
+ subprog = i;
+ break;
+ }
+ if (subprog == -1) {
+ verbose(env, "Subprog %s doesn't exist\n", tname);
+ return -EINVAL;
+ }
+ conservative = aux->func_info_aux[subprog].unreliable;
+ key = ((u64)aux->id) << 32 | btf_id;
+ } else {
+ key = btf_id;
+ }
+
+ switch (prog->expected_attach_type) {
+ case BPF_TRACE_RAW_TP:
+ if (tgt_prog) {
+ verbose(env,
+ "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
+ return -EINVAL;
+ }
+ if (!btf_type_is_typedef(t)) {
+ verbose(env, "attach_btf_id %u is not a typedef\n",
+ btf_id);
+ return -EINVAL;
+ }
+ if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
+ verbose(env, "attach_btf_id %u points to wrong type name %s\n",
+ btf_id, tname);
+ return -EINVAL;
+ }
+ tname += sizeof(prefix) - 1;
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_ptr(t))
+ /* should never happen in valid vmlinux build */
+ return -EINVAL;
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ /* should never happen in valid vmlinux build */
+ return -EINVAL;
+
+ /* remember two read only pointers that are valid for
+ * the life time of the kernel
+ */
+ prog->aux->attach_func_name = tname;
+ prog->aux->attach_func_proto = t;
+ prog->aux->attach_btf_trace = true;
+ return 0;
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ if (!btf_type_is_func(t)) {
+ verbose(env, "attach_btf_id %u is not a function\n",
+ btf_id);
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EINVAL;
+ tr = bpf_trampoline_lookup(key);
+ if (!tr)
+ return -ENOMEM;
+ prog->aux->attach_func_name = tname;
+ /* t is either vmlinux type or another program's type */
+ prog->aux->attach_func_proto = t;
+ mutex_lock(&tr->mutex);
+ if (tr->func.addr) {
+ prog->aux->trampoline = tr;
+ goto out;
+ }
+ if (tgt_prog && conservative) {
+ prog->aux->attach_func_proto = NULL;
+ t = NULL;
+ }
+ ret = btf_distill_func_proto(&env->log, btf, t,
+ tname, &tr->func.model);
+ if (ret < 0)
+ goto out;
+ if (tgt_prog) {
+ if (!tgt_prog->jited) {
+ /* for now */
+ verbose(env, "Can trace only JITed BPF progs\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (tgt_prog->type == BPF_PROG_TYPE_TRACING) {
+ /* prevent cycles */
+ verbose(env, "Cannot recursively attach\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
+ } else {
+ addr = kallsyms_lookup_name(tname);
+ if (!addr) {
+ verbose(env,
+ "The address of function %s cannot be found\n",
+ tname);
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+ tr->func.addr = (void *)addr;
+ prog->aux->trampoline = tr;
+out:
+ mutex_unlock(&tr->mutex);
+ if (ret)
+ bpf_trampoline_put(tr);
+ return ret;
+ default:
+ return -EINVAL;
+ }
+}
+
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
union bpf_attr __user *uattr)
{
@@ -9241,6 +9692,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
env->ops = bpf_verifier_ops[env->prog->type];
is_priv = capable(CAP_SYS_ADMIN);
+ if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ mutex_lock(&bpf_verifier_lock);
+ if (!btf_vmlinux)
+ btf_vmlinux = btf_parse_vmlinux();
+ mutex_unlock(&bpf_verifier_lock);
+ }
+
/* grab the mutex to protect few globals used by verifier */
if (!is_priv)
mutex_lock(&bpf_verifier_lock);
@@ -9260,6 +9718,17 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
goto err_unlock;
}
+ if (IS_ERR(btf_vmlinux)) {
+ /* Either gcc or pahole or kernel are broken. */
+ verbose(env, "in-kernel BTF is malformed\n");
+ ret = PTR_ERR(btf_vmlinux);
+ goto skip_full_check;
+ }
+
+ ret = check_attach_btf_id(env);
+ if (ret)
+ goto skip_full_check;
+
env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;