summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/s390/net/bpf_jit_comp.c2
-rw-r--r--include/linux/bpf.h2
-rw-r--r--kernel/bpf/memalloc.c44
-rw-r--r--kernel/bpf/mprog.c3
-rw-r--r--kernel/bpf/verifier.c8
-rw-r--r--net/core/sock_map.c4
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_bpf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c51
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c84
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c3
12 files changed, 176 insertions, 41 deletions
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index de2fb12120d2..2861e3360aff 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2513,7 +2513,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
return -E2BIG;
}
- return ret;
+ return tjit.common.prg;
}
bool bpf_jit_supports_subprog_tailcalls(void)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 024e8b28c34b..49f8b691496c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1307,7 +1307,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return NULL;
}
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
#define DEFINE_BPF_DISPATCHER(name)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index cf1941516643..d93ddac283d4 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -965,37 +965,31 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
return !ret ? NULL : ret + LLIST_NODE_SZ;
}
-/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
static __init int bpf_mem_cache_adjust_size(void)
{
- unsigned int size, index;
+ unsigned int size;
- /* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
- * up-to 256-bytes.
+ /* Adjusting the indexes in size_index() according to the object_size
+ * of underlying slab cache, so bpf_mem_alloc() will select a
+ * bpf_mem_cache with unit_size equal to the object_size of
+ * the underlying slab cache.
+ *
+ * The maximal value of KMALLOC_MIN_SIZE and __kmalloc_minalign() is
+ * 256-bytes, so only do adjustment for [8-bytes, 192-bytes].
*/
- size = KMALLOC_MIN_SIZE;
- if (size <= 192)
- index = size_index[(size - 1) / 8];
- else
- index = fls(size - 1) - 1;
- for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
- size_index[(size - 1) / 8] = index;
+ for (size = 192; size >= 8; size -= 8) {
+ unsigned int kmalloc_size, index;
- /* The minimal alignment is 64-bytes, so disable 96-bytes cache and
- * use 128-bytes cache instead.
- */
- if (KMALLOC_MIN_SIZE >= 64) {
- index = size_index[(128 - 1) / 8];
- for (size = 64 + 8; size <= 96; size += 8)
- size_index[(size - 1) / 8] = index;
- }
+ kmalloc_size = kmalloc_size_roundup(size);
+ if (kmalloc_size == size)
+ continue;
- /* The minimal alignment is 128-bytes, so disable 192-bytes cache and
- * use 256-bytes cache instead.
- */
- if (KMALLOC_MIN_SIZE >= 128) {
- index = fls(256 - 1) - 1;
- for (size = 128 + 8; size <= 192; size += 8)
+ if (kmalloc_size <= 192)
+ index = size_index[(kmalloc_size - 1) / 8];
+ else
+ index = fls(kmalloc_size - 1) - 1;
+ /* Only overwrite if necessary */
+ if (size_index[(size - 1) / 8] != index)
size_index[(size - 1) / 8] = index;
}
diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c
index 32d2c4829eb8..007d98c799e2 100644
--- a/kernel/bpf/mprog.c
+++ b/kernel/bpf/mprog.c
@@ -253,6 +253,9 @@ int bpf_mprog_attach(struct bpf_mprog_entry *entry,
goto out;
}
idx = tidx;
+ } else if (bpf_mprog_total(entry) == bpf_mprog_max()) {
+ ret = -ERANGE;
+ goto out;
}
if (flags & BPF_F_BEFORE) {
tidx = bpf_mprog_pos_before(entry, &rtuple);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bb78212fa5b2..c0c7d137066a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4047,11 +4047,9 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bitmap_from_u64(mask, bt_reg_mask(bt));
for_each_set_bit(i, mask, 32) {
reg = &st->frame[0]->regs[i];
- if (reg->type != SCALAR_VALUE) {
- bt_clear_reg(bt, i);
- continue;
- }
- reg->precise = true;
+ bt_clear_reg(bt, i);
+ if (reg->type == SCALAR_VALUE)
+ reg->precise = true;
}
return 0;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index cb11750b1df5..4292c2ed1828 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -668,6 +668,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
sk = __sock_map_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
+ return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
@@ -1267,6 +1269,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
sk = __sock_hash_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
+ return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0c3040a63ebd..3f66cdeef7de 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1621,16 +1621,13 @@ EXPORT_SYMBOL(tcp_read_sock);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- struct tcp_sock *tp = tcp_sk(sk);
- u32 seq = tp->copied_seq;
struct sk_buff *skb;
int copied = 0;
- u32 offset;
if (sk->sk_state == TCP_LISTEN)
return -ENOTCONN;
- while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
u8 tcp_flags;
int used;
@@ -1643,13 +1640,10 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
copied = used;
break;
}
- seq += used;
copied += used;
- if (tcp_flags & TCPHDR_FIN) {
- ++seq;
+ if (tcp_flags & TCPHDR_FIN)
break;
- }
}
return copied;
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 81f0dff69e0b..327268203001 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -222,6 +222,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int *addr_len)
{
struct tcp_sock *tcp = tcp_sk(sk);
+ int peek = flags & MSG_PEEK;
u32 seq = tcp->copied_seq;
struct sk_psock *psock;
int copied = 0;
@@ -311,7 +312,8 @@ msg_bytes_ready:
copied = -EAGAIN;
}
out:
- WRITE_ONCE(tcp->copied_seq, seq);
+ if (!peek)
+ WRITE_ONCE(tcp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index a53c254c6058..4aabeaa525d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -185,6 +185,8 @@ static void test_cubic(void)
do_test("bpf_cubic", NULL);
+ ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called");
+
bpf_link__destroy(link);
bpf_cubic__destroy(cubic_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 064cc5e8d9ad..dda7060e86a0 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -475,6 +475,55 @@ out:
test_sockmap_drop_prog__destroy(drop);
}
+static void test_sockmap_skb_verdict_peek(void)
+{
+ int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail;
+ struct test_sockmap_pass_prog *pass;
+ char snd[256] = "0123456789";
+ char rcv[256] = "0";
+
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ s = socket_loopback(AF_INET, SOCK_STREAM);
+ if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
+ goto out;
+
+ err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+ if (!ASSERT_OK(err, "create_pairs(s)"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+
+ sent = xsend(p1, snd, sizeof(snd), 0);
+ ASSERT_EQ(sent, sizeof(snd), "xsend(p1)");
+ recvd = recv(c1, rcv, sizeof(rcv), MSG_PEEK);
+ ASSERT_EQ(recvd, sizeof(rcv), "recv(c1)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, sizeof(snd), "after peek ioctl(FIONREAD)");
+ recvd = recv(c1, rcv, sizeof(rcv), 0);
+ ASSERT_EQ(recvd, sizeof(rcv), "recv(p0)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, 0, "after read ioctl(FIONREAD)");
+
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_pass_prog__destroy(pass);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -515,4 +564,6 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
test_sockmap_skb_verdict_fionread(false);
+ if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
+ test_sockmap_skb_verdict_peek();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index 7a2ecd4eca5d..99af79ea21a9 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -2378,3 +2378,87 @@ void serial_test_tc_opts_chain_mixed(void)
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
}
+
+static int generate_dummy_prog(void)
+{
+ const struct bpf_insn prog_insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ const size_t log_buf_sz = 256;
+ char *log_buf;
+ int fd = -1;
+
+ log_buf = malloc(log_buf_sz);
+ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
+ return fd;
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, "tcx_prog", "GPL",
+ prog_insns, prog_insn_cnt, &opts);
+ ASSERT_STREQ(log_buf, "", "log_0");
+ ASSERT_GE(fd, 0, "prog_fd");
+ free(log_buf);
+ return fd;
+}
+
+static void test_tc_opts_max_target(int target, int flags, bool relative)
+{
+ int err, ifindex, i, prog_fd, last_fd = -1;
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ const int max_progs = 63;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ for (i = 0; i < max_progs; i++) {
+ prog_fd = generate_dummy_prog();
+ if (!ASSERT_GE(prog_fd, 0, "dummy_prog"))
+ goto cleanup;
+ err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+ assert_mprog_count_ifindex(ifindex, target, i + 1);
+ if (i == max_progs - 1 && relative)
+ last_fd = prog_fd;
+ else
+ close(prog_fd);
+ }
+
+ prog_fd = generate_dummy_prog();
+ if (!ASSERT_GE(prog_fd, 0, "dummy_prog"))
+ goto cleanup;
+ opta.flags = flags;
+ if (last_fd > 0)
+ opta.relative_fd = last_fd;
+ err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_64_attach");
+ assert_mprog_count_ifindex(ifindex, target, max_progs);
+ close(prog_fd);
+cleanup:
+ if (last_fd > 0)
+ close(last_fd);
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void serial_test_tc_opts_max(void)
+{
+ test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false);
+
+ test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_BEFORE, false);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_BEFORE, true);
+
+ test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_AFTER, true);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_AFTER, false);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index d9660e7200e2..c997e3e3d3fb 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -490,6 +490,8 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
}
}
+int bpf_cubic_acked_called = 0;
+
void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
const struct ack_sample *sample)
{
@@ -497,6 +499,7 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
struct bictcp *ca = inet_csk_ca(sk);
__u32 delay;
+ bpf_cubic_acked_called = 1;
/* Some calls are for duplicates without timetamps */
if (sample->rtt_us < 0)
return;