diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 16:49:49 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 16:49:49 -0700 |
commit | e0456717e483bb8a9431b80a5bdc99a928b9b003 (patch) | |
tree | 5eb5add2bafd1f20326d70f5cb3b711d00a40b10 /kernel/bpf/core.c | |
parent | 98ec21a01896751b673b6c731ca8881daa8b2c6d (diff) | |
parent | 1ea2d020ba477cb7011a7174e8501a9e04a325d4 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Add TX fast path in mac80211, from Johannes Berg.
2) Add TSO/GRO support to ibmveth, from Thomas Falcon
3) Move away from cached routes in ipv6, just like ipv4, from Martin
KaFai Lau.
4) Lots of new rhashtable tests, from Thomas Graf.
5) Run ingress qdisc lockless, from Alexei Starovoitov.
6) Allow servers to fetch TCP packet headers for SYN packets of new
connections, for fingerprinting. From Eric Dumazet.
7) Add mode parameter to pktgen, for testing receive. From Alexei
Starovoitov.
8) Cache access optimizations via simplifications of build_skb(), from
Alexander Duyck.
9) Move page frag allocator under mm/, also from Alexander.
10) Add xmit_more support to hv_netvsc, from KY Srinivasan.
11) Add a counter guard in case we try to perform endless reclassify
loops in the packet scheduler.
12) Extern flow dissector to be programmable and use it in new "Flower"
classifier. From Jiri Pirko.
13) AF_PACKET fanout rollover fixes, performance improvements, and new
statistics. From Willem de Bruijn.
14) Add netdev driver for GENEVE tunnels, from John W Linville.
15) Add ingress netfilter hooks and filtering, from Pablo Neira Ayuso.
16) Fix handling of epoll edge triggers in TCP, from Eric Dumazet.
17) Add an ECN retry fallback for the initial TCP handshake, from Daniel
Borkmann.
18) Add tail call support to BPF, from Alexei Starovoitov.
19) Add several pktgen helper scripts, from Jesper Dangaard Brouer.
20) Add zerocopy support to AF_UNIX, from Hannes Frederic Sowa.
21) Favor even port numbers for allocation to connect() requests, and
odd port numbers for bind(0), in an effort to help avoid
ip_local_port_range exhaustion. From Eric Dumazet.
22) Add Cavium ThunderX driver, from Sunil Goutham.
23) Allow bpf programs to access skb_iif and dev->ifindex SKB metadata,
from Alexei Starovoitov.
24) Add support for T6 chips in cxgb4vf driver, from Hariprasad Shenai.
25) Double TCP Small Queues default to 256K to accomodate situations
like the XEN driver and wireless aggregation. From Wei Liu.
26) Add more entropy inputs to flow dissector, from Tom Herbert.
27) Add CDG congestion control algorithm to TCP, from Kenneth Klette
Jonassen.
28) Convert ipset over to RCU locking, from Jozsef Kadlecsik.
29) Track and act upon link status of ipv4 route nexthops, from Andy
Gospodarek.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1670 commits)
bridge: vlan: flush the dynamically learned entries on port vlan delete
bridge: multicast: add a comment to br_port_state_selection about blocking state
net: inet_diag: export IPV6_V6ONLY sockopt
stmmac: troubleshoot unexpected bits in des0 & des1
net: ipv4 sysctl option to ignore routes when nexthop link is down
net: track link-status of ipv4 nexthops
net: switchdev: ignore unsupported bridge flags
net: Cavium: Fix MAC address setting in shutdown state
drivers: net: xgene: fix for ACPI support without ACPI
ip: report the original address of ICMP messages
net/mlx5e: Prefetch skb data on RX
net/mlx5e: Pop cq outside mlx5e_get_cqe
net/mlx5e: Remove mlx5e_cq.sqrq back-pointer
net/mlx5e: Remove extra spaces
net/mlx5e: Avoid TX CQE generation if more xmit packets expected
net/mlx5e: Avoid redundant dev_kfree_skb() upon NOP completion
net/mlx5e: Remove re-assignment of wq type in mlx5e_enable_rq()
net/mlx5e: Use skb_shinfo(skb)->gso_segs rather than counting them
net/mlx5e: Static mapping of netdev priv resources to/from netdev TX queues
net/mlx4_en: Use HW counters for rx/tx bytes/packets in PF device
...
Diffstat (limited to 'kernel/bpf/core.c')
-rw-r--r-- | kernel/bpf/core.c | 105 |
1 files changed, 97 insertions, 8 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 54f0e7fcd0e2..c5bedc82bc1c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -26,9 +26,10 @@ #include <linux/vmalloc.h> #include <linux/random.h> #include <linux/moduleloader.h> -#include <asm/unaligned.h> #include <linux/bpf.h> +#include <asm/unaligned.h> + /* Registers */ #define BPF_R0 regs[BPF_REG_0] #define BPF_R1 regs[BPF_REG_1] @@ -62,6 +63,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns ptr = skb_network_header(skb) + k - SKF_NET_OFF; else if (k >= SKF_LL_OFF) ptr = skb_mac_header(skb) + k - SKF_LL_OFF; + if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; @@ -244,6 +246,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, /* Call instruction */ [BPF_JMP | BPF_CALL] = &&JMP_CALL, + [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL, /* Jumps */ [BPF_JMP | BPF_JA] = &&JMP_JA, [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, @@ -286,6 +289,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; + u32 tail_call_cnt = 0; void *ptr; int off; @@ -431,6 +435,30 @@ select_insn: BPF_R4, BPF_R5); CONT; + JMP_TAIL_CALL: { + struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog; + u64 index = BPF_R3; + + if (unlikely(index >= array->map.max_entries)) + goto out; + + if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + goto out; + + tail_call_cnt++; + + prog = READ_ONCE(array->prog[index]); + if (unlikely(!prog)) + goto out; + + ARG1 = BPF_R1; + insn = prog->insnsi; + goto select_insn; +out: + CONT; + } /* JMP */ JMP_JA: insn += insn->off; @@ -615,25 +643,63 @@ load_byte: return 0; } -void __weak bpf_int_jit_compile(struct bpf_prog *prog) +bool bpf_prog_array_compatible(struct bpf_array *array, + const struct bpf_prog *fp) { + if (!array->owner_prog_type) { + /* There's no owner yet where we could check for + * compatibility. + */ + array->owner_prog_type = fp->type; + array->owner_jited = fp->jited; + + return true; + } + + return array->owner_prog_type == fp->type && + array->owner_jited == fp->jited; +} + +static int bpf_check_tail_call(const struct bpf_prog *fp) +{ + struct bpf_prog_aux *aux = fp->aux; + int i; + + for (i = 0; i < aux->used_map_cnt; i++) { + struct bpf_map *map = aux->used_maps[i]; + struct bpf_array *array; + + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + continue; + + array = container_of(map, struct bpf_array, map); + if (!bpf_prog_array_compatible(array, fp)) + return -EINVAL; + } + + return 0; } /** - * bpf_prog_select_runtime - select execution runtime for BPF program + * bpf_prog_select_runtime - select exec runtime for BPF program * @fp: bpf_prog populated with internal BPF program * - * try to JIT internal BPF program, if JIT is not available select interpreter - * BPF program will be executed via BPF_PROG_RUN() macro + * Try to JIT eBPF program, if JIT is not available, use interpreter. + * The BPF program will be executed via BPF_PROG_RUN() macro. */ -void bpf_prog_select_runtime(struct bpf_prog *fp) +int bpf_prog_select_runtime(struct bpf_prog *fp) { fp->bpf_func = (void *) __bpf_prog_run; - /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); - /* Lock whole bpf_prog as read-only */ bpf_prog_lock_ro(fp); + + /* The tail call compatibility check can only be done at + * this late stage as we need to determine, if we deal + * with JITed or non JITed program concatenations and not + * all eBPF JITs might immediately support all features. + */ + return bpf_check_tail_call(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); @@ -663,6 +729,29 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; +const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; +const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; +const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; +const struct bpf_func_proto bpf_get_current_comm_proto __weak; +const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) +{ + return NULL; +} + +/* Always built-in helper functions. */ +const struct bpf_func_proto bpf_tail_call_proto = { + .func = NULL, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + +/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */ +void __weak bpf_int_jit_compile(struct bpf_prog *prog) +{ +} /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. |