From a610b665ec9ec9ba076e6dd8703750999c81eae4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:41 -0700 Subject: Documentation: Describe bpf reference tracking Document the new pointer types in the verifier and how the pointer ID tracking works to ensure that references which are taken are later released. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- Documentation/networking/filter.txt | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'Documentation/networking') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index e6b4ebb2b243..4443ce958862 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -1125,6 +1125,14 @@ pointer type. The types of pointers describe their base, as follows: PTR_TO_STACK Frame pointer. PTR_TO_PACKET skb->data. PTR_TO_PACKET_END skb->data + headlen; arithmetic forbidden. + PTR_TO_SOCKET Pointer to struct bpf_sock_ops, implicitly refcounted. + PTR_TO_SOCKET_OR_NULL + Either a pointer to a socket, or NULL; socket lookup + returns this type, which becomes a PTR_TO_SOCKET when + checked != NULL. PTR_TO_SOCKET is reference-counted, + so programs must release the reference through the + socket release function before the end of the program. + Arithmetic on these pointers is forbidden. However, a pointer may be offset from this base (as a result of pointer arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable offset'. The former is used when an exactly-known value (e.g. an immediate @@ -1171,6 +1179,13 @@ over the Ethernet header, then reads IHL and addes (IHL * 4), the resulting pointer will have a variable offset known to be 4n+2 for some n, so adding the 2 bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through that pointer are safe. +The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL, common +to all copies of the pointer returned from a socket lookup. This has similar +behaviour to the handling for PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but +it also handles reference tracking for the pointer. PTR_TO_SOCKET implicitly +represents a reference to the corresponding 'struct sock'. To ensure that the +reference is not leaked, it is imperative to NULL-check the reference and in +the non-NULL case, and pass the valid reference to the socket release function. Direct packet access -------------------- @@ -1444,6 +1459,55 @@ Error: 8: (7a) *(u64 *)(r0 +0) = 1 R0 invalid mem access 'imm' +Program that performs a socket lookup then sets the pointer to NULL without +checking it: +value: + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), +Error: + 0: (b7) r2 = 0 + 1: (63) *(u32 *)(r10 -8) = r2 + 2: (bf) r2 = r10 + 3: (07) r2 += -8 + 4: (b7) r3 = 4 + 5: (b7) r4 = 0 + 6: (b7) r5 = 0 + 7: (85) call bpf_sk_lookup_tcp#65 + 8: (b7) r0 = 0 + 9: (95) exit + Unreleased reference id=1, alloc_insn=7 + +Program that performs a socket lookup but does not NULL-check the returned +value: + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), + BPF_EXIT_INSN(), +Error: + 0: (b7) r2 = 0 + 1: (63) *(u32 *)(r10 -8) = r2 + 2: (bf) r2 = r10 + 3: (07) r2 += -8 + 4: (b7) r3 = 4 + 5: (b7) r4 = 0 + 6: (b7) r5 = 0 + 7: (85) call bpf_sk_lookup_tcp#65 + 8: (95) exit + Unreleased reference id=1, alloc_insn=7 + Testing ------- -- cgit v1.2.3 From 7ccc4f1887518a392752ad44c014fd6e1f526da7 Mon Sep 17 00:00:00 2001 From: Konrad Djimeli Date: Thu, 4 Oct 2018 18:01:32 +0100 Subject: bpf: typo fix in Documentation/networking/af_xdp.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a simple typo: Completetion -> Completion Signed-off-by: Konrad Djimeli Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- Documentation/networking/af_xdp.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation/networking') diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index ff929cfab4f4..4ae4f9d8f8fe 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -159,8 +159,8 @@ log2(2048) LSB of the addr will be masked off, meaning that 2048, 2050 and 3000 refers to the same chunk. -UMEM Completetion Ring -~~~~~~~~~~~~~~~~~~~~~~ +UMEM Completion Ring +~~~~~~~~~~~~~~~~~~~~ The Completion Ring is used transfer ownership of UMEM frames from kernel-space to user-space. Just like the Fill ring, UMEM indicies are -- cgit v1.2.3 From 31ce8c4a1ad26852c91de900ebf6888a4d53fc1a Mon Sep 17 00:00:00 2001 From: Arthur Fabre Date: Sun, 7 Oct 2018 09:45:19 +0100 Subject: bpf, doc: Document Jump X addressing mode bpf_asm and the other classic BPF tools support jump conditions comparing register A to register X, in addition to comparing register A with constant K. Only the latter was documented in filter.txt, add two new addressing modes that describe the former. Signed-off-by: Arthur Fabre Signed-off-by: Daniel Borkmann --- Documentation/networking/filter.txt | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'Documentation/networking') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 4443ce958862..2196b824e96c 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -203,11 +203,11 @@ opcodes as defined in linux/filter.h stand for: Instruction Addressing mode Description - ld 1, 2, 3, 4, 10 Load word into A + ld 1, 2, 3, 4, 12 Load word into A ldi 4 Load word into A ldh 1, 2 Load half-word into A ldb 1, 2 Load byte into A - ldx 3, 4, 5, 10 Load word into X + ldx 3, 4, 5, 12 Load word into X ldxi 4 Load word into X ldxb 5 Load byte into X @@ -216,14 +216,14 @@ opcodes as defined in linux/filter.h stand for: jmp 6 Jump to label ja 6 Jump to label - jeq 7, 8 Jump on A == k - jneq 8 Jump on A != k - jne 8 Jump on A != k - jlt 8 Jump on A < k - jle 8 Jump on A <= k - jgt 7, 8 Jump on A > k - jge 7, 8 Jump on A >= k - jset 7, 8 Jump on A & k + jeq 7, 8, 9, 10 Jump on A == + jneq 9, 10 Jump on A != + jne 9, 10 Jump on A != + jlt 9, 10 Jump on A < + jle 9, 10 Jump on A <= + jgt 7, 8, 9, 10 Jump on A > + jge 7, 8, 9, 10 Jump on A >= + jset 7, 8, 9, 10 Jump on A & add 0, 4 A + sub 0, 4 A - @@ -240,7 +240,7 @@ opcodes as defined in linux/filter.h stand for: tax Copy A into X txa Copy X into A - ret 4, 9 Return + ret 4, 11 Return The next table shows addressing formats from the 2nd column: @@ -254,9 +254,11 @@ The next table shows addressing formats from the 2nd column: 5 4*([k]&0xf) Lower nibble * 4 at byte offset k in the packet 6 L Jump label L 7 #k,Lt,Lf Jump to Lt if true, otherwise jump to Lf - 8 #k,Lt Jump to Lt if predicate is true - 9 a/%a Accumulator A - 10 extension BPF extension + 8 x/%x,Lt,Lf Jump to Lt if true, otherwise jump to Lf + 9 #k,Lt Jump to Lt if predicate is true + 10 x/%x,Lt Jump to Lt if predicate is true + 11 a/%a Accumulator A + 12 extension BPF extension The Linux kernel also has a couple of BPF extensions that are used along with the class of load instructions by "overloading" the k argument with -- cgit v1.2.3