diff options
Diffstat (limited to 'tools')
143 files changed, 13090 insertions, 838 deletions
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h index 12835ea0e417..378c051fa177 100644 --- a/tools/arch/arm64/include/asm/barrier.h +++ b/tools/arch/arm64/include/asm/barrier.h @@ -14,74 +14,75 @@ #define wmb() asm volatile("dmb ishst" ::: "memory") #define rmb() asm volatile("dmb ishld" ::: "memory") -#define smp_store_release(p, v) \ -do { \ - union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ - \ - switch (sizeof(*p)) { \ - case 1: \ - asm volatile ("stlrb %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u8 *)__u.__c) \ - : "memory"); \ - break; \ - case 2: \ - asm volatile ("stlrh %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u16 *)__u.__c) \ - : "memory"); \ - break; \ - case 4: \ - asm volatile ("stlr %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u32 *)__u.__c) \ - : "memory"); \ - break; \ - case 8: \ - asm volatile ("stlr %1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u64 *)__u.__c) \ - : "memory"); \ - break; \ - default: \ - /* Only to shut up gcc ... */ \ - mb(); \ - break; \ - } \ +#define smp_store_release(p, v) \ +do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (v) }; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("stlrb %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u8_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile ("stlrh %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u16_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u32_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u64_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ } while (0) -#define smp_load_acquire(p) \ -({ \ - union { typeof(*p) __val; char __c[1]; } __u; \ - \ - switch (sizeof(*p)) { \ - case 1: \ - asm volatile ("ldarb %w0, %1" \ - : "=r" (*(__u8 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 2: \ - asm volatile ("ldarh %w0, %1" \ - : "=r" (*(__u16 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 4: \ - asm volatile ("ldar %w0, %1" \ - : "=r" (*(__u32 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 8: \ - asm volatile ("ldar %0, %1" \ - : "=r" (*(__u64 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - default: \ - /* Only to shut up gcc ... */ \ - mb(); \ - break; \ - } \ - __u.__val; \ +#define smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("ldarb %w0, %1" \ + : "=r" (*(__u8_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 2: \ + asm volatile ("ldarh %w0, %1" \ + : "=r" (*(__u16_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 4: \ + asm volatile ("ldar %w0, %1" \ + : "=r" (*(__u32_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (*(__u64_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ + __u.__val; \ }) #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 89a048c2faec..28c4a502b419 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -331,6 +331,8 @@ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index edbe81534c6d..d07ccf8a23f7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -137,4 +137,10 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index f55a2daed59b..5318dcb2085e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -42,7 +42,8 @@ MAP COMMANDS | | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash** -| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** } +| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** +| | **queue** | **stack** } DESCRIPTION =========== @@ -171,4 +172,10 @@ The following three commands are equivalent: SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 408ec30d8872..ed87c9b619ad 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -136,4 +136,10 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index e3eb0eab7641..f4c5e5538bb8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -78,4 +78,10 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index ac4e904b10fb..bb1aeb98b6da 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -15,18 +15,20 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** + | **loadall** | **help** } MAP COMMANDS ============= | **bpftool** **prog { show | list }** [*PROG*] -| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}] -| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] +| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] +| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] -| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP* -| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP* +| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] +| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog tracelog** | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -39,7 +41,9 @@ MAP COMMANDS | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | } -| *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** } +| *ATTACH_TYPE* := { +| **msg_verdict** | **skb_verdict** | **skb_parse** | **flow_dissector** +| } DESCRIPTION @@ -52,7 +56,7 @@ DESCRIPTION Output will start with program ID followed by program type and zero or more named attributes (depending on kernel version). - **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }] + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] Dump eBPF instructions of the program from the kernel. By default, eBPF will be disassembled and printed to standard output in human-readable format. In this case, **opcodes** @@ -65,13 +69,23 @@ DESCRIPTION built instead, and eBPF instructions will be presented with CFG in DOT format, on standard output. - **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }] + If the prog has line_info available, the source line will + be displayed by default. If **linum** is specified, + the filename, line number and line column will also be + displayed on top of the source line. + + **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] Dump jited image (host machine code) of the program. If *FILE* is specified image will be written to a file, otherwise it will be disassembled and printed to stdout. **opcodes** controls if raw opcodes will be printed. + If the prog has line_info available, the source line will + be displayed by default. If **linum** is specified, + the filename, line number and line column will also be + displayed on top of the source line. + **bpftool prog pin** *PROG* *FILE* Pin program *PROG* as *FILE*. @@ -79,8 +93,11 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] - Load bpf program from binary *OBJ* and pin as *FILE*. + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] + Load bpf program(s) from binary *OBJ* and pin as *PATH*. + **bpftool prog load** pins only the first program from the + *OBJ* as *PATH*. **bpftool prog loadall** pins all programs + from the *OBJ* under *PATH* directory. **type** is optional, if not specified program type will be inferred from section names. By default bpftool will create new maps as declared in the ELF @@ -92,18 +109,32 @@ DESCRIPTION use, referring to it by **id** or through a **pinned** file. If **dev** *NAME* is specified program will be loaded onto given networking device (offload). + Optional **pinmaps** argument can be provided to pin all + maps under *MAP_DIR* directory. - Note: *FILE* must be located in *bpffs* mount. It must not + Note: *PATH* must be located in *bpffs* mount. It must not contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP* - Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*) - to the map *MAP*. - - **bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP* - Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*) - from the map *MAP*. + **bpftool prog attach** *PROG* *ATTACH_TYPE* [*MAP*] + Attach bpf program *PROG* (with type specified by + *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP* + parameter, with the exception of *flow_dissector* which is + attached to current networking name space. + + **bpftool prog detach** *PROG* *ATTACH_TYPE* [*MAP*] + Detach bpf program *PROG* (with type specified by + *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP* + parameter, with the exception of *flow_dissector* which is + detached from the current networking name space. + + **bpftool prog tracelog** + Dump the trace pipe of the system to the console (stdout). + Hit <Ctrl+C> to stop printing. BPF programs can write to this + trace pipe at runtime with the **bpf_trace_printk()** helper. + This should be used only for debugging purposes. For + streaming data from BPF programs to user space, one can use + perf events (see also **bpftool-map**\ (8)). **bpftool prog help** Print short help message. @@ -124,7 +155,8 @@ OPTIONS Generate human-readable JSON output. Implies **-j**. -f, --bpffs - Show file names of pinned programs. + When showing BPF programs, show file names of pinned + programs. EXAMPLES ======== @@ -206,4 +238,10 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 04cd4f92ab89..129b7a9c0f9b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -63,5 +63,10 @@ OPTIONS SEE ALSO ======== - **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) - **bpftool-perf**\ (8), **bpftool-net**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dac7eff4c7e5..1bea6b979082 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -53,7 +53,7 @@ ifneq ($(EXTRA_LDFLAGS),) LDFLAGS += $(EXTRA_LDFLAGS) endif -LIBS = -lelf -lbfd -lopcodes $(LIBBPF) +LIBS = -lelf $(LIBBPF) INSTALL ?= install RM ?= rm -f @@ -90,7 +90,16 @@ include $(wildcard $(OUTPUT)*.d) all: $(OUTPUT)bpftool -SRCS = $(wildcard *.c) +BFD_SRCS = jit_disasm.c + +SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c)) + +ifeq ($(feature-libbfd),1) +CFLAGS += -DHAVE_LIBBFD_SUPPORT +SRCS += $(BFD_SRCS) +LIBS += -lbfd -lopcodes +endif + OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 3f78e6404589..a57febd6abb1 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -191,7 +191,7 @@ _bpftool() # Deal with simplest keywords case $prev in - help|hex|opcodes|visual) + help|hex|opcodes|visual|linum) return 0 ;; tag) @@ -243,16 +243,20 @@ _bpftool() # Completion depends on object and command in use case $object in prog) - if [[ $command != "load" ]]; then - case $prev in - id) - _bpftool_get_prog_ids - return 0 - ;; - esac - fi + # Complete id, only for subcommands that use prog (but no map) ids + case $command in + show|list|dump|pin) + case $prev in + id) + _bpftool_get_prog_ids + return 0 + ;; + esac + ;; + esac local PROG_TYPE='id pinned tag' + local MAP_TYPE='id pinned' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -274,10 +278,10 @@ _bpftool() *) _bpftool_once_attr 'file' if _bpftool_search_list 'xlated'; then - COMPREPLY+=( $( compgen -W 'opcodes visual' -- \ + COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ "$cur" ) ) else - COMPREPLY+=( $( compgen -W 'opcodes' -- \ + COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ "$cur" ) ) fi return 0 @@ -293,23 +297,45 @@ _bpftool() return 0 ;; attach|detach) - if [[ ${#words[@]} == 7 ]]; then - COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) - return 0 - fi - - if [[ ${#words[@]} == 6 ]]; then - COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) ) - return 0 - fi - - if [[ $prev == "$command" ]]; then - COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) - return 0 - fi - return 0 + case $cword in + 3) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + 4) + case $prev in + id) + _bpftool_get_prog_ids + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + 5) + COMPREPLY=( $( compgen -W 'msg_verdict skb_verdict \ + skb_parse flow_dissector' -- "$cur" ) ) + return 0 + ;; + 6) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + 7) + case $prev in + id) + _bpftool_get_map_ids + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + esac ;; - load) + load|loadall) local obj if [[ ${#words[@]} -lt 6 ]]; then @@ -338,7 +364,16 @@ _bpftool() case $prev in type) - COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \ + COMPREPLY=( $( compgen -W "socket kprobe \ + kretprobe classifier flow_dissector \ + action tracepoint raw_tracepoint \ + xdp perf_event cgroup/skb cgroup/sock \ + cgroup/dev lwt_in lwt_out lwt_xmit \ + lwt_seg6local sockops sk_skb sk_msg \ + lirc_mode2 cgroup/bind4 cgroup/bind6 \ + cgroup/connect4 cgroup/connect6 \ + cgroup/sendmsg4 cgroup/sendmsg6 \ + cgroup/post_bind4 cgroup/post_bind6" -- \ "$cur" ) ) return 0 ;; @@ -346,7 +381,7 @@ _bpftool() _bpftool_get_map_ids return 0 ;; - pinned) + pinned|pinmaps) _filedir return 0 ;; @@ -358,14 +393,18 @@ _bpftool() COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) _bpftool_once_attr 'type' _bpftool_once_attr 'dev' + _bpftool_once_attr 'pinmaps' return 0 ;; esac ;; + tracelog) + return 0 + ;; *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'dump help pin attach detach load \ - show list' -- "$cur" ) ) + show list tracelog' -- "$cur" ) ) ;; esac ;; @@ -400,7 +439,7 @@ _bpftool() lru_percpu_hash lpm_trie array_of_maps \ hash_of_maps devmap sockmap cpumap xskmap \ sockhash cgroup_storage reuseport_sockarray \ - percpu_cgroup_storage' -- \ + percpu_cgroup_storage queue stack' -- \ "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 55bc512a1831..2392ccdc918f 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -32,7 +32,7 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw, } static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, - const void *data) + __u8 bit_offset, const void *data) { int actual_type_id; @@ -40,7 +40,7 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, if (actual_type_id < 0) return actual_type_id; - return btf_dumper_do_type(d, actual_type_id, 0, data); + return btf_dumper_do_type(d, actual_type_id, bit_offset, data); } static void btf_dumper_enum(const void *data, json_writer_t *jw) @@ -237,7 +237,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: - return btf_dumper_modifier(d, type_id, data); + return btf_dumper_modifier(d, type_id, bit_offset, data); default: jsonw_printf(d->jw, "(unsupported-kind"); return -EINVAL; @@ -249,3 +249,203 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, { return btf_dumper_do_type(d, type_id, 0, data); } + +#define BTF_PRINT_ARG(...) \ + do { \ + pos += snprintf(func_sig + pos, size - pos, \ + __VA_ARGS__); \ + if (pos >= size) \ + return -1; \ + } while (0) +#define BTF_PRINT_TYPE(type) \ + do { \ + pos = __btf_dumper_type_only(btf, type, func_sig, \ + pos, size); \ + if (pos == -1) \ + return -1; \ + } while (0) + +static int btf_dump_func(const struct btf *btf, char *func_sig, + const struct btf_type *func_proto, + const struct btf_type *func, int pos, int size); + +static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, + char *func_sig, int pos, int size) +{ + const struct btf_type *proto_type; + const struct btf_array *array; + const struct btf_type *t; + + if (!type_id) { + BTF_PRINT_ARG("void "); + return pos; + } + + t = btf__type_by_id(btf, type_id); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_STRUCT: + BTF_PRINT_ARG("struct %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_UNION: + BTF_PRINT_ARG("union %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_ENUM: + BTF_PRINT_ARG("enum %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_ARRAY: + array = (struct btf_array *)(t + 1); + BTF_PRINT_TYPE(array->type); + BTF_PRINT_ARG("[%d]", array->nelems); + break; + case BTF_KIND_PTR: + BTF_PRINT_TYPE(t->type); + BTF_PRINT_ARG("* "); + break; + case BTF_KIND_UNKN: + case BTF_KIND_FWD: + case BTF_KIND_TYPEDEF: + return -1; + case BTF_KIND_VOLATILE: + BTF_PRINT_ARG("volatile "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_CONST: + BTF_PRINT_ARG("const "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_RESTRICT: + BTF_PRINT_ARG("restrict "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_FUNC_PROTO: + pos = btf_dump_func(btf, func_sig, t, NULL, pos, size); + if (pos == -1) + return -1; + break; + case BTF_KIND_FUNC: + proto_type = btf__type_by_id(btf, t->type); + pos = btf_dump_func(btf, func_sig, proto_type, t, pos, size); + if (pos == -1) + return -1; + break; + default: + return -1; + } + + return pos; +} + +static int btf_dump_func(const struct btf *btf, char *func_sig, + const struct btf_type *func_proto, + const struct btf_type *func, int pos, int size) +{ + int i, vlen; + + BTF_PRINT_TYPE(func_proto->type); + if (func) + BTF_PRINT_ARG("%s(", btf__name_by_offset(btf, func->name_off)); + else + BTF_PRINT_ARG("("); + vlen = BTF_INFO_VLEN(func_proto->info); + for (i = 0; i < vlen; i++) { + struct btf_param *arg = &((struct btf_param *)(func_proto + 1))[i]; + + if (i) + BTF_PRINT_ARG(", "); + if (arg->type) { + BTF_PRINT_TYPE(arg->type); + BTF_PRINT_ARG("%s", + btf__name_by_offset(btf, arg->name_off)); + } else { + BTF_PRINT_ARG("..."); + } + } + BTF_PRINT_ARG(")"); + + return pos; +} + +void btf_dumper_type_only(const struct btf *btf, __u32 type_id, char *func_sig, + int size) +{ + int err; + + func_sig[0] = '\0'; + if (!btf) + return; + + err = __btf_dumper_type_only(btf, type_id, func_sig, 0, size); + if (err < 0) + func_sig[0] = '\0'; +} + +static const char *ltrim(const char *s) +{ + while (isspace(*s)) + s++; + + return s; +} + +void btf_dump_linfo_plain(const struct btf *btf, + const struct bpf_line_info *linfo, + const char *prefix, bool linum) +{ + const char *line = btf__name_by_offset(btf, linfo->line_off); + + if (!line) + return; + line = ltrim(line); + + if (!prefix) + prefix = ""; + + if (linum) { + const char *file = btf__name_by_offset(btf, linfo->file_name_off); + + /* More forgiving on file because linum option is + * expected to provide more info than the already + * available src line. + */ + if (!file) + file = ""; + + printf("%s%s [file:%s line_num:%u line_col:%u]\n", + prefix, line, file, + BPF_LINE_INFO_LINE_NUM(linfo->line_col), + BPF_LINE_INFO_LINE_COL(linfo->line_col)); + } else { + printf("%s%s\n", prefix, line); + } +} + +void btf_dump_linfo_json(const struct btf *btf, + const struct bpf_line_info *linfo, bool linum) +{ + const char *line = btf__name_by_offset(btf, linfo->line_off); + + if (line) + jsonw_string_field(json_wtr, "src", ltrim(line)); + + if (linum) { + const char *file = btf__name_by_offset(btf, linfo->file_name_off); + + if (file) + jsonw_string_field(json_wtr, "file", file); + + if (BPF_LINE_INFO_LINE_NUM(linfo->line_col)) + jsonw_int_field(json_wtr, "line_num", + BPF_LINE_INFO_LINE_NUM(linfo->line_col)); + + if (BPF_LINE_INFO_LINE_COL(linfo->line_col)) + jsonw_int_field(json_wtr, "line_col", + BPF_LINE_INFO_LINE_COL(linfo->line_col)); + } +} diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 25af85304ebe..172d3761d9ab 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -46,8 +46,8 @@ #include <linux/magic.h> #include <net/if.h> #include <sys/mount.h> +#include <sys/resource.h> #include <sys/stat.h> -#include <sys/types.h> #include <sys/vfs.h> #include <bpf.h> @@ -99,6 +99,13 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } +void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + static int mnt_bpffs(const char *target, char *buff, size_t bufflen) { bool bind_done = false; @@ -130,16 +137,17 @@ static int mnt_bpffs(const char *target, char *buff, size_t bufflen) return 0; } -int open_obj_pinned(char *path) +int open_obj_pinned(char *path, bool quiet) { int fd; fd = bpf_obj_get(path); if (fd < 0) { - p_err("bpf obj get (%s): %s", path, - errno == EACCES && !is_bpffs(dirname(path)) ? - "directory not in bpf file system (bpffs)" : - strerror(errno)); + if (!quiet) + p_err("bpf obj get (%s): %s", path, + errno == EACCES && !is_bpffs(dirname(path)) ? + "directory not in bpf file system (bpffs)" : + strerror(errno)); return -1; } @@ -151,7 +159,7 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) enum bpf_obj_type type; int fd; - fd = open_obj_pinned(path); + fd = open_obj_pinned(path, false); if (fd < 0) return -1; @@ -169,34 +177,23 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return fd; } -int do_pin_fd(int fd, const char *name) +int mount_bpffs_for_pin(const char *name) { char err_str[ERR_MAX_LEN]; char *file; char *dir; int err = 0; - err = bpf_obj_pin(fd, name); - if (!err) - goto out; - file = malloc(strlen(name) + 1); strcpy(file, name); dir = dirname(file); - if (errno != EPERM || is_bpffs(dir)) { - p_err("can't pin the object (%s): %s", name, strerror(errno)); + if (is_bpffs(dir)) + /* nothing to do if already mounted */ goto out_free; - } - /* Attempt to mount bpffs, then retry pinning. */ err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); - if (!err) { - err = bpf_obj_pin(fd, name); - if (err) - p_err("can't pin the object (%s): %s", name, - strerror(errno)); - } else { + if (err) { err_str[ERR_MAX_LEN - 1] = '\0'; p_err("can't mount BPF file system to pin the object (%s): %s", name, err_str); @@ -204,10 +201,20 @@ int do_pin_fd(int fd, const char *name) out_free: free(file); -out: return err; } +int do_pin_fd(int fd, const char *name) +{ + int err; + + err = mount_bpffs_for_pin(name); + if (err) + return err; + + return bpf_obj_pin(fd, name); +} + int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) { unsigned int id; @@ -268,7 +275,7 @@ int get_fd_type(int fd) char buf[512]; ssize_t n; - snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd); + snprintf(path, sizeof(path), "/proc/self/fd/%d", fd); n = readlink(path, buf, sizeof(buf)); if (n < 0) { @@ -296,7 +303,7 @@ char *get_fdinfo(int fd, const char *key) ssize_t n; FILE *fdi; - snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", getpid(), fd); + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); fdi = fopen(path, "r"); if (!fdi) { @@ -304,7 +311,7 @@ char *get_fdinfo(int fd, const char *key) return NULL; } - while ((n = getline(&line, &line_n, fdi))) { + while ((n = getline(&line, &line_n, fdi)) > 0) { char *value; int len; @@ -384,7 +391,7 @@ int build_pinned_obj_table(struct pinned_obj_table *tab, while ((ftse = fts_read(fts))) { if (!(ftse->fts_info & FTS_F)) continue; - fd = open_obj_pinned(ftse->fts_path); + fd = open_obj_pinned(ftse->fts_path, true); if (fd < 0) continue; @@ -597,7 +604,7 @@ void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) if (!ifindex) return; - printf(" dev "); + printf(" offloaded_to "); if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) printf("%s", name); else diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index c75ffd9ce2bb..f381f8628ce9 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -19,29 +19,21 @@ #include <string.h> #include <bfd.h> #include <dis-asm.h> -#include <sys/types.h> #include <sys/stat.h> #include <limits.h> +#include <libbpf.h> #include "json_writer.h" #include "main.h" static void get_exec_path(char *tpath, size_t size) { + const char *path = "/proc/self/exe"; ssize_t len; - char *path; - - snprintf(tpath, size, "/proc/%d/exe", (int) getpid()); - tpath[size - 1] = 0; - - path = strdup(tpath); - assert(path); len = readlink(path, tpath, size - 1); assert(len > 0); tpath[len] = 0; - - free(path); } static int oper_count; @@ -77,10 +69,16 @@ static int fprintf_json(void *out, const char *fmt, ...) } void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options) + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) { + const struct bpf_line_info *linfo = NULL; disassembler_ftype disassemble; struct disassemble_info info; + unsigned int nr_skip = 0; int count, i, pc = 0; char tpath[PATH_MAX]; bfd *bfdf; @@ -109,7 +107,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (inf) { bfdf->arch_info = inf; } else { - p_err("No libfd support for %s", arch); + p_err("No libbfd support for %s", arch); return; } } @@ -136,12 +134,26 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (json_output) jsonw_start_array(json_wtr); do { + if (prog_linfo) { + linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, + func_ksym + pc, + func_idx, + nr_skip); + if (linfo) + nr_skip++; + } + if (json_output) { jsonw_start_object(json_wtr); oper_count = 0; + if (linfo) + btf_dump_linfo_json(btf, linfo, linum); jsonw_name(json_wtr, "pc"); jsonw_printf(json_wtr, "\"0x%x\"", pc); } else { + if (linfo) + btf_dump_linfo_plain(btf, linfo, "; ", + linum); printf("%4x:\t", pc); } @@ -183,3 +195,9 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, bfd_close(bfdf); } + +int disasm_init(void) +{ + bfd_init(); + return 0; +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 75a3296dc0bc..5c4c1cd5a7ba 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,7 +31,6 @@ * SOFTWARE. */ -#include <bfd.h> #include <ctype.h> #include <errno.h> #include <getopt.h> @@ -399,8 +398,6 @@ int main(int argc, char **argv) if (argc < 0) usage(); - bfd_init(); - ret = cmd_select(cmds, argc, argv, do_help); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 28322ace2856..0b37599f8cda 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -78,6 +78,32 @@ #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE }" +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", +}; + enum bpf_obj_type { BPF_OBJ_UNKNOWN, BPF_OBJ_PROG, @@ -100,6 +126,8 @@ bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __noreturn; +void set_max_rlimit(void); + struct pinned_obj_table { DECLARE_HASHTABLE(table, 16); }; @@ -110,6 +138,9 @@ struct pinned_obj { struct hlist_node hash; }; +struct btf; +struct bpf_line_info; + int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); @@ -127,8 +158,9 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv, int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); -int open_obj_pinned(char *path); +int open_obj_pinned(char *path, bool quiet); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); +int mount_bpffs_for_pin(const char *name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); int do_pin_fd(int fd, const char *name); @@ -138,14 +170,38 @@ int do_event_pipe(int argc, char **argv); int do_cgroup(int argc, char **arg); int do_perf(int argc, char **arg); int do_net(int argc, char **arg); +int do_tracelog(int argc, char **arg); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); int map_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); +#ifdef HAVE_LIBBFD_SUPPORT +struct bpf_prog_linfo; +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum); +int disasm_init(void); +#else +static inline void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options); + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) +{ +} +static inline int disasm_init(void) +{ + p_err("No libbfd support"); + return -1; +} +#endif void print_data_json(uint8_t *data, size_t len); void print_hex_data_json(uint8_t *data, size_t len); @@ -170,6 +226,14 @@ struct btf_dumper { */ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, const void *data); +void btf_dumper_type_only(const struct btf *btf, __u32 func_type_id, + char *func_only, int size); + +void btf_dump_linfo_plain(const struct btf *btf, + const struct bpf_line_info *linfo, + const char *prefix, bool linum); +void btf_dump_linfo_json(const struct btf *btf, + const struct bpf_line_info *linfo, bool linum); struct nlattr; struct ifinfomsg; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 7bf38f0e152e..8469ea6cf1c8 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -52,28 +52,30 @@ #include "main.h" static const char * const map_type_name[] = { - [BPF_MAP_TYPE_UNSPEC] = "unspec", - [BPF_MAP_TYPE_HASH] = "hash", - [BPF_MAP_TYPE_ARRAY] = "array", - [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", - [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", - [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", - [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", - [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", - [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", - [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", - [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", - [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", - [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", - [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", - [BPF_MAP_TYPE_DEVMAP] = "devmap", - [BPF_MAP_TYPE_SOCKMAP] = "sockmap", - [BPF_MAP_TYPE_CPUMAP] = "cpumap", - [BPF_MAP_TYPE_XSKMAP] = "xskmap", - [BPF_MAP_TYPE_SOCKHASH] = "sockhash", - [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", - [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_XSKMAP] = "xskmap", + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", + [BPF_MAP_TYPE_QUEUE] = "queue", + [BPF_MAP_TYPE_STACK] = "stack", }; static bool map_is_per_cpu(__u32 type) @@ -215,70 +217,6 @@ err_end_obj: return ret; } -static int get_btf(struct bpf_map_info *map_info, struct btf **btf) -{ - struct bpf_btf_info btf_info = { 0 }; - __u32 len = sizeof(btf_info); - __u32 last_size; - int btf_fd; - void *ptr; - int err; - - err = 0; - *btf = NULL; - btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id); - if (btf_fd < 0) - return 0; - - /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so - * let's start with a sane default - 4KiB here - and resize it only if - * bpf_obj_get_info_by_fd() needs a bigger buffer. - */ - btf_info.btf_size = 4096; - last_size = btf_info.btf_size; - ptr = malloc(last_size); - if (!ptr) { - err = -ENOMEM; - goto exit_free; - } - - bzero(ptr, last_size); - btf_info.btf = ptr_to_u64(ptr); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); - - if (!err && btf_info.btf_size > last_size) { - void *temp_ptr; - - last_size = btf_info.btf_size; - temp_ptr = realloc(ptr, last_size); - if (!temp_ptr) { - err = -ENOMEM; - goto exit_free; - } - ptr = temp_ptr; - bzero(ptr, last_size); - btf_info.btf = ptr_to_u64(ptr); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); - } - - if (err || btf_info.btf_size > last_size) { - err = errno; - goto exit_free; - } - - *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); - if (IS_ERR(*btf)) { - err = PTR_ERR(*btf); - *btf = NULL; - } - -exit_free: - close(btf_fd); - free(ptr); - - return err; -} - static json_writer_t *get_btf_writer(void) { json_writer_t *jw = jsonw_new(stdout); @@ -383,7 +321,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, printf(single_line ? " " : "\n"); printf("value:%c", break_names ? '\n' : ' '); - fprint_hex(stdout, value, info->value_size, " "); + if (value) + fprint_hex(stdout, value, info->value_size, " "); + else + printf("<no entry>"); printf("\n"); } else { @@ -398,8 +339,11 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, for (i = 0; i < n; i++) { printf("value (CPU %02d):%c", i, info->value_size > 16 ? '\n' : ' '); - fprint_hex(stdout, value + i * step, - info->value_size, " "); + if (value) + fprint_hex(stdout, value + i * step, + info->value_size, " "); + else + printf("<no entry>"); printf("\n"); } } @@ -543,7 +487,6 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) char *memlock; memlock = get_fdinfo(fd, "memlock"); - close(fd); jsonw_start_object(json_wtr); @@ -570,6 +513,30 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock)); free(memlock); + if (info->type == BPF_MAP_TYPE_PROG_ARRAY) { + char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); + char *owner_jited = get_fdinfo(fd, "owner_jited"); + + if (owner_prog_type) { + unsigned int prog_type = atoi(owner_prog_type); + + if (prog_type < ARRAY_SIZE(prog_type_name)) + jsonw_string_field(json_wtr, "owner_prog_type", + prog_type_name[prog_type]); + else + jsonw_uint_field(json_wtr, "owner_prog_type", + prog_type); + } + if (atoi(owner_jited)) + jsonw_bool_field(json_wtr, "owner_jited", true); + else + jsonw_bool_field(json_wtr, "owner_jited", false); + + free(owner_prog_type); + free(owner_jited); + } + close(fd); + if (!hash_empty(map_table.table)) { struct pinned_obj *obj; @@ -592,7 +559,6 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) char *memlock; memlock = get_fdinfo(fd, "memlock"); - close(fd); printf("%u: ", info->id); if (info->type < ARRAY_SIZE(map_type_name)) @@ -613,6 +579,30 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf(" memlock %sB", memlock); free(memlock); + if (info->type == BPF_MAP_TYPE_PROG_ARRAY) { + char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); + char *owner_jited = get_fdinfo(fd, "owner_jited"); + + printf("\n\t"); + if (owner_prog_type) { + unsigned int prog_type = atoi(owner_prog_type); + + if (prog_type < ARRAY_SIZE(prog_type_name)) + printf("owner_prog_type %s ", + prog_type_name[prog_type]); + else + printf("owner_prog_type %d ", prog_type); + } + if (atoi(owner_jited)) + printf("owner jited"); + else + printf("owner not jited"); + + free(owner_prog_type); + free(owner_jited); + } + close(fd); + printf("\n"); if (!hash_empty(map_table.table)) { struct pinned_obj *obj; @@ -731,7 +721,11 @@ static int dump_map_elem(int fd, void *key, void *value, jsonw_string_field(json_wtr, "error", strerror(lookup_errno)); jsonw_end_object(json_wtr); } else { - print_entry_error(map_info, key, strerror(lookup_errno)); + if (errno == ENOENT) + print_entry_plain(map_info, key, NULL); + else + print_entry_error(map_info, key, + strerror(lookup_errno)); } return 0; @@ -765,7 +759,7 @@ static int do_dump(int argc, char **argv) prev_key = NULL; - err = get_btf(&info, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; @@ -909,7 +903,7 @@ static int do_lookup(int argc, char **argv) } /* here means bpf_map_lookup_elem() succeeded */ - err = get_btf(&info, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; @@ -1140,6 +1134,8 @@ static int do_create(int argc, char **argv) return -1; } + set_max_rlimit(); + fd = bpf_create_map_xattr(&attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 5302ee282409..b73b4e473948 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -47,40 +47,18 @@ #include <linux/err.h> #include <bpf.h> +#include <btf.h> #include <libbpf.h> #include "cfg.h" #include "main.h" #include "xlated_dumper.h" -static const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", - [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", - [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", - [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", - [BPF_PROG_TYPE_XDP] = "xdp", - [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", - [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", - [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", - [BPF_PROG_TYPE_LWT_IN] = "lwt_in", - [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", - [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", - [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", - [BPF_PROG_TYPE_SK_SKB] = "sk_skb", - [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", - [BPF_PROG_TYPE_SK_MSG] = "sk_msg", - [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", - [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", - [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", -}; - static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", [BPF_SK_MSG_VERDICT] = "msg_verdict", + [BPF_FLOW_DISSECTOR] = "flow_dissector", [__MAX_BPF_ATTACH_TYPE] = NULL, }; @@ -357,10 +335,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (!hash_empty(prog_table.table)) { struct pinned_obj *obj; - printf("\n"); hash_for_each_possible(prog_table.table, obj, hash, info->id) { if (obj->id == info->id) - printf("\tpinned %s\n", obj->path); + printf("\n\tpinned %s", obj->path); } } @@ -446,6 +423,10 @@ static int do_show(int argc, char **argv) static int do_dump(int argc, char **argv) { + unsigned int finfo_rec_size, linfo_rec_size, jited_linfo_rec_size; + void *func_info = NULL, *linfo = NULL, *jited_linfo = NULL; + unsigned int nr_finfo, nr_linfo = 0, nr_jited_linfo = 0; + struct bpf_prog_linfo *prog_linfo = NULL; unsigned long *func_ksyms = NULL; struct bpf_prog_info info = {}; unsigned int *func_lens = NULL; @@ -454,11 +435,14 @@ static int do_dump(int argc, char **argv) unsigned int nr_func_lens; struct dump_data dd = {}; __u32 len = sizeof(info); + struct btf *btf = NULL; unsigned int buf_size; char *filepath = NULL; bool opcodes = false; bool visual = false; + char func_sig[1024]; unsigned char *buf; + bool linum = false; __u32 *member_len; __u64 *member_ptr; ssize_t n; @@ -466,6 +450,9 @@ static int do_dump(int argc, char **argv) int fd; if (is_prefix(*argv, "jited")) { + if (disasm_init()) + return -1; + member_len = &info.jited_prog_len; member_ptr = &info.jited_prog_insns; } else if (is_prefix(*argv, "xlated")) { @@ -499,6 +486,9 @@ static int do_dump(int argc, char **argv) } else if (is_prefix(*argv, "visual")) { visual = true; NEXT_ARG(); + } else if (is_prefix(*argv, "linum")) { + linum = true; + NEXT_ARG(); } if (argc) { @@ -547,6 +537,43 @@ static int do_dump(int argc, char **argv) } } + nr_finfo = info.nr_func_info; + finfo_rec_size = info.func_info_rec_size; + if (nr_finfo && finfo_rec_size) { + func_info = malloc(nr_finfo * finfo_rec_size); + if (!func_info) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + + linfo_rec_size = info.line_info_rec_size; + if (info.nr_line_info && linfo_rec_size && info.btf_id) { + nr_linfo = info.nr_line_info; + linfo = malloc(nr_linfo * linfo_rec_size); + if (!linfo) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + + jited_linfo_rec_size = info.jited_line_info_rec_size; + if (info.nr_jited_line_info && + jited_linfo_rec_size && + info.nr_jited_ksyms && + info.nr_jited_func_lens && + info.btf_id) { + nr_jited_linfo = info.nr_jited_line_info; + jited_linfo = malloc(nr_jited_linfo * jited_linfo_rec_size); + if (!jited_linfo) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + memset(&info, 0, sizeof(info)); *member_ptr = ptr_to_u64(buf); @@ -555,6 +582,15 @@ static int do_dump(int argc, char **argv) info.nr_jited_ksyms = nr_func_ksyms; info.jited_func_lens = ptr_to_u64(func_lens); info.nr_jited_func_lens = nr_func_lens; + info.nr_func_info = nr_finfo; + info.func_info_rec_size = finfo_rec_size; + info.func_info = ptr_to_u64(func_info); + info.nr_line_info = nr_linfo; + info.line_info_rec_size = linfo_rec_size; + info.line_info = ptr_to_u64(linfo); + info.nr_jited_line_info = nr_jited_linfo; + info.jited_line_info_rec_size = jited_linfo_rec_size; + info.jited_line_info = ptr_to_u64(jited_linfo); err = bpf_obj_get_info_by_fd(fd, &info, &len); close(fd); @@ -578,6 +614,49 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (info.nr_func_info != nr_finfo) { + p_err("incorrect nr_func_info %d vs. expected %d", + info.nr_func_info, nr_finfo); + goto err_free; + } + + if (info.func_info_rec_size != finfo_rec_size) { + p_err("incorrect func_info_rec_size %d vs. expected %d", + info.func_info_rec_size, finfo_rec_size); + goto err_free; + } + + if (func_info && !info.func_info) { + /* kernel.kptr_restrict is set. No func_info available. */ + free(func_info); + func_info = NULL; + nr_finfo = 0; + } + + if (linfo && info.nr_line_info != nr_linfo) { + p_err("incorrect nr_line_info %u vs. expected %u", + info.nr_line_info, nr_linfo); + goto err_free; + } + + if (info.line_info_rec_size != linfo_rec_size) { + p_err("incorrect line_info_rec_size %u vs. expected %u", + info.line_info_rec_size, linfo_rec_size); + goto err_free; + } + + if (jited_linfo && info.nr_jited_line_info != nr_jited_linfo) { + p_err("incorrect nr_jited_line_info %u vs. expected %u", + info.nr_jited_line_info, nr_jited_linfo); + goto err_free; + } + + if (info.jited_line_info_rec_size != jited_linfo_rec_size) { + p_err("incorrect jited_line_info_rec_size %u vs. expected %u", + info.jited_line_info_rec_size, jited_linfo_rec_size); + goto err_free; + } + if ((member_len == &info.jited_prog_len && info.jited_prog_insns == 0) || (member_len == &info.xlated_prog_len && @@ -586,6 +665,17 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (info.btf_id && btf__get_from_id(info.btf_id, &btf)) { + p_err("failed to get btf"); + goto err_free; + } + + if (nr_linfo) { + prog_linfo = bpf_prog_linfo__new(&info); + if (!prog_linfo) + p_info("error in processing bpf_line_info. continue without it."); + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -618,6 +708,7 @@ static int do_dump(int argc, char **argv) if (info.nr_jited_func_lens && info.jited_func_lens) { struct kernel_sym *sym = NULL; + struct bpf_func_info *record; char sym_name[SYM_MAX_NAME]; unsigned char *img = buf; __u64 *ksyms = NULL; @@ -644,17 +735,33 @@ static int do_dump(int argc, char **argv) strcpy(sym_name, "unknown"); } + if (func_info) { + record = func_info + i * finfo_rec_size; + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + } + if (json_output) { jsonw_start_object(json_wtr); + if (func_info && func_sig[0] != '\0') { + jsonw_name(json_wtr, "proto"); + jsonw_string(json_wtr, func_sig); + } jsonw_name(json_wtr, "name"); jsonw_string(json_wtr, sym_name); jsonw_name(json_wtr, "insns"); } else { + if (func_info && func_sig[0] != '\0') + printf("%s:\n", func_sig); printf("%s:\n", sym_name); } - disasm_print_insn(img, lens[i], opcodes, name, - disasm_opt); + disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum); + img += lens[i]; if (json_output) @@ -667,7 +774,7 @@ static int do_dump(int argc, char **argv) jsonw_end_array(json_wtr); } else { disasm_print_insn(buf, *member_len, opcodes, name, - disasm_opt); + disasm_opt, btf, NULL, 0, 0, false); } } else if (visual) { if (json_output) @@ -678,23 +785,37 @@ static int do_dump(int argc, char **argv) kernel_syms_load(&dd); dd.nr_jited_ksyms = info.nr_jited_ksyms; dd.jited_ksyms = (__u64 *) info.jited_ksyms; + dd.btf = btf; + dd.func_info = func_info; + dd.finfo_rec_size = finfo_rec_size; + dd.prog_linfo = prog_linfo; if (json_output) - dump_xlated_json(&dd, buf, *member_len, opcodes); + dump_xlated_json(&dd, buf, *member_len, opcodes, + linum); else - dump_xlated_plain(&dd, buf, *member_len, opcodes); + dump_xlated_plain(&dd, buf, *member_len, opcodes, + linum); kernel_syms_destroy(&dd); } free(buf); free(func_ksyms); free(func_lens); + free(func_info); + free(linfo); + free(jited_linfo); + bpf_prog_linfo__free(prog_linfo); return 0; err_free: free(buf); free(func_ksyms); free(func_lens); + free(func_info); + free(linfo); + free(jited_linfo); + bpf_prog_linfo__free(prog_linfo); return -1; } @@ -721,30 +842,49 @@ int map_replace_compar(const void *p1, const void *p2) return a->idx - b->idx; } -static int do_attach(int argc, char **argv) +static int parse_attach_detach_args(int argc, char **argv, int *progfd, + enum bpf_attach_type *attach_type, + int *mapfd) { - enum bpf_attach_type attach_type; - int err, mapfd, progfd; - - if (!REQ_ARGS(5)) { - p_err("too few parameters for map attach"); + if (!REQ_ARGS(3)) return -EINVAL; - } - progfd = prog_parse_fd(&argc, &argv); - if (progfd < 0) - return progfd; + *progfd = prog_parse_fd(&argc, &argv); + if (*progfd < 0) + return *progfd; - attach_type = parse_attach_type(*argv); - if (attach_type == __MAX_BPF_ATTACH_TYPE) { - p_err("invalid attach type"); + *attach_type = parse_attach_type(*argv); + if (*attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach/detach type"); return -EINVAL; } + + if (*attach_type == BPF_FLOW_DISSECTOR) { + *mapfd = -1; + return 0; + } + NEXT_ARG(); + if (!REQ_ARGS(2)) + return -EINVAL; - mapfd = map_parse_fd(&argc, &argv); - if (mapfd < 0) - return mapfd; + *mapfd = map_parse_fd(&argc, &argv); + if (*mapfd < 0) + return *mapfd; + + return 0; +} + +static int do_attach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int err, progfd; + int mapfd; + + err = parse_attach_detach_args(argc, argv, + &progfd, &attach_type, &mapfd); + if (err) + return err; err = bpf_prog_attach(progfd, mapfd, attach_type, 0); if (err) { @@ -760,27 +900,13 @@ static int do_attach(int argc, char **argv) static int do_detach(int argc, char **argv) { enum bpf_attach_type attach_type; - int err, mapfd, progfd; - - if (!REQ_ARGS(5)) { - p_err("too few parameters for map detach"); - return -EINVAL; - } - - progfd = prog_parse_fd(&argc, &argv); - if (progfd < 0) - return progfd; - - attach_type = parse_attach_type(*argv); - if (attach_type == __MAX_BPF_ATTACH_TYPE) { - p_err("invalid attach type"); - return -EINVAL; - } - NEXT_ARG(); + int err, progfd; + int mapfd; - mapfd = map_parse_fd(&argc, &argv); - if (mapfd < 0) - return mapfd; + err = parse_attach_detach_args(argc, argv, + &progfd, &attach_type, &mapfd); + if (err) + return err; err = bpf_prog_detach2(progfd, mapfd, attach_type); if (err) { @@ -792,15 +918,17 @@ static int do_detach(int argc, char **argv) jsonw_null(json_wtr); return 0; } -static int do_load(int argc, char **argv) + +static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_attach_type expected_attach_type; struct bpf_object_open_attr attr = { .prog_type = BPF_PROG_TYPE_UNSPEC, }; struct map_replace *map_replace = NULL; + struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; - struct bpf_program *prog; + const char *pinmaps = NULL; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; @@ -845,6 +973,7 @@ static int do_load(int argc, char **argv) } NEXT_ARG(); } else if (is_prefix(*argv, "map")) { + void *new_map_replace; char *endptr, *name; int fd; @@ -878,12 +1007,15 @@ static int do_load(int argc, char **argv) if (fd < 0) goto err_free_reuse_maps; - map_replace = reallocarray(map_replace, old_map_fds + 1, - sizeof(*map_replace)); - if (!map_replace) { + new_map_replace = reallocarray(map_replace, + old_map_fds + 1, + sizeof(*map_replace)); + if (!new_map_replace) { p_err("mem alloc failed"); goto err_free_reuse_maps; } + map_replace = new_map_replace; + map_replace[old_map_fds].idx = idx; map_replace[old_map_fds].name = name; map_replace[old_map_fds].fd = fd; @@ -905,6 +1037,13 @@ static int do_load(int argc, char **argv) goto err_free_reuse_maps; } NEXT_ARG(); + } else if (is_prefix(*argv, "pinmaps")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + + pinmaps = GET_ARG(); } else { p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", *argv); @@ -918,26 +1057,25 @@ static int do_load(int argc, char **argv) goto err_free_reuse_maps; } - prog = bpf_program__next(NULL, obj); - if (!prog) { - p_err("object file doesn't contain any bpf program"); - goto err_close_obj; - } + bpf_object__for_each_program(pos, obj) { + enum bpf_prog_type prog_type = attr.prog_type; - bpf_program__set_ifindex(prog, ifindex); - if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { - const char *sec_name = bpf_program__title(prog, false); + if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { + const char *sec_name = bpf_program__title(pos, false); - err = libbpf_prog_type_by_name(sec_name, &attr.prog_type, - &expected_attach_type); - if (err < 0) { - p_err("failed to guess program type based on section name %s\n", - sec_name); - goto err_close_obj; + err = libbpf_prog_type_by_name(sec_name, &prog_type, + &expected_attach_type); + if (err < 0) { + p_err("failed to guess program type based on section name %s\n", + sec_name); + goto err_close_obj; + } } + + bpf_program__set_ifindex(pos, ifindex); + bpf_program__set_type(pos, prog_type); + bpf_program__set_expected_attach_type(pos, expected_attach_type); } - bpf_program__set_type(prog, attr.prog_type); - bpf_program__set_expected_attach_type(prog, expected_attach_type); qsort(map_replace, old_map_fds, sizeof(*map_replace), map_replace_compar); @@ -995,15 +1133,47 @@ static int do_load(int argc, char **argv) goto err_close_obj; } + set_max_rlimit(); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); goto err_close_obj; } - if (do_pin_fd(bpf_program__fd(prog), pinfile)) + err = mount_bpffs_for_pin(pinfile); + if (err) goto err_close_obj; + if (first_prog_only) { + prog = bpf_program__next(NULL, obj); + if (!prog) { + p_err("object file doesn't contain any bpf program"); + goto err_close_obj; + } + + err = bpf_obj_pin(bpf_program__fd(prog), pinfile); + if (err) { + p_err("failed to pin program %s", + bpf_program__title(prog, false)); + goto err_close_obj; + } + } else { + err = bpf_object__pin_programs(obj, pinfile); + if (err) { + p_err("failed to pin all programs"); + goto err_close_obj; + } + } + + if (pinmaps) { + err = bpf_object__pin_maps(obj, pinmaps); + if (err) { + p_err("failed to pin all maps"); + goto err_unpin; + } + } + if (json_output) jsonw_null(json_wtr); @@ -1014,6 +1184,11 @@ static int do_load(int argc, char **argv) return 0; +err_unpin: + if (first_prog_only) + unlink(pinfile); + else + bpf_object__unpin_programs(obj, pinfile); err_close_obj: bpf_object__close(obj); err_free_reuse_maps: @@ -1023,6 +1198,16 @@ err_free_reuse_maps: return -1; } +static int do_load(int argc, char **argv) +{ + return load_with_options(argc, argv, true); +} + +static int do_loadall(int argc, char **argv) +{ + return load_with_options(argc, argv, false); +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -1032,13 +1217,16 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } [PROG]\n" - " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n" - " %s %s dump jited PROG [{ file FILE | opcodes }]\n" + " %s %s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n" + " %s %s dump jited PROG [{ file FILE | opcodes | linum }]\n" " %s %s pin PROG FILE\n" - " %s %s load OBJ FILE [type TYPE] [dev NAME] \\\n" - " [map { idx IDX | name NAME } MAP]\n" - " %s %s attach PROG ATTACH_TYPE MAP\n" - " %s %s detach PROG ATTACH_TYPE MAP\n" + " %s %s { load | loadall } OBJ PATH \\\n" + " [type TYPE] [dev NAME] \\\n" + " [map { idx IDX | name NAME } MAP]\\\n" + " [pinmaps MAP_DIR]\n" + " %s %s attach PROG ATTACH_TYPE [MAP]\n" + " %s %s detach PROG ATTACH_TYPE [MAP]\n" + " %s %s tracelog\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -1047,15 +1235,17 @@ static int do_help(int argc, char **argv) " tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n" " cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n" " lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n" + " sk_reuseport | flow_dissector |\n" " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 }\n" - " ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse }\n" + " ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse |\n" + " flow_dissector }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -1067,8 +1257,10 @@ static const struct cmd cmds[] = { { "dump", do_dump }, { "pin", do_pin }, { "load", do_load }, + { "loadall", do_loadall }, { "attach", do_attach }, { "detach", do_detach }, + { "tracelog", do_tracelog }, { 0 } }; diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c new file mode 100644 index 000000000000..1fa8e513f590 --- /dev/null +++ b/tools/bpf/bpftool/tracelog.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (c) 2015-2017 Daniel Borkmann */ +/* Copyright (c) 2018 Netronome Systems, Inc. */ + +#include <errno.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <linux/magic.h> +#include <sys/fcntl.h> +#include <sys/vfs.h> + +#include "main.h" + +#ifndef TRACEFS_MAGIC +# define TRACEFS_MAGIC 0x74726163 +#endif + +#define _textify(x) #x +#define textify(x) _textify(x) + +FILE *trace_pipe_fd; +char *buff; + +static int validate_tracefs_mnt(const char *mnt, unsigned long magic) +{ + struct statfs st_fs; + + if (statfs(mnt, &st_fs) < 0) + return -ENOENT; + if ((unsigned long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +static bool +find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt) +{ + size_t src_len; + + if (validate_tracefs_mnt(mntpt, magic)) + return false; + + src_len = strlen(mntpt); + if (src_len + 1 >= PATH_MAX) { + p_err("tracefs mount point name too long"); + return false; + } + + strcpy(mnt, mntpt); + return true; +} + +static bool find_tracefs_pipe(char *mnt) +{ + static const char * const known_mnts[] = { + "/sys/kernel/debug/tracing", + "/sys/kernel/tracing", + "/tracing", + "/trace", + }; + const char *pipe_name = "/trace_pipe"; + const char *fstype = "tracefs"; + char type[100], format[32]; + const char * const *ptr; + bool found = false; + FILE *fp; + + for (ptr = known_mnts; ptr < known_mnts + ARRAY_SIZE(known_mnts); ptr++) + if (find_tracefs_mnt_single(TRACEFS_MAGIC, mnt, *ptr)) + goto exit_found; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return false; + + /* Allow room for NULL terminating byte and pipe file name */ + snprintf(format, sizeof(format), "%%*s %%%zds %%99s %%*s %%*d %%*d\\n", + PATH_MAX - strlen(pipe_name) - 1); + while (fscanf(fp, format, mnt, type) == 2) + if (strcmp(type, fstype) == 0) { + found = true; + break; + } + fclose(fp); + + /* The string from fscanf() might be truncated, check mnt is valid */ + if (!found || validate_tracefs_mnt(mnt, TRACEFS_MAGIC)) + return false; + +exit_found: + strcat(mnt, pipe_name); + return true; +} + +static void exit_tracelog(int signum) +{ + fclose(trace_pipe_fd); + free(buff); + + if (json_output) { + jsonw_end_array(json_wtr); + jsonw_destroy(&json_wtr); + } + + exit(0); +} + +int do_tracelog(int argc, char **argv) +{ + const struct sigaction act = { + .sa_handler = exit_tracelog + }; + char trace_pipe[PATH_MAX]; + bool found_trace_pipe; + size_t buff_len = 0; + + if (json_output) + jsonw_start_array(json_wtr); + + found_trace_pipe = find_tracefs_pipe(trace_pipe); + if (!found_trace_pipe) { + p_err("could not find trace pipe, tracefs not mounted?"); + return -1; + } + + trace_pipe_fd = fopen(trace_pipe, "r"); + if (!trace_pipe_fd) { + p_err("could not open trace pipe: %s", strerror(errno)); + return -1; + } + + sigaction(SIGHUP, &act, NULL); + sigaction(SIGINT, &act, NULL); + sigaction(SIGTERM, &act, NULL); + while (1) { + ssize_t ret; + + ret = getline(&buff, &buff_len, trace_pipe_fd); + if (ret <= 0) { + p_err("failed to read content from trace pipe: %s", + strerror(errno)); + break; + } + if (json_output) + jsonw_string(json_wtr, buff); + else + printf("%s", buff); + } + + fclose(trace_pipe_fd); + free(buff); + return -1; +} diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 3284759df98a..aef628dcccb6 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -41,6 +41,7 @@ #include <stdlib.h> #include <string.h> #include <sys/types.h> +#include <libbpf.h> #include "disasm.h" #include "json_writer.h" @@ -234,19 +235,25 @@ static const char *print_imm(void *private_data, } void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes) + bool opcodes, bool linum) { + const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo; const struct bpf_insn_cbs cbs = { .cb_print = print_insn_json, .cb_call = print_call, .cb_imm = print_imm, .private_data = dd, }; + struct bpf_func_info *record; struct bpf_insn *insn = buf; + struct btf *btf = dd->btf; bool double_insn = false; + unsigned int nr_skip = 0; + char func_sig[1024]; unsigned int i; jsonw_start_array(json_wtr); + record = dd->func_info; for (i = 0; i < len / sizeof(*insn); i++) { if (double_insn) { double_insn = false; @@ -255,6 +262,30 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); jsonw_start_object(json_wtr); + + if (btf && record) { + if (record->insn_off == i) { + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + if (func_sig[0] != '\0') { + jsonw_name(json_wtr, "proto"); + jsonw_string(json_wtr, func_sig); + } + record = (void *)record + dd->finfo_rec_size; + } + } + + if (prog_linfo) { + const struct bpf_line_info *linfo; + + linfo = bpf_prog_linfo__lfind(prog_linfo, i, nr_skip); + if (linfo) { + btf_dump_linfo_json(btf, linfo, linum); + nr_skip++; + } + } + jsonw_name(json_wtr, "disasm"); print_bpf_insn(&cbs, insn + i, true); @@ -289,24 +320,52 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, } void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes) + bool opcodes, bool linum) { + const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo; const struct bpf_insn_cbs cbs = { .cb_print = print_insn, .cb_call = print_call, .cb_imm = print_imm, .private_data = dd, }; + struct bpf_func_info *record; struct bpf_insn *insn = buf; + struct btf *btf = dd->btf; + unsigned int nr_skip = 0; bool double_insn = false; + char func_sig[1024]; unsigned int i; + record = dd->func_info; for (i = 0; i < len / sizeof(*insn); i++) { if (double_insn) { double_insn = false; continue; } + if (btf && record) { + if (record->insn_off == i) { + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + if (func_sig[0] != '\0') + printf("%s:\n", func_sig); + record = (void *)record + dd->finfo_rec_size; + } + } + + if (prog_linfo) { + const struct bpf_line_info *linfo; + + linfo = bpf_prog_linfo__lfind(prog_linfo, i, nr_skip); + if (linfo) { + btf_dump_linfo_plain(btf, linfo, "; ", + linum); + nr_skip++; + } + } + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index 33d86e2b369b..a24f89df8cb2 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -40,6 +40,8 @@ #define SYM_MAX_NAME 256 +struct bpf_prog_linfo; + struct kernel_sym { unsigned long address; char name[SYM_MAX_NAME]; @@ -51,6 +53,10 @@ struct dump_data { __u32 sym_count; __u64 *jited_ksyms; __u32 nr_jited_ksyms; + struct btf *btf; + void *func_info; + __u32 finfo_rec_size; + const struct bpf_prog_linfo *prog_linfo; char scratch_buff[SYM_MAX_NAME + 8]; }; @@ -58,9 +64,9 @@ void kernel_syms_load(struct dump_data *dd); void kernel_syms_destroy(struct dump_data *dd); struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key); void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes); + bool opcodes, bool linum); void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes); + bool opcodes, bool linum); void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end, unsigned int start_index); diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index f216b2f5c3d7..d74bb9414d7c 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -33,6 +33,7 @@ FEATURE_TESTS_BASIC := \ dwarf_getlocations \ fortify-source \ sync-compare-and-swap \ + get_current_dir_name \ glibc \ gtk2 \ gtk2-infobar \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0516259be70f..304b984f11b9 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -7,6 +7,7 @@ FILES= \ test-dwarf_getlocations.bin \ test-fortify-source.bin \ test-sync-compare-and-swap.bin \ + test-get_current_dir_name.bin \ test-glibc.bin \ test-gtk2.bin \ test-gtk2-infobar.bin \ @@ -101,6 +102,9 @@ $(OUTPUT)test-bionic.bin: $(OUTPUT)test-libelf.bin: $(BUILD) -lelf +$(OUTPUT)test-get_current_dir_name.bin: + $(BUILD) + $(OUTPUT)test-glibc.bin: $(BUILD) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 8dc20a61341f..56722bfe6bdd 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -34,6 +34,10 @@ # include "test-libelf-mmap.c" #undef main +#define main main_test_get_current_dir_name +# include "test-get_current_dir_name.c" +#undef main + #define main main_test_glibc # include "test-glibc.c" #undef main @@ -174,6 +178,7 @@ int main(int argc, char *argv[]) main_test_hello(); main_test_libelf(); main_test_libelf_mmap(); + main_test_get_current_dir_name(); main_test_glibc(); main_test_dwarf(); main_test_dwarf_getlocations(); diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c new file mode 100644 index 000000000000..573000f93212 --- /dev/null +++ b/tools/build/feature/test-get_current_dir_name.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <unistd.h> +#include <stdlib.h> + +int main(void) +{ + free(get_current_dir_name()); + return 0; +} diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h index 040651735662..cdc9f4ca8c27 100644 --- a/tools/include/uapi/asm-generic/ioctls.h +++ b/tools/include/uapi/asm-generic/ioctls.h @@ -79,6 +79,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) #define FIONCLEX 0x5450 #define FIOCLEX 0x5451 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 7f5634ce8e88..a4446f452040 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 +/* + * Once upon a time we supposed that writes through the GGTT would be + * immediately in physical memory (once flushed out of the CPU path). However, + * on a few different processors and chipsets, this is not necessarily the case + * as the writes appear to be buffered internally. Thus a read of the backing + * storage (physical memory) via a different path (with different physical tags + * to the indirect write via the GGTT) will see stale values from before + * the GGTT write. Inside the kernel, we can for the most part keep track of + * the different read/write domains in use (e.g. set-domain), but the assumption + * of coherency is baked into the ABI, hence reporting its true state in this + * parameter. + * + * Reports true when writes via mmap_gtt are immediately visible following an + * lfence to flush the WCB. + * + * Reports false when writes via mmap_gtt are indeterminately delayed in an in + * internal buffer and are _not_ immediately visible to third parties accessing + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC + * communications channel when reporting false is strongly disadvised. + */ +#define I915_PARAM_MMAP_GTT_COHERENT 52 + typedef struct drm_i915_getparam { __s32 param; /* diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 852dc17ab47a..aa582cd5bfcf 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -232,6 +232,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 @@ -257,9 +271,6 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) -/* flags for BPF_PROG_QUERY */ -#define BPF_F_QUERY_EFFECTIVE (1U << 0) - #define BPF_OBJ_NAME_LEN 16U /* Flags for accessing BPF object */ @@ -269,6 +280,12 @@ enum bpf_attach_type { /* Flag for stack_map, store build_id+offset instead of pointer */ #define BPF_F_STACK_BUILD_ID (1U << 5) +/* Zero-initialize hash function seed. This should only be used for testing. */ +#define BPF_F_ZERO_SEED (1U << 6) + +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, @@ -335,6 +352,13 @@ union bpf_attr { * (context accesses, allowed helpers, etc). */ __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -353,8 +377,11 @@ union bpf_attr { struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ __u32 prog_fd; __u32 retval; - __u32 data_size_in; - __u32 data_size_out; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ __aligned_u64 data_in; __aligned_u64 data_out; __u32 repeat; @@ -475,18 +502,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) - * Description - * Pop an element from *map*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) - * Description - * Get an element from *map* without removing it. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1910,9 +1925,9 @@ union bpf_attr { * is set to metric from route (IPv4/IPv6 only), and ifindex * is set to the device index of the nexthop from the FIB lookup. * - * *plen* argument is the size of the passed in struct. - * *flags* argument can be a combination of one or more of the - * following values: + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: * * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB @@ -1921,9 +1936,9 @@ union bpf_attr { * Perform lookup from an egress perspective (default is * ingress). * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** tc cls_act programs. - * Return + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the @@ -2068,8 +2083,8 @@ union bpf_attr { * translated to a keycode using the rc keymap, and reported as * an input key down event. After a period a key up event is * generated. This period can be extended by calling either - * **bpf_rc_keydown** () again with the same values, or calling - * **bpf_rc_repeat** (). + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. @@ -2152,29 +2167,30 @@ union bpf_attr { * The *flags* meaning is specific for each map type, * and has to be 0 for cgroup local storage. * - * Depending on the bpf program type, a local storage area - * can be shared between multiple instances of the bpf program, + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the BPF_STX_XADD instruction to alter + * For example, by using the **BPF_STX_XADD** instruction to alter * the shared data. * Return - * Pointer to the local storage area. + * A pointer to the local storage area. * * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description - * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map - * It checks the selected sk is matching the incoming - * request in the skb. + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2187,12 +2203,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2200,13 +2218,15 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2219,12 +2239,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2232,31 +2254,71 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * - * int bpf_sk_release(struct bpf_sock *sk) + * int bpf_sk_release(struct bpf_sock *sock) * Description - * Release the reference held by *sock*. *sock* must be a non-NULL - * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) * Description - * For socket policies, insert *len* bytes into msg at offset + * For socket policies, insert *len* bytes into *msg* at offset * *start*. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it may want to insert metadata or options into the msg. + * *msg* it may want to insert metadata or options into the *msg*. * This can later be read and used by any of the lower layer BPF * hooks. * * This helper may fail if under memory pressure (a malloc * fails) in these cases BPF programs will get an appropriate * error and BPF programs will need to handle them. + * Return + * 0 on success, or a negative error in case of failure. * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of *msg* + * payload and/or *pop* value being to large. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2349,7 +2411,9 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), \ + FN(rc_pointer_rel), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2405,6 +2469,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -2422,6 +2489,12 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6_INLINE }; +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -2456,7 +2529,9 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; - struct bpf_flow_keys *flow_keys; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); + __u64 tstamp; + __u32 wire_len; }; struct bpf_tunnel_key { @@ -2572,8 +2647,8 @@ enum sk_action { * be added to the end of this structure */ struct sk_msg_md { - void *data; - void *data_end; + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -2589,8 +2664,9 @@ struct sk_reuseport_md { * Start of directly accessible data. It begins from * the tcp/udp header. */ - void *data; - void *data_end; /* End of directly accessible data */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); /* * Total length of packet (starting from the tcp/udp header). * Note that the directly accessible bytes (data_end - data) @@ -2631,6 +2707,16 @@ struct bpf_prog_info { __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 nr_func_info; + __u32 nr_line_info; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 nr_jited_line_info; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2942,4 +3028,19 @@ struct bpf_flow_keys { }; }; +struct bpf_func_info { + __u32 insn_off; + __u32 type_id; +}; + +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 972265f32871..14f66948fc95 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -40,7 +40,8 @@ struct btf_type { /* "size" is used by INT, ENUM, STRUCT and UNION. * "size" tells the size of the type it is describing. * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. * "type" is a type_id referring to another type. */ union { @@ -64,8 +65,10 @@ struct btf_type { #define BTF_KIND_VOLATILE 9 /* Volatile */ #define BTF_KIND_CONST 10 /* Const */ #define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_MAX 11 -#define NR_BTF_KINDS 12 +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_MAX 13 +#define NR_BTF_KINDS 14 /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -110,4 +113,13 @@ struct btf_member { __u32 offset; /* offset in bits */ }; +/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". + * The exact number of btf_param is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_param { + __u32 name_off; + __u32 type; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h new file mode 100644 index 000000000000..401d0c1e612d --- /dev/null +++ b/tools/include/uapi/linux/pkt_cls.h @@ -0,0 +1,612 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_PKT_CLS_H +#define __LINUX_PKT_CLS_H + +#include <linux/types.h> +#include <linux/pkt_sched.h> + +#define TC_COOKIE_MAX_SIZE 16 + +/* Action attributes */ +enum { + TCA_ACT_UNSPEC, + TCA_ACT_KIND, + TCA_ACT_OPTIONS, + TCA_ACT_INDEX, + TCA_ACT_STATS, + TCA_ACT_PAD, + TCA_ACT_COOKIE, + __TCA_ACT_MAX +}; + +#define TCA_ACT_MAX __TCA_ACT_MAX +#define TCA_OLD_COMPAT (TCA_ACT_MAX+1) +#define TCA_ACT_MAX_PRIO 32 +#define TCA_ACT_BIND 1 +#define TCA_ACT_NOBIND 0 +#define TCA_ACT_UNBIND 1 +#define TCA_ACT_NOUNBIND 0 +#define TCA_ACT_REPLACE 1 +#define TCA_ACT_NOREPLACE 0 + +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 +#define TC_ACT_RECLASSIFY 1 +#define TC_ACT_SHOT 2 +#define TC_ACT_PIPE 3 +#define TC_ACT_STOLEN 4 +#define TC_ACT_QUEUED 5 +#define TC_ACT_REPEAT 6 +#define TC_ACT_REDIRECT 7 +#define TC_ACT_TRAP 8 /* For hw path, this means "trap to cpu" + * and don't further process the frame + * in hardware. For sw path, this is + * equivalent of TC_ACT_STOLEN - drop + * the skb and act like everything + * is alright. + */ +#define TC_ACT_VALUE_MAX TC_ACT_TRAP + +/* There is a special kind of actions called "extended actions", + * which need a value parameter. These have a local opcode located in + * the highest nibble, starting from 1. The rest of the bits + * are used to carry the value. These two parts together make + * a combined opcode. + */ +#define __TC_ACT_EXT_SHIFT 28 +#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT) +#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1) +#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK)) +#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode) + +#define TC_ACT_JUMP __TC_ACT_EXT(1) +#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) +#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN + +/* Action type identifiers*/ +enum { + TCA_ID_UNSPEC=0, + TCA_ID_POLICE=1, + /* other actions go here */ + __TCA_ID_MAX=255 +}; + +#define TCA_ID_MAX __TCA_ID_MAX + +struct tc_police { + __u32 index; + int action; +#define TC_POLICE_UNSPEC TC_ACT_UNSPEC +#define TC_POLICE_OK TC_ACT_OK +#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY +#define TC_POLICE_SHOT TC_ACT_SHOT +#define TC_POLICE_PIPE TC_ACT_PIPE + + __u32 limit; + __u32 burst; + __u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; + int refcnt; + int bindcnt; + __u32 capab; +}; + +struct tcf_t { + __u64 install; + __u64 lastuse; + __u64 expires; + __u64 firstuse; +}; + +struct tc_cnt { + int refcnt; + int bindcnt; +}; + +#define tc_gen \ + __u32 index; \ + __u32 capab; \ + int action; \ + int refcnt; \ + int bindcnt + +enum { + TCA_POLICE_UNSPEC, + TCA_POLICE_TBF, + TCA_POLICE_RATE, + TCA_POLICE_PEAKRATE, + TCA_POLICE_AVRATE, + TCA_POLICE_RESULT, + TCA_POLICE_TM, + TCA_POLICE_PAD, + __TCA_POLICE_MAX +#define TCA_POLICE_RESULT TCA_POLICE_RESULT +}; + +#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1) + +/* tca flags definitions */ +#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) /* don't offload filter to HW */ +#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */ +#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */ +#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */ +#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */ + +/* U32 filters */ + +#define TC_U32_HTID(h) ((h)&0xFFF00000) +#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20) +#define TC_U32_HASH(h) (((h)>>12)&0xFF) +#define TC_U32_NODE(h) ((h)&0xFFF) +#define TC_U32_KEY(h) ((h)&0xFFFFF) +#define TC_U32_UNSPEC 0 +#define TC_U32_ROOT (0xFFF00000) + +enum { + TCA_U32_UNSPEC, + TCA_U32_CLASSID, + TCA_U32_HASH, + TCA_U32_LINK, + TCA_U32_DIVISOR, + TCA_U32_SEL, + TCA_U32_POLICE, + TCA_U32_ACT, + TCA_U32_INDEV, + TCA_U32_PCNT, + TCA_U32_MARK, + TCA_U32_FLAGS, + TCA_U32_PAD, + __TCA_U32_MAX +}; + +#define TCA_U32_MAX (__TCA_U32_MAX - 1) + +struct tc_u32_key { + __be32 mask; + __be32 val; + int off; + int offmask; +}; + +struct tc_u32_sel { + unsigned char flags; + unsigned char offshift; + unsigned char nkeys; + + __be16 offmask; + __u16 off; + short offoff; + + short hoff; + __be32 hmask; + struct tc_u32_key keys[0]; +}; + +struct tc_u32_mark { + __u32 val; + __u32 mask; + __u32 success; +}; + +struct tc_u32_pcnt { + __u64 rcnt; + __u64 rhit; + __u64 kcnts[0]; +}; + +/* Flags */ + +#define TC_U32_TERMINAL 1 +#define TC_U32_OFFSET 2 +#define TC_U32_VAROFFSET 4 +#define TC_U32_EAT 8 + +#define TC_U32_MAXDEPTH 8 + + +/* RSVP filter */ + +enum { + TCA_RSVP_UNSPEC, + TCA_RSVP_CLASSID, + TCA_RSVP_DST, + TCA_RSVP_SRC, + TCA_RSVP_PINFO, + TCA_RSVP_POLICE, + TCA_RSVP_ACT, + __TCA_RSVP_MAX +}; + +#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) + +struct tc_rsvp_gpi { + __u32 key; + __u32 mask; + int offset; +}; + +struct tc_rsvp_pinfo { + struct tc_rsvp_gpi dpi; + struct tc_rsvp_gpi spi; + __u8 protocol; + __u8 tunnelid; + __u8 tunnelhdr; + __u8 pad; +}; + +/* ROUTE filter */ + +enum { + TCA_ROUTE4_UNSPEC, + TCA_ROUTE4_CLASSID, + TCA_ROUTE4_TO, + TCA_ROUTE4_FROM, + TCA_ROUTE4_IIF, + TCA_ROUTE4_POLICE, + TCA_ROUTE4_ACT, + __TCA_ROUTE4_MAX +}; + +#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1) + + +/* FW filter */ + +enum { + TCA_FW_UNSPEC, + TCA_FW_CLASSID, + TCA_FW_POLICE, + TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ + TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, + __TCA_FW_MAX +}; + +#define TCA_FW_MAX (__TCA_FW_MAX - 1) + +/* TC index filter */ + +enum { + TCA_TCINDEX_UNSPEC, + TCA_TCINDEX_HASH, + TCA_TCINDEX_MASK, + TCA_TCINDEX_SHIFT, + TCA_TCINDEX_FALL_THROUGH, + TCA_TCINDEX_CLASSID, + TCA_TCINDEX_POLICE, + TCA_TCINDEX_ACT, + __TCA_TCINDEX_MAX +}; + +#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) + +/* Flow filter */ + +enum { + FLOW_KEY_SRC, + FLOW_KEY_DST, + FLOW_KEY_PROTO, + FLOW_KEY_PROTO_SRC, + FLOW_KEY_PROTO_DST, + FLOW_KEY_IIF, + FLOW_KEY_PRIORITY, + FLOW_KEY_MARK, + FLOW_KEY_NFCT, + FLOW_KEY_NFCT_SRC, + FLOW_KEY_NFCT_DST, + FLOW_KEY_NFCT_PROTO_SRC, + FLOW_KEY_NFCT_PROTO_DST, + FLOW_KEY_RTCLASSID, + FLOW_KEY_SKUID, + FLOW_KEY_SKGID, + FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, + __FLOW_KEY_MAX, +}; + +#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1) + +enum { + FLOW_MODE_MAP, + FLOW_MODE_HASH, +}; + +enum { + TCA_FLOW_UNSPEC, + TCA_FLOW_KEYS, + TCA_FLOW_MODE, + TCA_FLOW_BASECLASS, + TCA_FLOW_RSHIFT, + TCA_FLOW_ADDEND, + TCA_FLOW_MASK, + TCA_FLOW_XOR, + TCA_FLOW_DIVISOR, + TCA_FLOW_ACT, + TCA_FLOW_POLICE, + TCA_FLOW_EMATCHES, + TCA_FLOW_PERTURB, + __TCA_FLOW_MAX +}; + +#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1) + +/* Basic filter */ + +enum { + TCA_BASIC_UNSPEC, + TCA_BASIC_CLASSID, + TCA_BASIC_EMATCHES, + TCA_BASIC_ACT, + TCA_BASIC_POLICE, + __TCA_BASIC_MAX +}; + +#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) + + +/* Cgroup classifier */ + +enum { + TCA_CGROUP_UNSPEC, + TCA_CGROUP_ACT, + TCA_CGROUP_POLICE, + TCA_CGROUP_EMATCHES, + __TCA_CGROUP_MAX, +}; + +#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) + +/* BPF classifier */ + +#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0) + +enum { + TCA_BPF_UNSPEC, + TCA_BPF_ACT, + TCA_BPF_POLICE, + TCA_BPF_CLASSID, + TCA_BPF_OPS_LEN, + TCA_BPF_OPS, + TCA_BPF_FD, + TCA_BPF_NAME, + TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, + TCA_BPF_TAG, + TCA_BPF_ID, + __TCA_BPF_MAX, +}; + +#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) + +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + + TCA_FLOWER_FLAGS, + TCA_FLOWER_KEY_VLAN_ID, /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ + + TCA_FLOWER_KEY_FLAGS, /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */ + + TCA_FLOWER_KEY_ARP_SIP, /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK, /* be32 */ + TCA_FLOWER_KEY_ARP_TIP, /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK, /* be32 */ + TCA_FLOWER_KEY_ARP_OP, /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK, /* u8 */ + TCA_FLOWER_KEY_ARP_SHA, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */ + + TCA_FLOWER_KEY_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID, /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS, + TCA_FLOWER_KEY_ENC_OPTS_MASK, + + TCA_FLOWER_IN_HW_COUNT, + + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, + TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ + * attributes + */ + __TCA_FLOWER_KEY_ENC_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ + + __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) + +enum { + TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), +}; + +/* Match-all classifier */ + +enum { + TCA_MATCHALL_UNSPEC, + TCA_MATCHALL_CLASSID, + TCA_MATCHALL_ACT, + TCA_MATCHALL_FLAGS, + __TCA_MATCHALL_MAX, +}; + +#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1) + +/* Extended Matches */ + +struct tcf_ematch_tree_hdr { + __u16 nmatches; + __u16 progid; +}; + +enum { + TCA_EMATCH_TREE_UNSPEC, + TCA_EMATCH_TREE_HDR, + TCA_EMATCH_TREE_LIST, + __TCA_EMATCH_TREE_MAX +}; +#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1) + +struct tcf_ematch_hdr { + __u16 matchid; + __u16 kind; + __u16 flags; + __u16 pad; /* currently unused */ +}; + +/* 0 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + * +-----------------------+-+-+---+ + * | Unused |S|I| R | + * +-----------------------+-+-+---+ + * + * R(2) ::= relation to next ematch + * where: 0 0 END (last ematch) + * 0 1 AND + * 1 0 OR + * 1 1 Unused (invalid) + * I(1) ::= invert result + * S(1) ::= simple payload + */ +#define TCF_EM_REL_END 0 +#define TCF_EM_REL_AND (1<<0) +#define TCF_EM_REL_OR (1<<1) +#define TCF_EM_INVERT (1<<2) +#define TCF_EM_SIMPLE (1<<3) + +#define TCF_EM_REL_MASK 3 +#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK) + +enum { + TCF_LAYER_LINK, + TCF_LAYER_NETWORK, + TCF_LAYER_TRANSPORT, + __TCF_LAYER_MAX +}; +#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1) + +/* Ematch type assignments + * 1..32767 Reserved for ematches inside kernel tree + * 32768..65535 Free to use, not reliable + */ +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_VLAN 6 +#define TCF_EM_CANID 7 +#define TCF_EM_IPSET 8 +#define TCF_EM_IPT 9 +#define TCF_EM_MAX 9 + +enum { + TCF_EM_PROG_TC +}; + +enum { + TCF_EM_OPND_EQ, + TCF_EM_OPND_GT, + TCF_EM_OPND_LT +}; + +#endif diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index c0d7ea0bf5b6..b17201edfa09 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -212,6 +212,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h new file mode 100644 index 000000000000..6e89a5df49a4 --- /dev/null +++ b/tools/include/uapi/linux/tc_act/tc_bpf.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_BPF_H +#define __LINUX_TC_BPF_H + +#include <linux/pkt_cls.h> + +#define TCA_ACT_BPF 13 + +struct tc_act_bpf { + tc_gen; +}; + +enum { + TCA_ACT_BPF_UNSPEC, + TCA_ACT_BPF_TM, + TCA_ACT_BPF_PARMS, + TCA_ACT_BPF_OPS_LEN, + TCA_ACT_BPF_OPS, + TCA_ACT_BPF_FD, + TCA_ACT_BPF_NAME, + TCA_ACT_BPF_PAD, + TCA_ACT_BPF_TAG, + TCA_ACT_BPF_ID, + __TCA_ACT_BPF_MAX, +}; +#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) + +#endif diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 7bc31c905018..197b40f5b5c6 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 425b480bda75..34d9c3619c96 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -66,7 +66,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray +FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx FEATURE_DISPLAY = libelf bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -145,14 +145,26 @@ include $(srctree)/tools/build/Makefile.include BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +VERSION_SCRIPT := libbpf.map + +GLOBAL_SYM_COUNT = $(shell readelf -s $(BPF_IN) | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') +VERSIONED_SYM_COUNT = $(shell readelf -s $(OUTPUT)libbpf.so | \ + grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_FILE) +CXX_TEST_TARGET = $(OUTPUT)test_libbpf + +ifeq ($(feature-cxx), 1) + CMD_TARGETS += $(CXX_TEST_TARGET) +endif + TARGETS = $(CMD_TARGETS) all: fixdep all_cmd -all_cmd: $(CMD_TARGETS) +all_cmd: $(CMD_TARGETS) check $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ @@ -170,11 +182,27 @@ $(BPF_IN): force elfdep bpfdep $(Q)$(MAKE) $(build)=libbpf $(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared $^ -o $@ + $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ + $^ -o $@ $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ +$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a + $(QUIET_LINK)$(CXX) $^ -lelf -o $@ + +check: check_abi + +check_abi: $(OUTPUT)libbpf.so + @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ + echo "Warning: Num of global symbols in $(BPF_IN)" \ + "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \ + "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ + "Please make sure all LIBBPF_API symbols are" \ + "versioned in $(VERSION_SCRIPT)." >&2; \ + exit 1; \ + fi + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -201,8 +229,8 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \ - $(RM) LIBBPF-CFLAGS + $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ + *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst new file mode 100644 index 000000000000..056f38310722 --- /dev/null +++ b/tools/lib/bpf/README.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +libbpf API naming convention +============================ + +libbpf API provides access to a few logically separated groups of +functions and types. Every group has its own naming convention +described here. It's recommended to follow these conventions whenever a +new function or type is added to keep libbpf API clean and consistent. + +All types and functions provided by libbpf API should have one of the +following prefixes: ``bpf_``, ``btf_``, ``libbpf_``. + +System call wrappers +-------------------- + +System call wrappers are simple wrappers for commands supported by +sys_bpf system call. These wrappers should go to ``bpf.h`` header file +and map one-on-one to corresponding commands. + +For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` +command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. + +Objects +------- + +Another class of types and functions provided by libbpf API is "objects" +and functions to work with them. Objects are high-level abstractions +such as BPF program or BPF map. They're represented by corresponding +structures such as ``struct bpf_object``, ``struct bpf_program``, +``struct bpf_map``, etc. + +Structures are forward declared and access to their fields should be +provided via corresponding getters and setters rather than directly. + +These objects are associated with corresponding parts of ELF object that +contains compiled BPF programs. + +For example ``struct bpf_object`` represents ELF object itself created +from an ELF file or from a buffer, ``struct bpf_program`` represents a +program in ELF object and ``struct bpf_map`` is a map. + +Functions that work with an object have names built from object name, +double underscore and part that describes function purpose. + +For example ``bpf_object__open`` consists of the name of corresponding +object, ``bpf_object``, double underscore and ``open`` that defines the +purpose of the function to open ELF file and create ``bpf_object`` from +it. + +Another example: ``bpf_program__load`` is named for corresponding +object, ``bpf_program``, that is separated from other part of the name +by double underscore. + +All objects and corresponding functions other than BTF related should go +to ``libbpf.h``. BTF types and functions should go to ``btf.h``. + +Auxiliary functions +------------------- + +Auxiliary functions and types that don't fit well in any of categories +described above should have ``libbpf_`` prefix, e.g. +``libbpf_get_error`` or ``libbpf_prog_type_by_name``. + +libbpf ABI +========== + +libbpf can be both linked statically or used as DSO. To avoid possible +conflicts with other libraries an application is linked with, all +non-static libbpf symbols should have one of the prefixes mentioned in +API documentation above. See API naming convention to choose the right +name for a new symbol. + +Symbol visibility +----------------- + +libbpf follow the model when all global symbols have visibility "hidden" +by default and to make a symbol visible it has to be explicitly +attributed with ``LIBBPF_API`` macro. For example: + +.. code-block:: c + + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); + +This prevents from accidentally exporting a symbol, that is not supposed +to be a part of ABI what, in turn, improves both libbpf developer- and +user-experiences. + +ABI versionning +--------------- + +To make future ABI extensions possible libbpf ABI is versioned. +Versioning is implemented by ``libbpf.map`` version script that is +passed to linker. + +Version name is ``LIBBPF_`` prefix + three-component numeric version, +starting from ``0.0.1``. + +Every time ABI is being changed, e.g. because a new symbol is added or +semantic of existing symbol is changed, ABI version should be bumped. + +For example, if current state of ``libbpf.map`` is: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + +, and a new symbol ``bpf_func_c`` is being introduced, then +``libbpf.map`` should be changed like this: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + LIBBPF_0.0.2 { + global: + bpf_func_c; + } LIBBPF_0.0.1; + +, where new version ``LIBBPF_0.0.2`` depends on the previous +``LIBBPF_0.0.1``. + +Format of version script and ways to handle ABI changes, including +incompatible ones, described in details in [1]. + +Links +===== + +[1] https://www.akkadia.org/drepper/dsohowto.pdf + (Chapter 3. Maintaining APIs and ABIs). diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 03f9bcc4ef50..3caaa3428774 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -173,9 +173,35 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, -1); } +static void * +alloc_zero_tailing_info(const void *orecord, __u32 cnt, + __u32 actual_rec_size, __u32 expected_rec_size) +{ + __u64 info_len = actual_rec_size * cnt; + void *info, *nrecord; + int i; + + info = malloc(info_len); + if (!info) + return NULL; + + /* zero out bytes kernel does not understand */ + nrecord = info; + for (i = 0; i < cnt; i++) { + memcpy(nrecord, orecord, expected_rec_size); + memset(nrecord + expected_rec_size, 0, + actual_rec_size - expected_rec_size); + orecord += actual_rec_size; + nrecord += actual_rec_size; + } + + return info; +} + int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz) { + void *finfo = NULL, *linfo = NULL; union bpf_attr attr; __u32 name_len; int fd; @@ -196,19 +222,72 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_level = 0; attr.kern_version = load_attr->kern_version; attr.prog_ifindex = load_attr->prog_ifindex; + attr.prog_btf_fd = load_attr->prog_btf_fd; + attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_cnt = load_attr->func_info_cnt; + attr.func_info = ptr_to_u64(load_attr->func_info); + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + attr.line_info = ptr_to_u64(load_attr->line_info); memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (fd >= 0 || !log_buf || !log_buf_sz) + if (fd >= 0) return fd; + /* After bpf_prog_load, the kernel may modify certain attributes + * to give user space a hint how to deal with loading failure. + * Check to see whether we can make some changes and load again. + */ + while (errno == E2BIG && (!finfo || !linfo)) { + if (!finfo && attr.func_info_cnt && + attr.func_info_rec_size < load_attr->func_info_rec_size) { + /* try with corrected func info records */ + finfo = alloc_zero_tailing_info(load_attr->func_info, + load_attr->func_info_cnt, + load_attr->func_info_rec_size, + attr.func_info_rec_size); + if (!finfo) + goto done; + + attr.func_info = ptr_to_u64(finfo); + attr.func_info_rec_size = load_attr->func_info_rec_size; + } else if (!linfo && attr.line_info_cnt && + attr.line_info_rec_size < + load_attr->line_info_rec_size) { + linfo = alloc_zero_tailing_info(load_attr->line_info, + load_attr->line_info_cnt, + load_attr->line_info_rec_size, + attr.line_info_rec_size); + if (!linfo) + goto done; + + attr.line_info = ptr_to_u64(linfo); + attr.line_info_rec_size = load_attr->line_info_rec_size; + } else { + break; + } + + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + + if (fd >= 0) + goto done; + } + + if (!log_buf || !log_buf_sz) + goto done; + /* Try again with log */ attr.log_buf = ptr_to_u64(log_buf); attr.log_size = log_buf_sz; attr.log_level = 1; log_buf[0] = 0; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +done: + free(finfo); + free(linfo); + return fd; } int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -231,9 +310,9 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, } int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, int log_level) + size_t insns_cnt, __u32 prog_flags, const char *license, + __u32 kern_version, char *log_buf, size_t log_buf_sz, + int log_level) { union bpf_attr attr; @@ -247,7 +326,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_level = log_level; log_buf[0] = 0; attr.kern_version = kern_version; - attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + attr.prog_flags = prog_flags; return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } @@ -415,6 +494,29 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, return ret; } +int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) +{ + union bpf_attr attr; + int ret; + + if (!test_attr->data_out && test_attr->data_size_out > 0) + return -EINVAL; + + bzero(&attr, sizeof(attr)); + attr.test.prog_fd = test_attr->prog_fd; + attr.test.data_in = ptr_to_u64(test_attr->data_in); + attr.test.data_out = ptr_to_u64(test_attr->data_out); + attr.test.data_size_in = test_attr->data_size_in; + attr.test.data_size_out = test_attr->data_size_out; + attr.test.repeat = test_attr->repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + test_attr->data_size_out = attr.test.data_size_out; + test_attr->retval = attr.test.retval; + test_attr->duration = attr.test.duration; + return ret; +} + int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 26a51538213c..8f09de482839 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -27,6 +27,10 @@ #include <stdbool.h> #include <stddef.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -74,6 +78,13 @@ struct bpf_load_program_attr { const char *license; __u32 kern_version; __u32 prog_ifindex; + __u32 prog_btf_fd; + __u32 func_info_rec_size; + const void *func_info; + __u32 func_info_cnt; + __u32 line_info_rec_size; + const void *line_info; + __u32 line_info_cnt; }; /* Flags to direct loading requirements */ @@ -90,7 +101,7 @@ LIBBPF_API int bpf_load_program(enum bpf_prog_type type, char *log_buf, size_t log_buf_sz); LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, + size_t insns_cnt, __u32 prog_flags, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz, int log_level); @@ -110,6 +121,25 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); + +struct bpf_prog_test_run_attr { + int prog_fd; + int repeat; + const void *data_in; + __u32 data_size_in; + void *data_out; /* optional */ + __u32 data_size_out; /* in: max length of data_out + * out: length of data_out */ + __u32 retval; /* out: return code of the BPF program */ + __u32 duration; /* out: average per repetition in ns */ +}; + +LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); + +/* + * bpf_prog_test_run does not check that data_out is large enough. Consider + * using bpf_prog_test_run_xattr instead. + */ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); @@ -128,4 +158,9 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_BPF_H */ diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c new file mode 100644 index 000000000000..addd6e9971cc --- /dev/null +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include <string.h> +#include <stdlib.h> +#include <linux/err.h> +#include <linux/bpf.h> +#include "libbpf.h" + +#ifndef min +#define min(x, y) ((x) < (y) ? (x) : (y)) +#endif + +struct bpf_prog_linfo { + void *raw_linfo; + void *raw_jited_linfo; + __u32 *nr_jited_linfo_per_func; + __u32 *jited_linfo_func_idx; + __u32 nr_linfo; + __u32 nr_jited_func; + __u32 rec_size; + __u32 jited_rec_size; +}; + +static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo, + const __u64 *ksym_func, const __u32 *ksym_len) +{ + __u32 nr_jited_func, nr_linfo; + const void *raw_jited_linfo; + const __u64 *jited_linfo; + __u64 last_jited_linfo; + /* + * Index to raw_jited_linfo: + * i: Index for searching the next ksym_func + * prev_i: Index to the last found ksym_func + */ + __u32 i, prev_i; + __u32 f; /* Index to ksym_func */ + + raw_jited_linfo = prog_linfo->raw_jited_linfo; + jited_linfo = raw_jited_linfo; + if (ksym_func[0] != *jited_linfo) + goto errout; + + prog_linfo->jited_linfo_func_idx[0] = 0; + nr_jited_func = prog_linfo->nr_jited_func; + nr_linfo = prog_linfo->nr_linfo; + + for (prev_i = 0, i = 1, f = 1; + i < nr_linfo && f < nr_jited_func; + i++) { + raw_jited_linfo += prog_linfo->jited_rec_size; + last_jited_linfo = *jited_linfo; + jited_linfo = raw_jited_linfo; + + if (ksym_func[f] == *jited_linfo) { + prog_linfo->jited_linfo_func_idx[f] = i; + + /* Sanity check */ + if (last_jited_linfo - ksym_func[f - 1] + 1 > + ksym_len[f - 1]) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[f - 1] = + i - prev_i; + prev_i = i; + + /* + * The ksym_func[f] is found in jited_linfo. + * Look for the next one. + */ + f++; + } else if (*jited_linfo <= last_jited_linfo) { + /* Ensure the addr is increasing _within_ a func */ + goto errout; + } + } + + if (f != nr_jited_func) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] = + nr_linfo - prev_i; + + return 0; + +errout: + return -EINVAL; +} + +void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo) +{ + if (!prog_linfo) + return; + + free(prog_linfo->raw_linfo); + free(prog_linfo->raw_jited_linfo); + free(prog_linfo->nr_jited_linfo_per_func); + free(prog_linfo->jited_linfo_func_idx); + free(prog_linfo); +} + +struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) +{ + struct bpf_prog_linfo *prog_linfo; + __u32 nr_linfo, nr_jited_func; + + nr_linfo = info->nr_line_info; + + /* + * Test !info->line_info because the kernel may NULL + * the ptr if kernel.kptr_restrict is set. + */ + if (!nr_linfo || !info->line_info) + return NULL; + + /* + * The min size that bpf_prog_linfo has to access for + * searching purpose. + */ + if (info->line_info_rec_size < + offsetof(struct bpf_line_info, file_name_off)) + return NULL; + + prog_linfo = calloc(1, sizeof(*prog_linfo)); + if (!prog_linfo) + return NULL; + + /* Copy xlated line_info */ + prog_linfo->nr_linfo = nr_linfo; + prog_linfo->rec_size = info->line_info_rec_size; + prog_linfo->raw_linfo = malloc(nr_linfo * prog_linfo->rec_size); + if (!prog_linfo->raw_linfo) + goto err_free; + memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, + nr_linfo * prog_linfo->rec_size); + + nr_jited_func = info->nr_jited_ksyms; + if (!nr_jited_func || + !info->jited_line_info || + info->nr_jited_line_info != nr_linfo || + info->jited_line_info_rec_size < sizeof(__u64) || + info->nr_jited_func_lens != nr_jited_func || + !info->jited_ksyms || + !info->jited_func_lens) + /* Not enough info to provide jited_line_info */ + return prog_linfo; + + /* Copy jited_line_info */ + prog_linfo->nr_jited_func = nr_jited_func; + prog_linfo->jited_rec_size = info->jited_line_info_rec_size; + prog_linfo->raw_jited_linfo = malloc(nr_linfo * + prog_linfo->jited_rec_size); + if (!prog_linfo->raw_jited_linfo) + goto err_free; + memcpy(prog_linfo->raw_jited_linfo, + (void *)(long)info->jited_line_info, + nr_linfo * prog_linfo->jited_rec_size); + + /* Number of jited_line_info per jited func */ + prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->nr_jited_linfo_per_func) + goto err_free; + + /* + * For each jited func, + * the start idx to the "linfo" and "jited_linfo" array, + */ + prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->jited_linfo_func_idx) + goto err_free; + + if (dissect_jited_func(prog_linfo, + (__u64 *)(long)info->jited_ksyms, + (__u32 *)(long)info->jited_func_lens)) + goto err_free; + + return prog_linfo; + +err_free: + bpf_prog_linfo__free(prog_linfo); + return NULL; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip) +{ + __u32 jited_rec_size, rec_size, nr_linfo, start, i; + const void *raw_jited_linfo, *raw_linfo; + const __u64 *jited_linfo; + + if (func_idx >= prog_linfo->nr_jited_func) + return NULL; + + nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; + if (nr_skip >= nr_linfo) + return NULL; + + start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; + jited_rec_size = prog_linfo->jited_rec_size; + raw_jited_linfo = prog_linfo->raw_jited_linfo + + (start * jited_rec_size); + jited_linfo = raw_jited_linfo; + if (addr < *jited_linfo) + return NULL; + + nr_linfo -= nr_skip; + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (start * rec_size); + for (i = 0; i < nr_linfo; i++) { + if (addr < *jited_linfo) + break; + + raw_linfo += rec_size; + raw_jited_linfo += jited_rec_size; + jited_linfo = raw_jited_linfo; + } + + return raw_linfo - rec_size; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip) +{ + const struct bpf_line_info *linfo; + __u32 rec_size, nr_linfo, i; + const void *raw_linfo; + + nr_linfo = prog_linfo->nr_linfo; + if (nr_skip >= nr_linfo) + return NULL; + + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size); + linfo = raw_linfo; + if (insn_off < linfo->insn_off) + return NULL; + + nr_linfo -= nr_skip; + for (i = 0; i < nr_linfo; i++) { + if (insn_off < linfo->insn_off) + break; + + raw_linfo += rec_size; + linfo = raw_linfo; + } + + return raw_linfo - rec_size; +} diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 449591aa9900..d682d3b8f7b9 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -37,6 +37,48 @@ struct btf { int fd; }; +struct btf_ext_info { + /* + * info points to a deep copy of the individual info section + * (e.g. func_info and line_info) from the .BTF.ext. + * It does not include the __u32 rec_size. + */ + void *info; + __u32 rec_size; + __u32 len; +}; + +struct btf_ext { + struct btf_ext_info func_info; + struct btf_ext_info line_info; +}; + +struct btf_ext_info_sec { + __u32 sec_name_off; + __u32 num_info; + /* Followed by num_info * record_size number of bytes */ + __u8 data[0]; +}; + +/* The minimum bpf_func_info checked by the loader */ +struct bpf_func_info_min { + __u32 insn_off; + __u32 type_id; +}; + +/* The minimum bpf_line_info checked by the loader */ +struct bpf_line_info_min { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + static int btf_add_type(struct btf *btf, struct btf_type *t) { if (btf->types_size - btf->nr_types < 2) { @@ -165,6 +207,10 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) case BTF_KIND_ENUM: next_type += vlen * sizeof(struct btf_enum); break; + case BTF_KIND_FUNC_PROTO: + next_type += vlen * sizeof(struct btf_param); + break; + case BTF_KIND_FUNC: case BTF_KIND_TYPEDEF: case BTF_KIND_PTR: case BTF_KIND_FWD: @@ -393,3 +439,350 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) else return NULL; } + +int btf__get_from_id(__u32 id, struct btf **btf) +{ + struct bpf_btf_info btf_info = { 0 }; + __u32 len = sizeof(btf_info); + __u32 last_size; + int btf_fd; + void *ptr; + int err; + + err = 0; + *btf = NULL; + btf_fd = bpf_btf_get_fd_by_id(id); + if (btf_fd < 0) + return 0; + + /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so + * let's start with a sane default - 4KiB here - and resize it only if + * bpf_obj_get_info_by_fd() needs a bigger buffer. + */ + btf_info.btf_size = 4096; + last_size = btf_info.btf_size; + ptr = malloc(last_size); + if (!ptr) { + err = -ENOMEM; + goto exit_free; + } + + bzero(ptr, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + + if (!err && btf_info.btf_size > last_size) { + void *temp_ptr; + + last_size = btf_info.btf_size; + temp_ptr = realloc(ptr, last_size); + if (!temp_ptr) { + err = -ENOMEM; + goto exit_free; + } + ptr = temp_ptr; + bzero(ptr, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + } + + if (err || btf_info.btf_size > last_size) { + err = errno; + goto exit_free; + } + + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL); + if (IS_ERR(*btf)) { + err = PTR_ERR(*btf); + *btf = NULL; + } + +exit_free: + close(btf_fd); + free(ptr); + + return err; +} + +struct btf_ext_sec_copy_param { + __u32 off; + __u32 len; + __u32 min_rec_size; + struct btf_ext_info *ext_info; + const char *desc; +}; + +static int btf_ext_copy_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + struct btf_ext_sec_copy_param *ext_sec, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + const struct btf_ext_info_sec *sinfo; + struct btf_ext_info *ext_info; + __u32 info_left, record_size; + /* The start of the info sec (including the __u32 record_size). */ + const void *info; + + /* data and data_size do not include btf_ext_header from now on */ + data = data + hdr->hdr_len; + data_size -= hdr->hdr_len; + + if (ext_sec->off & 0x03) { + elog(".BTF.ext %s section is not aligned to 4 bytes\n", + ext_sec->desc); + return -EINVAL; + } + + if (data_size < ext_sec->off || + ext_sec->len > data_size - ext_sec->off) { + elog("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + ext_sec->desc, ext_sec->off, ext_sec->len); + return -EINVAL; + } + + info = data + ext_sec->off; + info_left = ext_sec->len; + + /* At least a record size */ + if (info_left < sizeof(__u32)) { + elog(".BTF.ext %s record size not found\n", ext_sec->desc); + return -EINVAL; + } + + /* The record size needs to meet the minimum standard */ + record_size = *(__u32 *)info; + if (record_size < ext_sec->min_rec_size || + record_size & 0x03) { + elog("%s section in .BTF.ext has invalid record size %u\n", + ext_sec->desc, record_size); + return -EINVAL; + } + + sinfo = info + sizeof(__u32); + info_left -= sizeof(__u32); + + /* If no records, return failure now so .BTF.ext won't be used. */ + if (!info_left) { + elog("%s section in .BTF.ext has no records", ext_sec->desc); + return -EINVAL; + } + + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u64 total_record_size; + __u32 num_records; + + if (info_left < sec_hdrlen) { + elog("%s section header is not found in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + num_records = sinfo->num_info; + if (num_records == 0) { + elog("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + total_record_size = sec_hdrlen + + (__u64)num_records * record_size; + if (info_left < total_record_size) { + elog("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + info_left -= total_record_size; + sinfo = (void *)sinfo + total_record_size; + } + + ext_info = ext_sec->ext_info; + ext_info->len = ext_sec->len - sizeof(__u32); + ext_info->rec_size = record_size; + ext_info->info = malloc(ext_info->len); + if (!ext_info->info) + return -ENOMEM; + memcpy(ext_info->info, info + sizeof(__u32), ext_info->len); + + return 0; +} + +static int btf_ext_copy_func_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + struct btf_ext_sec_copy_param param = { + .off = hdr->func_info_off, + .len = hdr->func_info_len, + .min_rec_size = sizeof(struct bpf_func_info_min), + .ext_info = &btf_ext->func_info, + .desc = "func_info" + }; + + return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); +} + +static int btf_ext_copy_line_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + struct btf_ext_sec_copy_param param = { + .off = hdr->line_info_off, + .len = hdr->line_info_len, + .min_rec_size = sizeof(struct bpf_line_info_min), + .ext_info = &btf_ext->line_info, + .desc = "line_info", + }; + + return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); +} + +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + + if (data_size < offsetof(struct btf_ext_header, func_info_off) || + data_size < hdr->hdr_len) { + elog("BTF.ext header not found"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + elog("Invalid BTF.ext magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + elog("Unsupported BTF.ext version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + elog("Unsupported BTF.ext flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + if (data_size == hdr->hdr_len) { + elog("BTF.ext has no data\n"); + return -EINVAL; + } + + return 0; +} + +void btf_ext__free(struct btf_ext *btf_ext) +{ + if (!btf_ext) + return; + + free(btf_ext->func_info.info); + free(btf_ext->line_info.info); + free(btf_ext); +} + +struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +{ + struct btf_ext *btf_ext; + int err; + + err = btf_ext_parse_hdr(data, size, err_log); + if (err) + return ERR_PTR(err); + + btf_ext = calloc(1, sizeof(struct btf_ext)); + if (!btf_ext) + return ERR_PTR(-ENOMEM); + + err = btf_ext_copy_func_info(btf_ext, data, size, err_log); + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); + } + + err = btf_ext_copy_line_info(btf_ext, data, size, err_log); + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); + } + + return btf_ext; +} + +static int btf_ext_reloc_info(const struct btf *btf, + const struct btf_ext_info *ext_info, + const char *sec_name, __u32 insns_cnt, + void **info, __u32 *cnt) +{ + __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, record_size, existing_len, records_len; + struct btf_ext_info_sec *sinfo; + const char *info_sec_name; + __u64 remain_len; + void *data; + + record_size = ext_info->rec_size; + sinfo = ext_info->info; + remain_len = ext_info->len; + while (remain_len > 0) { + records_len = sinfo->num_info * record_size; + info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); + if (strcmp(info_sec_name, sec_name)) { + remain_len -= sec_hdrlen + records_len; + sinfo = (void *)sinfo + sec_hdrlen + records_len; + continue; + } + + existing_len = (*cnt) * record_size; + data = realloc(*info, existing_len + records_len); + if (!data) + return -ENOMEM; + + memcpy(data + existing_len, sinfo->data, records_len); + /* adjust insn_off only, the rest data will be passed + * to the kernel. + */ + for (i = 0; i < sinfo->num_info; i++) { + __u32 *insn_off; + + insn_off = data + existing_len + (i * record_size); + *insn_off = *insn_off / sizeof(struct bpf_insn) + + insns_cnt; + } + *info = data; + *cnt += sinfo->num_info; + return 0; + } + + return -ENOENT; +} + +int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, + insns_cnt, func_info, cnt); +} + +int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, + insns_cnt, line_info, cnt); +} + +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->func_info.rec_size; +} + +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->line_info.rec_size; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b77e7080f7e7..b0610dcdae6b 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -6,15 +6,55 @@ #include <linux/types.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif #define BTF_ELF_SEC ".BTF" +#define BTF_EXT_ELF_SEC ".BTF.ext" struct btf; +struct btf_ext; struct btf_type; +/* + * The .BTF.ext ELF section layout defined as + * struct btf_ext_header + * func_info subsection + * + * The func_info subsection layout: + * record size for struct bpf_func_info in the func_info subsection + * struct btf_sec_func_info for section #1 + * a list of bpf_func_info records for section #1 + * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + * but may not be identical + * struct btf_sec_func_info for section #2 + * a list of bpf_func_info records for section #2 + * ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. + */ +struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; + __u32 line_info_off; + __u32 line_info_len; +}; + typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); @@ -28,5 +68,23 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); + +struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +void btf_ext__free(struct btf_ext *btf_ext); +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *func_info_len); +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); + +#ifdef __cplusplus +} /* extern "C" */ +#endif #endif /* __LIBBPF_BTF_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d6e62e90e8d4..e2bc75ee1614 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9,7 +9,9 @@ * Copyright (C) 2017 Nicira, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdlib.h> #include <stdio.h> #include <stdarg.h> @@ -24,6 +26,7 @@ #include <linux/kernel.h> #include <linux/bpf.h> #include <linux/btf.h> +#include <linux/filter.h> #include <linux/list.h> #include <linux/limits.h> #include <linux/perf_event.h> @@ -114,6 +117,11 @@ void libbpf_set_print(libbpf_print_fn_t warn, # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ #endif +struct bpf_capabilities { + /* v4.14: kernel support for program & map names. */ + __u32 name:1; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -124,6 +132,10 @@ struct bpf_program { char *name; int prog_ifindex; char *section_name; + /* section_name with / replaced by _; makes recursive pinning + * in bpf_object__pin_programs easier + */ + char *pin_name; struct bpf_insn *insns; size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; @@ -152,6 +164,16 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; + int btf_fd; + void *func_info; + __u32 func_info_rec_size; + __u32 func_info_cnt; + + struct bpf_capabilities *caps; + + void *line_info; + __u32 line_info_rec_size; + __u32 line_info_cnt; }; struct bpf_map { @@ -159,6 +181,7 @@ struct bpf_map { char *name; size_t offset; int map_ifindex; + int inner_map_fd; struct bpf_map_def def; __u32 btf_key_type_id; __u32 btf_value_type_id; @@ -208,10 +231,13 @@ struct bpf_object { struct list_head list; struct btf *btf; + struct btf_ext *btf_ext; void *priv; bpf_object_clear_priv_t clear_priv; + struct bpf_capabilities caps; + char path[]; }; #define obj_elf_valid(o) ((o)->efile.elf) @@ -237,6 +263,9 @@ void bpf_program__unload(struct bpf_program *prog) prog->instances.nr = -1; zfree(&prog->instances.fds); + + zclose(prog->btf_fd); + zfree(&prog->func_info); } static void bpf_program__exit(struct bpf_program *prog) @@ -253,6 +282,7 @@ static void bpf_program__exit(struct bpf_program *prog) bpf_program__unload(prog); zfree(&prog->name); zfree(&prog->section_name); + zfree(&prog->pin_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -261,6 +291,17 @@ static void bpf_program__exit(struct bpf_program *prog) prog->idx = -1; } +static char *__bpf_program__pin_name(struct bpf_program *prog) +{ + char *name, *p; + + name = p = strdup(prog->section_name); + while ((p = strchr(p, '/'))) + *p = '_'; + + return name; +} + static int bpf_program__init(void *data, size_t size, char *section_name, int idx, struct bpf_program *prog) @@ -279,6 +320,13 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, goto errout; } + prog->pin_name = __bpf_program__pin_name(prog); + if (!prog->pin_name) { + pr_warning("failed to alloc pin name for prog under section(%d) %s\n", + idx, section_name); + goto errout; + } + prog->insns = malloc(size); if (!prog->insns) { pr_warning("failed to alloc insns for prog under section %s\n", @@ -291,7 +339,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->idx = idx; prog->instances.fds = NULL; prog->instances.nr = -1; - prog->type = BPF_PROG_TYPE_KPROBE; + prog->type = BPF_PROG_TYPE_UNSPEC; + prog->btf_fd = -1; return 0; errout: @@ -310,6 +359,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, if (err) return err; + prog.caps = &obj->caps; progs = obj->programs; nr_progs = obj->nr_programs; @@ -562,6 +612,14 @@ static int compare_bpf_map(const void *_a, const void *_b) return a->offset - b->offset; } +static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) +{ + if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + type == BPF_MAP_TYPE_HASH_OF_MAPS) + return true; + return false; +} + static int bpf_object__init_maps(struct bpf_object *obj, int flags) { @@ -625,13 +683,15 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) } obj->nr_maps = nr_maps; - /* - * fill all fd with -1 so won't close incorrect - * fd (fd=0 is stdin) when failure (zclose won't close - * negative fd)). - */ - for (i = 0; i < nr_maps; i++) + for (i = 0; i < nr_maps; i++) { + /* + * fill all fd with -1 so won't close incorrect + * fd (fd=0 is stdin) when failure (zclose won't close + * negative fd)). + */ obj->maps[i].fd = -1; + obj->maps[i].inner_map_fd = -1; + } /* * Fill obj->maps using data in "maps" section. @@ -723,6 +783,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; + Elf_Data *btf_ext_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; @@ -784,6 +845,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) BTF_ELF_SEC, PTR_ERR(obj->btf)); obj->btf = NULL; } + } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { + btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -845,6 +908,22 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } + if (btf_ext_data) { + if (!obj->btf) { + pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", + BTF_EXT_ELF_SEC, BTF_ELF_SEC); + } else { + obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, + btf_ext_data->d_size, + __pr_debug); + if (IS_ERR(obj->btf_ext)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, + PTR_ERR(obj->btf_ext)); + obj->btf_ext = NULL; + } + } + } if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj, flags); if (err) @@ -1095,6 +1174,52 @@ err_free_new_name: } static int +bpf_object__probe_name(struct bpf_object *obj) +{ + struct bpf_load_program_attr attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret; + + /* make sure basic loading works */ + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", + __func__, cp, errno); + return -errno; + } + close(ret); + + /* now try the same program, but with the name */ + + attr.name = "test"; + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret >= 0) { + obj->caps.name = 1; + close(ret); + } + + return 0; +} + +static int +bpf_object__probe_caps(struct bpf_object *obj) +{ + return bpf_object__probe_name(obj); +} + +static int bpf_object__create_maps(struct bpf_object *obj) { struct bpf_create_map_attr create_attr = {}; @@ -1113,7 +1238,8 @@ bpf_object__create_maps(struct bpf_object *obj) continue; } - create_attr.name = map->name; + if (obj->caps.name) + create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; create_attr.map_flags = def->map_flags; @@ -1123,6 +1249,9 @@ bpf_object__create_maps(struct bpf_object *obj) create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; + if (bpf_map_type__is_map_in_map(def->type) && + map->inner_map_fd >= 0) + create_attr.inner_map_fd = map->inner_map_fd; if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { create_attr.btf_fd = btf__fd(obj->btf); @@ -1161,12 +1290,89 @@ bpf_object__create_maps(struct bpf_object *obj) } static int +check_btf_ext_reloc_err(struct bpf_program *prog, int err, + void *btf_prog_info, const char *info_name) +{ + if (err != -ENOENT) { + pr_warning("Error in loading %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ + + if (btf_prog_info) { + /* + * Some info has already been found but has problem + * in the last btf_ext reloc. Must have to error + * out. + */ + pr_warning("Error in relocating %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* + * Have problem loading the very first info. Ignore + * the rest. + */ + pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", + info_name, prog->section_name, info_name); + return 0; +} + +static int +bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, + const char *section_name, __u32 insn_offset) +{ + int err; + + if (!insn_offset || prog->func_info) { + /* + * !insn_offset => main program + * + * For sub prog, the main program's func_info has to + * be loaded first (i.e. prog->func_info != NULL) + */ + err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->func_info, + &prog->func_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->func_info, + "bpf_func_info"); + + prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); + } + + if (!insn_offset || prog->line_info) { + err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->line_info, + &prog->line_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->line_info, + "bpf_line_info"); + + prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); + } + + if (!insn_offset) + prog->btf_fd = btf__fd(obj->btf); + + return 0; +} + +static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) { struct bpf_insn *insn, *new_insn; struct bpf_program *text; size_t new_cnt; + int err; if (relo->type != RELO_CALL) return -LIBBPF_ERRNO__RELOC; @@ -1189,6 +1395,15 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, pr_warning("oom in prog realloc\n"); return -ENOMEM; } + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + text->section_name, + prog->insns_cnt); + if (err) + return err; + } + memcpy(new_insn + prog->insns_cnt, text->insns, text->insns_cnt * sizeof(*insn)); prog->insns = new_insn; @@ -1208,7 +1423,17 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { int i, err; - if (!prog || !prog->reloc_desc) + if (!prog) + return 0; + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + prog->section_name, 0); + if (err) + return err; + } + + if (!prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { @@ -1296,9 +1521,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, - const char *name, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd, int prog_ifindex) +load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, + char *license, __u32 kern_version, int *pfd) { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -1306,15 +1530,22 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, int ret; memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = expected_attach_type; - load_attr.name = name; + load_attr.prog_type = prog->type; + load_attr.expected_attach_type = prog->expected_attach_type; + if (prog->caps->name) + load_attr.name = prog->name; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog_ifindex; - + load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + load_attr.func_info = prog->func_info; + load_attr.func_info_rec_size = prog->func_info_rec_size; + load_attr.func_info_cnt = prog->func_info_cnt; + load_attr.line_info = prog->line_info; + load_attr.line_info_rec_size = prog->line_info_rec_size; + load_attr.line_info_cnt = prog->line_info_cnt; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; @@ -1394,10 +1625,8 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, prog->insns, prog->insns_cnt, - license, kern_version, &fd, - prog->prog_ifindex); + err = load_program(prog, prog->insns, prog->insns_cnt, + license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1425,11 +1654,9 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, result.new_insn_ptr, + err = load_program(prog, result.new_insn_ptr, result.new_insn_cnt, - license, kern_version, &fd, - prog->prog_ifindex); + license, kern_version, &fd); if (err) { pr_warning("Loading the %dth instance of program '%s' failed\n", @@ -1495,12 +1722,12 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: - return false; case BPF_PROG_TYPE_UNSPEC: - case BPF_PROG_TYPE_KPROBE: case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + return false; + case BPF_PROG_TYPE_KPROBE: default: return true; } @@ -1627,6 +1854,7 @@ int bpf_object__load(struct bpf_object *obj) obj->loaded = true; + CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__create_maps(obj), err, out); CHECK_ERR(bpf_object__relocate(obj), err, out); CHECK_ERR(bpf_object__load_progs(obj), err, out); @@ -1699,6 +1927,34 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, return 0; } +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, + int instance) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (instance < 0 || instance >= prog->instances.nr) { + pr_warning("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned program '%s'\n", path); + + return 0; +} + static int make_dir(const char *path) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -1733,6 +1989,11 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) return -EINVAL; } + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__pin_instance(prog, path, 0); + } + err = make_dir(path); if (err) return err; @@ -1742,16 +2003,83 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) int len; len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) { + err = -EINVAL; + goto err_unpin; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin; + } + + err = bpf_program__pin_instance(prog, buf, i); + if (err) + goto err_unpin; + } + + return 0; + +err_unpin: + for (i = i - 1; i >= 0; i--) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin_instance(prog, buf, i); + } + + rmdir(path); + + return err; +} + +int bpf_program__unpin(struct bpf_program *prog, const char *path) +{ + int i, err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (prog->instances.nr <= 0) { + pr_warning("no instances of prog %s to pin\n", + prog->section_name); + return -EINVAL; + } + + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__unpin_instance(prog, path, 0); + } + + for (i = 0; i < prog->instances.nr; i++) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_program__pin_instance(prog, buf, i); + err = bpf_program__unpin_instance(prog, buf, i); if (err) return err; } + err = rmdir(path); + if (err) + return -errno; + return 0; } @@ -1776,12 +2104,33 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } pr_debug("pinned map '%s'\n", path); + return 0; } -int bpf_object__pin(struct bpf_object *obj, const char *path) +int bpf_map__unpin(struct bpf_map *map, const char *path) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (map == NULL) { + pr_warning("invalid map pointer\n"); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned map '%s'\n", path); + + return 0; +} + +int bpf_object__pin_maps(struct bpf_object *obj, const char *path) { - struct bpf_program *prog; struct bpf_map *map; int err; @@ -1803,28 +2152,142 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); + if (len < 0) { + err = -EINVAL; + goto err_unpin_maps; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_maps; + } + + err = bpf_map__pin(map, buf); + if (err) + goto err_unpin_maps; + } + + return 0; + +err_unpin_maps: + while ((map = bpf_map__prev(map, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_map__unpin(map, buf); + } + + return err; +} + +int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) +{ + struct bpf_map *map; + int err; + + if (!obj) + return -ENOENT; + + bpf_map__for_each(map, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_map__pin(map, buf); + err = bpf_map__unpin(map, buf); if (err) return err; } + return 0; +} + +int bpf_object__pin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + if (!obj->loaded) { + pr_warning("object not yet loaded; load it first\n"); + return -ENOENT; + } + + err = make_dir(path); + if (err) + return err; + bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; int len; len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->section_name); + prog->pin_name); + if (len < 0) { + err = -EINVAL; + goto err_unpin_programs; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_programs; + } + + err = bpf_program__pin(prog, buf); + if (err) + goto err_unpin_programs; + } + + return 0; + +err_unpin_programs: + while ((prog = bpf_program__prev(prog, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin(prog, buf); + } + + return err; +} + +int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + bpf_object__for_each_program(prog, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_program__pin(prog, buf); + err = bpf_program__unpin(prog, buf); if (err) return err; } @@ -1832,6 +2295,23 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) return 0; } +int bpf_object__pin(struct bpf_object *obj, const char *path) +{ + int err; + + err = bpf_object__pin_maps(obj, path); + if (err) + return err; + + err = bpf_object__pin_programs(obj, path); + if (err) { + bpf_object__unpin_maps(obj, path); + return err; + } + + return 0; +} + void bpf_object__close(struct bpf_object *obj) { size_t i; @@ -1845,6 +2325,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_object__elf_finish(obj); bpf_object__unload(obj); btf__free(obj->btf); + btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); @@ -1918,23 +2399,26 @@ void *bpf_object__priv(struct bpf_object *obj) } static struct bpf_program * -__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) +__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward) { - size_t idx; + size_t nr_programs = obj->nr_programs; + ssize_t idx; - if (!obj->programs) + if (!nr_programs) return NULL; - /* First handler */ - if (prev == NULL) - return &obj->programs[0]; - if (prev->obj != obj) { + if (!p) + /* Iter from the beginning */ + return forward ? &obj->programs[0] : + &obj->programs[nr_programs - 1]; + + if (p->obj != obj) { pr_warning("error: program handler doesn't match object\n"); return NULL; } - idx = (prev - obj->programs) + 1; - if (idx >= obj->nr_programs) + idx = (p - obj->programs) + (forward ? 1 : -1); + if (idx >= obj->nr_programs || idx < 0) return NULL; return &obj->programs[idx]; } @@ -1945,7 +2429,19 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) struct bpf_program *prog = prev; do { - prog = __bpf_program__next(prog, obj); + prog = __bpf_program__iter(prog, obj, true); + } while (prog && bpf_program__is_function_storage(prog, obj)); + + return prog; +} + +struct bpf_program * +bpf_program__prev(struct bpf_program *next, struct bpf_object *obj) +{ + struct bpf_program *prog = next; + + do { + prog = __bpf_program__iter(prog, obj, false); } while (prog && bpf_program__is_function_storage(prog, obj)); return prog; @@ -2272,10 +2768,24 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) map->map_ifindex = ifindex; } -struct bpf_map * -bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) { - size_t idx; + if (!bpf_map_type__is_map_in_map(map->def.type)) { + pr_warning("error: unsupported map type\n"); + return -EINVAL; + } + if (map->inner_map_fd != -1) { + pr_warning("error: inner_map_fd already specified\n"); + return -EINVAL; + } + map->inner_map_fd = fd; + return 0; +} + +static struct bpf_map * +__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i) +{ + ssize_t idx; struct bpf_map *s, *e; if (!obj || !obj->maps) @@ -2284,22 +2794,40 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) s = obj->maps; e = obj->maps + obj->nr_maps; - if (prev == NULL) - return s; - - if ((prev < s) || (prev >= e)) { + if ((m < s) || (m >= e)) { pr_warning("error in %s: map handler doesn't belong to object\n", __func__); return NULL; } - idx = (prev - obj->maps) + 1; - if (idx >= obj->nr_maps) + idx = (m - obj->maps) + i; + if (idx >= obj->nr_maps || idx < 0) return NULL; return &obj->maps[idx]; } struct bpf_map * +bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +{ + if (prev == NULL) + return obj->maps; + + return __bpf_map__iter(prev, obj, 1); +} + +struct bpf_map * +bpf_map__prev(struct bpf_map *next, struct bpf_object *obj) +{ + if (next == NULL) { + if (!obj->nr_maps) + return NULL; + return obj->maps + obj->nr_maps - 1; + } + + return __bpf_map__iter(next, obj, -1); +} + +struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) { struct bpf_map *pos; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1f3468dad8b2..5f68d7b75215 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -16,6 +16,10 @@ #include <sys/types.h> // for size_t #include <linux/bpf.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -71,6 +75,13 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, size_t obj_buf_sz, const char *name); +LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); +LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, + const char *path); LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); LIBBPF_API void bpf_object__close(struct bpf_object *object); @@ -112,6 +123,9 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, (pos) != NULL; \ (pos) = bpf_program__next((pos), (obj))) +LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, + struct bpf_object *obj); + typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ -131,7 +145,11 @@ LIBBPF_API int bpf_program__fd(struct bpf_program *prog); LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); +LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, + const char *path, + int instance); LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); +LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_insn; @@ -260,6 +278,9 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj); (pos) != NULL; \ (pos) = bpf_map__next((pos), (obj))) +LIBBPF_API struct bpf_map * +bpf_map__prev(struct bpf_map *map, struct bpf_object *obj); + LIBBPF_API int bpf_map__fd(struct bpf_map *map); LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); LIBBPF_API const char *bpf_map__name(struct bpf_map *map); @@ -274,6 +295,9 @@ LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); +LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); + +LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API long libbpf_get_error(const void *ptr); @@ -317,4 +341,22 @@ int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); + +struct bpf_prog_linfo; +struct bpf_prog_info; + +LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo); +LIBBPF_API struct bpf_prog_linfo * +bpf_prog_linfo__new(const struct bpf_prog_info *info); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_LIBBPF_H */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map new file mode 100644 index 000000000000..cd02cd4e2cc3 --- /dev/null +++ b/tools/lib/bpf/libbpf.map @@ -0,0 +1,126 @@ +LIBBPF_0.0.1 { + global: + bpf_btf_get_fd_by_id; + bpf_create_map; + bpf_create_map_in_map; + bpf_create_map_in_map_node; + bpf_create_map_name; + bpf_create_map_node; + bpf_create_map_xattr; + bpf_load_btf; + bpf_load_program; + bpf_load_program_xattr; + bpf_map__btf_key_type_id; + bpf_map__btf_value_type_id; + bpf_map__def; + bpf_map__fd; + bpf_map__is_offload_neutral; + bpf_map__name; + bpf_map__next; + bpf_map__pin; + bpf_map__prev; + bpf_map__priv; + bpf_map__reuse_fd; + bpf_map__set_ifindex; + bpf_map__set_inner_map_fd; + bpf_map__set_priv; + bpf_map__unpin; + bpf_map_delete_elem; + bpf_map_get_fd_by_id; + bpf_map_get_next_id; + bpf_map_get_next_key; + bpf_map_lookup_and_delete_elem; + bpf_map_lookup_elem; + bpf_map_update_elem; + bpf_obj_get; + bpf_obj_get_info_by_fd; + bpf_obj_pin; + bpf_object__btf_fd; + bpf_object__close; + bpf_object__find_map_by_name; + bpf_object__find_map_by_offset; + bpf_object__find_program_by_title; + bpf_object__kversion; + bpf_object__load; + bpf_object__name; + bpf_object__next; + bpf_object__open; + bpf_object__open_buffer; + bpf_object__open_xattr; + bpf_object__pin; + bpf_object__pin_maps; + bpf_object__pin_programs; + bpf_object__priv; + bpf_object__set_priv; + bpf_object__unload; + bpf_object__unpin_maps; + bpf_object__unpin_programs; + bpf_perf_event_read_simple; + bpf_prog_attach; + bpf_prog_detach; + bpf_prog_detach2; + bpf_prog_get_fd_by_id; + bpf_prog_get_next_id; + bpf_prog_load; + bpf_prog_load_xattr; + bpf_prog_query; + bpf_prog_test_run; + bpf_prog_test_run_xattr; + bpf_program__fd; + bpf_program__is_kprobe; + bpf_program__is_perf_event; + bpf_program__is_raw_tracepoint; + bpf_program__is_sched_act; + bpf_program__is_sched_cls; + bpf_program__is_socket_filter; + bpf_program__is_tracepoint; + bpf_program__is_xdp; + bpf_program__load; + bpf_program__next; + bpf_program__nth_fd; + bpf_program__pin; + bpf_program__pin_instance; + bpf_program__prev; + bpf_program__priv; + bpf_program__set_expected_attach_type; + bpf_program__set_ifindex; + bpf_program__set_kprobe; + bpf_program__set_perf_event; + bpf_program__set_prep; + bpf_program__set_priv; + bpf_program__set_raw_tracepoint; + bpf_program__set_sched_act; + bpf_program__set_sched_cls; + bpf_program__set_socket_filter; + bpf_program__set_tracepoint; + bpf_program__set_type; + bpf_program__set_xdp; + bpf_program__title; + bpf_program__unload; + bpf_program__unpin; + bpf_program__unpin_instance; + bpf_prog_linfo__free; + bpf_prog_linfo__new; + bpf_prog_linfo__lfind_addr_func; + bpf_prog_linfo__lfind; + bpf_raw_tracepoint_open; + bpf_set_link_xdp_fd; + bpf_task_fd_query; + bpf_verify_program; + btf__fd; + btf__find_by_name; + btf__free; + btf__get_from_id; + btf__name_by_offset; + btf__new; + btf__resolve_size; + btf__resolve_type; + btf__type_by_id; + libbpf_attach_type_by_name; + libbpf_get_error; + libbpf_prog_type_by_name; + libbpf_set_print; + libbpf_strerror; + local: + *; +}; diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index d83b17f8435c..4343e40588c6 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 Nicira, Inc. */ +#undef _GNU_SOURCE #include <stdio.h> #include <string.h> diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp new file mode 100644 index 000000000000..abf3fc25c9fa --- /dev/null +++ b/tools/lib/bpf/test_libbpf.cpp @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" + +/* do nothing, just make sure we can link successfully */ + +int main(int argc, char *argv[]) +{ + /* libbpf.h */ + libbpf_set_print(NULL, NULL, NULL); + + /* bpf.h */ + bpf_prog_get_fd_by_id(0); + + /* btf.h */ + btf__new(NULL, 0, NULL); +} diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 6dbb9fae0f9d..b8f3cca8e58b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -31,6 +31,8 @@ #include "elf.h" #include "warn.h" +#define MAX_NAME_LEN 128 + struct section *find_section_by_name(struct elf *elf, const char *name) { struct section *sec; @@ -298,6 +300,8 @@ static int read_symbols(struct elf *elf) /* Create parent/child links for any cold subfunctions */ list_for_each_entry(sec, &elf->sections, list) { list_for_each_entry(sym, &sec->symbol_list, list) { + char pname[MAX_NAME_LEN + 1]; + size_t pnamelen; if (sym->type != STT_FUNC) continue; sym->pfunc = sym->cfunc = sym; @@ -305,14 +309,21 @@ static int read_symbols(struct elf *elf) if (!coldstr) continue; - coldstr[0] = '\0'; - pfunc = find_symbol_by_name(elf, sym->name); - coldstr[0] = '.'; + pnamelen = coldstr - sym->name; + if (pnamelen > MAX_NAME_LEN) { + WARN("%s(): parent function name exceeds maximum length of %d characters", + sym->name, MAX_NAME_LEN); + return -1; + } + + strncpy(pname, sym->name, pnamelen); + pname[pnamelen] = '\0'; + pfunc = find_symbol_by_name(elf, pname); if (!pfunc) { WARN("%s(): can't find parent function", sym->name); - goto err; + return -1; } sym->pfunc = pfunc; diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 236b9b97dfdb..667c14e56031 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -55,7 +55,6 @@ counted. The following modifiers exist: S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU W - group is weak and will fallback to non-group if not schedulable, - only supported in 'perf stat' for now. The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e30d20fb482d..a0e8c23f9125 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -299,6 +299,11 @@ ifndef NO_BIONIC endif endif +ifeq ($(feature-get_current_dir_name), 1) + CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME +endif + + ifdef NO_LIBELF NO_DWARF := 1 NO_DEMANGLE := 1 diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 3ccb4f0bf088..d95655489f7e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -387,7 +387,7 @@ SHELL = $(SHELL_PATH) linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic -arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/ +arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/ beauty_outdir := $(OUTPUT)trace/beauty/generated beauty_ioctl_outdir := $(beauty_outdir)/ioctl diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 10cf889c6d75..488779bc4c8d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -391,7 +391,12 @@ try_again: ui__warning("%s\n", msg); goto try_again; } - + if ((errno == EINVAL || errno == EBADF) && + pos->leader != pos && + pos->weak_group) { + pos = perf_evlist__reset_weak_group(evlist, pos); + goto try_again; + } rc = -errno; perf_evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d1028d7755bb..a635abfa77b6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -383,32 +383,6 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter) return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; } -static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) -{ - struct perf_evsel *c2, *leader; - bool is_open = true; - - leader = evsel->leader; - pr_debug("Weak group for %s/%d failed\n", - leader->name, leader->nr_members); - - /* - * for_each_group_member doesn't work here because it doesn't - * include the first entry. - */ - evlist__for_each_entry(evsel_list, c2) { - if (c2 == evsel) - is_open = false; - if (c2->leader == leader) { - if (is_open) - perf_evsel__close(c2); - c2->leader = c2; - c2->nr_members = 0; - } - } - return leader; -} - static bool is_target_alive(struct target *_target, struct thread_map *threads) { @@ -477,7 +451,7 @@ try_again: if ((errno == EINVAL || errno == EBADF) && counter->leader != counter && counter->weak_group) { - counter = perf_evsel__reset_weak_group(counter); + counter = perf_evlist__reset_weak_group(evsel_list, counter); goto try_again; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b2838de13de0..aa0c73e57924 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1429,6 +1429,9 @@ int cmd_top(int argc, const char **argv) } } + if (opts->branch_stack && callchain_param.enabled) + symbol_conf.show_branchflag_count = true; + sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ perf_hpp_list.need_collapse = 1; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index dc8a6c4986ce..835619476370 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -108,6 +108,7 @@ struct trace { } stats; unsigned int max_stack; unsigned int min_stack; + bool raw_augmented_syscalls; bool not_ev_qualifier; bool live; bool full_time; @@ -1724,13 +1725,28 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, return printed; } -static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size) +static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented) { void *augmented_args = NULL; + /* + * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter + * and there we get all 6 syscall args plus the tracepoint common + * fields (sizeof(long)) and the syscall_nr (another long). So we check + * if that is the case and if so don't look after the sc->args_size, + * but always after the full raw_syscalls:sys_enter payload, which is + * fixed. + * + * We'll revisit this later to pass s->args_size to the BPF augmenter + * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it + * copies only what we need for each syscall, like what happens when we + * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace + * traffic to just what is needed for each syscall. + */ + int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size; - *augmented_args_size = sample->raw_size - sc->args_size; + *augmented_args_size = sample->raw_size - args_size; if (*augmented_args_size > 0) - augmented_args = sample->raw_data + sc->args_size; + augmented_args = sample->raw_data + args_size; return augmented_args; } @@ -1780,7 +1796,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, * here and avoid using augmented syscalls when the evsel is the raw_syscalls one. */ if (evsel != trace->syscalls.events.sys_enter) - augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size); + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); @@ -1833,7 +1849,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse goto out_put; args = perf_evsel__sc_tp_ptr(evsel, args, sample); - augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size); + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); fprintf(trace->output, "%s", msg); err = 0; @@ -3501,7 +3517,15 @@ int cmd_trace(int argc, const char **argv) evsel->handler = trace__sys_enter; evlist__for_each_entry(trace.evlist, evsel) { + bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0; + + if (raw_syscalls_sys_exit) { + trace.raw_augmented_syscalls = true; + goto init_augmented_syscall_tp; + } + if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { +init_augmented_syscall_tp: perf_evsel__init_augmented_syscall_tp(evsel); perf_evsel__init_augmented_syscall_tp_ret(evsel); evsel->handler = trace__sys_exit; diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c new file mode 100644 index 000000000000..90a19336310b --- /dev/null +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null + * + * This exactly matches what is marshalled into the raw_syscall:sys_enter + * payload expected by the 'perf trace' beautifiers. + * + * For now it just uses the existing tracepoint augmentation code in 'perf + * trace', in the next csets we'll hook up these with the sys_enter/sys_exit + * code that will combine entry/exit in a strace like way. + */ + +#include <stdio.h> +#include <linux/socket.h> + +/* bpf-output associated map */ +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct syscall_enter_args { + unsigned long long common_tp_fields; + long syscall_nr; + unsigned long args[6]; +}; + +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +struct augmented_filename { + unsigned int size; + int reserved; + char value[256]; +}; + +#define SYS_OPEN 2 +#define SYS_OPENAT 257 + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + struct { + struct syscall_enter_args args; + struct augmented_filename filename; + } augmented_args; + unsigned int len = sizeof(augmented_args); + const void *filename_arg = NULL; + + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + /* + * Yonghong and Edward Cree sayz: + * + * https://www.spinics.net/lists/netdev/msg531645.html + * + * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 + * >> 10: (bf) r1 = r6 + * >> 11: (07) r1 += 16 + * >> 12: (05) goto pc+2 + * >> 15: (79) r3 = *(u64 *)(r1 +0) + * >> dereference of modified ctx ptr R1 off=16 disallowed + * > Aha, we at least got a different error message this time. + * > And indeed llvm has done that optimisation, rather than the more obvious + * > 11: r3 = *(u64 *)(r1 +16) + * > because it wants to have lots of reads share a single insn. You may be able + * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone + * > with llvm knowledge can figure out how to stop it (ideally, llvm would know + * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯ + * + * The optimization mostly likes below: + * + * br1: + * ... + * r1 += 16 + * goto merge + * br2: + * ... + * r1 += 20 + * goto merge + * merge: + * *(u64 *)(r1 + 0) + * + * The compiler tries to merge common loads. There is no easy way to + * stop this compiler optimization without turning off a lot of other + * optimizations. The easiest way is to add barriers: + * + * __asm__ __volatile__("": : :"memory") + * + * after the ctx memory access to prevent their down stream merging. + */ + switch (augmented_args.args.syscall_nr) { + case SYS_OPEN: filename_arg = (const void *)args->args[0]; + __asm__ __volatile__("": : :"memory"); + break; + case SYS_OPENAT: filename_arg = (const void *)args->args[1]; + break; + } + + if (filename_arg != NULL) { + augmented_args.filename.reserved = 0; + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, + sizeof(augmented_args.filename.value), + filename_arg); + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { + len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; + len &= sizeof(augmented_args.filename.value) - 1; + } + } else { + len = sizeof(augmented_args.args); + } + + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); + return 0; +} + +SEC("raw_syscalls:sys_exit") +int sys_exit(struct syscall_exit_args *args) +{ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ +} + +license(GPL); diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index ac1bcdc17dae..f7eb63cbbc65 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -125,7 +125,7 @@ perf_get_timestamp(void) } static int -debug_cache_init(void) +create_jit_cache_dir(void) { char str[32]; char *base, *p; @@ -144,8 +144,13 @@ debug_cache_init(void) strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because %s/.debug/" + " is too long, please check the cwd, JITDUMPDIR, and" + " HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { @@ -154,20 +159,32 @@ debug_cache_init(void) } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit/%s.XXXXXXXX is too long, please check" + " the cwd, JITDUMPDIR, and HOME variables", + base, str); + return -1; + } p = mkdtemp(jit_path); if (p != jit_path) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } @@ -228,7 +245,7 @@ void *jvmti_open(void) { char dump_path[PATH_MAX]; struct jitheader header; - int fd; + int fd, ret; FILE *fp; init_arch_timestamp(); @@ -245,12 +262,22 @@ void *jvmti_open(void) memset(&header, 0, sizeof(header)); - debug_cache_init(); + /* + * jitdump file dir + */ + if (create_jit_cache_dir() < 0) + return NULL; /* * jitdump file name */ - scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jitdump file full path because" + " %s/jit-%i.dump is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", jit_path, getpid()); + return NULL; + } fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 24cb0bd56afa..f278ce5ebab7 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -119,6 +119,14 @@ def dsoname(name): return "[kernel]" return name +def findnth(s, sub, n, offs=0): + pos = s.find(sub) + if pos < 0: + return pos + if n <= 1: + return offs + pos + return findnth(s[pos + 1:], sub, n - 1, offs + pos + 1) + # Percent to one decimal place def PercentToOneDP(n, d): @@ -1464,6 +1472,317 @@ class BranchWindow(QMdiSubWindow): else: self.find_bar.NotFound() +# Dialog data item converted and validated using a SQL table + +class SQLTableDialogDataItem(): + + def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent): + self.glb = glb + self.label = label + self.placeholder_text = placeholder_text + self.table_name = table_name + self.match_column = match_column + self.column_name1 = column_name1 + self.column_name2 = column_name2 + self.parent = parent + + self.value = "" + + self.widget = QLineEdit() + self.widget.editingFinished.connect(self.Validate) + self.widget.textChanged.connect(self.Invalidate) + self.red = False + self.error = "" + self.validated = True + + self.last_id = 0 + self.first_time = 0 + self.last_time = 2 ** 64 + if self.table_name == "<timeranges>": + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") + if query.next(): + self.last_id = int(query.value(0)) + self.last_time = int(query.value(1)) + QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") + if query.next(): + self.first_time = int(query.value(0)) + if placeholder_text: + placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) + + if placeholder_text: + self.widget.setPlaceholderText(placeholder_text) + + def ValueToIds(self, value): + ids = [] + query = QSqlQuery(self.glb.db) + stmt = "SELECT id FROM " + self.table_name + " WHERE " + self.match_column + " = '" + value + "'" + ret = query.exec_(stmt) + if ret: + while query.next(): + ids.append(str(query.value(0))) + return ids + + def IdBetween(self, query, lower_id, higher_id, order): + QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1") + if query.next(): + return True, int(query.value(0)) + else: + return False, 0 + + def BinarySearchTime(self, lower_id, higher_id, target_time, get_floor): + query = QSqlQuery(self.glb.db) + while True: + next_id = int((lower_id + higher_id) / 2) + QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id)) + if not query.next(): + ok, dbid = self.IdBetween(query, lower_id, next_id, "DESC") + if not ok: + ok, dbid = self.IdBetween(query, next_id, higher_id, "") + if not ok: + return str(higher_id) + next_id = dbid + QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id)) + next_time = int(query.value(0)) + if get_floor: + if target_time > next_time: + lower_id = next_id + else: + higher_id = next_id + if higher_id <= lower_id + 1: + return str(higher_id) + else: + if target_time >= next_time: + lower_id = next_id + else: + higher_id = next_id + if higher_id <= lower_id + 1: + return str(lower_id) + + def ConvertRelativeTime(self, val): + print "val ", val + mult = 1 + suffix = val[-2:] + if suffix == "ms": + mult = 1000000 + elif suffix == "us": + mult = 1000 + elif suffix == "ns": + mult = 1 + else: + return val + val = val[:-2].strip() + if not self.IsNumber(val): + return val + val = int(val) * mult + if val >= 0: + val += self.first_time + else: + val += self.last_time + return str(val) + + def ConvertTimeRange(self, vrange): + print "vrange ", vrange + if vrange[0] == "": + vrange[0] = str(self.first_time) + if vrange[1] == "": + vrange[1] = str(self.last_time) + vrange[0] = self.ConvertRelativeTime(vrange[0]) + vrange[1] = self.ConvertRelativeTime(vrange[1]) + print "vrange2 ", vrange + if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): + return False + print "ok1" + beg_range = max(int(vrange[0]), self.first_time) + end_range = min(int(vrange[1]), self.last_time) + if beg_range > self.last_time or end_range < self.first_time: + return False + print "ok2" + vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True) + vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False) + print "vrange3 ", vrange + return True + + def AddTimeRange(self, value, ranges): + print "value ", value + n = value.count("-") + if n == 1: + pass + elif n == 2: + if value.split("-")[1].strip() == "": + n = 1 + elif n == 3: + n = 2 + else: + return False + pos = findnth(value, "-", n) + vrange = [value[:pos].strip() ,value[pos+1:].strip()] + if self.ConvertTimeRange(vrange): + ranges.append(vrange) + return True + return False + + def InvalidValue(self, value): + self.value = "" + palette = QPalette() + palette.setColor(QPalette.Text,Qt.red) + self.widget.setPalette(palette) + self.red = True + self.error = self.label + " invalid value '" + value + "'" + self.parent.ShowMessage(self.error) + + def IsNumber(self, value): + try: + x = int(value) + except: + x = 0 + return str(x) == value + + def Invalidate(self): + self.validated = False + + def Validate(self): + input_string = self.widget.text() + self.validated = True + if self.red: + palette = QPalette() + self.widget.setPalette(palette) + self.red = False + if not len(input_string.strip()): + self.error = "" + self.value = "" + return + if self.table_name == "<timeranges>": + ranges = [] + for value in [x.strip() for x in input_string.split(",")]: + if not self.AddTimeRange(value, ranges): + return self.InvalidValue(value) + ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] + self.value = " OR ".join(ranges) + elif self.table_name == "<ranges>": + singles = [] + ranges = [] + for value in [x.strip() for x in input_string.split(",")]: + if "-" in value: + vrange = value.split("-") + if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): + return self.InvalidValue(value) + ranges.append(vrange) + else: + if not self.IsNumber(value): + return self.InvalidValue(value) + singles.append(value) + ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] + if len(singles): + ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")") + self.value = " OR ".join(ranges) + elif self.table_name: + all_ids = [] + for value in [x.strip() for x in input_string.split(",")]: + ids = self.ValueToIds(value) + if len(ids): + all_ids.extend(ids) + else: + return self.InvalidValue(value) + self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")" + if self.column_name2: + self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )" + else: + self.value = input_string.strip() + self.error = "" + self.parent.ClearMessage() + + def IsValid(self): + if not self.validated: + self.Validate() + if len(self.error): + self.parent.ShowMessage(self.error) + return False + return True + +# Selected branch report creation dialog + +class SelectedBranchDialog(QDialog): + + def __init__(self, glb, parent=None): + super(SelectedBranchDialog, self).__init__(parent) + + self.glb = glb + + self.name = "" + self.where_clause = "" + + self.setWindowTitle("Selected Branches") + self.setMinimumWidth(600) + + items = ( + ("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""), + ("Time ranges:", "Enter time ranges", "<timeranges>", "", "samples.id", ""), + ("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "<ranges>", "", "cpu", ""), + ("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""), + ("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""), + ("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""), + ("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"), + ("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"), + ("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""), + ) + self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items] + + self.grid = QGridLayout() + + for row in xrange(len(self.data_items)): + self.grid.addWidget(QLabel(self.data_items[row].label), row, 0) + self.grid.addWidget(self.data_items[row].widget, row, 1) + + self.status = QLabel() + + self.ok_button = QPushButton("Ok", self) + self.ok_button.setDefault(True) + self.ok_button.released.connect(self.Ok) + self.ok_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.cancel_button = QPushButton("Cancel", self) + self.cancel_button.released.connect(self.reject) + self.cancel_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.hbox = QHBoxLayout() + #self.hbox.addStretch() + self.hbox.addWidget(self.status) + self.hbox.addWidget(self.ok_button) + self.hbox.addWidget(self.cancel_button) + + self.vbox = QVBoxLayout() + self.vbox.addLayout(self.grid) + self.vbox.addLayout(self.hbox) + + self.setLayout(self.vbox); + + def Ok(self): + self.name = self.data_items[0].value + if not self.name: + self.ShowMessage("Report name is required") + return + for d in self.data_items: + if not d.IsValid(): + return + for d in self.data_items[1:]: + if len(d.value): + if len(self.where_clause): + self.where_clause += " AND " + self.where_clause += d.value + if len(self.where_clause): + self.where_clause = " AND ( " + self.where_clause + " ) " + else: + self.ShowMessage("No selection") + return + self.accept() + + def ShowMessage(self, msg): + self.status.setText("<font color=#FF0000>" + msg) + + def ClearMessage(self): + self.status.setText("") + # Event list def GetEventList(db): @@ -1656,7 +1975,7 @@ class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase): def FindDone(self, row): self.find_bar.Idle() if row >= 0: - self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) + self.view.setCurrentIndex(self.model.mapFromSource(self.data_model.index(row, 0, QModelIndex()))) else: self.find_bar.NotFound() @@ -1765,6 +2084,149 @@ class WindowMenu(): def setActiveSubWindow(self, nr): self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1]) +# Help text + +glb_help_text = """ +<h1>Contents</h1> +<style> +p.c1 { + text-indent: 40px; +} +p.c2 { + text-indent: 80px; +} +} +</style> +<p class=c1><a href=#reports>1. Reports</a></p> +<p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p> +<p class=c2><a href=#allbranches>1.2 All branches</a></p> +<p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p> +<p class=c1><a href=#tables>2. Tables</a></p> +<h1 id=reports>1. Reports</h1> +<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2> +The result is a GUI window with a tree representing a context-sensitive +call-graph. Expanding a couple of levels of the tree and adjusting column +widths to suit will display something like: +<pre> + Call Graph: pt_example +Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) +v- ls + v- 2638:2638 + v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 + |- unknown unknown 1 13198 0.1 1 0.0 + >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 + >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 + v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 + >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 + >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 + >- __libc_csu_init ls 1 10354 0.1 10 0.0 + |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 + v- main ls 1 8182043 99.6 180254 99.9 +</pre> +<h3>Points to note:</h3> +<ul> +<li>The top level is a command name (comm)</li> +<li>The next level is a thread (pid:tid)</li> +<li>Subsequent levels are functions</li> +<li>'Count' is the number of calls</li> +<li>'Time' is the elapsed time until the function returns</li> +<li>Percentages are relative to the level above</li> +<li>'Branch Count' is the total number of branches for that function and all functions that it calls +</ul> +<h3>Find</h3> +Ctrl-F displays a Find bar which finds function names by either an exact match or a pattern match. +The pattern matching symbols are ? for any character and * for zero or more characters. +<h2 id=allbranches>1.2 All branches</h2> +The All branches report displays all branches in chronological order. +Not all data is fetched immediately. More records can be fetched using the Fetch bar provided. +<h3>Disassembly</h3> +Open a branch to display disassembly. This only works if: +<ol> +<li>The disassembler is available. Currently, only Intel XED is supported - see <a href=#xed>Intel XED Setup</a></li> +<li>The object code is available. Currently, only the perf build ID cache is searched for object code. +The default directory ~/.debug can be overridden by setting environment variable PERF_BUILDID_DIR. +One exception is kcore where the DSO long name is used (refer dsos_view on the Tables menu), +or alternatively, set environment variable PERF_KCORE to the kcore file name.</li> +</ol> +<h4 id=xed>Intel XED Setup</h4> +To use Intel XED, libxed.so must be present. To build and install libxed.so: +<pre> +git clone https://github.com/intelxed/mbuild.git mbuild +git clone https://github.com/intelxed/xed +cd xed +./mfile.py --share +sudo ./mfile.py --prefix=/usr/local install +sudo ldconfig +</pre> +<h3>Find</h3> +Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. +Refer to Python documentation for the regular expression syntax. +All columns are searched, but only currently fetched rows are searched. +<h2 id=selectedbranches>1.3 Selected branches</h2> +This is the same as the <a href=#allbranches>All branches</a> report but with the data reduced +by various selection criteria. A dialog box displays available criteria which are AND'ed together. +<h3>1.3.1 Time ranges</h3> +The time ranges hint text shows the total time range. Relative time ranges can also be entered in +ms, us or ns. Also, negative values are relative to the end of trace. Examples: +<pre> + 81073085947329-81073085958238 From 81073085947329 to 81073085958238 + 100us-200us From 100us to 200us + 10ms- From 10ms to the end + -100ns The first 100ns + -10ms- The last 10ms +</pre> +N.B. Due to the granularity of timestamps, there could be no branches in any given time range. +<h1 id=tables>2. Tables</h1> +The Tables menu shows all tables and views in the database. Most tables have an associated view +which displays the information in a more friendly way. Not all data for large tables is fetched +immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted, +but that can be slow for large tables. +<p>There are also tables of database meta-information. +For SQLite3 databases, the sqlite_master table is included. +For PostgreSQL databases, information_schema.tables/views/columns are included. +<h3>Find</h3> +Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. +Refer to Python documentation for the regular expression syntax. +All columns are searched, but only currently fetched rows are searched. +<p>N.B. Results are found in id order, so if the table is re-ordered, find-next and find-previous +will go to the next/previous result in id order, instead of display order. +""" + +# Help window + +class HelpWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(HelpWindow, self).__init__(parent) + + self.text = QTextBrowser() + self.text.setHtml(glb_help_text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.setWidget(self.text) + + AddSubWindow(glb.mainwindow.mdi_area, self, "Exported SQL Viewer Help") + +# Main window that only displays the help text + +class HelpOnlyWindow(QMainWindow): + + def __init__(self, parent=None): + super(HelpOnlyWindow, self).__init__(parent) + + self.setMinimumSize(200, 100) + self.resize(800, 600) + self.setWindowTitle("Exported SQL Viewer Help") + self.setWindowIcon(self.style().standardIcon(QStyle.SP_MessageBoxInformation)) + + self.text = QTextBrowser() + self.text.setHtml(glb_help_text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.setCentralWidget(self.text) + # Font resize def ResizeFont(widget, diff): @@ -1851,6 +2313,9 @@ class MainWindow(QMainWindow): self.window_menu = WindowMenu(self.mdi_area, menu) + help_menu = menu.addMenu("&Help") + help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents)) + def Find(self): win = self.mdi_area.activeSubWindow() if win: @@ -1888,6 +2353,8 @@ class MainWindow(QMainWindow): if event == "branches": label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) + label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self)) def TableMenu(self, tables, menu): table_menu = menu.addMenu("&Tables") @@ -1900,9 +2367,18 @@ class MainWindow(QMainWindow): def NewBranchView(self, event_id): BranchWindow(self.glb, event_id, "", "", self) + def NewSelectedBranchView(self, event_id): + dialog = SelectedBranchDialog(self.glb, self) + ret = dialog.exec_() + if ret: + BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self) + def NewTableView(self, table_name): TableWindow(self.glb, table_name, self) + def Help(self): + HelpWindow(self.glb, self) + # XED Disassembler class xed_state_t(Structure): @@ -1929,7 +2405,12 @@ class XEDInstruction(): class LibXED(): def __init__(self): - self.libxed = CDLL("libxed.so") + try: + self.libxed = CDLL("libxed.so") + except: + self.libxed = None + if not self.libxed: + self.libxed = CDLL("/usr/local/lib/libxed.so") self.xed_tables_init = self.libxed.xed_tables_init self.xed_tables_init.restype = None @@ -2097,10 +2578,16 @@ class DBRef(): def Main(): if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: exported-sql-viewer.py <database name>" + print >> sys.stderr, "Usage is: exported-sql-viewer.py {<database name> | --help-only}" raise Exception("Too few arguments") dbname = sys.argv[1] + if dbname == "--help-only": + app = QApplication(sys.argv) + mainwindow = HelpOnlyWindow() + mainwindow.show() + err = app.exec_() + sys.exit(err) is_sqlite3 = False try: diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 37940665f736..efd0157b9d22 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -9,7 +9,7 @@ size=112 config=0 sample_period=* sample_type=263 -read_format=0 +read_format=0|4 disabled=1 inherit=1 pinned=0 diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index 8a33ca4f9e1f..f0729c454f16 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -37,4 +37,3 @@ sample_freq=0 sample_period=0 freq=0 write_backward=0 -sample_id_all=0 diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 5d2a7fd8d407..eae59ad15ce3 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -31,6 +31,7 @@ static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size) "TCSETSW2", "TCSETSF2", "TIOCGRS48", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", "TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER", + "TIOCGISO7816", "TIOCSISO7816", [_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ecd9f9ceda77..b7bf201fe8a8 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -10,6 +10,7 @@ libperf-y += evlist.o libperf-y += evsel.o libperf-y += evsel_fprintf.o libperf-y += find_bit.o +libperf-y += get_current_dir_name.o libperf-y += kallsyms.o libperf-y += levenshtein.o libperf-y += llvm-utils.o diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e88e6f9b1463..668d2a9ef0f4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1810,3 +1810,30 @@ void perf_evlist__force_leader(struct perf_evlist *evlist) leader->forced_leader = true; } } + +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, + struct perf_evsel *evsel) +{ + struct perf_evsel *c2, *leader; + bool is_open = true; + + leader = evsel->leader; + pr_debug("Weak group for %s/%d failed\n", + leader->name, leader->nr_members); + + /* + * for_each_group_member doesn't work here because it doesn't + * include the first entry. + */ + evlist__for_each_entry(evsel_list, c2) { + if (c2 == evsel) + is_open = false; + if (c2->leader == leader) { + if (is_open) + perf_evsel__close(c2); + c2->leader = c2; + c2->nr_members = 0; + } + } + return leader; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index dc66436add98..9919eed6d15b 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -312,4 +312,7 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); void perf_evlist__force_leader(struct perf_evlist *evlist); +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, + struct perf_evsel *evsel); + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6d187059a373..dbc0466db368 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -956,7 +956,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->sample_freq = 0; attr->sample_period = 0; attr->write_backward = 0; - attr->sample_id_all = 0; } if (opts->no_samples) @@ -1093,7 +1092,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } - if (evsel->own_cpus) + if (evsel->own_cpus || evsel->unit) evsel->attr.read_format |= PERF_FORMAT_ID; /* diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c new file mode 100644 index 000000000000..267aa609a582 --- /dev/null +++ b/tools/perf/util/get_current_dir_name.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +// +#ifndef HAVE_GET_CURRENT_DIR_NAME +#include "util.h" +#include <unistd.h> +#include <stdlib.h> +#include <stdlib.h> + +/* Android's 'bionic' library, for one, doesn't have this */ + +char *get_current_dir_name(void) +{ + char pwd[PATH_MAX]; + + return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd); +} +#endif // HAVE_GET_CURRENT_DIR_NAME diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 58f6a9ceb590..4503f3ca45ab 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1474,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) decoder->have_calc_cyc_to_tsc = false; intel_pt_calc_cyc_to_tsc(decoder, true); } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) @@ -1514,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } /* Walk PSB+ packets when already in sync. */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index e02bc7b166a0..5e64da270f97 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -31,6 +31,11 @@ static FILE *f; static char log_name[MAX_LOG_NAME]; bool intel_pt_enable_logging; +void *intel_pt_log_fp(void) +{ + return f; +} + void intel_pt_log_enable(void) { intel_pt_enable_logging = true; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index 45b64f93f358..cc084937f701 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -22,6 +22,7 @@ struct intel_pt_pkt; +void *intel_pt_log_fp(void); void intel_pt_log_enable(void); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 86cc9a64e982..149ff361ca78 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, intel_pt_dump(pt, buf, len); } +static void intel_pt_log_event(union perf_event *event) +{ + FILE *f = intel_pt_log_fp(); + + if (!intel_pt_enable_logging || !f) + return; + + perf_event__fprintf(event, f); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -2010,9 +2020,9 @@ static int intel_pt_process_event(struct perf_session *session, event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", - perf_event__name(event->header.type), event->header.type, - sample->cpu, sample->time, timestamp); + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); return err; } diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index cf8bd123cf73..aed170bd4384 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -18,6 +18,7 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <asm/bug.h> struct namespaces *namespaces__new(struct namespaces_event *event) { @@ -186,6 +187,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, char curpath[PATH_MAX]; int oldns = -1; int newns = -1; + char *oldcwd = NULL; if (nc == NULL) return; @@ -199,9 +201,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) return; + oldcwd = get_current_dir_name(); + if (!oldcwd) + return; + oldns = open(curpath, O_RDONLY); if (oldns < 0) - return; + goto errout; newns = open(nsi->mntns_path, O_RDONLY); if (newns < 0) @@ -210,11 +216,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (setns(newns, CLONE_NEWNS) < 0) goto errout; + nc->oldcwd = oldcwd; nc->oldns = oldns; nc->newns = newns; return; errout: + free(oldcwd); if (oldns > -1) close(oldns); if (newns > -1) @@ -223,11 +231,16 @@ errout: void nsinfo__mountns_exit(struct nscookie *nc) { - if (nc == NULL || nc->oldns == -1 || nc->newns == -1) + if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd) return; setns(nc->oldns, CLONE_NEWNS); + if (nc->oldcwd) { + WARN_ON_ONCE(chdir(nc->oldcwd)); + zfree(&nc->oldcwd); + } + if (nc->oldns > -1) { close(nc->oldns); nc->oldns = -1; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index cae1a9a39722..d5f46c09ea31 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -38,6 +38,7 @@ struct nsinfo { struct nscookie { int oldns; int newns; + char *oldcwd; }; int nsinfo__init(struct nsinfo *nsi); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7799788f662f..7e49baad304d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -773,7 +773,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) if (!is_arm_pmu_core(name)) { pname = pe->pmu ? pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) + if (strcmp(pname, name)) continue; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 14508ee7707a..ece040b799f6 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -59,6 +59,10 @@ int fetch_kernel_version(unsigned int *puint, const char *perf_tip(const char *dirpath); +#ifndef HAVE_GET_CURRENT_DIR_NAME +char *get_current_dir_name(void); +#endif + #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); #endif diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 1dd5f4fcffd5..db66a952c173 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -129,7 +129,7 @@ WARNINGS += $(call cc-supports,-Wno-pointer-sign) WARNINGS += $(call cc-supports,-Wdeclaration-after-statement) WARNINGS += -Wshadow -CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \ +override CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \ -DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE UTIL_OBJS = utils/helpers/amd.o utils/helpers/msr.o \ @@ -156,12 +156,12 @@ LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS)) -CFLAGS += -pipe +override CFLAGS += -pipe ifeq ($(strip $(NLS)),true) INSTALL_NLS += install-gmo COMPILE_NLS += create-gmo - CFLAGS += -DNLS + override CFLAGS += -DNLS endif ifeq ($(strip $(CPUFREQ_BENCH)),true) @@ -175,7 +175,7 @@ ifeq ($(strip $(STATIC)),true) UTIL_SRC += $(LIB_SRC) endif -CFLAGS += $(WARNINGS) +override CFLAGS += $(WARNINGS) ifeq ($(strip $(V)),false) QUIET=@ @@ -188,10 +188,10 @@ export QUIET ECHO # if DEBUG is enabled, then we do not strip or optimize ifeq ($(strip $(DEBUG)),true) - CFLAGS += -O1 -g -DDEBUG + override CFLAGS += -O1 -g -DDEBUG STRIPCMD = /bin/true -Since_we_are_debugging else - CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer + override CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment endif diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index d79ab161cc75..f68b4bc55273 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -9,7 +9,7 @@ endif ifeq ($(strip $(STATIC)),true) LIBS = -L../ -L$(OUTPUT) -lm OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o \ - $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/sysfs.o + $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/cpupower.o else LIBS = -L../ -L$(OUTPUT) -lm -lcpupower OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile index 59af84b8ef45..b1b6c43644e7 100644 --- a/tools/power/cpupower/debug/x86_64/Makefile +++ b/tools/power/cpupower/debug/x86_64/Makefile @@ -13,10 +13,10 @@ INSTALL = /usr/bin/install default: all $(OUTPUT)centrino-decode: ../i386/centrino-decode.c - $(CC) $(CFLAGS) -o $@ $< + $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $< $(OUTPUT)powernow-k8-decode: ../i386/powernow-k8-decode.c - $(CC) $(CFLAGS) -o $@ $< + $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $< all: $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 1b993fe1ce23..0c0f3e3f0d80 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -28,7 +28,7 @@ static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname, snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s", cpu, fname); - return sysfs_read_file(path, buf, buflen); + return cpupower_read_sysfs(path, buf, buflen); } /* helper function to write a new value to a /sys file */ diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 9bd4c7655fdb..852d25462388 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -319,7 +319,7 @@ static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf, snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname); - return sysfs_read_file(path, buf, buflen); + return cpupower_read_sysfs(path, buf, buflen); } diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index 9c395ec924de..9711d628b0f4 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -15,7 +15,7 @@ #include "cpupower.h" #include "cpupower_intern.h" -unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen) { int fd; ssize_t numread; @@ -95,7 +95,7 @@ static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *re snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", cpu, fname); - if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) + if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0) return -1; *result = strtol(linebuf, &endp, 0); if (endp == linebuf || errno == ERANGE) diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h index 92affdfbe417..4887c76d23f8 100644 --- a/tools/power/cpupower/lib/cpupower_intern.h +++ b/tools/power/cpupower/lib/cpupower_intern.h @@ -3,4 +3,4 @@ #define MAX_LINE_LEN 4096 #define SYSFS_PATH_MAX 255 -unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 9527d47a1070..6c16ac36d482 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -15,6 +15,7 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/libnvdimm.h> +#include <linux/genalloc.h> #include <linux/vmalloc.h> #include <linux/device.h> #include <linux/module.h> @@ -140,8 +141,8 @@ static u32 handle[] = { [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1), }; -static unsigned long dimm_fail_cmd_flags[NUM_DCR]; -static int dimm_fail_cmd_code[NUM_DCR]; +static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)]; +static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; static const struct nd_intel_smart smart_def = { .flags = ND_INTEL_SMART_HEALTH_VALID @@ -205,7 +206,7 @@ struct nfit_test { unsigned long deadline; spinlock_t lock; } ars_state; - struct device *dimm_dev[NUM_DCR]; + struct device *dimm_dev[ARRAY_SIZE(handle)]; struct nd_intel_smart *smart; struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; @@ -215,6 +216,8 @@ struct nfit_test { static struct workqueue_struct *nfit_wq; +static struct gen_pool *nfit_pool; + static struct nfit_test *to_nfit_test(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -1132,6 +1135,9 @@ static void release_nfit_res(void *data) list_del(&nfit_res->list); spin_unlock(&nfit_test_lock); + if (resource_size(&nfit_res->res) >= DIMM_SIZE) + gen_pool_free(nfit_pool, nfit_res->res.start, + resource_size(&nfit_res->res)); vfree(nfit_res->buf); kfree(nfit_res); } @@ -1144,7 +1150,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, GFP_KERNEL); int rc; - if (!buf || !nfit_res) + if (!buf || !nfit_res || !*dma) goto err; rc = devm_add_action(dev, release_nfit_res, nfit_res); if (rc) @@ -1164,6 +1170,8 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, return nfit_res->buf; err: + if (*dma && size >= DIMM_SIZE) + gen_pool_free(nfit_pool, *dma, size); if (buf) vfree(buf); kfree(nfit_res); @@ -1172,9 +1180,16 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) { + struct genpool_data_align data = { + .align = SZ_128M, + }; void *buf = vmalloc(size); - *dma = (unsigned long) buf; + if (size >= DIMM_SIZE) + *dma = gen_pool_alloc_algo(nfit_pool, size, + gen_pool_first_fit_align, &data); + else + *dma = (unsigned long) buf; return __test_alloc(t, size, dma, buf); } @@ -2680,7 +2695,7 @@ static int nfit_test_probe(struct platform_device *pdev) u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle; int i; - for (i = 0; i < NUM_DCR; i++) + for (i = 0; i < ARRAY_SIZE(handle); i++) if (nfit_handle == handle[i]) dev_set_drvdata(nfit_test->dimm_dev[i], nfit_mem); @@ -2839,6 +2854,17 @@ static __init int nfit_test_init(void) goto err_register; } + nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE); + if (!nfit_pool) { + rc = -ENOMEM; + goto err_register; + } + + if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) { + rc = -ENOMEM; + goto err_register; + } + for (i = 0; i < NUM_NFITS; i++) { struct nfit_test *nfit_test; struct platform_device *pdev; @@ -2894,6 +2920,9 @@ static __init int nfit_test_init(void) return 0; err_register: + if (nfit_pool) + gen_pool_destroy(nfit_pool); + destroy_workqueue(nfit_wq); for (i = 0; i < NUM_NFITS; i++) if (instances[i]) @@ -2917,6 +2946,8 @@ static __exit void nfit_test_exit(void) platform_driver_unregister(&nfit_test_driver); nfit_test_teardown(); + gen_pool_destroy(nfit_pool); + for (i = 0; i < NUM_NFITS; i++) put_device(&instances[i]->pdev.dev); class_destroy(nfit_test_dimm); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f1fe492c8e17..f0017c831e57 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -24,6 +24,7 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += netfilter TARGETS += nsfs TARGETS += powerpc TARGETS += proc diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4a54236475ae..73aa6d8f4a2f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -24,12 +24,13 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ - test_netcnt + test_netcnt test_tcpnotify_user TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + test_tcpnotify_kern.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ @@ -38,7 +39,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \ - xdp_dummy.o + xdp_dummy.o test_map_in_map.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -75,6 +76,7 @@ $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c +$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c @@ -125,7 +127,14 @@ $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') +BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ + $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ + readelf -S ./llvm_btf_verify.o | grep BTF; \ + /bin/rm -f ./llvm_btf_verify.o) +ifneq ($(BTF_LLVM_PROBE),) + CLANG_FLAGS += -g +else ifneq ($(BTF_LLC_PROBE),) ifneq ($(BTF_PAHOLE_PROBE),) ifneq ($(BTF_OBJCOPY_PROBE),) @@ -135,6 +144,17 @@ ifneq ($(BTF_OBJCOPY_PROBE),) endif endif endif +endif + +# Have one program compiled without "-target bpf" to test whether libbpf loads +# it successfully +$(OUTPUT)/test_xdp.o: test_xdp.c + $(CLANG) $(CLANG_FLAGS) \ + -O2 -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c index 107350a7821d..b9798f558ca7 100644 --- a/tools/testing/selftests/bpf/bpf_flow.c +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -116,7 +116,7 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) return BPF_DROP; } -SEC("dissect") +SEC("flow_dissector") int _dissect(struct __sk_buff *skb) { if (!skb->vlan_present) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 686e57ce40f4..6c77cf7bedce 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -113,6 +113,8 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_push_data; +static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) = + (void *) BPF_FUNC_msg_pop_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = @@ -154,12 +156,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_tcp; static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_udp; static int (*bpf_sk_release)(struct bpf_sock *sk) = @@ -168,6 +170,8 @@ static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) = (void *) BPF_FUNC_skb_vlan_push; static int (*bpf_skb_vlan_pop)(void *ctx) = (void *) BPF_FUNC_skb_vlan_pop; +static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = + (void *) BPF_FUNC_rc_pointer_rel; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 7f90d3645af8..37f947ec44ed 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -22,3 +22,4 @@ CONFIG_NET_CLS_FLOWER=m CONFIG_LWTUNNEL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_XDP_SOCKETS=y +CONFIG_FTRACE_SYSCALLS=y diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c index 5a88a681d2ab..b8395f3c43e9 100644 --- a/tools/testing/selftests/bpf/connect4_prog.c +++ b/tools/testing/selftests/bpf/connect4_prog.c @@ -21,23 +21,48 @@ int _version SEC("version") = 1; SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { + struct bpf_sock_tuple tuple = {}; struct sockaddr_in sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr)); + memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport)); + + tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4); + tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0); + + if (!sk) + return 0; + + if (sk->src_ip4 != tuple.ipv4.daddr || + sk->src_port != DST_REWRITE_PORT4) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); /* Rewrite destination. */ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); ctx->user_port = bpf_htons(DST_REWRITE_PORT4); - if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { - ///* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); - sa.sin_family = AF_INET; - sa.sin_port = bpf_htons(0); - sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(0); + sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - } + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; return 1; } diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c index 8ea3f7d12dee..25f5dc7b7aa0 100644 --- a/tools/testing/selftests/bpf/connect6_prog.c +++ b/tools/testing/selftests/bpf/connect6_prog.c @@ -29,7 +29,41 @@ int _version SEC("version") = 1; SEC("cgroup/connect6") int connect_v6_prog(struct bpf_sock_addr *ctx) { + struct bpf_sock_tuple tuple = {}; struct sockaddr_in6 sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr)); + memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport)); + + tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0); + tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1); + tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2); + tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3); + + tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0); + + if (!sk) + return 0; + + if (sk->src_ip6[0] != tuple.ipv6.daddr[0] || + sk->src_ip6[1] != tuple.ipv6.daddr[1] || + sk->src_ip6[2] != tuple.ipv6.daddr[2] || + sk->src_ip6[3] != tuple.ipv6.daddr[3] || + sk->src_port != DST_REWRITE_PORT6) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); /* Rewrite destination. */ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); @@ -39,21 +73,19 @@ int connect_v6_prog(struct bpf_sock_addr *ctx) ctx->user_port = bpf_htons(DST_REWRITE_PORT6); - if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { - /* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); - sa.sin6_family = AF_INET6; - sa.sin6_port = bpf_htons(0); + sa.sin6_family = AF_INET6; + sa.sin6_port = bpf_htons(0); - sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); - sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); - sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); - sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); + sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); + sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); + sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); + sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - } + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; return 1; } diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 5f377ec53f2f..3c789d03b629 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -620,8 +620,8 @@ static int do_test_single(struct bpf_align_test *test) prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, 1, "GPL", 0, - bpf_vlog, sizeof(bpf_vlog), 2); + prog, prog_len, BPF_F_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); if (fd_prog < 0 && test->result != REJECT) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index f42b3396d622..f570e0a39959 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -5,6 +5,8 @@ #include <linux/btf.h> #include <linux/err.h> #include <linux/kernel.h> +#include <linux/filter.h> +#include <linux/unistd.h> #include <bpf/bpf.h> #include <sys/resource.h> #include <libelf.h> @@ -22,6 +24,9 @@ #include "bpf_rlimit.h" #include "bpf_util.h" +#define MAX_INSNS 512 +#define MAX_SUBPROGS 16 + static uint32_t pass_cnt; static uint32_t error_cnt; static uint32_t skip_cnt; @@ -85,24 +90,38 @@ static int __base_pr(const char *format, ...) #define BTF_TYPEDEF_ENC(name, type) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) -#define BTF_PTR_ENC(name, type) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) +#define BTF_PTR_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) + +#define BTF_CONST_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type) + +#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type) + +#define BTF_FUNC_PROTO_ARG_ENC(name, type) \ + (name), (type) + +#define BTF_FUNC_ENC(name, func_proto) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto) #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f -#define MAX_NR_RAW_TYPES 1024 +#define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535 static struct args { unsigned int raw_test_num; unsigned int file_test_num; unsigned int get_info_test_num; + unsigned int info_raw_test_num; bool raw_test; bool file_test; bool get_info_test; bool pprint_test; bool always_log; + bool info_raw_test; } args; static char btf_log_buf[BTF_LOG_BUF_SIZE]; @@ -118,7 +137,7 @@ struct btf_raw_test { const char *str_sec; const char *map_name; const char *err_str; - __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; enum bpf_map_type map_type; __u32 key_size; @@ -137,6 +156,9 @@ struct btf_raw_test { int str_len_delta; }; +#define BTF_STR_SEC(str) \ + .str_sec = str, .str_sec_size = sizeof(str) + static struct btf_raw_test raw_tests[] = { /* enum E { * E0, @@ -432,11 +454,11 @@ static struct btf_raw_test raw_tests[] = { /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* typedef const void * const_void_ptr */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* struct A { */ /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* struct A { */ /* [5] */ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), /* const_void_ptr m; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 0), + BTF_MEMBER_ENC(NAME_TBD, 4, 0), /* } */ BTF_END_RAW, }, @@ -494,10 +516,10 @@ static struct btf_raw_test raw_tests[] = { BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), - /* typedef const void * const_void_ptr */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* const_void_ptr[4] */ /* [5] */ - BTF_TYPE_ARRAY_ENC(3, 1, 4), + /* typedef const void * const_void_ptr */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* const_void_ptr[4] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [5] */ BTF_END_RAW, }, .str_sec = "\0const_void_ptr", @@ -1293,6 +1315,367 @@ static struct btf_raw_test raw_tests[] = { }, { + .descr = "typedef (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(0, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "typedef (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!int", + .str_sec_size = sizeof("\0__!int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "ptr type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "volatile type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "const type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "restrict type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!skb", + .str_sec_size = sizeof("\0__!skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "array type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct member (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B*", + .str_sec_size = sizeof("\0A\0B*"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B", + .str_sec_size = sizeof("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "enum type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!", + .str_sec_size = sizeof("\0A!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, +{ .descr = "arraymap invalid btf key (a bit field)", .raw_types = { /* int */ /* [1] */ @@ -1374,6 +1757,464 @@ static struct btf_raw_test raw_tests[] = { .map_create_err = true, }, +{ + .descr = "func proto (int (*)(int, unsigned int))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* int (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int, ...) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg with name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b, ... c) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0c", + .str_sec_size = sizeof("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#3", +}, + +{ + .descr = "func proto (arg after vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, ..., unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (CONST=>TYPEDEF=>PTR=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* typedef void (*func_ptr)(int, unsigned int) */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [3] */ + /* const func_ptr */ + BTF_CONST_ENC(3), /* [4] */ + BTF_PTR_ENC(6), /* [5] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [6] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_ptr", + .str_sec_size = sizeof("\0func_ptr"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [4] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [5] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_typedef", + .str_sec_size = sizeof("\0func_typedef"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, + +{ + .descr = "func proto (btf_resolve(arg))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* void (*)(const void *) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(0, 3), + BTF_CONST_ENC(4), /* [3] */ + BTF_PTR_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Not all arg has name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0b", + .str_sec_size = sizeof("\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Bad arg name_off)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int <bad_name_off>) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0xffffffff, 2), + BTF_END_RAW, + }, + .str_sec = "\0a", + .str_sec_size = sizeof("\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Bad arg name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int !!!) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0!!!", + .str_sec_size = sizeof("\0a\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Invalid return type)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* <bad_ret_type> (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(100, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid return type", +}, + +{ + .descr = "func proto (with func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void func_proto(int, unsigned int) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_proto", + .str_sec_size = sizeof("\0func_proto"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func proto (const void arg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(const void) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 4), + BTF_CONST_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, + +{ + .descr = "func (void func(int a, unsigned int b))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func (No func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void <no_name>(int a, unsigned int b) */ + BTF_FUNC_ENC(0, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Invalid func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void !!!(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0!!!", + .str_sec_size = sizeof("\0a\0b\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Some arg has no name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + /* void func(int a, unsigned int) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0func", + .str_sec_size = sizeof("\0a\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func (Non zero vlen)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "vlen != 0", +}, + +{ + .descr = "func (Not referring to FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0func", + .str_sec_size = sizeof("\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) @@ -1381,11 +2222,11 @@ static const char *get_next_str(const char *start, const char *end) return start < end - 1 ? start + 1 : NULL; } -static int get_type_sec_size(const __u32 *raw_types) +static int get_raw_sec_size(const __u32 *raw_types) { int i; - for (i = MAX_NR_RAW_TYPES - 1; + for (i = MAX_NR_RAW_U32 - 1; i >= 0 && raw_types[i] != BTF_END_RAW; i--) ; @@ -1397,7 +2238,8 @@ static void *btf_raw_create(const struct btf_header *hdr, const __u32 *raw_types, const char *str, unsigned int str_sec_size, - unsigned int *btf_size) + unsigned int *btf_size, + const char **ret_next_str) { const char *next_str = str, *end_str = str + str_sec_size; unsigned int size_needed, offset; @@ -1406,7 +2248,7 @@ static void *btf_raw_create(const struct btf_header *hdr, uint32_t *ret_types; void *raw_btf; - type_sec_size = get_type_sec_size(raw_types); + type_sec_size = get_raw_sec_size(raw_types); if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) return NULL; @@ -1445,6 +2287,8 @@ static void *btf_raw_create(const struct btf_header *hdr, ret_hdr->str_len = str_sec_size; *btf_size = size_needed; + if (ret_next_str) + *ret_next_str = next_str; return raw_btf; } @@ -1464,7 +2308,7 @@ static int do_test_raw(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1541,7 +2385,7 @@ static int test_raw(void) struct btf_get_info_test { const char *descr; const char *str_sec; - __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; int btf_size_delta; int (*special_test)(unsigned int test_num); @@ -1621,7 +2465,7 @@ static int test_big_btf_info(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1705,7 +2549,7 @@ static int test_btf_id(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1843,7 +2687,7 @@ static int do_test_get_info(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1940,13 +2784,13 @@ static struct btf_file_test file_tests[] = { }, }; -static int file_has_btf_elf(const char *fn) +static int file_has_btf_elf(const char *fn, bool *has_btf_ext) { Elf_Scn *scn = NULL; GElf_Ehdr ehdr; + int ret = 0; int elf_fd; Elf *elf; - int ret; if (CHECK(elf_version(EV_CURRENT) == EV_NONE, "elf_version(EV_CURRENT) == EV_NONE")) @@ -1978,14 +2822,12 @@ static int file_has_btf_elf(const char *fn) } sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); - if (!strcmp(sh_name, BTF_ELF_SEC)) { + if (!strcmp(sh_name, BTF_ELF_SEC)) ret = 1; - goto done; - } + if (!strcmp(sh_name, BTF_EXT_ELF_SEC)) + *has_btf_ext = true; } - ret = 0; - done: close(elf_fd); elf_end(elf); @@ -1995,15 +2837,24 @@ done: static int do_test_file(unsigned int test_num) { const struct btf_file_test *test = &file_tests[test_num - 1]; + const char *expected_fnames[] = {"_dummy_tracepoint", + "test_long_fname_1", + "test_long_fname_2"}; + struct bpf_prog_info info = {}; struct bpf_object *obj = NULL; + struct bpf_func_info *finfo; struct bpf_program *prog; + __u32 info_len, rec_size; + bool has_btf_ext = false; + struct btf *btf = NULL; + void *func_info = NULL; struct bpf_map *map; - int err; + int i, err, prog_fd; fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, test->file); - err = file_has_btf_elf(test->file); + err = file_has_btf_elf(test->file, &has_btf_ext); if (err == -1) return err; @@ -2031,6 +2882,7 @@ static int do_test_file(unsigned int test_num) err = bpf_object__load(obj); if (CHECK(err < 0, "bpf_object__load: %d", err)) goto done; + prog_fd = bpf_program__fd(prog); map = bpf_object__find_map_by_name(obj, "btf_map"); if (CHECK(!map, "btf_map not found")) { @@ -2045,9 +2897,100 @@ static int do_test_file(unsigned int test_num) test->btf_kv_notfound)) goto done; + if (!has_btf_ext) + goto skip; + + /* get necessary program info */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + rec_size = info.func_info_rec_size; + if (CHECK(rec_size < 4, + "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { + err = -1; + goto done; + } + + func_info = malloc(info.nr_func_info * rec_size); + if (CHECK(!func_info, "out of memory")) { + err = -1; + goto done; + } + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.nr_func_info = 3; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + err = btf__get_from_id(info.btf_id, &btf); + if (CHECK(err, "cannot get btf from kernel, err: %d", err)) + goto done; + + /* check three functions */ + finfo = func_info; + for (i = 0; i < 3; i++) { + const struct btf_type *t; + const char *fname; + + t = btf__type_by_id(btf, finfo->type_id); + if (CHECK(!t, "btf__type_by_id failure: id %u", + finfo->type_id)) { + err = -1; + goto done; + } + + fname = btf__name_by_offset(btf, t->name_off); + err = strcmp(fname, expected_fnames[i]); + /* for the second and third functions in .text section, + * the compiler may order them either way. + */ + if (i && err) + err = strcmp(fname, expected_fnames[3 - i]); + if (CHECK(err, "incorrect fname %s", fname ? : "")) { + err = -1; + goto done; + } + + finfo = (void *)finfo + rec_size; + } + +skip: fprintf(stderr, "OK"); done: + free(func_info); bpf_object__close(obj); return err; } @@ -2273,7 +3216,7 @@ static int do_test_pprint(void) fprintf(stderr, "%s......", test->descr); raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -2477,16 +3420,894 @@ static int test_pprint(void) return err; } +#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \ + (insn_off), (file_off), (line_off), ((line_num) << 10 | ((line_col) & 0x3ff)) + +static struct prog_info_raw_test { + const char *descr; + const char *str_sec; + const char *err_str; + __u32 raw_types[MAX_NR_RAW_U32]; + __u32 str_sec_size; + struct bpf_insn insns[MAX_INSNS]; + __u32 prog_type; + __u32 func_info[MAX_SUBPROGS][2]; + __u32 func_info_rec_size; + __u32 func_info_cnt; + __u32 line_info[MAX_NR_RAW_U32]; + __u32 line_info_rec_size; + __u32 nr_jited_ksyms; + bool expected_prog_load_failure; +} info_raw_tests[] = { +{ + .descr = "func_type (main func + one sub)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, +}, + +{ + .descr = "func_type (Incorrect func_info_rec_size)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 4, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect func_info_cnt)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 1, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect bpf_func_info.insn_off)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {2, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. insn_off >= prog->len)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BPF_LINE_INFO_ENC(4, 0, 0, 5, 6), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "line_info[4].insn_off", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog. zero tailing line_info", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 0, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. nonzero tailing line_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 1, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, + .err_str = "nonzero tailing record in line_info", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog + func_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {5, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog. missing 1st func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#0", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. missing 2nd func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#1", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. unordered insn offset)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "Invalid line_info[2].insn_off", + .expected_prog_load_failure = true, +}, + +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static __u32 *patch_name_tbd(const __u32 *raw_u32, + const char *str, __u32 str_off, + unsigned int str_sec_size, + unsigned int *ret_size) +{ + int i, raw_u32_size = get_raw_sec_size(raw_u32); + const char *end_str = str + str_sec_size; + const char *next_str = str + str_off; + __u32 *new_u32 = NULL; + + if (raw_u32_size == -1) + return ERR_PTR(-EINVAL); + + if (!raw_u32_size) { + *ret_size = 0; + return NULL; + } + + new_u32 = malloc(raw_u32_size); + if (!new_u32) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < raw_u32_size / sizeof(raw_u32[0]); i++) { + if (raw_u32[i] == NAME_TBD) { + next_str = get_next_str(next_str, end_str); + if (CHECK(!next_str, "Error in getting next_str\n")) { + free(new_u32); + return ERR_PTR(-EINVAL); + } + new_u32[i] = next_str - str; + next_str += strlen(next_str); + } else { + new_u32[i] = raw_u32[i]; + } + } + + *ret_size = raw_u32_size; + return new_u32; +} + +static int test_get_finfo(const struct prog_info_raw_test *test, + int prog_fd) +{ + struct bpf_prog_info info = {}; + struct bpf_func_info *finfo; + __u32 info_len, rec_size, i; + void *func_info = NULL; + int err; + + /* get necessary lens */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + return -1; + } + if (CHECK(info.nr_func_info != test->func_info_cnt, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { + return -1; + } + + rec_size = info.func_info_rec_size; + if (CHECK(rec_size < 8, + "incorrect info.func_info_rec_size (1st) %d", rec_size)) { + return -1; + } + + if (!info.nr_func_info) + return 0; + + func_info = malloc(info.nr_func_info * rec_size); + if (CHECK(!func_info, "out of memory")) + return -1; + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.nr_func_info = test->func_info_cnt; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != test->func_info_cnt, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size < 8, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + if (CHECK(!info.func_info, + "info.func_info == 0. kernel.kptr_restrict is set?")) { + err = -1; + goto done; + } + + finfo = func_info; + for (i = 0; i < test->func_info_cnt; i++) { + if (CHECK(finfo->type_id != test->func_info[i][1], + "incorrect func_type %u expected %u", + finfo->type_id, test->func_info[i][1])) { + err = -1; + goto done; + } + finfo = (void *)finfo + rec_size; + } + + err = 0; + +done: + free(func_info); + return err; +} + +static int test_get_linfo(const struct prog_info_raw_test *test, + const void *patched_linfo, + __u32 cnt, int prog_fd) +{ + __u32 i, info_len, nr_jited_ksyms, nr_jited_func_lens; + __u64 *jited_linfo = NULL, *jited_ksyms = NULL; + __u32 rec_size, jited_rec_size, jited_cnt; + struct bpf_line_info *linfo = NULL; + __u32 cur_func_len, ksyms_found; + struct bpf_prog_info info = {}; + __u32 *jited_func_lens = NULL; + __u64 cur_func_ksyms; + int err; + + jited_cnt = cnt; + rec_size = sizeof(*linfo); + jited_rec_size = sizeof(*jited_linfo); + if (test->nr_jited_ksyms) + nr_jited_ksyms = test->nr_jited_ksyms; + else + nr_jited_ksyms = test->func_info_cnt; + nr_jited_func_lens = nr_jited_ksyms; + + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "err:%d errno:%d", err, errno)) { + err = -1; + goto done; + } + + if (!info.jited_prog_len) { + /* prog is not jited */ + jited_cnt = 0; + nr_jited_ksyms = 1; + nr_jited_func_lens = 1; + } + + if (CHECK(info.nr_line_info != cnt || + info.nr_jited_line_info != jited_cnt || + info.nr_jited_ksyms != nr_jited_ksyms || + info.nr_jited_func_lens != nr_jited_func_lens || + (!info.nr_line_info && info.nr_jited_line_info), + "info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) nr_jited_ksyms:%u(expected:%u) nr_jited_func_lens:%u(expected:%u)", + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, + info.nr_jited_ksyms, nr_jited_ksyms, + info.nr_jited_func_lens, nr_jited_func_lens)) { + err = -1; + goto done; + } + + if (CHECK(info.line_info_rec_size < 16 || + info.jited_line_info_rec_size < 8, + "info: line_info_rec_size:%u(userspace expected:%u) jited_line_info_rec_size:%u(userspace expected:%u)", + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size)) { + err = -1; + goto done; + } + + if (!cnt) + return 0; + + rec_size = info.line_info_rec_size; + jited_rec_size = info.jited_line_info_rec_size; + + memset(&info, 0, sizeof(info)); + + linfo = calloc(cnt, rec_size); + if (CHECK(!linfo, "!linfo")) { + err = -1; + goto done; + } + info.nr_line_info = cnt; + info.line_info_rec_size = rec_size; + info.line_info = ptr_to_u64(linfo); + + if (jited_cnt) { + jited_linfo = calloc(jited_cnt, jited_rec_size); + jited_ksyms = calloc(nr_jited_ksyms, sizeof(*jited_ksyms)); + jited_func_lens = calloc(nr_jited_func_lens, + sizeof(*jited_func_lens)); + if (CHECK(!jited_linfo || !jited_ksyms || !jited_func_lens, + "jited_linfo:%p jited_ksyms:%p jited_func_lens:%p", + jited_linfo, jited_ksyms, jited_func_lens)) { + err = -1; + goto done; + } + + info.nr_jited_line_info = jited_cnt; + info.jited_line_info_rec_size = jited_rec_size; + info.jited_line_info = ptr_to_u64(jited_linfo); + info.nr_jited_ksyms = nr_jited_ksyms; + info.jited_ksyms = ptr_to_u64(jited_ksyms); + info.nr_jited_func_lens = nr_jited_func_lens; + info.jited_func_lens = ptr_to_u64(jited_func_lens); + } + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + /* + * Only recheck the info.*line_info* fields. + * Other fields are not the concern of this test. + */ + if (CHECK(err == -1 || + !info.line_info || + info.nr_line_info != cnt || + (jited_cnt && !info.jited_line_info) || + info.nr_jited_line_info != jited_cnt || + info.line_info_rec_size != rec_size || + info.jited_line_info_rec_size != jited_rec_size, + "err:%d errno:%d info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) line_info_rec_size:%u(expected:%u) jited_linfo_rec_size:%u(expected:%u) line_info:%p jited_line_info:%p", + err, errno, + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size, + (void *)(long)info.line_info, + (void *)(long)info.jited_line_info)) { + err = -1; + goto done; + } + + CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u", + linfo[0].insn_off); + for (i = 1; i < cnt; i++) { + const struct bpf_line_info *expected_linfo; + + expected_linfo = patched_linfo + (i * test->line_info_rec_size); + if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off, + "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u", + i, linfo[i].insn_off, + i - 1, linfo[i - 1].insn_off)) { + err = -1; + goto done; + } + if (CHECK(linfo[i].file_name_off != expected_linfo->file_name_off || + linfo[i].line_off != expected_linfo->line_off || + linfo[i].line_col != expected_linfo->line_col, + "linfo[%u] (%u, %u, %u) != (%u, %u, %u)", i, + linfo[i].file_name_off, + linfo[i].line_off, + linfo[i].line_col, + expected_linfo->file_name_off, + expected_linfo->line_off, + expected_linfo->line_col)) { + err = -1; + goto done; + } + } + + if (!jited_cnt) { + fprintf(stderr, "not jited. skipping jited_line_info check. "); + err = 0; + goto done; + } + + if (CHECK(jited_linfo[0] != jited_ksyms[0], + "jited_linfo[0]:%lx != jited_ksyms[0]:%lx", + (long)(jited_linfo[0]), (long)(jited_ksyms[0]))) { + err = -1; + goto done; + } + + ksyms_found = 1; + cur_func_len = jited_func_lens[0]; + cur_func_ksyms = jited_ksyms[0]; + for (i = 1; i < jited_cnt; i++) { + if (ksyms_found < nr_jited_ksyms && + jited_linfo[i] == jited_ksyms[ksyms_found]) { + cur_func_ksyms = jited_ksyms[ksyms_found]; + cur_func_len = jited_ksyms[ksyms_found]; + ksyms_found++; + continue; + } + + if (CHECK(jited_linfo[i] <= jited_linfo[i - 1], + "jited_linfo[%u]:%lx <= jited_linfo[%u]:%lx", + i, (long)jited_linfo[i], + i - 1, (long)(jited_linfo[i - 1]))) { + err = -1; + goto done; + } + + if (CHECK(jited_linfo[i] - cur_func_ksyms > cur_func_len, + "jited_linfo[%u]:%lx - %lx > %u", + i, (long)jited_linfo[i], (long)cur_func_ksyms, + cur_func_len)) { + err = -1; + goto done; + } + } + + if (CHECK(ksyms_found != nr_jited_ksyms, + "ksyms_found:%u != nr_jited_ksyms:%u", + ksyms_found, nr_jited_ksyms)) { + err = -1; + goto done; + } + + err = 0; + +done: + free(linfo); + free(jited_linfo); + free(jited_ksyms); + free(jited_func_lens); + return err; +} + +static int do_test_info_raw(unsigned int test_num) +{ + const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; + unsigned int raw_btf_size, linfo_str_off, linfo_size; + int btf_fd = -1, prog_fd = -1, err = 0; + void *raw_btf, *patched_linfo = NULL; + const char *ret_next_str; + union bpf_attr attr = {}; + + fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size, &ret_next_str); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { + err = -1; + goto done; + } + + if (*btf_log_buf && args.always_log) + fprintf(stderr, "\n%s", btf_log_buf); + *btf_log_buf = '\0'; + + linfo_str_off = ret_next_str - test->str_sec; + patched_linfo = patch_name_tbd(test->line_info, + test->str_sec, linfo_str_off, + test->str_sec_size, &linfo_size); + if (IS_ERR(patched_linfo)) { + fprintf(stderr, "error in creating raw bpf_line_info"); + err = -1; + goto done; + } + + attr.prog_type = test->prog_type; + attr.insns = ptr_to_u64(test->insns); + attr.insn_cnt = probe_prog_length(test->insns); + attr.license = ptr_to_u64("GPL"); + attr.prog_btf_fd = btf_fd; + attr.func_info_rec_size = test->func_info_rec_size; + attr.func_info_cnt = test->func_info_cnt; + attr.func_info = ptr_to_u64(test->func_info); + attr.log_buf = ptr_to_u64(btf_log_buf); + attr.log_size = BTF_LOG_BUF_SIZE; + attr.log_level = 1; + if (linfo_size) { + attr.line_info_rec_size = test->line_info_rec_size; + attr.line_info = ptr_to_u64(patched_linfo); + attr.line_info_cnt = linfo_size / attr.line_info_rec_size; + } + + prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + err = ((prog_fd == -1) != test->expected_prog_load_failure); + if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", + prog_fd, test->expected_prog_load_failure, errno) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), + "expected err_str:%s", test->err_str)) { + err = -1; + goto done; + } + + if (prog_fd == -1) + goto done; + + err = test_get_finfo(test, prog_fd); + if (err) + goto done; + + err = test_get_linfo(test, patched_linfo, attr.line_info_cnt, prog_fd); + if (err) + goto done; + +done: + if (!err) + fprintf(stderr, "OK"); + + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + if (btf_fd != -1) + close(btf_fd); + if (prog_fd != -1) + close(prog_fd); + + if (!IS_ERR(patched_linfo)) + free(patched_linfo); + + return err; +} + +static int test_info_raw(void) +{ + unsigned int i; + int err = 0; + + if (args.info_raw_test_num) + return count_result(do_test_info_raw(args.info_raw_test_num)); + + for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) + err |= count_result(do_test_info_raw(i)); + + return err; +} + static void usage(const char *cmd) { - fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n", + fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" + "\t[-g btf_get_info_test_num (1 - %zu)] |\n" + "\t[-f btf_file_test_num (1 - %zu)] |\n" + "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" + "\t[-p (pretty print test)]]\n", cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests)); + ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests)); } static int parse_args(int argc, char **argv) { - const char *optstr = "lpf:r:g:"; + const char *optstr = "lpk:f:r:g:"; int opt; while ((opt = getopt(argc, argv, optstr)) != -1) { @@ -2509,6 +4330,10 @@ static int parse_args(int argc, char **argv) case 'p': args.pprint_test = true; break; + case 'k': + args.info_raw_test_num = atoi(optarg); + args.info_raw_test = true; + break; case 'h': usage(argv[0]); exit(0); @@ -2542,6 +4367,14 @@ static int parse_args(int argc, char **argv) return -1; } + if (args.info_raw_test_num && + (args.info_raw_test_num < 1 || + args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) { + fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n", + ARRAY_SIZE(info_raw_tests)); + return -1; + } + return 0; } @@ -2574,13 +4407,17 @@ int main(int argc, char **argv) if (args.pprint_test) err |= test_pprint(); + if (args.info_raw_test) + err |= test_info_raw(); + if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test) + args.pprint_test || args.info_raw_test) goto done; err |= test_raw(); err |= test_get_info(); err |= test_file(); + err |= test_info_raw(); done: print_summary(); diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c index b21b876f475d..e5c79fe0ffdb 100644 --- a/tools/testing/selftests/bpf/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/test_btf_haskv.c @@ -24,8 +24,8 @@ struct dummy_tracepoint_args { struct sock *sock; }; -SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -42,4 +42,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) return 0; } +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c index 0ed8e088eebf..434188c37774 100644 --- a/tools/testing/selftests/bpf/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/test_btf_nokv.c @@ -22,8 +22,8 @@ struct dummy_tracepoint_args { struct sock *sock; }; -SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -40,4 +40,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) return 0; } +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index c0fb073b5eab..d23d4da66b83 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -59,7 +59,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s dissect +./flow_dissector_load -p bpf_flow.o -s flow_dissector # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh index 156d89f1edcc..2989b2e2d856 100755 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -33,17 +33,11 @@ trap exit_handler 0 2 3 6 9 libbpf_open_file test_l4lb.o -# TODO: fix libbpf to load noinline functions -# [warning] libbpf: incorrect bpf_call opcode -#libbpf_open_file test_l4lb_noinline.o +# Load a program with BPF-to-BPF calls +libbpf_open_file test_l4lb_noinline.o -# TODO: fix test_xdp_meta.c to load with libbpf -# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version -#libbpf_open_file test_xdp_meta.o - -# TODO: fix libbpf to handle .eh_frame -# [warning] libbpf: relocation failed: no section(10) -#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o +# Load a program compiled without the "-target bpf" flag +libbpf_open_file test_xdp.o # Success exit 0 diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh index 677686198df3..ec4e15948e40 100755 --- a/tools/testing/selftests/bpf/test_lirc_mode2.sh +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -21,13 +21,14 @@ do if grep -q DRV_NAME=rc-loopback $i/uevent then LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q) + INPUTDEV=$(grep DEVNAME= $i/input*/event*/uevent | sed sQDEVNAME=Q/dev/Q) fi done if [ -n $LIRCDEV ]; then TYPE=lirc_mode2 - ./test_lirc_mode2_user $LIRCDEV + ./test_lirc_mode2_user $LIRCDEV $INPUTDEV ret=$? if [ $ret -ne 0 ]; then echo -e ${RED}"FAIL: $TYPE"${NC} diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c index ba26855563a5..4147130cc3b7 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c @@ -15,6 +15,9 @@ int bpf_decoder(unsigned int *sample) if (duration & 0x10000) bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0); + if (duration & 0x20000) + bpf_rc_pointer_rel(sample, (duration >> 8) & 0xff, + duration & 0xff); } return 0; diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index d470d63c33db..fb5fd6841ef3 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -29,6 +29,7 @@ #include <linux/bpf.h> #include <linux/lirc.h> +#include <linux/input.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> @@ -47,12 +48,13 @@ int main(int argc, char **argv) { struct bpf_object *obj; - int ret, lircfd, progfd, mode; - int testir = 0x1dead; + int ret, lircfd, progfd, inputfd; + int testir1 = 0x1dead; + int testir2 = 0x20101; u32 prog_ids[10], prog_flags[10], prog_cnt; - if (argc != 2) { - printf("Usage: %s /dev/lircN\n", argv[0]); + if (argc != 3) { + printf("Usage: %s /dev/lircN /dev/input/eventM\n", argv[0]); return 2; } @@ -76,9 +78,9 @@ int main(int argc, char **argv) return 1; } - mode = LIRC_MODE_SCANCODE; - if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) { - printf("failed to set rec mode: %m\n"); + inputfd = open(argv[2], O_RDONLY | O_NONBLOCK); + if (inputfd == -1) { + printf("failed to open input device %s: %m\n", argv[1]); return 1; } @@ -102,29 +104,54 @@ int main(int argc, char **argv) } /* Write raw IR */ - ret = write(lircfd, &testir, sizeof(testir)); - if (ret != sizeof(testir)) { + ret = write(lircfd, &testir1, sizeof(testir1)); + if (ret != sizeof(testir1)) { printf("Failed to send test IR message: %m\n"); return 1; } - struct pollfd pfd = { .fd = lircfd, .events = POLLIN }; - struct lirc_scancode lsc; + struct pollfd pfd = { .fd = inputfd, .events = POLLIN }; + struct input_event event; - poll(&pfd, 1, 100); + for (;;) { + poll(&pfd, 1, 100); - /* Read decoded IR */ - ret = read(lircfd, &lsc, sizeof(lsc)); - if (ret != sizeof(lsc)) { - printf("Failed to read decoded IR: %m\n"); - return 1; + /* Read decoded IR */ + ret = read(inputfd, &event, sizeof(event)); + if (ret != sizeof(event)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (event.type == EV_MSC && event.code == MSC_SCAN && + event.value == 0xdead) { + break; + } } - if (lsc.scancode != 0xdead || lsc.rc_proto != 64) { - printf("Incorrect scancode decoded\n"); + /* Write raw IR */ + ret = write(lircfd, &testir2, sizeof(testir2)); + if (ret != sizeof(testir2)) { + printf("Failed to send test IR message: %m\n"); return 1; } + for (;;) { + poll(&pfd, 1, 100); + + /* Read decoded IR */ + ret = read(inputfd, &event, sizeof(event)); + if (ret != sizeof(event)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (event.type == EV_REL && event.code == REL_Y && + event.value == 1 ) { + break; + } + } + prog_cnt = 10; ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids, &prog_cnt); diff --git a/tools/testing/selftests/bpf/test_map_in_map.c b/tools/testing/selftests/bpf/test_map_in_map.c new file mode 100644 index 000000000000..ce923e67e08e --- /dev/null +++ b/tools/testing/selftests/bpf/test_map_in_map.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") mim_array = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +struct bpf_map_def SEC("maps") mim_hash = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +SEC("xdp_mimtest") +int xdp_mimtest0(struct xdp_md *ctx) +{ + int value = 123; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&mim_array, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + map = bpf_map_lookup_elem(&mim_hash, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + return XDP_PASS; +} + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 4db2116e52be..9c79ee017df3 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -258,24 +258,36 @@ static void test_hashmap_percpu(int task, void *data) close(fd); } -static void test_hashmap_walk(int task, void *data) +static int helper_fill_hashmap(int max_entries) { - int fd, i, max_entries = 1000; - long long key, value, next_key; - bool next_key_valid = true; + int i, fd, ret; + long long key, value; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), max_entries, map_flags); - if (fd < 0) { - printf("Failed to create hashmap '%s'!\n", strerror(errno)); - exit(1); - } + CHECK(fd < 0, + "failed to create hashmap", + "err: %s, flags: 0x%x\n", strerror(errno), map_flags); for (i = 0; i < max_entries; i++) { key = i; value = key; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0); + ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(ret != 0, + "can't update hashmap", + "err: %s\n", strerror(ret)); } + return fd; +} + +static void test_hashmap_walk(int task, void *data) +{ + int fd, i, max_entries = 1000; + long long key, value, next_key; + bool next_key_valid = true; + + fd = helper_fill_hashmap(max_entries); + for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key, &next_key) == 0; i++) { key = next_key; @@ -306,6 +318,39 @@ static void test_hashmap_walk(int task, void *data) close(fd); } +static void test_hashmap_zero_seed(void) +{ + int i, first, second, old_flags; + long long key, next_first, next_second; + + old_flags = map_flags; + map_flags |= BPF_F_ZERO_SEED; + + first = helper_fill_hashmap(3); + second = helper_fill_hashmap(3); + + for (i = 0; ; i++) { + void *key_ptr = !i ? NULL : &key; + + if (bpf_map_get_next_key(first, key_ptr, &next_first) != 0) + break; + + CHECK(bpf_map_get_next_key(second, key_ptr, &next_second) != 0, + "next_key for second map must succeed", + "key_ptr: %p", key_ptr); + CHECK(next_first != next_second, + "keys must match", + "i: %d first: %lld second: %lld\n", i, + next_first, next_second); + + key = next_first; + } + + map_flags = old_flags; + close(first); + close(second); +} + static void test_arraymap(int task, void *data) { int key, next_key, fd; @@ -1080,6 +1125,94 @@ out_sockmap: exit(1); } +#define MAPINMAP_PROG "./test_map_in_map.o" +static void test_map_in_map(void) +{ + struct bpf_program *prog; + struct bpf_object *obj; + struct bpf_map *map; + int mim_fd, fd, err; + int pos = 0; + + obj = bpf_object__open(MAPINMAP_PROG); + + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int), + 2, 0); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); + } + + map = bpf_object__find_map_by_name(obj, "mim_array"); + if (IS_ERR(map)) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + err = bpf_map__set_inner_map_fd(map, fd); + if (err) { + printf("Failed to set inner_map_fd for array of maps\n"); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim_hash"); + if (IS_ERR(map)) { + printf("Failed to load hash of maps from test prog\n"); + goto out_map_in_map; + } + err = bpf_map__set_inner_map_fd(map, fd); + if (err) { + printf("Failed to set inner_map_fd for hash of maps\n"); + goto out_map_in_map; + } + + bpf_object__for_each_program(prog, obj) { + bpf_program__set_xdp(prog); + } + bpf_object__load(obj); + + map = bpf_object__find_map_by_name(obj, "mim_array"); + if (IS_ERR(map)) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + mim_fd = bpf_map__fd(map); + if (mim_fd < 0) { + printf("Failed to get descriptor for array of maps\n"); + goto out_map_in_map; + } + + err = bpf_map_update_elem(mim_fd, &pos, &fd, 0); + if (err) { + printf("Failed to update array of maps\n"); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim_hash"); + if (IS_ERR(map)) { + printf("Failed to load hash of maps from test prog\n"); + goto out_map_in_map; + } + mim_fd = bpf_map__fd(map); + if (mim_fd < 0) { + printf("Failed to get descriptor for hash of maps\n"); + goto out_map_in_map; + } + + err = bpf_map_update_elem(mim_fd, &pos, &fd, 0); + if (err) { + printf("Failed to update hash of maps\n"); + goto out_map_in_map; + } + + close(fd); + bpf_object__close(obj); + return; + +out_map_in_map: + close(fd); + exit(1); +} + #define MAP_SIZE (32 * 1024) static void test_map_large(void) @@ -1534,6 +1667,7 @@ static void run_all_tests(void) test_hashmap(0, NULL); test_hashmap_percpu(0, NULL); test_hashmap_walk(0, NULL); + test_hashmap_zero_seed(); test_arraymap(0, NULL); test_arraymap_percpu(0, NULL); @@ -1554,6 +1688,8 @@ static void run_all_tests(void) test_queuemap(0, NULL); test_stackmap(0, NULL); + + test_map_in_map(); } int main(void) diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c index 7887df693399..44ed7f29f8ab 100644 --- a/tools/testing/selftests/bpf/test_netcnt.c +++ b/tools/testing/selftests/bpf/test_netcnt.c @@ -81,7 +81,10 @@ int main(int argc, char **argv) goto err; } - assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0); + if (system("which ping6 &>/dev/null") == 0) + assert(!system("ping6 localhost -c 10000 -f -q > /dev/null")); + else + assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null")); if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, &prog_cnt)) { diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2d3c04f45530..26f1fdf3e2bf 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -70,7 +70,7 @@ static struct { .tcp.urg_ptr = 123, }; -#define CHECK(condition, tag, format...) ({ \ +#define _CHECK(condition, tag, duration, format...) ({ \ int __ret = !!(condition); \ if (__ret) { \ error_cnt++; \ @@ -83,6 +83,11 @@ static struct { __ret; \ }) +#define CHECK(condition, tag, format...) \ + _CHECK(condition, tag, duration, format) +#define CHECK_ATTR(condition, tag, format...) \ + _CHECK(condition, tag, tattr.duration, format) + static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -124,6 +129,53 @@ static void test_pkt_access(void) bpf_object__close(obj); } +static void test_prog_run_xattr(void) +{ + const char *file = "./test_pkt_access.o"; + struct bpf_object *obj; + char buf[10]; + int err; + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = 5, + }; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, + &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; + + memset(buf, 0, sizeof(buf)); + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run", + "err %d errno %d retval %d\n", err, errno, tattr.retval); + + CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", + "incorrect output size, want %lu have %u\n", + sizeof(pkt_v4), tattr.data_size_out); + + CHECK_ATTR(buf[5] != 0, "overflow", + "BPF_PROG_TEST_RUN ignored size hint\n"); + + tattr.data_out = NULL; + tattr.data_size_out = 0; + errno = 0; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err || errno || tattr.retval, "run_no_output", + "err %d errno %d retval %d\n", err, errno, tattr.retval); + + tattr.data_size_out = 1; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err); + + bpf_object__close(obj); +} + static void test_xdp(void) { struct vip key4 = {.protocol = 6, .family = AF_INET}; @@ -524,7 +576,7 @@ static void test_bpf_obj_id(void) load_time < now - 60 || load_time > now + 60 || prog_infos[i].created_by_uid != my_uid || prog_infos[i].nr_map_ids != 1 || - *(int *)prog_infos[i].map_ids != map_infos[i].id || + *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", @@ -539,7 +591,7 @@ static void test_bpf_obj_id(void) load_time, now, prog_infos[i].created_by_uid, my_uid, prog_infos[i].nr_map_ids, 1, - *(int *)prog_infos[i].map_ids, map_infos[i].id, + *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, prog_infos[i].name, expected_prog_name)) goto done; } @@ -585,7 +637,7 @@ static void test_bpf_obj_id(void) bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); - saved_map_id = *(int *)(prog_infos[i].map_ids); + saved_map_id = *(int *)((long)prog_infos[i].map_ids); prog_info.map_ids = prog_infos[i].map_ids; prog_info.nr_map_ids = 2; err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); @@ -593,12 +645,12 @@ static void test_bpf_obj_id(void) prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)prog_info.map_ids != saved_map_id, + *(int *)(long)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)prog_info.map_ids, saved_map_id); + *(int *)(long)prog_info.map_ids, saved_map_id); close(prog_fd); } CHECK(nr_id_found != nr_iters, @@ -1703,7 +1755,7 @@ static void test_reference_tracking() const char *file = "./test_sk_lookup_kern.o"; struct bpf_object *obj; struct bpf_program *prog; - __u32 duration; + __u32 duration = 0; int err = 0; obj = bpf_object__open(file); @@ -1837,6 +1889,7 @@ int main(void) jit_enabled = is_jit_enabled(); test_pkt_access(); + test_prog_run_xattr(); test_xdp(); test_xdp_adjust_tail(); test_l4lb_all(); diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c index b745bdc08c2b..e21cd736c196 100644 --- a/tools/testing/selftests/bpf/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c @@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) return TC_ACT_SHOT; tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); - sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return sk ? TC_ACT_OK : TC_ACT_UNSPEC; @@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return 0; @@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family = 0; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { bpf_sk_release(sk); family = sk->family; @@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { sk += 1; bpf_sk_release(sk); @@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); sk += 1; if (sk) bpf_sk_release(sk); @@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); return 0; } @@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); bpf_sk_release(sk); return 0; @@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); return 0; } @@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) void lookup_no_release(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } SEC("fail_no_release_subcall") diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index aeeb76a54d63..73b7493d4120 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -574,24 +574,44 @@ static int bind4_prog_load(const struct sock_addr_test *test) /* if (sk.family == AF_INET && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24), /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, type)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1), BPF_JMP_A(1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20), /* 1st_byte_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18), + + /* 2nd_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16), + + /* 3rd_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14), + + /* 4th_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 3), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12), /* 1st_half_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10), + + /* 2nd_half_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8), /* whole_user_ip4 == expected) { */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 622ade0a0957..e85a771f607b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -79,6 +79,8 @@ int txmsg_start; int txmsg_end; int txmsg_start_push; int txmsg_end_push; +int txmsg_start_pop; +int txmsg_pop; int txmsg_ingress; int txmsg_skb; int ktls; @@ -104,6 +106,8 @@ static const struct option long_options[] = { {"txmsg_end", required_argument, NULL, 'e'}, {"txmsg_start_push", required_argument, NULL, 'p'}, {"txmsg_end_push", required_argument, NULL, 'q'}, + {"txmsg_start_pop", required_argument, NULL, 'w'}, + {"txmsg_pop", required_argument, NULL, 'x'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, {"txmsg_skb", no_argument, &txmsg_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, @@ -473,13 +477,27 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); } else { int slct, recvp = 0, recv, max_fd = fd; + float total_bytes, txmsg_pop_total; int fd_flags = O_NONBLOCK; struct timeval timeout; - float total_bytes; fd_set w; fcntl(fd, fd_flags); + /* Account for pop bytes noting each iteration of apply will + * call msg_pop_data helper so we need to account for this + * by calculating the number of apply iterations. Note user + * of the tool can create cases where no data is sent by + * manipulating pop/push/pull/etc. For example txmsg_apply 1 + * with txmsg_pop 1 will try to apply 1B at a time but each + * iteration will then pop 1B so no data will ever be sent. + * This is really only useful for testing edge cases in code + * paths. + */ total_bytes = (float)iov_count * (float)iov_length * (float)cnt; + txmsg_pop_total = txmsg_pop; + if (txmsg_apply) + txmsg_pop_total *= (total_bytes / txmsg_apply); + total_bytes -= txmsg_pop_total; err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) perror("recv start time: "); @@ -488,7 +506,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, timeout.tv_sec = 0; timeout.tv_usec = 300000; } else { - timeout.tv_sec = 1; + timeout.tv_sec = 3; timeout.tv_usec = 0; } @@ -503,7 +521,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, goto out_errno; } else if (!slct) { if (opt->verbose) - fprintf(stderr, "unexpected timeout\n"); + fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total); errno = -EIO; clock_gettime(CLOCK_MONOTONIC, &s->end); goto out_errno; @@ -619,7 +637,7 @@ static int sendmsg_test(struct sockmap_options *opt) iov_count = 1; err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false, opt); - if (err && opt->verbose) + if (opt->verbose) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); @@ -931,6 +949,39 @@ run: } } + if (txmsg_start_pop) { + i = 4; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop): %d (%s)\n", + txmsg_start_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 4; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + } + + if (txmsg_pop) { + i = 5; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_pop): %d (%s)\n", + txmsg_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 5; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + + } + if (txmsg_ingress) { int in = BPF_F_INGRESS; @@ -1082,6 +1133,11 @@ static void test_options(char *options) snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); strncat(options, tstr, OPTSTRING); } + if (txmsg_start_pop) { + snprintf(tstr, OPTSTRING, "pop (%d,%d),", + txmsg_start_pop, txmsg_start_pop + txmsg_pop); + strncat(options, tstr, OPTSTRING); + } if (txmsg_ingress) strncat(options, "ingress,", OPTSTRING); if (txmsg_skb) @@ -1264,6 +1320,7 @@ static int test_mixed(int cgrp) txmsg_apply = txmsg_cork = 0; txmsg_start = txmsg_end = 0; txmsg_start_push = txmsg_end_push = 0; + txmsg_start_pop = txmsg_pop = 0; /* Test small and large iov_count values with pass/redir/apply/cork */ txmsg_pass = 1; @@ -1383,6 +1440,19 @@ static int test_start_end(int cgrp) txmsg_end = 2; txmsg_start_push = 1; txmsg_end_push = 2; + txmsg_start_pop = 1; + txmsg_pop = 1; + err = test_txmsg(cgrp); + if (err) + goto out; + + /* Cut a byte of pushed data but leave reamining in place */ + txmsg_start = 1; + txmsg_end = 2; + txmsg_start_push = 1; + txmsg_end_push = 3; + txmsg_start_pop = 1; + txmsg_pop = 1; err = test_txmsg(cgrp); if (err) goto out; @@ -1393,6 +1463,9 @@ static int test_start_end(int cgrp) opt.iov_length = 100; txmsg_cork = 1600; + txmsg_start_pop = 0; + txmsg_pop = 0; + for (i = 99; i <= 1600; i += 500) { txmsg_start = 0; txmsg_end = i; @@ -1403,6 +1476,17 @@ static int test_start_end(int cgrp) goto out; } + /* Test pop data in middle of cork */ + for (i = 99; i <= 1600; i += 500) { + txmsg_start_pop = 10; + txmsg_pop = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end with cork but pull data in middle */ for (i = 199; i <= 1600; i += 500) { txmsg_start = 100; @@ -1423,6 +1507,15 @@ static int test_start_end(int cgrp) if (err) goto out; + /* Test pop with cork pulling last sg entry */ + txmsg_start_pop = 1500; + txmsg_pop = 1600; + err = test_exec(cgrp, &opt); + if (err) + goto out; + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end pull of single byte in last page */ txmsg_start = 1111; txmsg_end = 1112; @@ -1432,6 +1525,13 @@ static int test_start_end(int cgrp) if (err) goto out; + /* Test pop of single byte in last page */ + txmsg_start_pop = 1111; + txmsg_pop = 1112; + err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test start/end with end < start */ txmsg_start = 1111; txmsg_end = 0; @@ -1456,7 +1556,20 @@ static int test_start_end(int cgrp) txmsg_start_push = 1601; txmsg_end_push = 1600; err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test pop with start > data */ + txmsg_start_pop = 1601; + txmsg_pop = 1; + err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test pop with pop range > data */ + txmsg_start_pop = 1599; + txmsg_pop = 10; + err = test_exec(cgrp, &opt); out: txmsg_start = 0; txmsg_end = 0; @@ -1641,6 +1754,12 @@ int main(int argc, char **argv) case 'q': txmsg_end_push = atoi(optarg); break; + case 'w': + txmsg_start_pop = atoi(optarg); + break; + case 'x': + txmsg_pop = atoi(optarg); + break; case 'a': txmsg_apply = atoi(optarg); break; diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index 14b8bbac004f..e7639f66a941 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -74,7 +74,7 @@ struct bpf_map_def SEC("maps") sock_bytes = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), - .max_entries = 4 + .max_entries = 6 }; struct bpf_map_def SEC("maps") sock_redir_flags = { @@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1, two = 2, three = 3; - int *start, *end, *start_push, *end_push; + int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -198,15 +198,19 @@ int bpf_prog4(struct sk_msg_md *msg) end_push = bpf_map_lookup_elem(&sock_bytes, &three); if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); return SK_PASS; } SEC("sk_msg2") int bpf_prog5(struct sk_msg_md *msg) { - int zero = 0, one = 1, two = 2, three = 3; - int *start, *end, *start_push, *end_push; - int *bytes, len1, len2 = 0, len3; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; + int *bytes, len1, len2 = 0, len3, len4; int err1 = -1, err2 = -1; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -247,6 +251,20 @@ int bpf_prog5(struct sk_msg_md *msg) bpf_printk("sk_msg2: length push_update %i->%i\n", len2 ? len2 : len1, len3); } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg2: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg2: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", len1, err1, err2); @@ -256,8 +274,8 @@ int bpf_prog5(struct sk_msg_md *msg) SEC("sk_msg3") int bpf_prog6(struct sk_msg_md *msg) { - int *bytes, *start, *end, *start_push, *end_push, *f; - int zero = 0, one = 1, two = 2, three = 3, key = 0; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -277,6 +295,11 @@ int bpf_prog6(struct sk_msg_md *msg) if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -292,8 +315,9 @@ int bpf_prog6(struct sk_msg_md *msg) SEC("sk_msg4") int bpf_prog7(struct sk_msg_md *msg) { - int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; - int *bytes, *start, *end, *start_push, *end_push, *f; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int len1, len2 = 0, len3, len4; int err1 = 0, err2 = 0, key = 0; __u64 flags = 0; @@ -335,6 +359,22 @@ int bpf_prog7(struct sk_msg_md *msg) len2 ? len2 : len1, len3); } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg4: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg4: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg4: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } + + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -389,8 +429,8 @@ int bpf_prog9(struct sk_msg_md *msg) SEC("sk_msg7") int bpf_prog10(struct sk_msg_md *msg) { - int *bytes, *start, *end, *start_push, *end_push; - int zero = 0, one = 1, two = 2, three = 3; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -406,7 +446,11 @@ int bpf_prog10(struct sk_msg_md *msg) end_push = bpf_map_lookup_elem(&sock_bytes, &three); if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); - + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + bpf_printk("return sk drop\n"); return SK_DROP; } diff --git a/tools/testing/selftests/bpf/test_tcpnotify.h b/tools/testing/selftests/bpf/test_tcpnotify.h new file mode 100644 index 000000000000..8b6cea030bfc --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _TEST_TCPBPF_H +#define _TEST_TCPBPF_H + +struct tcpnotify_globals { + __u32 total_retrans; + __u32 ncalls; +}; + +struct tcp_notifier { + __u8 type; + __u8 subtype; + __u8 source; + __u8 hash; +}; + +#define TESTPORT 12877 +#endif diff --git a/tools/testing/selftests/bpf/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/test_tcpnotify_kern.c new file mode 100644 index 000000000000..edbca203ce2d --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify_kern.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpnotify.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpnotify_globals), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") perf_event_map = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__u32), + .max_entries = 2, +}; + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + op = (int) skops->op; + + if (bpf_ntohl(skops->remote_port) != TESTPORT) { + skops->reply = -1; + return 0; + } + + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + case BPF_SOCK_OPS_RWND_INIT: + case BPF_SOCK_OPS_NEEDS_ECN: + case BPF_SOCK_OPS_BASE_RTT: + case BPF_SOCK_OPS_RTO_CB: + rv = 1; + break; + + case BPF_SOCK_OPS_TCP_CONNECT_CB: + case BPF_SOCK_OPS_TCP_LISTEN_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + bpf_sock_ops_cb_flags_set(skops, (BPF_SOCK_OPS_RETRANS_CB_FLAG| + BPF_SOCK_OPS_RTO_CB_FLAG)); + rv = 1; + break; + case BPF_SOCK_OPS_RETRANS_CB: { + __u32 key = 0; + struct tcpnotify_globals g, *gp; + struct tcp_notifier msg = { + .type = 0xde, + .subtype = 0xad, + .source = 0xbe, + .hash = 0xef, + }; + + rv = 1; + + /* Update results */ + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.ncalls++; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + bpf_perf_event_output(skops, &perf_event_map, + BPF_F_CURRENT_CPU, + &msg, sizeof(msg)); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c new file mode 100644 index 000000000000..ff3c4522aed6 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <pthread.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <asm/types.h> +#include <sys/syscall.h> +#include <errno.h> +#include <string.h> +#include <linux/bpf.h> +#include <sys/socket.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <sys/ioctl.h> +#include <linux/rtnetlink.h> +#include <signal.h> +#include <linux/perf_event.h> + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#include "test_tcpnotify.h" +#include "trace_helpers.h" + +#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) + +pthread_t tid; +int rx_callbacks; + +static int dummyfn(void *data, int size) +{ + struct tcp_notifier *t = data; + + if (t->type != 0xde || t->subtype != 0xad || + t->source != 0xbe || t->hash != 0xef) + return 1; + rx_callbacks++; + return 0; +} + +void tcp_notifier_poller(int fd) +{ + while (1) + perf_event_poller(fd, dummyfn); +} + +static void *poller_thread(void *arg) +{ + int fd = *(int *)arg; + + tcp_notifier_poller(fd); + return arg; +} + +int verify_result(const struct tcpnotify_globals *result) +{ + return (result->ncalls > 0 && result->ncalls == rx_callbacks ? 0 : 1); +} + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +static int setup_bpf_perf_event(int mapfd) +{ + struct perf_event_attr attr = { + .sample_type = PERF_SAMPLE_RAW, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + }; + int key = 0; + int pmu_fd; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); + if (pmu_fd < 0) + return pmu_fd; + bpf_map_update_elem(mapfd, &key, &pmu_fd, BPF_ANY); + + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + return pmu_fd; +} + +int main(int argc, char **argv) +{ + const char *file = "test_tcpnotify_kern.o"; + int prog_fd, map_fd, perf_event_fd; + struct tcpnotify_globals g = {0}; + const char *cg_path = "/foo"; + int error = EXIT_FAILURE; + struct bpf_object *obj; + int cg_fd = -1; + __u32 key = 0; + int rv; + char test_script[80]; + int pmu_fd; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + + if (setup_cgroup_environment()) + goto err; + + cg_fd = create_and_get_cgroup(cg_path); + if (!cg_fd) + goto err; + + if (join_cgroup(cg_path)) + goto err; + + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map"); + if (perf_event_fd < 0) + goto err; + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + pmu_fd = setup_bpf_perf_event(perf_event_fd); + if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0) + goto err; + + pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd); + + sprintf(test_script, + "/usr/sbin/iptables -A INPUT -p tcp --dport %d -j DROP", + TESTPORT); + system(test_script); + + sprintf(test_script, + "/usr/bin/nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", + TESTPORT); + system(test_script); + + sprintf(test_script, + "/usr/sbin/iptables -D INPUT -p tcp --dport %d -j DROP", + TESTPORT); + system(test_script); + + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + sleep(10); + + if (verify_result(&g)) { + printf("FAILED: Wrong stats Expected %d calls, got %d\n", + g.ncalls, rx_callbacks); + goto err; + } + + printf("PASSED!\n"); + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + close(cg_fd); + cleanup_cgroup_environment(); + return error; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 6f61df62f690..a08c67c8767e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -721,8 +721,18 @@ static struct bpf_test tests[] = { BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "unknown opcode c4", + .result = ACCEPT, + .retval = 0, + }, + { + "arsh32 on imm 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0x1122334485667788), + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -16069393, }, { "arsh32 on reg", @@ -732,8 +742,19 @@ static struct bpf_test tests[] = { BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "unknown opcode cc", + .result = ACCEPT, + .retval = 0, + }, + { + "arsh32 on reg 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0xffff55667788), + BPF_MOV64_IMM(BPF_REG_1, 15), + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 43724, }, { "arsh64 on imm", @@ -1823,6 +1844,7 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet read for SK_MSG", @@ -2026,29 +2048,27 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check skb->hash byte load not permitted 1", + "check skb->hash byte load permitted 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 1), BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check skb->hash byte load not permitted 2", + "check skb->hash byte load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check skb->hash byte load not permitted 3", + "check skb->hash byte load permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -2060,8 +2080,7 @@ static struct bpf_test tests[] = { #endif BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { "check cb access: byte, wrong type", @@ -2173,7 +2192,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check skb->hash half load not permitted", + "check skb->hash half load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -2185,10 +2204,43 @@ static struct bpf_test tests[] = { #endif BPF_EXIT_INSN(), }, + .result = ACCEPT, + }, + { + "check skb->hash half load not permitted, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#endif + BPF_EXIT_INSN(), + }, .errstr = "invalid bpf_context access", .result = REJECT, }, { + "check skb->hash half load not permitted, unaligned 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#endif + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { "check cb access: half, wrong type", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), @@ -2418,6 +2470,10 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, tc_index)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), BPF_EXIT_INSN(), }, .errstr_unpriv = "", @@ -2904,6 +2960,19 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "alu32: mov u32 const", + .insns = { + BPF_MOV32_IMM(BPF_REG_7, 0), + BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1), + BPF_MOV32_REG(BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { "unpriv: partial copy of pointer", .insns = { BPF_MOV32_REG(BPF_REG_1, BPF_REG_10), @@ -3249,6 +3318,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R0 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs corruption 2", @@ -3279,6 +3349,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R3 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs + data", @@ -3778,6 +3849,7 @@ static struct bpf_test tests[] = { .errstr = "R2 invalid mem access 'inv'", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test16 (arith on data_end)", @@ -3880,6 +3952,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test21 (x += pkt_ptr, 2)", @@ -3905,6 +3978,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test22 (x += pkt_ptr, 3)", @@ -3935,6 +4009,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test23 (x += pkt_ptr, 4)", @@ -3961,6 +4036,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test24 (x += pkt_ptr, 5)", @@ -3986,6 +4062,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test25 (marking on <, good access)", @@ -5117,6 +5194,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid access to map value, value_size=64 off=-2 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid cgroup storage access 5", @@ -5233,6 +5311,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid access to map value, value_size=64 off=-2 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid per-cpu cgroup storage access 5", @@ -5270,6 +5349,31 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { + "write tstamp from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=152 size=8", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "read tstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { "multiple registers share map_lookup_elem result", .insns = { BPF_MOV64_IMM(BPF_REG_1, 10), @@ -7149,6 +7253,7 @@ static struct bpf_test tests[] = { .errstr = "invalid mem access 'inv'", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value illegal alu op, 5", @@ -7171,6 +7276,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_48b = { 3 }, .errstr = "R0 invalid mem access 'inv'", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value is preserved across register spilling", @@ -7664,6 +7770,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = 0 /* csum_diff of 64-byte packet */, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -8576,7 +8683,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map_hash_8b = { 4 }, - .errstr = "R0 invalid mem access 'inv'", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -9626,6 +9733,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' > pkt_end, bad access 1", @@ -9663,6 +9771,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', good access", @@ -9701,6 +9810,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', bad access 2", @@ -9719,6 +9829,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, good access", @@ -9757,6 +9868,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, bad access 2", @@ -9775,6 +9887,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', good access", @@ -9792,6 +9905,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', bad access 1", @@ -9829,6 +9943,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, good access", @@ -9865,6 +9980,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, bad access 2", @@ -9902,6 +10018,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end >= pkt_data', bad access 1", @@ -9940,6 +10057,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, good access", @@ -9958,6 +10076,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, bad access 1", @@ -9996,6 +10115,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', good access", @@ -10032,6 +10152,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', bad access 2", @@ -10068,6 +10189,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' > pkt_data, bad access 1", @@ -10105,6 +10227,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', good access", @@ -10143,6 +10266,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', bad access 2", @@ -10161,6 +10285,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, good access", @@ -10199,6 +10324,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, bad access 2", @@ -10217,6 +10343,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', good access", @@ -10234,6 +10361,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', bad access 1", @@ -10271,6 +10399,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, good access", @@ -10307,6 +10436,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", @@ -10344,6 +10474,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data >= pkt_meta', bad access 1", @@ -10382,6 +10513,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, good access", @@ -10400,6 +10532,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", @@ -10438,6 +10571,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', good access", @@ -10474,6 +10608,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', bad access 2", @@ -10547,7 +10682,7 @@ static struct bpf_test tests[] = { "check deducing bounds from const, 5", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, @@ -10578,6 +10713,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 8", @@ -10591,6 +10727,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 9", @@ -11065,6 +11202,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R6 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls with args", @@ -11930,6 +12068,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", @@ -12073,6 +12212,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value_ptr_or_null via arg. test1", @@ -12244,6 +12384,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = POINTER_VALUE, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 2", @@ -12275,6 +12416,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 3", @@ -12310,6 +12452,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 4", @@ -12344,6 +12487,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 5", @@ -12377,6 +12521,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "same insn cannot be used with different", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 6", @@ -12412,6 +12557,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 7", @@ -12446,6 +12592,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 8", @@ -12486,6 +12633,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 9", @@ -12527,6 +12675,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: caller stack init to zero or map_value_or_null", @@ -12892,6 +13041,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "BPF_XADD stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "xadd/w check whether src/dst got mangled, 1", @@ -13378,6 +13528,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "Unreleased reference", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking: alloc, check, free in both subbranches", @@ -13406,6 +13557,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking in call: free reference in subprog", @@ -13896,6 +14048,57 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, + { + "calls: ctx read at start of subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "check wire_len is not readable by sockets", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check wire_len is readable by tc classifier", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "check wire_len is not writable by tc classifier", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "R1 leaks addr", + .result = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -14181,13 +14384,14 @@ out: static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, reject_from_alignment; + int fd_prog, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; int map_fds[MAX_NR_MAPS]; const char *expected_err; uint32_t expected_val; uint32_t retval; + __u32 pflags; int i, err; for (i = 0; i < MAX_NR_MAPS; i++) @@ -14198,8 +14402,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, do_test_fixup(test, prog_type, prog, map_fds); prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type, prog, prog_len, - test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + pflags = 0; + if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) + pflags |= BPF_F_STRICT_ALIGNMENT; + if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) + pflags |= BPF_F_ANY_ALIGNMENT; + fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? @@ -14209,28 +14417,27 @@ static void do_test_single(struct bpf_test *test, bool unpriv, expected_val = unpriv && test->retval_unpriv ? test->retval_unpriv : test->retval; - reject_from_alignment = fd_prog < 0 && - (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "Unknown alignment."); -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (reject_from_alignment) { - printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", - strerror(errno)); - goto fail_log; - } -#endif + alignment_prevented_execution = 0; + if (expected_ret == ACCEPT) { - if (fd_prog < 0 && !reject_from_alignment) { + if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (fd_prog >= 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) { + alignment_prevented_execution = 1; + goto test_ok; + } +#endif } else { if (fd_prog >= 0) { printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { + if (!strstr(bpf_vlog, expected_err)) { printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", expected_err, bpf_vlog); goto fail_log; @@ -14258,9 +14465,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto fail_log; } } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +test_ok: +#endif (*passes)++; - printf("OK%s\n", reject_from_alignment ? - " (NOTE: reject due to unknown alignment)" : ""); + printf("OK%s\n", alignment_prevented_execution ? + " (NOTE: not executed due to unknown alignment)" : ""); close_fds: close(fd_prog); for (i = 0; i < MAX_NR_MAPS; i++) diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh new file mode 100755 index 000000000000..d72d8488a3b2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test operations that we expect to report extended ack. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + netdev_pre_up_test + vxlan_vlan_add_test + port_vlan_add_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +netdev_pre_up_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name vx2 up type vxlan id 2000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx2 master br2 + check_err $? + + ip link set dev $swp2 master br2 + check_err $? + + # Unsupported configuration: mlxsw demands that all offloaded VXLAN + # devices have the same TTL. + ip link set dev vx2 down + ip link set dev vx2 type vxlan ttl 200 + + ip link set dev vx2 up &>/dev/null + check_fail $? + + ip link set dev vx2 up 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - NETDEV_PRE_UP" + + ip link del dev vx2 + ip link del dev br2 + + ip link del dev vx1 + ip link del dev br1 +} + +vxlan_vlan_add_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at VXLAN device" + + ip link del dev vx1 + ip link del dev br1 +} + +port_vlan_add_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan del dev $swp1 vid 1 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at port" + + ip link del dev vx1 + ip link del dev br1 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh new file mode 100755 index 000000000000..f02d83e94576 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh @@ -0,0 +1,259 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2 +# should be forwarded by the ASIC, but also trapped so that ICMP redirect +# packets could be potentially generated. +# +# 1. https://en.wikipedia.org/wiki/One-armed_router +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | 192.0.2.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:2::2/64 | | +# | | | | +# | | + $swp2 | | +# | +--|--------------------------------------------------------------------+ | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +switch_create() +{ + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + __addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64 +} + +switch_destroy() +{ + __addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + ip link del dev br0 +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +fwd_mark_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are trapped at + # swp1 due to loopback error, but only forwarded by the ASIC through + # swp2 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \ + skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv4 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in hardware" + + tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower +} + +fwd_mark_ipv6() +{ + tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \ + skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv6 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in hardware" + + tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + sysctl_set net.ipv4.conf.all.accept_redirects 0 + sysctl_set net.ipv6.conf.all.accept_redirects 0 + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + sysctl_restore net.ipv6.conf.all.accept_redirects + sysctl_restore net.ipv4.conf.all.accept_redirects + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh new file mode 100755 index 000000000000..7f78b96279f3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various interface configuration scenarios. Observe that configurations +# deemed valid by mlxsw succeed, invalid configurations fail and that no traces +# are produced. To prevent the test from passing in case traces are produced, +# the user can set the 'kernel.panic_on_warn' and 'kernel.panic_on_oops' +# sysctls in its environment. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rif_set_addr_test + rif_inherit_bridge_addr_test + rif_non_inherit_bridge_addr_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +rif_set_addr_test() +{ + local swp1_mac=$(mac_get $swp1) + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # $swp1 and $swp2 likely got their IPv6 local addresses already, but + # here we need to test the transition to RIF. + ip addr flush dev $swp1 + ip addr flush dev $swp2 + sleep .1 + + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + ip link set dev $swp1 addr 00:11:22:33:44:55 + check_err $? + + # IP address enablement should be rejected if the MAC address prefix + # doesn't match other RIFs. + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_fail $? "IP address addition passed for a device with a wrong MAC" + ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for IP address addition" + + ip link set dev $swp2 addr 00:11:22:33:44:66 + check_err $? + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_err $? + + # Change of MAC address of a RIF should be forbidden if the new MAC + # doesn't share the prefix with other MAC addresses. + ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null + check_fail $? "change of MAC address passed for a wrong MAC" + ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for MAC address change" + + log_test "RIF - bad MAC change" + + ip addr del dev $swp2 192.0.2.2/28 + ip addr del dev $swp1 192.0.2.1/28 + + ip link set dev $swp2 addr $swp2_mac + ip link set dev $swp1 addr $swp1_mac +} + +rif_inherit_bridge_addr_test() +{ + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. That prompts bridge address change, which + # should be vetoed, thus preventing the attachment. + ip link set dev d master br1 &>/dev/null + check_fail $? "Device with low MAC was permitted to attach a bridge with RIF" + ip link set dev d master br1 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge attach rejection" + + ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null + check_fail $? "Changing swp2's MAC address permitted" + ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge port MAC address change rejection" + + log_test "RIF - attach port with bad MAC to bridge" + + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + +rif_non_inherit_bridge_addr_test() +{ + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev br1 addr $swp2_mac + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. Since the bridge address was set, it should + # work. + ip link set dev d master br1 &>/dev/null + check_err $? "Could not attach a device with low MAC to a bridge with RIF" + + # Port MAC address change should be allowed for a bridge with set MAC. + ip link set dev $swp2 addr 00:11:22:33:44:55 + check_err $? "Changing swp2's MAC address not permitted" + + log_test "RIF - attach port with bad MAC to bridge with set MAC" + + ip link set dev $swp2 addr $swp2_mac + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 3b75180f455d..b41d6256b2d0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ - multiple_masks_test ctcam_edge_cases_test" + multiple_masks_test ctcam_edge_cases_test delta_simple_test \ + bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -142,7 +143,7 @@ two_masks_test() tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ $tcflags dst_ip 192.0.2.2 action drop tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ - $tcflags dst_ip 192.0.0.0/16 action drop + $tcflags dst_ip 192.0.0.0/8 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -235,7 +236,7 @@ ctcam_two_atcam_masks_test() $tcflags dst_ip 192.0.2.2 action drop # Filter goes into A-TCAM tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ - $tcflags dst_ip 192.0.2.0/24 action drop + $tcflags dst_ip 192.0.0.0/16 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -324,6 +325,258 @@ ctcam_edge_cases_test() ctcam_no_atcam_masks_test } +tp_record() +{ + local tracepoint=$1 + local cmd=$2 + + perf record -q -e $tracepoint $cmd + return $? +} + +tp_check_hits() +{ + local tracepoint=$1 + local count=$2 + + perf_output=`perf script -F trace:event,trace` + hits=`echo $perf_output | grep "$tracepoint:" | wc -l` + if [[ "$count" -ne "$hits" ]]; then + return 1 + fi + return 0 +} + +delta_simple_test() +{ + # The first filter will create eRP, the second filter will fit into + # the first eRP with delta. Remove the first rule then and check that + # the eRP stays (referenced by the second filter). + + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 1 handle 101 flower $tcflags dst_ip 192.0.0.0/24 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 1 + check_err $? "eRP was not created" + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 0 + check_err $? "eRP was incorrectly created" + tp_check_hits "objagg:objagg_obj_parent_assign" 1 + check_err $? "delta was not created" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 1 handle 101 flower" + tp_check_hits "objagg:objagg_obj_root_destroy" 0 + check_err $? "eRP was incorrectly destroyed" + tp_check_hits "objagg:objagg_obj_parent_unassign" 0 + check_err $? "delta was incorrectly destroyed" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after the first was removed" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 2 handle 102 flower" + tp_check_hits "objagg:objagg_obj_parent_unassign" 1 + check_err $? "delta was not destroyed" + tp_check_hits "objagg:objagg_obj_root_destroy" 1 + check_err $? "eRP was not destroyed" + + log_test "delta simple test ($tcflags)" +} + +bloom_simple_test() +{ + # Bloom filter requires that the eRP table is used. This test + # verifies that Bloom filter is not harming correctness of ACLs. + # First, make sure that eRP table is used and then set rule patterns + # which are distant enough and will result skipping a lookup after + # consulting the Bloom filter. Although some eRP lookups are skipped, + # the correct filter should be hit. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Two filters - did not match highest priority" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Single filter - did not match" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Low prio filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 198.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Two filters - did not match highest priority after add" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower + + log_test "bloom simple test ($tcflags)" +} + +bloom_complex_test() +{ + # Bloom filter index computation is affected from region ID, eRP + # ID and from the region key size. In order to excercise those parts + # of the Bloom filter code, use a series of regions, each with a + # different key size and send packet that should hit all of them. + local index + + RET=0 + NUM_CHAINS=4 + BASE_INDEX=100 + + # Create chain with up to 2 key blocks (ip_proto only) + tc chain add dev $h2 ingress chain 1 protocol ip flower \ + ip_proto tcp &> /dev/null + # Create chain with 2-4 key blocks (ip_proto, src MAC) + tc chain add dev $h2 ingress chain 2 protocol ip flower \ + ip_proto tcp \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null + # Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest) + tc chain add dev $h2 ingress chain 3 protocol ip flower \ + ip_proto tcp \ + dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + dst_ip 0.0.0.0/32 &> /dev/null + # Default chain contains all fields and therefore is 8-12 key blocks + tc chain add dev $h2 ingress chain 4 + + # We need at least 2 rules in every region to have eRP table active + # so create a dummy rule per chain using a different pattern + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX - 1 - i)) + tc filter add dev $h2 ingress chain $i protocol ip \ + pref 2 handle $index flower \ + $tcflags ip_proto tcp action drop + done + + # Add rules to test Bloom filter, each in a different chain + index=$BASE_INDEX + tc filter add dev $h2 ingress protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/16 action goto chain 1 + tc filter add dev $h2 ingress chain 1 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags action goto chain 2 + tc filter add dev $h2 ingress chain 2 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_mac $h1mac action goto chain 3 + tc filter add dev $h2 ingress chain 3 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/8 action goto chain 4 + tc filter add dev $h2 ingress chain 4 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + # Send a packet that is supposed to hit all chains + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX + i + 1)) + tc_check_packets "dev $h2 ingress" $index 1 + check_err $? "Did not match chain $i" + done + + # Rules cleanup + for i in $(eval echo {$NUM_CHAINS..0}); do + index=$((BASE_INDEX - i - 1)) + tc filter del dev $h2 ingress chain $i \ + pref 2 handle $index flower + index=$((BASE_INDEX + i + 1)) + tc filter del dev $h2 ingress chain $i \ + pref 1 handle $index flower + done + + # Chains cleanup + for i in $(eval echo {$NUM_CHAINS..1}); do + tc chain del dev $h2 ingress chain $i + done + + log_test "bloom complex test ($tcflags)" +} + + +bloom_delta_test() +{ + # When multiple masks are used, the eRP table is activated. When + # masks are close enough (delta) the masks reside on the same + # eRP table. This test verifies that the eRP table is correctly + # allocated and used in delta condition and that Bloom filter is + # still functional with delta. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.1.0.0/16 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Single filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.2.1.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Delta filters - did not match second filter" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "bloom delta test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh new file mode 100755 index 000000000000..ea11535f5a6e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -0,0 +1,1043 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various aspects of VxLAN offloading which are specific to mlxsw, such +# as sanitization of invalid configurations and offload indication. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="sanitization_test offload_indication_test \ + sanitization_vlan_aware_test offload_indication_vlan_aware_test" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +sanitization_single_dev_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? +} + +sanitization_single_dev_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 &> /dev/null + check_fail $? + + ip link set dev $swp1 nomaster + + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? +} + +sanitization_single_dev_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device - valid configuration" +} + +sanitization_single_dev_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a vlan-aware bridge" +} + +sanitization_single_dev_mcast_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast enabled bridge" +} + +sanitization_single_dev_mcast_group_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + dev $swp2 group 239.0.0.1 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast group" +} + +sanitization_single_dev_no_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with no local ip" +} + +sanitization_single_dev_local_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 2001:db8::1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local ipv6 address" +} + +sanitization_single_dev_learning_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with learning enabled" +} + +sanitization_single_dev_local_interface_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local interface" +} + +sanitization_single_dev_port_range_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + srcport 4000 5000 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp source port range" +} + +sanitization_single_dev_tos_static_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos 20 local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with static tos" +} + +sanitization_single_dev_ttl_inherit_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl inherit tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with inherit ttl" +} + +sanitization_single_dev_udp_checksum_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp checksum" +} + +sanitization_single_dev_test() +{ + # These tests make sure that we correctly sanitize VxLAN device + # configurations we do not support + sanitization_single_dev_valid_test + sanitization_single_dev_vlan_aware_test + sanitization_single_dev_mcast_enabled_test + sanitization_single_dev_mcast_group_test + sanitization_single_dev_no_local_ip_test + sanitization_single_dev_local_ipv6_test + sanitization_single_dev_learning_enabled_test + sanitization_single_dev_local_interface_test + sanitization_single_dev_port_range_test + sanitization_single_dev_tos_static_test + sanitization_single_dev_ttl_inherit_test + sanitization_single_dev_udp_checksum_test +} + +sanitization_multi_devs_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 + check_err $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? +} + +sanitization_multi_devs_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 &> /dev/null + check_fail $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev vxlan1 master br1 + check_err $? + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 &> /dev/null + check_fail $? +} + +sanitization_multi_devs_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_multi_devs_test_pass + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices - valid configuration" +} + +sanitization_multi_devs_ttl_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 40 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different ttl" +} + +sanitization_multi_devs_udp_dstport_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 5789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different udp destination port" +} + +sanitization_multi_devs_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.2 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different local ip" +} + +sanitization_multi_devs_test() +{ + # The device has a single VTEP, which means all the VxLAN devices + # we offload must share certain properties such as source IP and + # UDP destination port. These tests make sure that we forbid + # configurations that violate this limitation + sanitization_multi_devs_valid_test + sanitization_multi_devs_ttl_test + sanitization_multi_devs_udp_dstport_test + sanitization_multi_devs_local_ip_test +} + +sanitization_test() +{ + sanitization_single_dev_test + sanitization_multi_devs_test +} + +offload_indication_setup_create() +{ + # Create a simple setup with two bridges, each with a VxLAN device + # and one local port + ip link add name br0 up type bridge mcast_snooping 0 + ip link add name br1 up type bridge mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + + ip address add 198.51.100.1/32 dev lo + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 +} + +offload_indication_setup_destroy() +{ + ip link del dev vxlan1 + ip link del dev vxlan0 + + ip address del 198.51.100.1/32 dev lo + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link del dev br1 + ip link del dev br0 +} + +offload_indication_fdb_flood_test() +{ + RET=0 + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + + bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \ + | grep -q offload + check_err $? + + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self + + log_test "vxlan flood entry offload indication" +} + +offload_indication_fdb_bridge_test() +{ + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ + dst 198.51.100.2 + + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self master +} + +offload_indication_fdb_test() +{ + offload_indication_fdb_flood_test + offload_indication_fdb_bridge_test +} + +offload_indication_decap_route_test() +{ + RET=0 + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan0 down + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan1 down + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - vxlan device down" + + RET=0 + + ip link set dev vxlan1 up + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan0 up + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - vxlan device up" + + RET=0 + + ip address delete 198.51.100.1/32 dev lo + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + ip address add 198.51.100.1/32 dev lo + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - add local route" + + RET=0 + + ip link set dev $swp1 nomaster + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev $swp2 nomaster + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - local ports enslavement" + + RET=0 + + ip link del dev br0 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - bridge device deletion" + + RET=0 + + ip link add name br0 up type bridge mcast_snooping 0 + ip link add name br1 up type bridge mcast_snooping 0 + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip link set dev vxlan0 master br0 + ip link set dev vxlan1 master br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev vxlan0 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev vxlan1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - vxlan device deletion" + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 +} + +check_fdb_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + check_err $? + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_err $? +} + +check_vxlan_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q self + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + check_fail $? + + bridge fdb show dev vxlan0 | grep $zmac | grep -q self + check_err $? + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_fail $? +} + +check_bridge_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q master + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + check_fail $? +} + +__offload_indication_join_vxlan_first() +{ + local vid=$1; shift + + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + + ip link set dev vxlan0 master br0 + bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2 + + RET=0 + check_vxlan_fdb_not_offloaded + ip link set dev $swp1 master br0 + sleep .1 + check_fdb_offloaded + log_test "offload indication - attach vxlan first" + + RET=0 + ip link set dev vxlan0 down + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - set vxlan down" + + RET=0 + ip link set dev vxlan0 up + sleep .1 + check_fdb_offloaded + log_test "offload indication - set vxlan up" + + if [[ ! -z $vid ]]; then + RET=0 + bridge vlan del dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - delete VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - add tagged VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid pvid untagged + sleep .1 + check_fdb_offloaded + log_test "offload indication - add pvid/untagged VLAN" + fi + + RET=0 + ip link set dev $swp1 nomaster + check_vxlan_fdb_not_offloaded + log_test "offload indication - detach port" +} + +offload_indication_join_vxlan_first() +{ + ip link add dev br0 up type bridge mcast_snooping 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_first + + ip link del dev vxlan0 + ip link del dev br0 +} + +__offload_indication_join_vxlan_last() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + + ip link set dev $swp1 master br0 + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_fail $? + + ip link set dev vxlan0 master br0 + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_err $? + + log_test "offload indication - attach vxlan last" +} + +offload_indication_join_vxlan_last() +{ + ip link add dev br0 up type bridge mcast_snooping 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_test() +{ + offload_indication_setup_create + offload_indication_fdb_test + offload_indication_decap_route_test + offload_indication_setup_destroy + + log_info "offload indication - replay & cleanup" + offload_indication_join_vxlan_first + offload_indication_join_vxlan_last +} + +sanitization_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + # Test that when each VNI is mapped to a different VLAN we can enslave + # a port to the bridge + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 + check_err $? + + log_test "vlan-aware - enslavement to vlan-aware bridge" + + # Try to map both VNIs to the same VLAN and make sure configuration + # fails + RET=0 + + bridge vlan add vid 10 dev vxlan20 pvid untagged &> /dev/null + check_fail $? + + log_test "vlan-aware - two vnis mapped to the same vlan" + + # Test that enslavement of a port to a bridge fails when two VNIs + # are mapped to the same VLAN + RET=0 + + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vxlan20 pvid untagged + bridge vlan add vid 10 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? + + log_test "vlan-aware - failed enslavement to vlan-aware bridge" + + ip link del dev vxlan20 + ip link del dev vxlan10 + ip link del dev br0 +} + +offload_indication_vlan_aware_setup_create() +{ + # Create a simple setup with two VxLAN devices and a single VLAN-aware + # bridge + ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \ + vlan_default_pvid 0 + + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip address add 198.51.100.1/32 dev lo + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged +} + +offload_indication_vlan_aware_setup_destroy() +{ + bridge vlan del vid 20 dev vxlan20 + bridge vlan del vid 10 dev vxlan10 + + ip link del dev vxlan20 + ip link del dev vxlan10 + + ip address del 198.51.100.1/32 dev lo + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + + ip link set dev $swp1 nomaster + + ip link del dev br0 +} + +offload_indication_vlan_aware_fdb_test() +{ + RET=0 + + log_info "vxlan entry offload indication - vlan-aware" + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ + dst 198.51.100.2 vlan 10 + + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self master vlan 10 +} + +offload_indication_vlan_aware_decap_route_test() +{ + RET=0 + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + # Toggle PVID flag on one VxLAN device and make sure route is still + # marked as offloaded + bridge vlan add vid 10 dev vxlan10 untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + # Toggle PVID flag on second VxLAN device and make sure route is no + # longer marked as offloaded + bridge vlan add vid 20 dev vxlan20 untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + # Toggle PVID flag back and make sure route is marked as offloaded + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - vni map/unmap" +} + +offload_indication_vlan_aware_join_vxlan_first() +{ + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_first 1 + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_join_vxlan_last() +{ + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_test() +{ + offload_indication_vlan_aware_setup_create + offload_indication_vlan_aware_fdb_test + offload_indication_vlan_aware_decap_route_test + offload_indication_vlan_aware_setup_destroy + + log_info "offload indication - replay & cleanup - vlan aware" + offload_indication_vlan_aware_join_vxlan_first + offload_indication_vlan_aware_join_vxlan_last +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh new file mode 100755 index 000000000000..fedcb7b35af9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh @@ -0,0 +1,309 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to three IPv4 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 203.0.113.1/24| +# +----|---------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 198.51.100.1 | | +# | | remote 198.51.100.{2..13} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 198.51.100.0/24 via 192.0.2.2 | +# | | +# | + $rp1 | +# | | 192.0.2.1/24 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 192.0.2.2/24 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 203.0.113.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 203.0.113.1/24 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip address add 198.51.100.1/32 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 198.51.100.1/32 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 192.0.2.1/24 dev $rp1 + ip route add 198.51.100.0/24 via 192.0.2.2 +} + +router1_destroy() +{ + ip route del 198.51.100.0/24 via 192.0.2.2 + ip address del 192.0.2.1/24 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 192.0.2.2/24 +} + +router2_destroy() +{ + simple_if_fini $rp2 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 198.51.100.$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ip pref $i handle $i \ + flower ip_proto udp dst_ip 198.51.100.$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ip pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 12 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=12 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 8..10 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 1 1 1 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.8 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.9 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.10 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 3 3 3 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.4 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 4 4 4 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.11 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.13 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 4 5 5 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.5 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.6 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.7 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 8cf22b3c2563..6f81130605d7 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -3,6 +3,7 @@ socket psock_fanout psock_snd psock_tpacket +reuseport_addr_any reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa @@ -14,4 +15,5 @@ udpgso_bench_rx udpgso_bench_tx tcp_inq tls +txring_overwrite ip_defrag diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 2ab3e3437a92..9543a4c2f9be 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -8,11 +8,11 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ rtnetlink.sh xfrm_policy.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh -TEST_PROGS += udpgro_bench.sh udpgro.sh +TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket -TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy -TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd +TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any +TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index cd3a2f1545b5..5821bdd98d20 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -14,3 +14,17 @@ CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_NAT_IPV6=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP_NF_NAT=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV4=m diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 85d253546684..3f248d1f5b91 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -15,6 +15,8 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} NETIF_TYPE=${NETIF_TYPE:=veth} NETIF_CREATE=${NETIF_CREATE:=yes} +MCD=${MCD:=smcrouted} +MC_CLI=${MC_CLI:=smcroutectl} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then @@ -104,7 +106,7 @@ create_netif_veth() { local i - for i in $(eval echo {1..$NUM_NETIFS}); do + for ((i = 1; i <= NUM_NETIFS; ++i)); do local j=$((i+1)) ip link show dev ${NETIFS[p$i]} &> /dev/null @@ -135,7 +137,7 @@ if [[ "$NETIF_CREATE" = "yes" ]]; then create_netif fi -for i in $(eval echo {1..$NUM_NETIFS}); do +for ((i = 1; i <= NUM_NETIFS; ++i)); do ip link show dev ${NETIFS[p$i]} &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: could not find all required interfaces" @@ -477,11 +479,24 @@ master_name_get() ip -j link show dev $if_name | jq -r '.[]["master"]' } +link_stats_get() +{ + local if_name=$1; shift + local dir=$1; shift + local stat=$1; shift + + ip -j -s link show dev $if_name \ + | jq '.[]["stats64"]["'$dir'"]["'$stat'"]' +} + link_stats_tx_packets_get() { - local if_name=$1 + link_stats_get $1 tx packets +} - ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' +link_stats_rx_errors_get() +{ + link_stats_get $1 rx errors } tc_rule_stats_get() @@ -783,6 +798,17 @@ multipath_eval() log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" } +in_ns() +{ + local name=$1; shift + + ip netns exec $name bash <<-EOF + NUM_NETIFS=0 + source lib.sh + $(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done) + EOF +} + ############################################################################## # Tests @@ -790,10 +816,11 @@ ping_do() { local if_name=$1 local dip=$2 + local args=$3 local vrf_name vrf_name=$(master_name_get $if_name) - ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null + ip vrf exec $vrf_name $PING $args $dip -c 10 -i 0.1 -w 2 &> /dev/null } ping_test() @@ -802,17 +829,18 @@ ping_test() ping_do $1 $2 check_err $? - log_test "ping" + log_test "ping$3" } ping6_do() { local if_name=$1 local dip=$2 + local args=$3 local vrf_name vrf_name=$(master_name_get $if_name) - ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null + ip vrf exec $vrf_name $PING6 $args $dip -c 10 -i 0.1 -w 2 &> /dev/null } ping6_test() @@ -821,7 +849,7 @@ ping6_test() ping6_do $1 $2 check_err $? - log_test "ping6" + log_test "ping6$3" } learning_test() diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh new file mode 100755 index 000000000000..109e6d785169 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +------------------+ +# | H1 (v$h1) | +# | 2001:db8:1::2/64 | +# | 198.51.100.2/28 | +# | $h1 + | +# +-------------|----+ +# | +# +-------------|-------------------------------+ +# | SW1 | | +# | $rp1 + | +# | 198.51.100.1/28 | +# | 2001:db8:1::1/64 | +# | | +# | 2001:db8:2::1/64 2001:db8:3::1/64 | +# | 198.51.100.17/28 198.51.100.33/28 | +# | $rp2 + $rp3 + | +# +--------------|--------------------------|---+ +# | | +# | | +# +--------------|---+ +--------------|---+ +# | H2 (v$h2) | | | H3 (v$h3) | | +# | $h2 + | | $h3 + | +# | 198.51.100.18/28 | | 198.51.100.34/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# +------------------+ +------------------+ +# + +ALL_TESTS="mcast_v4 mcast_v6" +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +require_command $MCD +require_command $MC_CLI +table_name=selftests + +h1_create() +{ + simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 + + ip route add 198.51.100.16/28 vrf v$h1 nexthop via 198.51.100.1 + ip route add 198.51.100.32/28 vrf v$h1 nexthop via 198.51.100.1 + + ip route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::1 + ip route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:3::/64 vrf v$h1 + ip route del 2001:db8:2::/64 vrf v$h1 + + ip route del 198.51.100.32/28 vrf v$h1 + ip route del 198.51.100.16/28 vrf v$h1 + + simple_if_fini $h1 198.51.100.2/28 2001:db8:1::2/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.18/28 2001:db8:2::2/64 + + ip route add 198.51.100.0/28 vrf v$h2 nexthop via 198.51.100.17 + ip route add 198.51.100.32/28 vrf v$h2 nexthop via 198.51.100.17 + + ip route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 + ip route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:2::1 + + tc qdisc add dev $h2 ingress +} + +h2_destroy() +{ + tc qdisc del dev $h2 ingress + + ip route del 2001:db8:3::/64 vrf v$h2 + ip route del 2001:db8:1::/64 vrf v$h2 + + ip route del 198.51.100.32/28 vrf v$h2 + ip route del 198.51.100.0/28 vrf v$h2 + + simple_if_fini $h2 198.51.100.18/28 2001:db8:2::2/64 +} + +h3_create() +{ + simple_if_init $h3 198.51.100.34/28 2001:db8:3::2/64 + + ip route add 198.51.100.0/28 vrf v$h3 nexthop via 198.51.100.33 + ip route add 198.51.100.16/28 vrf v$h3 nexthop via 198.51.100.33 + + ip route add 2001:db8:1::/64 vrf v$h3 nexthop via 2001:db8:3::1 + ip route add 2001:db8:2::/64 vrf v$h3 nexthop via 2001:db8:3::1 + + tc qdisc add dev $h3 ingress +} + +h3_destroy() +{ + tc qdisc del dev $h3 ingress + + ip route del 2001:db8:2::/64 vrf v$h3 + ip route del 2001:db8:1::/64 vrf v$h3 + + ip route del 198.51.100.16/28 vrf v$h3 + ip route del 198.51.100.0/28 vrf v$h3 + + simple_if_fini $h3 198.51.100.34/28 2001:db8:3::2/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + ip link set dev $rp3 up + + ip address add 198.51.100.1/28 dev $rp1 + ip address add 198.51.100.17/28 dev $rp2 + ip address add 198.51.100.33/28 dev $rp3 + + ip address add 2001:db8:1::1/64 dev $rp1 + ip address add 2001:db8:2::1/64 dev $rp2 + ip address add 2001:db8:3::1/64 dev $rp3 +} + +router_destroy() +{ + ip address del 2001:db8:3::1/64 dev $rp3 + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 2001:db8:1::1/64 dev $rp1 + + ip address del 198.51.100.33/28 dev $rp3 + ip address del 198.51.100.17/28 dev $rp2 + ip address del 198.51.100.1/28 dev $rp1 + + ip link set dev $rp3 down + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +start_mcd() +{ + SMCROUTEDIR="$(mktemp -d)" + + for ((i = 1; i <= $NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + $SMCROUTEDIR/$table_name.conf + done + + $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ + -P $SMCROUTEDIR/$table_name.pid +} + +kill_mcd() +{ + pkill $MCD + rm -rf $SMCROUTEDIR +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + start_mcd + + vrf_prepare + + h1_create + h2_create + h3_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + kill_mcd +} + +create_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs +} + +delete_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs +} + +mcast_v4() +{ + # Add two interfaces to an MC group, send a packet to the MC group and + # verify packets are received on both. Then delete the route and verify + # packets are no longer received. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 122 flower \ + dst_ip 225.1.2.3 action drop + tc filter add dev $h3 ingress protocol ip pref 1 handle 133 flower \ + dst_ip 225.1.2.3 action drop + + create_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + # Send frames with the corresponding L2 destination address. + $MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + $MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast received on host although deleted" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast received on second host although deleted" + + tc filter del dev $h3 ingress protocol ip pref 1 handle 133 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 122 flower + + log_test "mcast IPv4" +} + +mcast_v6() +{ + # Add two interfaces to an MC group, send a packet to the MC group and + # verify packets are received on both. Then delete the route and verify + # packets are no longer received. + + RET=0 + + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 122 flower \ + dst_ip ff0e::3 action drop + tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 133 flower \ + dst_ip ff0e::3 action drop + + create_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + # Send frames with the corresponding L2 destination address. + $MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \ + -b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + $MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \ + -b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast received on first host although deleted" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast received on second host although deleted" + + tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 133 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 122 flower + + log_test "mcast IPv6" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh new file mode 100755 index 000000000000..56cef3b1c194 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -0,0 +1,786 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# +----|---------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1d) | | | | BR2 (802.1d) | | +# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + test_flood + test_unicast + test_ttl + test_tos + test_ecn_encap + test_ecn_decap + reapply_config + ping_ipv4 + test_flood + test_unicast + test_learning + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx1 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx1 up + + ip link set dev vx1 master br1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + rp1_unset_addr + ip link set dev $rp1 down + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx1 nomaster + ip link set dev vx1 down + ip link del dev vx1 + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + ip link add name vx2 type vxlan id 1000 local $in_addr dstport "$VXPORT" + ip link set dev vx2 up + bridge fdb append dev vx2 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx2 master br2 + tc qdisc add dev vx2 clsact + + simple_if_init w2 $host_addr/28 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx1 is the first device to get attached to the +# bridge, and that at the point that the local IP is already configured. Try the +# other scenario of attaching the device to an already-offloaded bridge, and +# only then attach the local IP. +reapply_config() +{ + echo "Reapplying configuration" + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + rp1_unset_addr + ip link set dev vx1 nomaster + sleep 5 + + ip link set dev vx1 master br1 + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + sleep 1 + rp1_set_addr + sleep 5 +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 ": local->local" + ping_test $h1 192.0.2.3 ": local->remote 1" + ping_test $h1 192.0.2.4 ": local->remote 2" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_sw 2>/dev/null || \ + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_hw +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx2 ns1" "vx2 ns2") + local counter + local key + + for counter in "${counters[@]}"; do + flood_counter_install $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $counter + done +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local what=$1; shift + + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx1 192.0.2.34" + "$r2_mac vx1 192.0.2.50") + local target + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add $target + done + + __test_unicast $h2_mac 192.0.2.2 0 "local MAC unicast" + __test_unicast $r1_mac 192.0.2.3 1 "remote MAC 1 unicast" + __test_unicast $r2_mac 192.0.2.4 2 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del $target + done +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +test_ttl() +{ + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_ttl 99 action pass + vxlan_ping_test $h1 192.0.2.3 "" v1 egress 77 10 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: envelope TTL" +} + +test_tos() +{ + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_tos 0x40 action pass + vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: envelope TOS inheritance" +} + +__test_ecn_encap() +{ + local q=$1; shift + local tos=$1; shift + + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_tos $tos action pass + sleep 1 + vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: ECN encap: $q->$tos" +} + +test_ecn_encap() +{ + # In accordance with INET_ECN_encapsulate() + __test_ecn_encap 0x00 0x00 + __test_ecn_encap 0x01 0x01 + __test_ecn_encap 0x02 0x02 + __test_ecn_encap 0x03 0x02 +} + +vxlan_encapped_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$(mac_get w2):"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"$inner_tos:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request sequence number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} +export -f vxlan_encapped_ping_do + +vxlan_encapped_ping_test() +{ + local ping_dev=$1; shift + local nh_dev=$1; shift + local ping_dip=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local stat_get=$1; shift + local expect=$1; shift + + local t0=$($stat_get) + + in_ns ns1 \ + vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \ + $ping_dip $(mac_get $h1) \ + $inner_tos $outer_tos + + local t1=$($stat_get) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} +export -f vxlan_encapped_ping_test + +__test_ecn_decap() +{ + local orig_inner_tos=$1; shift + local orig_outer_tos=$1; shift + local decapped_tos=$1; shift + + RET=0 + + tc filter add dev $h1 ingress pref 77 prot ip \ + flower ip_tos $decapped_tos action pass + sleep 1 + vxlan_encapped_ping_test v2 v1 192.0.2.17 \ + $orig_inner_tos $orig_outer_tos \ + "tc_rule_stats_get $h1 77 ingress" 10 + tc filter del dev $h1 ingress pref 77 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos" +} + +test_ecn_decap_error() +{ + local orig_inner_tos=00 + local orig_outer_tos=03 + + RET=0 + + vxlan_encapped_ping_test v2 v1 192.0.2.17 \ + $orig_inner_tos $orig_outer_tos \ + "link_stats_rx_errors_get vx1" 10 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error" +} + +test_ecn_decap() +{ + # In accordance with INET_ECN_decapsulate() + __test_ecn_decap 00 00 0x00 + __test_ecn_decap 01 01 0x01 + __test_ecn_decap 02 01 0x02 + __test_ecn_decap 01 03 0x03 + __test_ecn_decap 02 03 0x03 + test_ecn_decap_error +} + +test_learning() +{ + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + + # Enable learning on the VxLAN device and set ageing time to 10 seconds + ip link set dev br1 type bridge ageing_time 1000 + ip link set dev vx1 type vxlan ageing 10 + ip link set dev vx1 type vxlan learning + reapply_config + + # Check that flooding works + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: flood before learning" + + # Send a packet with source mac set to $mac from host w2 and check that + # a corresponding entry is created in VxLAN device vx1 + RET=0 + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_err $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + log_test "VXLAN: show learned FDB entry" + + # Repeat first test and check that packets only reach host w2 in ns1 + RET=0 + + vxlan_flood_test $mac $dst 0 10 0 + + log_test "VXLAN: learned FDB entry" + + # Delete the learned FDB entry from the VxLAN and bridge devices and + # check that packets are flooded + RET=0 + + bridge fdb del dev vx1 $mac master self + sleep 1 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: deletion of learned FDB entry" + + # Re-learn the first FDB entry and check that it is correctly aged-out + RET=0 + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_err $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + vxlan_flood_test $mac $dst 0 10 0 + + sleep 20 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_fail $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_fail $? + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: Ageing of learned FDB entry" + + # Toggle learning on the bridge port and check that the bridge's FDB + # is populated only when it should + RET=0 + + ip link set dev vx1 type bridge_slave learning off + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_fail $? + + ip link set dev vx1 type bridge_slave learning on + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + log_test "VXLAN: learning toggling on bridge port" + + # Restore previous settings + ip link set dev vx1 type vxlan nolearning + ip link set dev vx1 type vxlan ageing 300 + ip link set dev br1 type bridge ageing_time 30000 + reapply_config +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh new file mode 100755 index 000000000000..3bf3da69195f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 +" +source vxlan_bridge_1d.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh new file mode 100755 index 000000000000..a5789721ba92 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -0,0 +1,860 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | | +# | | local 192.0.2.17 local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1q) | | | | BR2 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 | | | | | vid 10 | | +# | | | vid 20 | | | | | vid 20 | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + test_flood + test_unicast + reapply_config + ping_ipv4 + test_flood + test_unicast + test_learning + test_pvid + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 + vlan_create $h1 20 v$h1 198.51.100.1/24 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx10 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + bridge vlan add vid 10 dev w1 + bridge vlan add vid 20 dev w1 + + ip link add name vx10 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx10 up + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx10 master br2 + tc qdisc add dev vx10 clsact + + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx20 up + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx20 master br2 + tc qdisc add dev vx20 clsact + + bridge vlan add vid 20 dev vx20 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/28 + vlan_create w2 20 vw2 $host_addr2/24 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 \ + 198.51.100.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 \ + 198.51.100.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx10 and vx20 were the first devices to get +# attached to the bridge, and that at the point that the local IP is already +# configured. Try the other scenario of attaching these devices to a bridge +# that already has local ports members, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx20 nomaster + ip link set dev vx10 nomaster + + rp1_unset_addr + sleep 5 + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + + rp1_set_addr + sleep 5 +} + +ping_ipv4() +{ + ping_test $h1.10 192.0.2.2 ": local->local vid 10" + ping_test $h1.20 198.51.100.2 ": local->local vid 20" + ping_test $h1.10 192.0.2.3 ": local->remote 1 vid 10" + ping_test $h1.10 192.0.2.4 ": local->remote 2 vid 10" + ping_test $h1.20 198.51.100.3 ": local->remote 1 vid 20" + ping_test $h1.20 198.51.100.4 ": local->remote 2 vid 20" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_sw 2>/dev/null || \ + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_hw +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2") + local counter + local key + + # Packets reach the local host tagged whereas they reach the VxLAN + # devices untagged. In order to be able to use the same filter for + # all counters, make sure the packets also reach the local host + # untagged + bridge vlan add vid $vid dev $swp2 untagged + for counter in "${counters[@]}"; do + flood_counter_install $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $counter + done + bridge vlan add vid $vid dev $swp2 +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local what=$1; shift + local -a expects=("${@}") + + RET=0 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 "flood vlan 10" \ + 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ + 10 0 10 0 10 +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local vid=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local vid=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx10 192.0.2.34" + "$r2_mac vx10 192.0.2.50") + local target + + log_info "unicast vlan 10" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 10 $target + done + + __test_unicast $h2_mac 192.0.2.2 0 10 "local MAC unicast" + __test_unicast $r1_mac 192.0.2.3 1 10 "remote MAC 1 unicast" + __test_unicast $r2_mac 192.0.2.4 3 10 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 10 $target + done + + log_info "unicast vlan 20" + + targets=("$h2_mac $h2" "$r1_mac vx20 192.0.2.34" \ + "$r2_mac vx20 192.0.2.50") + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 20 $target + done + + __test_unicast $h2_mac 198.51.100.2 0 20 "local MAC unicast" + __test_unicast $r1_mac 198.51.100.3 2 20 "remote MAC 1 unicast" + __test_unicast $r2_mac 198.51.100.4 4 20 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 20 $target + done +} + +test_pvid() +{ + local -a expects=(0 0 0 0 0) + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + local vid=10 + + # Check that flooding works + RET=0 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before pvid off" + + # Toggle PVID off and test that flood to remote hosts does not work + RET=0 + + bridge vlan add vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid off" + + # Toggle PVID on and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid on" + + # Add a new VLAN and test that it does not affect flooding + RET=0 + + bridge vlan add vid 30 dev vx10 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + bridge vlan del vid 30 dev vx10 + + log_test "VXLAN: flood after vlan add" + + # Remove currently mapped VLAN and test that flood to remote hosts does + # not work + RET=0 + + bridge vlan del vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan delete" + + # Re-add the VLAN and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan re-add" +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +__test_learning() +{ + local -a expects=(0 0 0 0 0) + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local idx1=$1; shift + local idx2=$1; shift + local vx=vx$vid + + # Check that flooding works + RET=0 + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before learning" + + # Send a packet with source mac set to $mac from host w2 and check that + # a corresponding entry is created in the VxLAN device + RET=0 + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_err $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + log_test "VXLAN: show learned FDB entry" + + # Repeat first test and check that packets only reach host w2 in ns1 + RET=0 + + expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: learned FDB entry" + + # Delete the learned FDB entry from the VxLAN and bridge devices and + # check that packets are flooded + RET=0 + + bridge fdb del dev $vx $mac master self vlan $vid + sleep 1 + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: deletion of learned FDB entry" + + # Re-learn the first FDB entry and check that it is correctly aged-out + RET=0 + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_err $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + sleep 20 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_fail $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_fail $? + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: Ageing of learned FDB entry" + + # Toggle learning on the bridge port and check that the bridge's FDB + # is populated only when it should + RET=0 + + ip link set dev $vx type bridge_slave learning off + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_fail $? + + ip link set dev $vx type bridge_slave learning on + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + log_test "VXLAN: learning toggling on bridge port" +} + +test_learning() +{ + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + local vid=10 + + # Enable learning on the VxLAN devices and set ageing time to 10 seconds + ip link set dev br1 type bridge ageing_time 1000 + ip link set dev vx10 type vxlan ageing 10 + ip link set dev vx10 type vxlan learning + ip link set dev vx20 type vxlan ageing 10 + ip link set dev vx20 type vxlan learning + reapply_config + + log_info "learning vlan 10" + + __test_learning $mac $dst $vid 1 3 + + log_info "learning vlan 20" + + mac=ca:fe:be:ef:13:37 + dst=198.51.100.100 + vid=20 + + __test_learning $mac $dst $vid 2 4 + + # Restore previous settings + ip link set dev vx20 type vxlan nolearning + ip link set dev vx20 type vxlan ageing 300 + ip link set dev vx10 type vxlan nolearning + ip link set dev vx10 type vxlan ageing 300 + ip link set dev br1 type bridge ageing_time 30000 + reapply_config +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh new file mode 100755 index 000000000000..b1b2d1a3164f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 +" +source vxlan_bridge_1q.sh diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 406cc70c571d..4b02933cab8a 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -651,12 +651,13 @@ static void do_flush_datagram(int fd, int type) static void do_rx(int domain, int type, int protocol) { + const int cfg_receiver_wait_ms = 400; uint64_t tstop; int fd; fd = do_setup_rx(domain, type, protocol); - tstop = gettimeofday_ms() + cfg_runtime_ms; + tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms; do { if (type == SOCK_STREAM) do_flush_tcp(fd); diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index c43c6debda06..825ffec85cea 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -25,6 +25,8 @@ readonly path_sysctl_mem="net.core.optmem_max" if [[ "$#" -eq "0" ]]; then $0 4 tcp -t 1 $0 6 tcp -t 1 + $0 4 udp -t 1 + $0 6 udp -t 1 echo "OK. All tests passed" exit 0 fi diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c new file mode 100644 index 000000000000..4ac3e24b7cfc --- /dev/null +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test that sockets listening on a specific address are preferred + * over sockets listening on addr_any. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/dccp.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +static const char *IP4_ADDR = "127.0.0.1"; +static const char *IP6_ADDR = "::1"; +static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; + +static const int PORT = 8888; + +static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, + const char *addr_str) +{ + struct sockaddr_in addr4 = {0}; + struct sockaddr_in6 addr6 = {0}; + struct sockaddr *addr; + int opt, i, sz; + + memset(&addr, 0, sizeof(addr)); + + switch (family) { + case AF_INET: + addr4.sin_family = family; + if (!addr_str) + addr4.sin_addr.s_addr = htonl(INADDR_ANY); + else if (!inet_pton(family, addr_str, &addr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", addr_str); + addr4.sin_port = htons(PORT); + sz = sizeof(addr4); + addr = (struct sockaddr *)&addr4; + break; + case AF_INET6: + addr6.sin6_family = AF_INET6; + if (!addr_str) + addr6.sin6_addr = in6addr_any; + else if (!inet_pton(family, addr_str, &addr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", addr_str); + addr6.sin6_port = htons(PORT); + sz = sizeof(addr6); + addr = (struct sockaddr *)&addr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < count; ++i) { + rcv_fds[i] = socket(family, proto, 0); + if (rcv_fds[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fds[i], addr, sz)) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) + error(1, errno, "tcp: failed to listen on receive port"); + else if (proto == SOCK_DCCP) { + if (setsockopt(rcv_fds[i], SOL_DCCP, + DCCP_SOCKOPT_SERVICE, + &(int) {htonl(42)}, sizeof(int))) + error(1, errno, "failed to setsockopt"); + + if (listen(rcv_fds[i], 10)) + error(1, errno, "dccp: failed to listen on receive port"); + } + } +} + +static int connect_and_send(int family, int proto) +{ + struct sockaddr_in saddr4 = {0}; + struct sockaddr_in daddr4 = {0}; + struct sockaddr_in6 saddr6 = {0}; + struct sockaddr_in6 daddr6 = {0}; + struct sockaddr *saddr, *daddr; + int fd, sz; + + switch (family) { + case AF_INET: + saddr4.sin_family = AF_INET; + saddr4.sin_addr.s_addr = htonl(INADDR_ANY); + saddr4.sin_port = 0; + + daddr4.sin_family = AF_INET; + if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", IP4_ADDR); + daddr4.sin_port = htons(PORT); + + sz = sizeof(saddr4); + saddr = (struct sockaddr *)&saddr4; + daddr = (struct sockaddr *)&daddr4; + break; + case AF_INET6: + saddr6.sin6_family = AF_INET6; + saddr6.sin6_addr = in6addr_any; + + daddr6.sin6_family = AF_INET6; + if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", IP6_ADDR); + daddr6.sin6_port = htons(PORT); + + sz = sizeof(saddr6); + saddr = (struct sockaddr *)&saddr6; + daddr = (struct sockaddr *)&daddr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (proto == SOCK_DCCP && + setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, + &(int){htonl(42)}, sizeof(int))) + error(1, errno, "failed to setsockopt"); + + if (bind(fd, saddr, sz)) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, daddr, sz)) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + return fd; +} + +static int receive_once(int epfd, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, 3); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM || proto == SOCK_DCCP) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + return ev.data.fd; +} + +static void test(int *rcv_fds, int count, int family, int proto, int fd) +{ + struct epoll_event ev; + int epfd, i, send_fd, recv_fd; + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + + ev.events = EPOLLIN; + for (i = 0; i < count; ++i) { + ev.data.fd = rcv_fds[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + send_fd = connect_and_send(family, proto); + + recv_fd = receive_once(epfd, proto); + if (recv_fd != fd) + error(1, 0, "received on an unexpected socket"); + + close(send_fd); + close(epfd); +} + +int main(void) +{ + /* Below we test that a socket listening on a specific address + * is always selected in preference over a socket listening + * on addr_any. Bugs where this is not the case often result + * in sockets created first or last to get picked. So below + * we make sure that there are always addr_any sockets created + * before and after a specific socket is created. + */ + int rcv_fds[10], i; + + fprintf(stderr, "---- UDP IPv4 ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 4, 1, IP4_ADDR); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- UDP IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP6_ADDR); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET6, SOCK_DGRAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- UDP IPv4 mapped to IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP4_MAPPED6); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv4 ----\n"); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 4, 1, IP4_ADDR); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP6_ADDR); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET6, SOCK_STREAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv4 mapped to IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP4_MAPPED6); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- DCCP IPv4 ----\n"); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 4, 1, IP4_ADDR); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_DCCP, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- DCCP IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 4, 1, IP6_ADDR); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET6, SOCK_DCCP, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- DCCP IPv4 mapped to IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 4, 1, IP4_MAPPED6); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_DCCP, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/tools/testing/selftests/net/reuseport_addr_any.sh b/tools/testing/selftests/net/reuseport_addr_any.sh new file mode 100755 index 000000000000..104592f62ad4 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_addr_any.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh ./reuseport_addr_any diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index e101af52d1d6..bb3436f8807f 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -955,6 +955,58 @@ kci_test_ip6erspan() ip netns del "$testns" } +kci_test_fdb_get() +{ + IP="ip -netns testns" + BRIDGE="bridge -netns testns" + brdev="test-br0" + vxlandev="vxlan10" + test_mac=de:ad:be:ef:13:37 + localip="10.0.2.2" + dstip="10.0.2.3" + ret=0 + + bridge fdb help 2>&1 |grep -q 'bridge fdb get' + if [ $? -ne 0 ];then + echo "SKIP: fdb get tests: iproute2 too old" + return $ksft_skip + fi + + ip netns add testns + if [ $? -ne 0 ]; then + echo "SKIP fdb get tests: cannot add net namespace $testns" + return $ksft_skip + fi + + $IP link add "$vxlandev" type vxlan id 10 local $localip \ + dstport 4789 2>/dev/null + check_err $? + $IP link add name "$brdev" type bridge &>/dev/null + check_err $? + $IP link set dev "$vxlandev" master "$brdev" &>/dev/null + check_err $? + $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null + check_err $? + $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null + check_err $? + + $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" + check_err $? + $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" + check_err $? + $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip" + check_err $? + + ip netns del testns &>/dev/null + + if [ $ret -ne 0 ]; then + echo "FAIL: bridge fdb get" + return 1 + fi + + echo "PASS: bridge fdb get" +} + kci_test_rtnl() { kci_add_dummy @@ -979,6 +1031,7 @@ kci_test_rtnl() kci_test_macsec kci_test_ipsec kci_test_ipsec_offload + kci_test_fdb_get kci_del_dummy } diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index bea079edc278..2dc95fda7ef7 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -25,3 +25,13 @@ if [ $? -ne 0 ]; then else echo "[PASS]" fi + +echo "--------------------" +echo "running txring_overwrite test" +echo "--------------------" +./in_netns.sh ./txring_overwrite +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh new file mode 100755 index 000000000000..09f9ed92cbe4 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking VXLAN underlay in a non-default VRF. +# +# It simulates two hypervisors running a VM each using four network namespaces: +# two for the HVs, two for the VMs. +# A small VXLAN tunnel is made between the two hypervisors to have the two vms +# in the same virtual L2: +# +# +-------------------+ +-------------------+ +# | | | | +# | vm-1 netns | | vm-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth-hv | | | | veth-hv | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +------------------------------------+ +# | . | | . | +# | +----------+ | | +----------+ | +# | | veth-tap | | | | veth-tap | | +# | +----+-----+ | | +----+-----+ | +# | | | | | | +# | +--+--+ +--------------+ | | +--------------+ +--+--+ | +# | | br0 | | vrf-underlay | | | | vrf-underlay | | br0 | | +# | +--+--+ +-------+------+ | | +------+-------+ +--+--+ | +# | | | | | | | | +# | +---+----+ +-------+-------+ | | +-------+-------+ +---+----+ | +# | | vxlan0 |....| veth0 |.|...|.| veth0 |....| vxlan0 | | +# | +--------+ | 172.16.0.1/24 | | | | 172.16.0.2/24 | +--------+ | +# | +---------------+ | | +---------------+ | +# | | | | +# | hv-1 netns | | hv-2 netns | +# | | | | +# +-----------------------------------+ +------------------------------------+ +# +# This tests both the connectivity between vm-1 and vm-2, and that the underlay +# can be moved in and out of the vrf by unsetting and setting veth0's master. + +set -e + +cleanup() { + ip link del veth-hv-1 2>/dev/null || true + ip link del veth-tap 2>/dev/null || true + + for ns in hv-1 hv-2 vm-1 vm-2; do + ip netns del $ns || true + done +} + +# Clean start +cleanup &> /dev/null + +[[ $1 == "clean" ]] && exit 0 + +trap cleanup EXIT + +# Setup "Hypervisors" simulated with netns +ip link add veth-hv-1 type veth peer name veth-hv-2 +setup-hv-networking() { + hv=$1 + + ip netns add hv-$hv + ip link set veth-hv-$hv netns hv-$hv + ip -netns hv-$hv link set veth-hv-$hv name veth0 + + ip -netns hv-$hv link add vrf-underlay type vrf table 1 + ip -netns hv-$hv link set vrf-underlay up + ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0 + ip -netns hv-$hv link set veth0 up + + ip -netns hv-$hv link add br0 type bridge + ip -netns hv-$hv link set br0 up + + ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789 + ip -netns hv-$hv link set vxlan0 master br0 + ip -netns hv-$hv link set vxlan0 up +} +setup-hv-networking 1 +setup-hv-networking 2 + +# Check connectivity between HVs by pinging hv-2 from hv-1 +echo -n "Checking HV connectivity " +ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" + +# Setups a "VM" simulated by a netns an a veth pair +setup-vm() { + id=$1 + + ip netns add vm-$id + ip link add veth-tap type veth peer name veth-hv + + ip link set veth-tap netns hv-$id + ip -netns hv-$id link set veth-tap master br0 + ip -netns hv-$id link set veth-tap up + + ip link set veth-hv netns vm-$id + ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv + ip -netns vm-$id link set veth-hv up +} +setup-vm 1 +setup-vm 2 + +# Setup VTEP routes to make ARP work +bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent +bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent + +echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) " +ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" + +# Move the underlay to a non-default VRF +ip -netns hv-1 link set veth0 vrf vrf-underlay +ip -netns hv-1 link set veth0 down +ip -netns hv-1 link set veth0 up +ip -netns hv-2 link set veth0 vrf vrf-underlay +ip -netns hv-2 link set veth0 down +ip -netns hv-2 link set veth0 up + +echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " +ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" diff --git a/tools/testing/selftests/net/txring_overwrite.c b/tools/testing/selftests/net/txring_overwrite.c new file mode 100644 index 000000000000..fd8b1c663c39 --- /dev/null +++ b/tools/testing/selftests/net/txring_overwrite.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Verify that consecutive sends over packet tx_ring are mirrored + * with their original content intact. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <assert.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +const int eth_off = TPACKET_HDRLEN - sizeof(struct sockaddr_ll); +const int cfg_frame_size = 1000; + +static void build_packet(void *buffer, size_t blen, char payload_char) +{ + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + size_t off = 0; + + memset(buffer, 0, blen); + + eth = buffer; + eth->h_proto = htons(ETH_P_IP); + + off += sizeof(*eth); + iph = buffer + off; + iph->ttl = 8; + iph->ihl = 5; + iph->version = 4; + iph->saddr = htonl(INADDR_LOOPBACK); + iph->daddr = htonl(INADDR_LOOPBACK + 1); + iph->protocol = IPPROTO_UDP; + iph->tot_len = htons(blen - off); + iph->check = 0; + + off += sizeof(*iph); + udph = buffer + off; + udph->dest = htons(8000); + udph->source = htons(8001); + udph->len = htons(blen - off); + udph->check = 0; + + off += sizeof(*udph); + memset(buffer + off, payload_char, blen - off); +} + +static int setup_rx(void) +{ + int fdr; + + fdr = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)); + if (fdr == -1) + error(1, errno, "socket r"); + + return fdr; +} + +static int setup_tx(char **ring) +{ + struct sockaddr_ll laddr = {}; + struct tpacket_req req = {}; + int fdt; + + fdt = socket(PF_PACKET, SOCK_RAW, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + laddr.sll_family = AF_PACKET; + laddr.sll_protocol = htons(0); + laddr.sll_ifindex = if_nametoindex("lo"); + if (!laddr.sll_ifindex) + error(1, errno, "if_nametoindex"); + + if (bind(fdt, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind fdt"); + + req.tp_block_size = getpagesize(); + req.tp_block_nr = 1; + req.tp_frame_size = getpagesize(); + req.tp_frame_nr = 1; + + if (setsockopt(fdt, SOL_PACKET, PACKET_TX_RING, + (void *)&req, sizeof(req))) + error(1, errno, "setsockopt ring"); + + *ring = mmap(0, req.tp_block_size * req.tp_block_nr, + PROT_READ | PROT_WRITE, MAP_SHARED, fdt, 0); + if (!*ring) + error(1, errno, "mmap"); + + return fdt; +} + +static void send_pkt(int fdt, void *slot, char payload_char) +{ + struct tpacket_hdr *header = slot; + int ret; + + while (header->tp_status != TP_STATUS_AVAILABLE) + usleep(1000); + + build_packet(slot + eth_off, cfg_frame_size, payload_char); + + header->tp_len = cfg_frame_size; + header->tp_status = TP_STATUS_SEND_REQUEST; + + ret = sendto(fdt, NULL, 0, 0, NULL, 0); + if (ret == -1) + error(1, errno, "kick tx"); +} + +static int read_verify_pkt(int fdr, char payload_char) +{ + char buf[100]; + int ret; + + ret = read(fdr, buf, sizeof(buf)); + if (ret != sizeof(buf)) + error(1, errno, "read"); + + if (buf[60] != payload_char) { + printf("wrong pattern: 0x%x != 0x%x\n", buf[60], payload_char); + return 1; + } + + printf("read: %c (0x%x)\n", buf[60], buf[60]); + return 0; +} + +int main(int argc, char **argv) +{ + const char payload_patterns[] = "ab"; + char *ring; + int fdr, fdt, ret = 0; + + fdr = setup_rx(); + fdt = setup_tx(&ring); + + send_pkt(fdt, ring, payload_patterns[0]); + send_pkt(fdt, ring, payload_patterns[1]); + + ret |= read_verify_pkt(fdr, payload_patterns[0]); + ret |= read_verify_pkt(fdr, payload_patterns[1]); + + if (close(fdt)) + error(1, errno, "close t"); + if (close(fdr)) + error(1, errno, "close r"); + + return ret; +} diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index e94ef8067173..aeac53a99aeb 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -91,6 +91,28 @@ run_one_nat() { wait $(jobs -p) } +run_one_2sock() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + cfg_veth + + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \ + echo "ok" || \ + echo "failed" & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} -p 12345 + sleep 0.1 + # first UDP GSO socket should be closed at this point + ./udpgso_bench_tx ${tx_args} + wait $(jobs -p) +} + run_nat_test() { local -r args=$@ @@ -98,6 +120,13 @@ run_nat_test() { ./in_netns.sh $0 __subprocess_nat $2 rx -r $3 } +run_2sock_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess_2sock $2 rx -G -r $3 +} + run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" @@ -120,6 +149,7 @@ run_all() { run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" @@ -130,6 +160,7 @@ run_all() { run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" } if [ ! -f ../bpf/xdp_dummy.o ]; then @@ -145,4 +176,7 @@ elif [[ $1 == "__subprocess" ]]; then elif [[ $1 == "__subprocess_nat" ]]; then shift run_one_nat $@ +elif [[ $1 == "__subprocess_2sock" ]]; then + shift + run_one_2sock $@ fi diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 0f0628613f81..5670a9ffd8eb 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -35,6 +35,9 @@ run_udp() { echo "udp gso" run_in_netns ${args} -S 0 + + echo "udp gso zerocopy" + run_in_netns ${args} -S 0 -z } run_tcp() { diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile new file mode 100644 index 000000000000..47ed6cef93fb --- /dev/null +++ b/tools/testing/selftests/netfilter/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for netfilter selftests + +TEST_PROGS := nft_trans_stress.sh + +include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config new file mode 100644 index 000000000000..1017313e41a8 --- /dev/null +++ b/tools/testing/selftests/netfilter/config @@ -0,0 +1,2 @@ +CONFIG_NET_NS=y +NF_TABLES_INET=y diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh new file mode 100755 index 000000000000..f1affd12c4b1 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# This test is for stress-testing the nf_tables config plane path vs. +# packet path processing: Make sure we never release rules that are +# still visible to other cpus. +# +# set -e + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +testns=testns1 +tables="foo bar baz quux" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +tmp=$(mktemp) + +for table in $tables; do + echo add table inet "$table" >> "$tmp" + echo flush table inet "$table" >> "$tmp" + + echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp" + echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp" + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + echo "add chain inet $table $chain" >> "$tmp" + done + + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + for BASE in INPUT OUTPUT; do + echo "add rule inet $table $BASE counter jump $chain" >> "$tmp" + done + echo "add rule inet $table $chain counter return" >> "$tmp" + done +done + +ip netns add "$testns" +ip -netns "$testns" link set lo up + +lscpu | grep ^CPU\(s\): | ( read cpu cpunum ; +cpunum=$((cpunum-1)) +for i in $(seq 0 $cpunum);do + mask=$(printf 0x%x $((1<<$i))) + ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null & + ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null & +done) + +sleep 1 + +for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done + +for table in $tables;do + randsleep=$((RANDOM%10)) + sleep $randsleep + ip netns exec "$testns" nft delete table inet $table 2>/dev/null +done + +randsleep=$((RANDOM%10)) +sleep $randsleep + +pkill -9 ping + +wait + +rm -f "$tmp" +ip netns del "$testns" diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c index 1b0e9e9a2ddc..f2fa101c5a6a 100644 --- a/tools/testing/selftests/powerpc/mm/wild_bctr.c +++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c @@ -47,8 +47,9 @@ static int ok(void) return 0; } -#define REG_POISON 0x5a5aUL -#define POISONED_REG(n) ((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n)) +#define REG_POISON 0x5a5a +#define POISONED_REG(n) ((((unsigned long)REG_POISON) << 48) | ((n) << 32) | \ + (((unsigned long)REG_POISON) << 16) | (n)) static inline void poison_regs(void) { @@ -105,6 +106,20 @@ static void dump_regs(void) } } +#ifdef _CALL_AIXDESC +struct opd { + unsigned long ip; + unsigned long toc; + unsigned long env; +}; +static struct opd bad_opd = { + .ip = BAD_NIP, +}; +#define BAD_FUNC (&bad_opd) +#else +#define BAD_FUNC BAD_NIP +#endif + int test_wild_bctr(void) { int (*func_ptr)(void); @@ -133,7 +148,7 @@ int test_wild_bctr(void) poison_regs(); - func_ptr = (int (*)(void))BAD_NIP; + func_ptr = (int (*)(void))BAD_FUNC; func_ptr(); FAIL_IF(1); /* we didn't segv? */ diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index 6f1f4a6e1ecb..85744425b08d 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -13,7 +13,7 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -/* Test readlink /proc/self/map_files/... with address 0. */ +/* Test readlink /proc/self/map_files/... with minimum address. */ #include <errno.h> #include <sys/types.h> #include <sys/stat.h> @@ -47,6 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b) int main(void) { const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); +#ifdef __arm__ + unsigned long va = 2 * PAGE_SIZE; +#else + unsigned long va = 0; +#endif void *p; int fd; unsigned long a, b; @@ -55,7 +60,7 @@ int main(void) if (fd == -1) return 1; - p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); if (p == MAP_FAILED) { if (errno == EPERM) return 2; diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index 7a60b85e148f..c5cc160948b3 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,2 +1,5 @@ __pycache__/ *.pyc +plugins/ +*.xml +*.tap diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py index 3ee9a6dacb52..1d9e279331eb 100644 --- a/tools/testing/selftests/tc-testing/TdcPlugin.py +++ b/tools/testing/selftests/tc-testing/TdcPlugin.py @@ -18,11 +18,12 @@ class TdcPlugin: if self.args.verbose > 1: print(' -- {}.post_suite'.format(self.sub_class)) - def pre_case(self, test_ordinal, testid): + def pre_case(self, test_ordinal, testid, test_name): '''run commands before test_runner does one test''' if self.args.verbose > 1: print(' -- {}.pre_case'.format(self.sub_class)) self.args.testid = testid + self.args.test_name = test_name self.args.test_ordinal = test_ordinal def post_case(self): diff --git a/tools/testing/selftests/tc-testing/TdcResults.py b/tools/testing/selftests/tc-testing/TdcResults.py new file mode 100644 index 000000000000..1e4d95fdf8d0 --- /dev/null +++ b/tools/testing/selftests/tc-testing/TdcResults.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +from enum import Enum + +class ResultState(Enum): + noresult = -1 + skip = 0 + success = 1 + fail = 2 + +class TestResult: + def __init__(self, test_id="", test_name=""): + self.test_id = test_id + self.test_name = test_name + self.result = ResultState.noresult + self.failmsg = "" + self.errormsg = "" + self.steps = [] + + def set_result(self, result): + if (isinstance(result, ResultState)): + self.result = result + return True + else: + raise TypeError('Unknown result type, must be type ResultState') + + def get_result(self): + return self.result + + def set_errormsg(self, errormsg): + self.errormsg = errormsg + return True + + def append_errormsg(self, errormsg): + self.errormsg = '{}\n{}'.format(self.errormsg, errormsg) + + def get_errormsg(self): + return self.errormsg + + def set_failmsg(self, failmsg): + self.failmsg = failmsg + return True + + def append_failmsg(self, failmsg): + self.failmsg = '{}\n{}'.format(self.failmsg, failmsg) + + def get_failmsg(self): + return self.failmsg + + def add_steps(self, newstep): + if type(newstep) == list: + self.steps.extend(newstep) + elif type(newstep) == str: + self.steps.append(step) + else: + raise TypeError('TdcResults.add_steps() requires a list or str') + + def get_executed_steps(self): + return self.steps + +class TestSuiteReport(): + _testsuite = [] + + def add_resultdata(self, result_data): + if isinstance(result_data, TestResult): + self._testsuite.append(result_data) + return True + + def count_tests(self): + return len(self._testsuite) + + def count_failures(self): + return sum(1 for t in self._testsuite if t.result == ResultState.fail) + + def count_skips(self): + return sum(1 for t in self._testsuite if t.result == ResultState.skip) + + def find_result(self, test_id): + return next((tr for tr in self._testsuite if tr.test_id == test_id), None) + + def update_result(self, result_data): + orig = self.find_result(result_data.test_id) + if orig != None: + idx = self._testsuite.index(orig) + self._testsuite[idx] = result_data + else: + self.add_resultdata(result_data) + + def format_tap(self): + ftap = "" + ftap += '1..{}\n'.format(self.count_tests()) + index = 1 + for t in self._testsuite: + if t.result == ResultState.fail: + ftap += 'not ' + ftap += 'ok {} {} - {}'.format(str(index), t.test_id, t.test_name) + if t.result == ResultState.skip or t.result == ResultState.noresult: + ftap += ' # skipped - {}\n'.format(t.errormsg) + elif t.result == ResultState.fail: + if len(t.steps) > 0: + ftap += '\tCommands executed in this test case:' + for step in t.steps: + ftap += '\n\t\t{}'.format(step) + ftap += '\n\t{}'.format(t.failmsg) + ftap += '\n' + index += 1 + return ftap + + def format_xunit(self): + from xml.sax.saxutils import escape + xunit = "<testsuites>\n" + xunit += '\t<testsuite tests=\"{}\" skips=\"{}\">\n'.format(self.count_tests(), self.count_skips()) + for t in self._testsuite: + xunit += '\t\t<testcase classname=\"{}\" '.format(escape(t.test_id)) + xunit += 'name=\"{}\">\n'.format(escape(t.test_name)) + if t.failmsg: + xunit += '\t\t\t<failure>\n' + if len(t.steps) > 0: + xunit += 'Commands executed in this test case:\n' + for step in t.steps: + xunit += '\t{}\n'.format(escape(step)) + xunit += 'FAILURE: {}\n'.format(escape(t.failmsg)) + xunit += '\t\t\t</failure>\n' + if t.errormsg: + xunit += '\t\t\t<error>\n{}\n'.format(escape(t.errormsg)) + xunit += '\t\t\t</error>\n' + if t.result == ResultState.skip: + xunit += '\t\t\t<skipped/>\n' + xunit += '\t\t</testcase>\n' + xunit += '\t</testsuite>\n' + xunit += '</testsuites>\n' + return xunit diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py index 477a7bd7d7fb..e00c798de0bb 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py @@ -11,6 +11,7 @@ from string import Template import subprocess import time from TdcPlugin import TdcPlugin +from TdcResults import * from tdc_config import * @@ -21,6 +22,7 @@ class SubPlugin(TdcPlugin): def __init__(self): self.sub_class = 'valgrind/SubPlugin' self.tap = '' + self._tsr = TestSuiteReport() super().__init__() def pre_suite(self, testcount, testidlist): @@ -34,10 +36,14 @@ class SubPlugin(TdcPlugin): def post_suite(self, index): '''run commands after test_runner goes into a test loop''' super().post_suite(index) - self._add_to_tap('\n|---\n') if self.args.verbose > 1: print('{}.post_suite'.format(self.sub_class)) - print('{}'.format(self.tap)) + #print('{}'.format(self.tap)) + for xx in range(index - 1, self.testcount): + res = TestResult('{}-mem'.format(self.testidlist[xx]), 'Test skipped') + res.set_result(ResultState.skip) + res.set_errormsg('Skipped because of prior setup/teardown failure') + self._add_results(res) if self.args.verbose < 4: subprocess.check_output('rm -f vgnd-*.log', shell=True) @@ -128,8 +134,17 @@ class SubPlugin(TdcPlugin): nle_num = int(nle_mo.group(1)) mem_results = '' + res = TestResult('{}-mem'.format(self.args.testid), + '{} memory leak check'.format(self.args.test_name)) if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0): mem_results += 'not ' + res.set_result(ResultState.fail) + res.set_failmsg('Memory leak detected') + res.append_failmsg(content) + else: + res.set_result(ResultState.success) + + self._add_results(res) mem_results += 'ok {} - {}-mem # {}\n'.format( self.args.test_ordinal, self.args.testid, 'memory leak check') @@ -138,5 +153,8 @@ class SubPlugin(TdcPlugin): print('{}'.format(content)) self._add_to_tap(content) + def _add_results(self, res): + self._tsr.add_resultdata(res) + def _add_to_tap(self, more_tap_output): self.tap += more_tap_output diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 87a04a8a5945..e6e4ce80a726 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -23,6 +23,7 @@ from tdc_config import * from tdc_helper import * import TdcPlugin +from TdcResults import * class PluginMgrTestFail(Exception): @@ -60,10 +61,10 @@ class PluginMgr: for pgn_inst in reversed(self.plugin_instances): pgn_inst.post_suite(index) - def call_pre_case(self, test_ordinal, testid): + def call_pre_case(self, test_ordinal, testid, test_name): for pgn_inst in self.plugin_instances: try: - pgn_inst.pre_case(test_ordinal, testid) + pgn_inst.pre_case(test_ordinal, testid, test_name) except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) @@ -102,7 +103,6 @@ class PluginMgr: self.argparser = argparse.ArgumentParser( description='Linux TC unit tests') - def replace_keywords(cmd): """ For a given executable command, substitute any known @@ -131,12 +131,16 @@ def exec_cmd(args, pm, stage, command): stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ENVIR) - (rawout, serr) = proc.communicate() - if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8") - else: - foutput = rawout.decode("utf-8") + try: + (rawout, serr) = proc.communicate(timeout=NAMES['TIMEOUT']) + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8", errors="ignore") + else: + foutput = rawout.decode("utf-8", errors="ignore") + except subprocess.TimeoutExpired: + foutput = "Command \"{}\" timed out\n".format(command) + proc.returncode = 255 proc.stdout.close() proc.stderr.close() @@ -169,6 +173,8 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None): file=sys.stderr) print("\n{} *** Error message: \"{}\"".format(prefix, foutput), file=sys.stderr) + print("returncode {}; expected {}".format(proc.returncode, + exit_codes)) print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr) print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr) print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr) @@ -181,6 +187,7 @@ def run_one_test(pm, args, index, tidx): result = True tresult = "" tap = "" + res = TestResult(tidx['id'], tidx['name']) if args.verbose > 0: print("\t====================\n=====> ", end="") print("Test " + tidx["id"] + ": " + tidx["name"]) @@ -188,19 +195,26 @@ def run_one_test(pm, args, index, tidx): # populate NAMES with TESTID for this test NAMES['TESTID'] = tidx['id'] - pm.call_pre_case(index, tidx['id']) + pm.call_pre_case(index, tidx['id'], tidx['name']) prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"]) if (args.verbose > 0): print('-----> execute stage') pm.call_pre_execute() (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"]) - exit_code = p.returncode + if p: + exit_code = p.returncode + else: + exit_code = None + pm.call_post_execute() - if (exit_code != int(tidx["expExitCode"])): - result = False - print("exit:", exit_code, int(tidx["expExitCode"])) + if (exit_code is None or exit_code != int(tidx["expExitCode"])): + print("exit: {!r}".format(exit_code)) + print("exit: {}".format(int(tidx["expExitCode"]))) + #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"]))) + res.set_result(ResultState.fail) + res.set_failmsg('Command exited with {}, expected {}\n{}'.format(exit_code, tidx["expExitCode"], procout)) print(procout) else: if args.verbose > 0: @@ -211,20 +225,15 @@ def run_one_test(pm, args, index, tidx): if procout: match_index = re.findall(match_pattern, procout) if len(match_index) != int(tidx["matchCount"]): - result = False + res.set_result(ResultState.fail) + res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + else: + res.set_result(ResultState.success) elif int(tidx["matchCount"]) != 0: - result = False - - if not result: - tresult += 'not ' - tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name']) - tap += tresult - - if result == False: - if procout: - tap += procout + res.set_result(ResultState.fail) + res.set_failmsg('No output generated by verify command.') else: - tap += 'No output!\n' + res.set_result(ResultState.success) prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout) pm.call_post_case() @@ -233,7 +242,7 @@ def run_one_test(pm, args, index, tidx): # remove TESTID from NAMES del(NAMES['TESTID']) - return tap + return res def test_runner(pm, args, filtered_tests): """ @@ -253,25 +262,15 @@ def test_runner(pm, args, filtered_tests): emergency_exit = False emergency_exit_message = '' - if args.notap: - if args.verbose: - tap = 'notap requested: omitting test plan\n' - else: - tap = str(index) + ".." + str(tcount) + "\n" + tsr = TestSuiteReport() + try: pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist]) except Exception as ee: ex_type, ex, ex_tb = sys.exc_info() print('Exception {} {} (caught in pre_suite).'. format(ex_type, ex)) - # when the extra print statements are uncommented, - # the traceback does not appear between them - # (it appears way earlier in the tdc.py output) - # so don't bother ... - # print('--------------------(') - # print('traceback') traceback.print_tb(ex_tb) - # print('--------------------)') emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex) emergency_exit = True stage = 'pre-SUITE' @@ -287,15 +286,26 @@ def test_runner(pm, args, filtered_tests): if args.verbose > 1: print('Not executing test {} {} because DEV2 not defined'. format(tidx['id'], tidx['name'])) + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg('Not executed because DEV2 is not defined') + tsr.add_resultdata(res) continue try: badtest = tidx # in case it goes bad - tap += run_one_test(pm, args, index, tidx) + res = run_one_test(pm, args, index, tidx) + tsr.add_resultdata(res) except PluginMgrTestFail as pmtf: ex_type, ex, ex_tb = sys.exc_info() stage = pmtf.stage message = pmtf.message output = pmtf.output + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg(pmtf.message) + res.set_failmsg(pmtf.output) + tsr.add_resultdata(res) + index += 1 print(message) print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'. format(ex_type, ex, index, tidx['id'], tidx['name'], stage)) @@ -314,16 +324,16 @@ def test_runner(pm, args, filtered_tests): # if we failed in setup or teardown, # fill in the remaining tests with ok-skipped count = index - if not args.notap: - tap += 'about to flush the tap output if tests need to be skipped\n' - if tcount + 1 != index: - for tidx in testlist[index - 1:]: - msg = 'skipped - previous {} failed'.format(stage) - tap += 'ok {} - {} # {} {} {}\n'.format( - count, tidx['id'], msg, index, badtest.get('id', '--Unknown--')) - count += 1 - tap += 'done flushing skipped test tap output\n' + if tcount + 1 != count: + for tidx in testlist[count - 1:]: + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + msg = 'skipped - previous {} failed {} {}'.format(stage, + index, badtest.get('id', '--Unknown--')) + res.set_errormsg(msg) + tsr.add_resultdata(res) + count += 1 if args.pause: print('Want to pause\nPress enter to continue ...') @@ -332,7 +342,7 @@ def test_runner(pm, args, filtered_tests): pm.call_post_suite(index) - return tap + return tsr def has_blank_ids(idlist): """ @@ -373,6 +383,10 @@ def set_args(parser): Set the command line arguments for tdc. """ parser.add_argument( + '--outfile', type=str, + help='Path to the file in which results should be saved. ' + + 'Default target is the current directory.') + parser.add_argument( '-p', '--path', type=str, help='The full path to the tc executable to use') sg = parser.add_argument_group( @@ -408,8 +422,9 @@ def set_args(parser): '-v', '--verbose', action='count', default=0, help='Show the commands that are being run') parser.add_argument( - '-N', '--notap', action='store_true', - help='Suppress tap results for command under test') + '--format', default='tap', const='tap', nargs='?', + choices=['none', 'xunit', 'tap'], + help='Specify the format for test results. (Default: TAP)') parser.add_argument('-d', '--device', help='Execute the test case in flower category') parser.add_argument( @@ -430,6 +445,8 @@ def check_default_settings(args, remaining, pm): NAMES['TC'] = args.path if args.device != None: NAMES['DEV2'] = args.device + if 'TIMEOUT' not in NAMES: + NAMES['TIMEOUT'] = None if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) @@ -624,12 +641,30 @@ def set_operation_mode(pm, args): if len(alltests): catresults = test_runner(pm, args, alltests) + if args.format == 'none': + print('Test results output suppression requested\n') + else: + print('\nAll test results: \n') + if args.format == 'xunit': + suffix = 'xml' + res = catresults.format_xunit() + elif args.format == 'tap': + suffix = 'tap' + res = catresults.format_tap() + print(res) + print('\n\n') + if not args.outfile: + fname = 'test-results.{}'.format(suffix) + else: + fname = args.outfile + with open(fname, 'w') as fh: + fh.write(res) + fh.close() + if os.getenv('SUDO_UID') is not None: + os.chown(fname, uid=int(os.getenv('SUDO_UID')), + gid=int(os.getenv('SUDO_GID'))) else: - catresults = 'No tests found\n' - if args.notap: - print('Tap output suppression requested\n') - else: - print('All test results: \n\n{}'.format(catresults)) + print('No tests found\n') def main(): """ diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index d651bc1501bd..6d91e48c2625 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -15,6 +15,8 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'BATCH_FILE': './batch.txt', + # Length of time in seconds to wait before terminating a command + 'TIMEOUT': 12, # Name of the namespace to use 'NS': 'tcut', # Directory containing eBPF test programs |